From c06e2d16e4791bdd4bb36e661d36bec21c627c89 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 25 Oct 2011 23:02:47 -0700 Subject: [PATCH 0001/1087] initial commit of rocket chisel project, riscv assembly tests and benchmarks --- rocket/src/main/scala/arbiter.scala | 74 ++++ rocket/src/main/scala/consts.scala | 153 ++++++++ rocket/src/main/scala/cpu.scala | 95 +++++ rocket/src/main/scala/ctrl.scala | 405 ++++++++++++++++++++ rocket/src/main/scala/ctrl_util.scala | 62 +++ rocket/src/main/scala/dcache.scala | 251 ++++++++++++ rocket/src/main/scala/divider.scala | 147 +++++++ rocket/src/main/scala/dpath.scala | 345 +++++++++++++++++ rocket/src/main/scala/dpath_alu.scala | 90 +++++ rocket/src/main/scala/dpath_util.scala | 184 +++++++++ rocket/src/main/scala/icache.scala | 120 ++++++ rocket/src/main/scala/icache_prefetch.scala | 91 +++++ rocket/src/main/scala/instructions.scala | 162 ++++++++ rocket/src/main/scala/memory.scala | 124 ++++++ rocket/src/main/scala/multiplier.scala | 61 +++ rocket/src/main/scala/queues.scala | 225 +++++++++++ rocket/src/main/scala/top.scala | 54 +++ rocket/src/main/scala/writeback.scala | 56 +++ 18 files changed, 2699 insertions(+) create mode 100644 rocket/src/main/scala/arbiter.scala create mode 100644 rocket/src/main/scala/consts.scala create mode 100644 rocket/src/main/scala/cpu.scala create mode 100644 rocket/src/main/scala/ctrl.scala create mode 100644 rocket/src/main/scala/ctrl_util.scala create mode 100644 rocket/src/main/scala/dcache.scala create mode 100644 rocket/src/main/scala/divider.scala create mode 100644 rocket/src/main/scala/dpath.scala create mode 100644 rocket/src/main/scala/dpath_alu.scala create mode 100644 rocket/src/main/scala/dpath_util.scala create mode 100644 rocket/src/main/scala/icache.scala create mode 100644 rocket/src/main/scala/icache_prefetch.scala create mode 100644 rocket/src/main/scala/instructions.scala create mode 100644 rocket/src/main/scala/memory.scala create mode 100644 
rocket/src/main/scala/multiplier.scala create mode 100644 rocket/src/main/scala/queues.scala create mode 100644 rocket/src/main/scala/top.scala create mode 100644 rocket/src/main/scala/writeback.scala diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala new file mode 100644 index 00000000..eaf0241d --- /dev/null +++ b/rocket/src/main/scala/arbiter.scala @@ -0,0 +1,74 @@ +package Top { + +import Chisel._; +import Node._; +import Constants._; + +class ioMem() extends Bundle +{ + val req_val = Bool('output); + val req_rdy = Bool('input); + val req_rw = Bool('output); + val req_addr = UFix(32, 'output); + val req_wdata = Bits(128, 'output); + val req_tag = Bits(4, 'output); + + val resp_val = Bool('input); + val resp_tag = Bits(4, 'input); + val resp_data = Bits(128, 'input); +} + +class ioArbiter extends Bundle() { + val mem = new ioMem(); + val dcache = new ioDcache(); +// val icache = new ioIcache(); + val icache = new ioIPrefetcherMem().flip(); +} + +class rocketMemArbiter extends Component { + val io = new ioArbiter(); + + // ***************************** + // Interface to memory + // ***************************** + + // Memory request is valid if either icache or dcache have a valid request + io.mem.req_val := (io.icache.req_val || io.dcache.req_val); + + // Set read/write bit. 
Icache always reads + io.mem.req_rw := Mux(io.icache.req_val,Bool(false),io.dcache.req_rw); + + // Give priority to Icache + io.mem.req_addr := Mux(io.icache.req_val,io.icache.req_addr,io.dcache.req_addr); + + // high bit of tag=0 for I$, tag=1 for D$ +// io.mem.req_tag := Mux(io.icache.req_val,Bits(0,4),Bits(1,4)); + io.mem.req_tag := Mux(io.icache.req_val, + Cat(Bits(0,1), io.icache.req_tag), + Cat(Bits(1,1), io.dcache.req_tag)); + + // Just pass through write data (only D$ will write) + io.mem.req_wdata := io.dcache.req_wdata; + + // ***************************** + // Interface to caches + // ***************************** + + // Ready for request from cache if the memory is ready. Give priority to I$ + io.icache.req_rdy := io.mem.req_rdy; + io.dcache.req_rdy := io.mem.req_rdy && !io.icache.req_val; + + // Response will only be valid for D$ or I$ not both because of tag bits + io.icache.resp_val := io.mem.resp_val && !io.mem.resp_tag(3).toBool; + io.dcache.resp_val := io.mem.resp_val && io.mem.resp_tag(3).toBool; + + // Feed through data to both + io.icache.resp_data := io.mem.resp_data; + io.dcache.resp_data := io.mem.resp_data; + + io.icache.resp_tag := io.mem.resp_tag(2,0); + io.dcache.resp_tag := io.mem.resp_tag(2,0); + +} + +} diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala new file mode 100644 index 00000000..e5660ca6 --- /dev/null +++ b/rocket/src/main/scala/consts.scala @@ -0,0 +1,153 @@ +package Top { + +import Chisel._ + +object Constants +{ + val BR_N = UFix(0, 4); + val BR_EQ = UFix(1, 4); + val BR_NE = UFix(2, 4); + val BR_LT = UFix(3, 4); + val BR_LTU = UFix(4, 4); + val BR_GE = UFix(5, 4); + val BR_GEU = UFix(6, 4); + val BR_J = UFix(7, 4); + val BR_JR = UFix(8, 4); + + val PC_4 = UFix(0, 3); + val PC_BTB = UFix(1, 3); + val PC_EX4 = UFix(2, 3); + val PC_BR = UFix(3, 3); + val PC_J = UFix(4, 3); + val PC_JR = UFix(5, 3); + val PC_PCR = UFix(6, 3); + + val KF_Y = UFix(1, 1); + val KF_N = UFix(0, 1); + + val REN_Y 
= UFix(1, 1); + val REN_N = UFix(0, 1); + + val AS_X = UFix(0, 1); + val AS_IMM = UFix(0, 1); + val AS_RS2 = UFix(1, 1); + + val A2_X = UFix(0, 2); + val A2_0 = UFix(0, 2); + val A2_SEXT = UFix(1, 2); + val A2_RS2 = UFix(2, 2); + val A2_SPLIT = UFix(3, 2); + + val A1_X = UFix(0, 1); + val A1_RS1 = UFix(0, 1); + val A1_LUI = UFix(1, 1); + + val MUL_X = UFix(0, 3); + val MUL_NO = UFix(0, 3); + val MUL_64 = UFix(1, 3); + val MUL_64H = UFix(2, 3); + val MUL_64HU = UFix(3, 3); + val MUL_64HSU = UFix(4, 3); + val MUL_32 = UFix(5, 3); + + val DIV_X = UFix(0, 4); + val DIV_NO = UFix(0, 4); + val DIV_64D = UFix(1, 4); + val DIV_64DU = UFix(2, 4); + val DIV_64R = UFix(3, 4); + val DIV_64RU = UFix(4, 4); + val DIV_32D = UFix(5, 4); + val DIV_32DU = UFix(6, 4); + val DIV_32R = UFix(7, 4); + val DIV_32RU = UFix(8, 4); + + val M_N = UFix(0, 1); + val M_Y = UFix(1, 1); + + val WEN_N = UFix(0, 1); + val WEN_Y = UFix(1, 1); + + val WA_X = UFix(0, 1); + val WA_RD = UFix(0, 1); + val WA_RA = UFix(1, 1); + + val WB_X = UFix(0, 3); + val WB_PC = UFix(0, 3); + val WB_ALU = UFix(1, 3); + val WB_PCR = UFix(2, 3); + val WB_CR = UFix(3, 3); + val WB_MUL = UFix(4, 3); + + val N = UFix(0, 1); + val Y = UFix(1, 1); + val Y_SH = UFix(1, 1); + +// val FPU_N = UFix(0, 1); +// val FPU_Y = FPU_N; + + val FWBQ_N = UFix(0, 1); + val FWBQ_Y = UFix(1, 1); + + val FSDQ_N = UFix(0, 1); + val FSDQ_Y = UFix(1, 1); + + val FN_X = UFix(0, 4); + val FN_ADD = UFix(0, 4); + val FN_SUB = UFix(1, 4); + val FN_SLT = UFix(2, 4); + val FN_SLTU = UFix(3, 4); + val FN_AND = UFix(4, 4); + val FN_OR = UFix(5, 4); + val FN_XOR = UFix(6, 4); + val FN_SL = UFix(7, 4); + val FN_SR = UFix(8, 4); + val FN_SRA = UFix(9, 4); + + val DW_X = UFix(0, 1); + val DW_32 = UFix(0, 1); + val DW_64 = UFix(1, 1); + val DW_XPR = UFix(1, 1); + + val RA = UFix(1, 5); + + val MT_X = Bits("b000", 3); + val MT_B = Bits("b000", 3); + val MT_H = Bits("b001", 3); + val MT_W = Bits("b010", 3); + val MT_D = Bits("b011", 3); + val MT_BU = 
Bits("b100", 3); + val MT_HU = Bits("b101", 3); + val MT_WU = Bits("b110", 3); + + val M_X = UFix(0, 4); + val M_XRD = Bits("b0000", 4); // int load + val M_XWR = Bits("b0001", 4); // int store + val M_FRD = Bits("b0010", 4); // fp load + val M_FWR = Bits("b0011", 4); // fp store + val M_FLA = Bits("b0100", 4); // flush cache + val M_XA_ADD = Bits("b1000", 4); + val M_XA_SWAP = Bits("b1001", 4); + val M_XA_AND = Bits("b1010", 4); + val M_XA_OR = Bits("b1011", 4); + val M_XA_MIN = Bits("b1100", 4); + val M_XA_MAX = Bits("b1101", 4); + val M_XA_MINU = Bits("b1110", 4); + val M_XA_MAXU = Bits("b1111", 4); + + val PCR_STATUS = UFix( 0, 5); + val PCR_EPC = UFix( 1, 5); + val PCR_BADVADDR = UFix( 2, 5); + val PCR_EVEC = UFix( 3, 5); + val PCR_COUNT = UFix( 4, 5); + val PCR_COMPARE = UFix( 5, 5); + val PCR_CAUSE = UFix( 6, 5); + val PCR_MEMSIZE = UFix( 8, 5); + val PCR_LOG = UFix(10, 5); + val PCR_TOHOST = UFix(16, 5); + val PCR_FROMHOST = UFix(17, 5); + val PCR_CONSOLE = UFix(18, 5); + val PCR_K0 = UFix(24, 5); + val PCR_K1 = UFix(25, 5); +} + +} diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala new file mode 100644 index 00000000..a891582e --- /dev/null +++ b/rocket/src/main/scala/cpu.scala @@ -0,0 +1,95 @@ +package Top { + +import Chisel._ +import Node._; +import Constants._; + +class ioDebug extends Bundle() +{ + val error_mode = Bool('output); + val log_control = Bool('output); +} + +class ioHost(view: List[String] = null) extends Bundle(view) +{ + val start = Bool('input); + val from_wen = Bool('input); + val from = Bits(32, 'input); + val to = Bits(32, 'output); +} + +class ioConsole(view: List[String] = null) extends Bundle(view) +{ + val rdy = Bool('input); + val valid = Bool('output); + val bits = Bits(8, 'output); +} + +class ioRocket extends Bundle() +{ + val debug = new ioDebug(); + val console = new ioConsole(); + val host = new ioHost(); + val imem = new ioImem().flip(); + val dmem = new ioDmem().flip(); +} + +class rocketProc 
extends Component +{ + val io = new ioRocket(); + + val ctrl = new rocketCtrl(); + val dpath = new rocketDpath(); + val mem = new rocketMemory(); + val wb = new rocketWriteback(); + + dpath.io.host ^^ io.host; + dpath.io.debug ^^ io.debug; + // dpath.io.wb <> wb.io; + dpath.io.wb.wen <> wb.io.wb_wen; + dpath.io.wb.waddr <> wb.io.wb_waddr; + dpath.io.wb.wdata <> wb.io.wb_wdata; + dpath.io.imem.req_addr ^^ io.imem.req_addr; + dpath.io.imem.resp_data ^^ io.imem.resp_data; + + ctrl.io.ctrl <> dpath.io.ctrl; + ctrl.io.dpath <> dpath.io.dpath; + // ctrl.io.mem <> mem.io; + ctrl.io.mem.mrq_val <> mem.io.mem_mrq_val; + ctrl.io.mem.mrq_cmd <> mem.io.mem_mrq_cmd; + ctrl.io.mem.mrq_type <> mem.io.mem_mrq_type; + ctrl.io.mem.mrq_deq <> mem.io.mem_mrq_deq; + ctrl.io.mem.xsdq_rdy <> mem.io.mem_xsdq_rdy; + ctrl.io.mem.xsdq_val <> mem.io.mem_xsdq_val; + ctrl.io.mem.dc_busy := !io.dmem.req_rdy; + ctrl.io.host.start ^^ io.host.start; + ctrl.io.imem ^^ io.imem; +// ctrl.io.console ^^ io.console; + ctrl.io.wb.waddr <> wb.io.wb_waddr; + ctrl.io.wb.wen <> wb.io.wb_wen; + + // TODO: SHOULD BE THE FOLLOWING BUT NEED BETTER INTERFACE CHUNKS + // mem.io.dmem >< io.dmem; + + mem.io.dmem_req_val ^^ io.dmem.req_val; + mem.io.dmem_req_rdy ^^ io.dmem.req_rdy; + mem.io.dmem_req_op ^^ io.dmem.req_op; + mem.io.dmem_req_addr ^^ io.dmem.req_addr; + mem.io.dmem_req_data ^^ io.dmem.req_data; + mem.io.dmem_req_wmask ^^ io.dmem.req_wmask; + mem.io.dmem_req_tag ^^ io.dmem.req_tag; + + mem.io.dpath_rs2 <> dpath.io.dpath.rs2; + mem.io.dpath_waddr <> dpath.io.dpath.waddr; + mem.io.dpath_alu_out <> dpath.io.dpath.alu_out; + + wb.io.dmem_resp_val ^^ io.dmem.resp_val; + wb.io.dmem_resp_data ^^ io.dmem.resp_data; + wb.io.dmem_resp_tag ^^ io.dmem.resp_tag; + + io.console.bits := dpath.io.dpath.rs1(7,0); + io.console.valid := ctrl.io.console.valid; + ctrl.io.console.rdy := io.console.rdy; +} + +} diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala new file mode 100644 index 
00000000..90899de1 --- /dev/null +++ b/rocket/src/main/scala/ctrl.scala @@ -0,0 +1,405 @@ +package Top { + +import Chisel._ +import Node._; + +import Constants._ +import Instructions._ + +class ioCtrl extends Bundle() +{ + val sel_pc = UFix(3, 'output); + val wen_btb = Bool('output); + val stallf = Bool('output); + val stalld = Bool('output); + val killf = Bool('output); + val killd = Bool('output); + val ren2 = Bool('output); + val ren1 = Bool('output); + val sel_alu2 = UFix(2, 'output); + val sel_alu1 = Bool('output); + val fn_dw = Bool('output); + val fn_alu = UFix(4, 'output); + val mul_val = Bool('output); + val mul_fn = UFix(3, 'output); + val mul_wb = Bool('output); + val div_val = Bool('output); + val div_fn = UFix(4, 'output); + val div_wb = Bool('output); + val wen = Bool('output); + val sel_wa = Bool('output); + val sel_wb = UFix(3, 'output); + val ren_pcr = Bool('output); + val wen_pcr = Bool('output); + val xcpt_illegal = Bool('output); + val xcpt_privileged = Bool('output); + val xcpt_fpu = Bool('output); + val xcpt_syscall = Bool('output); + val eret = Bool('output); +} + +class ioCtrlDpath extends Bundle() +{ + val btb_hit = Bool('input); + val inst = UFix(32, 'input); + val br_eq = Bool('input); + val br_lt = Bool('input); + val br_ltu = Bool('input); + val div_rdy = Bool('input); + val div_result_val = Bool('input); + val mul_result_val = Bool('input); + val wen = Bool('input); + val waddr = UFix(5, 'input); + val exception = Bool('input); + val status = Bits(8, 'input); +} + +class ioCtrlMem extends Bundle() +{ + val mrq_val = Bool('output); + val mrq_cmd = UFix(4, 'output); + val mrq_type = UFix(3, 'output); + val mrq_deq = Bool('input); + val xsdq_rdy = Bool('input); + val xsdq_val = Bool('output); + val dc_busy = Bool('input); +} + +class ioCtrlImem extends Bundle() +{ + val req_val = Bool('output); + val req_rdy = Bool('input); + val resp_val = Bool('input); +} + +class ioCtrlWB extends Bundle() +{ + val waddr = UFix(5, 'input); + val wen = 
Bool('input); +} + +class ioCtrlAll extends Bundle() +{ + val ctrl = new ioCtrl(); + val console = new ioConsole(List("rdy", "valid")); + val dpath = new ioCtrlDpath(); + val imem = new ioCtrlImem(); + val mem = new ioCtrlMem(); + val wb = new ioCtrlWB(); + val host = new ioHost(List("start")); +} + +class rocketCtrl extends Component +{ + val io = new ioCtrlAll(); + + val xpr64 = Y; /* value placed in the int_val column of the 64-bit-only rows (LD, LWU, SD and the *W instructions); fixed to Y here */ + val cs = /* Decode table keyed on io.dpath.inst. Columns of every List, in order (they are destructured from cs further down in this class): int_val, br_type, renx2, renx1, sel_alu2, sel_alu1, fn_dw, fn_alu, mem_val, mem_cmd, mem_type, mul_val, mul_fn, div_val, div_fn, wen, sel_wa, sel_wb, ren_pcr, wen_pcr, sync, eret, syscall, privileged. NOTE(review): the first List is presumably the default row returned when no Array entry matches; its int_val=N is what drives xcpt_illegal - confirm against ListLookup. */ + ListLookup(io.dpath.inst, + List( N, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + Array( + BNE-> List(Y, BR_NE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + BEQ-> List(Y, BR_EQ, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + BLT-> List(Y, BR_LT, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + BGE-> List(Y, BR_GE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + + J-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + JAL-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,N,N,N,N), + JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), + JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, 
MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), + JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), + + LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + SB-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + SH-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + + LUI-> List(Y, BR_N, REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + ORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + XORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + riscvAND-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SRA-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + + ADDIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SLLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SRLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SRAIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + ADDW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SUBW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SLLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + SRAW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + + MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64H, N,DIV_X, WEN_N,WA_RD,WB_X, 
REN_N,WEN_N,N,N,N,N), + MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HSU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_32, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + + DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + + SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,Y,N), + EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,Y), + DI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,Y), + ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, 
N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,Y,N,Y), + FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,Y,N,N,N), + CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,Y), + MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,N,N,N,Y), + MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,N,N,N,Y) + + // Instructions that have not yet been implemented +/* + // floating point + FLW-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + FLD-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + FSW-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + FSD-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + + // atomic memory operations + AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, 
REN_N,WEN_N,N,N,N,N), + AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOMIN_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOMAXU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + + // miscellaneous + RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), +*/ + )); + + val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_sel_alu1 :: id_fn_dw :: id_fn_alu :: csremainder = cs; + val id_mem_val :: id_mem_cmd :: id_mem_type 
:: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: id_wen_pcr :: id_sync :: id_eret :: id_syscall :: id_privileged :: Nil = csremainder; + + val id_raddr2 = io.dpath.inst(21,17); + val id_raddr1 = io.dpath.inst(26,22); + val id_waddr = io.dpath.inst(31,27); + + val id_ren2 = id_renx2; + val id_ren1 = id_renx1; + + val id_console_out_val = id_wen_pcr & (id_raddr2 === PCR_CONSOLE); + + val id_mem_val_masked = id_mem_val; + + val mem_xload_val = id_mem_val_masked & (id_mem_cmd === M_XRD); + val mem_xstore_val = id_mem_val_masked & (id_mem_cmd === M_XWR); + + val mem_fire = id_mem_val_masked & ~io.ctrl.killd; + val mem_xload_fire = mem_xload_val & ~io.ctrl.killd; + val mem_xstore_fire = mem_xstore_val & ~io.ctrl.killd; + + val console_out_fire = id_console_out_val & ~io.ctrl.killd; + + val div_fire = id_div_val & ~io.ctrl.killd; + val mul_fire = id_mul_val & ~io.ctrl.killd; + + val sboard_wen = mem_xload_fire | div_fire | mul_fire; + val sboard_waddr = id_waddr; + + val sboard = new rocketCtrlSboard(); + sboard.io.raddra := id_raddr2; + sboard.io.raddrb := id_raddr1; + sboard.io.raddrc := id_waddr; + sboard.io.set := sboard_wen.toBool; + sboard.io.seta := sboard_waddr; + sboard.io.clr0 := io.wb.wen.toBool; + sboard.io.clr0a ^^ io.wb.waddr; + sboard.io.clr1 := io.dpath.wen.toBool; + sboard.io.clr1a := io.dpath.waddr; + + val id_stall_raddr2 = sboard.io.stalla; + val id_stall_raddr1 = sboard.io.stallb; + val id_stall_waddr = sboard.io.stallc; + val id_stall_ra = sboard.io.stallra; + + val mrq = new rocketCtrlCnt(3, 4); + mrq.io.enq := mem_fire.toBool; + mrq.io.deq ^^ io.mem.mrq_deq; + val id_empty_mrq = mrq.io.empty; + val id_full_mrq = mrq.io.full; + + val id_reg_btb_hit = Reg(width = 1, resetVal = Bool(false)); + val ex_reg_br_type = Reg(){UFix(width = 4)}; + val ex_reg_btb_hit = Reg(){Bool()}; + val ex_reg_mem_val = Reg(){Bool()}; + val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; + val ex_reg_mem_type = 
Reg(){UFix(width = 3)}; + val ex_reg_eret = Reg(resetVal = Bool(false)); + val ex_reg_privileged = Reg(resetVal = Bool(false)); + + when (!io.ctrl.stalld) { + when (io.ctrl.killf) { + id_reg_btb_hit <== Bool(false); + } + otherwise{ + id_reg_btb_hit <== io.dpath.btb_hit; + } + } + when (reset.toBool || io.ctrl.killd) { + ex_reg_br_type <== BR_N; + ex_reg_btb_hit <== Bool(false); + ex_reg_mem_val <== Bool(false); + ex_reg_mem_cmd <== UFix(0, 4); + ex_reg_mem_type <== UFix(0, 3); + ex_reg_eret <== Bool(false); + ex_reg_privileged <== Bool(false); + } + otherwise { + ex_reg_br_type <== id_br_type; + ex_reg_btb_hit <== id_reg_btb_hit; + ex_reg_mem_val <== id_mem_val_masked.toBool; + ex_reg_mem_cmd <== id_mem_cmd; + ex_reg_mem_type <== id_mem_type; + ex_reg_eret <== id_eret.toBool; + ex_reg_privileged <== id_privileged.toBool; + } + + val beq = io.dpath.br_eq; + val bne = ~io.dpath.br_eq; + val blt = io.dpath.br_lt; + val bltu = io.dpath.br_ltu; + val bge = ~io.dpath.br_lt; + val bgeu = ~io.dpath.br_ltu; + + val br_taken = + (ex_reg_br_type === BR_EQ) & beq | + (ex_reg_br_type === BR_NE) & bne | + (ex_reg_br_type === BR_LT) & blt | + (ex_reg_br_type === BR_LTU) & bltu | + (ex_reg_br_type === BR_GE) & bge | + (ex_reg_br_type === BR_GEU) & bgeu; + + val jr_taken = (ex_reg_br_type === BR_JR); + val j_taken = (ex_reg_br_type === BR_J); + + io.imem.req_val := io.host.start; +// io.imem.req_val := Bool(true); + + io.mem.mrq_val := ex_reg_mem_val; + io.mem.mrq_cmd := ex_reg_mem_cmd; + io.mem.mrq_type := ex_reg_mem_type; + io.mem.xsdq_val := mem_xstore_fire.toBool; + io.console.valid := console_out_fire.toBool; + + io.ctrl.sel_pc := + Mux(io.dpath.exception || ex_reg_eret, PC_PCR, + Mux(!ex_reg_btb_hit && br_taken, PC_BR, + Mux(ex_reg_btb_hit && !br_taken || ex_reg_privileged, PC_EX4, + Mux(jr_taken, PC_JR, + Mux(j_taken, PC_J, + Mux(io.dpath.btb_hit, PC_BTB, + PC_4)))))); + + io.ctrl.wen_btb := ~ex_reg_btb_hit & br_taken; + + val take_pc = + ~ex_reg_btb_hit & br_taken | + 
ex_reg_btb_hit & ~br_taken | + jr_taken | + j_taken | + io.dpath.exception | + ex_reg_privileged | + ex_reg_eret; + + io.ctrl.stallf := + ~take_pc & + ( + ~io.imem.req_rdy | + ~io.imem.resp_val | + io.ctrl.stalld + ); + + val ctrl_stalld_wo_fpu_rdy = + ~take_pc & + ( + id_ren2 & id_stall_raddr2 | + id_ren1 & id_stall_raddr1 | + (id_sel_wa === WA_RD) & id_stall_waddr | + (id_sel_wa === WA_RA) & id_stall_ra | + id_mem_val_masked & id_full_mrq | + id_sync & (~id_empty_mrq | io.mem.dc_busy) | + mem_xstore_val & ~io.mem.xsdq_rdy | + id_console_out_val & ~io.console.rdy | + id_div_val & ~io.dpath.div_rdy | + io.dpath.div_result_val | + io.dpath.mul_result_val + ); + + // for divider, multiplier writeback + val mul_wb = io.dpath.mul_result_val; + val div_wb = io.dpath.div_result_val & !mul_wb; + + io.ctrl.stalld := ctrl_stalld_wo_fpu_rdy.toBool; + + io.ctrl.killf := take_pc | ~io.imem.resp_val; + val ctrl_killd_wo_fpu_rdy = take_pc | ctrl_stalld_wo_fpu_rdy; + io.ctrl.killd := ctrl_killd_wo_fpu_rdy.toBool; + + io.ctrl.ren2 := id_ren2.toBool; + io.ctrl.ren1 := id_ren1.toBool; + io.ctrl.sel_alu2 := id_sel_alu2; + io.ctrl.sel_alu1 := id_sel_alu1.toBool; + io.ctrl.fn_dw := id_fn_dw.toBool; + io.ctrl.fn_alu := id_fn_alu; + io.ctrl.div_fn := id_div_fn; + io.ctrl.div_val := id_div_val.toBool; + io.ctrl.div_wb := div_wb; + io.ctrl.mul_fn := id_mul_fn; + io.ctrl.mul_val := id_mul_val.toBool; + io.ctrl.mul_wb := mul_wb; + io.ctrl.wen := id_wen.toBool; + io.ctrl.sel_wa := id_sel_wa.toBool; + io.ctrl.sel_wb := id_sel_wb; + io.ctrl.ren_pcr := id_ren_pcr.toBool; + io.ctrl.wen_pcr := id_wen_pcr.toBool; + io.ctrl.eret := id_eret.toBool; + + io.ctrl.xcpt_illegal := ~id_int_val.toBool; + io.ctrl.xcpt_privileged := (id_privileged & ~io.dpath.status(5)).toBool; + io.ctrl.xcpt_fpu := Bool(false); + io.ctrl.xcpt_syscall := id_syscall.toBool; +} + +} diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala new file mode 100644 index 00000000..73661fa6 --- 
/dev/null
+++ b/rocket/src/main/scala/ctrl_util.scala
@@ -0,0 +1,62 @@
+package Top
+{
+import Chisel._
+import Node._;
+import Constants._;
+
+class ioCtrlSboard extends Bundle() // I/O of the register scoreboard: two clear ports, one set port, three read ports
+{
+  val clr0 = Bool('input); // clear-port 0 enable
+  val clr0a = UFix(5, 'input); // clear-port 0 index (5 bits -> 32 entries)
+  val clr1 = Bool('input); // clear-port 1 enable
+  val clr1a = UFix(5, 'input); // clear-port 1 index
+  val set = Bool('input); // set-port enable
+  val seta = UFix(5, 'input); // set-port index
+  val raddra = UFix(5, 'input); // read index whose busy bit drives stalla
+  val raddrb = UFix(5, 'input); // read index whose busy bit drives stallb
+  val raddrc = UFix(5, 'input); // read index whose busy bit drives stallc
+  val stalla = Bool('output);
+  val stallb = Bool('output);
+  val stallc = Bool('output);
+  val stallra = Bool('output); // busy bit of the fixed entry RA (constant from Constants)
+}
+
+class rocketCtrlSboard extends Component // 32-entry busy-bit scoreboard held in a single 32-bit register
+{
+  override val io = new ioCtrlSboard();
+  val reg_busy = Reg(width = 32, resetVal = Bits(0, 32)); // one busy bit per index, all clear after reset
+
+  val set_mask = Mux(io.set, UFix(1,1) << io.seta, UFix(0,32)); // one-hot at seta when set is asserted, else zero
+  val clr0_mask = Mux(io.clr0, ~(UFix(1,1) << io.clr0a), ~UFix(0,32)); // zero only at clr0a when clr0 is asserted, else all ones
+  val clr1_mask = Mux(io.clr1, ~(UFix(1,1) << io.clr1a), ~UFix(0,32)); // zero only at clr1a when clr1 is asserted, else all ones
+  reg_busy <== ((reg_busy | set_mask) & clr0_mask) & clr1_mask; // clears are applied after the set, so a same-cycle clear of the same index wins
+
+  io.stalla := reg_busy(io.raddra).toBool;
+  io.stallb := reg_busy(io.raddrb).toBool;
+  io.stallc := reg_busy(io.raddrc).toBool;
+  io.stallra := reg_busy(RA).toBool;
+}
+
+class ioCtrlCnt extends Bundle() // I/O of the occupancy counter below
+{
+  val enq = Bool('input); // one entry added this cycle
+  val deq = Bool('input); // one entry removed this cycle
+  val empty = Bool('output);
+  val full = Bool('output);
+}
+
+class rocketCtrlCnt(n_bits: Int, limit: Int) extends Component // up/down counter; n_bits = counter width, limit = count reported as full
+{
+  override val io = new ioCtrlCnt();
+  val counter = Reg(width = n_bits, resetVal = UFix(0, n_bits));
+  when (io.enq && !io.deq) { // enqueue only: count up (no saturation check here)
+    counter <== counter + UFix(1, n_bits);
+  }
+  when (!io.enq && io.deq) { // dequeue only: count down (no underflow check here)
+    counter <== counter - UFix(1, n_bits);
+  }
+  io.empty := counter === UFix(0, n_bits);
+  io.full := counter === UFix(limit, n_bits); // NOTE(review): presumably limit must fit in n_bits - confirm at instantiation sites
+}
+
+}
diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala
new file mode 100644
index 00000000..e0cdc8cd
--- /dev/null
+++ b/rocket/src/main/scala/dcache.scala
@@ -0,0 +1,251 @@
+package Top {
+
+import Chisel._
+import Node._;
+import
Constants._;
+import scala.math._;
+
+// interface between D$ and processor
+class ioDmem(view: List[String] = null) extends Bundle(view) {
+  val req_val = Bool('input);
+  val req_rdy = Bool('output);
+  val req_op = Bits(4, 'input); // compared against M_XRD / M_XWR / M_FLA by the cache
+  val req_addr = UFix(32, 'input);
+  val req_data = Bits(64, 'input);
+  val req_wmask = Bits(8, 'input); // one bit per byte of req_data (expanded 8x inside the cache)
+  val req_tag = Bits(12, 'input); // returned unchanged in resp_tag
+  val resp_val = Bool('output);
+  val resp_data = Bits(64, 'output);
+  val resp_tag = Bits(12, 'output);
+}
+
+// interface between D$ and memory
+class ioDcache(view: List[String] = null) extends Bundle(view) {
+  val req_addr = UFix(32, 'input);
+  val req_tag = UFix(3, 'input);
+  val req_val = Bool('input);
+  val req_rdy = Bool('output);
+  val req_wdata = Bits(128, 'input);
+  val req_rw = Bool('input); // driven high by the cache while writing back
+  val resp_data = Bits(128, 'output);
+  val resp_tag = Bits(3, 'output);
+  val resp_val = Bool('output);
+}
+
+class ioDCacheDM extends Bundle() { // cache-side view: cpu port as declared, memory port flipped
+  val cpu = new ioDmem();
+  val mem = new ioDcache().flip();
+}
+
+// state machine to flush (write back dirty lines, invalidate clean ones) the D$
+class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component {
+  val io = new ioDCacheDM();
+  val dcache = new rocketDCacheDM(lines, addrbits);
+
+  val indexbits = java.math.BigInteger.valueOf(lines - 1).bitLength(); // integer ceil(log2(lines)); avoids the rounding hazard of ceil(log10(lines)/log10(2))
+  val offsetbits = 6;
+  val tagmsb = addrbits - 1;
+  val taglsb = indexbits+offsetbits;
+  val indexmsb = taglsb-1;
+  val indexlsb = offsetbits;
+  val offsetmsb = indexlsb-1;
+  val offsetlsb = 3;
+
+  val flush_count = Reg(resetVal = UFix(0, indexbits)); // index of the next line to send a flush request for
+  val flush_resp_count = Reg(resetVal = UFix(0, indexbits)); // number of flush responses seen so far
+  val flushing = Reg(resetVal = Bool(false)); // set while per-line flush requests are being issued
+  val flush_waiting = Reg(resetVal = Bool(false)); // set until the last flush response has been seen
+  val r_cpu_req_tag = Reg(resetVal = Bits(0, 12)); // tag of the CPU request that started the flush
+
+  when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_op === M_FLA)) // an accepted M_FLA request starts the flush sequence
+  {
+    r_cpu_req_tag <== io.cpu.req_tag;
+    flushing <== Bool(true);
+    flush_waiting <== Bool(true);
+  }
+
+  when (dcache.io.cpu.req_rdy &&
+        (flush_count === ~Bits(0, indexbits))) { flushing <== Bool(false); }
+  when (dcache.io.cpu.resp_val &&
+        (dcache.io.cpu.resp_tag === r_cpu_req_tag) &&
+        (flush_resp_count === ~Bits(0, indexbits))) { flush_waiting <== Bool(false); }
+
+  when (flushing && dcache.io.cpu.req_rdy) { flush_count <== flush_count + UFix(1,1); }
+  when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag))
+  { flush_resp_count <== flush_resp_count + UFix(1,1); }
+
+  dcache.io.cpu.req_val := (io.cpu.req_val && (io.cpu.req_op != M_FLA) && !flush_waiting) || flushing; // CPU requests pass through unless a flush is in progress
+  dcache.io.cpu.req_op := Mux(flushing, M_FLA, io.cpu.req_op);
+  dcache.io.cpu.req_addr := Mux(flushing, Cat(Bits(0,tagmsb-taglsb+1), flush_count, Bits(0,offsetbits)).toUFix, io.cpu.req_addr); // flush address = {zero tag, line index, zero offset}
+  dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag);
+  dcache.io.cpu.req_data ^^ io.cpu.req_data;
+  dcache.io.cpu.req_wmask ^^ io.cpu.req_wmask;
+  dcache.io.mem ^^ io.mem;
+
+  io.cpu.req_rdy := dcache.io.cpu.req_rdy && !flush_waiting;
+  io.cpu.resp_data := dcache.io.cpu.resp_data;
+  io.cpu.resp_tag := dcache.io.cpu.resp_tag;
+  io.cpu.resp_val := dcache.io.cpu.resp_val & // gate off flush responses while the flush is outstanding
+    !(flush_waiting && (io.cpu.resp_tag === r_cpu_req_tag) && (flush_count != ~Bits(0, addrbits))); // NOTE(review): compares the indexbits-wide flush_count against an addrbits-wide constant, unlike the indexbits-wide checks above; flush_resp_count/indexbits may have been intended - confirm
+
+}
+
+// basic direct mapped data cache, 2 cycle read latency
+// parameters :
+// lines = # of cache lines
+// addrbits = address width (word addressable) bits
+// 64 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines
+
+class rocketDCacheDM(lines: Int, addrbits: Int) extends Component {
+  val io = new ioDCacheDM();
+
+  val indexbits = java.math.BigInteger.valueOf(lines - 1).bitLength(); // integer ceil(log2(lines)); avoids the rounding hazard of ceil(log10(lines)/log10(2))
+  val offsetbits = 6;
+  val tagmsb = addrbits - 1;
+  val taglsb = indexbits+offsetbits;
+  val indexmsb = taglsb-1;
+  val indexlsb = offsetbits;
+  val offsetmsb = indexlsb-1;
+  val offsetlsb = 3;
+
+  val s_reset :: s_ready :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(7) { UFix() };
+  val state = Reg(resetVal = s_reset);
+
+  val r_cpu_req_addr = Reg(Bits(0, addrbits)); // request fields latched when a request is accepted
+  val r_r_cpu_req_addr = Reg(r_cpu_req_addr); // address delayed one more cycle; selects the 64-bit half of the response
+  val r_cpu_req_val = Reg(Bool(false));
+  val r_cpu_req_data = Reg(Bits(0,64));
+  val r_cpu_req_op = Reg(Bits(0,4));
+  val r_cpu_req_wmask = Reg(Bits(0,8));
+  val r_cpu_req_tag = Reg(Bits(0,12));
+  val r_cpu_resp_tag = Reg(r_cpu_req_tag);
+  val r_cpu_resp_val = Reg(Bool(false));
+
+  when (io.cpu.req_val && io.cpu.req_rdy) {
+    r_cpu_req_addr <== io.cpu.req_addr;
+    r_cpu_req_data <== io.cpu.req_data;
+    r_cpu_req_op <== io.cpu.req_op;
+    r_cpu_req_wmask <== io.cpu.req_wmask;
+    r_cpu_req_tag <== io.cpu.req_tag; }
+
+  val req_load = (r_cpu_req_op === M_XRD);
+  val req_store = (r_cpu_req_op === M_XWR);
+  val req_flush = (r_cpu_req_op === M_FLA);
+
+  when (io.cpu.req_rdy) { r_cpu_req_val <== io.cpu.req_val; }
+  otherwise { r_cpu_req_val <== Bool(false); }
+
+  // counter
+  val rr_count = Reg(resetVal = UFix(0,2)); // 2-bit beat counter: 4 x 128-bit beats per 64-byte line
+  val rr_count_next = rr_count + UFix(1);
+  when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy))
+  { rr_count <== rr_count_next; }
+
+  // tag array
+  val tag_we = (state === s_resolve_miss); // tag (and valid/dirty bits) are updated when a miss or flush resolves
+  val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix;
+  val tag_wdata = r_cpu_req_addr(tagmsb, taglsb);
+  val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata);
+  val tag_raddr = Mux((state === s_ready), io.cpu.req_addr(indexmsb, indexlsb).toUFix, r_cpu_req_addr(indexmsb, indexlsb).toUFix);
+  val tag_rdata = Reg(tag_array.read(tag_raddr));
+
+  // valid bit array
+  val vb_array = Reg(resetVal = Bits(0, lines));
+  val vb_rdata = Reg(vb_array(tag_raddr));
+  when (tag_we && !req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); }
+  when (tag_we && req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); }
+
+  val tag_valid = vb_rdata.toBool;
+  val tag_match = tag_valid && !req_flush && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); // a flush request never counts as a hit
+  val store = ((state === s_ready) && r_cpu_req_val && req_store && tag_match ) ||
+              ((state === s_resolve_miss) && req_store);
+
+  // dirty bit array
+  val db_array = Reg(resetVal = Bits(0, lines));
+  val db_rdata = Reg(db_array(tag_raddr));
+  val tag_dirty = db_rdata.toBool;
+  when (store) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); }
+  when (tag_we) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } // NOTE(review): store and tag_we are both true for a store in s_resolve_miss; which update wins depends on this Chisel version's `<==` priority - confirm
+
+  // data array
+  val data_array_we = ((state === s_refill) && io.mem.resp_val) || store;
+  val data_array_waddr = Mux((state === s_refill),
+                             Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix,
+                             r_cpu_req_addr(indexmsb, offsetmsb-1).toUFix);
+
+  val data_array_wdata = Mux((state === s_refill), io.mem.resp_data, Cat(r_cpu_req_data, r_cpu_req_data)); // store data is duplicated into both 64-bit halves; the write mask picks one
+
+  val req_wmask_expand = Cat(Fill(8, r_cpu_req_wmask(7)),
+                             Fill(8, r_cpu_req_wmask(6)),
+                             Fill(8, r_cpu_req_wmask(5)),
+                             Fill(8, r_cpu_req_wmask(4)),
+                             Fill(8, r_cpu_req_wmask(3)),
+                             Fill(8, r_cpu_req_wmask(2)),
+                             Fill(8, r_cpu_req_wmask(1)),
+                             Fill(8, r_cpu_req_wmask(0)));
+
+  val store_wmask = Mux(r_cpu_req_addr(offsetlsb).toBool,
+                        Cat(req_wmask_expand, Bits(0,64)),
+                        Cat(Bits(0,64), req_wmask_expand));
+
+  val data_array_wmask = Mux((state === s_refill), ~Bits(0,128), store_wmask);
+  val data_array = Mem(lines*4, data_array_we, data_array_waddr, data_array_wdata, wrMask = data_array_wmask, resetVal = null);
+  val data_array_raddr = Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count_next).toUFix,
+                         Mux((state === s_start_writeback) || (state === s_writeback), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix,
+                         r_cpu_req_addr(indexmsb, offsetmsb-1)));
+  val data_array_rdata = Reg(data_array.read(data_array_raddr));
+
+  // output signals
+  io.cpu.req_rdy := (state === s_ready) && (!r_cpu_req_val || tag_match);
+
+  when ((((state === s_ready) && r_cpu_req_val && tag_match) || (state === s_resolve_miss)) && !req_store)
+  { r_cpu_resp_val <== Bool(true); }
+  otherwise { r_cpu_resp_val <== Bool(false); }
+
+  io.cpu.resp_val := r_cpu_resp_val;
+  io.cpu.resp_data := Mux(r_r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0));
+  io.cpu.resp_tag := r_cpu_resp_tag;
+
+  io.mem.req_val := (state === s_req_refill) || (state === s_writeback);
+  io.mem.req_rw := (state === s_writeback);
+  io.mem.req_wdata := data_array_rdata;
+  io.mem.req_tag := UFix(0);
+  io.mem.req_addr := Mux(state === s_writeback,
+                         Cat(tag_rdata, r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix,
+                         Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix);
+
+  // control state machine
+  switch (state) {
+    is (s_reset) {
+      state <== s_ready;
+    }
+    is (s_ready) { // NOTE(review): the result depends on the priority among these sequential `when` blocks in this Chisel version - confirm
+      when (~r_cpu_req_val) { state <== s_ready; }
+      when (r_cpu_req_val & tag_match) { state <== s_ready; }
+      when (tag_valid & tag_dirty) { state <== s_start_writeback; }
+      when (req_flush) { state <== s_resolve_miss; }
+      otherwise { state <== s_req_refill; }
+    }
+    is (s_start_writeback) {
+      state <== s_writeback;
+    }
+    is (s_writeback) {
+      when (io.mem.req_rdy && (rr_count === UFix(3,2))) { // last of the four writeback beats accepted
+        when (req_flush) { state <== s_resolve_miss; }
+        otherwise { state <== s_req_refill; }
+      }
+    }
+    is (s_req_refill)
+    {
+      when (io.mem.req_rdy) { state <== s_refill; }
+    }
+    is (s_refill) {
+      when (io.mem.resp_val && (rr_count === UFix(3,2))) { state <== s_resolve_miss; } // last of the four refill beats received
+    }
+    is (s_resolve_miss) {
+      state <== s_ready;
+    }
+  }
+}
+
+}
diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala
new file mode 100644
index 00000000..6e383a3e
--- /dev/null
+++ b/rocket/src/main/scala/divider.scala
@@ -0,0 +1,147 @@
+package Top {
+
+import Chisel._
+import Node._;
+import Constants._;
+
+class ioDivider(width: Int) extends Bundle {
+  // requests
+  val div_val = Bool('input);
+  val div_rdy = Bool('output);
+  val div_fn = UFix(4, 'input);
+  val div_waddr = UFix(5, 'input);
+  val dpath_rs1 = Bits(width, 'input);
+  val dpath_rs2 = Bits(width, 'input);
+  // responses
+  val div_result_bits = Bits(width, 'output);
+  val div_result_tag = UFix(5, 'output); // the div_waddr captured when the request was accepted
+  val div_result_val = Bool('output);
+  val div_result_rdy = Bool('input);
+}
+
+// class ioDivider extends Bundle {
+//   // requests
+//   val req_val = Bool('input);
+//   val req_rdy = Bool('output);
+//   val req_fn = UFix(3, 'input);
+//   val req_tag = UFix(5, 'input);
+//   val req_rs1 = Bits(64, 'input);
+//   val req_rs2 = Bits(64, 'input);
+//   // responses
+//   val resp_val = Bool('output);
+//   val resp_data = Bits(64, 'output);
+//   val resp_tag = UFix(5, 'output);
+// }
+
+class rocketDivider(width : Int) extends Component { // iterative divider: ready -> (negate inputs) -> busy -> (negate outputs) -> done
+  val io = new ioDivider(width);
+
+  val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() };
+  val state = Reg(resetVal = s_ready);
+
+  val count_bits = java.math.BigInteger.valueOf(width).bitLength(); // enough bits for count to reach the value `width`
+  val count = Reg(resetVal = UFix(0, count_bits));
+  val divby0 = Reg(resetVal = Bool(false)); // set at start; stays set only while every subtraction step reports a zero top bit
+  val neg_quo = Reg(resetVal = Bool(false)); // quotient must be negated at the end
+  val neg_rem = Reg(resetVal = Bool(false)); // remainder must be negated at the end
+  val reg_waddr = Reg(resetVal = UFix(0, 5));
+  val rem = Reg(resetVal = Bool(false)); // latched v_rem: return the remainder instead of the quotient
+  val half = Reg(resetVal = Bool(false)); // latched v_half: half-width (DIV_32*) operation
+  val tc = Reg(resetVal = Bool(false)); // latched v_tc: signed operation
+
+  val divisor = Reg(resetVal = UFix(0, width));
+  val remainder = Reg(resetVal = UFix(0, 2*width+1)); // upper part accumulates the remainder, lower `width` bits the quotient (see `result` below)
+  val subtractor = remainder(2*width, width).toUFix - divisor;
+
+  val v_tc = ((io.div_fn === DIV_64D) || (io.div_fn === DIV_64R)) ||
+             ((io.div_fn === DIV_32D) || (io.div_fn === DIV_32R));
+
+  val v_rem = ((io.div_fn === DIV_32R) || (io.div_fn === DIV_32RU)) ||
+              ((io.div_fn === DIV_64R) || (io.div_fn === DIV_64RU));
+
+  val v_half = ((io.div_fn === DIV_32R) || (io.div_fn === DIV_32RU)) ||
+               ((io.div_fn === DIV_32D) || (io.div_fn === DIV_32DU));
+
+  // state machine
+  switch (state) {
+    is (s_ready) { // NOTE(review): staying in s_ready without a request relies on the first `when` taking priority in this Chisel version - confirm
+      when (!io.div_val) { state <== s_ready; }
+      when (v_tc) { state <== s_neg_inputs };
+      otherwise { state <== s_busy; }
+    }
+    is (s_neg_inputs) { state <== s_busy; }
+    is (s_busy) {
+      when (count != UFix(width)) { state <== s_busy; }
+      when (!(neg_quo || neg_rem)) { state <== s_done; }
+      otherwise { state <== s_neg_outputs; }
+    }
+    is (s_neg_outputs) { state <== s_done; }
+    is (s_done) {
+      when (io.div_result_rdy) { state <== s_ready; } // hold the result until the consumer accepts it
+    }
+  }
+
+  // if we're doing 32-bit unsigned division, then we don't want the 32-bit
+  // inputs to be sign-extended.
+  val in_lhs = Mux((v_half && !v_tc),
+                   Cat(Fill(width/2, UFix(0,1)), io.dpath_rs1(width/2-1, 0)),
+                   io.dpath_rs1).toUFix;
+
+  val in_rhs = Mux((v_half && !v_tc),
+                   Cat(Fill(width/2, UFix(0,1)), io.dpath_rs2(width/2-1, 0)),
+                   io.dpath_rs2).toUFix;
+
+  when ((state === s_ready) && io.div_val) { // accept a request: latch the operation flags and operands
+    count <== UFix(0, count_bits);
+    half <== v_half;
+    neg_quo <== Bool(false);
+    neg_rem <== Bool(false);
+    rem <== v_rem;
+    tc <== v_tc;
+    reg_waddr <== io.div_waddr;
+    divby0 <== Bool(true);
+    divisor <== in_rhs;
+    remainder <== Cat(Fill(width+1, UFix(0,1)), in_lhs).toUFix;
+  }
+
+  when (state === s_neg_inputs) { // signed ops: record the result signs and make both operands non-negative
+    neg_rem <== remainder(width-1).toBool;
+    neg_quo <== (remainder(width-1) != divisor(width-1));
+    when (remainder(width-1).toBool) {
+      remainder <== Cat(remainder(2*width, width), -remainder(width-1,0)).toUFix;
+    }
+    when (divisor(width-1).toBool) {
+      divisor <== subtractor(width-1,0); // NOTE(review): presumably the upper remainder bits are zero here, so this is 0 - divisor - confirm
+    }
+  }
+  when (state === s_neg_outputs) { // NOTE(review): these conditions overlap; the result depends on `<==` priority in this Chisel version - confirm
+    when (neg_rem && neg_quo && !divby0) {
+      remainder <== Cat(-remainder(2*width, width+1), remainder(width), -remainder(width-1,0)).toUFix;
+    }
+    when (neg_quo && !divby0) {
+      remainder <== Cat(remainder(2*width, width), -remainder(width-1,0)).toUFix;
+    }
+    when (neg_rem) {
+      remainder <== Cat(-remainder(2*width, width+1), remainder(width,0)).toUFix;
+    }
+    when (divisor(width-1).toBool) {
+      divisor <== subtractor(width-1,0);
+    }
+  }
+  when (state === s_busy) { // one division step per cycle
+    count <== count + UFix(1);
+    divby0 <== divby0 && !subtractor(width).toBool;
+    remainder <== Mux(subtractor(width).toBool,
+                      Cat(remainder(2*width-1, width), remainder(width-1,0), ~subtractor(width)),
+                      Cat(subtractor(width-1, 0), remainder(width-1,0), ~subtractor(width))).toUFix; // shift left by one; keep the subtraction when its top bit is clear, and shift in the new quotient bit
+  }
+
+  val result = Mux(rem, remainder(2*width, width+1), remainder(width-1,0));
+
+  io.div_result_bits := Mux(half, Cat(Fill(width/2, result(width/2-1)), result(width/2-1,0)), result); // half-width results are sign-extended from bit width/2-1
+  io.div_rdy := (state === s_ready);
+  io.div_result_tag := reg_waddr;
+  io.div_result_val := (state === s_done);
+}
+
+}
diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala
new file mode 100644
index 00000000..6c7e5287
--- /dev/null
+++ b/rocket/src/main/scala/dpath.scala
@@ -0,0 +1,345 @@
+package Top {
+
+import Chisel._
+import Node._;
+import Constants._
+import Instructions._
+
+class ioDpath extends Bundle() // datapath status outputs consumed by the control logic
+{
+  val btb_hit = Bool('output);
+  val inst = Bits(32, 'output);
+  val rs2 = Bits(64, 'output);
+  val rs1 = Bits(64, 'output);
+  val br_eq = Bool('output);
+  val br_lt = Bool('output);
+  val br_ltu = Bool('output);
+  val div_result_val = Bool('output);
+  val div_rdy = Bool('output);
+  val mul_result_val = Bool('output);
+  val wen = Bool('output);
+  val waddr = UFix(5, 'output);
+  val alu_out = UFix(64, 'output);
+  val exception = Bool('output);
+  val status = Bits(8, 'output);
+}
+
+class ioDpathImem extends Bundle() // instruction fetch address out, fetched instruction in
+{
+  val req_addr = UFix(32, 'output);
+  val resp_data = Bits(32, 'input);
+}
+
+class ioDpathWB extends Bundle() // external register-file write port (wired to regfile port w1)
+{
+  val waddr = UFix(5, 'input);
+  val wen = Bool('input);
+  val wdata = Bits(64, 'input);
+}
+
+class ioDpathAll extends Bundle()
+{
+  val dpath = new ioDpath();
+  val host = new ioHost();
+  val ctrl = new ioCtrl().flip();
+  val debug = new ioDebug();
+  val wb = new ioDpathWB();
+  val imem = new ioDpathImem();
+}
+
+class rocketDpath extends Component // datapath: fetch PC, decode-stage register read and bypass, execute-stage ALU/div/mul/PCR, writeback
+{
+  val io = new ioDpathAll();
+
+  val btb = new rocketDpathBTB();
+  val if_btb_target = btb.io.target;
+
+  val pcr = new rocketDpathPCR();
+  val ex_pcr = pcr.io.r.data;
+
+  val alu = new rocketDpathALU();
+  val ex_alu_out =
alu.io.out;
+  val ex_jr_target = ex_alu_out(31,0); // jump-register target is the low 32 bits of the ALU result
+
+  val div = new rocketDivider(64);
+  val div_result = div.io.div_result_bits;
+  val div_result_tag = div.io.div_result_tag;
+  val div_result_val = div.io.div_result_val;
+
+  val mul = new rocketMultiplier();
+  val mul_result = mul.io.result;
+  val mul_result_tag = mul.io.result_tag;
+  val mul_result_val = mul.io.result_val;
+
+  val rfile = new rocketDpathRegfile();
+
+  // instruction fetch definitions
+  val if_reg_pc = Reg(width = 32, resetVal = UFix(0, 32));
+
+  // instruction decode definitions
+  val id_reg_pc = Reg(){UFix(width = 32)};
+  val id_reg_pc_plus4 = Reg(){UFix(width = 32)};
+  val id_reg_inst = Reg(width = 32, resetVal = NOP);
+
+  // execute definitions
+  val ex_reg_pc = Reg(width = 32, resetVal = UFix(0, 32));
+  val ex_reg_pc_plus4 = Reg(width = 32, resetVal = UFix(0, 32));
+  val ex_reg_inst = Reg(width = 32, resetVal = Bits(0, 32));
+  val ex_reg_raddr2 = Reg(width = 5, resetVal = UFix(0, 5));
+  val ex_reg_raddr1 = Reg(width = 5, resetVal = UFix(0, 5));
+  val ex_reg_rs2 = Reg(width = 64, resetVal = Bits(0, 64));
+  val ex_reg_rs1 = Reg(width = 64, resetVal = Bits(0, 64));
+  val ex_reg_waddr = Reg(width = 5, resetVal = UFix(0, 5));
+  val ex_reg_ctrl_sel_alu2 = Reg(width = 2, resetVal = A2_X);
+  val ex_reg_ctrl_sel_alu1 = Reg(width = 1, resetVal = A1_X);
+  val ex_reg_ctrl_fn_dw = Reg(width = 1, resetVal = DW_X);
+  val ex_reg_ctrl_fn_alu = Reg(width = 4, resetVal = FN_X);
+  val ex_reg_ctrl_ll_wb = Reg(width = 1, resetVal = Bool(false)); // "long latency" (div/mul) result is being written back this cycle
+  val ex_reg_ctrl_mul_val = Reg(width = 1, resetVal = Bool(false));
+  val ex_reg_ctrl_mul_fn = Reg(width = 3, resetVal = MUL_X);
+  val ex_reg_ctrl_div_val = Reg(width = 1, resetVal = Bool(false));
+  val ex_reg_ctrl_div_fn = Reg(width = 4, resetVal = DIV_X);
+  val ex_reg_ctrl_sel_wb = Reg(width = 3, resetVal = WB_X);
+  val ex_reg_ctrl_wen = Reg(width = 1, resetVal = Bool(false));
+  val ex_reg_ctrl_ren_pcr = Reg(width = 1, resetVal = Bool(false));
+  val ex_reg_ctrl_wen_pcr = Reg(width = 1, resetVal = Bool(false));
+  val ex_reg_ctrl_eret = Reg(width = 1, resetVal = Bool(false));
+  val ex_reg_ctrl_exception = Reg(width = 1, resetVal = Bool(false));
+  val ex_reg_ctrl_cause = Reg(width = 5, resetVal = UFix(0,5));
+  val ex_wdata = Wire() { Bits() }; // forward-declared; driven by the writeback select mux near the end
+
+  // instruction fetch stage
+  val if_pc_plus4 = if_reg_pc + UFix(4, 32);
+
+  val ex_sign_extend =
+    Cat(Fill(52, ex_reg_inst(21)), ex_reg_inst(21,10)); // 12-bit immediate from inst(21,10), sign-extended to 64 bits
+  val ex_sign_extend_split =
+    Cat(Fill(52, ex_reg_inst(31)), ex_reg_inst(31,27), ex_reg_inst(16,10)); // 12-bit immediate split across inst(31,27) and inst(16,10), sign-extended
+
+  val branch_adder_rhs =
+    Mux(io.ctrl.sel_pc === PC_BR, Cat(ex_sign_extend_split(30,0), UFix(0, 1)),
+        Cat(Fill(6, ex_reg_inst(31)), ex_reg_inst(31,7), UFix(0, 1))); // both offsets are shifted left by one bit
+
+  val ex_branch_target = ex_reg_pc + branch_adder_rhs.toUFix;
+
+  btb.io.correct_target := ex_branch_target;
+
+  val if_next_pc =
+    Mux(io.ctrl.sel_pc === PC_4, if_pc_plus4,
+    Mux(io.ctrl.sel_pc === PC_BTB, if_btb_target,
+    Mux(io.ctrl.sel_pc === PC_EX4, ex_reg_pc_plus4,
+    Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target,
+    Mux(io.ctrl.sel_pc === PC_J, ex_branch_target,
+    Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix,
+    Mux(io.ctrl.sel_pc === PC_PCR, ex_pcr(31,0).toUFix,
+        UFix(0, 32))))))));
+
+  when (!io.host.start){ // NOTE(review): holding the PC at 0 before start relies on this block taking priority over the next one - confirm
+    if_reg_pc <== UFix(0, 32); //32'hFFFF_FFFC;
+  }
+  when (!io.ctrl.stallf) {
+    if_reg_pc <== if_next_pc;
+  }
+
+  io.imem.req_addr :=
+    Mux(io.ctrl.stallf, if_reg_pc,
+        if_next_pc); // re-request the current PC while fetch is stalled
+
+  btb.io.current_pc4 := if_pc_plus4;
+  btb.io.hit ^^ io.dpath.btb_hit;
+  btb.io.wen ^^ io.ctrl.wen_btb;
+  btb.io.correct_pc4 := ex_reg_pc_plus4;
+
+  // instruction decode stage
+  when (!io.ctrl.stalld) {
+    id_reg_pc <== if_reg_pc;
+    id_reg_pc_plus4 <== if_pc_plus4;
+    when(io.ctrl.killf) { // a killed fetch enters decode as a NOP
+      id_reg_inst <== NOP;
+    }
+    otherwise {
+      id_reg_inst <== io.imem.resp_data;
+    }
+  }
+
+  val id_raddr1 = id_reg_inst(26,22).toUFix;
+  val id_raddr2 = id_reg_inst(21,17).toUFix;
+
+  // regfile read
+  rfile.io.r0.en ^^ io.ctrl.ren2;
+  rfile.io.r0.addr := id_raddr2;
+  val id_rdata2 = rfile.io.r0.data;
+
+  rfile.io.r1.en ^^ io.ctrl.ren1;
+  rfile.io.r1.addr := id_raddr1;
+  val id_rdata1 = rfile.io.r1.data;
+
+  val id_waddr =
+    Mux(io.ctrl.div_wb, div_result_tag,
+    Mux(io.ctrl.mul_wb, mul_result_tag,
+    Mux(io.ctrl.sel_wa === WA_RD, id_reg_inst(31,27).toUFix,
+    Mux(io.ctrl.sel_wa === WA_RA, RA,
+        UFix(0, 5))))); // div/mul writebacks use the destination tag returned by the unit
+
+  val id_rs1 =
+    Mux(io.ctrl.div_wb, div_result,
+    Mux(io.ctrl.mul_wb, mul_result,
+    Mux(id_raddr1 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr1 === ex_reg_waddr, ex_wdata,
+        id_rdata1))); // div/mul results enter the pipeline through the rs1 path; otherwise bypass from EX or read the regfile
+
+  val id_rs2 =
+    Mux(id_raddr2 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr2 === ex_reg_waddr, ex_wdata,
+        id_rdata2); // bypass from EX when it writes the register being read (never for register 0)
+
+  val id_exception = io.ctrl.xcpt_illegal || io.ctrl.xcpt_privileged || io.ctrl.xcpt_fpu || io.ctrl.xcpt_syscall;
+  val id_cause =
+    Mux(io.ctrl.xcpt_illegal, UFix(2,5),
+    Mux(io.ctrl.xcpt_privileged, UFix(3,5),
+    Mux(io.ctrl.xcpt_fpu, UFix(4,5),
+    Mux(io.ctrl.xcpt_syscall, UFix(6,5),
+        UFix(0,5))))); // priority-encoded cause code
+
+  io.dpath.inst := id_reg_inst;
+  io.dpath.rs1 := id_rs1;
+  io.dpath.rs2 := id_rs2;
+
+  // execute stage
+  ex_reg_pc <== id_reg_pc;
+  ex_reg_pc_plus4 <== id_reg_pc_plus4;
+  ex_reg_inst <== id_reg_inst;
+  ex_reg_raddr2 <== id_raddr2;
+  ex_reg_raddr1 <== id_raddr1;
+  ex_reg_rs2 <== id_rs2;
+  ex_reg_rs1 <== id_rs1;
+  ex_reg_waddr <== id_waddr;
+  ex_reg_ctrl_sel_alu2 <== io.ctrl.sel_alu2;
+  ex_reg_ctrl_sel_alu1 <== io.ctrl.sel_alu1.toUFix;
+  ex_reg_ctrl_fn_dw <== io.ctrl.fn_dw.toUFix;
+  ex_reg_ctrl_fn_alu <== io.ctrl.fn_alu;
+  ex_reg_ctrl_mul_fn <== io.ctrl.mul_fn;
+  ex_reg_ctrl_div_fn <== io.ctrl.div_fn;
+  ex_reg_ctrl_ll_wb <== io.ctrl.div_wb | io.ctrl.mul_wb; // TODO: verify
+  ex_reg_ctrl_sel_wb <== io.ctrl.sel_wb;
+  ex_reg_ctrl_ren_pcr <== io.ctrl.ren_pcr;
+  ex_reg_ctrl_cause <== id_cause;
+
+  when(io.ctrl.killd) { // a killed decode must have no side effects in execute
+    ex_reg_ctrl_div_val <== Bool(false);
+    ex_reg_ctrl_mul_val <== Bool(false);
+    ex_reg_ctrl_wen <== Bool(false);
+    ex_reg_ctrl_wen_pcr <== Bool(false);
+    ex_reg_ctrl_eret <== Bool(false);
+    ex_reg_ctrl_exception <== Bool(false);
+  }
+  otherwise {
+    ex_reg_ctrl_div_val <== io.ctrl.div_val;
+    ex_reg_ctrl_mul_val <== io.ctrl.mul_val;
+    ex_reg_ctrl_wen <== io.ctrl.wen;
+    ex_reg_ctrl_wen_pcr <== io.ctrl.wen_pcr;
+    ex_reg_ctrl_eret <== io.ctrl.eret;
+    ex_reg_ctrl_exception <== id_exception;
+  }
+
+  val ex_alu_in2 =
+    Mux(ex_reg_ctrl_sel_alu2 === A2_0, UFix(0, 64),
+    Mux(ex_reg_ctrl_sel_alu2 === A2_SEXT, ex_sign_extend,
+    Mux(ex_reg_ctrl_sel_alu2 === A2_SPLIT, ex_sign_extend_split,
+    Mux(ex_reg_ctrl_sel_alu2 === A2_RS2, ex_reg_rs2,
+        UFix(0, 64)))));
+
+  val ex_alu_in1 =
+    Mux(ex_reg_ctrl_sel_alu1 === A1_RS1, ex_reg_rs1,
+    Mux(ex_reg_ctrl_sel_alu1 === A1_LUI, Cat(Fill(32, ex_reg_inst(26)),ex_reg_inst(26,7),UFix(0, 12)),
+        UFix(0, 64))); // A1_LUI: inst(26,7) placed above 12 zero bits, sign-extended to 64 bits
+
+  val ex_alu_shamt =
+    Cat(ex_alu_in2(5) & ex_reg_ctrl_fn_dw === DW_64, ex_alu_in2(4,0)).toUFix; // shift-amount bit 5 is honoured only for 64-bit operations
+
+  alu.io.dw := ex_reg_ctrl_fn_dw;
+  alu.io.fn := ex_reg_ctrl_fn_alu;
+  alu.io.shamt := ex_alu_shamt.toUFix;
+  alu.io.in2 := ex_alu_in2.toUFix;
+  alu.io.in1 := ex_alu_in1.toUFix;
+
+  // divider
+  div.io.div_fn := ex_reg_ctrl_div_fn;
+  div.io.div_val := ex_reg_ctrl_div_val;
+  div.io.div_waddr := ex_reg_waddr;
+  div.io.dpath_rs1 := ex_reg_rs1;
+  div.io.dpath_rs2 := ex_reg_rs2;
+  div.io.div_result_rdy := io.ctrl.div_wb;
+
+  io.dpath.div_rdy := div.io.div_rdy;
+  io.dpath.div_result_val := div.io.div_result_val;
+
+  // multiplier
+  mul.io.mul_val := ex_reg_ctrl_mul_val;
+  mul.io.mul_fn := ex_reg_ctrl_mul_fn;
+  mul.io.mul_tag := ex_reg_waddr;
+  mul.io.in0 := ex_reg_rs1;
+  mul.io.in1 := ex_reg_rs2;
+
+  io.dpath.mul_result_val := mul.io.result_val;
+
+  // processor control register i/o
+  pcr.io.host.from_wen ^^ io.host.from_wen;
+  pcr.io.host.from ^^ io.host.from;
+  pcr.io.host.to ^^ io.host.to;
+
+  pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_exception | ex_reg_ctrl_eret;
+  pcr.io.r.addr :=
+    Mux(ex_reg_ctrl_exception, PCR_EVEC,
+    Mux(ex_reg_ctrl_eret, PCR_EPC,
+        ex_reg_raddr2)); // exceptions read the handler vector and eret reads the saved PC (both feed the PC_PCR next-PC path)
+
+  pcr.io.w.addr := ex_reg_raddr2;
+  pcr.io.w.en := ex_reg_ctrl_wen_pcr;
+  pcr.io.w.data := ex_reg_rs1;
+
+  pcr.io.eret := ex_reg_ctrl_eret;
+  pcr.io.exception := ex_reg_ctrl_exception;
+  pcr.io.cause := ex_reg_ctrl_cause;
+  pcr.io.pc := ex_reg_pc;
+
+  io.dpath.status := pcr.io.status;
+//  io.debug ^^ pcr.io.debug;
+
+  io.debug.error_mode := pcr.io.debug.error_mode;
+  io.debug.log_control := pcr.io.debug.log_control;
+
+  // branch resolution logic
+  io.dpath.br_eq := (ex_reg_rs1 === ex_reg_rs2);
+  io.dpath.br_ltu := (ex_reg_rs1.toUFix < ex_reg_rs2.toUFix);
+  io.dpath.br_lt :=
+    (~(ex_reg_rs1(63) ^ ex_reg_rs2(63)) & io.dpath.br_ltu |
+    ex_reg_rs1(63) & ~ex_reg_rs2(63)).toBool; // signed compare: same sign -> reuse the unsigned result; different signs -> rs1 is smaller when it is the negative one
+
+  io.dpath.alu_out := ex_alu_out;
+
+  // writeback select mux
+  ex_wdata :=
+    Mux(ex_reg_ctrl_ll_wb, ex_reg_rs1,
+    Mux(ex_reg_ctrl_sel_wb === WB_PC, ex_reg_pc_plus4,
+    Mux(ex_reg_ctrl_sel_wb === WB_ALU, ex_alu_out,
+    Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr,
+        Bits(0, 64))))).toBits; // for div/mul writebacks ex_reg_rs1 holds the unit's result (routed through id_rs1)
+
+  // regfile write
+  rfile.io.w0.addr := ex_reg_waddr;
+  rfile.io.w0.en := ex_reg_ctrl_wen | ex_reg_ctrl_ll_wb;
+  rfile.io.w0.data := ex_wdata;
+
+  rfile.io.w1.addr ^^ io.wb.waddr;
+  rfile.io.w1.en ^^ io.wb.wen;
+  rfile.io.w1.data ^^ io.wb.wdata;
+
+  // clear scoreboard for "long latency" writebacks
+  io.dpath.wen := ex_reg_ctrl_ll_wb;
+  io.dpath.waddr := ex_reg_waddr;
+
+  // exception signal to control (for NPC select)
+  io.dpath.exception := ex_reg_ctrl_exception;
+
+}
+
+}
diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala
new file mode 100644
index 00000000..d7ae1e90
--- /dev/null
+++ b/rocket/src/main/scala/dpath_alu.scala
@@ -0,0 +1,90 @@
+package Top {
+
+
+import Chisel._
+import Node._;
+
+import Constants._
+import Instructions._
+
+class ioALU extends Bundle(){
+  val dw = UFix(1, 'input); // data width select, compared against DW_64 / DW_32
+  val fn = UFix(4, 'input); // operation select, compared against the FN_* constants
+  val shamt = UFix(6, 'input);
+  val in2 = UFix(64, 'input);
+  val in1 = UFix(64, 'input);
+  val out = UFix(64, 'output);
+}
+
+class rocketDpathALU extends Component // combinational ALU: 64-bit result per fn, then optional 32-bit sign extension
+{
+  override val io = new ioALU();
+  val out64 =
+    MuxCase(Fix(0, 64), Array(
+      (io.fn === FN_ADD) -> (io.in1 + io.in2).toFix,
+      (io.fn === FN_SUB) -> (io.in1 - io.in2).toFix,
+      (io.fn === FN_SLT) -> (io.in1.toFix < io.in2.toFix), //(io.in1 < io.in2)
+      (io.fn === FN_SLTU) -> (io.in1 < io.in2).toFix,
+      (io.fn === FN_AND) -> (io.in1 & io.in2).toFix,
+      (io.fn === FN_OR) -> (io.in1 | io.in2).toFix,
+      (io.fn === FN_XOR) -> (io.in1 ^ io.in2).toFix,
+      (io.fn === FN_SL) -> (io.in1 << io.shamt).toFix,
+      (io.fn === FN_SR && io.dw === DW_64) -> (io.in1 >> io.shamt).toFix,
+      (io.fn === FN_SR && io.dw === DW_32) -> (Cat(Fix(0, 32),io.in1(31, 0)).toUFix >> io.shamt),
+      (io.fn === FN_SRA) -> (io.in1.toFix >>> io.shamt))); // a 32-bit logical right shift first zeroes the upper word of in1; an unmatched fn yields 0
+
+  io.out := MuxLookup(io.dw, Fix(0, 64), Array(
+    DW_64 -> out64,
+    DW_32 -> Cat(Fill(32, out64(31)), out64(31,0)).toFix)).toUFix; // 32-bit results are sign-extended from bit 31
+
+}
+
+/*
+class IoDpathALU extends Bundle {
+  val in0 = Bits(32,'input);
+  val in1 = Bits(32,'input);
+  val fn = Bits(4,'input);
+  val out = Bits(32,'output);
+}
+
+class DpathALU extends Component {
+  val io = new IoDpathALU();
+
+  val adder_in0 = MuxCase(io.in0,Array(
+    ((io.fn === FN_SUB) | (io.fn === FN_SLT) | (io.fn === FN_SLTU)) -> (~io.in0)));
+
+  val adder_in1 = io.in1;
+  val adder_cin = MuxCase(Bits(0),Array(
+    ((io.fn === FN_SUB) | (io.fn === FN_SLT) | (io.fn === FN_SLTU)) -> Bits(1)));
+
+  // Need to make the same width?
+  val adder_out = Cat(Bits(0,1),adder_in1).toUFix + Cat(Bits(0,1),adder_in0).toUFix + adder_cin.toUFix;
+  //adder_out := (adder_in1.toUFix + adder_in0.toUFix + adder_cin.toUFix);
+
+  // Determine if there is overflow
+  val overflow = (io.in0(31) ^ io.in1(31)) & (adder_out(32) != io.in0(31));
+
+  val compare_yes = MuxLookup(io.fn,Bits(0),Array(
+    // If unsigned, do subtraction, and if the result is negative, then slt=true
+    FN_SLTU -> ~adder_out(32),
+    // If signed, do subtraction, and if the result is negative, then slt=true as well
+    // But if there is bad overflow (operands same sign and result is a different sign),
+    // then need to flip
+    FN_SLT -> ~(adder_out(32) ^ overflow)));
+
+  io.out := MuxLookup(io.fn,Fix(0),Array(
+    FN_ADD -> adder_out,
+    FN_SUB -> adder_out,
+    FN_SLT -> compare_yes,
+    FN_SLTU -> compare_yes,
+    FN_AND -> (io.in0 & io.in1),
+    FN_OR -> (io.in0 | io.in1),
+    FN_XOR -> (io.in0 ^ io.in1),
+    FN_SL -> (io.in1 << io.in0(4,0).toUFix),
+    FN_SR -> (io.in1 >> io.in0(4,0).toUFix),
+    FN_SRA -> (io.in1.toFix >> io.in0(4,0).toUFix)
+  ));
+}
+*/
+
+}
diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala
new file mode 100644
index 00000000..4c53db11
--- /dev/null
+++ b/rocket/src/main/scala/dpath_util.scala
@@ -0,0 +1,184 @@
+package Top
+{
+
+import Chisel._
+import Node._;
+
+import Constants._;
+
+class ioDpathBTB extends Bundle()
+{
+  val current_pc4 = UFix(32, 'input); // lookup key (the fetch stage passes PC+4)
+  val hit = Bool('output);
+  val target = UFix(32, 'output);
+  val wen = Bool('input); // write the entry selected by correct_pc4
+  val correct_pc4 = UFix(32, 'input);
+  val correct_target = UFix(32, 'input);
+}
+
+class rocketDpathBTB extends Component // 4-entry branch target buffer indexed by bits (3,2) of PC+4
+{
+  override val io = new ioDpathBTB();
+  val rst_lwlr_pf = Mem(4, io.wen, io.correct_pc4(3, 2), UFix(1, 1), resetVal = UFix(0, 1)); // per-entry valid bit, set on write
+  val lwlr_pf = Mem(4, io.wen, io.correct_pc4(3, 2),
+                    Cat(io.correct_pc4(31,4), io.correct_target(31,2)), resetVal = UFix(0, 1)); // entry = {28-bit tag pc4(31,4), 30-bit target(31,2)}
+  val is_val = rst_lwlr_pf(io.current_pc4(3, 2));
+  val tag_target = lwlr_pf(io.current_pc4(3, 2));
+  io.hit := (is_val & (tag_target(57,30) === io.current_pc4(31, 4))).toBool;
+  io.target := Cat(tag_target(29, 0), Bits(0,2)).toUFix; // restore the two dropped low bits as zeros
+}
+
+class ioDpathPCR extends Bundle()
+{
+  val host = new ioHost(List("from", "from_wen", "to"));
+  val debug = new ioDebug();
+  val r = new ioReadPort();
+  val w = new ioWritePort();
+
+  val status = Bits(8, 'output);
+  val exception = Bool('input);
+  val cause = UFix(5, 'input);
+  val pc = UFix(32, 'input);
+  val eret = Bool('input);
+}
+
+class rocketDpathPCR extends Component // processor control registers: status, exception state, host mailboxes, scratch registers
+{
+  override val io = new ioDpathPCR();
+
+  val HAVE_FPU = Bool(false);
+  val HAVE_VEC = Bool(false);
+  val w = 32; // width of most control registers; reads are extended to 2*w bits
+  val reg_epc = Reg(resetVal = Bits(0, w));
+  val reg_badvaddr = Reg(resetVal = Bits(0, w));
+  val reg_ebase = Reg(resetVal = Bits(0, w));
+  val reg_count = Reg(resetVal = Bits(0, w));
+  val reg_compare = Reg(resetVal = Bits(0, w));
+  val reg_cause = Reg(resetVal = Bits(0, 5));
+  val reg_tohost = Reg(resetVal = Bits(0, w));
+  val reg_fromhost = Reg(resetVal = Bits(0, w));
+  val reg_k0 = Reg(resetVal = Bits(0, 2*w));
+  val reg_k1 = Reg(resetVal = Bits(0, 2*w));
+
+  val reg_error_mode = Reg(resetVal = Bool(false));
+  val reg_log_control = Reg(resetVal = Bool(false));
+  val reg_status_im = Reg(resetVal = Bits(0,8));
+  val reg_status_sx = Reg(resetVal = Bool(true));
+  val reg_status_ux = Reg(resetVal = Bool(true));
+  val reg_status_ef = Reg(resetVal = Bool(false));
+  val reg_status_ev = Reg(resetVal = Bool(false));
+  val reg_status_s = Reg(resetVal = Bool(true));
+  val reg_status_ps = Reg(resetVal = Bool(false));
+  val reg_status_et = Reg(resetVal = Bool(false));
+
+  val reg_status = Cat(reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, Bits(0,1), reg_status_ev, reg_status_ef, reg_status_et); // bit 7..0 = sx, ux, s, ps, 0, ev, ef, et
+  val rdata = Wire() { Bits() };
+
+  io.status := reg_status;
+  io.host.to := Mux(io.host.from_wen, Bits(0, w), reg_tohost); // output reads as zero in a cycle where the host is writing
+  io.debug.error_mode := reg_error_mode;
+  io.debug.log_control := reg_log_control;
+  io.r.data := rdata;
+
+  when (io.host.from_wen) { // host write: latch fromhost and clear tohost
+    reg_tohost <== Bits(0, w);
+    reg_fromhost <== io.host.from;
+  }
+  otherwise {
+    when (!io.exception && io.w.en && (io.w.addr === PCR_TOHOST)) { // processor write to tohost also clears fromhost
+      reg_tohost <== io.w.data(w-1, 0);
+      reg_fromhost <== Bits(0, w);
+    }
+  }
+
+  when (io.exception && !reg_status_et) { // exception while et is clear: enter error mode
+    reg_error_mode <== Bool(true);
+  }
+
+  when (io.exception && reg_status_et) { // exception taken: save s into ps, set s, clear et, record pc and cause
+    reg_status_s <== Bool(true);
+    reg_status_ps <== reg_status_s;
+    reg_status_et <== Bool(false);
+    reg_epc <== io.pc;
+    reg_cause <== io.cause;
+  }
+
+  when (!io.exception && io.eret) { // eret: restore s from ps and set et again
+    reg_status_s <== reg_status_ps;
+    reg_status_et <== Bool(true);
+  }
+
+  when (!io.exception && !io.eret && io.w.en) { // ordinary register write, decoded by address
+    when (io.w.addr === PCR_STATUS) {
+      reg_status_im <== io.w.data(15,8);
+      reg_status_sx <== io.w.data(7).toBool;
+      reg_status_ux <== io.w.data(6).toBool;
+      reg_status_s <== io.w.data(5).toBool;
+      reg_status_ps <== io.w.data(4).toBool;
+      reg_status_ev <== HAVE_VEC && io.w.data(2).toBool;
+      reg_status_ef <== HAVE_FPU && io.w.data(1).toBool;
+      reg_status_et <== io.w.data(0).toBool;
+    }
+    when (io.w.addr === PCR_EPC) { reg_epc <== io.w.data(w-1,0); }
+    when (io.w.addr === PCR_BADVADDR) { reg_badvaddr <== io.w.data(w-1,0); }
+    when (io.w.addr === PCR_EVEC) { reg_ebase <== io.w.data(w-1,0); }
+    when (io.w.addr === PCR_COUNT) { reg_count <== io.w.data(w-1,0); }
+    when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(w-1,0); }
+    when (io.w.addr === PCR_CAUSE) { reg_cause <== io.w.data(4,0); }
+    when (io.w.addr === PCR_LOG) { reg_log_control <== io.w.data(0).toBool; }
+    when (io.w.addr === PCR_FROMHOST) { reg_fromhost <== io.w.data(w-1,0); }
+    when (io.w.addr === PCR_K0) { reg_k0 <== io.w.data; }
+    when (io.w.addr === PCR_K1) { reg_k1 <== io.w.data; }
+  }
+
+  when (!io.r.en) { rdata <== Bits(0,2*w); } // NOTE(review): presumably takes priority over the switch below when the read port is disabled - confirm for this Chisel version
+  switch (io.r.addr) {
+    is (PCR_STATUS) { rdata <== Cat(Bits(0,w+16), reg_status_im, reg_status); }
+    is (PCR_EPC) { rdata <== Cat(Fill(w, reg_epc(w-1)), reg_epc); }
+    is (PCR_BADVADDR) { rdata <== Cat(Fill(w, reg_badvaddr(w-1)), reg_badvaddr); }
+    is (PCR_EVEC) { rdata <== Cat(Fill(w, reg_ebase(w-1)), reg_ebase); }
+    is (PCR_COUNT) { rdata <== Cat(Fill(w, reg_count(w-1)), reg_count); }
+    is (PCR_COMPARE) { rdata <== Cat(Fill(w, reg_compare(w-1)), reg_compare); }
+    is (PCR_CAUSE) { rdata <== Cat(Bits(0,w+27), reg_cause); }
+    is (PCR_MEMSIZE) { rdata <== Bits("h2000", 2*w); } // read-only constant
+    is (PCR_LOG) { rdata <== Cat(Bits(0,63), reg_log_control); }
+    is (PCR_FROMHOST) { rdata <== Cat(Fill(w, reg_fromhost(w-1)), reg_fromhost); }
+    is (PCR_TOHOST) { rdata <== Cat(Fill(w, reg_tohost(w-1)), reg_tohost); }
+    is (PCR_K0) { rdata <== reg_k0; }
+    is (PCR_K1) { rdata <== reg_k1; }
+    otherwise { rdata <== Bits(0,2*w); }
+  }
+}
+
+class ioReadPort extends Bundle() // 5-bit address, enable, 64-bit data out
+{
+  val addr = UFix(5, 'input);
+  val en = Bool('input);
+  val data = Bits(64, 'output);
+}
+
+class ioWritePort extends Bundle() // 5-bit address, enable, 64-bit data in
+{
+  val addr = UFix(5, 'input);
+  val en = Bool('input);
+  val data = Bits(64, 'input);
+}
+
+class ioRegfile extends Bundle()
+{
+  val r0 = new ioReadPort();
+  val r1 = new ioReadPort();
+  val w0 = new ioWritePort();
+  val w1 = new ioWritePort();
+}
+
+class rocketDpathRegfile extends Component // 32 x 64-bit register file, two read ports and two write ports; index 0 always reads as zero
+{
+  override val io = new ioRegfile();
+  val regfile = Mem(32, io.w0.en && (io.w0.addr != UFix(0,5)), io.w0.addr, io.w0.data); // writes to index 0 are suppressed
+  regfile.write(io.w1.en && (io.w1.addr != UFix(0,5)), io.w1.addr, io.w1.data); // second write port; NOTE(review): behaviour when both ports write the same index is not defined here - confirm
+  io.r0.data := Mux((io.r0.addr === UFix(0, 5)) || !io.r0.en, Bits(0, 64), regfile(io.r0.addr)); // zero when reading index 0 or when disabled
+  io.r1.data := Mux((io.r1.addr === UFix(0, 5)) || !io.r1.en, Bits(0, 64), regfile(io.r1.addr));
+}
+
+}
diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala
new file mode 100644
index 00000000..7bf4ad3f
--- /dev/null
+++ b/rocket/src/main/scala/icache.scala
@@ -0,0 +1,120 @@
+package Top {
+
+import Chisel._
+import Node._;
+import scala.math._;
+
+// interface between I$ and processor (32 bits wide)
+class ioImem(view: List[String] = null)
extends Bundle (view) +{ + val req_addr = UFix(32, 'input); + val req_val = Bool('input); + val req_rdy = Bool('output); + val resp_data = Bits(32, 'output); + val resp_val = Bool('output); +} + +// interface between I$ and memory (128 bits wide) +class ioIcache(view: List[String] = null) extends Bundle (view) +{ + val req_addr = UFix(32, 'input); + val req_val = Bool('input); + val req_rdy = Bool('output); + val resp_data = Bits(128, 'output); + val resp_val = Bool('output); +} + +class ioICacheDM extends Bundle() { + val cpu = new ioImem(); + val mem = new ioIcache().flip(); +} + +// basic direct mapped instruction cache +// parameters : +// lines = # cache lines +// addr_bits = address width (word addressable) bits +// 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines + +class rocketICacheDM(lines: Int, addrbits : Int) extends Component { + val io = new ioICacheDM(); + + val indexbits = ceil(log10(lines)/log10(2)).toInt; + val offsetbits = 6; + val tagmsb = addrbits - 1; + val taglsb = indexbits+offsetbits; + val indexmsb = taglsb-1; + val indexlsb = offsetbits; + val offsetmsb = indexlsb-1; + val offsetlsb = 2; + + val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: s_resolve_miss :: Nil = Enum(6) { UFix() }; + val state = Reg(resetVal = s_reset); + + val r_cpu_req_addr = Reg(Bits(0, addrbits)); + when (io.cpu.req_val && ((state === s_ready) || (state === s_resolve_miss))) { r_cpu_req_addr <== io.cpu.req_addr; } + + val r_cpu_req_val = Reg(Bool(false)); + when ((state === s_ready) || (state === s_resolve_miss)) { r_cpu_req_val <== io.cpu.req_val; } + otherwise { r_cpu_req_val <== Bool(false); } + + val refill_count = Reg(resetVal = UFix(0,2)); + when (io.mem.resp_val) { refill_count <== refill_count + UFix(1); } + + // tag array + val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); + val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; + val tag_we = (state === s_refill_wait) && io.mem.resp_val; + val tag_array = Mem(lines, 
tag_we, tag_waddr, tag_wdata); + val tag_raddr = io.cpu.req_addr(indexmsb, indexlsb);; + val tag_lookup = Reg(tag_array.read(tag_raddr)); + + // valid bit array + val vb_array = Reg(resetVal = Bits(0, lines)); + val vb_rdata = Reg(vb_array(io.cpu.req_addr(indexmsb, indexlsb))); + + when ((state === s_refill_wait) && io.mem.resp_val) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } + + val tag_match = vb_rdata.toBool && (tag_lookup === r_cpu_req_addr(tagmsb, taglsb)); + + // data array + val data_array_waddr = Cat(r_cpu_req_addr(indexmsb, indexlsb), refill_count).toUFix; + val data_array = Mem(lines*4, io.mem.resp_val, data_array_waddr, io.mem.resp_data); + val data_array_raddr = Cat(io.cpu.req_addr(indexmsb, indexlsb), io.cpu.req_addr(offsetmsb, offsetmsb-1)); + val data_array_read = data_array(data_array_raddr); + val data_array_rdata = Reg(data_array_read); + + io.cpu.resp_val := (r_cpu_req_val && tag_match && (state === s_ready)); // || (state === s_resolve_miss); + io.cpu.req_rdy := ((state === s_ready) && (!r_cpu_req_val || (r_cpu_req_val && tag_match))); // || (state === s_resolve_miss); + io.cpu.resp_data := MuxLookup(r_cpu_req_addr(offsetmsb-2, offsetlsb).toUFix, data_array_rdata(127, 96), + Array(UFix(2) -> data_array_rdata(95,64), + UFix(1) -> data_array_rdata(63,32), + UFix(0) -> data_array_rdata(31,0))); + + io.mem.req_val := (state === s_request); + io.mem.req_addr := Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix; + + // control state machine + switch (state) { + is (s_reset) { + state <== s_ready; + } + is (s_ready) { + when (r_cpu_req_val && !tag_match) { state <== s_request; } + } + is (s_request) + { + when (io.mem.req_rdy) { state <== s_refill_wait; } + } + is (s_refill_wait) { + when (io.mem.resp_val) { state <== s_refill; } + } + is (s_refill) { + when (io.mem.resp_val && (refill_count === UFix(3,2))) { state <== s_resolve_miss; } + } + is (s_resolve_miss) { + state <== s_ready; + } + } +} + +} 
diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala new file mode 100644 index 00000000..1c1219e3 --- /dev/null +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -0,0 +1,91 @@ +package Top { + +import Chisel._; +import Node._; +import queues._; + +class ioIPrefetcherMem(view: List[String] = null) extends Bundle (view) +{ + val req_addr = UFix(32, 'output); + val req_val = Bool('output); + val req_rdy = Bool('input); + val req_tag = Bits(3, 'output); + val resp_data = Bits(128, 'input); + val resp_val = Bool('input); + val resp_tag = Bits(3, 'input); +} + +class ioIPrefetcher extends Bundle() { + val icache = new ioIcache(); + val mem = new ioIPrefetcherMem(); +} + +class rocketIPrefetcher extends Component() { + val io = new ioIPrefetcher(); + val pdq = new queueSimplePF(128, 4, 2); + + val s_invalid :: s_valid :: s_refilling :: s_req_wait :: s_resp_wait :: s_bad_resp_wait :: Nil = Enum(6) { UFix() }; + val state = Reg(resetVal = s_invalid); + + val demand_miss = io.icache.req_val & io.icache.req_rdy; + val prefetch_addr = Reg(resetVal = UFix(0,32)); + when (demand_miss) { prefetch_addr <== io.icache.req_addr + UFix(4); } + + val addr_match = (prefetch_addr === io.icache.req_addr); + val hit = (state != s_invalid) & (state != s_req_wait) & addr_match; + + io.icache.req_rdy := io.mem.req_rdy; + val ip_mem_req_rdy = io.mem.req_rdy & ~(io.icache.req_val & ~hit); + val ip_mem_resp_val = io.mem.resp_val && io.mem.resp_tag(0).toBool; + + io.mem.req_val := io.icache.req_val & ~hit | (state === s_req_wait); + io.mem.req_tag := !(io.icache.req_val && !hit); + io.mem.req_addr := Mux(io.mem.req_tag.toBool, prefetch_addr, io.icache.req_addr); + + val pdq_reset = Reg(resetVal = Bool(true)); + pdq_reset <== demand_miss & ~hit | (state === s_bad_resp_wait); + + val fill_cnt = Reg(resetVal = UFix(0, 2)); + when (ip_mem_resp_val.toBool) { fill_cnt <== fill_cnt + UFix(1,1); } + val fill_done = (fill_cnt === UFix(3,2)) & 
ip_mem_resp_val; + + val forward = Reg(resetVal = Bool(false)); + val forward_cnt = Reg(resetVal = UFix(0, 2)); + when (forward & pdq.io.deq_val) { forward_cnt <== forward_cnt + UFix(1,1); } + val forward_done = (forward_cnt === UFix(3, 2)) & pdq.io.deq_val; + forward <== (demand_miss & hit | forward & ~forward_done); + + io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag.toBool) || (forward && pdq.io.deq_val); + io.icache.resp_data := Mux(forward, pdq.io.deq_bits, io.mem.resp_data); + + pdq.io.q_reset := pdq_reset; + pdq.io.enq_bits := io.mem.resp_data; + pdq.io.enq_val := ip_mem_resp_val.toBool; + pdq.io.deq_rdy := forward; + + switch (state) { + is (s_invalid) { + when (demand_miss) { state <== s_req_wait; } + } + is (s_valid) { + when (demand_miss | (forward & forward_done)) { state <== s_req_wait; } + } + is (s_refilling) { + when (demand_miss & ~addr_match & fill_done.toBool) { state <== s_req_wait; } + when (demand_miss & ~addr_match) { state <== s_bad_resp_wait; } + when (fill_done.toBool) { state <== s_valid; } + } + is (s_req_wait) { + when (ip_mem_req_rdy) { state <== s_resp_wait; } + } + is (s_resp_wait) { + when (demand_miss & ~addr_match) { state <== s_bad_resp_wait; } + when (ip_mem_resp_val.toBool) { state <== s_refilling; } + } + is (s_bad_resp_wait) { + when (fill_done.toBool & ip_mem_resp_val.toBool) { state <== s_req_wait; } + } + } +} + +} \ No newline at end of file diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala new file mode 100644 index 00000000..13d822b1 --- /dev/null +++ b/rocket/src/main/scala/instructions.scala @@ -0,0 +1,162 @@ +package Top { +import Chisel._ +import Node._; + +object Instructions +{ + val UNIMP = Bits("b00000000000000000000000000000000", 32); + val J = Bits("b?????????????????????????_1100111", 32); + val JAL = Bits("b?????????????????????????_1101111", 32); + val JALR_C = Bits("b?????_?????_????????????_000_1101011", 32); + val JALR_R = 
Bits("b?????_?????_????????????_001_1101011", 32); + val JALR_J = Bits("b?????_?????_????????????_010_1101011", 32); + val BEQ = Bits("b?????_?????_?????_???????_000_1100011", 32); + val BNE = Bits("b?????_?????_?????_???????_001_1100011", 32); + val BLT = Bits("b?????_?????_?????_???????_100_1100011", 32); + val BGE = Bits("b?????_?????_?????_???????_101_1100011", 32); + val BLTU = Bits("b?????_?????_?????_???????_110_1100011", 32); + val BGEU = Bits("b?????_?????_?????_???????_111_1100011", 32); + val LUI = Bits("b?????_????????????????????_0110111", 32); + val ADDI = Bits("b?????_?????_????????????_000_0010011", 32); + val SLLI = Bits("b?????_?????_000000_??????_001_0010011", 32); + val SLTI = Bits("b?????_?????_????????????_010_0010011", 32); + val SLTIU = Bits("b?????_?????_????????????_011_0010011", 32); + val XORI = Bits("b?????_?????_????????????_100_0010011", 32); + val SRLI = Bits("b?????_?????_000000_??????_101_0010011", 32); + val SRAI = Bits("b?????_?????_000001_??????_101_0010011", 32); + val ORI = Bits("b?????_?????_????????????_110_0010011", 32); + val ANDI = Bits("b?????_?????_????????????_111_0010011", 32); + val ADD = Bits("b?????_?????_?????_0000000000_0110011", 32); + val SUB = Bits("b?????_?????_?????_1000000000_0110011", 32); + val SLL = Bits("b?????_?????_?????_0000000001_0110011", 32); + val SLT = Bits("b?????_?????_?????_0000000010_0110011", 32); + val SLTU = Bits("b?????_?????_?????_0000000011_0110011", 32); + val riscvXOR = Bits("b?????_?????_?????_0000000100_0110011", 32); + val SRL = Bits("b?????_?????_?????_0000000101_0110011", 32); + val SRA = Bits("b?????_?????_?????_1000000101_0110011", 32); + val riscvOR = Bits("b?????_?????_?????_0000000110_0110011", 32); + val riscvAND = Bits("b?????_?????_?????_0000000111_0110011", 32); + val MUL = Bits("b?????_?????_?????_0000001000_0110011", 32); + val MULH = Bits("b?????_?????_?????_0000001001_0110011", 32); + val MULHSU = Bits("b?????_?????_?????_0000001010_0110011", 32); + val MULHU = 
Bits("b?????_?????_?????_0000001011_0110011", 32); + val DIV = Bits("b?????_?????_?????_0000001100_0110011", 32); + val DIVU = Bits("b?????_?????_?????_0000001101_0110011", 32); + val REM = Bits("b?????_?????_?????_0000001110_0110011", 32); + val REMU = Bits("b?????_?????_?????_0000001111_0110011", 32); + val ADDIW = Bits("b?????_?????_????????????_000_0011011", 32); + val SLLIW = Bits("b?????_?????_000000_0_?????_001_0011011", 32); + val SRLIW = Bits("b?????_?????_000000_0_?????_101_0011011", 32); + val SRAIW = Bits("b?????_?????_000001_0_?????_101_0011011", 32); + val ADDW = Bits("b?????_?????_?????_0000000000_0111011", 32); + val SUBW = Bits("b?????_?????_?????_1000000000_0111011", 32); + val SLLW = Bits("b?????_?????_?????_0000000001_0111011", 32); + val SRLW = Bits("b?????_?????_?????_0000000101_0111011", 32); + val SRAW = Bits("b?????_?????_?????_1000000101_0111011", 32); + val MULW = Bits("b?????_?????_?????_0000001000_0111011", 32); + val DIVW = Bits("b?????_?????_?????_0000001100_0111011", 32); + val DIVUW = Bits("b?????_?????_?????_0000001101_0111011", 32); + val REMW = Bits("b?????_?????_?????_0000001110_0111011", 32); + val REMUW = Bits("b?????_?????_?????_0000001111_0111011", 32); + val LB = Bits("b?????_?????_????????????_000_0000011", 32); + val LH = Bits("b?????_?????_????????????_001_0000011", 32); + val LW = Bits("b?????_?????_????????????_010_0000011", 32); + val LD = Bits("b?????_?????_????????????_011_0000011", 32); + val LBU = Bits("b?????_?????_????????????_100_0000011", 32); + val LHU = Bits("b?????_?????_????????????_101_0000011", 32); + val LWU = Bits("b?????_?????_????????????_110_0000011", 32); + val SB = Bits("b?????_?????_?????_???????_000_0100011", 32); + val SH = Bits("b?????_?????_?????_???????_001_0100011", 32); + val SW = Bits("b?????_?????_?????_???????_010_0100011", 32); + val SD = Bits("b?????_?????_?????_???????_011_0100011", 32); + val AMOADD_W = Bits("b?????_?????_?????_00000_000_10_1000011", 32); + val AMOSWAP_W = 
Bits("b?????_?????_?????_00000_010_10_1000011", 32); + val AMOAND_W = Bits("b?????_?????_?????_00000_100_10_1000011", 32); + val AMOOR_W = Bits("b?????_?????_?????_00000_110_10_1000011", 32); + val AMOMIN_W = Bits("b?????_?????_?????_00001_000_10_1000011", 32); + val AMOMAX_W = Bits("b?????_?????_?????_00001_010_10_1000011", 32); + val AMOMINU_W = Bits("b?????_?????_?????_00001_100_10_1000011", 32); + val AMOMAXU_W = Bits("b?????_?????_?????_00001_110_10_1000011", 32); + val AMOADD_D = Bits("b?????_?????_?????_00000_000_11_1000011", 32); + val AMOSWAP_D = Bits("b?????_?????_?????_00000_010_11_1000011", 32); + val AMOAND_D = Bits("b?????_?????_?????_00000_100_11_1000011", 32); + val AMOOR_D = Bits("b?????_?????_?????_00000_110_11_1000011", 32); + val AMOMIN_D = Bits("b?????_?????_?????_00001_000_11_1000011", 32); + val AMOMAX_D = Bits("b?????_?????_?????_00001_010_11_1000011", 32); + val AMOMINU_D = Bits("b?????_?????_?????_00001_100_11_1000011", 32); + val AMOMAXU_D = Bits("b?????_?????_?????_00001_110_11_1000011", 32); + val FENCE = Bits("b?????_?????_????????????_010_0101111", 32); + val FENCE_I = Bits("b?????_?????_????????????_001_0101111", 32); + val RDNPC = Bits("b?????_00000_00000_0000000000_0010111", 32); + val SYNCI = Bits("b00000_00000_00000_0000000001_0010111", 32); + val SYNC = Bits("b00000_00000_00000_0000000010_0010111", 32); + val SYSCALL = Bits("b00000_00000_00000_0000000000_1110111", 32); + val BREAK = Bits("b00000_00000_00000_0000000001_1110111", 32); + val EI = Bits("b?????_00000_00000_0000000000_1111011", 32); + val DI = Bits("b?????_00000_00000_0000000001_1111011", 32); + val MFPCR = Bits("b?????_00000_?????_0000000010_1111011", 32); + val MTPCR = Bits("b00000_?????_?????_0000000011_1111011", 32); + val ERET = Bits("b00000_00000_00000_0000000100_1111011", 32); + val FADD_S = Bits("b?????_?????_?????_00000_???_00_1010011", 32); + val FSUB_S = Bits("b?????_?????_?????_00001_???_00_1010011", 32); + val FMUL_S = 
Bits("b?????_?????_?????_00010_???_00_1010011", 32); + val FDIV_S = Bits("b?????_?????_?????_00011_???_00_1010011", 32); + val FSQRT_S = Bits("b?????_?????_00000_00100_???_00_1010011", 32); + val FSGNJ_S = Bits("b?????_?????_?????_0010111100_1010011", 32); + val FSGNJN_S = Bits("b?????_?????_?????_0011011100_1010011", 32); + val FSGNJX_S = Bits("b?????_?????_?????_0011111100_1010011", 32); + val FADD_D = Bits("b?????_?????_?????_00000_???_01_1010011", 32); + val FSUB_D = Bits("b?????_?????_?????_00001_???_01_1010011", 32); + val FMUL_D = Bits("b?????_?????_?????_00010_???_01_1010011", 32); + val FDIV_D = Bits("b?????_?????_?????_00011_???_01_1010011", 32); + val FSQRT_D = Bits("b?????_?????_00000_00100_???_01_1010011", 32); + val FSGNJ_D = Bits("b?????_?????_?????_0010111101_1010011", 32); + val FSGNJN_D = Bits("b?????_?????_?????_0011011101_1010011", 32); + val FSGNJX_D = Bits("b?????_?????_?????_0011111101_1010011", 32); + val FCVT_L_S = Bits("b?????_?????_00000_01000_???_00_1010011", 32); + val FCVT_LU_S = Bits("b?????_?????_00000_01001_???_00_1010011", 32); + val FCVT_W_S = Bits("b?????_?????_00000_01010_???_00_1010011", 32); + val FCVT_WU_S = Bits("b?????_?????_00000_01011_???_00_1010011", 32); + val FCVT_L_D = Bits("b?????_?????_00000_01000_???_01_1010011", 32); + val FCVT_LU_D = Bits("b?????_?????_00000_01001_???_01_1010011", 32); + val FCVT_W_D = Bits("b?????_?????_00000_01010_???_01_1010011", 32); + val FCVT_WU_D = Bits("b?????_?????_00000_01011_???_01_1010011", 32); + val FCVT_S_L = Bits("b?????_?????_00000_01100_???_00_1010011", 32); + val FCVT_S_LU = Bits("b?????_?????_00000_01101_???_00_1010011", 32); + val FCVT_S_W = Bits("b?????_?????_00000_01110_???_00_1010011", 32); + val FCVT_S_WU = Bits("b?????_?????_00000_01111_???_00_1010011", 32); + val FCVT_D_L = Bits("b?????_?????_00000_01100_???_01_1010011", 32); + val FCVT_D_LU = Bits("b?????_?????_00000_01101_???_01_1010011", 32); + val FCVT_D_W = Bits("b?????_?????_00000_0111011101_1010011", 32); + val 
FCVT_D_WU = Bits("b?????_?????_00000_0111111101_1010011", 32); + val FCVT_S_D = Bits("b?????_?????_00000_10001_???_00_1010011", 32); + val FCVT_D_S = Bits("b?????_?????_00000_10000_???_01_1010011", 32); + val FEQ_S = Bits("b?????_?????_?????_1010111100_1010011", 32); + val FLT_S = Bits("b?????_?????_?????_1011011100_1010011", 32); + val FLE_S = Bits("b?????_?????_?????_1011111100_1010011", 32); + val FEQ_D = Bits("b?????_?????_?????_1010111101_1010011", 32); + val FLT_D = Bits("b?????_?????_?????_1011011101_1010011", 32); + val FLE_D = Bits("b?????_?????_?????_1011111101_1010011", 32); + val MFTX_S = Bits("b?????_00000_?????_1100011100_1010011", 32); + val MFTX_D = Bits("b?????_00000_?????_1100011101_1010011", 32); + val MFFSR = Bits("b?????_00000_00000_1101111100_1010011", 32); + val MXTF_S = Bits("b?????_?????_00000_1110011100_1010011", 32); + val MXTF_D = Bits("b?????_?????_00000_1110011101_1010011", 32); + val MTFSR = Bits("b00000_?????_00000_1110111100_1010011", 32); + val FLW = Bits("b?????_?????_????????????_010_0000111", 32); + val FLD = Bits("b?????_?????_????????????_011_0000111", 32); + val FSW = Bits("b?????_?????_?????_???????_010_0100111", 32); + val FSD = Bits("b?????_?????_?????_???????_011_0100111", 32); + val FMADD_S = Bits("b?????_?????_?????_?????_???_00_1000011", 32); + val FMSUB_S = Bits("b?????_?????_?????_?????_???_00_1000111", 32); + val FNMSUB_S = Bits("b?????_?????_?????_?????_???_00_1001011", 32); + val FNMADD_S = Bits("b?????_?????_?????_?????_???_00_1001111", 32); + val FMADD_D = Bits("b?????_?????_?????_?????_???_01_1000011", 32); + val FMSUB_D = Bits("b?????_?????_?????_?????_???_01_1000111", 32); + val FNMSUB_D = Bits("b?????_?????_?????_?????_???_01_1001011", 32); + val FNMADD_D = Bits("b?????_?????_?????_?????_???_01_1001111", 32); + val NOP = ADDI & Bits("b00000000000000000000001111111111", 32); + val CFLUSH = Bits("b00000_00000_00000_0000000101_1111011", 32); +} + +} diff --git a/rocket/src/main/scala/memory.scala 
b/rocket/src/main/scala/memory.scala new file mode 100644 index 00000000..3d0b5869 --- /dev/null +++ b/rocket/src/main/scala/memory.scala @@ -0,0 +1,124 @@ +package Top +{ + +import Chisel._ +import Node._; + +import queues._; +import Constants._; + +class ioMemory extends Bundle() +{ + val mem_mrq_val = Bool('input); + val mem_mrq_cmd = Bits(4, 'input); + val mem_mrq_type = Bits(3, 'input); + val mem_xsdq_rdy = Bool('output); + val mem_xsdq_val = Bool('input); + val mem_mrq_deq = Bool('output); + val dpath_rs2 = Bits(64, 'input); + val dpath_waddr = UFix(5, 'input); + val dpath_alu_out = UFix(64, 'input); + val dmem_req_val = Bool('output); + val dmem_req_rdy = Bool('input); + val dmem_req_op = Bits(4, 'output); + val dmem_req_addr = UFix(32, 'output); + val dmem_req_data = Bits(64, 'output); + val dmem_req_wmask = Bits(8, 'output); + val dmem_req_tag = Bits(12, 'output); +} + +class rocketMemory extends Component +{ + override val io = new ioMemory(); + val mrq_enq_xf + = (io.mem_mrq_cmd === M_FRD || io.mem_mrq_cmd === M_FWR); + + val mrq_enq_op + = Mux(io.mem_mrq_cmd === M_FRD, M_XRD, + Mux(io.mem_mrq_cmd === M_FWR, M_XWR, + io.mem_mrq_cmd)); + + val mrq_enq_type = io.mem_mrq_type; + + val mrq = new queueSimplePF(45, 4, 2); + val xsdq = new queueSimplePF(64, 4, 2); + + mrq.io.q_reset := Bool(false); + mrq.io.enq_bits := Cat(mrq_enq_xf,mrq_enq_op,mrq_enq_type,io.dpath_waddr,io.dpath_alu_out(31,0)); + mrq.io.enq_val ^^ io.mem_mrq_val; + // mrq.io.enq_rdy <> (); issue logic takes care of this + + val mrq_deq_xf = Wire(){Bits(width = 1)}; + val mrq_deq_op = Wire(){Bits(width = 4)}; + val mrq_deq_type = Wire(){Bits(width = 3)}; + val mrq_deq_waddr = Wire(){Bits(width = 5)}; + val mrq_deq_addr = Wire(){Bits(width = 32)}; + val mrq_deq_bits = mrq.io.deq_bits; + mrq_deq_bits.Match(Array(mrq_deq_xf, mrq_deq_op, mrq_deq_type, mrq_deq_waddr, mrq_deq_addr)); + val mrq_deq_val = mrq.io.deq_val; + + xsdq.io.q_reset := Bool(false); + xsdq.io.enq_bits ^^ io.dpath_rs2; + 
xsdq.io.enq_val ^^ io.mem_xsdq_val; + xsdq.io.enq_rdy ^^ io.mem_xsdq_rdy; + + val mrq_deq_flush = mrq_deq_op === M_FLA; + val mrq_deq_load = mrq_deq_op === M_XRD; + val mrq_deq_xstore = mrq_deq_op === M_XWR & ~mrq_deq_xf & xsdq.io.deq_val; + + val mrq_deq_rdy = io.dmem_req_rdy & (mrq_deq_load | mrq_deq_xstore | mrq_deq_flush); + io.mem_mrq_deq := (mrq_deq_val & mrq_deq_rdy).toBool; + mrq.io.deq_rdy := mrq_deq_rdy.toBool; + val xsdq_deq_rdy = io.dmem_req_rdy & mrq_deq_val & mrq_deq_op === M_XWR & ~mrq_deq_xf; + xsdq.io.deq_rdy := xsdq_deq_rdy.toBool; + + val wdata = xsdq.io.deq_bits; + + val wmask_b = + Mux(mrq_deq_addr(2,0) === UFix(0, 3), Bits("b0000_0001", 8), + Mux(mrq_deq_addr(2,0) === UFix(1, 3), Bits("b0000_0010", 8), + Mux(mrq_deq_addr(2,0) === UFix(2, 3), Bits("b0000_0100", 8), + Mux(mrq_deq_addr(2,0) === UFix(3, 3), Bits("b0000_1000", 8), + Mux(mrq_deq_addr(2,0) === UFix(4, 3), Bits("b0001_0000", 8), + Mux(mrq_deq_addr(2,0) === UFix(5, 3), Bits("b0010_0000", 8), + Mux(mrq_deq_addr(2,0) === UFix(6, 3), Bits("b0100_0000", 8), + Mux(mrq_deq_addr(2,0) === UFix(7, 3), Bits("b1000_0000", 8), + UFix(0, 8))))))))); + + val wmask_h = + Mux(mrq_deq_addr(2,1) === UFix(0, 2), Bits("b0000_0011", 8), + Mux(mrq_deq_addr(2,1) === UFix(1, 2), Bits("b0000_1100", 8), + Mux(mrq_deq_addr(2,1) === UFix(2, 2), Bits("b0011_0000", 8), + Mux(mrq_deq_addr(2,1) === UFix(3, 2), Bits("b1100_0000", 8), + UFix(0, 8))))); + + val wmask_w = + Mux(mrq_deq_addr(2) === UFix(0, 1), Bits("b0000_1111", 8), + Mux(mrq_deq_addr(2) === UFix(1, 1), Bits("b1111_0000", 8), + UFix(0, 8))); + + val wmask_d = + Bits("b1111_1111", 8); + + io.dmem_req_val := (mrq_deq_val & (mrq_deq_load | mrq_deq_xstore | mrq_deq_flush)).toBool; + io.dmem_req_op := mrq_deq_op; + io.dmem_req_addr := Cat(mrq_deq_addr(31,3), UFix(0, 3)).toUFix; + + io.dmem_req_data := + Mux(mrq_deq_type === MT_B, Fill(8, wdata( 7,0)), + Mux(mrq_deq_type === MT_H, Fill(4, wdata(15,0)), + Mux(mrq_deq_type === MT_W, Fill(2, wdata(31,0)), + 
Mux(mrq_deq_type === MT_D, wdata, + UFix(0, 64))))); + + io.dmem_req_wmask := + Mux(mrq_deq_type === MT_B, wmask_b, + Mux(mrq_deq_type === MT_H, wmask_h, + Mux(mrq_deq_type === MT_W, wmask_w, + Mux(mrq_deq_type === MT_D, wmask_d, + UFix(0, 8))))); + + io.dmem_req_tag := Cat(mrq_deq_xf,mrq_deq_type,mrq_deq_addr(2,0),mrq_deq_waddr); +} + +} diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala new file mode 100644 index 00000000..67a53ac3 --- /dev/null +++ b/rocket/src/main/scala/multiplier.scala @@ -0,0 +1,61 @@ +package Top { + +import Chisel._ +import Node._; +import Constants._; + +class ioMultiplier(width: Int) extends Bundle { + // requests + val mul_val = Bool('input); + val mul_fn = UFix(3, 'input); + val mul_tag = UFix(5, 'input); + val in0 = Bits(width, 'input); + val in1 = Bits(width, 'input); + + // responses + val result = Bits(width, 'output); + val result_tag = UFix(5, 'output); + val result_val = Bool('output); +} + +class rocketMultiplier extends Component { + val io = new ioMultiplier(64); + + val r_val = Reg(resetVal = Bool(false)); + val r_fn = Reg(resetVal = UFix(0,3)); + val r_tag = Reg(resetVal = UFix(0,5)); + val r_in0 = Reg(resetVal = Bits(0,64)); + val r_in1 = Reg(resetVal = Bits(0,64)); + + r_val <== io.mul_val; + when (io.mul_val) { + r_fn <== io.mul_fn; + r_tag <== io.mul_tag; + r_in0 <== io.in0; + r_in1 <== io.in1; + } + + val sxl64 = (r_fn === MUL_64H) || (r_fn === MUL_64HSU); + val sxr64 = (r_fn === MUL_64H); + + val lhs = Cat(r_in0(63) & sxl64, r_in0); + val rhs = Cat(r_in1(63) & sxr64, r_in1); + + val mul_result = lhs.toFix * rhs.toFix; + + val mul_output = MuxCase(mul_result(63,0), Array( + ((r_fn === MUL_64H) || (r_fn === MUL_64HU) || (r_fn === MUL_64HSU)) -> mul_result(127,64), + (r_fn === MUL_32) -> Cat(Fill(32, mul_result(31)), mul_result(31, 0)))); + + // just a hack for now, this should be a parameterized number of stages + val r_result = Reg(Reg(Reg(mul_output))); + val r_result_tag = 
Reg(Reg(Reg(r_tag))); + val r_result_val = Reg(Reg(Reg(r_val))); + + io.result := r_result; + io.result_tag := r_result_tag; + io.result_val := r_result_val; + +} + +} diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala new file mode 100644 index 00000000..b2f7aa16 --- /dev/null +++ b/rocket/src/main/scala/queues.scala @@ -0,0 +1,225 @@ +package queues +{ + +import Chisel._ +import Node._; + +class ioQueueCtrl(addr_sz: Int) extends Bundle() +{ + val q_reset = Bool('input); + val enq_val = Bool('input); + val enq_rdy = Bool('output); + val deq_val = Bool('output); + val deq_rdy = Bool('input); + val wen = Bool('output); + val waddr = UFix(addr_sz, 'output); + val raddr = UFix(addr_sz, 'output); +} + +class queueCtrl(entries: Int, addr_sz: Int) extends Component +{ + override val io = new ioQueueCtrl(addr_sz); + + // Enqueue and dequeue pointers + + val enq_ptr = Reg(width = addr_sz, resetVal = UFix(0, addr_sz)); + val deq_ptr = Reg(width = addr_sz, resetVal = UFix(0, addr_sz)); + val full = Reg(width = 1, resetVal = Bool(false)); + + when (io.q_reset) { + enq_ptr <== UFix(0, addr_sz); + deq_ptr <== UFix(0, addr_sz); + full <== Bool(false); + } + + io.waddr := enq_ptr; + io.raddr := deq_ptr; + + // We enq/deq only when they are both ready and valid + + val do_enq = io.enq_rdy && io.enq_val; + val do_deq = io.deq_rdy && io.deq_val; + + // Determine if we have pipeline or flowthrough behaviour and + // set the write enable accordingly. + + val empty = ~full && (enq_ptr === deq_ptr); + + io.wen := do_enq; + + // Ready signals are calculated from full register. If pipeline + // behavior is enabled, then the enq_rdy signal is also calculated + // combinationally from the deq_rdy signal. If flowthrough behavior + // is enabled then the deq_val signal is also calculated combinationally + // from the enq_val signal. 
+ + io.enq_rdy := ~full; + io.deq_val := ~empty; + + // Control logic for the enq/deq pointers and full register + + val deq_ptr_inc = deq_ptr + UFix(1, 1); + val enq_ptr_inc = enq_ptr + UFix(1, 1); + + val deq_ptr_next = + Mux(do_deq, deq_ptr_inc, + deq_ptr); + + val enq_ptr_next = + Mux(do_enq, enq_ptr_inc, + enq_ptr); + + val full_next = + Mux(do_enq && ~do_deq && ( enq_ptr_inc === deq_ptr ), Bool(true), + Mux(do_deq && full, Bool(false), + full)); + + enq_ptr <== enq_ptr_next; + deq_ptr <== deq_ptr_next; + full <== full_next; +} + +class ioQueueSimplePF(data_sz: Int) extends Bundle() +{ + val q_reset = Bool('input); + val enq_val = Bool('input); + val enq_rdy = Bool('output); + val deq_val = Bool('output); + val deq_rdy = Bool('input); + val enq_bits = Bits(data_sz, 'input); + val deq_bits = Bits(data_sz, 'output); +} + +class queueSimplePF(data_sz: Int, entries: Int, addr_sz: Int) extends Component +{ + override val io = new ioQueueSimplePF(data_sz); + val ctrl = new queueCtrl(entries, addr_sz); + ctrl.io.q_reset ^^ io.q_reset; + ctrl.io.deq_val ^^ io.deq_val; + ctrl.io.enq_rdy ^^ io.enq_rdy; + ctrl.io.enq_val ^^ io.enq_val; + ctrl.io.deq_rdy ^^ io.deq_rdy; + val ram = Mem(entries, ctrl.io.wen, ctrl.io.waddr, io.enq_bits); + io.deq_bits := ram(ctrl.io.raddr); +} + +// TODO: SHOULD USE INHERITANCE BUT BREAKS INTROSPECTION CODE +// class IOqueueCtrlFlow extends IOqueueCtrl +class ioQueueCtrlFlow(addr_sz: Int) extends Bundle() /* IOqueueCtrl */ +{ + val enq_val = Bool('input); + val enq_rdy = Bool('output); + val deq_val = Bool('output); + val deq_rdy = Bool('input); + val wen = Bool('output); + val waddr = UFix(addr_sz, 'output); + val raddr = UFix(addr_sz, 'output); + val flowthru = Bool('output); +} + +class queueCtrlFlow(entries: Int, addr_sz: Int) extends Component +{ + override val io = new ioQueueCtrlFlow(addr_sz); + // Enqueue and dequeue pointers + + val enq_ptr = Reg(width = addr_sz, resetVal = UFix(0, addr_sz)); + val deq_ptr = Reg(width = addr_sz, 
resetVal = UFix(0, addr_sz)); + val full = Reg(width = 1, resetVal = Bool(false)); + + io.waddr := enq_ptr; + io.raddr := deq_ptr; + + // We enq/deq only when they are both ready and valid + + val do_enq = io.enq_rdy && io.enq_val; + val do_deq = io.deq_rdy && io.deq_val; + + // Determine if we have pipeline or flowthrough behaviour and + // set the write enable accordingly. + + val empty = ~full && (enq_ptr === deq_ptr); + val do_flowthru = empty && do_enq && do_deq; + io.flowthru := do_flowthru; + + io.wen := do_enq && ~do_flowthru; + + // Ready signals are calculated from full register. If pipeline + // behavior is enabled, then the enq_rdy signal is also calculated + // combinationally from the deq_rdy signal. If flowthrough behavior + // is enabled then the deq_val signal is also calculated combinationally + // from the enq_val signal. + + io.enq_rdy := ~full; + io.deq_val := ~empty || ( empty && io.enq_val ); + + // Control logic for the enq/deq pointers and full register + + val deq_ptr_inc = deq_ptr + UFix(1, 1); + val enq_ptr_inc = enq_ptr + UFix(1, 1); + + val deq_ptr_next = + Mux(do_deq && ~do_flowthru, deq_ptr_inc, + deq_ptr); + + val enq_ptr_next = + Mux(do_enq && ~do_flowthru, enq_ptr_inc, + enq_ptr); + + val full_next = + Mux(do_enq && ~do_deq && ( enq_ptr_inc === deq_ptr ), Bool(true), + Mux(do_deq && full, Bool(false), + full)); + + enq_ptr <== enq_ptr_next; + deq_ptr <== deq_ptr_next; + full <== full_next; +} + +class ioQueueDpathFlow(data_sz: Int, addr_sz: Int) extends Bundle() +{ + val wen = Bool('input); + val flowthru = Bool('input); + val deq_bits = Bits(data_sz, 'output); + val enq_bits = Bits(data_sz, 'input); + val waddr = UFix(addr_sz, 'input); + val raddr = UFix(addr_sz, 'input); +} + +class queueDpathFlow(data_sz: Int, entries: Int, addr_sz: Int) extends Component +{ + override val io = new ioQueueDpathFlow(data_sz, addr_sz); + val ram = Mem(entries, io.wen, io.waddr, io.enq_bits); + val rout = ram(io.raddr); + io.deq_bits := 
Mux(io.flowthru, io.enq_bits, rout); +} + +class ioQueueFlowPF(data_sz: Int) extends Bundle() +{ + val enq_val = Bool('input); + val enq_rdy = Bool('output); + val enq_bits = Bits(data_sz, 'input); + val deq_val = Bool('output); + val deq_rdy = Bool('input); + val deq_bits = Bits(data_sz, 'output); +} + +class queueFlowPF(data_sz: Int, entries: Int, addr_sz: Int) extends Component +{ + override val io = new ioQueueFlowPF(data_sz); + val ctrl = new queueCtrlFlow(entries, addr_sz); + val dpath = new queueDpathFlow(data_sz, entries, addr_sz); + + ctrl.io.deq_rdy ^^ io.deq_rdy; + ctrl.io.wen <> dpath.io.wen; + ctrl.io.raddr <> dpath.io.raddr; + ctrl.io.waddr <> dpath.io.waddr; + ctrl.io.flowthru <> dpath.io.flowthru; + ctrl.io.enq_val ^^ io.enq_val; + dpath.io.enq_bits ^^ io.enq_bits; + + ctrl.io.deq_val ^^ io.deq_val; + ctrl.io.enq_rdy ^^ io.enq_rdy; + dpath.io.deq_bits ^^ io.deq_bits; +} + +} diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala new file mode 100644 index 00000000..bb035340 --- /dev/null +++ b/rocket/src/main/scala/top.scala @@ -0,0 +1,54 @@ +package Top { + +import Chisel._ +import Node._; +import Constants._; + +class ioTop extends Bundle { + val debug = new ioDebug(); + val console = new ioConsole(); + val host = new ioHost(); + val mem = new ioMem(); +} + +class Top() extends Component { + val io = new ioTop(); + + val cpu = new rocketProc(); + val icache = new rocketICacheDM(128, 32); // lines, address bits + val icache_pf = new rocketIPrefetcher(); + val dcache = new rocketDCacheDM_flush(128, 32); + val arbiter = new rocketMemArbiter(); + + arbiter.io.mem ^^ io.mem; + arbiter.io.dcache <> dcache.io.mem; + arbiter.io.icache <> icache_pf.io.mem; + + cpu.io.host ^^ io.host; + cpu.io.debug ^^ io.debug; + cpu.io.console ^^ io.console; + + icache.io.mem <> icache_pf.io.icache; + cpu.io.imem <> icache.io.cpu; + cpu.io.dmem <> dcache.io.cpu; + +} + +object top_main { + def main(args: Array[String]) = { + // Can turn off --debug 
and --vcd when done with debugging to improve emulator performance +// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); + val cpu_args = args ++ Array("--target-dir", "generated-src","--debug"); + // Set variables based off of command flags +// for(a <- args) { +// a match { +// case "-bp" => isBranchPrediction = true; +// case any => +// } +// } + + chiselMain(cpu_args, () => new Top()); + } +} + +} diff --git a/rocket/src/main/scala/writeback.scala b/rocket/src/main/scala/writeback.scala new file mode 100644 index 00000000..327358c6 --- /dev/null +++ b/rocket/src/main/scala/writeback.scala @@ -0,0 +1,56 @@ +package Top +{ + +import Chisel._ +import Node._; +import Constants._; + +class ioWriteback extends Bundle() +{ + val dmem_resp_val = Bool('input); + val dmem_resp_data = UFix(64, 'input); + val dmem_resp_tag = UFix(12, 'input); + val wb_waddr = UFix(5, 'output); + val wb_wen = Bool('output); + val wb_wdata = Bits(64, 'output); +} + +class rocketWriteback extends Component +{ + override val io = new ioWriteback(); + + val r_dmem_resp_val = Reg(io.dmem_resp_val); + val r_dmem_resp_data = Reg(io.dmem_resp_data); + val r_dmem_resp_tag = Reg(io.dmem_resp_tag); + + val dmem_resp_xf = r_dmem_resp_tag(11); + val dmem_resp_type = r_dmem_resp_tag(10, 8); + val dmem_resp_pos = r_dmem_resp_tag(7, 5); + val dmem_resp_waddr = r_dmem_resp_tag(4, 0); + val dmem_resp_xval = r_dmem_resp_val & ~dmem_resp_xf; + val dmem_resp_fval = r_dmem_resp_val & dmem_resp_xf; + + val dmem_resp_data_w = + Mux(dmem_resp_pos(2).toBool, r_dmem_resp_data(63, 32), r_dmem_resp_data(31, 0)); + val dmem_resp_data_h = + Mux(dmem_resp_pos(1).toBool, dmem_resp_data_w(31, 16), dmem_resp_data_w(15, 0)); + val dmem_resp_data_b = + Mux(dmem_resp_pos(0).toBool, dmem_resp_data_h(15, 8), dmem_resp_data_h(7, 0)); + + val dmem_resp_data_final = + Mux(dmem_resp_type === MT_B, Cat(Fill(56, dmem_resp_data_b(7)), dmem_resp_data_b), + Mux(dmem_resp_type === MT_BU, Cat(UFix(0, 56), 
dmem_resp_data_b), + Mux(dmem_resp_type === MT_H, Cat(Fill(48, dmem_resp_data_h(15)), dmem_resp_data_h), + Mux(dmem_resp_type === MT_HU, Cat(UFix(0, 48), dmem_resp_data_h), + Mux(dmem_resp_type === MT_W, Cat(Fill(32, dmem_resp_data_w(31)), dmem_resp_data_w), + Mux(dmem_resp_type === MT_WU, Cat(UFix(0, 32), dmem_resp_data_w), + Mux(dmem_resp_type === MT_D, r_dmem_resp_data, + UFix(0, 64)))))))); + + io.wb_wen := dmem_resp_xval.toBool; + io.wb_waddr := dmem_resp_waddr; + io.wb_wdata := dmem_resp_data_final; + +} + +} From 172e561a78c7d3d869ad6e1e570554d96057827f Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Mon, 31 Oct 2011 15:37:37 -0700 Subject: [PATCH 0002/1087] added once cycle latency store pipelined d$ --- rocket/src/main/scala/dcache.scala | 208 ++++++++++++++++++++++++++++- 1 file changed, 207 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index e0cdc8cd..a81a9fa9 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -40,7 +40,8 @@ class ioDCacheDM extends Bundle() { // state machine to flush (write back dirty lines, invalidate clean ones) the D$ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { val io = new ioDCacheDM(); - val dcache = new rocketDCacheDM(lines, addrbits); +// val dcache = new rocketDCacheDM(lines, addrbits); + val dcache = new rocketDCacheDM_1C(lines, addrbits); val indexbits = ceil(log10(lines)/log10(2)).toInt; val offsetbits = 6; @@ -248,4 +249,209 @@ class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { } } +class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { + val io = new ioDCacheDM(); + + val indexbits = ceil(log10(lines)/log10(2)).toInt; + val offsetbits = 6; + val tagmsb = addrbits - 1; + val taglsb = indexbits+offsetbits; + val indexmsb = taglsb-1; + val indexlsb = offsetbits; + val offsetmsb = indexlsb-1; + val offsetlsb = 3; + + val s_reset :: s_ready :: 
s_replay_load :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(8) { UFix() }; + val state = Reg(resetVal = s_reset); + + val r_cpu_req_addr = Reg(resetVal = Bits(0, addrbits)); + val r_cpu_req_val = Reg(resetVal = Bool(false)); +// val r_cpu_req_data = Reg(resetVal = Bits(0,64)); + val r_cpu_req_op = Reg(resetVal = Bits(0,4)); +// val r_cpu_req_wmask = Reg(resetVal = Bits(0,8)); + val r_cpu_req_tag = Reg(resetVal = Bits(0,12)); + + val p_store_data = Reg(resetVal = Bits(0,64)); + val p_store_addr = Reg(resetVal = Bits(0,64)); + val p_store_wmask = Reg(resetVal = Bits(0,64)); + val p_store_valid = Reg(resetVal = Bool(false)); + + val req_load = (r_cpu_req_op === M_XRD); + val req_store = (r_cpu_req_op === M_XWR); + val req_flush = (r_cpu_req_op === M_FLA); + + when (io.cpu.req_val && io.cpu.req_rdy) { + r_cpu_req_addr <== io.cpu.req_addr; +// r_cpu_req_data <== io.cpu.req_data; + r_cpu_req_op <== io.cpu.req_op; +// r_cpu_req_wmask <== io.cpu.req_wmask; + r_cpu_req_tag <== io.cpu.req_tag; + } + + when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_op === M_XWR)) { + p_store_data <== io.cpu.req_data; + p_store_addr <== io.cpu.req_addr; + p_store_wmask <== io.cpu.req_wmask; + p_store_valid <== Bool(true); + } + + when (io.cpu.req_rdy) { + r_cpu_req_val <== io.cpu.req_val; + } + when ((state === s_resolve_miss) && !req_load) { + r_cpu_req_val <== Bool(false); + } + + // counter + val rr_count = Reg(resetVal = UFix(0,2)); + val rr_count_next = rr_count + UFix(1); + when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy)) { + rr_count <== rr_count_next; + } + + // tag array + val tag_we = (state === s_resolve_miss); + val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; + val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); + val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata); + val tag_raddr = + Mux((state === s_ready), io.cpu.req_addr(indexmsb, indexlsb).toUFix, + 
r_cpu_req_addr(indexmsb, indexlsb).toUFix); + val tag_rdata = Reg(tag_array.read(tag_raddr)); + + // valid bit array + val vb_array = Reg(resetVal = Bits(0, lines)); + val vb_rdata = Reg(vb_array(tag_raddr)); + when (tag_we && !req_flush) { + vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); + } + when (tag_we && req_flush) { + vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); + } + + val tag_valid = vb_rdata.toBool; + val tag_match = tag_valid && !req_flush && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); + + val ldst_conflict = io.cpu.req_val && (io.cpu.req_op === M_XRD) && (io.cpu.req_addr === p_store_addr); + + val store = ((state === s_ready) && p_store_valid && (!io.cpu.req_val || ldst_conflict || io.cpu.req_op === M_XWR)) || + ((state === s_resolve_miss) && req_store); + + // dirty bit array + val db_array = Reg(resetVal = Bits(0, lines)); + val db_rdata = Reg(db_array(tag_raddr)); + val tag_dirty = db_rdata.toBool; + + when (store) { + p_store_valid <== Bool(false); + db_array <== db_array.bitSet(p_store_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); + } + when (tag_we) { + db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); + } + + // data array + val data_array_we = ((state === s_refill) && io.mem.resp_val) || store; + val data_array_waddr = + Mux((state === s_refill), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, + p_store_addr(indexmsb, offsetmsb-1).toUFix); + + val data_array_wdata = + Mux((state === s_refill), io.mem.resp_data, + Cat(p_store_data, p_store_data)); + + val p_wmask_expand = + Cat(Fill(8, p_store_wmask(7)), + Fill(8, p_store_wmask(6)), + Fill(8, p_store_wmask(5)), + Fill(8, p_store_wmask(4)), + Fill(8, p_store_wmask(3)), + Fill(8, p_store_wmask(2)), + Fill(8, p_store_wmask(1)), + Fill(8, p_store_wmask(0))); + + val store_wmask = + Mux(p_store_addr(offsetlsb).toBool, + Cat(p_wmask_expand, Bits(0,64)), + Cat(Bits(0,64), 
p_wmask_expand)); + + val data_array_wmask = + Mux((state === s_refill), ~Bits(0,128), + store_wmask); + val data_array = Mem(lines*4, data_array_we, data_array_waddr, data_array_wdata, wrMask = data_array_wmask, resetVal = null); + val data_array_raddr = + Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count_next).toUFix, + Mux((state === s_start_writeback) || (state === s_writeback), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, + Mux((state === s_resolve_miss), r_cpu_req_addr(indexmsb, offsetmsb-1), + io.cpu.req_addr(indexmsb, offsetmsb-1)))); + val data_array_rdata = Reg(data_array.read(data_array_raddr)); + + // output signals +// io.cpu.req_rdy := (state === s_ready) && (!r_cpu_req_val || (tag_match && req_load && !(p_store_valid && (r_cpu_req_addr === p_store_addr)))); + io.cpu.req_rdy := (state === s_ready) && (!r_cpu_req_val || tag_match) && !(p_store_valid && ldst_conflict); + + io.cpu.resp_val := ((state === s_ready) && r_cpu_req_val && tag_match && req_load) || + ((state === s_resolve_miss) && req_flush); + + io.cpu.resp_tag := r_cpu_req_tag; + + io.cpu.resp_data := + Mux(r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), + data_array_rdata(63,0)); + + io.mem.req_val := (state === s_req_refill) || (state === s_writeback); + io.mem.req_rw := (state === s_writeback); + io.mem.req_wdata := data_array_rdata; + io.mem.req_tag := UFix(0); + io.mem.req_addr := + Mux(state === s_writeback, Cat(tag_rdata, r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, + Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix); + + // control state machine + switch (state) { + is (s_reset) { + state <== s_ready; + } + is (s_ready) { + when (p_store_valid && ldst_conflict) { + state <== s_replay_load; + } + when (!r_cpu_req_val || tag_match) { + state <== s_ready; + } + when (tag_valid & tag_dirty) { + state <== s_start_writeback; + } + when (req_flush) { + state <== s_resolve_miss; + } + otherwise { + state 
<== s_req_refill; + } + } + is (s_replay_load) { + state <== s_ready; + } + is (s_start_writeback) { + state <== s_writeback; + } + is (s_writeback) { + when (io.mem.req_rdy && (rr_count === UFix(3,2))) { + when (req_flush) { state <== s_resolve_miss; } + otherwise { state <== s_req_refill; } + } + } + is (s_req_refill) + { + when (io.mem.req_rdy) { state <== s_refill; } + } + is (s_refill) { + when (io.mem.resp_val && (rr_count === UFix(3,2))) { state <== s_resolve_miss; } + } + is (s_resolve_miss) { + state <== s_ready; + } + } +} + } From 65f8b2461cab5d84bb5749e4aca76146375f1fe9 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Mon, 31 Oct 2011 16:47:31 -0700 Subject: [PATCH 0003/1087] dcache tweaks --- rocket/src/main/scala/dcache.scala | 47 +++++++++++++++--------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index a81a9fa9..0bf03d17 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -266,9 +266,9 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val r_cpu_req_addr = Reg(resetVal = Bits(0, addrbits)); val r_cpu_req_val = Reg(resetVal = Bool(false)); -// val r_cpu_req_data = Reg(resetVal = Bits(0,64)); + val r_cpu_req_data = Reg(resetVal = Bits(0,64)); val r_cpu_req_op = Reg(resetVal = Bits(0,4)); -// val r_cpu_req_wmask = Reg(resetVal = Bits(0,8)); + val r_cpu_req_wmask = Reg(resetVal = Bits(0,8)); val r_cpu_req_tag = Reg(resetVal = Bits(0,12)); val p_store_data = Reg(resetVal = Bits(0,64)); @@ -282,18 +282,11 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { when (io.cpu.req_val && io.cpu.req_rdy) { r_cpu_req_addr <== io.cpu.req_addr; -// r_cpu_req_data <== io.cpu.req_data; + r_cpu_req_data <== io.cpu.req_data; r_cpu_req_op <== io.cpu.req_op; -// r_cpu_req_wmask <== io.cpu.req_wmask; + r_cpu_req_wmask <== io.cpu.req_wmask; r_cpu_req_tag <== io.cpu.req_tag; } - - when 
(io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_op === M_XWR)) { - p_store_data <== io.cpu.req_data; - p_store_addr <== io.cpu.req_addr; - p_store_wmask <== io.cpu.req_wmask; - p_store_valid <== Bool(true); - } when (io.cpu.req_rdy) { r_cpu_req_val <== io.cpu.req_val; @@ -331,18 +324,25 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val tag_valid = vb_rdata.toBool; val tag_match = tag_valid && !req_flush && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); - - val ldst_conflict = io.cpu.req_val && (io.cpu.req_op === M_XRD) && (io.cpu.req_addr === p_store_addr); - - val store = ((state === s_ready) && p_store_valid && (!io.cpu.req_val || ldst_conflict || io.cpu.req_op === M_XWR)) || - ((state === s_resolve_miss) && req_store); + + when ((state === s_ready) && r_cpu_req_val && req_store) { + p_store_data <== r_cpu_req_data; + p_store_addr <== r_cpu_req_addr; + p_store_wmask <== r_cpu_req_wmask; + p_store_valid <== Bool(true); + } + + val addr_match = (r_cpu_req_addr(tagmsb, offsetlsb) === p_store_addr(tagmsb, offsetlsb)); + val drain_store = ((state === s_ready) && p_store_valid && (!r_cpu_req_val || !req_load || addr_match)) + val resolve_store = (state === s_resolve_miss) && req_store; + val do_store = drain_store | resolve_store; // dirty bit array val db_array = Reg(resetVal = Bits(0, lines)); val db_rdata = Reg(db_array(tag_raddr)); val tag_dirty = db_rdata.toBool; - when (store) { + when (do_store) { p_store_valid <== Bool(false); db_array <== db_array.bitSet(p_store_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } @@ -351,7 +351,7 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } // data array - val data_array_we = ((state === s_refill) && io.mem.resp_val) || store; + val data_array_we = ((state === s_refill) && io.mem.resp_val) || do_store; val data_array_waddr = Mux((state === s_refill), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, p_store_addr(indexmsb, offsetmsb-1).toUFix); @@ -382,15 +382,16 
@@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val data_array_raddr = Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count_next).toUFix, Mux((state === s_start_writeback) || (state === s_writeback), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - Mux((state === s_resolve_miss), r_cpu_req_addr(indexmsb, offsetmsb-1), + Mux((state === s_resolve_miss) || (state === s_replay_load), r_cpu_req_addr(indexmsb, offsetmsb-1), io.cpu.req_addr(indexmsb, offsetmsb-1)))); val data_array_rdata = Reg(data_array.read(data_array_raddr)); + + val ldst_conflict = r_cpu_req_val && req_load && p_store_valid && addr_match; // output signals -// io.cpu.req_rdy := (state === s_ready) && (!r_cpu_req_val || (tag_match && req_load && !(p_store_valid && (r_cpu_req_addr === p_store_addr)))); - io.cpu.req_rdy := (state === s_ready) && (!r_cpu_req_val || tag_match) && !(p_store_valid && ldst_conflict); + io.cpu.req_rdy := (state === s_ready) && !ldst_conflict && (!r_cpu_req_val || tag_match); - io.cpu.resp_val := ((state === s_ready) && r_cpu_req_val && tag_match && req_load) || + io.cpu.resp_val := ((state === s_ready) && r_cpu_req_val && tag_match && req_load && !(p_store_valid && addr_match)) || ((state === s_resolve_miss) && req_flush); io.cpu.resp_tag := r_cpu_req_tag; @@ -413,7 +414,7 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { state <== s_ready; } is (s_ready) { - when (p_store_valid && ldst_conflict) { + when (ldst_conflict) { state <== s_replay_load; } when (!r_cpu_req_val || tag_match) { From ace4c9d13c801cba1d0f7a264cdf08adf9875829 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Mon, 31 Oct 2011 17:17:36 -0700 Subject: [PATCH 0004/1087] dcache fixes --- rocket/src/main/scala/dcache.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 0bf03d17..eb24aacc 100644 
--- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -321,9 +321,9 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { when (tag_we && req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } - + val tag_valid = vb_rdata.toBool; - val tag_match = tag_valid && !req_flush && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); + val tag_match = tag_valid && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); when ((state === s_ready) && r_cpu_req_val && req_store) { p_store_data <== r_cpu_req_data; @@ -333,7 +333,7 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } val addr_match = (r_cpu_req_addr(tagmsb, offsetlsb) === p_store_addr(tagmsb, offsetlsb)); - val drain_store = ((state === s_ready) && p_store_valid && (!r_cpu_req_val || !req_load || addr_match)) + val drain_store = ((state === s_ready) && p_store_valid && (!r_cpu_req_val || !tag_match || !req_load || addr_match)) val resolve_store = (state === s_resolve_miss) && req_store; val do_store = drain_store | resolve_store; @@ -389,7 +389,7 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val ldst_conflict = r_cpu_req_val && req_load && p_store_valid && addr_match; // output signals - io.cpu.req_rdy := (state === s_ready) && !ldst_conflict && (!r_cpu_req_val || tag_match); + io.cpu.req_rdy := (state === s_ready) && !ldst_conflict && (!r_cpu_req_val || (tag_match && !req_flush)); io.cpu.resp_val := ((state === s_ready) && r_cpu_req_val && tag_match && req_load && !(p_store_valid && addr_match)) || ((state === s_resolve_miss) && req_flush); @@ -400,11 +400,11 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { Mux(r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); - io.mem.req_val := (state === s_req_refill) || (state === s_writeback); - io.mem.req_rw := (state === s_writeback); + io.mem.req_val := (state === s_req_refill) 
|| (state === s_writeback); + io.mem.req_rw := (state === s_writeback); io.mem.req_wdata := data_array_rdata; - io.mem.req_tag := UFix(0); - io.mem.req_addr := + io.mem.req_tag := UFix(0); + io.mem.req_addr := Mux(state === s_writeback, Cat(tag_rdata, r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix); From 08b89e77100a9599a08ef4cbca48e6e59c660918 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 1 Nov 2011 17:59:27 -0700 Subject: [PATCH 0005/1087] interface cleanup, major pipeline changes --- rocket/src/main/scala/consts.scala | 1 + rocket/src/main/scala/cpu.scala | 89 ++++---- rocket/src/main/scala/ctrl.scala | 240 ++++++++++----------- rocket/src/main/scala/dcache.scala | 94 +++++--- rocket/src/main/scala/dpath.scala | 297 ++++++++++++++++---------- rocket/src/main/scala/memory.scala | 124 ----------- rocket/src/main/scala/writeback.scala | 56 ----- 7 files changed, 426 insertions(+), 475 deletions(-) delete mode 100644 rocket/src/main/scala/memory.scala delete mode 100644 rocket/src/main/scala/writeback.scala diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index e5660ca6..8b8c8740 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -21,6 +21,7 @@ object Constants val PC_J = UFix(4, 3); val PC_JR = UFix(5, 3); val PC_PCR = UFix(6, 3); + val PC_MEM = UFix(7, 3); val KF_Y = UFix(1, 1); val KF_N = UFix(0, 1); diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index a891582e..2e3374f7 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -39,57 +39,64 @@ class rocketProc extends Component val io = new ioRocket(); val ctrl = new rocketCtrl(); - val dpath = new rocketDpath(); - val mem = new rocketMemory(); - val wb = new rocketWriteback(); + val dpath = new rocketDpath(); +// val mem = new rocketMemory(); +// val wb = new rocketWriteback(); - dpath.io.host ^^ 
io.host; - dpath.io.debug ^^ io.debug; - // dpath.io.wb <> wb.io; - dpath.io.wb.wen <> wb.io.wb_wen; - dpath.io.wb.waddr <> wb.io.wb_waddr; - dpath.io.wb.wdata <> wb.io.wb_wdata; - dpath.io.imem.req_addr ^^ io.imem.req_addr; - dpath.io.imem.resp_data ^^ io.imem.resp_data; - - ctrl.io.ctrl <> dpath.io.ctrl; - ctrl.io.dpath <> dpath.io.dpath; - // ctrl.io.mem <> mem.io; - ctrl.io.mem.mrq_val <> mem.io.mem_mrq_val; - ctrl.io.mem.mrq_cmd <> mem.io.mem_mrq_cmd; - ctrl.io.mem.mrq_type <> mem.io.mem_mrq_type; - ctrl.io.mem.mrq_deq <> mem.io.mem_mrq_deq; - ctrl.io.mem.xsdq_rdy <> mem.io.mem_xsdq_rdy; - ctrl.io.mem.xsdq_val <> mem.io.mem_xsdq_val; - ctrl.io.mem.dc_busy := !io.dmem.req_rdy; + ctrl.io.dpath <> dpath.io.ctrl; ctrl.io.host.start ^^ io.host.start; ctrl.io.imem ^^ io.imem; + dpath.io.imem.req_addr ^^ io.imem.req_addr; + dpath.io.imem.resp_data ^^ io.imem.resp_data; + dpath.io.host ^^ io.host; + dpath.io.debug ^^ io.debug; + + ctrl.io.dmem ^^ io.dmem; + dpath.io.dmem ^^ io.dmem; + + // FIXME +// io.console.bits := dpath.io.dpath.rs1(7,0); + io.console.bits := Bits(0,8); + io.console.valid := ctrl.io.console.valid; + ctrl.io.console.rdy := io.console.rdy; + + // dpath.io.wb <> wb.io; +// dpath.io.wb.wen <> wb.io.wb_wen; +// dpath.io.wb.waddr <> wb.io.wb_waddr; +// dpath.io.wb.wdata <> wb.io.wb_wdata; + + +// ctrl.io.mem.mrq_val <> mem.io.mem_mrq_val; +// ctrl.io.mem.mrq_cmd <> mem.io.mem_mrq_cmd; +// ctrl.io.mem.mrq_type <> mem.io.mem_mrq_type; +// ctrl.io.mem.mrq_deq <> mem.io.mem_mrq_deq; +// ctrl.io.mem.xsdq_rdy <> mem.io.mem_xsdq_rdy; +// ctrl.io.mem.xsdq_val <> mem.io.mem_xsdq_val; +// ctrl.io.mem.dc_busy := !io.dmem.req_rdy; + // ctrl.io.console ^^ io.console; - ctrl.io.wb.waddr <> wb.io.wb_waddr; - ctrl.io.wb.wen <> wb.io.wb_wen; +// ctrl.io.wb.waddr <> wb.io.wb_waddr; +// ctrl.io.wb.wen <> wb.io.wb_wen; // TODO: SHOULD BE THE FOLLOWING BUT NEED BETTER INTERFACE CHUNKS // mem.io.dmem >< io.dmem; - mem.io.dmem_req_val ^^ io.dmem.req_val; - mem.io.dmem_req_rdy 
^^ io.dmem.req_rdy; - mem.io.dmem_req_op ^^ io.dmem.req_op; - mem.io.dmem_req_addr ^^ io.dmem.req_addr; - mem.io.dmem_req_data ^^ io.dmem.req_data; - mem.io.dmem_req_wmask ^^ io.dmem.req_wmask; - mem.io.dmem_req_tag ^^ io.dmem.req_tag; +// mem.io.dmem_req_val ^^ io.dmem.req_val; +// mem.io.dmem_req_rdy ^^ io.dmem.req_rdy; +// mem.io.dmem_req_op ^^ io.dmem.req_op; +// mem.io.dmem_req_addr ^^ io.dmem.req_addr; +// mem.io.dmem_req_data ^^ io.dmem.req_data; +// mem.io.dmem_req_wmask ^^ io.dmem.req_wmask; +// mem.io.dmem_req_tag ^^ io.dmem.req_tag; - mem.io.dpath_rs2 <> dpath.io.dpath.rs2; - mem.io.dpath_waddr <> dpath.io.dpath.waddr; - mem.io.dpath_alu_out <> dpath.io.dpath.alu_out; +// +// mem.io.dpath_rs2 <> dpath.io.dpath.rs2; +// mem.io.dpath_waddr <> dpath.io.dpath.waddr; +// mem.io.dpath_alu_out <> dpath.io.dpath.alu_out; - wb.io.dmem_resp_val ^^ io.dmem.resp_val; - wb.io.dmem_resp_data ^^ io.dmem.resp_data; - wb.io.dmem_resp_tag ^^ io.dmem.resp_tag; - - io.console.bits := dpath.io.dpath.rs1(7,0); - io.console.valid := ctrl.io.console.valid; - ctrl.io.console.rdy := io.console.rdy; +// wb.io.dmem_resp_val ^^ io.dmem.resp_val; +// wb.io.dmem_resp_data ^^ io.dmem.resp_data; +// wb.io.dmem_resp_tag ^^ io.dmem.resp_tag; } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 90899de1..8d6a7d22 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -6,14 +6,17 @@ import Node._; import Constants._ import Instructions._ -class ioCtrl extends Bundle() +class ioCtrlDpath extends Bundle() { + // outputs to datapath val sel_pc = UFix(3, 'output); val wen_btb = Bool('output); val stallf = Bool('output); val stalld = Bool('output); val killf = Bool('output); val killd = Bool('output); + val killx = Bool('output); + val killm = Bool('output); val ren2 = Bool('output); val ren1 = Bool('output); val sel_alu2 = UFix(2, 'output); @@ -26,7 +29,6 @@ class ioCtrl extends Bundle() val div_val = Bool('output); val 
div_fn = UFix(4, 'output); val div_wb = Bool('output); - val wen = Bool('output); val sel_wa = Bool('output); val sel_wb = UFix(3, 'output); val ren_pcr = Bool('output); @@ -36,56 +38,34 @@ class ioCtrl extends Bundle() val xcpt_fpu = Bool('output); val xcpt_syscall = Bool('output); val eret = Bool('output); -} - -class ioCtrlDpath extends Bundle() -{ + val dcache_miss = Bool('output); + val wen = Bool('output); + // inputs from datapath val btb_hit = Bool('input); - val inst = UFix(32, 'input); + val inst = Bits(32, 'input); val br_eq = Bool('input); val br_lt = Bool('input); val br_ltu = Bool('input); val div_rdy = Bool('input); val div_result_val = Bool('input); val mul_result_val = Bool('input); - val wen = Bool('input); - val waddr = UFix(5, 'input); + val ex_waddr = UFix(5,'input); // write addr from execute stage val exception = Bool('input); val status = Bits(8, 'input); -} - -class ioCtrlMem extends Bundle() -{ - val mrq_val = Bool('output); - val mrq_cmd = UFix(4, 'output); - val mrq_type = UFix(3, 'output); - val mrq_deq = Bool('input); - val xsdq_rdy = Bool('input); - val xsdq_val = Bool('output); - val dc_busy = Bool('input); -} - -class ioCtrlImem extends Bundle() -{ - val req_val = Bool('output); - val req_rdy = Bool('input); - val resp_val = Bool('input); -} - -class ioCtrlWB extends Bundle() -{ - val waddr = UFix(5, 'input); - val wen = Bool('input); + val sboard_set = Bool('input); + val sboard_seta = UFix(5, 'input); + val sboard_clr0 = Bool('input); + val sboard_clr0a = UFix(5, 'input); + val sboard_clr1 = Bool('input); + val sboard_clr1a = UFix(5, 'input); } class ioCtrlAll extends Bundle() { - val ctrl = new ioCtrl(); - val console = new ioConsole(List("rdy", "valid")); val dpath = new ioCtrlDpath(); - val imem = new ioCtrlImem(); - val mem = new ioCtrlMem(); - val wb = new ioCtrlWB(); + val console = new ioConsole(List("rdy", "valid")); + val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); + val dmem = new 
ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_val")).flip(); val host = new ioHost(List("start")); } @@ -219,47 +199,27 @@ class rocketCtrl extends Component val id_ren2 = id_renx2; val id_ren1 = id_renx1; - val id_console_out_val = id_wen_pcr & (id_raddr2 === PCR_CONSOLE); + val id_console_out_val = id_wen_pcr & (id_raddr2 === PCR_CONSOLE); + val console_out_fire = id_console_out_val & ~io.dpath.killd; + io.console.valid := console_out_fire.toBool; - val id_mem_val_masked = id_mem_val; - - val mem_xload_val = id_mem_val_masked & (id_mem_cmd === M_XRD); - val mem_xstore_val = id_mem_val_masked & (id_mem_cmd === M_XWR); - - val mem_fire = id_mem_val_masked & ~io.ctrl.killd; - val mem_xload_fire = mem_xload_val & ~io.ctrl.killd; - val mem_xstore_fire = mem_xstore_val & ~io.ctrl.killd; - - val console_out_fire = id_console_out_val & ~io.ctrl.killd; - - val div_fire = id_div_val & ~io.ctrl.killd; - val mul_fire = id_mul_val & ~io.ctrl.killd; - - val sboard_wen = mem_xload_fire | div_fire | mul_fire; - val sboard_waddr = id_waddr; - val sboard = new rocketCtrlSboard(); - sboard.io.raddra := id_raddr2; - sboard.io.raddrb := id_raddr1; - sboard.io.raddrc := id_waddr; - sboard.io.set := sboard_wen.toBool; - sboard.io.seta := sboard_waddr; - sboard.io.clr0 := io.wb.wen.toBool; - sboard.io.clr0a ^^ io.wb.waddr; - sboard.io.clr1 := io.dpath.wen.toBool; - sboard.io.clr1a := io.dpath.waddr; + sboard.io.raddra := id_raddr2.toUFix; + sboard.io.raddrb := id_raddr1.toUFix; + sboard.io.raddrc := id_waddr.toUFix; + + sboard.io.set := io.dpath.sboard_set; + sboard.io.seta := io.dpath.sboard_seta; + sboard.io.clr0 := io.dpath.sboard_clr0; + sboard.io.clr0a := io.dpath.sboard_clr0a; + sboard.io.clr1 := io.dpath.sboard_clr1; + sboard.io.clr1a := io.dpath.sboard_clr1a; val id_stall_raddr2 = sboard.io.stalla; val id_stall_raddr1 = sboard.io.stallb; val id_stall_waddr = sboard.io.stallc; val id_stall_ra = sboard.io.stallra; - val mrq = new rocketCtrlCnt(3, 4); - mrq.io.enq 
:= mem_fire.toBool; - mrq.io.deq ^^ io.mem.mrq_deq; - val id_empty_mrq = mrq.io.empty; - val id_full_mrq = mrq.io.full; - val id_reg_btb_hit = Reg(width = 1, resetVal = Bool(false)); val ex_reg_br_type = Reg(){UFix(width = 4)}; val ex_reg_btb_hit = Reg(){Bool()}; @@ -269,15 +229,16 @@ class rocketCtrl extends Component val ex_reg_eret = Reg(resetVal = Bool(false)); val ex_reg_privileged = Reg(resetVal = Bool(false)); - when (!io.ctrl.stalld) { - when (io.ctrl.killf) { + when (!io.dpath.stalld) { + when (io.dpath.killf) { id_reg_btb_hit <== Bool(false); } otherwise{ id_reg_btb_hit <== io.dpath.btb_hit; } } - when (reset.toBool || io.ctrl.killd) { + + when (reset.toBool || io.dpath.killd) { ex_reg_br_type <== BR_N; ex_reg_btb_hit <== Bool(false); ex_reg_mem_val <== Bool(false); @@ -289,7 +250,7 @@ class rocketCtrl extends Component otherwise { ex_reg_br_type <== id_br_type; ex_reg_btb_hit <== id_reg_btb_hit; - ex_reg_mem_val <== id_mem_val_masked.toBool; + ex_reg_mem_val <== id_mem_val.toBool; ex_reg_mem_cmd <== id_mem_cmd; ex_reg_mem_type <== id_mem_type; ex_reg_eret <== id_eret.toBool; @@ -317,22 +278,42 @@ class rocketCtrl extends Component io.imem.req_val := io.host.start; // io.imem.req_val := Bool(true); - io.mem.mrq_val := ex_reg_mem_val; - io.mem.mrq_cmd := ex_reg_mem_cmd; - io.mem.mrq_type := ex_reg_mem_type; - io.mem.xsdq_val := mem_xstore_fire.toBool; - io.console.valid := console_out_fire.toBool; + io.dmem.req_val := ex_reg_mem_val && ~io.dpath.killx; + io.dmem.req_cmd := ex_reg_mem_cmd; + io.dmem.req_type := ex_reg_mem_type; - io.ctrl.sel_pc := + val mem_reg_mem_val = Reg(){Bool()}; + val mem_reg_mem_cmd = Reg(){UFix(width = 4)}; + val mem_reg_mem_type = Reg(){UFix(width = 3)}; + + when (reset.toBool || io.dpath.killx) { + mem_reg_mem_val <== Bool(false); + mem_reg_mem_cmd <== UFix(0, 4); + mem_reg_mem_type <== UFix(0, 3); + } + otherwise { + mem_reg_mem_val <== ex_reg_mem_val; + mem_reg_mem_cmd <== ex_reg_mem_cmd; + mem_reg_mem_type <== ex_reg_mem_type; 
+ } + + // replay on a D$ load miss : FIXME - add a miss signal to D$ + val replay_mem = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD) && !io.dmem.resp_val; + val dcache_miss = Reg(replay_mem); + + io.dpath.dcache_miss := dcache_miss; + + io.dpath.sel_pc := + Mux(replay_mem, PC_MEM, Mux(io.dpath.exception || ex_reg_eret, PC_PCR, Mux(!ex_reg_btb_hit && br_taken, PC_BR, Mux(ex_reg_btb_hit && !br_taken || ex_reg_privileged, PC_EX4, Mux(jr_taken, PC_JR, Mux(j_taken, PC_J, Mux(io.dpath.btb_hit, PC_BTB, - PC_4)))))); + PC_4))))))); - io.ctrl.wen_btb := ~ex_reg_btb_hit & br_taken; + io.dpath.wen_btb := ~ex_reg_btb_hit & br_taken; val take_pc = ~ex_reg_btb_hit & br_taken | @@ -341,65 +322,86 @@ class rocketCtrl extends Component j_taken | io.dpath.exception | ex_reg_privileged | - ex_reg_eret; + ex_reg_eret | + replay_mem; - io.ctrl.stallf := + io.dpath.stallf := ~take_pc & ( ~io.imem.req_rdy | ~io.imem.resp_val | - io.ctrl.stalld + io.dpath.stalld ); - val ctrl_stalld_wo_fpu_rdy = + // check for loads in execute stage to detect load/use hazards + val lu_stall_raddr1 = + ex_reg_mem_val && + (ex_reg_mem_cmd === M_XRD) && + id_ren1.toBool && + (id_raddr1 === io.dpath.ex_waddr); + + val lu_stall_raddr2 = + ex_reg_mem_val && + (ex_reg_mem_cmd === M_XRD) && + id_ren2.toBool && + (id_raddr2 === io.dpath.ex_waddr); + + val ctrl_stalld = ~take_pc & ( - id_ren2 & id_stall_raddr2 | - id_ren1 & id_stall_raddr1 | - (id_sel_wa === WA_RD) & id_stall_waddr | + lu_stall_raddr1 | + lu_stall_raddr2 | + id_ren2 & id_stall_raddr2 | + id_ren1 & id_stall_raddr1 | + (id_sel_wa === WA_RD) && id_stall_waddr | (id_sel_wa === WA_RA) & id_stall_ra | - id_mem_val_masked & id_full_mrq | - id_sync & (~id_empty_mrq | io.mem.dc_busy) | - mem_xstore_val & ~io.mem.xsdq_rdy | + id_mem_val & ~io.dmem.req_rdy | + id_sync & ~io.dmem.req_rdy | +// id_mem_val_masked & id_full_mrq | +// id_sync & (~id_empty_mrq | io.mem.dc_busy) | +// mem_xstore_val & ~io.mem.xsdq_rdy | id_console_out_val & ~io.console.rdy | 
id_div_val & ~io.dpath.div_rdy | io.dpath.div_result_val | io.dpath.mul_result_val ); + val ctrl_killd = take_pc | ctrl_stalld; + // for divider, multiplier writeback val mul_wb = io.dpath.mul_result_val; val div_wb = io.dpath.div_result_val & !mul_wb; - io.ctrl.stalld := ctrl_stalld_wo_fpu_rdy.toBool; + io.dpath.stalld := ctrl_stalld.toBool; - io.ctrl.killf := take_pc | ~io.imem.resp_val; - val ctrl_killd_wo_fpu_rdy = take_pc | ctrl_stalld_wo_fpu_rdy; - io.ctrl.killd := ctrl_killd_wo_fpu_rdy.toBool; + io.dpath.killf := take_pc | ~io.imem.resp_val; + io.dpath.killd := ctrl_killd.toBool; + io.dpath.killx := replay_mem.toBool; + io.dpath.killm := replay_mem.toBool; - io.ctrl.ren2 := id_ren2.toBool; - io.ctrl.ren1 := id_ren1.toBool; - io.ctrl.sel_alu2 := id_sel_alu2; - io.ctrl.sel_alu1 := id_sel_alu1.toBool; - io.ctrl.fn_dw := id_fn_dw.toBool; - io.ctrl.fn_alu := id_fn_alu; - io.ctrl.div_fn := id_div_fn; - io.ctrl.div_val := id_div_val.toBool; - io.ctrl.div_wb := div_wb; - io.ctrl.mul_fn := id_mul_fn; - io.ctrl.mul_val := id_mul_val.toBool; - io.ctrl.mul_wb := mul_wb; - io.ctrl.wen := id_wen.toBool; - io.ctrl.sel_wa := id_sel_wa.toBool; - io.ctrl.sel_wb := id_sel_wb; - io.ctrl.ren_pcr := id_ren_pcr.toBool; - io.ctrl.wen_pcr := id_wen_pcr.toBool; - io.ctrl.eret := id_eret.toBool; + io.dpath.ren2 := id_ren2.toBool; + io.dpath.ren1 := id_ren1.toBool; + io.dpath.sel_alu2 := id_sel_alu2; + io.dpath.sel_alu1 := id_sel_alu1.toBool; + io.dpath.fn_dw := id_fn_dw.toBool; + io.dpath.fn_alu := id_fn_alu; + io.dpath.div_fn := id_div_fn; + io.dpath.div_val := id_div_val.toBool; + io.dpath.div_wb := div_wb; + io.dpath.mul_fn := id_mul_fn; + io.dpath.mul_val := id_mul_val.toBool; + io.dpath.mul_wb := mul_wb; + io.dpath.wen := id_wen.toBool; + io.dpath.sel_wa := id_sel_wa.toBool; + io.dpath.sel_wb := id_sel_wb; + io.dpath.ren_pcr := id_ren_pcr.toBool; + io.dpath.wen_pcr := id_wen_pcr.toBool; + io.dpath.eret := id_eret.toBool; - io.ctrl.xcpt_illegal := ~id_int_val.toBool; - 
io.ctrl.xcpt_privileged := (id_privileged & ~io.dpath.status(5)).toBool; - io.ctrl.xcpt_fpu := Bool(false); - io.ctrl.xcpt_syscall := id_syscall.toBool; + io.dpath.xcpt_illegal := ~id_int_val.toBool; + io.dpath.xcpt_privileged := (id_privileged & ~io.dpath.status(5)).toBool; + io.dpath.xcpt_fpu := Bool(false); + io.dpath.xcpt_syscall := id_syscall.toBool; } } diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index eb24aacc..0bddf7d4 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -9,10 +9,10 @@ import scala.math._; class ioDmem(view: List[String] = null) extends Bundle(view) { val req_val = Bool('input); val req_rdy = Bool('output); - val req_op = Bits(4, 'input); + val req_cmd = Bits(4, 'input); + val req_type = Bits(3, 'input); val req_addr = UFix(32, 'input); val req_data = Bits(64, 'input); - val req_wmask = Bits(8, 'input); val req_tag = Bits(12, 'input); val resp_val = Bool('output); val resp_data = Bits(64, 'output); @@ -58,7 +58,7 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { val flush_waiting = Reg(resetVal = Bool(false)); val r_cpu_req_tag = Reg(resetVal = Bits(0, 12)); - when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_op === M_FLA)) + when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_cmd === M_FLA)) { r_cpu_req_tag <== io.cpu.req_tag; flushing <== Bool(true); @@ -75,12 +75,12 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag)) { flush_resp_count <== flush_resp_count + UFix(1,1); } - dcache.io.cpu.req_val := (io.cpu.req_val && (io.cpu.req_op != M_FLA) && !flush_waiting) || flushing; - dcache.io.cpu.req_op := Mux(flushing, M_FLA, io.cpu.req_op); + dcache.io.cpu.req_val := (io.cpu.req_val && (io.cpu.req_cmd != M_FLA) && !flush_waiting) || flushing; + dcache.io.cpu.req_cmd := Mux(flushing, M_FLA, io.cpu.req_cmd); 
dcache.io.cpu.req_addr := Mux(flushing, Cat(Bits(0,tagmsb-taglsb+1), flush_count, Bits(0,offsetbits)).toUFix, io.cpu.req_addr); dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); + dcache.io.cpu.req_type := io.cpu.req_type; dcache.io.cpu.req_data ^^ io.cpu.req_data; - dcache.io.cpu.req_wmask ^^ io.cpu.req_wmask; dcache.io.mem ^^ io.mem; io.cpu.req_rdy := dcache.io.cpu.req_rdy && !flush_waiting; @@ -96,7 +96,7 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { // lines = # of cache lines // addr_bits = address width (word addressable) bits // 64 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines - +/* class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { val io = new ioDCacheDM(); @@ -116,7 +116,7 @@ class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { val r_r_cpu_req_addr = Reg(r_cpu_req_addr); val r_cpu_req_val = Reg(Bool(false)); val r_cpu_req_data = Reg(Bits(0,64)); - val r_cpu_req_op = Reg(Bits(0,4)); + val r_cpu_req_cmd = Reg(Bits(0,4)); val r_cpu_req_wmask = Reg(Bits(0,8)); val r_cpu_req_tag = Reg(Bits(0,12)); val r_cpu_resp_tag = Reg(r_cpu_req_tag); @@ -125,13 +125,13 @@ class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { when (io.cpu.req_val && io.cpu.req_rdy) { r_cpu_req_addr <== io.cpu.req_addr; r_cpu_req_data <== io.cpu.req_data; - r_cpu_req_op <== io.cpu.req_op; + r_cpu_req_cmd <== io.cpu.req_cmd; r_cpu_req_wmask <== io.cpu.req_wmask; r_cpu_req_tag <== io.cpu.req_tag; } - val req_load = (r_cpu_req_op === M_XRD); - val req_store = (r_cpu_req_op === M_XWR); - val req_flush = (r_cpu_req_op === M_FLA); + val req_load = (r_cpu_req_cmd === M_XRD); + val req_store = (r_cpu_req_cmd === M_XWR); + val req_flush = (r_cpu_req_cmd === M_FLA); when (io.cpu.req_rdy) { r_cpu_req_val <== io.cpu.req_val; } otherwise { r_cpu_req_val <== Bool(false); } @@ -248,6 +248,7 @@ class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { } } } +*/ class 
rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val io = new ioDCacheDM(); @@ -267,24 +268,26 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val r_cpu_req_addr = Reg(resetVal = Bits(0, addrbits)); val r_cpu_req_val = Reg(resetVal = Bool(false)); val r_cpu_req_data = Reg(resetVal = Bits(0,64)); - val r_cpu_req_op = Reg(resetVal = Bits(0,4)); - val r_cpu_req_wmask = Reg(resetVal = Bits(0,8)); - val r_cpu_req_tag = Reg(resetVal = Bits(0,12)); + val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); + val r_cpu_req_type = Reg(resetVal = Bits(0,3)); +// val r_cpu_req_wmask = Reg(resetVal = Bits(0,8)); + val r_cpu_req_tag = Reg(resetVal = Bits(0,5)); val p_store_data = Reg(resetVal = Bits(0,64)); val p_store_addr = Reg(resetVal = Bits(0,64)); val p_store_wmask = Reg(resetVal = Bits(0,64)); val p_store_valid = Reg(resetVal = Bool(false)); - val req_load = (r_cpu_req_op === M_XRD); - val req_store = (r_cpu_req_op === M_XWR); - val req_flush = (r_cpu_req_op === M_FLA); + val req_load = (r_cpu_req_cmd === M_XRD); + val req_store = (r_cpu_req_cmd === M_XWR); + val req_flush = (r_cpu_req_cmd === M_FLA); when (io.cpu.req_val && io.cpu.req_rdy) { r_cpu_req_addr <== io.cpu.req_addr; r_cpu_req_data <== io.cpu.req_data; - r_cpu_req_op <== io.cpu.req_op; - r_cpu_req_wmask <== io.cpu.req_wmask; + r_cpu_req_cmd <== io.cpu.req_cmd; + r_cpu_req_type <== io.cpu.req_type; +// r_cpu_req_wmask <== io.cpu.req_wmask; r_cpu_req_tag <== io.cpu.req_tag; } @@ -325,10 +328,51 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val tag_valid = vb_rdata.toBool; val tag_match = tag_valid && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); + // generate write mask and store data signals based on store type and address LSBs + val wmask_b = + Mux(r_cpu_req_addr(2,0) === UFix(0, 3), Bits("b0000_0001", 8), + Mux(r_cpu_req_addr(2,0) === UFix(1, 3), Bits("b0000_0010", 8), + Mux(r_cpu_req_addr(2,0) === UFix(2, 3), Bits("b0000_0100", 8), + 
Mux(r_cpu_req_addr(2,0) === UFix(3, 3), Bits("b0000_1000", 8), + Mux(r_cpu_req_addr(2,0) === UFix(4, 3), Bits("b0001_0000", 8), + Mux(r_cpu_req_addr(2,0) === UFix(5, 3), Bits("b0010_0000", 8), + Mux(r_cpu_req_addr(2,0) === UFix(6, 3), Bits("b0100_0000", 8), + Mux(r_cpu_req_addr(2,0) === UFix(7, 3), Bits("b1000_0000", 8), + UFix(0, 8))))))))); + + val wmask_h = + Mux(r_cpu_req_addr(2,1) === UFix(0, 2), Bits("b0000_0011", 8), + Mux(r_cpu_req_addr(2,1) === UFix(1, 2), Bits("b0000_1100", 8), + Mux(r_cpu_req_addr(2,1) === UFix(2, 2), Bits("b0011_0000", 8), + Mux(r_cpu_req_addr(2,1) === UFix(3, 2), Bits("b1100_0000", 8), + UFix(0, 8))))); + + val wmask_w = + Mux(r_cpu_req_addr(2) === UFix(0, 1), Bits("b0000_1111", 8), + Mux(r_cpu_req_addr(2) === UFix(1, 1), Bits("b1111_0000", 8), + UFix(0, 8))); + + val wmask_d = + Bits("b1111_1111", 8); + + val store_wmask = + Mux(r_cpu_req_type === MT_B, wmask_b, + Mux(r_cpu_req_type === MT_H, wmask_h, + Mux(r_cpu_req_type === MT_W, wmask_w, + Mux(r_cpu_req_type === MT_D, wmask_d, + UFix(0, 8))))); + + val store_data = + Mux(r_cpu_req_type === MT_B, Fill(8, r_cpu_req_data( 7,0)), + Mux(r_cpu_req_type === MT_H, Fill(4, r_cpu_req_data(15,0)), + Mux(r_cpu_req_type === MT_W, Fill(2, r_cpu_req_data(31,0)), + Mux(r_cpu_req_type === MT_D, r_cpu_req_data, + UFix(0, 64))))); + when ((state === s_ready) && r_cpu_req_val && req_store) { - p_store_data <== r_cpu_req_data; + p_store_data <== store_data; p_store_addr <== r_cpu_req_addr; - p_store_wmask <== r_cpu_req_wmask; + p_store_wmask <== store_wmask; p_store_valid <== Bool(true); } @@ -370,14 +414,14 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { Fill(8, p_store_wmask(1)), Fill(8, p_store_wmask(0))); - val store_wmask = + val da_store_wmask = Mux(p_store_addr(offsetlsb).toBool, Cat(p_wmask_expand, Bits(0,64)), Cat(Bits(0,64), p_wmask_expand)); val data_array_wmask = Mux((state === s_refill), ~Bits(0,128), - store_wmask); + da_store_wmask); val data_array = 
Mem(lines*4, data_array_we, data_array_waddr, data_array_wdata, wrMask = data_array_wmask, resetVal = null); val data_array_raddr = Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count_next).toUFix, @@ -394,7 +438,7 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { io.cpu.resp_val := ((state === s_ready) && r_cpu_req_val && tag_match && req_load && !(p_store_valid && addr_match)) || ((state === s_resolve_miss) && req_flush); - io.cpu.resp_tag := r_cpu_req_tag; + io.cpu.resp_tag := Cat(Bits(0,1), r_cpu_req_type, r_cpu_req_addr(2,0), r_cpu_req_tag); io.cpu.resp_data := Mux(r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 6c7e5287..f9039a0a 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -5,46 +5,13 @@ import Node._; import Constants._ import Instructions._ -class ioDpath extends Bundle() -{ - val btb_hit = Bool('output); - val inst = Bits(32, 'output); - val rs2 = Bits(64, 'output); - val rs1 = Bits(64, 'output); - val br_eq = Bool('output); - val br_lt = Bool('output); - val br_ltu = Bool('output); - val div_result_val = Bool('output); - val div_rdy = Bool('output); - val mul_result_val = Bool('output); - val wen = Bool('output); - val waddr = UFix(5, 'output); - val alu_out = UFix(64, 'output); - val exception = Bool('output); - val status = Bits(8, 'output); -} - -class ioDpathImem extends Bundle() -{ - val req_addr = UFix(32, 'output); - val resp_data = Bits(32, 'input); -} - -class ioDpathWB extends Bundle() -{ - val waddr = UFix(5, 'input); - val wen = Bool('input); - val wdata = Bits(64, 'input); -} - class ioDpathAll extends Bundle() { - val dpath = new ioDpath(); val host = new ioHost(); - val ctrl = new ioCtrl().flip(); + val ctrl = new ioCtrlDpath().flip(); val debug = new ioDebug(); - val wb = new ioDpathWB(); - val imem = new ioDpathImem(); + val 
dmem = new ioDmem(List("req_addr", "req_data", "req_tag", "resp_val", "resp_tag", "resp_data")).flip(); + val imem = new ioImem(List("req_addr", "resp_data")).flip(); } class rocketDpath extends Component @@ -77,37 +44,60 @@ class rocketDpath extends Component val if_reg_pc = Reg(width = 32, resetVal = UFix(0, 32)); // instruction decode definitions - val id_reg_pc = Reg(){UFix(width = 32)}; - val id_reg_pc_plus4 = Reg(){UFix(width = 32)}; - val id_reg_inst = Reg(width = 32, resetVal = NOP); + val id_reg_pc = Reg(resetVal = UFix(0,32)); + val id_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); + val id_reg_inst = Reg(resetVal = NOP); // execute definitions - val ex_reg_pc = Reg(width = 32, resetVal = UFix(0, 32)); - val ex_reg_pc_plus4 = Reg(width = 32, resetVal = UFix(0, 32)); - val ex_reg_inst = Reg(width = 32, resetVal = Bits(0, 32)); - val ex_reg_raddr2 = Reg(width = 5, resetVal = UFix(0, 5)); - val ex_reg_raddr1 = Reg(width = 5, resetVal = UFix(0, 5)); - val ex_reg_rs2 = Reg(width = 64, resetVal = Bits(0, 64)); - val ex_reg_rs1 = Reg(width = 64, resetVal = Bits(0, 64)); - val ex_reg_waddr = Reg(width = 5, resetVal = UFix(0, 5)); - val ex_reg_ctrl_sel_alu2 = Reg(width = 2, resetVal = A2_X); - val ex_reg_ctrl_sel_alu1 = Reg(width = 1, resetVal = A1_X); - val ex_reg_ctrl_fn_dw = Reg(width = 1, resetVal = DW_X); - val ex_reg_ctrl_fn_alu = Reg(width = 4, resetVal = FN_X); - val ex_reg_ctrl_ll_wb = Reg(width = 1, resetVal = Bool(false)); - val ex_reg_ctrl_mul_val = Reg(width = 1, resetVal = Bool(false)); - val ex_reg_ctrl_mul_fn = Reg(width = 3, resetVal = MUL_X); - val ex_reg_ctrl_div_val = Reg(width = 1, resetVal = Bool(false)); - val ex_reg_ctrl_div_fn = Reg(width = 4, resetVal = DIV_X); - val ex_reg_ctrl_sel_wb = Reg(width = 3, resetVal = WB_X); - val ex_reg_ctrl_wen = Reg(width = 1, resetVal = Bool(false)); - val ex_reg_ctrl_ren_pcr = Reg(width = 1, resetVal = Bool(false)); - val ex_reg_ctrl_wen_pcr = Reg(width = 1, resetVal = Bool(false)); - val ex_reg_ctrl_eret = 
Reg(width = 1, resetVal = Bool(false)); - val ex_reg_ctrl_exception = Reg(width = 1, resetVal = Bool(false)); - val ex_reg_ctrl_cause = Reg(width = 5, resetVal = UFix(0,5)); - val ex_wdata = Wire() { Bits() }; - + val ex_reg_pc = Reg(resetVal = UFix(0,32)); + val ex_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); + val ex_reg_inst = Reg(resetVal = Bits(0,32)); + val ex_reg_raddr2 = Reg(resetVal = UFix(0,5)); + val ex_reg_raddr1 = Reg(resetVal = UFix(0,5)); + val ex_reg_rs2 = Reg(resetVal = Bits(0,64)); + val ex_reg_rs1 = Reg(resetVal = Bits(0,64)); + val ex_reg_waddr = Reg(resetVal = UFix(0,5)); + val ex_reg_ctrl_sel_alu2 = Reg(resetVal = A2_X); + val ex_reg_ctrl_sel_alu1 = Reg(resetVal = A1_X); + val ex_reg_ctrl_fn_dw = Reg(resetVal = DW_X); + val ex_reg_ctrl_fn_alu = Reg(resetVal = FN_X); + val ex_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); + val ex_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); + val ex_reg_ctrl_mul_fn = Reg(resetVal = MUL_X); + val ex_reg_ctrl_div_val = Reg(resetVal = Bool(false)); + val ex_reg_ctrl_div_fn = Reg(resetVal = DIV_X); + val ex_reg_ctrl_sel_wb = Reg(resetVal = WB_X); + val ex_reg_ctrl_wen = Reg(resetVal = Bool(false)); + val ex_reg_ctrl_ren_pcr = Reg(resetVal = Bool(false)); + val ex_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); + val ex_reg_ctrl_eret = Reg(resetVal = Bool(false)); + val ex_reg_ctrl_exception = Reg(resetVal = Bool(false)); + val ex_reg_ctrl_cause = Reg(resetVal = UFix(0,5)); + val ex_wdata = Wire() { Bits() }; + + val mem_reg_pc = Reg(resetVal = UFix(0,32)); + val mem_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); + val mem_reg_waddr = Reg(resetVal = UFix(0,5)); + val mem_reg_wdata = Reg(resetVal = Bits(0,64)); + val mem_reg_raddr2 = Reg(resetVal = UFix(0,5)); + val mem_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); + val mem_reg_ctrl_div_val = Reg(resetVal = Bool(false)); + val mem_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); + val mem_reg_ctrl_wen = Reg(resetVal = Bool(false)); + val mem_reg_ctrl_wen_pcr = 
Reg(resetVal = Bool(false)); + val mem_reg_ctrl_exception = Reg(resetVal = Bool(false)); + + val wb_reg_pc = Reg(resetVal = UFix(0,32)); + val wb_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); + val wb_reg_waddr = Reg(resetVal = UFix(0,5)); + val wb_reg_wdata = Reg(resetVal = Bits(0,64)); + val wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); + val wb_reg_raddr2 = Reg(resetVal = UFix(0,5)); + val wb_reg_ctrl_div_val = Reg(resetVal = Bool(false)); + val wb_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); + val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); + val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); + // instruction fetch stage val if_pc_plus4 = if_reg_pc + UFix(4, 32); @@ -132,7 +122,8 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, Mux(io.ctrl.sel_pc === PC_PCR, ex_pcr(31,0).toUFix, - UFix(0, 32)))))))); + Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, + UFix(0, 32))))))))); when (!io.host.start){ if_reg_pc <== UFix(0, 32); //32'hFFFF_FFFC; @@ -146,7 +137,7 @@ class rocketDpath extends Component if_next_pc); btb.io.current_pc4 := if_pc_plus4; - btb.io.hit ^^ io.dpath.btb_hit; + btb.io.hit ^^ io.ctrl.btb_hit; btb.io.wen ^^ io.ctrl.wen_btb; btb.io.correct_pc4 := ex_reg_pc_plus4; @@ -174,6 +165,7 @@ class rocketDpath extends Component rfile.io.r1.addr := id_raddr1; val id_rdata1 = rfile.io.r1.data; + // destination register selection val id_waddr = Mux(io.ctrl.div_wb, div_result_tag, Mux(io.ctrl.mul_wb, mul_result_tag, @@ -181,16 +173,22 @@ class rocketDpath extends Component Mux(io.ctrl.sel_wa === WA_RA, RA, UFix(0, 5))))); + // bypass muxes val id_rs1 = Mux(io.ctrl.div_wb, div_result, Mux(io.ctrl.mul_wb, mul_result, - Mux(id_raddr1 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr1 === ex_reg_waddr, ex_wdata, - id_rdata1))); + Mux(id_raddr1 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr1 === ex_reg_waddr, ex_wdata, + Mux(id_raddr1 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr1 === 
mem_reg_waddr, mem_reg_wdata, + Mux(id_raddr1 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr1 === wb_reg_waddr, wb_reg_wdata, + id_rdata1))))); val id_rs2 = - Mux(id_raddr2 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr2 === ex_reg_waddr, ex_wdata, - id_rdata2); + Mux(id_raddr2 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr2 === ex_reg_waddr, ex_wdata, + Mux(id_raddr2 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata, + Mux(id_raddr2 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr2 === wb_reg_waddr, wb_reg_wdata, + id_rdata2))); + // write value to cause register based on exception type val id_exception = io.ctrl.xcpt_illegal || io.ctrl.xcpt_privileged || io.ctrl.xcpt_fpu || io.ctrl.xcpt_syscall; val id_cause = Mux(io.ctrl.xcpt_illegal, UFix(2,5), @@ -199,9 +197,9 @@ class rocketDpath extends Component Mux(io.ctrl.xcpt_syscall, UFix(6,5), UFix(0,5))))); - io.dpath.inst := id_reg_inst; - io.dpath.rs1 := id_rs1; - io.dpath.rs2 := id_rs2; + io.ctrl.inst := id_reg_inst; +// io.ctrl.rs1 := id_rs1; +// io.ctrl.rs2 := id_rs2; // execute stage ex_reg_pc <== id_reg_pc; @@ -269,8 +267,8 @@ class rocketDpath extends Component div.io.dpath_rs2 := ex_reg_rs2; div.io.div_result_rdy := io.ctrl.div_wb; - io.dpath.div_rdy := div.io.div_rdy; - io.dpath.div_result_val := div.io.div_result_val; + io.ctrl.div_rdy := div.io.div_rdy; + io.ctrl.div_result_val := div.io.div_result_val; // multiplier mul.io.mul_val := ex_reg_ctrl_mul_val; @@ -279,66 +277,145 @@ class rocketDpath extends Component mul.io.in0 := ex_reg_rs1; mul.io.in1 := ex_reg_rs2; - io.dpath.mul_result_val := mul.io.result_val; + io.ctrl.mul_result_val := mul.io.result_val; + + io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection - // processor control register i/o - pcr.io.host.from_wen ^^ io.host.from_wen; - pcr.io.host.from ^^ io.host.from; - pcr.io.host.to ^^ io.host.to; + // D$ request interface (registered inside D$ module) + // other signals (req_val, req_rdy) connect to control 
module + io.dmem.req_addr := ex_alu_out; + io.dmem.req_data := ex_reg_rs2; + io.dmem.req_tag := ex_reg_waddr; + // processor control regfile read pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_exception | ex_reg_ctrl_eret; pcr.io.r.addr := Mux(ex_reg_ctrl_exception, PCR_EVEC, Mux(ex_reg_ctrl_eret, PCR_EPC, ex_reg_raddr2)); - - pcr.io.w.addr := ex_reg_raddr2; - pcr.io.w.en := ex_reg_ctrl_wen_pcr; - pcr.io.w.data := ex_reg_rs1; - + + pcr.io.host.from_wen ^^ io.host.from_wen; + pcr.io.host.from ^^ io.host.from; + pcr.io.host.to ^^ io.host.to; + pcr.io.eret := ex_reg_ctrl_eret; pcr.io.exception := ex_reg_ctrl_exception; pcr.io.cause := ex_reg_ctrl_cause; pcr.io.pc := ex_reg_pc; - io.dpath.status := pcr.io.status; -// io.debug ^^ pcr.io.debug; - + io.ctrl.status := pcr.io.status; io.debug.error_mode := pcr.io.debug.error_mode; io.debug.log_control := pcr.io.debug.log_control; - + // branch resolution logic - io.dpath.br_eq := (ex_reg_rs1 === ex_reg_rs2); - io.dpath.br_ltu := (ex_reg_rs1.toUFix < ex_reg_rs2.toUFix); - io.dpath.br_lt := - (~(ex_reg_rs1(63) ^ ex_reg_rs2(63)) & io.dpath.br_ltu | + io.ctrl.br_eq := (ex_reg_rs1 === ex_reg_rs2); + io.ctrl.br_ltu := (ex_reg_rs1.toUFix < ex_reg_rs2.toUFix); + io.ctrl.br_lt := + (~(ex_reg_rs1(63) ^ ex_reg_rs2(63)) & io.ctrl.br_ltu | ex_reg_rs1(63) & ~ex_reg_rs2(63)).toBool; - io.dpath.alu_out := ex_alu_out; - // writeback select mux ex_wdata := - Mux(ex_reg_ctrl_ll_wb, ex_reg_rs1, + Mux(ex_reg_ctrl_ll_wb || ex_reg_ctrl_wen_pcr, ex_reg_rs1, Mux(ex_reg_ctrl_sel_wb === WB_PC, ex_reg_pc_plus4, Mux(ex_reg_ctrl_sel_wb === WB_ALU, ex_alu_out, Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr, Bits(0, 64))))).toBits; - // regfile write - rfile.io.w0.addr := ex_reg_waddr; - rfile.io.w0.en := ex_reg_ctrl_wen | ex_reg_ctrl_ll_wb; - rfile.io.w0.data := ex_wdata; - - rfile.io.w1.addr ^^ io.wb.waddr; - rfile.io.w1.en ^^ io.wb.wen; - rfile.io.w1.data ^^ io.wb.wdata; - - // clear scoreboard for "long latency" writebacks - io.dpath.wen := 
ex_reg_ctrl_ll_wb; - io.dpath.waddr := ex_reg_waddr; + // memory stage + mem_reg_pc <== ex_reg_pc; + mem_reg_pc_plus4 <== ex_reg_pc_plus4; + mem_reg_waddr <== ex_reg_waddr; + mem_reg_wdata <== ex_wdata; + mem_reg_ctrl_ll_wb <== ex_reg_ctrl_ll_wb; + mem_reg_raddr2 <== ex_reg_raddr2; + + when (io.ctrl.killx) { + mem_reg_ctrl_div_val <== Bool(false); + mem_reg_ctrl_mul_val <== Bool(false); + mem_reg_ctrl_wen <== Bool(false); + mem_reg_ctrl_wen_pcr <== Bool(false); + mem_reg_ctrl_exception <== Bool(false); + } + otherwise { + mem_reg_ctrl_div_val <== ex_reg_ctrl_div_val; + mem_reg_ctrl_mul_val <== ex_reg_ctrl_mul_val; + mem_reg_ctrl_wen <== ex_reg_ctrl_wen; + mem_reg_ctrl_wen_pcr <== ex_reg_ctrl_wen_pcr; + mem_reg_ctrl_exception <== ex_reg_ctrl_exception; + } // exception signal to control (for NPC select) - io.dpath.exception := ex_reg_ctrl_exception; + io.ctrl.exception := mem_reg_ctrl_exception; + + // writeback stage + val r_dmem_resp_val = Reg(io.dmem.resp_val); + val r_dmem_resp_waddr = Reg(io.dmem.resp_tag(4,0).toUFix); + val r_dmem_resp_pos = Reg(io.dmem.resp_tag(7,5)); + val r_dmem_resp_type = Reg(io.dmem.resp_tag(9,8)); + val r_dmem_resp_data = Reg(io.dmem.resp_data); + + wb_reg_pc <== mem_reg_pc; + wb_reg_pc_plus4 <== mem_reg_pc_plus4; + wb_reg_waddr <== mem_reg_waddr; + wb_reg_wdata <== mem_reg_wdata; + wb_reg_ctrl_ll_wb <== mem_reg_ctrl_ll_wb; + wb_reg_raddr2 <== mem_reg_raddr2; + + when (io.ctrl.killm) { + wb_reg_ctrl_div_val <== Bool(false); + wb_reg_ctrl_mul_val <== Bool(false); + wb_reg_ctrl_wen <== Bool(false); + wb_reg_ctrl_wen_pcr <== Bool(false); + } + otherwise { + wb_reg_ctrl_div_val <== mem_reg_ctrl_div_val; + wb_reg_ctrl_mul_val <== mem_reg_ctrl_mul_val; + wb_reg_ctrl_wen <== mem_reg_ctrl_wen; + wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; + } + + // crossbar/sign extension for 8/16/32 bit loads + val dmem_resp_data_w = + Mux(r_dmem_resp_pos(2).toBool, r_dmem_resp_data(63, 32), r_dmem_resp_data(31, 0)); + val dmem_resp_data_h = + 
Mux(r_dmem_resp_pos(1).toBool, dmem_resp_data_w(31, 16), dmem_resp_data_w(15, 0)); + val dmem_resp_data_b = + Mux(r_dmem_resp_pos(0).toBool, dmem_resp_data_h(15, 8), dmem_resp_data_h(7, 0)); + + val dmem_resp_data_final = + Mux(r_dmem_resp_type === MT_B, Cat(Fill(56, dmem_resp_data_b(7)), dmem_resp_data_b), + Mux(r_dmem_resp_type === MT_BU, Cat(UFix(0, 56), dmem_resp_data_b), + Mux(r_dmem_resp_type === MT_H, Cat(Fill(48, dmem_resp_data_h(15)), dmem_resp_data_h), + Mux(r_dmem_resp_type === MT_HU, Cat(UFix(0, 48), dmem_resp_data_h), + Mux(r_dmem_resp_type === MT_W, Cat(Fill(32, dmem_resp_data_w(31)), dmem_resp_data_w), + Mux(r_dmem_resp_type === MT_WU, Cat(UFix(0, 32), dmem_resp_data_w), + Mux(r_dmem_resp_type === MT_D, r_dmem_resp_data, + UFix(0, 64)))))))); + + // regfile write + rfile.io.w0.addr := wb_reg_waddr; + rfile.io.w0.en := wb_reg_ctrl_wen | wb_reg_ctrl_ll_wb; + rfile.io.w0.data := wb_reg_wdata; + + rfile.io.w1.addr := r_dmem_resp_waddr; + rfile.io.w1.en := r_dmem_resp_val; + rfile.io.w1.data := dmem_resp_data_final; + + // scoreboard set (for D$ misses, div, mul) + io.ctrl.sboard_set := wb_reg_ctrl_div_val | wb_reg_ctrl_mul_val | io.ctrl.dcache_miss; + io.ctrl.sboard_seta := wb_reg_waddr; + + // scoreboard clear (for div/mul and D$ load miss writebacks) + io.ctrl.sboard_clr0 := wb_reg_ctrl_ll_wb; + io.ctrl.sboard_clr0a := wb_reg_waddr; + io.ctrl.sboard_clr1 := r_dmem_resp_val; + io.ctrl.sboard_clr1a := r_dmem_resp_waddr; + + // processor control regfile write + pcr.io.w.addr := wb_reg_raddr2; + pcr.io.w.en := wb_reg_ctrl_wen_pcr; + pcr.io.w.data := wb_reg_wdata; } diff --git a/rocket/src/main/scala/memory.scala b/rocket/src/main/scala/memory.scala deleted file mode 100644 index 3d0b5869..00000000 --- a/rocket/src/main/scala/memory.scala +++ /dev/null @@ -1,124 +0,0 @@ -package Top -{ - -import Chisel._ -import Node._; - -import queues._; -import Constants._; - -class ioMemory extends Bundle() -{ - val mem_mrq_val = Bool('input); - val mem_mrq_cmd = 
Bits(4, 'input); - val mem_mrq_type = Bits(3, 'input); - val mem_xsdq_rdy = Bool('output); - val mem_xsdq_val = Bool('input); - val mem_mrq_deq = Bool('output); - val dpath_rs2 = Bits(64, 'input); - val dpath_waddr = UFix(5, 'input); - val dpath_alu_out = UFix(64, 'input); - val dmem_req_val = Bool('output); - val dmem_req_rdy = Bool('input); - val dmem_req_op = Bits(4, 'output); - val dmem_req_addr = UFix(32, 'output); - val dmem_req_data = Bits(64, 'output); - val dmem_req_wmask = Bits(8, 'output); - val dmem_req_tag = Bits(12, 'output); -} - -class rocketMemory extends Component -{ - override val io = new ioMemory(); - val mrq_enq_xf - = (io.mem_mrq_cmd === M_FRD || io.mem_mrq_cmd === M_FWR); - - val mrq_enq_op - = Mux(io.mem_mrq_cmd === M_FRD, M_XRD, - Mux(io.mem_mrq_cmd === M_FWR, M_XWR, - io.mem_mrq_cmd)); - - val mrq_enq_type = io.mem_mrq_type; - - val mrq = new queueSimplePF(45, 4, 2); - val xsdq = new queueSimplePF(64, 4, 2); - - mrq.io.q_reset := Bool(false); - mrq.io.enq_bits := Cat(mrq_enq_xf,mrq_enq_op,mrq_enq_type,io.dpath_waddr,io.dpath_alu_out(31,0)); - mrq.io.enq_val ^^ io.mem_mrq_val; - // mrq.io.enq_rdy <> (); issue logic takes care of this - - val mrq_deq_xf = Wire(){Bits(width = 1)}; - val mrq_deq_op = Wire(){Bits(width = 4)}; - val mrq_deq_type = Wire(){Bits(width = 3)}; - val mrq_deq_waddr = Wire(){Bits(width = 5)}; - val mrq_deq_addr = Wire(){Bits(width = 32)}; - val mrq_deq_bits = mrq.io.deq_bits; - mrq_deq_bits.Match(Array(mrq_deq_xf, mrq_deq_op, mrq_deq_type, mrq_deq_waddr, mrq_deq_addr)); - val mrq_deq_val = mrq.io.deq_val; - - xsdq.io.q_reset := Bool(false); - xsdq.io.enq_bits ^^ io.dpath_rs2; - xsdq.io.enq_val ^^ io.mem_xsdq_val; - xsdq.io.enq_rdy ^^ io.mem_xsdq_rdy; - - val mrq_deq_flush = mrq_deq_op === M_FLA; - val mrq_deq_load = mrq_deq_op === M_XRD; - val mrq_deq_xstore = mrq_deq_op === M_XWR & ~mrq_deq_xf & xsdq.io.deq_val; - - val mrq_deq_rdy = io.dmem_req_rdy & (mrq_deq_load | mrq_deq_xstore | mrq_deq_flush); - io.mem_mrq_deq 
:= (mrq_deq_val & mrq_deq_rdy).toBool; - mrq.io.deq_rdy := mrq_deq_rdy.toBool; - val xsdq_deq_rdy = io.dmem_req_rdy & mrq_deq_val & mrq_deq_op === M_XWR & ~mrq_deq_xf; - xsdq.io.deq_rdy := xsdq_deq_rdy.toBool; - - val wdata = xsdq.io.deq_bits; - - val wmask_b = - Mux(mrq_deq_addr(2,0) === UFix(0, 3), Bits("b0000_0001", 8), - Mux(mrq_deq_addr(2,0) === UFix(1, 3), Bits("b0000_0010", 8), - Mux(mrq_deq_addr(2,0) === UFix(2, 3), Bits("b0000_0100", 8), - Mux(mrq_deq_addr(2,0) === UFix(3, 3), Bits("b0000_1000", 8), - Mux(mrq_deq_addr(2,0) === UFix(4, 3), Bits("b0001_0000", 8), - Mux(mrq_deq_addr(2,0) === UFix(5, 3), Bits("b0010_0000", 8), - Mux(mrq_deq_addr(2,0) === UFix(6, 3), Bits("b0100_0000", 8), - Mux(mrq_deq_addr(2,0) === UFix(7, 3), Bits("b1000_0000", 8), - UFix(0, 8))))))))); - - val wmask_h = - Mux(mrq_deq_addr(2,1) === UFix(0, 2), Bits("b0000_0011", 8), - Mux(mrq_deq_addr(2,1) === UFix(1, 2), Bits("b0000_1100", 8), - Mux(mrq_deq_addr(2,1) === UFix(2, 2), Bits("b0011_0000", 8), - Mux(mrq_deq_addr(2,1) === UFix(3, 2), Bits("b1100_0000", 8), - UFix(0, 8))))); - - val wmask_w = - Mux(mrq_deq_addr(2) === UFix(0, 1), Bits("b0000_1111", 8), - Mux(mrq_deq_addr(2) === UFix(1, 1), Bits("b1111_0000", 8), - UFix(0, 8))); - - val wmask_d = - Bits("b1111_1111", 8); - - io.dmem_req_val := (mrq_deq_val & (mrq_deq_load | mrq_deq_xstore | mrq_deq_flush)).toBool; - io.dmem_req_op := mrq_deq_op; - io.dmem_req_addr := Cat(mrq_deq_addr(31,3), UFix(0, 3)).toUFix; - - io.dmem_req_data := - Mux(mrq_deq_type === MT_B, Fill(8, wdata( 7,0)), - Mux(mrq_deq_type === MT_H, Fill(4, wdata(15,0)), - Mux(mrq_deq_type === MT_W, Fill(2, wdata(31,0)), - Mux(mrq_deq_type === MT_D, wdata, - UFix(0, 64))))); - - io.dmem_req_wmask := - Mux(mrq_deq_type === MT_B, wmask_b, - Mux(mrq_deq_type === MT_H, wmask_h, - Mux(mrq_deq_type === MT_W, wmask_w, - Mux(mrq_deq_type === MT_D, wmask_d, - UFix(0, 8))))); - - io.dmem_req_tag := Cat(mrq_deq_xf,mrq_deq_type,mrq_deq_addr(2,0),mrq_deq_waddr); -} - -} diff --git 
a/rocket/src/main/scala/writeback.scala b/rocket/src/main/scala/writeback.scala deleted file mode 100644 index 327358c6..00000000 --- a/rocket/src/main/scala/writeback.scala +++ /dev/null @@ -1,56 +0,0 @@ -package Top -{ - -import Chisel._ -import Node._; -import Constants._; - -class ioWriteback extends Bundle() -{ - val dmem_resp_val = Bool('input); - val dmem_resp_data = UFix(64, 'input); - val dmem_resp_tag = UFix(12, 'input); - val wb_waddr = UFix(5, 'output); - val wb_wen = Bool('output); - val wb_wdata = Bits(64, 'output); -} - -class rocketWriteback extends Component -{ - override val io = new ioWriteback(); - - val r_dmem_resp_val = Reg(io.dmem_resp_val); - val r_dmem_resp_data = Reg(io.dmem_resp_data); - val r_dmem_resp_tag = Reg(io.dmem_resp_tag); - - val dmem_resp_xf = r_dmem_resp_tag(11); - val dmem_resp_type = r_dmem_resp_tag(10, 8); - val dmem_resp_pos = r_dmem_resp_tag(7, 5); - val dmem_resp_waddr = r_dmem_resp_tag(4, 0); - val dmem_resp_xval = r_dmem_resp_val & ~dmem_resp_xf; - val dmem_resp_fval = r_dmem_resp_val & dmem_resp_xf; - - val dmem_resp_data_w = - Mux(dmem_resp_pos(2).toBool, r_dmem_resp_data(63, 32), r_dmem_resp_data(31, 0)); - val dmem_resp_data_h = - Mux(dmem_resp_pos(1).toBool, dmem_resp_data_w(31, 16), dmem_resp_data_w(15, 0)); - val dmem_resp_data_b = - Mux(dmem_resp_pos(0).toBool, dmem_resp_data_h(15, 8), dmem_resp_data_h(7, 0)); - - val dmem_resp_data_final = - Mux(dmem_resp_type === MT_B, Cat(Fill(56, dmem_resp_data_b(7)), dmem_resp_data_b), - Mux(dmem_resp_type === MT_BU, Cat(UFix(0, 56), dmem_resp_data_b), - Mux(dmem_resp_type === MT_H, Cat(Fill(48, dmem_resp_data_h(15)), dmem_resp_data_h), - Mux(dmem_resp_type === MT_HU, Cat(UFix(0, 48), dmem_resp_data_h), - Mux(dmem_resp_type === MT_W, Cat(Fill(32, dmem_resp_data_w(31)), dmem_resp_data_w), - Mux(dmem_resp_type === MT_WU, Cat(UFix(0, 32), dmem_resp_data_w), - Mux(dmem_resp_type === MT_D, r_dmem_resp_data, - UFix(0, 64)))))))); - - io.wb_wen := dmem_resp_xval.toBool; - 
io.wb_waddr := dmem_resp_waddr; - io.wb_wdata := dmem_resp_data_final; - -} - -} From 2b67eee6835158b1b442d2e218ed8cd0cb6e431b Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 1 Nov 2011 19:05:27 -0700 Subject: [PATCH 0006/1087] pipeline changes for replay on dcache miss --- rocket/src/main/scala/ctrl.scala | 5 ++++- rocket/src/main/scala/dpath.scala | 28 ++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 8d6a7d22..3945c56d 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -37,7 +37,8 @@ class ioCtrlDpath extends Bundle() val xcpt_privileged = Bool('output); val xcpt_fpu = Bool('output); val xcpt_syscall = Bool('output); - val eret = Bool('output); + val eret = Bool('output); + val mem_load = Bool('output); val dcache_miss = Bool('output); val wen = Bool('output); // inputs from datapath @@ -298,9 +299,11 @@ class rocketCtrl extends Component } // replay on a D$ load miss : FIXME - add a miss signal to D$ + val mem_cmd_load = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); val replay_mem = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD) && !io.dmem.resp_val; val dcache_miss = Reg(replay_mem); + io.dpath.mem_load := mem_cmd_load; io.dpath.dcache_miss := dcache_miss; io.dpath.sel_pc := diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f9039a0a..aafeffd8 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -75,6 +75,7 @@ class rocketDpath extends Component val ex_reg_ctrl_cause = Reg(resetVal = UFix(0,5)); val ex_wdata = Wire() { Bits() }; + // memory definitions val mem_reg_pc = Reg(resetVal = UFix(0,32)); val mem_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); val mem_reg_waddr = Reg(resetVal = UFix(0,5)); @@ -87,6 +88,7 @@ class rocketDpath extends Component val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val 
mem_reg_ctrl_exception = Reg(resetVal = Bool(false)); + // writeback definitions val wb_reg_pc = Reg(resetVal = UFix(0,32)); val wb_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); val wb_reg_waddr = Reg(resetVal = UFix(0,5)); @@ -98,6 +100,12 @@ class rocketDpath extends Component val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); + val r_dmem_resp_val = Reg(resetVal = Bool(false)); + val r_dmem_resp_waddr = Reg(resetVal = UFix(0,5)); + val r_dmem_resp_pos = Reg(resetVal = UFix(0,3)); + val r_dmem_resp_type = Reg(resetVal = UFix(0,3)); + val r_dmem_resp_data = Reg(resetVal = Bits(0,64)); + // instruction fetch stage val if_pc_plus4 = if_reg_pc + UFix(4, 32); @@ -122,7 +130,7 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, Mux(io.ctrl.sel_pc === PC_PCR, ex_pcr(31,0).toUFix, - Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, + Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc_plus4, UFix(0, 32))))))))); when (!io.host.start){ @@ -179,14 +187,18 @@ class rocketDpath extends Component Mux(io.ctrl.mul_wb, mul_result, Mux(id_raddr1 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr1 === ex_reg_waddr, ex_wdata, Mux(id_raddr1 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata, + Mux(id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr, io.dmem.resp_data, + Mux(id_raddr1 != UFix(0, 5) && r_dmem_resp_val && id_raddr1 === r_dmem_resp_waddr, r_dmem_resp_data, Mux(id_raddr1 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr1 === wb_reg_waddr, wb_reg_wdata, - id_rdata1))))); + id_rdata1))))))); val id_rs2 = Mux(id_raddr2 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr2 === ex_reg_waddr, ex_wdata, Mux(id_raddr2 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata, + Mux(id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr, io.dmem.resp_data, + Mux(id_raddr2 != UFix(0, 5) && 
r_dmem_resp_val && id_raddr2 === r_dmem_resp_waddr, r_dmem_resp_data, Mux(id_raddr2 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr2 === wb_reg_waddr, wb_reg_wdata, - id_rdata2))); + id_rdata2))))); // write value to cause register based on exception type val id_exception = io.ctrl.xcpt_illegal || io.ctrl.xcpt_privileged || io.ctrl.xcpt_fpu || io.ctrl.xcpt_syscall; @@ -349,11 +361,11 @@ class rocketDpath extends Component io.ctrl.exception := mem_reg_ctrl_exception; // writeback stage - val r_dmem_resp_val = Reg(io.dmem.resp_val); - val r_dmem_resp_waddr = Reg(io.dmem.resp_tag(4,0).toUFix); - val r_dmem_resp_pos = Reg(io.dmem.resp_tag(7,5)); - val r_dmem_resp_type = Reg(io.dmem.resp_tag(9,8)); - val r_dmem_resp_data = Reg(io.dmem.resp_data); + r_dmem_resp_val <== io.dmem.resp_val; + r_dmem_resp_waddr <== io.dmem.resp_tag(4,0).toUFix; + r_dmem_resp_pos <== io.dmem.resp_tag(7,5).toUFix; + r_dmem_resp_type <== io.dmem.resp_tag(10,8).toUFix; + r_dmem_resp_data <== io.dmem.resp_data; wb_reg_pc <== mem_reg_pc; wb_reg_pc_plus4 <== mem_reg_pc_plus4; From 3b3d988fde49ad55a32f74e8e4ec22268298ed67 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 1 Nov 2011 21:25:52 -0700 Subject: [PATCH 0007/1087] dcache loads working - 1/2 cycle load/use delay depending on load type --- rocket/src/main/scala/ctrl.scala | 38 +++++++++--- rocket/src/main/scala/dcache.scala | 3 +- rocket/src/main/scala/dpath.scala | 93 +++++++++++++++++++++--------- 3 files changed, 97 insertions(+), 37 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 3945c56d..5650dfa8 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -51,6 +51,7 @@ class ioCtrlDpath extends Bundle() val div_result_val = Bool('input); val mul_result_val = Bool('input); val ex_waddr = UFix(5,'input); // write addr from execute stage + val mem_waddr = UFix(5,'input); // write addr from memory stage val exception = Bool('input); val status = Bits(8, 
'input); val sboard_set = Bool('input); @@ -337,23 +338,44 @@ class rocketCtrl extends Component ); // check for loads in execute stage to detect load/use hazards - val lu_stall_raddr1 = - ex_reg_mem_val && - (ex_reg_mem_cmd === M_XRD) && + + val ex_mem_cmd_load = ex_reg_mem_val && (ex_reg_mem_cmd === M_XRD); + + val lu_stall_raddr1_ex = + ex_mem_cmd_load && id_ren1.toBool && (id_raddr1 === io.dpath.ex_waddr); - val lu_stall_raddr2 = - ex_reg_mem_val && - (ex_reg_mem_cmd === M_XRD) && + val lu_stall_raddr2_ex = + ex_mem_cmd_load && id_ren2.toBool && (id_raddr2 === io.dpath.ex_waddr); + + val mem_mem_cmd_load_bh = + mem_reg_mem_val && + (mem_reg_mem_cmd === M_XRD) && + ((mem_reg_mem_type === MT_B) || + (mem_reg_mem_type === MT_BU) || + (mem_reg_mem_type === MT_H) || + (mem_reg_mem_type === MT_HU)); + + val lu_stall_raddr1_mem = + mem_mem_cmd_load_bh && + id_ren1.toBool && + (id_raddr1 === io.dpath.mem_waddr); + + val lu_stall_raddr2_mem = + mem_mem_cmd_load_bh && + id_ren2.toBool && + (id_raddr2 === io.dpath.mem_waddr); val ctrl_stalld = ~take_pc & ( - lu_stall_raddr1 | - lu_stall_raddr2 | + lu_stall_raddr1_ex | + lu_stall_raddr2_ex | + lu_stall_raddr1_mem | + lu_stall_raddr2_mem | id_ren2 & id_stall_raddr2 | id_ren1 & id_stall_raddr1 | (id_sel_wa === WA_RD) && id_stall_waddr | diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 0bddf7d4..35a11534 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -306,7 +306,8 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } // tag array - val tag_we = (state === s_resolve_miss); +// val tag_we = (state === s_resolve_miss); + val tag_we = (state === s_refill) && io.mem.req_rdy && (rr_count === UFix(3,2)); val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata); diff --git a/rocket/src/main/scala/dpath.scala 
b/rocket/src/main/scala/dpath.scala index aafeffd8..5e5bff7a 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -103,7 +103,7 @@ class rocketDpath extends Component val r_dmem_resp_val = Reg(resetVal = Bool(false)); val r_dmem_resp_waddr = Reg(resetVal = UFix(0,5)); val r_dmem_resp_pos = Reg(resetVal = UFix(0,3)); - val r_dmem_resp_type = Reg(resetVal = UFix(0,3)); + val r_dmem_resp_type = Reg(resetVal = Bits(0,3)); val r_dmem_resp_data = Reg(resetVal = Bits(0,64)); // instruction fetch stage @@ -181,22 +181,52 @@ class rocketDpath extends Component Mux(io.ctrl.sel_wa === WA_RA, RA, UFix(0, 5))))); + // moved this here to avoid having to do forward declaration + // TODO: cleanup + + val dmem_resp_pos = io.dmem.resp_tag(7,5).toUFix; + val dmem_resp_type = io.dmem.resp_tag(10,8); + + val mem_dmem_resp_data_w = + Mux(dmem_resp_pos(2).toBool, io.dmem.resp_data(63, 32), io.dmem.resp_data(31, 0)); + + val mem_dmem_resp_data = + Mux(dmem_resp_type === MT_D, io.dmem.resp_data, + Mux(dmem_resp_type === MT_W, Cat(Fill(32, mem_dmem_resp_data_w(31)), mem_dmem_resp_data_w), + Cat(UFix(0,32), mem_dmem_resp_data_w))); + + // crossbar/sign extension for 8/16 bit loads (in writeback stage) + val dmem_resp_data_h = + Mux(r_dmem_resp_pos(1).toBool, r_dmem_resp_data(31, 16), r_dmem_resp_data(15, 0)); + val dmem_resp_data_b = + Mux(r_dmem_resp_pos(0).toBool, dmem_resp_data_h(15, 8), dmem_resp_data_h(7, 0)); + + val dmem_resp_data_final = + Mux(r_dmem_resp_type === MT_B, Cat(Fill(56, dmem_resp_data_b(7)), dmem_resp_data_b), + Mux(r_dmem_resp_type === MT_BU, Cat(UFix(0, 56), dmem_resp_data_b), + Mux(r_dmem_resp_type === MT_H, Cat(Fill(48, dmem_resp_data_h(15)), dmem_resp_data_h), + Mux(r_dmem_resp_type === MT_HU, Cat(UFix(0, 48), dmem_resp_data_h), + Mux((r_dmem_resp_type === MT_W) || + (r_dmem_resp_type === MT_WU) || + (r_dmem_resp_type === MT_D), r_dmem_resp_data, + UFix(0,64)))))); + // bypass muxes val id_rs1 = Mux(io.ctrl.div_wb, div_result, 
Mux(io.ctrl.mul_wb, mul_result, Mux(id_raddr1 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr1 === ex_reg_waddr, ex_wdata, Mux(id_raddr1 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata, - Mux(id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr, io.dmem.resp_data, - Mux(id_raddr1 != UFix(0, 5) && r_dmem_resp_val && id_raddr1 === r_dmem_resp_waddr, r_dmem_resp_data, + Mux(id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr, mem_dmem_resp_data, + Mux(id_raddr1 != UFix(0, 5) && r_dmem_resp_val && id_raddr1 === r_dmem_resp_waddr, dmem_resp_data_final, Mux(id_raddr1 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr1 === wb_reg_waddr, wb_reg_wdata, id_rdata1))))))); val id_rs2 = Mux(id_raddr2 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr2 === ex_reg_waddr, ex_wdata, Mux(id_raddr2 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata, - Mux(id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr, io.dmem.resp_data, - Mux(id_raddr2 != UFix(0, 5) && r_dmem_resp_val && id_raddr2 === r_dmem_resp_waddr, r_dmem_resp_data, + Mux(id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr, mem_dmem_resp_data, + Mux(id_raddr2 != UFix(0, 5) && r_dmem_resp_val && id_raddr2 === r_dmem_resp_waddr, dmem_resp_data_final, Mux(id_raddr2 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr2 === wb_reg_waddr, wb_reg_wdata, id_rdata2))))); @@ -210,8 +240,6 @@ class rocketDpath extends Component UFix(0,5))))); io.ctrl.inst := id_reg_inst; -// io.ctrl.rs1 := id_rs1; -// io.ctrl.rs2 := id_rs2; // execute stage ex_reg_pc <== id_reg_pc; @@ -359,13 +387,24 @@ class rocketDpath extends Component // exception signal to control (for NPC select) io.ctrl.exception := mem_reg_ctrl_exception; - + // for load/use hazard detection (load byte/halfword) + io.ctrl.mem_waddr := mem_reg_waddr; + + // moved to earlier in file +// val mem_dmem_resp_data_w = +// Mux(io.dmem.resp_pos(2).toBool, 
io.dmem.resp_data(63, 32), io.dmem.resp_data(31, 0)); +// +// val mem_dmem_resp_data = +// Mux(io.dmem.resp_type === MT_D, io.dmem.resp_data, +// Mux(io.dmem.resp_type === MT_W, Cat(Fill(32, mem_dmem_resp_data_w(31)), mem_dmem_resp_data_w)), +// Cat(UFix(0,32), mem_dmem_resp_data_w)); + // writeback stage r_dmem_resp_val <== io.dmem.resp_val; r_dmem_resp_waddr <== io.dmem.resp_tag(4,0).toUFix; - r_dmem_resp_pos <== io.dmem.resp_tag(7,5).toUFix; - r_dmem_resp_type <== io.dmem.resp_tag(10,8).toUFix; - r_dmem_resp_data <== io.dmem.resp_data; + r_dmem_resp_pos <== dmem_resp_pos; + r_dmem_resp_type <== dmem_resp_type; + r_dmem_resp_data <== mem_dmem_resp_data; wb_reg_pc <== mem_reg_pc; wb_reg_pc_plus4 <== mem_reg_pc_plus4; @@ -387,23 +426,21 @@ class rocketDpath extends Component wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } - // crossbar/sign extension for 8/16/32 bit loads - val dmem_resp_data_w = - Mux(r_dmem_resp_pos(2).toBool, r_dmem_resp_data(63, 32), r_dmem_resp_data(31, 0)); - val dmem_resp_data_h = - Mux(r_dmem_resp_pos(1).toBool, dmem_resp_data_w(31, 16), dmem_resp_data_w(15, 0)); - val dmem_resp_data_b = - Mux(r_dmem_resp_pos(0).toBool, dmem_resp_data_h(15, 8), dmem_resp_data_h(7, 0)); - - val dmem_resp_data_final = - Mux(r_dmem_resp_type === MT_B, Cat(Fill(56, dmem_resp_data_b(7)), dmem_resp_data_b), - Mux(r_dmem_resp_type === MT_BU, Cat(UFix(0, 56), dmem_resp_data_b), - Mux(r_dmem_resp_type === MT_H, Cat(Fill(48, dmem_resp_data_h(15)), dmem_resp_data_h), - Mux(r_dmem_resp_type === MT_HU, Cat(UFix(0, 48), dmem_resp_data_h), - Mux(r_dmem_resp_type === MT_W, Cat(Fill(32, dmem_resp_data_w(31)), dmem_resp_data_w), - Mux(r_dmem_resp_type === MT_WU, Cat(UFix(0, 32), dmem_resp_data_w), - Mux(r_dmem_resp_type === MT_D, r_dmem_resp_data, - UFix(0, 64)))))))); + // crossbar/sign extension for 8/16 bit loads +// val dmem_resp_data_h = +// Mux(r_dmem_resp_pos(1).toBool, r_dmem_resp_data(31, 16), r_dmem_resp_data(15, 0)); +// val dmem_resp_data_b = +// 
Mux(r_dmem_resp_pos(0).toBool, dmem_resp_data_h(15, 8), dmem_resp_data_h(7, 0)); +// +// val dmem_resp_data_final = +// Mux(r_dmem_resp_type === MT_B, Cat(Fill(56, dmem_resp_data_b(7)), dmem_resp_data_b), +// Mux(r_dmem_resp_type === MT_BU, Cat(UFix(0, 56), dmem_resp_data_b), +// Mux(r_dmem_resp_type === MT_H, Cat(Fill(48, dmem_resp_data_h(15)), dmem_resp_data_h), +// Mux(r_dmem_resp_type === MT_HU, Cat(UFix(0, 48), dmem_resp_data_h), +// Mux((r_dmem_resp_type === MT_W) || +// (r_dmem_resp_type === MT_WU) || +// (r_dmem_resp_type === MT_D), r_dmem_resp_data, +// UFix(0,64)))))); // regfile write rfile.io.w0.addr := wb_reg_waddr; From 7479e085ecb75b4ae6c2a32ec73940d7d692e906 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 1 Nov 2011 22:04:45 -0700 Subject: [PATCH 0008/1087] dcache loads working - 1/2 cycle load/use delay depending on load type --- rocket/src/main/scala/consts.scala | 17 +++++++++-------- rocket/src/main/scala/ctrl.scala | 13 ++++++++----- rocket/src/main/scala/dpath.scala | 4 +++- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 8b8c8740..674fb33b 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -14,14 +14,15 @@ object Constants val BR_J = UFix(7, 4); val BR_JR = UFix(8, 4); - val PC_4 = UFix(0, 3); - val PC_BTB = UFix(1, 3); - val PC_EX4 = UFix(2, 3); - val PC_BR = UFix(3, 3); - val PC_J = UFix(4, 3); - val PC_JR = UFix(5, 3); - val PC_PCR = UFix(6, 3); - val PC_MEM = UFix(7, 3); + val PC_4 = UFix(0, 4); + val PC_BTB = UFix(1, 4); + val PC_EX4 = UFix(2, 4); + val PC_BR = UFix(3, 4); + val PC_J = UFix(4, 4); + val PC_JR = UFix(5, 4); + val PC_PCR = UFix(6, 4); + val PC_MEM = UFix(7, 4); + val PC_MEM4 = UFix(8, 4); val KF_Y = UFix(1, 1); val KF_N = UFix(0, 1); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 5650dfa8..cf3fff30 100644 --- 
a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -9,7 +9,7 @@ import Instructions._ class ioCtrlDpath extends Bundle() { // outputs to datapath - val sel_pc = UFix(3, 'output); + val sel_pc = UFix(4, 'output); val wen_btb = Bool('output); val stallf = Bool('output); val stalld = Bool('output); @@ -299,16 +299,19 @@ class rocketCtrl extends Component mem_reg_mem_type <== ex_reg_mem_type; } - // replay on a D$ load miss : FIXME - add a miss signal to D$ + // replay PC when the D$ is blocked + val replay_mem_pc = mem_reg_mem_val && !io.dmem.req_rdy; + // replay PC+4 on a D$ load miss val mem_cmd_load = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); - val replay_mem = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD) && !io.dmem.resp_val; - val dcache_miss = Reg(replay_mem); + val replay_mem_pc_plus4 = mem_cmd_load && !io.dmem.resp_val; + val dcache_miss = Reg(replay_mem_pc_plus4); io.dpath.mem_load := mem_cmd_load; io.dpath.dcache_miss := dcache_miss; io.dpath.sel_pc := - Mux(replay_mem, PC_MEM, + Mux(replay_mem_pc, PC_MEM, + Mux(replay_mem_pc_plus4, PC_MEM4, Mux(io.dpath.exception || ex_reg_eret, PC_PCR, Mux(!ex_reg_btb_hit && br_taken, PC_BR, Mux(ex_reg_btb_hit && !br_taken || ex_reg_privileged, PC_EX4, diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 5e5bff7a..1f6a287e 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -130,7 +130,8 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, Mux(io.ctrl.sel_pc === PC_PCR, ex_pcr(31,0).toUFix, - Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc_plus4, + Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, + Mux(io.ctrl.sel_pc === PC_MEM4, mem_reg_pc_plus4, UFix(0, 32))))))))); when (!io.host.start){ @@ -184,6 +185,7 @@ class rocketDpath extends Component // moved this here to avoid having to do forward declaration // TODO: cleanup + // 64/32 bit load 
handling (in mem stage) val dmem_resp_pos = io.dmem.resp_tag(7,5).toUFix; val dmem_resp_type = io.dmem.resp_tag(10,8); From d8ffecf5653d4261a00caab33828081e984aa2ba Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 1 Nov 2011 22:10:06 -0700 Subject: [PATCH 0009/1087] dcache fix --- rocket/src/main/scala/ctrl.scala | 6 ++++-- rocket/src/main/scala/dpath.scala | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index cf3fff30..ed70dc74 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -305,6 +305,7 @@ class rocketCtrl extends Component val mem_cmd_load = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); val replay_mem_pc_plus4 = mem_cmd_load && !io.dmem.resp_val; val dcache_miss = Reg(replay_mem_pc_plus4); + val replay_mem = replay_mem_pc | replay_mem_pc_plus4; io.dpath.mem_load := mem_cmd_load; io.dpath.dcache_miss := dcache_miss; @@ -318,7 +319,7 @@ class rocketCtrl extends Component Mux(jr_taken, PC_JR, Mux(j_taken, PC_J, Mux(io.dpath.btb_hit, PC_BTB, - PC_4))))))); + PC_4)))))))); io.dpath.wen_btb := ~ex_reg_btb_hit & br_taken; @@ -330,7 +331,8 @@ class rocketCtrl extends Component io.dpath.exception | ex_reg_privileged | ex_reg_eret | - replay_mem; + replay_mem_pc | + replay_mem_pc_plus4; io.dpath.stallf := ~take_pc & diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 1f6a287e..af4240e6 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -132,7 +132,7 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_PCR, ex_pcr(31,0).toUFix, Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, Mux(io.ctrl.sel_pc === PC_MEM4, mem_reg_pc_plus4, - UFix(0, 32))))))))); + UFix(0, 32)))))))))); when (!io.host.start){ if_reg_pc <== UFix(0, 32); //32'hFFFF_FFFC; From 7a528d6255e3a8f2cd6c443de4fb12ac0387b0d8 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 1 Nov 2011 
23:14:34 -0700 Subject: [PATCH 0010/1087] fixes for div/mul hazard checking + cleanup --- rocket/src/main/scala/cpu.scala | 48 +---- rocket/src/main/scala/ctrl.scala | 126 +++++++---- rocket/src/main/scala/dcache.scala | 322 ++++++++++++++--------------- rocket/src/main/scala/dpath.scala | 16 +- 4 files changed, 249 insertions(+), 263 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 2e3374f7..965dfd0e 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -40,63 +40,23 @@ class rocketProc extends Component val ctrl = new rocketCtrl(); val dpath = new rocketDpath(); -// val mem = new rocketMemory(); -// val wb = new rocketWriteback(); ctrl.io.dpath <> dpath.io.ctrl; + ctrl.io.dmem ^^ io.dmem; ctrl.io.host.start ^^ io.host.start; ctrl.io.imem ^^ io.imem; + + dpath.io.dmem ^^ io.dmem; dpath.io.imem.req_addr ^^ io.imem.req_addr; dpath.io.imem.resp_data ^^ io.imem.resp_data; dpath.io.host ^^ io.host; dpath.io.debug ^^ io.debug; - ctrl.io.dmem ^^ io.dmem; - dpath.io.dmem ^^ io.dmem; - - // FIXME + // FIXME: console disconnected // io.console.bits := dpath.io.dpath.rs1(7,0); io.console.bits := Bits(0,8); io.console.valid := ctrl.io.console.valid; ctrl.io.console.rdy := io.console.rdy; - - // dpath.io.wb <> wb.io; -// dpath.io.wb.wen <> wb.io.wb_wen; -// dpath.io.wb.waddr <> wb.io.wb_waddr; -// dpath.io.wb.wdata <> wb.io.wb_wdata; - - -// ctrl.io.mem.mrq_val <> mem.io.mem_mrq_val; -// ctrl.io.mem.mrq_cmd <> mem.io.mem_mrq_cmd; -// ctrl.io.mem.mrq_type <> mem.io.mem_mrq_type; -// ctrl.io.mem.mrq_deq <> mem.io.mem_mrq_deq; -// ctrl.io.mem.xsdq_rdy <> mem.io.mem_xsdq_rdy; -// ctrl.io.mem.xsdq_val <> mem.io.mem_xsdq_val; -// ctrl.io.mem.dc_busy := !io.dmem.req_rdy; - -// ctrl.io.console ^^ io.console; -// ctrl.io.wb.waddr <> wb.io.wb_waddr; -// ctrl.io.wb.wen <> wb.io.wb_wen; - - // TODO: SHOULD BE THE FOLLOWING BUT NEED BETTER INTERFACE CHUNKS - // mem.io.dmem >< io.dmem; - -// mem.io.dmem_req_val 
^^ io.dmem.req_val; -// mem.io.dmem_req_rdy ^^ io.dmem.req_rdy; -// mem.io.dmem_req_op ^^ io.dmem.req_op; -// mem.io.dmem_req_addr ^^ io.dmem.req_addr; -// mem.io.dmem_req_data ^^ io.dmem.req_data; -// mem.io.dmem_req_wmask ^^ io.dmem.req_wmask; -// mem.io.dmem_req_tag ^^ io.dmem.req_tag; - -// -// mem.io.dpath_rs2 <> dpath.io.dpath.rs2; -// mem.io.dpath_waddr <> dpath.io.dpath.waddr; -// mem.io.dpath_alu_out <> dpath.io.dpath.alu_out; - -// wb.io.dmem_resp_val ^^ io.dmem.resp_val; -// wb.io.dmem_resp_data ^^ io.dmem.resp_data; -// wb.io.dmem_resp_tag ^^ io.dmem.resp_tag; } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ed70dc74..4b1326b5 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -41,6 +41,7 @@ class ioCtrlDpath extends Bundle() val mem_load = Bool('output); val dcache_miss = Bool('output); val wen = Bool('output); + val wb_div_mul = Bool('output); // inputs from datapath val btb_hit = Bool('input); val inst = Bits(32, 'input); @@ -52,10 +53,11 @@ class ioCtrlDpath extends Bundle() val mul_result_val = Bool('input); val ex_waddr = UFix(5,'input); // write addr from execute stage val mem_waddr = UFix(5,'input); // write addr from memory stage + val wb_waddr = UFix(5,'input); // write addr from writeback stage val exception = Bool('input); val status = Bits(8, 'input); - val sboard_set = Bool('input); - val sboard_seta = UFix(5, 'input); +// val sboard_set = Bool('input); +// val sboard_seta = UFix(5, 'input); val sboard_clr0 = Bool('input); val sboard_clr0a = UFix(5, 'input); val sboard_clr1 = Bool('input); @@ -205,13 +207,21 @@ class rocketCtrl extends Component val console_out_fire = id_console_out_val & ~io.dpath.killd; io.console.valid := console_out_fire.toBool; + val wb_reg_div_mul_val = Reg(){Bool()}; + val dcache_miss = Reg(){Bool()}; + val sboard = new rocketCtrlSboard(); sboard.io.raddra := id_raddr2.toUFix; sboard.io.raddrb := id_raddr1.toUFix; sboard.io.raddrc := 
id_waddr.toUFix; - sboard.io.set := io.dpath.sboard_set; - sboard.io.seta := io.dpath.sboard_seta; +// sboard.io.set := io.dpath.sboard_set; +// sboard.io.seta := io.dpath.sboard_seta; + + // scoreboard set (for D$ misses, div, mul) + sboard.io.set := wb_reg_div_mul_val | dcache_miss; + sboard.io.seta := io.dpath.wb_waddr; + sboard.io.clr0 := io.dpath.sboard_clr0; sboard.io.clr0a := io.dpath.sboard_clr0a; sboard.io.clr1 := io.dpath.sboard_clr1; @@ -222,14 +232,15 @@ class rocketCtrl extends Component val id_stall_waddr = sboard.io.stallc; val id_stall_ra = sboard.io.stallra; - val id_reg_btb_hit = Reg(width = 1, resetVal = Bool(false)); - val ex_reg_br_type = Reg(){UFix(width = 4)}; - val ex_reg_btb_hit = Reg(){Bool()}; - val ex_reg_mem_val = Reg(){Bool()}; - val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; - val ex_reg_mem_type = Reg(){UFix(width = 3)}; - val ex_reg_eret = Reg(resetVal = Bool(false)); - val ex_reg_privileged = Reg(resetVal = Bool(false)); + val id_reg_btb_hit = Reg(width = 1, resetVal = Bool(false)); + val ex_reg_br_type = Reg(){UFix(width = 4)}; + val ex_reg_btb_hit = Reg(){Bool()}; + val ex_reg_div_mul_val = Reg(){Bool()}; + val ex_reg_mem_val = Reg(){Bool()}; + val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; + val ex_reg_mem_type = Reg(){UFix(width = 3)}; + val ex_reg_eret = Reg(resetVal = Bool(false)); + val ex_reg_privileged = Reg(resetVal = Bool(false)); when (!io.dpath.stalld) { when (io.dpath.killf) { @@ -241,22 +252,24 @@ class rocketCtrl extends Component } when (reset.toBool || io.dpath.killd) { - ex_reg_br_type <== BR_N; - ex_reg_btb_hit <== Bool(false); - ex_reg_mem_val <== Bool(false); - ex_reg_mem_cmd <== UFix(0, 4); - ex_reg_mem_type <== UFix(0, 3); - ex_reg_eret <== Bool(false); - ex_reg_privileged <== Bool(false); + ex_reg_br_type <== BR_N; + ex_reg_btb_hit <== Bool(false); + ex_reg_div_mul_val <== Bool(false); + ex_reg_mem_val <== Bool(false); + ex_reg_mem_cmd <== UFix(0, 4); + ex_reg_mem_type <== UFix(0, 3); + ex_reg_eret <== 
Bool(false); + ex_reg_privileged <== Bool(false); } otherwise { - ex_reg_br_type <== id_br_type; - ex_reg_btb_hit <== id_reg_btb_hit; - ex_reg_mem_val <== id_mem_val.toBool; - ex_reg_mem_cmd <== id_mem_cmd; - ex_reg_mem_type <== id_mem_type; - ex_reg_eret <== id_eret.toBool; - ex_reg_privileged <== id_privileged.toBool; + ex_reg_br_type <== id_br_type; + ex_reg_btb_hit <== id_reg_btb_hit; + ex_reg_div_mul_val <== id_div_val.toBool || id_mul_val.toBool; + ex_reg_mem_val <== id_mem_val.toBool; + ex_reg_mem_cmd <== id_mem_cmd; + ex_reg_mem_type <== id_mem_type; + ex_reg_eret <== id_eret.toBool; + ex_reg_privileged <== id_privileged.toBool; } val beq = io.dpath.br_eq; @@ -283,31 +296,42 @@ class rocketCtrl extends Component io.dmem.req_val := ex_reg_mem_val && ~io.dpath.killx; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; - - val mem_reg_mem_val = Reg(){Bool()}; - val mem_reg_mem_cmd = Reg(){UFix(width = 4)}; - val mem_reg_mem_type = Reg(){UFix(width = 3)}; + + val mem_reg_div_mul_val = Reg(){Bool()}; + val mem_reg_mem_val = Reg(){Bool()}; + val mem_reg_mem_cmd = Reg(){UFix(width = 4)}; + val mem_reg_mem_type = Reg(){UFix(width = 3)}; when (reset.toBool || io.dpath.killx) { - mem_reg_mem_val <== Bool(false); - mem_reg_mem_cmd <== UFix(0, 4); - mem_reg_mem_type <== UFix(0, 3); + mem_reg_div_mul_val <== Bool(false); + mem_reg_mem_val <== Bool(false); + mem_reg_mem_cmd <== UFix(0, 4); + mem_reg_mem_type <== UFix(0, 3); } otherwise { - mem_reg_mem_val <== ex_reg_mem_val; - mem_reg_mem_cmd <== ex_reg_mem_cmd; - mem_reg_mem_type <== ex_reg_mem_type; + mem_reg_div_mul_val <== ex_reg_div_mul_val; + mem_reg_mem_val <== ex_reg_mem_val; + mem_reg_mem_cmd <== ex_reg_mem_cmd; + mem_reg_mem_type <== ex_reg_mem_type; } - + + when (reset.toBool || io.dpath.killm) { + wb_reg_div_mul_val <== Bool(false); + } + otherwise { + wb_reg_div_mul_val <== mem_reg_div_mul_val; + } + // replay PC when the D$ is blocked val replay_mem_pc = mem_reg_mem_val && 
!io.dmem.req_rdy; // replay PC+4 on a D$ load miss val mem_cmd_load = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); val replay_mem_pc_plus4 = mem_cmd_load && !io.dmem.resp_val; - val dcache_miss = Reg(replay_mem_pc_plus4); val replay_mem = replay_mem_pc | replay_mem_pc_plus4; - io.dpath.mem_load := mem_cmd_load; + dcache_miss <== replay_mem_pc_plus4; + + io.dpath.mem_load := mem_cmd_load; io.dpath.dcache_miss := dcache_miss; io.dpath.sel_pc := @@ -342,8 +366,7 @@ class rocketCtrl extends Component io.dpath.stalld ); - // check for loads in execute stage to detect load/use hazards - + // check for loads in execute and mem stages to detect load/use hazards val ex_mem_cmd_load = ex_reg_mem_val && (ex_reg_mem_cmd === M_XRD); val lu_stall_raddr1_ex = @@ -374,9 +397,28 @@ class rocketCtrl extends Component id_ren2.toBool && (id_raddr2 === io.dpath.mem_waddr); + // check for divide and multiply instructions in ex,mem,wb stages + val dm_stall_ex = + ex_reg_div_mul_val && + ((id_ren1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || + (id_ren2.toBool && (id_raddr2 === io.dpath.ex_waddr))); + + val dm_stall_mem = + mem_reg_div_mul_val && + ((id_ren1.toBool && (id_raddr1 === io.dpath.mem_waddr)) || + (id_ren2.toBool && (id_raddr2 === io.dpath.mem_waddr))); + + val dm_stall_wb = + wb_reg_div_mul_val && + ((id_ren1.toBool && (id_raddr1 === io.dpath.wb_waddr)) || + (id_ren2.toBool && (id_raddr2 === io.dpath.wb_waddr))); + + val dm_stall = dm_stall_ex || dm_stall_mem || dm_stall_wb; + val ctrl_stalld = ~take_pc & ( + dm_stall | lu_stall_raddr1_ex | lu_stall_raddr2_ex | lu_stall_raddr1_mem | @@ -396,7 +438,7 @@ class rocketCtrl extends Component io.dpath.mul_result_val ); - val ctrl_killd = take_pc | ctrl_stalld; + val ctrl_killd = take_pc | ctrl_stalld | io.dpath.killx; // for divider, multiplier writeback val mul_wb = io.dpath.mul_result_val; diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 35a11534..8233de86 100644 --- 
a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -91,165 +91,6 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { } -// basic direct mapped data cache, 2 cycle read latency -// parameters : -// lines = # of cache lines -// addr_bits = address width (word addressable) bits -// 64 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines -/* -class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { - val io = new ioDCacheDM(); - - val indexbits = ceil(log10(lines)/log10(2)).toInt; - val offsetbits = 6; - val tagmsb = addrbits - 1; - val taglsb = indexbits+offsetbits; - val indexmsb = taglsb-1; - val indexlsb = offsetbits; - val offsetmsb = indexlsb-1; - val offsetlsb = 3; - - val s_reset :: s_ready :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(7) { UFix() }; - val state = Reg(resetVal = s_reset); - - val r_cpu_req_addr = Reg(Bits(0, addrbits)); - val r_r_cpu_req_addr = Reg(r_cpu_req_addr); - val r_cpu_req_val = Reg(Bool(false)); - val r_cpu_req_data = Reg(Bits(0,64)); - val r_cpu_req_cmd = Reg(Bits(0,4)); - val r_cpu_req_wmask = Reg(Bits(0,8)); - val r_cpu_req_tag = Reg(Bits(0,12)); - val r_cpu_resp_tag = Reg(r_cpu_req_tag); - val r_cpu_resp_val = Reg(Bool(false)); - - when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_addr <== io.cpu.req_addr; - r_cpu_req_data <== io.cpu.req_data; - r_cpu_req_cmd <== io.cpu.req_cmd; - r_cpu_req_wmask <== io.cpu.req_wmask; - r_cpu_req_tag <== io.cpu.req_tag; } - - val req_load = (r_cpu_req_cmd === M_XRD); - val req_store = (r_cpu_req_cmd === M_XWR); - val req_flush = (r_cpu_req_cmd === M_FLA); - - when (io.cpu.req_rdy) { r_cpu_req_val <== io.cpu.req_val; } - otherwise { r_cpu_req_val <== Bool(false); } - - // counter - val rr_count = Reg(resetVal = UFix(0,2)); - val rr_count_next = rr_count + UFix(1); - when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy)) - { 
rr_count <== rr_count_next; } - - // tag array - val tag_we = (state === s_resolve_miss); - val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; - val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); - val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata); - val tag_raddr = Mux((state === s_ready), io.cpu.req_addr(indexmsb, indexlsb).toUFix, r_cpu_req_addr(indexmsb, indexlsb).toUFix); - val tag_rdata = Reg(tag_array.read(tag_raddr)); - - // valid bit array - val vb_array = Reg(resetVal = Bits(0, lines)); - val vb_rdata = Reg(vb_array(tag_raddr)); - when (tag_we && !req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } - when (tag_we && req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } - - val tag_valid = vb_rdata.toBool; - val tag_match = tag_valid && !req_flush && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); - val store = ((state === s_ready) && r_cpu_req_val && req_store && tag_match ) || - ((state === s_resolve_miss) && req_store); - - // dirty bit array - val db_array = Reg(resetVal = Bits(0, lines)); - val db_rdata = Reg(db_array(tag_raddr)); - val tag_dirty = db_rdata.toBool; - when (store) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } - when (tag_we) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } - - // data array - val data_array_we = ((state === s_refill) && io.mem.resp_val) || store; - val data_array_waddr = Mux((state === s_refill), - Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - r_cpu_req_addr(indexmsb, offsetmsb-1).toUFix); - - val data_array_wdata = Mux((state === s_refill), io.mem.resp_data, Cat(r_cpu_req_data, r_cpu_req_data)); - - val req_wmask_expand = Cat(Fill(8, r_cpu_req_wmask(7)), - Fill(8, r_cpu_req_wmask(6)), - Fill(8, r_cpu_req_wmask(5)), - Fill(8, r_cpu_req_wmask(4)), - Fill(8, r_cpu_req_wmask(3)), - Fill(8, r_cpu_req_wmask(2)), - Fill(8, 
r_cpu_req_wmask(1)), - Fill(8, r_cpu_req_wmask(0))); - - val store_wmask = Mux(r_cpu_req_addr(offsetlsb).toBool, - Cat(req_wmask_expand, Bits(0,64)), - Cat(Bits(0,64), req_wmask_expand)); - - val data_array_wmask = Mux((state === s_refill), ~Bits(0,128), store_wmask); - val data_array = Mem(lines*4, data_array_we, data_array_waddr, data_array_wdata, wrMask = data_array_wmask, resetVal = null); - val data_array_raddr = Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count_next).toUFix, - Mux((state === s_start_writeback) || (state === s_writeback), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - r_cpu_req_addr(indexmsb, offsetmsb-1))); - val data_array_rdata = Reg(data_array.read(data_array_raddr)); - - // output signals - io.cpu.req_rdy := (state === s_ready) && (!r_cpu_req_val || tag_match); - - when ((((state === s_ready) && r_cpu_req_val && tag_match) || (state === s_resolve_miss)) && !req_store) - { r_cpu_resp_val <== Bool(true); } - otherwise { r_cpu_resp_val <== Bool(false); } - - io.cpu.resp_val := r_cpu_resp_val; - io.cpu.resp_data := Mux(r_r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); - io.cpu.resp_tag := r_cpu_resp_tag; - - io.mem.req_val := (state === s_req_refill) || (state === s_writeback); - io.mem.req_rw := (state === s_writeback); - io.mem.req_wdata := data_array_rdata; - io.mem.req_tag := UFix(0); - io.mem.req_addr := Mux(state === s_writeback, - Cat(tag_rdata, r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix); - - // control state machine - switch (state) { - is (s_reset) { - state <== s_ready; - } - is (s_ready) { - when (~r_cpu_req_val) { state <== s_ready; } - when (r_cpu_req_val & tag_match) { state <== s_ready; } - when (tag_valid & tag_dirty) { state <== s_start_writeback; } - when (req_flush) { state <== s_resolve_miss; } - otherwise { state <== s_req_refill; } - } - is 
(s_start_writeback) { - state <== s_writeback; - } - is (s_writeback) { - when (io.mem.req_rdy && (rr_count === UFix(3,2))) { - when (req_flush) { state <== s_resolve_miss; } - otherwise { state <== s_req_refill; } - } - } - is (s_req_refill) - { - when (io.mem.req_rdy) { state <== s_refill; } - } - is (s_refill) { - when (io.mem.resp_val && (rr_count === UFix(3,2))) { state <== s_resolve_miss; } - } - is (s_resolve_miss) { - state <== s_ready; - } - } -} -*/ - class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val io = new ioDCacheDM(); @@ -270,7 +111,6 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val r_cpu_req_data = Reg(resetVal = Bits(0,64)); val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); val r_cpu_req_type = Reg(resetVal = Bits(0,3)); -// val r_cpu_req_wmask = Reg(resetVal = Bits(0,8)); val r_cpu_req_tag = Reg(resetVal = Bits(0,5)); val p_store_data = Reg(resetVal = Bits(0,64)); @@ -287,7 +127,6 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { r_cpu_req_data <== io.cpu.req_data; r_cpu_req_cmd <== io.cpu.req_cmd; r_cpu_req_type <== io.cpu.req_type; -// r_cpu_req_wmask <== io.cpu.req_wmask; r_cpu_req_tag <== io.cpu.req_tag; } @@ -306,7 +145,6 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } // tag array -// val tag_we = (state === s_resolve_miss); val tag_we = (state === s_refill) && io.mem.req_rdy && (rr_count === UFix(3,2)); val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); @@ -500,4 +338,164 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } } +// basic direct mapped data cache, 2 cycle read latency +// parameters : +// lines = # of cache lines +// addr_bits = address width (word addressable) bits +// 64 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines +/* +class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { + val io = new ioDCacheDM(); + + val 
indexbits = ceil(log10(lines)/log10(2)).toInt; + val offsetbits = 6; + val tagmsb = addrbits - 1; + val taglsb = indexbits+offsetbits; + val indexmsb = taglsb-1; + val indexlsb = offsetbits; + val offsetmsb = indexlsb-1; + val offsetlsb = 3; + + val s_reset :: s_ready :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(7) { UFix() }; + val state = Reg(resetVal = s_reset); + + val r_cpu_req_addr = Reg(Bits(0, addrbits)); + val r_r_cpu_req_addr = Reg(r_cpu_req_addr); + val r_cpu_req_val = Reg(Bool(false)); + val r_cpu_req_data = Reg(Bits(0,64)); + val r_cpu_req_cmd = Reg(Bits(0,4)); + val r_cpu_req_wmask = Reg(Bits(0,8)); + val r_cpu_req_tag = Reg(Bits(0,12)); + val r_cpu_resp_tag = Reg(r_cpu_req_tag); + val r_cpu_resp_val = Reg(Bool(false)); + + when (io.cpu.req_val && io.cpu.req_rdy) { + r_cpu_req_addr <== io.cpu.req_addr; + r_cpu_req_data <== io.cpu.req_data; + r_cpu_req_cmd <== io.cpu.req_cmd; + r_cpu_req_wmask <== io.cpu.req_wmask; + r_cpu_req_tag <== io.cpu.req_tag; } + + val req_load = (r_cpu_req_cmd === M_XRD); + val req_store = (r_cpu_req_cmd === M_XWR); + val req_flush = (r_cpu_req_cmd === M_FLA); + + when (io.cpu.req_rdy) { r_cpu_req_val <== io.cpu.req_val; } + otherwise { r_cpu_req_val <== Bool(false); } + + // counter + val rr_count = Reg(resetVal = UFix(0,2)); + val rr_count_next = rr_count + UFix(1); + when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy)) + { rr_count <== rr_count_next; } + + // tag array + val tag_we = (state === s_resolve_miss); + val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; + val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); + val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata); + val tag_raddr = Mux((state === s_ready), io.cpu.req_addr(indexmsb, indexlsb).toUFix, r_cpu_req_addr(indexmsb, indexlsb).toUFix); + val tag_rdata = Reg(tag_array.read(tag_raddr)); + + // valid bit array + val vb_array = Reg(resetVal = Bits(0, lines)); + 
val vb_rdata = Reg(vb_array(tag_raddr)); + when (tag_we && !req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } + when (tag_we && req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } + + val tag_valid = vb_rdata.toBool; + val tag_match = tag_valid && !req_flush && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); + val store = ((state === s_ready) && r_cpu_req_val && req_store && tag_match ) || + ((state === s_resolve_miss) && req_store); + + // dirty bit array + val db_array = Reg(resetVal = Bits(0, lines)); + val db_rdata = Reg(db_array(tag_raddr)); + val tag_dirty = db_rdata.toBool; + when (store) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } + when (tag_we) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } + + // data array + val data_array_we = ((state === s_refill) && io.mem.resp_val) || store; + val data_array_waddr = Mux((state === s_refill), + Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, + r_cpu_req_addr(indexmsb, offsetmsb-1).toUFix); + + val data_array_wdata = Mux((state === s_refill), io.mem.resp_data, Cat(r_cpu_req_data, r_cpu_req_data)); + + val req_wmask_expand = Cat(Fill(8, r_cpu_req_wmask(7)), + Fill(8, r_cpu_req_wmask(6)), + Fill(8, r_cpu_req_wmask(5)), + Fill(8, r_cpu_req_wmask(4)), + Fill(8, r_cpu_req_wmask(3)), + Fill(8, r_cpu_req_wmask(2)), + Fill(8, r_cpu_req_wmask(1)), + Fill(8, r_cpu_req_wmask(0))); + + val store_wmask = Mux(r_cpu_req_addr(offsetlsb).toBool, + Cat(req_wmask_expand, Bits(0,64)), + Cat(Bits(0,64), req_wmask_expand)); + + val data_array_wmask = Mux((state === s_refill), ~Bits(0,128), store_wmask); + val data_array = Mem(lines*4, data_array_we, data_array_waddr, data_array_wdata, wrMask = data_array_wmask, resetVal = null); + val data_array_raddr = Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), 
rr_count_next).toUFix, + Mux((state === s_start_writeback) || (state === s_writeback), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, + r_cpu_req_addr(indexmsb, offsetmsb-1))); + val data_array_rdata = Reg(data_array.read(data_array_raddr)); + + // output signals + io.cpu.req_rdy := (state === s_ready) && (!r_cpu_req_val || tag_match); + + when ((((state === s_ready) && r_cpu_req_val && tag_match) || (state === s_resolve_miss)) && !req_store) + { r_cpu_resp_val <== Bool(true); } + otherwise { r_cpu_resp_val <== Bool(false); } + + io.cpu.resp_val := r_cpu_resp_val; + io.cpu.resp_data := Mux(r_r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); + io.cpu.resp_tag := r_cpu_resp_tag; + + io.mem.req_val := (state === s_req_refill) || (state === s_writeback); + io.mem.req_rw := (state === s_writeback); + io.mem.req_wdata := data_array_rdata; + io.mem.req_tag := UFix(0); + io.mem.req_addr := Mux(state === s_writeback, + Cat(tag_rdata, r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, + Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix); + + // control state machine + switch (state) { + is (s_reset) { + state <== s_ready; + } + is (s_ready) { + when (~r_cpu_req_val) { state <== s_ready; } + when (r_cpu_req_val & tag_match) { state <== s_ready; } + when (tag_valid & tag_dirty) { state <== s_start_writeback; } + when (req_flush) { state <== s_resolve_miss; } + otherwise { state <== s_req_refill; } + } + is (s_start_writeback) { + state <== s_writeback; + } + is (s_writeback) { + when (io.mem.req_rdy && (rr_count === UFix(3,2))) { + when (req_flush) { state <== s_resolve_miss; } + otherwise { state <== s_req_refill; } + } + } + is (s_req_refill) + { + when (io.mem.req_rdy) { state <== s_refill; } + } + is (s_refill) { + when (io.mem.resp_val && (rr_count === UFix(3,2))) { state <== s_resolve_miss; } + } + is (s_resolve_miss) { + state <== s_ready; + } + } +} +*/ + + } diff --git a/rocket/src/main/scala/dpath.scala 
b/rocket/src/main/scala/dpath.scala index af4240e6..d53a257a 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -82,8 +82,6 @@ class rocketDpath extends Component val mem_reg_wdata = Reg(resetVal = Bits(0,64)); val mem_reg_raddr2 = Reg(resetVal = UFix(0,5)); val mem_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); - val mem_reg_ctrl_div_val = Reg(resetVal = Bool(false)); - val mem_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val mem_reg_ctrl_exception = Reg(resetVal = Bool(false)); @@ -95,8 +93,6 @@ class rocketDpath extends Component val wb_reg_wdata = Reg(resetVal = Bits(0,64)); val wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val wb_reg_raddr2 = Reg(resetVal = UFix(0,5)); - val wb_reg_ctrl_div_val = Reg(resetVal = Bool(false)); - val wb_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); @@ -373,15 +369,11 @@ class rocketDpath extends Component mem_reg_raddr2 <== ex_reg_raddr2; when (io.ctrl.killx) { - mem_reg_ctrl_div_val <== Bool(false); - mem_reg_ctrl_mul_val <== Bool(false); mem_reg_ctrl_wen <== Bool(false); mem_reg_ctrl_wen_pcr <== Bool(false); mem_reg_ctrl_exception <== Bool(false); } otherwise { - mem_reg_ctrl_div_val <== ex_reg_ctrl_div_val; - mem_reg_ctrl_mul_val <== ex_reg_ctrl_mul_val; mem_reg_ctrl_wen <== ex_reg_ctrl_wen; mem_reg_ctrl_wen_pcr <== ex_reg_ctrl_wen_pcr; mem_reg_ctrl_exception <== ex_reg_ctrl_exception; @@ -416,14 +408,10 @@ class rocketDpath extends Component wb_reg_raddr2 <== mem_reg_raddr2; when (io.ctrl.killm) { - wb_reg_ctrl_div_val <== Bool(false); - wb_reg_ctrl_mul_val <== Bool(false); wb_reg_ctrl_wen <== Bool(false); wb_reg_ctrl_wen_pcr <== Bool(false); } otherwise { - wb_reg_ctrl_div_val <== mem_reg_ctrl_div_val; - wb_reg_ctrl_mul_val <== mem_reg_ctrl_mul_val; wb_reg_ctrl_wen <== 
mem_reg_ctrl_wen; wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } @@ -453,9 +441,7 @@ class rocketDpath extends Component rfile.io.w1.en := r_dmem_resp_val; rfile.io.w1.data := dmem_resp_data_final; - // scoreboard set (for D$ misses, div, mul) - io.ctrl.sboard_set := wb_reg_ctrl_div_val | wb_reg_ctrl_mul_val | io.ctrl.dcache_miss; - io.ctrl.sboard_seta := wb_reg_waddr; + io.ctrl.wb_waddr := wb_reg_waddr; // scoreboard clear (for div/mul and D$ load miss writebacks) io.ctrl.sboard_clr0 := wb_reg_ctrl_ll_wb; From 3a02028a35b76ea3cbc9368e12edb54ff79d64ec Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Wed, 2 Nov 2011 13:32:32 -0700 Subject: [PATCH 0011/1087] fixes to exception and dcache miss/blocked handling --- rocket/src/main/scala/ctrl.scala | 62 ++++++++++-------------- rocket/src/main/scala/dpath.scala | 79 +++++++++++++++---------------- 2 files changed, 63 insertions(+), 78 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 4b1326b5..2b155dd8 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -56,8 +56,6 @@ class ioCtrlDpath extends Bundle() val wb_waddr = UFix(5,'input); // write addr from writeback stage val exception = Bool('input); val status = Bits(8, 'input); -// val sboard_set = Bool('input); -// val sboard_seta = UFix(5, 'input); val sboard_clr0 = Bool('input); val sboard_clr0a = UFix(5, 'input); val sboard_clr1 = Bool('input); @@ -214,9 +212,6 @@ class rocketCtrl extends Component sboard.io.raddra := id_raddr2.toUFix; sboard.io.raddrb := id_raddr1.toUFix; sboard.io.raddrc := id_waddr.toUFix; - -// sboard.io.set := io.dpath.sboard_set; -// sboard.io.seta := io.dpath.sboard_seta; // scoreboard set (for D$ misses, div, mul) sboard.io.set := wb_reg_div_mul_val | dcache_miss; @@ -298,21 +293,27 @@ class rocketCtrl extends Component io.dmem.req_type := ex_reg_mem_type; val mem_reg_div_mul_val = Reg(){Bool()}; + val mem_reg_eret = Reg(){Bool()}; val mem_reg_mem_val = 
Reg(){Bool()}; val mem_reg_mem_cmd = Reg(){UFix(width = 4)}; val mem_reg_mem_type = Reg(){UFix(width = 3)}; + val mem_reg_privileged = Reg(){Bool()}; when (reset.toBool || io.dpath.killx) { mem_reg_div_mul_val <== Bool(false); + mem_reg_eret <== Bool(false); mem_reg_mem_val <== Bool(false); mem_reg_mem_cmd <== UFix(0, 4); mem_reg_mem_type <== UFix(0, 3); + mem_reg_privileged <== Bool(false); } otherwise { mem_reg_div_mul_val <== ex_reg_div_mul_val; + mem_reg_eret <== ex_reg_eret; mem_reg_mem_val <== ex_reg_mem_val; mem_reg_mem_cmd <== ex_reg_mem_cmd; mem_reg_mem_type <== ex_reg_mem_type; + mem_reg_privileged <== ex_reg_privileged; } when (reset.toBool || io.dpath.killm) { @@ -327,8 +328,10 @@ class rocketCtrl extends Component // replay PC+4 on a D$ load miss val mem_cmd_load = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); val replay_mem_pc_plus4 = mem_cmd_load && !io.dmem.resp_val; - val replay_mem = replay_mem_pc | replay_mem_pc_plus4; + val kill_ex = replay_mem_pc | replay_mem_pc_plus4 | mem_reg_privileged | io.dpath.exception; + val kill_mem = io.dpath.exception; + dcache_miss <== replay_mem_pc_plus4; io.dpath.mem_load := mem_cmd_load; @@ -336,10 +339,11 @@ class rocketCtrl extends Component io.dpath.sel_pc := Mux(replay_mem_pc, PC_MEM, - Mux(replay_mem_pc_plus4, PC_MEM4, - Mux(io.dpath.exception || ex_reg_eret, PC_PCR, + Mux(replay_mem_pc_plus4 || mem_reg_privileged, PC_MEM4, + Mux(io.dpath.exception || mem_reg_eret, PC_PCR, Mux(!ex_reg_btb_hit && br_taken, PC_BR, - Mux(ex_reg_btb_hit && !br_taken || ex_reg_privileged, PC_EX4, + Mux(ex_reg_btb_hit && !br_taken, PC_EX4, +// Mux(ex_reg_btb_hit && !br_taken || ex_reg_privileged, PC_EX4, Mux(jr_taken, PC_JR, Mux(j_taken, PC_J, Mux(io.dpath.btb_hit, PC_BTB, @@ -353,8 +357,8 @@ class rocketCtrl extends Component jr_taken | j_taken | io.dpath.exception | - ex_reg_privileged | - ex_reg_eret | + mem_reg_privileged | + mem_reg_eret | replay_mem_pc | replay_mem_pc_plus4; @@ -369,15 +373,10 @@ class rocketCtrl extends 
Component // check for loads in execute and mem stages to detect load/use hazards val ex_mem_cmd_load = ex_reg_mem_val && (ex_reg_mem_cmd === M_XRD); - val lu_stall_raddr1_ex = + val lu_stall_ex = ex_mem_cmd_load && - id_ren1.toBool && - (id_raddr1 === io.dpath.ex_waddr); - - val lu_stall_raddr2_ex = - ex_mem_cmd_load && - id_ren2.toBool && - (id_raddr2 === io.dpath.ex_waddr); + ((id_ren1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || + (id_ren2.toBool && (id_raddr2 === io.dpath.ex_waddr))); val mem_mem_cmd_load_bh = mem_reg_mem_val && @@ -387,15 +386,12 @@ class rocketCtrl extends Component (mem_reg_mem_type === MT_H) || (mem_reg_mem_type === MT_HU)); - val lu_stall_raddr1_mem = + val lu_stall_mem = mem_mem_cmd_load_bh && - id_ren1.toBool && - (id_raddr1 === io.dpath.mem_waddr); + ((id_ren1.toBool && (id_raddr1 === io.dpath.mem_waddr)) || + (id_ren2.toBool && (id_raddr2 === io.dpath.mem_waddr))); - val lu_stall_raddr2_mem = - mem_mem_cmd_load_bh && - id_ren2.toBool && - (id_raddr2 === io.dpath.mem_waddr); + val lu_stall = lu_stall_ex || lu_stall_mem; // check for divide and multiply instructions in ex,mem,wb stages val dm_stall_ex = @@ -419,19 +415,13 @@ class rocketCtrl extends Component ~take_pc & ( dm_stall | - lu_stall_raddr1_ex | - lu_stall_raddr2_ex | - lu_stall_raddr1_mem | - lu_stall_raddr2_mem | + lu_stall | id_ren2 & id_stall_raddr2 | id_ren1 & id_stall_raddr1 | - (id_sel_wa === WA_RD) && id_stall_waddr | + (id_sel_wa === WA_RD) & id_stall_waddr | (id_sel_wa === WA_RA) & id_stall_ra | id_mem_val & ~io.dmem.req_rdy | id_sync & ~io.dmem.req_rdy | -// id_mem_val_masked & id_full_mrq | -// id_sync & (~id_empty_mrq | io.mem.dc_busy) | -// mem_xstore_val & ~io.mem.xsdq_rdy | id_console_out_val & ~io.console.rdy | id_div_val & ~io.dpath.div_rdy | io.dpath.div_result_val | @@ -448,8 +438,8 @@ class rocketCtrl extends Component io.dpath.killf := take_pc | ~io.imem.resp_val; io.dpath.killd := ctrl_killd.toBool; - io.dpath.killx := replay_mem.toBool; - 
io.dpath.killm := replay_mem.toBool; + io.dpath.killx := kill_ex.toBool; + io.dpath.killm := kill_mem.toBool; io.dpath.ren2 := id_ren2.toBool; io.dpath.ren1 := id_ren1.toBool; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index d53a257a..6383c4ec 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -76,23 +76,28 @@ class rocketDpath extends Component val ex_wdata = Wire() { Bits() }; // memory definitions - val mem_reg_pc = Reg(resetVal = UFix(0,32)); - val mem_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); - val mem_reg_waddr = Reg(resetVal = UFix(0,5)); - val mem_reg_wdata = Reg(resetVal = Bits(0,64)); - val mem_reg_raddr2 = Reg(resetVal = UFix(0,5)); - val mem_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); + val mem_reg_pc = Reg(resetVal = UFix(0,32)); + val mem_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); + val mem_reg_waddr = Reg(resetVal = UFix(0,5)); + val mem_reg_wdata = Reg(resetVal = Bits(0,64)); + val mem_reg_raddr2 = Reg(resetVal = UFix(0,5)); + val mem_reg_pcr = Reg(resetVal = Bits(0,64)); + val mem_reg_ctrl_cause = Reg(resetVal = UFix(0,5)); + val mem_reg_ctrl_eret = Reg(resetVal = Bool(false)); + val mem_reg_ctrl_exception = Reg(resetVal = Bool(false)); + val mem_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); - val mem_reg_ctrl_exception = Reg(resetVal = Bool(false)); // writeback definitions val wb_reg_pc = Reg(resetVal = UFix(0,32)); - val wb_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); val wb_reg_waddr = Reg(resetVal = UFix(0,5)); val wb_reg_wdata = Reg(resetVal = Bits(0,64)); val wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val wb_reg_raddr2 = Reg(resetVal = UFix(0,5)); + val wb_reg_ctrl_cause = Reg(resetVal = UFix(0,5)); + val wb_reg_ctrl_eret = Reg(resetVal = Bool(false)); + val wb_reg_ctrl_exception = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen = Reg(resetVal = 
Bool(false)); val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); @@ -125,7 +130,7 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, - Mux(io.ctrl.sel_pc === PC_PCR, ex_pcr(31,0).toUFix, + Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_pcr(31,0).toUFix, Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, Mux(io.ctrl.sel_pc === PC_MEM4, mem_reg_pc_plus4, UFix(0, 32)))))))))); @@ -336,10 +341,10 @@ class rocketDpath extends Component pcr.io.host.from ^^ io.host.from; pcr.io.host.to ^^ io.host.to; - pcr.io.eret := ex_reg_ctrl_eret; - pcr.io.exception := ex_reg_ctrl_exception; - pcr.io.cause := ex_reg_ctrl_cause; - pcr.io.pc := ex_reg_pc; +// pcr.io.eret := ex_reg_ctrl_eret; +// pcr.io.exception := ex_reg_ctrl_exception; +// pcr.io.cause := ex_reg_ctrl_cause; +// pcr.io.pc := ex_reg_pc; io.ctrl.status := pcr.io.status; io.debug.error_mode := pcr.io.debug.error_mode; @@ -363,17 +368,21 @@ class rocketDpath extends Component // memory stage mem_reg_pc <== ex_reg_pc; mem_reg_pc_plus4 <== ex_reg_pc_plus4; + mem_reg_pcr <== ex_pcr; mem_reg_waddr <== ex_reg_waddr; mem_reg_wdata <== ex_wdata; mem_reg_ctrl_ll_wb <== ex_reg_ctrl_ll_wb; mem_reg_raddr2 <== ex_reg_raddr2; + mem_reg_ctrl_cause <== ex_reg_ctrl_cause; when (io.ctrl.killx) { + mem_reg_ctrl_eret <== Bool(false); mem_reg_ctrl_wen <== Bool(false); mem_reg_ctrl_wen_pcr <== Bool(false); mem_reg_ctrl_exception <== Bool(false); } otherwise { + mem_reg_ctrl_eret <== ex_reg_ctrl_eret; mem_reg_ctrl_wen <== ex_reg_ctrl_wen; mem_reg_ctrl_wen_pcr <== ex_reg_ctrl_wen_pcr; mem_reg_ctrl_exception <== ex_reg_ctrl_exception; @@ -384,14 +393,7 @@ class rocketDpath extends Component // for load/use hazard detection (load byte/halfword) io.ctrl.mem_waddr := mem_reg_waddr; - // moved to earlier in file -// val mem_dmem_resp_data_w = -// Mux(io.dmem.resp_pos(2).toBool, io.dmem.resp_data(63, 32), io.dmem.resp_data(31, 0)); 
-// -// val mem_dmem_resp_data = -// Mux(io.dmem.resp_type === MT_D, io.dmem.resp_data, -// Mux(io.dmem.resp_type === MT_W, Cat(Fill(32, mem_dmem_resp_data_w(31)), mem_dmem_resp_data_w)), -// Cat(UFix(0,32), mem_dmem_resp_data_w)); + // 32/64 bit load handling (moved to earlier in file) // writeback stage r_dmem_resp_val <== io.dmem.resp_val; @@ -400,12 +402,14 @@ class rocketDpath extends Component r_dmem_resp_type <== dmem_resp_type; r_dmem_resp_data <== mem_dmem_resp_data; - wb_reg_pc <== mem_reg_pc; - wb_reg_pc_plus4 <== mem_reg_pc_plus4; - wb_reg_waddr <== mem_reg_waddr; - wb_reg_wdata <== mem_reg_wdata; - wb_reg_ctrl_ll_wb <== mem_reg_ctrl_ll_wb; - wb_reg_raddr2 <== mem_reg_raddr2; + wb_reg_pc <== mem_reg_pc; + wb_reg_waddr <== mem_reg_waddr; + wb_reg_wdata <== mem_reg_wdata; + wb_reg_ctrl_ll_wb <== mem_reg_ctrl_ll_wb; + wb_reg_raddr2 <== mem_reg_raddr2; + wb_reg_ctrl_cause <== mem_reg_ctrl_cause; + wb_reg_ctrl_eret <== mem_reg_ctrl_eret; + wb_reg_ctrl_exception <== mem_reg_ctrl_exception; when (io.ctrl.killm) { wb_reg_ctrl_wen <== Bool(false); @@ -416,21 +420,7 @@ class rocketDpath extends Component wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } - // crossbar/sign extension for 8/16 bit loads -// val dmem_resp_data_h = -// Mux(r_dmem_resp_pos(1).toBool, r_dmem_resp_data(31, 16), r_dmem_resp_data(15, 0)); -// val dmem_resp_data_b = -// Mux(r_dmem_resp_pos(0).toBool, dmem_resp_data_h(15, 8), dmem_resp_data_h(7, 0)); -// -// val dmem_resp_data_final = -// Mux(r_dmem_resp_type === MT_B, Cat(Fill(56, dmem_resp_data_b(7)), dmem_resp_data_b), -// Mux(r_dmem_resp_type === MT_BU, Cat(UFix(0, 56), dmem_resp_data_b), -// Mux(r_dmem_resp_type === MT_H, Cat(Fill(48, dmem_resp_data_h(15)), dmem_resp_data_h), -// Mux(r_dmem_resp_type === MT_HU, Cat(UFix(0, 48), dmem_resp_data_h), -// Mux((r_dmem_resp_type === MT_W) || -// (r_dmem_resp_type === MT_WU) || -// (r_dmem_resp_type === MT_D), r_dmem_resp_data, -// UFix(0,64)))))); + // crossbar/sign extension for 8/16 bit loads 
(moved to earlier in file) // regfile write rfile.io.w0.addr := wb_reg_waddr; @@ -454,6 +444,11 @@ class rocketDpath extends Component pcr.io.w.en := wb_reg_ctrl_wen_pcr; pcr.io.w.data := wb_reg_wdata; + pcr.io.eret := wb_reg_ctrl_eret; + pcr.io.exception := wb_reg_ctrl_exception; + pcr.io.cause := wb_reg_ctrl_cause; + pcr.io.pc := wb_reg_pc; + } } From 44599355549977f2fff26d4e0be74762432081b8 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Fri, 4 Nov 2011 15:40:41 -0700 Subject: [PATCH 0012/1087] dcache fixes - all tests and ubmarks pass, hello world still broken --- rocket/src/main/scala/consts.scala | 1 + rocket/src/main/scala/cpu.scala | 4 + rocket/src/main/scala/ctrl.scala | 35 +++++---- rocket/src/main/scala/dcache.scala | 119 ++++++++++++++++------------- rocket/src/main/scala/dpath.scala | 25 +++++- 5 files changed, 109 insertions(+), 75 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 674fb33b..f8069c4e 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -23,6 +23,7 @@ object Constants val PC_PCR = UFix(6, 4); val PC_MEM = UFix(7, 4); val PC_MEM4 = UFix(8, 4); + val PC_EX = UFix(9, 4); val KF_Y = UFix(1, 1); val KF_N = UFix(0, 1); diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 965dfd0e..2f54d775 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -8,6 +8,10 @@ class ioDebug extends Bundle() { val error_mode = Bool('output); val log_control = Bool('output); + val id_valid = Bool('output); + val ex_valid = Bool('output); + val mem_valid = Bool('output); + val wb_valid = Bool('output); } class ioHost(view: List[String] = null) extends Bundle(view) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2b155dd8..73aaa4b9 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -39,9 +39,7 @@ class ioCtrlDpath extends Bundle() val 
xcpt_syscall = Bool('output); val eret = Bool('output); val mem_load = Bool('output); - val dcache_miss = Bool('output); val wen = Bool('output); - val wb_div_mul = Bool('output); // inputs from datapath val btb_hit = Bool('input); val inst = Bits(32, 'input); @@ -67,7 +65,7 @@ class ioCtrlAll extends Bundle() val dpath = new ioCtrlDpath(); val console = new ioConsole(List("rdy", "valid")); val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); - val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_val")).flip(); + val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_val")).flip(); val host = new ioHost(List("start")); } @@ -323,33 +321,34 @@ class rocketCtrl extends Component wb_reg_div_mul_val <== mem_reg_div_mul_val; } - // replay PC when the D$ is blocked - val replay_mem_pc = mem_reg_mem_val && !io.dmem.req_rdy; - // replay PC+4 on a D$ load miss + // replay execute stage PC when the D$ is blocked +// val replay_mem_pc = mem_reg_mem_val && (mem_reg_mem_cmd != M_FLA) && !io.dmem.req_rdy; + val replay_ex = ex_reg_mem_val && !io.dmem.req_rdy; + // replay memory stage PC+4 on a D$ load miss val mem_cmd_load = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); - val replay_mem_pc_plus4 = mem_cmd_load && !io.dmem.resp_val; +// val replay_mem_pc_plus4 = mem_cmd_load && !io.dmem.resp_val; + val replay_mem = io.dmem.resp_miss; - val kill_ex = replay_mem_pc | replay_mem_pc_plus4 | mem_reg_privileged | io.dpath.exception; +// val kill_ex = replay_mem_pc | replay_mem_pc_plus4 | mem_reg_privileged; + val kill_ex = replay_ex | replay_mem | mem_reg_privileged; val kill_mem = io.dpath.exception; - dcache_miss <== replay_mem_pc_plus4; + dcache_miss <== io.dmem.resp_miss; io.dpath.mem_load := mem_cmd_load; - io.dpath.dcache_miss := dcache_miss; io.dpath.sel_pc := - Mux(replay_mem_pc, PC_MEM, - Mux(replay_mem_pc_plus4 || mem_reg_privileged, PC_MEM4, + Mux(replay_mem || mem_reg_privileged, PC_MEM4, 
Mux(io.dpath.exception || mem_reg_eret, PC_PCR, + Mux(replay_ex, PC_EX, Mux(!ex_reg_btb_hit && br_taken, PC_BR, Mux(ex_reg_btb_hit && !br_taken, PC_EX4, -// Mux(ex_reg_btb_hit && !br_taken || ex_reg_privileged, PC_EX4, Mux(jr_taken, PC_JR, Mux(j_taken, PC_J, Mux(io.dpath.btb_hit, PC_BTB, PC_4)))))))); - io.dpath.wen_btb := ~ex_reg_btb_hit & br_taken; + io.dpath.wen_btb := ~ex_reg_btb_hit & br_taken & ~kill_ex & ~kill_mem; val take_pc = ~ex_reg_btb_hit & br_taken | @@ -359,8 +358,8 @@ class rocketCtrl extends Component io.dpath.exception | mem_reg_privileged | mem_reg_eret | - replay_mem_pc | - replay_mem_pc_plus4; + replay_ex | + replay_mem; io.dpath.stallf := ~take_pc & @@ -428,7 +427,7 @@ class rocketCtrl extends Component io.dpath.mul_result_val ); - val ctrl_killd = take_pc | ctrl_stalld | io.dpath.killx; + val ctrl_killd = take_pc | ctrl_stalld; // for divider, multiplier writeback val mul_wb = io.dpath.mul_result_val; @@ -438,7 +437,7 @@ class rocketCtrl extends Component io.dpath.killf := take_pc | ~io.imem.resp_val; io.dpath.killd := ctrl_killd.toBool; - io.dpath.killx := kill_ex.toBool; + io.dpath.killx := kill_ex.toBool || kill_mem.toBool; io.dpath.killm := kill_mem.toBool; io.dpath.ren2 := id_ren2.toBool; diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 8233de86..a9b0dac3 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -13,7 +13,8 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { val req_type = Bits(3, 'input); val req_addr = UFix(32, 'input); val req_data = Bits(64, 'input); - val req_tag = Bits(12, 'input); + val req_tag = Bits(5, 'input); + val resp_miss = Bool('output); val resp_val = Bool('output); val resp_data = Bits(64, 'output); val resp_tag = Bits(12, 'output); @@ -56,7 +57,7 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { val flush_resp_count = Reg(resetVal = UFix(0, indexbits)); val flushing = Reg(resetVal = 
Bool(false)); val flush_waiting = Reg(resetVal = Bool(false)); - val r_cpu_req_tag = Reg(resetVal = Bits(0, 12)); + val r_cpu_req_tag = Reg(resetVal = Bits(0, 5)); when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_cmd === M_FLA)) { @@ -84,6 +85,7 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { dcache.io.mem ^^ io.mem; io.cpu.req_rdy := dcache.io.cpu.req_rdy && !flush_waiting; + io.cpu.resp_miss := dcache.io.cpu.resp_miss; io.cpu.resp_data := dcache.io.cpu.resp_data; io.cpu.resp_tag := dcache.io.cpu.resp_tag; io.cpu.resp_val := dcache.io.cpu.resp_val & @@ -108,14 +110,16 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val r_cpu_req_addr = Reg(resetVal = Bits(0, addrbits)); val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_data = Reg(resetVal = Bits(0,64)); +// val r_cpu_req_data = Reg(resetVal = Bits(0,64)); val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); val r_cpu_req_type = Reg(resetVal = Bits(0,3)); val r_cpu_req_tag = Reg(resetVal = Bits(0,5)); val p_store_data = Reg(resetVal = Bits(0,64)); val p_store_addr = Reg(resetVal = Bits(0,64)); - val p_store_wmask = Reg(resetVal = Bits(0,64)); + val p_store_cmd = Reg(resetVal = Bits(0,4)); + val p_store_type = Reg(resetVal = Bits(0,3)); +// val p_store_wmask = Reg(resetVal = Bits(0,64)); val p_store_valid = Reg(resetVal = Bool(false)); val req_load = (r_cpu_req_cmd === M_XRD); @@ -124,19 +128,25 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { when (io.cpu.req_val && io.cpu.req_rdy) { r_cpu_req_addr <== io.cpu.req_addr; - r_cpu_req_data <== io.cpu.req_data; r_cpu_req_cmd <== io.cpu.req_cmd; r_cpu_req_type <== io.cpu.req_type; r_cpu_req_tag <== io.cpu.req_tag; } - + + when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_cmd === M_XWR)) { + p_store_data <== io.cpu.req_data; + p_store_addr <== io.cpu.req_addr; + p_store_type <== io.cpu.req_type; + p_store_valid <== Bool(true); + } + when (io.cpu.req_rdy) { r_cpu_req_val <== 
io.cpu.req_val; } when ((state === s_resolve_miss) && !req_load) { r_cpu_req_val <== Bool(false); } - + // counter val rr_count = Reg(resetVal = UFix(0,2)); val rr_count_next = rr_count + UFix(1); @@ -145,7 +155,9 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } // tag array - val tag_we = (state === s_refill) && io.mem.req_rdy && (rr_count === UFix(3,2)); + val tag_we = + ((state === s_refill) && io.mem.req_rdy && (rr_count === UFix(3,2))) || + ((state === s_resolve_miss) && req_flush); val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata); @@ -166,57 +178,56 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val tag_valid = vb_rdata.toBool; val tag_match = tag_valid && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); - + // generate write mask and store data signals based on store type and address LSBs val wmask_b = - Mux(r_cpu_req_addr(2,0) === UFix(0, 3), Bits("b0000_0001", 8), - Mux(r_cpu_req_addr(2,0) === UFix(1, 3), Bits("b0000_0010", 8), - Mux(r_cpu_req_addr(2,0) === UFix(2, 3), Bits("b0000_0100", 8), - Mux(r_cpu_req_addr(2,0) === UFix(3, 3), Bits("b0000_1000", 8), - Mux(r_cpu_req_addr(2,0) === UFix(4, 3), Bits("b0001_0000", 8), - Mux(r_cpu_req_addr(2,0) === UFix(5, 3), Bits("b0010_0000", 8), - Mux(r_cpu_req_addr(2,0) === UFix(6, 3), Bits("b0100_0000", 8), - Mux(r_cpu_req_addr(2,0) === UFix(7, 3), Bits("b1000_0000", 8), + Mux(p_store_addr(2,0) === UFix(0, 3), Bits("b0000_0001", 8), + Mux(p_store_addr(2,0) === UFix(1, 3), Bits("b0000_0010", 8), + Mux(p_store_addr(2,0) === UFix(2, 3), Bits("b0000_0100", 8), + Mux(p_store_addr(2,0) === UFix(3, 3), Bits("b0000_1000", 8), + Mux(p_store_addr(2,0) === UFix(4, 3), Bits("b0001_0000", 8), + Mux(p_store_addr(2,0) === UFix(5, 3), Bits("b0010_0000", 8), + Mux(p_store_addr(2,0) === UFix(6, 3), Bits("b0100_0000", 8), + Mux(p_store_addr(2,0) === UFix(7, 3), 
Bits("b1000_0000", 8), UFix(0, 8))))))))); val wmask_h = - Mux(r_cpu_req_addr(2,1) === UFix(0, 2), Bits("b0000_0011", 8), - Mux(r_cpu_req_addr(2,1) === UFix(1, 2), Bits("b0000_1100", 8), - Mux(r_cpu_req_addr(2,1) === UFix(2, 2), Bits("b0011_0000", 8), - Mux(r_cpu_req_addr(2,1) === UFix(3, 2), Bits("b1100_0000", 8), + Mux(p_store_addr(2,1) === UFix(0, 2), Bits("b0000_0011", 8), + Mux(p_store_addr(2,1) === UFix(1, 2), Bits("b0000_1100", 8), + Mux(p_store_addr(2,1) === UFix(2, 2), Bits("b0011_0000", 8), + Mux(p_store_addr(2,1) === UFix(3, 2), Bits("b1100_0000", 8), UFix(0, 8))))); val wmask_w = - Mux(r_cpu_req_addr(2) === UFix(0, 1), Bits("b0000_1111", 8), - Mux(r_cpu_req_addr(2) === UFix(1, 1), Bits("b1111_0000", 8), + Mux(p_store_addr(2) === UFix(0, 1), Bits("b0000_1111", 8), + Mux(p_store_addr(2) === UFix(1, 1), Bits("b1111_0000", 8), UFix(0, 8))); val wmask_d = Bits("b1111_1111", 8); val store_wmask = - Mux(r_cpu_req_type === MT_B, wmask_b, - Mux(r_cpu_req_type === MT_H, wmask_h, - Mux(r_cpu_req_type === MT_W, wmask_w, - Mux(r_cpu_req_type === MT_D, wmask_d, + Mux(p_store_type === MT_B, wmask_b, + Mux(p_store_type === MT_H, wmask_h, + Mux(p_store_type === MT_W, wmask_w, + Mux(p_store_type === MT_D, wmask_d, UFix(0, 8))))); val store_data = - Mux(r_cpu_req_type === MT_B, Fill(8, r_cpu_req_data( 7,0)), - Mux(r_cpu_req_type === MT_H, Fill(4, r_cpu_req_data(15,0)), - Mux(r_cpu_req_type === MT_W, Fill(2, r_cpu_req_data(31,0)), - Mux(r_cpu_req_type === MT_D, r_cpu_req_data, + Mux(p_store_type === MT_B, Fill(8, p_store_data( 7,0)), + Mux(p_store_type === MT_H, Fill(4, p_store_data(15,0)), + Mux(p_store_type === MT_W, Fill(2, p_store_data(31,0)), + Mux(p_store_type === MT_D, p_store_data, UFix(0, 64))))); - - when ((state === s_ready) && r_cpu_req_val && req_store) { - p_store_data <== store_data; - p_store_addr <== r_cpu_req_addr; - p_store_wmask <== store_wmask; - p_store_valid <== Bool(true); - } val addr_match = (r_cpu_req_addr(tagmsb, offsetlsb) === 
p_store_addr(tagmsb, offsetlsb)); - val drain_store = ((state === s_ready) && p_store_valid && (!r_cpu_req_val || !tag_match || !req_load || addr_match)) + val ldst_conflict = r_cpu_req_val && req_load && p_store_valid && addr_match; +// val drain_store = ((state === s_ready) && p_store_valid && (!r_cpu_req_val || !tag_match || !req_load || addr_match)) + val drain_store = + (state === s_ready) && p_store_valid && + (!(io.cpu.req_val && (io.cpu.req_cmd === M_XRD)) || + r_cpu_req_val && req_load && p_store_valid && addr_match); + val resolve_store = (state === s_resolve_miss) && req_store; val do_store = drain_store | resolve_store; @@ -241,22 +252,22 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val data_array_wdata = Mux((state === s_refill), io.mem.resp_data, - Cat(p_store_data, p_store_data)); + Cat(store_data, store_data)); - val p_wmask_expand = - Cat(Fill(8, p_store_wmask(7)), - Fill(8, p_store_wmask(6)), - Fill(8, p_store_wmask(5)), - Fill(8, p_store_wmask(4)), - Fill(8, p_store_wmask(3)), - Fill(8, p_store_wmask(2)), - Fill(8, p_store_wmask(1)), - Fill(8, p_store_wmask(0))); + val store_wmask_expand = + Cat(Fill(8, store_wmask(7)), + Fill(8, store_wmask(6)), + Fill(8, store_wmask(5)), + Fill(8, store_wmask(4)), + Fill(8, store_wmask(3)), + Fill(8, store_wmask(2)), + Fill(8, store_wmask(1)), + Fill(8, store_wmask(0))); val da_store_wmask = Mux(p_store_addr(offsetlsb).toBool, - Cat(p_wmask_expand, Bits(0,64)), - Cat(Bits(0,64), p_wmask_expand)); + Cat(store_wmask_expand, Bits(0,64)), + Cat(Bits(0,64), store_wmask_expand)); val data_array_wmask = Mux((state === s_refill), ~Bits(0,128), @@ -269,16 +280,16 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { io.cpu.req_addr(indexmsb, offsetmsb-1)))); val data_array_rdata = Reg(data_array.read(data_array_raddr)); - val ldst_conflict = r_cpu_req_val && req_load && p_store_valid && addr_match; - + val miss = (state === s_ready) && r_cpu_req_val && req_load && 
(!tag_match || (p_store_valid && addr_match)); + // output signals io.cpu.req_rdy := (state === s_ready) && !ldst_conflict && (!r_cpu_req_val || (tag_match && !req_flush)); io.cpu.resp_val := ((state === s_ready) && r_cpu_req_val && tag_match && req_load && !(p_store_valid && addr_match)) || ((state === s_resolve_miss) && req_flush); + io.cpu.resp_miss := miss; io.cpu.resp_tag := Cat(Bits(0,1), r_cpu_req_type, r_cpu_req_addr(2,0), r_cpu_req_tag); - io.cpu.resp_data := Mux(r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 6383c4ec..73474a8a 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -44,11 +44,13 @@ class rocketDpath extends Component val if_reg_pc = Reg(width = 32, resetVal = UFix(0, 32)); // instruction decode definitions + val id_reg_valid = Reg(resetVal = Bool(false)); val id_reg_pc = Reg(resetVal = UFix(0,32)); val id_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); val id_reg_inst = Reg(resetVal = NOP); // execute definitions + val ex_reg_valid = Reg(resetVal = Bool(false)); val ex_reg_pc = Reg(resetVal = UFix(0,32)); val ex_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); val ex_reg_inst = Reg(resetVal = Bits(0,32)); @@ -76,6 +78,7 @@ class rocketDpath extends Component val ex_wdata = Wire() { Bits() }; // memory definitions + val mem_reg_valid = Reg(resetVal = Bool(false)); val mem_reg_pc = Reg(resetVal = UFix(0,32)); val mem_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); val mem_reg_waddr = Reg(resetVal = UFix(0,5)); @@ -90,6 +93,7 @@ class rocketDpath extends Component val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); // writeback definitions + val wb_reg_valid = Reg(resetVal = Bool(false)); val wb_reg_pc = Reg(resetVal = UFix(0,32)); val wb_reg_waddr = Reg(resetVal = UFix(0,5)); val wb_reg_wdata = Reg(resetVal = Bits(0,64)); @@ -126,6 +130,7 @@ class rocketDpath extends Component val if_next_pc = 
Mux(io.ctrl.sel_pc === PC_4, if_pc_plus4, Mux(io.ctrl.sel_pc === PC_BTB, if_btb_target, + Mux(io.ctrl.sel_pc === PC_EX, ex_reg_pc, Mux(io.ctrl.sel_pc === PC_EX4, ex_reg_pc_plus4, Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, @@ -133,7 +138,7 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_pcr(31,0).toUFix, Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, Mux(io.ctrl.sel_pc === PC_MEM4, mem_reg_pc_plus4, - UFix(0, 32)))))))))); + UFix(0, 32))))))))))); when (!io.host.start){ if_reg_pc <== UFix(0, 32); //32'hFFFF_FFFC; @@ -156,10 +161,12 @@ class rocketDpath extends Component id_reg_pc <== if_reg_pc; id_reg_pc_plus4 <== if_pc_plus4; when(io.ctrl.killf) { - id_reg_inst <== NOP; + id_reg_inst <== NOP; + id_reg_valid <== Bool(false); } otherwise { - id_reg_inst <== io.imem.resp_data; + id_reg_inst <== io.imem.resp_data; + id_reg_valid <== Bool(true); } } @@ -265,6 +272,7 @@ class rocketDpath extends Component ex_reg_ctrl_cause <== id_cause; when(io.ctrl.killd) { + ex_reg_valid <== Bool(false); ex_reg_ctrl_div_val <== Bool(false); ex_reg_ctrl_mul_val <== Bool(false); ex_reg_ctrl_wen <== Bool(false); @@ -273,6 +281,7 @@ class rocketDpath extends Component ex_reg_ctrl_exception <== Bool(false); } otherwise { + ex_reg_valid <== id_reg_valid; ex_reg_ctrl_div_val <== io.ctrl.div_val; ex_reg_ctrl_mul_val <== io.ctrl.mul_val; ex_reg_ctrl_wen <== io.ctrl.wen; @@ -376,12 +385,14 @@ class rocketDpath extends Component mem_reg_ctrl_cause <== ex_reg_ctrl_cause; when (io.ctrl.killx) { + mem_reg_valid <== Bool(false); mem_reg_ctrl_eret <== Bool(false); mem_reg_ctrl_wen <== Bool(false); mem_reg_ctrl_wen_pcr <== Bool(false); mem_reg_ctrl_exception <== Bool(false); } otherwise { + mem_reg_valid <== ex_reg_valid; mem_reg_ctrl_eret <== ex_reg_ctrl_eret; mem_reg_ctrl_wen <== ex_reg_ctrl_wen; mem_reg_ctrl_wen_pcr <== ex_reg_ctrl_wen_pcr; @@ -412,10 +423,12 @@ class rocketDpath extends Component wb_reg_ctrl_exception <== 
mem_reg_ctrl_exception; when (io.ctrl.killm) { + wb_reg_valid <== Bool(false); wb_reg_ctrl_wen <== Bool(false); wb_reg_ctrl_wen_pcr <== Bool(false); } otherwise { + wb_reg_valid <== mem_reg_valid; wb_reg_ctrl_wen <== mem_reg_ctrl_wen; wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } @@ -448,6 +461,12 @@ class rocketDpath extends Component pcr.io.exception := wb_reg_ctrl_exception; pcr.io.cause := wb_reg_ctrl_cause; pcr.io.pc := wb_reg_pc; + + // temporary debug outputs so things don't get optimized away + io.debug.id_valid := id_reg_valid; + io.debug.ex_valid := ex_reg_valid; + io.debug.mem_valid := mem_reg_valid; + io.debug.wb_valid := wb_reg_valid; } From 2db9ee12bc401afa4117d9f4bebcfde6d5f41e19 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Fri, 4 Nov 2011 15:57:08 -0700 Subject: [PATCH 0013/1087] fixed eret instruction, hello world runs --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 73aaa4b9..a13abaf0 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -338,8 +338,8 @@ class rocketCtrl extends Component io.dpath.mem_load := mem_cmd_load; io.dpath.sel_pc := - Mux(replay_mem || mem_reg_privileged, PC_MEM4, Mux(io.dpath.exception || mem_reg_eret, PC_PCR, + Mux(replay_mem || mem_reg_privileged, PC_MEM4, Mux(replay_ex, PC_EX, Mux(!ex_reg_btb_hit && br_taken, PC_BR, Mux(ex_reg_btb_hit && !br_taken, PC_EX4, From 4d64099103f2796eafb6050d8894b055113662ae Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Fri, 4 Nov 2011 20:52:21 -0700 Subject: [PATCH 0014/1087] cleanup --- rocket/src/main/scala/ctrl.scala | 11 ++++------- rocket/src/main/scala/dcache.scala | 3 --- rocket/src/main/scala/dpath.scala | 3 +-- rocket/src/main/scala/icache.scala | 23 +++++++++++++++++------ 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala 
index a13abaf0..5a7ea713 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -322,14 +322,12 @@ class rocketCtrl extends Component } // replay execute stage PC when the D$ is blocked -// val replay_mem_pc = mem_reg_mem_val && (mem_reg_mem_cmd != M_FLA) && !io.dmem.req_rdy; val replay_ex = ex_reg_mem_val && !io.dmem.req_rdy; - // replay memory stage PC+4 on a D$ load miss + + // replay execute stage PC on a D$ load miss val mem_cmd_load = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); -// val replay_mem_pc_plus4 = mem_cmd_load && !io.dmem.resp_val; val replay_mem = io.dmem.resp_miss; -// val kill_ex = replay_mem_pc | replay_mem_pc_plus4 | mem_reg_privileged; val kill_ex = replay_ex | replay_mem | mem_reg_privileged; val kill_mem = io.dpath.exception; @@ -339,14 +337,13 @@ class rocketCtrl extends Component io.dpath.sel_pc := Mux(io.dpath.exception || mem_reg_eret, PC_PCR, - Mux(replay_mem || mem_reg_privileged, PC_MEM4, - Mux(replay_ex, PC_EX, + Mux(replay_ex || replay_mem || mem_reg_privileged, PC_EX, Mux(!ex_reg_btb_hit && br_taken, PC_BR, Mux(ex_reg_btb_hit && !br_taken, PC_EX4, Mux(jr_taken, PC_JR, Mux(j_taken, PC_J, Mux(io.dpath.btb_hit, PC_BTB, - PC_4)))))))); + PC_4))))))); io.dpath.wen_btb := ~ex_reg_btb_hit & br_taken & ~kill_ex & ~kill_mem; diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index a9b0dac3..bb22f12b 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -110,7 +110,6 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val r_cpu_req_addr = Reg(resetVal = Bits(0, addrbits)); val r_cpu_req_val = Reg(resetVal = Bool(false)); -// val r_cpu_req_data = Reg(resetVal = Bits(0,64)); val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); val r_cpu_req_type = Reg(resetVal = Bits(0,3)); val r_cpu_req_tag = Reg(resetVal = Bits(0,5)); @@ -119,7 +118,6 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val 
p_store_addr = Reg(resetVal = Bits(0,64)); val p_store_cmd = Reg(resetVal = Bits(0,4)); val p_store_type = Reg(resetVal = Bits(0,3)); -// val p_store_wmask = Reg(resetVal = Bits(0,64)); val p_store_valid = Reg(resetVal = Bool(false)); val req_load = (r_cpu_req_cmd === M_XRD); @@ -222,7 +220,6 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val addr_match = (r_cpu_req_addr(tagmsb, offsetlsb) === p_store_addr(tagmsb, offsetlsb)); val ldst_conflict = r_cpu_req_val && req_load && p_store_valid && addr_match; -// val drain_store = ((state === s_ready) && p_store_valid && (!r_cpu_req_val || !tag_match || !req_load || addr_match)) val drain_store = (state === s_ready) && p_store_valid && (!(io.cpu.req_val && (io.cpu.req_cmd === M_XRD)) || diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 73474a8a..7591b31a 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -137,8 +137,7 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_pcr(31,0).toUFix, Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, - Mux(io.ctrl.sel_pc === PC_MEM4, mem_reg_pc_plus4, - UFix(0, 32))))))))))); + UFix(0, 32)))))))))); when (!io.host.start){ if_reg_pc <== UFix(0, 32); //32'hFFFF_FFFC; diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 7bf4ad3f..329be695 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -24,7 +24,8 @@ class ioIcache(view: List[String] = null) extends Bundle (view) val resp_val = Bool('output); } -class ioICacheDM extends Bundle() { +class ioICacheDM extends Bundle() +{ val cpu = new ioImem(); val mem = new ioIcache().flip(); } @@ -51,14 +52,22 @@ class rocketICacheDM(lines: Int, addrbits : Int) extends Component { val state = Reg(resetVal = s_reset); val r_cpu_req_addr = Reg(Bits(0, addrbits)); - when (io.cpu.req_val && ((state === 
s_ready) || (state === s_resolve_miss))) { r_cpu_req_addr <== io.cpu.req_addr; } + when (io.cpu.req_val && ((state === s_ready) || (state === s_resolve_miss))) { + r_cpu_req_addr <== io.cpu.req_addr; + } val r_cpu_req_val = Reg(Bool(false)); - when ((state === s_ready) || (state === s_resolve_miss)) { r_cpu_req_val <== io.cpu.req_val; } - otherwise { r_cpu_req_val <== Bool(false); } + when ((state === s_ready) || (state === s_resolve_miss)) { + r_cpu_req_val <== io.cpu.req_val; + } + otherwise { + r_cpu_req_val <== Bool(false); + } val refill_count = Reg(resetVal = UFix(0,2)); - when (io.mem.resp_val) { refill_count <== refill_count + UFix(1); } + when (io.mem.resp_val) { + refill_count <== refill_count + UFix(1); + } // tag array val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); @@ -72,7 +81,9 @@ class rocketICacheDM(lines: Int, addrbits : Int) extends Component { val vb_array = Reg(resetVal = Bits(0, lines)); val vb_rdata = Reg(vb_array(io.cpu.req_addr(indexmsb, indexlsb))); - when ((state === s_refill_wait) && io.mem.resp_val) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } + when ((state === s_refill_wait) && io.mem.resp_val) { + vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); + } val tag_match = vb_rdata.toBool && (tag_lookup === r_cpu_req_addr(tagmsb, taglsb)); From 9d63087eb2e581f608856ca0a8ff6b3aa04d35cc Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Mon, 7 Nov 2011 00:58:25 -0800 Subject: [PATCH 0015/1087] changed caches to use separate sram modules for tag and data arrays --- rocket/src/main/scala/dcache.scala | 222 +++++------------------------ rocket/src/main/scala/icache.scala | 64 ++++++--- 2 files changed, 78 insertions(+), 208 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index bb22f12b..91a90ea3 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -41,8 +41,7 @@ class ioDCacheDM 
extends Bundle() { // state machine to flush (write back dirty lines, invalidate clean ones) the D$ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { val io = new ioDCacheDM(); -// val dcache = new rocketDCacheDM(lines, addrbits); - val dcache = new rocketDCacheDM_1C(lines, addrbits); + val dcache = new rocketDCacheDM(lines, addrbits); val indexbits = ceil(log10(lines)/log10(2)).toInt; val offsetbits = 6; @@ -93,7 +92,7 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { } -class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { +class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { val io = new ioDCacheDM(); val indexbits = ceil(log10(lines)/log10(2)).toInt; @@ -153,17 +152,21 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } // tag array - val tag_we = + val tagbits = addrbits-(indexbits+offsetbits); + val tag_we = ((state === s_refill) && io.mem.req_rdy && (rr_count === UFix(3,2))) || ((state === s_resolve_miss) && req_flush); - val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; - val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); - val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata); + val tag_array = new rocketSRAMsp(lines, tagbits); val tag_raddr = Mux((state === s_ready), io.cpu.req_addr(indexmsb, indexlsb).toUFix, - r_cpu_req_addr(indexmsb, indexlsb).toUFix); - val tag_rdata = Reg(tag_array.read(tag_raddr)); - + r_cpu_req_addr(indexmsb, indexlsb).toUFix); + tag_array.io.a := tag_raddr; + tag_array.io.d := r_cpu_req_addr(tagmsb, taglsb); + tag_array.io.we := tag_we; + tag_array.io.bweb := ~Bits(0,tagbits); + tag_array.io.ce := Bool(true); // FIXME + val tag_rdata = tag_array.io.q; + // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); val vb_rdata = Reg(vb_array(tag_raddr)); @@ -242,15 +245,7 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } // data array - val data_array_we = ((state === s_refill) && 
io.mem.resp_val) || do_store; - val data_array_waddr = - Mux((state === s_refill), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - p_store_addr(indexmsb, offsetmsb-1).toUFix); - - val data_array_wdata = - Mux((state === s_refill), io.mem.resp_data, - Cat(store_data, store_data)); - + val data_array = new rocketSRAMsp(lines*4, 128); val store_wmask_expand = Cat(Fill(8, store_wmask(7)), Fill(8, store_wmask(6)), @@ -264,18 +259,25 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { val da_store_wmask = Mux(p_store_addr(offsetlsb).toBool, Cat(store_wmask_expand, Bits(0,64)), - Cat(Bits(0,64), store_wmask_expand)); - - val data_array_wmask = - Mux((state === s_refill), ~Bits(0,128), - da_store_wmask); - val data_array = Mem(lines*4, data_array_we, data_array_waddr, data_array_wdata, wrMask = data_array_wmask, resetVal = null); - val data_array_raddr = - Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count_next).toUFix, - Mux((state === s_start_writeback) || (state === s_writeback), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, + Cat(Bits(0,64), store_wmask_expand)); + + data_array.io.a := + Mux(do_store, p_store_addr(indexmsb, offsetmsb-1), + Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count_next), + Mux((state === s_start_writeback) || (state === s_writeback) || (state === s_refill), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count), Mux((state === s_resolve_miss) || (state === s_replay_load), r_cpu_req_addr(indexmsb, offsetmsb-1), - io.cpu.req_addr(indexmsb, offsetmsb-1)))); - val data_array_rdata = Reg(data_array.read(data_array_raddr)); + io.cpu.req_addr(indexmsb, offsetmsb-1))))).toUFix; + + data_array.io.d := + Mux((state === s_refill), io.mem.resp_data, + Cat(store_data, store_data)); + + data_array.io.we := ((state === s_refill) && io.mem.resp_val) || do_store; + data_array.io.bweb := + Mux((state === s_refill), ~Bits(0,128), + 
da_store_wmask); + data_array.io.ce := Bool(true); // FIXME + val data_array_rdata = data_array.io.q; val miss = (state === s_ready) && r_cpu_req_val && req_load && (!tag_match || (p_store_valid && addr_match)); @@ -346,164 +348,4 @@ class rocketDCacheDM_1C(lines: Int, addrbits: Int) extends Component { } } -// basic direct mapped data cache, 2 cycle read latency -// parameters : -// lines = # of cache lines -// addr_bits = address width (word addressable) bits -// 64 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines -/* -class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { - val io = new ioDCacheDM(); - - val indexbits = ceil(log10(lines)/log10(2)).toInt; - val offsetbits = 6; - val tagmsb = addrbits - 1; - val taglsb = indexbits+offsetbits; - val indexmsb = taglsb-1; - val indexlsb = offsetbits; - val offsetmsb = indexlsb-1; - val offsetlsb = 3; - - val s_reset :: s_ready :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(7) { UFix() }; - val state = Reg(resetVal = s_reset); - - val r_cpu_req_addr = Reg(Bits(0, addrbits)); - val r_r_cpu_req_addr = Reg(r_cpu_req_addr); - val r_cpu_req_val = Reg(Bool(false)); - val r_cpu_req_data = Reg(Bits(0,64)); - val r_cpu_req_cmd = Reg(Bits(0,4)); - val r_cpu_req_wmask = Reg(Bits(0,8)); - val r_cpu_req_tag = Reg(Bits(0,12)); - val r_cpu_resp_tag = Reg(r_cpu_req_tag); - val r_cpu_resp_val = Reg(Bool(false)); - - when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_addr <== io.cpu.req_addr; - r_cpu_req_data <== io.cpu.req_data; - r_cpu_req_cmd <== io.cpu.req_cmd; - r_cpu_req_wmask <== io.cpu.req_wmask; - r_cpu_req_tag <== io.cpu.req_tag; } - - val req_load = (r_cpu_req_cmd === M_XRD); - val req_store = (r_cpu_req_cmd === M_XWR); - val req_flush = (r_cpu_req_cmd === M_FLA); - - when (io.cpu.req_rdy) { r_cpu_req_val <== io.cpu.req_val; } - otherwise { r_cpu_req_val <== Bool(false); } - - // counter - val rr_count = Reg(resetVal = UFix(0,2)); - val 
rr_count_next = rr_count + UFix(1); - when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy)) - { rr_count <== rr_count_next; } - - // tag array - val tag_we = (state === s_resolve_miss); - val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; - val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); - val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata); - val tag_raddr = Mux((state === s_ready), io.cpu.req_addr(indexmsb, indexlsb).toUFix, r_cpu_req_addr(indexmsb, indexlsb).toUFix); - val tag_rdata = Reg(tag_array.read(tag_raddr)); - - // valid bit array - val vb_array = Reg(resetVal = Bits(0, lines)); - val vb_rdata = Reg(vb_array(tag_raddr)); - when (tag_we && !req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } - when (tag_we && req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } - - val tag_valid = vb_rdata.toBool; - val tag_match = tag_valid && !req_flush && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); - val store = ((state === s_ready) && r_cpu_req_val && req_store && tag_match ) || - ((state === s_resolve_miss) && req_store); - - // dirty bit array - val db_array = Reg(resetVal = Bits(0, lines)); - val db_rdata = Reg(db_array(tag_raddr)); - val tag_dirty = db_rdata.toBool; - when (store) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } - when (tag_we) { db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); } - - // data array - val data_array_we = ((state === s_refill) && io.mem.resp_val) || store; - val data_array_waddr = Mux((state === s_refill), - Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - r_cpu_req_addr(indexmsb, offsetmsb-1).toUFix); - - val data_array_wdata = Mux((state === s_refill), io.mem.resp_data, Cat(r_cpu_req_data, r_cpu_req_data)); - - val req_wmask_expand = Cat(Fill(8, r_cpu_req_wmask(7)), - Fill(8, r_cpu_req_wmask(6)), - 
Fill(8, r_cpu_req_wmask(5)), - Fill(8, r_cpu_req_wmask(4)), - Fill(8, r_cpu_req_wmask(3)), - Fill(8, r_cpu_req_wmask(2)), - Fill(8, r_cpu_req_wmask(1)), - Fill(8, r_cpu_req_wmask(0))); - - val store_wmask = Mux(r_cpu_req_addr(offsetlsb).toBool, - Cat(req_wmask_expand, Bits(0,64)), - Cat(Bits(0,64), req_wmask_expand)); - - val data_array_wmask = Mux((state === s_refill), ~Bits(0,128), store_wmask); - val data_array = Mem(lines*4, data_array_we, data_array_waddr, data_array_wdata, wrMask = data_array_wmask, resetVal = null); - val data_array_raddr = Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count_next).toUFix, - Mux((state === s_start_writeback) || (state === s_writeback), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - r_cpu_req_addr(indexmsb, offsetmsb-1))); - val data_array_rdata = Reg(data_array.read(data_array_raddr)); - - // output signals - io.cpu.req_rdy := (state === s_ready) && (!r_cpu_req_val || tag_match); - - when ((((state === s_ready) && r_cpu_req_val && tag_match) || (state === s_resolve_miss)) && !req_store) - { r_cpu_resp_val <== Bool(true); } - otherwise { r_cpu_resp_val <== Bool(false); } - - io.cpu.resp_val := r_cpu_resp_val; - io.cpu.resp_data := Mux(r_r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); - io.cpu.resp_tag := r_cpu_resp_tag; - - io.mem.req_val := (state === s_req_refill) || (state === s_writeback); - io.mem.req_rw := (state === s_writeback); - io.mem.req_wdata := data_array_rdata; - io.mem.req_tag := UFix(0); - io.mem.req_addr := Mux(state === s_writeback, - Cat(tag_rdata, r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix); - - // control state machine - switch (state) { - is (s_reset) { - state <== s_ready; - } - is (s_ready) { - when (~r_cpu_req_val) { state <== s_ready; } - when (r_cpu_req_val & tag_match) { state <== s_ready; } - when (tag_valid & tag_dirty) { state <== 
s_start_writeback; } - when (req_flush) { state <== s_resolve_miss; } - otherwise { state <== s_req_refill; } - } - is (s_start_writeback) { - state <== s_writeback; - } - is (s_writeback) { - when (io.mem.req_rdy && (rr_count === UFix(3,2))) { - when (req_flush) { state <== s_resolve_miss; } - otherwise { state <== s_req_refill; } - } - } - is (s_req_refill) - { - when (io.mem.req_rdy) { state <== s_refill; } - } - is (s_refill) { - when (io.mem.resp_val && (rr_count === UFix(3,2))) { state <== s_resolve_miss; } - } - is (s_resolve_miss) { - state <== s_ready; - } - } -} -*/ - - } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 329be695..30e4fc17 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -30,6 +30,25 @@ class ioICacheDM extends Bundle() val mem = new ioIcache().flip(); } +// single port SRAM i/o +class ioSRAMsp (width: Int, addrbits: Int) extends Bundle { + val a = UFix(addrbits, 'input); // address + val d = Bits(width, 'input); // data input + val bweb = Bits(width, 'input); // bit write enable mask + val ce = Bool('input); // chip enable + val we = Bool('input); // write enable + val q = Bits(width, 'output); // data out +} + +// single ported SRAM +class rocketSRAMsp(entries: Int, width: Int) extends Component { + val addrbits = ceil(log10(entries)/log10(2)).toInt; + val io = new ioSRAMsp(width, addrbits); + val sram = Mem(entries, io.we && io.ce, io.a, io.d, wrMask = io.bweb, resetVal = null); + val rdata = Reg(sram.read(io.a)); + io.q := rdata; +} + // basic direct mapped instruction cache // parameters : // lines = # cache lines @@ -38,7 +57,7 @@ class ioICacheDM extends Bundle() class rocketICacheDM(lines: Int, addrbits : Int) extends Component { val io = new ioICacheDM(); - + val indexbits = ceil(log10(lines)/log10(2)).toInt; val offsetbits = 6; val tagmsb = addrbits - 1; @@ -47,6 +66,7 @@ class rocketICacheDM(lines: Int, addrbits : Int) extends Component { val 
indexlsb = offsetbits; val offsetmsb = indexlsb-1; val offsetlsb = 2; + val databits = 32; val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: s_resolve_miss :: Nil = Enum(6) { UFix() }; val state = Reg(resetVal = s_reset); @@ -70,17 +90,19 @@ class rocketICacheDM(lines: Int, addrbits : Int) extends Component { } // tag array - val tag_wdata = r_cpu_req_addr(tagmsb, taglsb); - val tag_waddr = r_cpu_req_addr(indexmsb, indexlsb).toUFix; - val tag_we = (state === s_refill_wait) && io.mem.resp_val; - val tag_array = Mem(lines, tag_we, tag_waddr, tag_wdata); - val tag_raddr = io.cpu.req_addr(indexmsb, indexlsb);; - val tag_lookup = Reg(tag_array.read(tag_raddr)); + val tagbits = addrbits-(indexbits+offsetbits); + val tag_array = new rocketSRAMsp(lines, tagbits); + tag_array.io.a := + Mux((state === s_refill_wait), r_cpu_req_addr(indexmsb, indexlsb).toUFix, io.cpu.req_addr(indexmsb, indexlsb)); + tag_array.io.d := r_cpu_req_addr(tagmsb, taglsb); + tag_array.io.we := (state === s_refill_wait) && io.mem.resp_val; + tag_array.io.bweb := ~Bits(0,tagbits); + tag_array.io.ce := Bool(true); // FIXME + val tag_lookup = tag_array.io.q; // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); val vb_rdata = Reg(vb_array(io.cpu.req_addr(indexmsb, indexlsb))); - when ((state === s_refill_wait) && io.mem.resp_val) { vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); } @@ -88,18 +110,24 @@ class rocketICacheDM(lines: Int, addrbits : Int) extends Component { val tag_match = vb_rdata.toBool && (tag_lookup === r_cpu_req_addr(tagmsb, taglsb)); // data array - val data_array_waddr = Cat(r_cpu_req_addr(indexmsb, indexlsb), refill_count).toUFix; - val data_array = Mem(lines*4, io.mem.resp_val, data_array_waddr, io.mem.resp_data); - val data_array_raddr = Cat(io.cpu.req_addr(indexmsb, indexlsb), io.cpu.req_addr(offsetmsb, offsetmsb-1)); - val data_array_read = data_array(data_array_raddr); - val data_array_rdata = Reg(data_array_read); 
- + val data_array = new rocketSRAMsp(lines*4, 128); + data_array.io.a := + Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_addr(indexmsb, indexlsb), refill_count), + io.cpu.req_addr(indexmsb, offsetmsb-1)).toUFix; + data_array.io.d := io.mem.resp_data; + data_array.io.we := io.mem.resp_val; + data_array.io.bweb := ~Bits(0,128); + data_array.io.ce := Bool(true); // FIXME + val data_array_rdata = data_array.io.q; + + // output signals io.cpu.resp_val := (r_cpu_req_val && tag_match && (state === s_ready)); // || (state === s_resolve_miss); io.cpu.req_rdy := ((state === s_ready) && (!r_cpu_req_val || (r_cpu_req_val && tag_match))); // || (state === s_resolve_miss); - io.cpu.resp_data := MuxLookup(r_cpu_req_addr(offsetmsb-2, offsetlsb).toUFix, data_array_rdata(127, 96), - Array(UFix(2) -> data_array_rdata(95,64), - UFix(1) -> data_array_rdata(63,32), - UFix(0) -> data_array_rdata(31,0))); + io.cpu.resp_data := + MuxLookup(r_cpu_req_addr(offsetmsb-2, offsetlsb).toUFix, data_array_rdata(127, 96), + Array(UFix(2) -> data_array_rdata(95,64), + UFix(1) -> data_array_rdata(63,32), + UFix(0) -> data_array_rdata(31,0))); io.mem.req_val := (state === s_request); io.mem.req_addr := Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix; From 7130edac8dd500cf54c0025f728e7ac622d672da Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Mon, 7 Nov 2011 01:03:47 -0800 Subject: [PATCH 0016/1087] fix for flushed div/mul instructions --- rocket/src/main/scala/dpath.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 7591b31a..55ca866d 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -312,7 +312,7 @@ class rocketDpath extends Component // divider div.io.div_fn := ex_reg_ctrl_div_fn; - div.io.div_val := ex_reg_ctrl_div_val; + div.io.div_val := ex_reg_ctrl_div_val && !io.ctrl.killx; div.io.div_waddr := ex_reg_waddr; 
div.io.dpath_rs1 := ex_reg_rs1; div.io.dpath_rs2 := ex_reg_rs2; @@ -322,7 +322,7 @@ class rocketDpath extends Component io.ctrl.div_result_val := div.io.div_result_val; // multiplier - mul.io.mul_val := ex_reg_ctrl_mul_val; + mul.io.mul_val := ex_reg_ctrl_mul_val && !io.ctrl.killx; mul.io.mul_fn := ex_reg_ctrl_mul_fn; mul.io.mul_tag := ex_reg_waddr; mul.io.in0 := ex_reg_rs1; From e96430d862adf25be00784c612afe9b7de605390 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Wed, 9 Nov 2011 14:52:17 -0800 Subject: [PATCH 0017/1087] integrating ITLB & PTW --- rocket/src/main/scala/consts.scala | 17 ++- rocket/src/main/scala/cpu.scala | 51 ++++++- rocket/src/main/scala/ctrl.scala | 3 +- rocket/src/main/scala/dcache.scala | 46 ++++--- rocket/src/main/scala/dpath.scala | 32 +++-- rocket/src/main/scala/dpath_util.scala | 42 +++--- rocket/src/main/scala/icache.scala | 8 +- rocket/src/main/scala/itlb.scala | 178 +++++++++++++++++++++++++ rocket/src/main/scala/ptw.scala | 168 +++++++++++++++++++++++ rocket/src/main/scala/top.scala | 4 +- rocket/src/main/scala/util.scala | 52 ++++++++ 11 files changed, 538 insertions(+), 63 deletions(-) create mode 100644 rocket/src/main/scala/itlb.scala create mode 100644 rocket/src/main/scala/ptw.scala create mode 100644 rocket/src/main/scala/util.scala diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index f8069c4e..344eace5 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -128,6 +128,7 @@ object Constants val M_FRD = Bits("b0010", 4); // fp load val M_FWR = Bits("b0011", 4); // fp store val M_FLA = Bits("b0100", 4); // flush cache + val M_PRD = Bits("b0101", 4); // PTW load val M_XA_ADD = Bits("b1000", 4); val M_XA_SWAP = Bits("b1001", 4); val M_XA_AND = Bits("b1010", 4); @@ -145,12 +146,26 @@ object Constants val PCR_COMPARE = UFix( 5, 5); val PCR_CAUSE = UFix( 6, 5); val PCR_MEMSIZE = UFix( 8, 5); + val PCR_PTBR = UFix( 9, 5); val PCR_LOG = UFix(10, 5); val 
PCR_TOHOST = UFix(16, 5); val PCR_FROMHOST = UFix(17, 5); val PCR_CONSOLE = UFix(18, 5); val PCR_K0 = UFix(24, 5); val PCR_K1 = UFix(25, 5); + + val PADDR_BITS = 40; + val VADDR_BITS = 43; + val PGIDX_BITS = 13; + val PPN_BITS = PADDR_BITS-PGIDX_BITS; + val VPN_BITS = VADDR_BITS-PGIDX_BITS; + val ASID_BITS = 7; + val PERM_BITS = 6; + + val ITLB_ENTRIES = 8; + + val HAVE_FPU = Bool(false); + val HAVE_VEC = Bool(false); } -} +} \ No newline at end of file diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 2f54d775..efec38ef 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -1,6 +1,6 @@ package Top { -import Chisel._ +import Chisel._; import Node._; import Constants._; @@ -41,21 +41,60 @@ class ioRocket extends Bundle() class rocketProc extends Component { val io = new ioRocket(); - + val ctrl = new rocketCtrl(); val dpath = new rocketDpath(); + val itlb = new rocketITLB(ITLB_ENTRIES); + val ptw = new rocketPTW(); + val arb = new rocketDmemArbiter(); + ctrl.io.dpath <> dpath.io.ctrl; - ctrl.io.dmem ^^ io.dmem; +// ctrl.io.dmem ^^ io.dmem; ctrl.io.host.start ^^ io.host.start; - ctrl.io.imem ^^ io.imem; +// ctrl.io.imem ^^ io.imem; - dpath.io.dmem ^^ io.dmem; - dpath.io.imem.req_addr ^^ io.imem.req_addr; +// dpath.io.dmem ^^ io.dmem; +// dpath.io.imem.req_addr ^^ io.imem.req_addr; dpath.io.imem.resp_data ^^ io.imem.resp_data; dpath.io.host ^^ io.host; dpath.io.debug ^^ io.debug; + itlb.io.cpu.invalidate := Bool(false); + itlb.io.cpu.status := dpath.io.ctrl.status; + itlb.io.cpu.req_val := ctrl.io.imem.req_val; + ctrl.io.imem.req_rdy := itlb.io.cpu.req_rdy && io.imem.req_rdy; + + itlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR + itlb.io.cpu.req_vpn := dpath.io.imem.req_addr(VADDR_BITS-1,PGIDX_BITS); + + io.imem.req_val := itlb.io.cpu.resp_val; + io.imem.req_addr := Cat(itlb.io.cpu.resp_ppn, dpath.io.imem.req_addr(PGIDX_BITS-1,0)).toUFix; + + ctrl.io.imem.resp_val := 
io.imem.resp_val; + dpath.io.itlb_xcpt := itlb.io.cpu.exception; + + ptw.io.itlb <> itlb.io.ptw; + ptw.io.ptbr := dpath.io.ptbr; + + arb.io.ptw <> ptw.io.dmem; + arb.io.mem ^^ io.dmem + + arb.io.cpu.req_val := ctrl.io.dmem.req_val; + arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; + arb.io.cpu.req_type := ctrl.io.dmem.req_type; + arb.io.cpu.req_addr := dpath.io.dmem.req_addr; + arb.io.cpu.req_data := dpath.io.dmem.req_data; + arb.io.cpu.req_tag := dpath.io.dmem.req_tag; + + ctrl.io.dmem.req_rdy := arb.io.cpu.req_rdy; + ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; + ctrl.io.dmem.resp_val := arb.io.cpu.resp_val; + + dpath.io.dmem.resp_val := arb.io.cpu.resp_val; + dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; + dpath.io.dmem.resp_data := arb.io.cpu.resp_data; + // FIXME: console disconnected // io.console.bits := dpath.io.dpath.rs1(7,0); io.console.bits := Bits(0,8); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 5a7ea713..dfffc8ba 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -53,7 +53,7 @@ class ioCtrlDpath extends Bundle() val mem_waddr = UFix(5,'input); // write addr from memory stage val wb_waddr = UFix(5,'input); // write addr from writeback stage val exception = Bool('input); - val status = Bits(8, 'input); + val status = Bits(17, 'input); val sboard_clr0 = Bool('input); val sboard_clr0a = UFix(5, 'input); val sboard_clr1 = Bool('input); @@ -67,6 +67,7 @@ class ioCtrlAll extends Bundle() val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_val")).flip(); val host = new ioHost(List("start")); +// val itlb_xcpt = Bool('input); } class rocketCtrl extends Component diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 91a90ea3..fcd10394 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -5,24 +5,24 @@ 
import Node._; import Constants._; import scala.math._; -// interface between D$ and processor +// interface between D$ and processor pipeline class ioDmem(view: List[String] = null) extends Bundle(view) { val req_val = Bool('input); val req_rdy = Bool('output); val req_cmd = Bits(4, 'input); val req_type = Bits(3, 'input); - val req_addr = UFix(32, 'input); + val req_addr = UFix(PADDR_BITS, 'input); val req_data = Bits(64, 'input); val req_tag = Bits(5, 'input); val resp_miss = Bool('output); val resp_val = Bool('output); val resp_data = Bits(64, 'output); - val resp_tag = Bits(12, 'output); + val resp_tag = Bits(13, 'output); } -// interface between D$ and memory +// interface between D$ and next level in memory hierarchy class ioDcache(view: List[String] = null) extends Bundle(view) { - val req_addr = UFix(32, 'input); + val req_addr = UFix(PADDR_BITS, 'input); val req_tag = UFix(3, 'input); val req_val = Bool('input); val req_rdy = Bool('output); @@ -39,10 +39,11 @@ class ioDCacheDM extends Bundle() { } // state machine to flush (write back dirty lines, invalidate clean ones) the D$ -class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { +class rocketDCacheDM_flush(lines: Int) extends Component { val io = new ioDCacheDM(); - val dcache = new rocketDCacheDM(lines, addrbits); + val dcache = new rocketDCacheDM(lines); + val addrbits = PADDR_BITS; val indexbits = ceil(log10(lines)/log10(2)).toInt; val offsetbits = 6; val tagmsb = addrbits - 1; @@ -65,18 +66,22 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { flush_waiting <== Bool(true); } - when (dcache.io.cpu.req_rdy && - (flush_count === ~Bits(0, indexbits))) { flushing <== Bool(false); } - when (dcache.io.cpu.resp_val && - (dcache.io.cpu.resp_tag === r_cpu_req_tag) && - (flush_resp_count === ~Bits(0, indexbits))) { flush_waiting <== Bool(false); } + when (dcache.io.cpu.req_rdy && (flush_count === ~Bits(0, indexbits))) { + flushing <== Bool(false); + } + when 
(dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag) && (flush_resp_count === ~Bits(0, indexbits))) { + flush_waiting <== Bool(false); + } - when (flushing && dcache.io.cpu.req_rdy) { flush_count <== flush_count + UFix(1,1); } - when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag)) - { flush_resp_count <== flush_resp_count + UFix(1,1); } + when (flushing && dcache.io.cpu.req_rdy) { + flush_count <== flush_count + UFix(1,1); + } + when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag(5,0) === r_cpu_req_tag)) { + flush_resp_count <== flush_resp_count + UFix(1,1); + } dcache.io.cpu.req_val := (io.cpu.req_val && (io.cpu.req_cmd != M_FLA) && !flush_waiting) || flushing; - dcache.io.cpu.req_cmd := Mux(flushing, M_FLA, io.cpu.req_cmd); + dcache.io.cpu.req_cmd := Mux(flushing, M_FLA, io.cpu.req_cmd); dcache.io.cpu.req_addr := Mux(flushing, Cat(Bits(0,tagmsb-taglsb+1), flush_count, Bits(0,offsetbits)).toUFix, io.cpu.req_addr); dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); dcache.io.cpu.req_type := io.cpu.req_type; @@ -92,9 +97,10 @@ class rocketDCacheDM_flush(lines: Int, addrbits: Int) extends Component { } -class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { +class rocketDCacheDM(lines: Int) extends Component { val io = new ioDCacheDM(); + val addrbits = PADDR_BITS; val indexbits = ceil(log10(lines)/log10(2)).toInt; val offsetbits = 6; val tagmsb = addrbits - 1; @@ -119,9 +125,10 @@ class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { val p_store_type = Reg(resetVal = Bits(0,3)); val p_store_valid = Reg(resetVal = Bool(false)); - val req_load = (r_cpu_req_cmd === M_XRD); + val req_load = (r_cpu_req_cmd === M_XRD) || (r_cpu_req_cmd === M_PRD); val req_store = (r_cpu_req_cmd === M_XWR); val req_flush = (r_cpu_req_cmd === M_FLA); + val req_ptw_load = (r_cpu_req_cmd === M_PRD); when (io.cpu.req_val && io.cpu.req_rdy) { r_cpu_req_addr <== 
io.cpu.req_addr; @@ -288,7 +295,8 @@ class rocketDCacheDM(lines: Int, addrbits: Int) extends Component { ((state === s_resolve_miss) && req_flush); io.cpu.resp_miss := miss; - io.cpu.resp_tag := Cat(Bits(0,1), r_cpu_req_type, r_cpu_req_addr(2,0), r_cpu_req_tag); +// io.cpu.resp_tag := Cat(Bits(0,1), r_cpu_req_type, r_cpu_req_addr(2,0), r_cpu_req_tag); + io.cpu.resp_tag := Cat(req_ptw_load, r_cpu_req_type, r_cpu_req_addr(2,0), r_cpu_req_tag); io.cpu.resp_data := Mux(r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 55ca866d..ce6e651a 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -12,6 +12,8 @@ class ioDpathAll extends Bundle() val debug = new ioDebug(); val dmem = new ioDmem(List("req_addr", "req_data", "req_tag", "resp_val", "resp_tag", "resp_data")).flip(); val imem = new ioImem(List("req_addr", "resp_data")).flip(); + val itlb_xcpt = Bool('input); + val ptbr = UFix(PADDR_BITS, 'output); } class rocketDpath extends Component @@ -44,15 +46,15 @@ class rocketDpath extends Component val if_reg_pc = Reg(width = 32, resetVal = UFix(0, 32)); // instruction decode definitions - val id_reg_valid = Reg(resetVal = Bool(false)); - val id_reg_pc = Reg(resetVal = UFix(0,32)); - val id_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); + val id_reg_valid = Reg(resetVal = Bool(false)); + val id_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); + val id_reg_pc_plus4 = Reg(resetVal = UFix(0,VADDR_BITS)); val id_reg_inst = Reg(resetVal = NOP); // execute definitions val ex_reg_valid = Reg(resetVal = Bool(false)); - val ex_reg_pc = Reg(resetVal = UFix(0,32)); - val ex_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); + val ex_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); + val ex_reg_pc_plus4 = Reg(resetVal = UFix(0,VADDR_BITS)); val ex_reg_inst = Reg(resetVal = Bits(0,32)); val ex_reg_raddr2 = Reg(resetVal = UFix(0,5)); val 
ex_reg_raddr1 = Reg(resetVal = UFix(0,5)); @@ -79,8 +81,8 @@ class rocketDpath extends Component // memory definitions val mem_reg_valid = Reg(resetVal = Bool(false)); - val mem_reg_pc = Reg(resetVal = UFix(0,32)); - val mem_reg_pc_plus4 = Reg(resetVal = UFix(0,32)); + val mem_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); + val mem_reg_pc_plus4 = Reg(resetVal = UFix(0,VADDR_BITS)); val mem_reg_waddr = Reg(resetVal = UFix(0,5)); val mem_reg_wdata = Reg(resetVal = Bits(0,64)); val mem_reg_raddr2 = Reg(resetVal = UFix(0,5)); @@ -94,7 +96,7 @@ class rocketDpath extends Component // writeback definitions val wb_reg_valid = Reg(resetVal = Bool(false)); - val wb_reg_pc = Reg(resetVal = UFix(0,32)); + val wb_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); val wb_reg_waddr = Reg(resetVal = UFix(0,5)); val wb_reg_wdata = Reg(resetVal = Bits(0,64)); val wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); @@ -112,7 +114,7 @@ class rocketDpath extends Component val r_dmem_resp_data = Reg(resetVal = Bits(0,64)); // instruction fetch stage - val if_pc_plus4 = if_reg_pc + UFix(4, 32); + val if_pc_plus4 = if_reg_pc + UFix(4); val ex_sign_extend = Cat(Fill(52, ex_reg_inst(21)), ex_reg_inst(21,10)); @@ -135,16 +137,17 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, - Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_pcr(31,0).toUFix, + Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_pcr(VADDR_BITS-1,0).toUFix, Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, - UFix(0, 32)))))))))); + UFix(0, VADDR_BITS)))))))))); when (!io.host.start){ - if_reg_pc <== UFix(0, 32); //32'hFFFF_FFFC; + if_reg_pc <== UFix(0, VADDR_BITS); //32'hFFFF_FFFC; } when (!io.ctrl.stallf) { if_reg_pc <== if_next_pc; } + io.imem.req_addr := Mux(io.ctrl.stallf, if_reg_pc, @@ -334,7 +337,7 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, 
req_rdy) connect to control module - io.dmem.req_addr := ex_alu_out; + io.dmem.req_addr := ex_alu_out(PADDR_BITS-1,0); io.dmem.req_data := ex_reg_rs2; io.dmem.req_tag := ex_reg_waddr; @@ -354,7 +357,8 @@ class rocketDpath extends Component // pcr.io.cause := ex_reg_ctrl_cause; // pcr.io.pc := ex_reg_pc; - io.ctrl.status := pcr.io.status; + io.ctrl.status := pcr.io.status; + io.ptbr := pcr.io.ptbr; io.debug.error_mode := pcr.io.debug.error_mode; io.debug.log_control := pcr.io.debug.log_control; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 4c53db11..7011d88b 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -1,19 +1,18 @@ package Top { -import Chisel._ +import Chisel._; import Node._; - import Constants._; class ioDpathBTB extends Bundle() { - val current_pc4 = UFix(32, 'input); + val current_pc4 = UFix(VADDR_BITS, 'input); val hit = Bool('output); - val target = UFix(32, 'output); + val target = UFix(VADDR_BITS, 'output); val wen = Bool('input); - val correct_pc4 = UFix(32, 'input); - val correct_target = UFix(32, 'input); + val correct_pc4 = UFix(VADDR_BITS, 'input); + val correct_target = UFix(VADDR_BITS, 'input); } class rocketDpathBTB extends Component @@ -21,11 +20,15 @@ class rocketDpathBTB extends Component override val io = new ioDpathBTB(); val rst_lwlr_pf = Mem(4, io.wen, io.correct_pc4(3, 2), UFix(1, 1), resetVal = UFix(0, 1)); val lwlr_pf = Mem(4, io.wen, io.correct_pc4(3, 2), - Cat(io.correct_pc4(31,4), io.correct_target(31,2)), resetVal = UFix(0, 1)); + Cat(io.correct_pc4(VADDR_BITS-1,4), io.correct_target(VADDR_BITS-1,2)), resetVal = UFix(0, 1)); +// Cat(io.correct_pc4(31,4), io.correct_target(31,2)), resetVal = UFix(0, 1)); val is_val = rst_lwlr_pf(io.current_pc4(3, 2)); val tag_target = lwlr_pf(io.current_pc4(3, 2)); - io.hit := (is_val & (tag_target(57,30) === io.current_pc4(31, 4))).toBool; - io.target := Cat(tag_target(29, 0), 
Bits(0,2)).toUFix; + io.hit := (is_val & (tag_target(2*VADDR_BITS-7,VADDR_BITS-2) === io.current_pc4(VADDR_BITS-1, 4))).toBool; + io.target := Cat(tag_target(VADDR_BITS-3, 0), Bits(0,2)).toUFix; + +// io.hit := (is_val & (tag_target(57,30) === io.current_pc4(31, 4))).toBool; +// io.target := Cat(tag_target(29, 0), Bits(0,2)).toUFix; } class ioDpathPCR extends Bundle() @@ -35,20 +38,19 @@ class ioDpathPCR extends Bundle() val r = new ioReadPort(); val w = new ioWritePort(); - val status = Bits(8, 'output); + val status = Bits(17, 'output); + val ptbr = UFix(PADDR_BITS, 'output); val exception = Bool('input); val cause = UFix(5, 'input); - val pc = UFix(32, 'input); + val pc = UFix(VADDR_BITS, 'input); val eret = Bool('input); } class rocketDpathPCR extends Component { - override val io = new ioDpathPCR(); - - val HAVE_FPU = Bool(false); - val HAVE_VEC = Bool(false); + val io = new ioDpathPCR(); val w = 32; + val reg_epc = Reg(resetVal = Bits(0, w)); val reg_badvaddr = Reg(resetVal = Bits(0, w)); val reg_ebase = Reg(resetVal = Bits(0, w)); @@ -59,9 +61,11 @@ class rocketDpathPCR extends Component val reg_fromhost = Reg(resetVal = Bits(0, w)); val reg_k0 = Reg(resetVal = Bits(0, 2*w)); val reg_k1 = Reg(resetVal = Bits(0, 2*w)); + val reg_ptbr = Reg(resetVal = UFix(0, PADDR_BITS)); val reg_error_mode = Reg(resetVal = Bool(false)); val reg_log_control = Reg(resetVal = Bool(false)); + val reg_status_vm = Reg(resetVal = Bool(false)); val reg_status_im = Reg(resetVal = Bits(0,8)); val reg_status_sx = Reg(resetVal = Bool(true)); val reg_status_ux = Reg(resetVal = Bool(true)); @@ -74,7 +78,8 @@ class rocketDpathPCR extends Component val reg_status = Cat(reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, Bits(0,1), reg_status_ev, reg_status_ef, reg_status_et); val rdata = Wire() { Bits() }; - io.status := reg_status; + io.status := Cat(reg_status_vm, reg_status_im, reg_status); + io.ptbr := reg_ptbr; io.host.to := Mux(io.host.from_wen, Bits(0, w), reg_tohost); 
io.debug.error_mode := reg_error_mode; io.debug.log_control := reg_log_control; @@ -110,6 +115,7 @@ class rocketDpathPCR extends Component when (!io.exception && !io.eret && io.w.en) { when (io.w.addr === PCR_STATUS) { + reg_status_vm <== io.w.data(16).toBool; reg_status_im <== io.w.data(15,8); reg_status_sx <== io.w.data(7).toBool; reg_status_ux <== io.w.data(6).toBool; @@ -129,11 +135,12 @@ class rocketDpathPCR extends Component when (io.w.addr === PCR_FROMHOST) { reg_fromhost <== io.w.data(w-1,0); } when (io.w.addr === PCR_K0) { reg_k0 <== io.w.data; } when (io.w.addr === PCR_K1) { reg_k1 <== io.w.data; } + when (io.w.addr === PCR_PTBR) { reg_ptbr <== Cat(io.w.data(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } } when (!io.r.en) { rdata <== Bits(0,2*w); } switch (io.r.addr) { - is (PCR_STATUS) { rdata <== Cat(Bits(0,w+16), reg_status_im, reg_status); } + is (PCR_STATUS) { rdata <== Cat(Bits(0,w+15), reg_status_vm, reg_status_im, reg_status); } is (PCR_EPC) { rdata <== Cat(Fill(w, reg_epc(w-1)), reg_epc); } is (PCR_BADVADDR) { rdata <== Cat(Fill(w, reg_badvaddr(w-1)), reg_badvaddr); } is (PCR_EVEC) { rdata <== Cat(Fill(w, reg_ebase(w-1)), reg_ebase); } @@ -146,6 +153,7 @@ class rocketDpathPCR extends Component is (PCR_TOHOST) { rdata <== Cat(Fill(w, reg_tohost(w-1)), reg_tohost); } is (PCR_K0) { rdata <== reg_k0; } is (PCR_K1) { rdata <== reg_k1; } + is (PCR_PTBR) { rdata <== Cat(Bits(0,2*w-PADDR_BITS), reg_ptbr); } otherwise { rdata <== Bits(0,2*w); } } } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 30e4fc17..9b68556a 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -1,7 +1,8 @@ package Top { -import Chisel._ +import Chisel._; import Node._; +import Constants._; import scala.math._; // interface between I$ and processor (32 bits wide) @@ -17,7 +18,7 @@ class ioImem(view: List[String] = null) extends Bundle (view) // interface between I$ and memory (128 bits wide) 
class ioIcache(view: List[String] = null) extends Bundle (view) { - val req_addr = UFix(32, 'input); + val req_addr = UFix(PADDR_BITS, 'input); val req_val = Bool('input); val req_rdy = Bool('output); val resp_data = Bits(128, 'output); @@ -55,9 +56,10 @@ class rocketSRAMsp(entries: Int, width: Int) extends Component { // addr_bits = address width (word addressable) bits // 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines -class rocketICacheDM(lines: Int, addrbits : Int) extends Component { +class rocketICacheDM(lines: Int) extends Component { val io = new ioICacheDM(); + val addrbits = PADDR_BITS; val indexbits = ceil(log10(lines)/log10(2)).toInt; val offsetbits = 6; val tagmsb = addrbits - 1; diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala new file mode 100644 index 00000000..71f6073d --- /dev/null +++ b/rocket/src/main/scala/itlb.scala @@ -0,0 +1,178 @@ +package Top +{ + +import Chisel._; +import Node._; +import Constants._; +import scala.math._; + +class ioCAM(addr_bits: Int, tag_bits: Int) extends Bundle { + val tag = Bits(tag_bits, 'input); + val hit = Bool('output); + val hit_addr = UFix(addr_bits, 'output); + + val write = Bool('input); + val write_tag = Bits(tag_bits, 'input); + val write_addr = UFix(addr_bits, 'input); +} + +class rocketCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Component { + val io = new ioCAM(addr_bits, tag_bits); + val cam_tags = Mem(entries, io.write, io.write_addr, io.write_tag); + + val l_hit = Wire() { Bool() }; + val l_hit_addr = Wire() { UFix() }; + + for (i <- 0 to entries-1) { + when (cam_tags(UFix(i)) === io.tag) { + l_hit <== Bool(true); + l_hit_addr <== UFix(i,addr_bits); + } + } + + l_hit <== Bool(false); + l_hit_addr <== UFix(0, addr_bits); + + io.hit := l_hit; + io.hit_addr := l_hit_addr; +} + +// interface between TLB and PTW +class ioTLB_PTW extends Bundle +{ + // requests + val req_val = Bool('output); + val req_rdy = Bool('input); + val req_vpn = 
Bits(VPN_BITS, 'output); + // responses + val resp_val = Bool('input); + val resp_err = Bool('input); + val resp_ppn = Bits(PPN_BITS, 'input); + val resp_perm = Bits(PERM_BITS, 'input); +} + +// interface between ITLB and fetch stage of pipeline +class ioITLB_CPU extends Bundle +{ + // status bits (from PCR), to check current permission and whether VM is enabled + val status = Bits(17, 'input); + // invalidate all TLB entries + val invalidate = Bool('input); + // lookup requests + val req_val = Bool('input); + val req_rdy = Bool('output); + val req_asid = Bits(ASID_BITS, 'input); + val req_vpn = Bits(VPN_BITS, 'input); + // lookup responses + val resp_val = Bool('output); + val resp_ppn = Bits(PPN_BITS, 'output); + val exception = Bool('output); +} + +class ioITLB extends Bundle +{ + val cpu = new ioITLB_CPU(); + val ptw = new ioTLB_PTW(); +} + +class rocketITLB(entries: Int) extends Component +{ + val addr_bits = ceil(log10(entries)/log10(2)).toInt; + val io = new ioITLB(); + + val s_ready :: s_request :: s_wait :: Nil = Enum(3) { UFix() }; + val state = Reg(resetVal = s_ready); + + val tag_cam = new rocketCAM(entries, addr_bits, ASID_BITS+VPN_BITS); + + val lookup_tag = Cat(io.cpu.req_asid, io.cpu.req_vpn); + val r_refill_tag = Reg(resetVal = Bits(0, ASID_BITS+VPN_BITS)); + val r_refill_waddr = Reg(resetVal = UFix(0, addr_bits)); + val repl_count = Reg(resetVal = UFix(0, addr_bits)); + + val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); + + tag_cam.io.tag := lookup_tag; + tag_cam.io.write := io.ptw.resp_val; + tag_cam.io.write_tag := r_refill_tag; + tag_cam.io.write_addr := r_refill_waddr; + val tag_hit_addr = tag_cam.io.hit_addr; + + // extract fields from status register + val status_mode = io.cpu.status(6).toBool; + val status_vm = io.cpu.status(16).toBool + + // extract fields from PT permission bits + val ptw_perm_ux = io.ptw.resp_perm(4); + val ptw_perm_sx = io.ptw.resp_perm(7); + + // valid bit array + val vb_array = 
Reg(resetVal = Bits(0, entries)); + when (io.cpu.invalidate) { + vb_array <== Bits(0, entries); + } + when (io.ptw.resp_val) { + vb_array <== vb_array.bitSet(r_refill_waddr, Bool(true)); + } + + // permission bit arrays + val ux_array = Reg(resetVal = Bits(0, entries)); // user execute permission + val sx_array = Reg(resetVal = Bits(0, entries)); // supervisor execute permission + when (io.ptw.resp_val) { + ux_array <== ux_array.bitSet(r_refill_waddr, ptw_perm_ux); + sx_array <== ux_array.bitSet(r_refill_waddr, ptw_perm_sx); + } + + // high if there are any unused (invalid) entries in the ITLB +// val invalid_entry = orR(~vb_array); + val invalid_entry = ~vb_array.toUFix.orR(); + val ie_enc = new priorityEncoder(entries); + ie_enc.io.in := vb_array.toUFix; + val ie_addr = ie_enc.io.out; + + val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; + + val tag_hit = tag_cam.io.hit && vb_array(tag_hit_addr).toBool; + val lookup_miss = (state === s_ready) && status_vm && io.cpu.req_val && !tag_hit; + + when (lookup_miss) { + r_refill_tag <== lookup_tag; + r_refill_waddr <== repl_waddr; + when (!invalid_entry) { + repl_count <== repl_count + UFix(1); + } + } + + val itlb_exception = + tag_hit && + ((status_mode && !sx_array(tag_hit_addr).toBool) || + (!status_mode && !ux_array(tag_hit_addr).toBool)); + + io.cpu.req_rdy := (state === s_ready); + io.cpu.resp_val := Mux(status_vm, tag_hit, io.cpu.req_val); + io.cpu.resp_ppn := Mux(status_vm, io.cpu.req_vpn(PPN_BITS-1, 0), tag_ram(tag_hit_addr)); + io.cpu.exception := itlb_exception; + + io.ptw.req_val := (state === s_request); + io.ptw.req_vpn := r_refill_tag; + + // control state machine + switch (state) { + is (s_ready) { + when (status_vm && io.cpu.req_val && !tag_hit) { + state <== s_request; + } + } + is (s_request) { + when (io.ptw.req_rdy) { + state <== s_wait; + } + } + is (s_wait) { + when (io.ptw.resp_val) { + state <== s_ready; + } + } + } +} +} \ No newline at end of file diff --git 
a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala new file mode 100644 index 00000000..92cb219e --- /dev/null +++ b/rocket/src/main/scala/ptw.scala @@ -0,0 +1,168 @@ +package Top { + +import Chisel._; +import Node._; +import Constants._; +import scala.math._; + +class ioDmemArbiter extends Bundle +{ + val ptw = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_addr", "resp_data", "resp_val")); + val cpu = new ioDmem(); + val mem = new ioDmem().flip(); +} + +class rocketDmemArbiter extends Component +{ + val io = new ioDmemArbiter(); + + io.mem.req_val := io.ptw.req_val || io.cpu.req_val; + io.mem.req_cmd := Mux(io.ptw.req_val, io.ptw.req_cmd, io.cpu.req_cmd); + io.mem.req_type := Mux(io.ptw.req_val, io.ptw.req_type, io.cpu.req_type); + io.mem.req_addr := Mux(io.ptw.req_val, io.ptw.req_addr, io.cpu.req_addr); + io.mem.req_data := io.cpu.req_data; + io.mem.req_tag := Mux(io.ptw.req_val, Bits(0,5), io.cpu.req_tag); + + io.ptw.req_rdy := io.mem.req_rdy; + io.cpu.req_rdy := io.mem.req_rdy && !io.ptw.req_val; + io.cpu.resp_miss := io.mem.resp_miss; // FIXME + + io.cpu.resp_val := io.mem.resp_val && !io.mem.resp_tag(12).toBool; + io.ptw.resp_val := io.mem.resp_val && io.mem.resp_tag(12).toBool; + + io.ptw.resp_data := io.mem.resp_data; + io.cpu.resp_data := io.mem.resp_data; + io.cpu.resp_tag := io.mem.resp_tag; + +} + +class ioPTW extends Bundle +{ + val itlb = new ioTLB_PTW().flip(); +// val dtlb = new ioTLB_PTW.flip(); + val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_addr", "resp_data", "resp_val")).flip(); + val ptbr = UFix(PADDR_BITS, 'input); +} + +class rocketPTW extends Component +{ + val io = new ioPTW(); + + val s_ready :: s_l1_req :: s_l1_wait :: s_l1_fake :: s_l2_req :: s_l2_wait :: s_l2_fake:: s_l3_req :: s_l3_wait :: s_done :: s_error :: Nil = Enum(11) { UFix() }; + val state = Reg(resetVal = s_ready); + + val r_req_vpn = Reg(resetVal = Bits(0,VPN_BITS)); + + val req_addr = Reg(resetVal = 
UFix(0,PPN_BITS+PGIDX_BITS)); + val r_resp_ppn = Reg(resetVal = Bits(0,PPN_BITS)); + val r_resp_perm = Reg(resetVal = Bits(0,PERM_BITS)); + + val vpn_idx = Mux(state === s_l2_wait, r_req_vpn(9,0), r_req_vpn(19,10)); + + when ((state === s_ready) && io.itlb.req_val) { + r_req_vpn <== io.itlb.req_vpn; + req_addr <== Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-10)).toUFix; + } + when (io.dmem.resp_val) { + req_addr <== Cat(io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS), vpn_idx).toUFix; + r_resp_perm <== io.dmem.resp_data(9,4); + r_resp_ppn <== io.dmem.resp_data(PPN_BITS-1, PGIDX_BITS); + } + + io.dmem.req_val := + (state === s_l1_req) || + (state === s_l2_req) || + (state === s_l3_req); + + io.dmem.req_cmd := M_PRD; + io.dmem.req_type := MT_D; + io.dmem.req_addr := req_addr; + + io.itlb.resp_val := (state === s_done) || (state === s_l1_fake) || (state === s_l2_fake); + io.itlb.resp_err := (state === s_error); + io.itlb.resp_perm := r_resp_perm; + io.itlb.resp_ppn := + Mux(state === s_l1_fake, Cat(r_resp_ppn(PADDR_BITS-1, PADDR_BITS-7), r_req_vpn(VPN_BITS-8, 0)), + Mux(state === s_l2_fake, Cat(r_resp_ppn(PADDR_BITS-1, PADDR_BITS-17), r_req_vpn(VPN_BITS-18, 0)), + r_resp_ppn)); + + val resp_ptd = (io.dmem.resp_data(1,0) === Bits(1,2)); + val resp_pte = (io.dmem.resp_data(1,0) === Bits(2,2)); + + // control state machine + switch (state) { + is (s_ready) { + when (io.itlb.req_val) { + state <== s_l1_req; + } + } + // level 1 + is (s_l1_req) { + when (io.dmem.req_rdy) { + state <== s_l1_wait; + } + } + is (s_l1_wait) { + when (io.dmem.resp_val) { + when (resp_ptd) { // page table descriptor + state <== s_l2_req; + } + when (resp_pte) { // page table entry + state <== s_l1_fake; + } + otherwise { + state <== s_error; + } + } + } + is (s_l1_fake) { + state <== s_ready; + } + // level 2 + is (s_l2_req) { + when (io.dmem.req_rdy) { + state <== s_l2_wait; + } + } + is (s_l2_wait) { + when (io.dmem.resp_val) { + when (resp_ptd) { // page table 
descriptor + state <== s_l3_req; + } + when (resp_pte) { // page table entry + state <== s_l2_fake; + } + otherwise { + state <== s_error; + } + } + } + is (s_l2_fake) { + state <== s_ready; + } + // level 3 + is (s_l3_req) { + when (io.dmem.req_rdy) { + state <== s_l3_wait; + } + } + is (s_l3_wait) { + when (io.dmem.resp_val) { + when (resp_pte) { // page table entry + state <== s_done; + } + otherwise { + state <== s_error; + } + } + } + is (s_done) { + state <== s_ready; + } + is (s_error) { + state <== s_ready; + } + } +} + +} \ No newline at end of file diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index bb035340..26f27010 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -15,9 +15,9 @@ class Top() extends Component { val io = new ioTop(); val cpu = new rocketProc(); - val icache = new rocketICacheDM(128, 32); // lines, address bits + val icache = new rocketICacheDM(128); // # 64 byte cache lines val icache_pf = new rocketIPrefetcher(); - val dcache = new rocketDCacheDM_flush(128, 32); + val dcache = new rocketDCacheDM_flush(128); val arbiter = new rocketMemArbiter(); arbiter.io.mem ^^ io.mem; diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala new file mode 100644 index 00000000..9c60d726 --- /dev/null +++ b/rocket/src/main/scala/util.scala @@ -0,0 +1,52 @@ +package Top +{ + +import Chisel._ +import Node._; +import scala.math._; + +class ioPriorityDecoder(in_width: Int, out_width: Int) extends Bundle +{ + val in = UFix(in_width, 'input); + val out = Bits(out_width, 'output); +} + +class priorityDecoder(width: Int) extends Component +{ + val in_width = ceil(log10(width)/log10(2)).toInt; + val io = new ioPriorityEncoder(in_width, width); + val l_out = Wire() { Bits() }; + + for (i <- 0 to width-1) { + when (io.in === UFix(i, in_width)) { + l_out <== Bits(1,1) << UFix(i); + } + } + + l_out <== Bits(0, width); + io.out := l_out; +} + +class 
ioPriorityEncoder(in_width: Int, out_width: Int) extends Bundle +{ + val in = Bits(in_width, 'input); + val out = UFix(out_width, 'output); +} + +class priorityEncoder(width: Int) extends Component +{ + val out_width = ceil(log10(width)/log10(2)).toInt; + val io = new ioPriorityDecoder(width, out_width); + val l_out = Wire() { UFix() }; + + for (i <- 0 to width-1) { + when (io.in(i).toBool) { + l_out <== UFix(i, out_width); + } + } + + l_out <== UFix(0, out_width); + io.out := l_out; +} + +} \ No newline at end of file From c29d2821b47f5e7babf5f270b1909692336b0ead Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Wed, 9 Nov 2011 21:54:11 -0800 Subject: [PATCH 0018/1087] cleanup, fixes, initial commit for dtlb.scala --- rocket/src/main/scala/cpu.scala | 9 +- rocket/src/main/scala/ctrl.scala | 18 ++- rocket/src/main/scala/dpath.scala | 15 ++- rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/dtlb.scala | 166 +++++++++++++++++++++++++ rocket/src/main/scala/icache.scala | 4 +- rocket/src/main/scala/itlb.scala | 40 ++++-- rocket/src/main/scala/ptw.scala | 9 +- 8 files changed, 230 insertions(+), 33 deletions(-) create mode 100644 rocket/src/main/scala/dtlb.scala diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index efec38ef..3b09b0f1 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -4,7 +4,7 @@ import Chisel._; import Node._; import Constants._; -class ioDebug extends Bundle() +class ioDebug(view: List[String] = null) extends Bundle(view) { val error_mode = Bool('output); val log_control = Bool('output); @@ -66,13 +66,14 @@ class rocketProc extends Component ctrl.io.imem.req_rdy := itlb.io.cpu.req_rdy && io.imem.req_rdy; itlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR - itlb.io.cpu.req_vpn := dpath.io.imem.req_addr(VADDR_BITS-1,PGIDX_BITS); +// itlb.io.cpu.req_vpn := dpath.io.imem.req_addr(VADDR_BITS-1,PGIDX_BITS); + itlb.io.cpu.req_addr := 
dpath.io.imem.req_addr; io.imem.req_val := itlb.io.cpu.resp_val; - io.imem.req_addr := Cat(itlb.io.cpu.resp_ppn, dpath.io.imem.req_addr(PGIDX_BITS-1,0)).toUFix; + io.imem.req_addr := itlb.io.cpu.resp_addr; ctrl.io.imem.resp_val := io.imem.resp_val; - dpath.io.itlb_xcpt := itlb.io.cpu.exception; + ctrl.io.itlb_xcpt := itlb.io.cpu.exception; ptw.io.itlb <> itlb.io.ptw; ptw.io.ptbr := dpath.io.ptbr; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index dfffc8ba..cef554f7 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -37,6 +37,7 @@ class ioCtrlDpath extends Bundle() val xcpt_privileged = Bool('output); val xcpt_fpu = Bool('output); val xcpt_syscall = Bool('output); + val xcpt_itlb = Bool('output); val eret = Bool('output); val mem_load = Bool('output); val wen = Bool('output); @@ -65,9 +66,9 @@ class ioCtrlAll extends Bundle() val dpath = new ioCtrlDpath(); val console = new ioConsole(List("rdy", "valid")); val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); - val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_val")).flip(); + val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss")).flip(); val host = new ioHost(List("start")); -// val itlb_xcpt = Bool('input); + val itlb_xcpt = Bool('input); } class rocketCtrl extends Component @@ -226,7 +227,8 @@ class rocketCtrl extends Component val id_stall_waddr = sboard.io.stallc; val id_stall_ra = sboard.io.stallra; - val id_reg_btb_hit = Reg(width = 1, resetVal = Bool(false)); + val id_reg_btb_hit = Reg(resetVal = Bool(false)); + val id_reg_itlb_xcpt = Reg(resetVal = Bool(false)); val ex_reg_br_type = Reg(){UFix(width = 4)}; val ex_reg_btb_hit = Reg(){Bool()}; val ex_reg_div_mul_val = Reg(){Bool()}; @@ -235,12 +237,15 @@ class rocketCtrl extends Component val ex_reg_mem_type = Reg(){UFix(width = 3)}; val ex_reg_eret = Reg(resetVal = Bool(false)); val ex_reg_privileged 
= Reg(resetVal = Bool(false)); +// val id_reg_itlb_xcpt = Reg(resetVal = Bool(false)); when (!io.dpath.stalld) { when (io.dpath.killf) { + id_reg_itlb_xcpt <== Bool(false); id_reg_btb_hit <== Bool(false); } otherwise{ + id_reg_itlb_xcpt <== io.itlb_xcpt; id_reg_btb_hit <== io.dpath.btb_hit; } } @@ -254,6 +259,7 @@ class rocketCtrl extends Component ex_reg_mem_type <== UFix(0, 3); ex_reg_eret <== Bool(false); ex_reg_privileged <== Bool(false); +// ex_reg_itlb_xcpt <== Bool(false); } otherwise { ex_reg_br_type <== id_br_type; @@ -264,6 +270,7 @@ class rocketCtrl extends Component ex_reg_mem_type <== id_mem_type; ex_reg_eret <== id_eret.toBool; ex_reg_privileged <== id_privileged.toBool; +// ex_reg_itlb_xcpt <== id_reg_itlb_xcpt; } val beq = io.dpath.br_eq; @@ -284,7 +291,7 @@ class rocketCtrl extends Component val jr_taken = (ex_reg_br_type === BR_JR); val j_taken = (ex_reg_br_type === BR_J); - io.imem.req_val := io.host.start; + io.imem.req_val := io.host.start; // FIXME // io.imem.req_val := Bool(true); io.dmem.req_val := ex_reg_mem_val && ~io.dpath.killx; @@ -330,7 +337,7 @@ class rocketCtrl extends Component val replay_mem = io.dmem.resp_miss; val kill_ex = replay_ex | replay_mem | mem_reg_privileged; - val kill_mem = io.dpath.exception; + val kill_mem = io.dpath.exception; // TODO: add load/store related exceptions dcache_miss <== io.dmem.resp_miss; @@ -461,6 +468,7 @@ class rocketCtrl extends Component io.dpath.xcpt_privileged := (id_privileged & ~io.dpath.status(5)).toBool; io.dpath.xcpt_fpu := Bool(false); io.dpath.xcpt_syscall := id_syscall.toBool; + io.dpath.xcpt_itlb := id_reg_itlb_xcpt; } } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index ce6e651a..d687bb8d 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -5,14 +5,20 @@ import Node._; import Constants._ import Instructions._ +class ioDpathImem extends Bundle() +{ + val req_addr = UFix(VADDR_BITS, 'output); + val resp_data = 
Bits(32, 'input); +} + class ioDpathAll extends Bundle() { val host = new ioHost(); val ctrl = new ioCtrlDpath().flip(); val debug = new ioDebug(); val dmem = new ioDmem(List("req_addr", "req_data", "req_tag", "resp_val", "resp_tag", "resp_data")).flip(); - val imem = new ioImem(List("req_addr", "resp_data")).flip(); - val itlb_xcpt = Bool('input); +// val imem = new ioImem(List("req_addr", "resp_data")).flip(); + val imem = new ioDpathImem(); val ptbr = UFix(PADDR_BITS, 'output); } @@ -243,13 +249,14 @@ class rocketDpath extends Component id_rdata2))))); // write value to cause register based on exception type - val id_exception = io.ctrl.xcpt_illegal || io.ctrl.xcpt_privileged || io.ctrl.xcpt_fpu || io.ctrl.xcpt_syscall; + val id_exception = io.ctrl.xcpt_illegal || io.ctrl.xcpt_privileged || io.ctrl.xcpt_fpu || io.ctrl.xcpt_syscall || io.ctrl.xcpt_itlb; val id_cause = + Mux(io.ctrl.xcpt_itlb, UFix(1,5), Mux(io.ctrl.xcpt_illegal, UFix(2,5), Mux(io.ctrl.xcpt_privileged, UFix(3,5), Mux(io.ctrl.xcpt_fpu, UFix(4,5), Mux(io.ctrl.xcpt_syscall, UFix(6,5), - UFix(0,5))))); + UFix(0,5)))))); io.ctrl.inst := id_reg_inst; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 7011d88b..b9de3dab 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -34,7 +34,7 @@ class rocketDpathBTB extends Component class ioDpathPCR extends Bundle() { val host = new ioHost(List("from", "from_wen", "to")); - val debug = new ioDebug(); + val debug = new ioDebug(List("error_mode", "log_control")); val r = new ioReadPort(); val w = new ioWritePort(); diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala new file mode 100644 index 00000000..961e6041 --- /dev/null +++ b/rocket/src/main/scala/dtlb.scala @@ -0,0 +1,166 @@ +package Top +{ + +import Chisel._; +import Node._; +import Constants._; +import scala.math._; + +// interface between DTLB and fetch stage of pipeline +class 
ioDTLB_CPU(view: List[String] = null) extends Bundle(view) +{ + // status bits (from PCR), to check current permission and whether VM is enabled + val status = Bits(17, 'input); + // invalidate all TLB entries + val invalidate = Bool('input); + // lookup requests + val req_val = Bool('input); + val req_cmd = Bits(4, 'input); // load/store/amo + val req_rdy = Bool('output); + val req_asid = Bits(ASID_BITS, 'input); + val req_addr = UFix(VADDR_BITS, 'input); + // lookup responses + val resp_val = Bool('output); + val resp_addr = UFix(PADDR_BITS, 'output); + val exception = Bool('output); +} + +class ioDTLB extends Bundle +{ + val cpu = new ioDTLB_CPU(); + val ptw = new ioTLB_PTW(); +} + +class rocketDTLB(entries: Int) extends Component +{ + val addr_bits = ceil(log10(entries)/log10(2)).toInt; + val io = new ioDTLB(); + + val s_ready :: s_request :: s_wait :: Nil = Enum(3) { UFix() }; + val state = Reg(resetVal = s_ready); + + val tag_cam = new rocketCAM(entries, addr_bits, ASID_BITS+VPN_BITS); + + val req_vpn = io.cpu.req_addr(VADDR_BITS-1,PGIDX_BITS); + val req_idx = io.cpu.req_addr(PGIDX_BITS-1,0); + val req_load = (io.cpu.req_cmd === M_XRD); + val req_store = (io.cpu.req_cmd === M_XWR); +// val req_amo = io.cpu.req_cmd(3).toBool; + + val lookup_tag = Cat(io.cpu.req_asid, req_vpn); + val r_refill_tag = Reg(resetVal = Bits(0, ASID_BITS+VPN_BITS)); + val r_refill_waddr = Reg(resetVal = UFix(0, addr_bits)); + val repl_count = Reg(resetVal = UFix(0, addr_bits)); + + val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); + + tag_cam.io.tag := lookup_tag; + tag_cam.io.write := io.ptw.resp_val; + tag_cam.io.write_tag := r_refill_tag; + tag_cam.io.write_addr := r_refill_waddr; + val tag_hit_addr = tag_cam.io.hit_addr; + + // extract fields from status register + val status_mode = io.cpu.status(6).toBool; // user/supervisor mode + val status_vm = io.cpu.status(16).toBool // virtual memory enable + + // extract fields from PT permission bits +// 
val ptw_perm_ux = io.ptw.resp_perm(0); + val ptw_perm_ur = io.ptw.resp_perm(1); + val ptw_perm_uw = io.ptw.resp_perm(2); +// val ptw_perm_sx = io.ptw.resp_perm(3); + val ptw_perm_sr = io.ptw.resp_perm(4); + val ptw_perm_sw = io.ptw.resp_perm(5); + + // valid bit array + val vb_array = Reg(resetVal = Bits(0, entries)); + when (io.cpu.invalidate) { + vb_array <== Bits(0, entries); + } + when (io.ptw.resp_val) { + vb_array <== vb_array.bitSet(r_refill_waddr, Bool(true)); + } + + // permission bit arrays + val ur_array = Reg(resetVal = Bits(0, entries)); // user execute permission + val uw_array = Reg(resetVal = Bits(0, entries)); // user execute permission + val sr_array = Reg(resetVal = Bits(0, entries)); // supervisor execute permission + val sw_array = Reg(resetVal = Bits(0, entries)); // supervisor execute permission + when (io.ptw.resp_val) { + ur_array <== ur_array.bitSet(r_refill_waddr, ptw_perm_ur); + uw_array <== ur_array.bitSet(r_refill_waddr, ptw_perm_uw); + sr_array <== sr_array.bitSet(r_refill_waddr, ptw_perm_sr); + sw_array <== sw_array.bitSet(r_refill_waddr, ptw_perm_sw); + } + + // when the page table lookup reports an error, set all permission + // bits to 0 so the next access will cause an exception + when (io.ptw.resp_err) { + ur_array <== ur_array.bitSet(r_refill_waddr, Bool(false)); + uw_array <== ur_array.bitSet(r_refill_waddr, Bool(false)); + sr_array <== sr_array.bitSet(r_refill_waddr, Bool(false)); + sw_array <== sw_array.bitSet(r_refill_waddr, Bool(false)); + } + + // high if there are any unused (invalid) entries in the TLB + val invalid_entry = ~vb_array.toUFix.orR(); + val ie_enc = new priorityEncoder(entries); + ie_enc.io.in := vb_array.toUFix; + val ie_addr = ie_enc.io.out; + + val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; + + val tag_hit = tag_cam.io.hit && vb_array(tag_hit_addr).toBool; + val lookup_miss = (state === s_ready) && status_vm && io.cpu.req_val && !tag_hit; + + when (lookup_miss) { + r_refill_tag <== 
lookup_tag; + r_refill_waddr <== repl_waddr; + when (!invalid_entry) { + repl_count <== repl_count + UFix(1); + } + } + + val dtlb_st_xcpt = + tag_hit && req_load && + ((status_mode && !sw_array(tag_hit_addr).toBool) || + (!status_mode && !uw_array(tag_hit_addr).toBool)); + + val dtlb_ld_xcpt = + tag_hit && req_store && + ((status_mode && !sr_array(tag_hit_addr).toBool) || + (!status_mode && !ur_array(tag_hit_addr).toBool)); + + val dtlb_exception = dtlb_st_xcpt || dtlb_ld_xcpt; + + io.cpu.req_rdy := (state === s_ready); + io.cpu.resp_val := Mux(status_vm, tag_hit, io.cpu.req_val); +// io.cpu.resp_ppn := Mux(status_vm, io.cpu.req_vpn(PPN_BITS-1, 0), tag_ram(tag_hit_addr)); + io.cpu.resp_addr := + Mux(status_vm, Cat(tag_ram(tag_hit_addr), req_idx), + io.cpu.req_addr(PADDR_BITS-1,0)).toUFix; + io.cpu.exception := dtlb_exception; + + io.ptw.req_val := (state === s_request); + io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); + + // control state machine + switch (state) { + is (s_ready) { + when (status_vm && io.cpu.req_val && !tag_hit) { + state <== s_request; + } + } + is (s_request) { + when (io.ptw.req_rdy) { + state <== s_wait; + } + } + is (s_wait) { + when (io.ptw.resp_val || io.ptw.resp_err) { + state <== s_ready; + } + } + } +} +} \ No newline at end of file diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 9b68556a..c72c7ca8 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -5,10 +5,10 @@ import Node._; import Constants._; import scala.math._; -// interface between I$ and processor (32 bits wide) +// interface between I$ and pipeline/ITLB (32 bits wide) class ioImem(view: List[String] = null) extends Bundle (view) { - val req_addr = UFix(32, 'input); + val req_addr = UFix(PADDR_BITS, 'input); val req_val = Bool('input); val req_rdy = Bool('output); val resp_data = Bits(32, 'output); diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 
71f6073d..5d80c4cb 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -52,7 +52,7 @@ class ioTLB_PTW extends Bundle } // interface between ITLB and fetch stage of pipeline -class ioITLB_CPU extends Bundle +class ioITLB_CPU(view: List[String] = null) extends Bundle(view) { // status bits (from PCR), to check current permission and whether VM is enabled val status = Bits(17, 'input); @@ -62,10 +62,12 @@ class ioITLB_CPU extends Bundle val req_val = Bool('input); val req_rdy = Bool('output); val req_asid = Bits(ASID_BITS, 'input); - val req_vpn = Bits(VPN_BITS, 'input); +// val req_vpn = Bits(VPN_BITS, 'input); + val req_addr = UFix(VADDR_BITS, 'input); // lookup responses val resp_val = Bool('output); - val resp_ppn = Bits(PPN_BITS, 'output); +// val resp_ppn = Bits(PPN_BITS, 'output); + val resp_addr = UFix(PADDR_BITS, 'output); val exception = Bool('output); } @@ -85,7 +87,9 @@ class rocketITLB(entries: Int) extends Component val tag_cam = new rocketCAM(entries, addr_bits, ASID_BITS+VPN_BITS); - val lookup_tag = Cat(io.cpu.req_asid, io.cpu.req_vpn); + val req_vpn = io.cpu.req_addr(VADDR_BITS-1,PGIDX_BITS); + val req_idx = io.cpu.req_addr(PGIDX_BITS-1,0); + val lookup_tag = Cat(io.cpu.req_asid, req_vpn); val r_refill_tag = Reg(resetVal = Bits(0, ASID_BITS+VPN_BITS)); val r_refill_waddr = Reg(resetVal = UFix(0, addr_bits)); val repl_count = Reg(resetVal = UFix(0, addr_bits)); @@ -99,12 +103,12 @@ class rocketITLB(entries: Int) extends Component val tag_hit_addr = tag_cam.io.hit_addr; // extract fields from status register - val status_mode = io.cpu.status(6).toBool; - val status_vm = io.cpu.status(16).toBool + val status_mode = io.cpu.status(6).toBool; // user/supervisor mode + val status_vm = io.cpu.status(16).toBool // virtual memory enable // extract fields from PT permission bits - val ptw_perm_ux = io.ptw.resp_perm(4); - val ptw_perm_sx = io.ptw.resp_perm(7); + val ptw_perm_ux = io.ptw.resp_perm(0); + val ptw_perm_sx = 
io.ptw.resp_perm(3); // valid bit array val vb_array = Reg(resetVal = Bits(0, entries)); @@ -120,7 +124,14 @@ class rocketITLB(entries: Int) extends Component val sx_array = Reg(resetVal = Bits(0, entries)); // supervisor execute permission when (io.ptw.resp_val) { ux_array <== ux_array.bitSet(r_refill_waddr, ptw_perm_ux); - sx_array <== ux_array.bitSet(r_refill_waddr, ptw_perm_sx); + sx_array <== sx_array.bitSet(r_refill_waddr, ptw_perm_sx); + } + + // when the page table lookup reports an error, set both execute permission + // bits to 0 so the next access will cause an exceptions + when (io.ptw.resp_err) { + ux_array <== ux_array.bitSet(r_refill_waddr, Bool(false)); + sx_array <== sx_array.bitSet(r_refill_waddr, Bool(false)); } // high if there are any unused (invalid) entries in the ITLB @@ -150,11 +161,14 @@ class rocketITLB(entries: Int) extends Component io.cpu.req_rdy := (state === s_ready); io.cpu.resp_val := Mux(status_vm, tag_hit, io.cpu.req_val); - io.cpu.resp_ppn := Mux(status_vm, io.cpu.req_vpn(PPN_BITS-1, 0), tag_ram(tag_hit_addr)); - io.cpu.exception := itlb_exception; +// io.cpu.resp_ppn := Mux(status_vm, io.cpu.req_vpn(PPN_BITS-1, 0), tag_ram(tag_hit_addr)); + io.cpu.resp_addr := + Mux(status_vm, Cat(tag_ram(tag_hit_addr), req_idx), + io.cpu.req_addr(PADDR_BITS-1,0)).toUFix; + io.cpu.exception := status_vm && itlb_exception; io.ptw.req_val := (state === s_request); - io.ptw.req_vpn := r_refill_tag; + io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); // control state machine switch (state) { @@ -169,7 +183,7 @@ class rocketITLB(entries: Int) extends Component } } is (s_wait) { - when (io.ptw.resp_val) { + when (io.ptw.resp_val || io.ptw.resp_err) { state <== s_ready; } } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 92cb219e..18fc8912 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -66,7 +66,7 @@ class rocketPTW extends Component when (io.dmem.resp_val) { req_addr <== 
Cat(io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS), vpn_idx).toUFix; r_resp_perm <== io.dmem.resp_data(9,4); - r_resp_ppn <== io.dmem.resp_data(PPN_BITS-1, PGIDX_BITS); + r_resp_ppn <== io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS); } io.dmem.req_val := @@ -77,13 +77,14 @@ class rocketPTW extends Component io.dmem.req_cmd := M_PRD; io.dmem.req_type := MT_D; io.dmem.req_addr := req_addr; - + + io.itlb.req_rdy := (state === s_ready); io.itlb.resp_val := (state === s_done) || (state === s_l1_fake) || (state === s_l2_fake); io.itlb.resp_err := (state === s_error); io.itlb.resp_perm := r_resp_perm; io.itlb.resp_ppn := - Mux(state === s_l1_fake, Cat(r_resp_ppn(PADDR_BITS-1, PADDR_BITS-7), r_req_vpn(VPN_BITS-8, 0)), - Mux(state === s_l2_fake, Cat(r_resp_ppn(PADDR_BITS-1, PADDR_BITS-17), r_req_vpn(VPN_BITS-18, 0)), + Mux(state === s_l1_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-7), r_req_vpn(VPN_BITS-8, 0)), + Mux(state === s_l2_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-17), r_req_vpn(VPN_BITS-18, 0)), r_resp_ppn)); val resp_ptd = (io.dmem.resp_data(1,0) === Bits(1,2)); From 9aca403aa8dab1f44c5f0125002d9d034febb33b Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Wed, 9 Nov 2011 23:18:14 -0800 Subject: [PATCH 0019/1087] more itlb integration & cleanup --- rocket/src/main/scala/arbiter.scala | 4 ++-- rocket/src/main/scala/dcache.scala | 2 +- rocket/src/main/scala/dpath.scala | 6 +++--- rocket/src/main/scala/dpath_alu.scala | 5 +++-- rocket/src/main/scala/icache_prefetch.scala | 3 ++- rocket/src/main/scala/ptw.scala | 4 ++-- 6 files changed, 13 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index eaf0241d..7e2ca8b4 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -9,7 +9,7 @@ class ioMem() extends Bundle val req_val = Bool('output); val req_rdy = Bool('input); val req_rw = Bool('output); - val req_addr = UFix(32, 'output); + val req_addr = UFix(PADDR_BITS, 
'output); val req_wdata = Bits(128, 'output); val req_tag = Bits(4, 'output); @@ -67,7 +67,7 @@ class rocketMemArbiter extends Component { io.dcache.resp_data := io.mem.resp_data; io.icache.resp_tag := io.mem.resp_tag(2,0); - io.dcache.resp_tag := io.mem.resp_tag(2,0); +// io.dcache.resp_tag := io.mem.resp_tag(2,0); } diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index fcd10394..644a73b3 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -29,7 +29,7 @@ class ioDcache(view: List[String] = null) extends Bundle(view) { val req_wdata = Bits(128, 'input); val req_rw = Bool('input); val resp_data = Bits(128, 'output); - val resp_tag = Bits(3, 'output); +// val resp_tag = Bits(3, 'output); val resp_val = Bool('output); } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index d687bb8d..a759b8ea 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -49,7 +49,7 @@ class rocketDpath extends Component val rfile = new rocketDpathRegfile(); // instruction fetch definitions - val if_reg_pc = Reg(width = 32, resetVal = UFix(0, 32)); + val if_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); // instruction decode definitions val id_reg_valid = Reg(resetVal = Bool(false)); @@ -128,8 +128,8 @@ class rocketDpath extends Component Cat(Fill(52, ex_reg_inst(31)), ex_reg_inst(31,27), ex_reg_inst(16,10)); val branch_adder_rhs = - Mux(io.ctrl.sel_pc === PC_BR, Cat(ex_sign_extend_split(30,0), UFix(0, 1)), - Cat(Fill(6, ex_reg_inst(31)), ex_reg_inst(31,7), UFix(0, 1))); + Mux(io.ctrl.sel_pc === PC_BR, Cat(ex_sign_extend_split(41,0), UFix(0, 1)), + Cat(Fill(17, ex_reg_inst(31)), ex_reg_inst(31,7), UFix(0, 1))); val ex_branch_target = ex_reg_pc + branch_adder_rhs.toUFix; diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index d7ae1e90..6c4eadfd 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ 
b/rocket/src/main/scala/dpath_alu.scala @@ -18,7 +18,8 @@ class ioALU extends Bundle(){ class rocketDpathALU extends Component { - override val io = new ioALU(); + val io = new ioALU(); + val out64 = MuxCase(Fix(0, 64), Array( (io.fn === FN_ADD) -> (io.in1 + io.in2).toFix, @@ -34,7 +35,7 @@ class rocketDpathALU extends Component (io.fn === FN_SRA) -> (io.in1.toFix >>> io.shamt))); io.out := MuxLookup(io.dw, Fix(0, 64), Array( - DW_64 -> out64, + DW_64 -> out64(63,0), DW_32 -> Cat(Fill(32, out64(31)), out64(31,0)).toFix)).toUFix; } diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 1c1219e3..fe7b761c 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -2,11 +2,12 @@ package Top { import Chisel._; import Node._; +import Constants._; import queues._; class ioIPrefetcherMem(view: List[String] = null) extends Bundle (view) { - val req_addr = UFix(32, 'output); + val req_addr = UFix(PADDR_BITS, 'output); val req_val = Bool('output); val req_rdy = Bool('input); val req_tag = Bits(3, 'output); diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 18fc8912..89170884 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -83,8 +83,8 @@ class rocketPTW extends Component io.itlb.resp_err := (state === s_error); io.itlb.resp_perm := r_resp_perm; io.itlb.resp_ppn := - Mux(state === s_l1_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-7), r_req_vpn(VPN_BITS-8, 0)), - Mux(state === s_l2_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-17), r_req_vpn(VPN_BITS-18, 0)), + Mux(state === s_l1_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-7), r_req_vpn(VPN_BITS-11, 0)), + Mux(state === s_l2_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-17), r_req_vpn(VPN_BITS-21, 0)), r_resp_ppn)); val resp_ptd = (io.dmem.resp_data(1,0) === Bits(1,2)); From 6664af3bc028b9be94a67d9e11bafeb8959e36d6 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: 
Wed, 9 Nov 2011 23:27:29 -0800 Subject: [PATCH 0020/1087] cleanup before adding dtlb --- rocket/src/main/scala/consts.scala | 1 + rocket/src/main/scala/cpu.scala | 19 ++++++++++--------- rocket/src/main/scala/dpath.scala | 11 ++--------- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 344eace5..794e54d2 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -162,6 +162,7 @@ object Constants val ASID_BITS = 7; val PERM_BITS = 6; + val DTLB_ENTRIES = 8; val ITLB_ENTRIES = 8; val HAVE_FPU = Bool(false); diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 3b09b0f1..b68a9316 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -45,6 +45,7 @@ class rocketProc extends Component val ctrl = new rocketCtrl(); val dpath = new rocketDpath(); + val dtlb = new rocketDTLB(ITLB_ENTRIES); val itlb = new rocketITLB(ITLB_ENTRIES); val ptw = new rocketPTW(); val arb = new rocketDmemArbiter(); @@ -60,38 +61,38 @@ class rocketProc extends Component dpath.io.host ^^ io.host; dpath.io.debug ^^ io.debug; + // FIXME: make this less verbose + // connect ITLB to I$, ctrl, dpath itlb.io.cpu.invalidate := Bool(false); itlb.io.cpu.status := dpath.io.ctrl.status; - itlb.io.cpu.req_val := ctrl.io.imem.req_val; - ctrl.io.imem.req_rdy := itlb.io.cpu.req_rdy && io.imem.req_rdy; - + itlb.io.cpu.req_val := ctrl.io.imem.req_val; itlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR -// itlb.io.cpu.req_vpn := dpath.io.imem.req_addr(VADDR_BITS-1,PGIDX_BITS); itlb.io.cpu.req_addr := dpath.io.imem.req_addr; - io.imem.req_val := itlb.io.cpu.resp_val; io.imem.req_addr := itlb.io.cpu.resp_addr; - + ctrl.io.imem.req_rdy := itlb.io.cpu.req_rdy && io.imem.req_rdy; ctrl.io.imem.resp_val := io.imem.resp_val; ctrl.io.itlb_xcpt := itlb.io.cpu.exception; + // connect page table walker to TLBs, page table base 
register (from PCR) + // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) + ptw.io.itlb <> itlb.io.ptw; ptw.io.ptbr := dpath.io.ptbr; - arb.io.ptw <> ptw.io.dmem; arb.io.mem ^^ io.dmem + // FIXME: make this less verbose + // connect arbiter to ctrl+dpath arb.io.cpu.req_val := ctrl.io.dmem.req_val; arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; arb.io.cpu.req_type := ctrl.io.dmem.req_type; arb.io.cpu.req_addr := dpath.io.dmem.req_addr; arb.io.cpu.req_data := dpath.io.dmem.req_data; arb.io.cpu.req_tag := dpath.io.dmem.req_tag; - ctrl.io.dmem.req_rdy := arb.io.cpu.req_rdy; ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; ctrl.io.dmem.resp_val := arb.io.cpu.resp_val; - dpath.io.dmem.resp_val := arb.io.cpu.resp_val; dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; dpath.io.dmem.resp_data := arb.io.cpu.resp_data; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a759b8ea..e9218261 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -17,7 +17,6 @@ class ioDpathAll extends Bundle() val ctrl = new ioCtrlDpath().flip(); val debug = new ioDebug(); val dmem = new ioDmem(List("req_addr", "req_data", "req_tag", "resp_val", "resp_tag", "resp_data")).flip(); -// val imem = new ioImem(List("req_addr", "resp_data")).flip(); val imem = new ioDpathImem(); val ptbr = UFix(PADDR_BITS, 'output); } @@ -127,6 +126,7 @@ class rocketDpath extends Component val ex_sign_extend_split = Cat(Fill(52, ex_reg_inst(31)), ex_reg_inst(31,27), ex_reg_inst(16,10)); + // FIXME: which bits to extract should be calculated based on VADDR_BITS val branch_adder_rhs = Mux(io.ctrl.sel_pc === PC_BR, Cat(ex_sign_extend_split(41,0), UFix(0, 1)), Cat(Fill(17, ex_reg_inst(31)), ex_reg_inst(31,7), UFix(0, 1))); @@ -153,7 +153,6 @@ class rocketDpath extends Component when (!io.ctrl.stallf) { if_reg_pc <== if_next_pc; } - io.imem.req_addr := Mux(io.ctrl.stallf, if_reg_pc, @@ -199,8 +198,7 @@ class rocketDpath extends 
Component UFix(0, 5))))); // moved this here to avoid having to do forward declaration - // TODO: cleanup - + // FIXME: cleanup // 64/32 bit load handling (in mem stage) val dmem_resp_pos = io.dmem.resp_tag(7,5).toUFix; val dmem_resp_type = io.dmem.resp_tag(10,8); @@ -358,11 +356,6 @@ class rocketDpath extends Component pcr.io.host.from_wen ^^ io.host.from_wen; pcr.io.host.from ^^ io.host.from; pcr.io.host.to ^^ io.host.to; - -// pcr.io.eret := ex_reg_ctrl_eret; -// pcr.io.exception := ex_reg_ctrl_exception; -// pcr.io.cause := ex_reg_ctrl_cause; -// pcr.io.pc := ex_reg_pc; io.ctrl.status := pcr.io.status; io.ptbr := pcr.io.ptbr; From 62407b466872563d4a59c301a1a8aee3512c3338 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Thu, 10 Nov 2011 00:23:29 -0800 Subject: [PATCH 0021/1087] more tlb/ptw fixes --- rocket/src/main/scala/cpu.scala | 40 ++++++++++++++++++------ rocket/src/main/scala/ctrl.scala | 10 ++++-- rocket/src/main/scala/dpath.scala | 13 +++++++- rocket/src/main/scala/dtlb.scala | 9 ++---- rocket/src/main/scala/itlb.scala | 5 +-- rocket/src/main/scala/ptw.scala | 52 +++++++++++++++++++++++-------- 6 files changed, 93 insertions(+), 36 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index b68a9316..f2c993ca 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -45,16 +45,15 @@ class rocketProc extends Component val ctrl = new rocketCtrl(); val dpath = new rocketDpath(); - val dtlb = new rocketDTLB(ITLB_ENTRIES); + val dtlb = new rocketDTLB(DTLB_ENTRIES); val itlb = new rocketITLB(ITLB_ENTRIES); val ptw = new rocketPTW(); val arb = new rocketDmemArbiter(); ctrl.io.dpath <> dpath.io.ctrl; -// ctrl.io.dmem ^^ io.dmem; ctrl.io.host.start ^^ io.host.start; +// ctrl.io.dmem ^^ io.dmem; // ctrl.io.imem ^^ io.imem; - // dpath.io.dmem ^^ io.dmem; // dpath.io.imem.req_addr ^^ io.imem.req_addr; dpath.io.imem.resp_data ^^ io.imem.resp_data; @@ -63,7 +62,7 @@ class rocketProc extends 
Component // FIXME: make this less verbose // connect ITLB to I$, ctrl, dpath - itlb.io.cpu.invalidate := Bool(false); + itlb.io.cpu.invalidate := Bool(false); // FIXME itlb.io.cpu.status := dpath.io.ctrl.status; itlb.io.cpu.req_val := ctrl.io.imem.req_val; itlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR @@ -74,9 +73,18 @@ class rocketProc extends Component ctrl.io.imem.resp_val := io.imem.resp_val; ctrl.io.itlb_xcpt := itlb.io.cpu.exception; + // connect DTLB to D$ arbiter, ctrl+dpath + dtlb.io.cpu.invalidate := Bool(false); // FIXME + dtlb.io.cpu.status := dpath.io.ctrl.status; + dtlb.io.cpu.req_val := ctrl.io.dmem.req_val; + dtlb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; + dtlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR + dtlb.io.cpu.req_addr := dpath.io.dmem.req_addr; + ctrl.io.dtlb_xcpt := dtlb.io.cpu.exception; + // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) - + ptw.io.dtlb <> dtlb.io.ptw; ptw.io.itlb <> itlb.io.ptw; ptw.io.ptbr := dpath.io.ptbr; arb.io.ptw <> ptw.io.dmem; @@ -84,18 +92,30 @@ class rocketProc extends Component // FIXME: make this less verbose // connect arbiter to ctrl+dpath - arb.io.cpu.req_val := ctrl.io.dmem.req_val; + arb.io.cpu.req_val := dtlb.io.cpu.resp_val; arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; arb.io.cpu.req_type := ctrl.io.dmem.req_type; - arb.io.cpu.req_addr := dpath.io.dmem.req_addr; + arb.io.cpu.req_addr := dtlb.io.cpu.resp_addr; arb.io.cpu.req_data := dpath.io.dmem.req_data; arb.io.cpu.req_tag := dpath.io.dmem.req_tag; - ctrl.io.dmem.req_rdy := arb.io.cpu.req_rdy; + ctrl.io.dmem.req_rdy := dtlb.io.cpu.req_rdy && arb.io.cpu.req_rdy; ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; - ctrl.io.dmem.resp_val := arb.io.cpu.resp_val; dpath.io.dmem.resp_val := arb.io.cpu.resp_val; dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; - dpath.io.dmem.resp_data := arb.io.cpu.resp_data; + 
dpath.io.dmem.resp_data := arb.io.cpu.resp_data; + +// arb.io.cpu.req_val := ctrl.io.dmem.req_val; +// arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; +// arb.io.cpu.req_type := ctrl.io.dmem.req_type; +// arb.io.cpu.req_addr := dpath.io.dmem.req_addr; +// arb.io.cpu.req_data := dpath.io.dmem.req_data; +// arb.io.cpu.req_tag := dpath.io.dmem.req_tag; +// ctrl.io.dmem.req_rdy := arb.io.cpu.req_rdy; +// ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; +// ctrl.io.dmem.resp_val := arb.io.cpu.resp_val; +// dpath.io.dmem.resp_val := arb.io.cpu.resp_val; +// dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; +// dpath.io.dmem.resp_data := arb.io.cpu.resp_data; // FIXME: console disconnected // io.console.bits := dpath.io.dpath.rs1(7,0); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index cef554f7..b1eea3d0 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -33,10 +33,13 @@ class ioCtrlDpath extends Bundle() val sel_wb = UFix(3, 'output); val ren_pcr = Bool('output); val wen_pcr = Bool('output); + // FIXME: move exception handling stuff (generating cause value, etc) + // from EX stage of dpath to MEM stage of control val xcpt_illegal = Bool('output); val xcpt_privileged = Bool('output); val xcpt_fpu = Bool('output); val xcpt_syscall = Bool('output); +// val xcpt_dtlb = Bool('output); val xcpt_itlb = Bool('output); val eret = Bool('output); val mem_load = Bool('output); @@ -68,6 +71,7 @@ class ioCtrlAll extends Bundle() val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss")).flip(); val host = new ioHost(List("start")); + val dtlb_xcpt = Bool('input); val itlb_xcpt = Bool('input); } @@ -237,7 +241,7 @@ class rocketCtrl extends Component val ex_reg_mem_type = Reg(){UFix(width = 3)}; val ex_reg_eret = Reg(resetVal = Bool(false)); val ex_reg_privileged = Reg(resetVal = Bool(false)); -// val id_reg_itlb_xcpt = 
Reg(resetVal = Bool(false)); +// val ex_reg_itlb_xcpt = Reg(resetVal = Bool(false)); when (!io.dpath.stalld) { when (io.dpath.killf) { @@ -343,8 +347,10 @@ class rocketCtrl extends Component io.dpath.mem_load := mem_cmd_load; + // FIXME: dtlb exception handling broken, need to move cause value generation + // to mem stage. also should probably move it from dpath to ctrl io.dpath.sel_pc := - Mux(io.dpath.exception || mem_reg_eret, PC_PCR, + Mux(io.dpath.exception || io.dtlb_xcpt, PC_PCR, Mux(replay_ex || replay_mem || mem_reg_privileged, PC_EX, Mux(!ex_reg_btb_hit && br_taken, PC_BR, Mux(ex_reg_btb_hit && !br_taken, PC_EX4, diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index e9218261..825c3897 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -5,6 +5,16 @@ import Node._; import Constants._ import Instructions._ +class ioDpathDmem extends Bundle() +{ + val req_addr = UFix(VADDR_BITS, 'output); + val req_tag = UFix(5, 'output); + val req_data = Bits(64, 'output); + val resp_val = Bool('input); + val resp_tag = Bits(13, 'input); // FIXME: MSB is ignored + val resp_data = Bits(64, 'input); +} + class ioDpathImem extends Bundle() { val req_addr = UFix(VADDR_BITS, 'output); @@ -16,7 +26,8 @@ class ioDpathAll extends Bundle() val host = new ioHost(); val ctrl = new ioCtrlDpath().flip(); val debug = new ioDebug(); - val dmem = new ioDmem(List("req_addr", "req_data", "req_tag", "resp_val", "resp_tag", "resp_data")).flip(); +// val dmem = new ioDmem(List("req_addr", "req_data", "req_tag", "resp_val", "resp_tag", "resp_data")).flip(); + val dmem = new ioDpathDmem(); val imem = new ioDpathImem(); val ptbr = UFix(PADDR_BITS, 'output); } diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 961e6041..e0cdfe6f 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -43,8 +43,8 @@ class rocketDTLB(entries: Int) extends Component val req_vpn = 
io.cpu.req_addr(VADDR_BITS-1,PGIDX_BITS); val req_idx = io.cpu.req_addr(PGIDX_BITS-1,0); - val req_load = (io.cpu.req_cmd === M_XRD); - val req_store = (io.cpu.req_cmd === M_XWR); + val req_load = io.cpu.req_val && (io.cpu.req_cmd === M_XRD); + val req_store = io.cpu.req_val && (io.cpu.req_cmd === M_XWR); // val req_amo = io.cpu.req_cmd(3).toBool; val lookup_tag = Cat(io.cpu.req_asid, req_vpn); @@ -65,10 +65,8 @@ class rocketDTLB(entries: Int) extends Component val status_vm = io.cpu.status(16).toBool // virtual memory enable // extract fields from PT permission bits -// val ptw_perm_ux = io.ptw.resp_perm(0); val ptw_perm_ur = io.ptw.resp_perm(1); val ptw_perm_uw = io.ptw.resp_perm(2); -// val ptw_perm_sx = io.ptw.resp_perm(3); val ptw_perm_sr = io.ptw.resp_perm(4); val ptw_perm_sw = io.ptw.resp_perm(5); @@ -135,11 +133,10 @@ class rocketDTLB(entries: Int) extends Component io.cpu.req_rdy := (state === s_ready); io.cpu.resp_val := Mux(status_vm, tag_hit, io.cpu.req_val); -// io.cpu.resp_ppn := Mux(status_vm, io.cpu.req_vpn(PPN_BITS-1, 0), tag_ram(tag_hit_addr)); io.cpu.resp_addr := Mux(status_vm, Cat(tag_ram(tag_hit_addr), req_idx), io.cpu.req_addr(PADDR_BITS-1,0)).toUFix; - io.cpu.exception := dtlb_exception; + io.cpu.exception := status_vm && dtlb_exception; io.ptw.req_val := (state === s_request); io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 5d80c4cb..cf9532c2 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -62,11 +62,9 @@ class ioITLB_CPU(view: List[String] = null) extends Bundle(view) val req_val = Bool('input); val req_rdy = Bool('output); val req_asid = Bits(ASID_BITS, 'input); -// val req_vpn = Bits(VPN_BITS, 'input); val req_addr = UFix(VADDR_BITS, 'input); // lookup responses val resp_val = Bool('output); -// val resp_ppn = Bits(PPN_BITS, 'output); val resp_addr = UFix(PADDR_BITS, 'output); val exception = Bool('output); } @@ 
-155,13 +153,12 @@ class rocketITLB(entries: Int) extends Component } val itlb_exception = - tag_hit && + io.cpu.req_val && tag_hit && ((status_mode && !sx_array(tag_hit_addr).toBool) || (!status_mode && !ux_array(tag_hit_addr).toBool)); io.cpu.req_rdy := (state === s_ready); io.cpu.resp_val := Mux(status_vm, tag_hit, io.cpu.req_val); -// io.cpu.resp_ppn := Mux(status_vm, io.cpu.req_vpn(PPN_BITS-1, 0), tag_ram(tag_hit_addr)); io.cpu.resp_addr := Mux(status_vm, Cat(tag_ram(tag_hit_addr), req_idx), io.cpu.req_addr(PADDR_BITS-1,0)).toUFix; diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 89170884..596f3e43 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -32,14 +32,15 @@ class rocketDmemArbiter extends Component io.ptw.resp_data := io.mem.resp_data; io.cpu.resp_data := io.mem.resp_data; - io.cpu.resp_tag := io.mem.resp_tag; +// io.cpu.resp_tag := io.mem.resp_tag(11,0); + io.cpu.resp_tag := io.mem.resp_tag; // to get rid of warning, MSB of tag is ignored in dpath } class ioPTW extends Bundle { val itlb = new ioTLB_PTW().flip(); -// val dtlb = new ioTLB_PTW.flip(); + val dtlb = new ioTLB_PTW().flip(); val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_addr", "resp_data", "resp_val")).flip(); val ptbr = UFix(PADDR_BITS, 'input); } @@ -52,17 +53,31 @@ class rocketPTW extends Component val state = Reg(resetVal = s_ready); val r_req_vpn = Reg(resetVal = Bits(0,VPN_BITS)); + val r_req_dest = Reg(resetVal = Bool(false)); // 0 = ITLB, 1 = DTLB val req_addr = Reg(resetVal = UFix(0,PPN_BITS+PGIDX_BITS)); val r_resp_ppn = Reg(resetVal = Bits(0,PPN_BITS)); val r_resp_perm = Reg(resetVal = Bits(0,PERM_BITS)); val vpn_idx = Mux(state === s_l2_wait, r_req_vpn(9,0), r_req_vpn(19,10)); + val req_val = io.itlb.req_val || io.dtlb.req_val; - when ((state === s_ready) && io.itlb.req_val) { + // give ITLB requests priority over DTLB requests + val req_itlb_val = io.itlb.req_val; + val 
req_dtlb_val = io.dtlb.req_val && !io.itlb.req_val; + + when ((state === s_ready) && req_itlb_val) { r_req_vpn <== io.itlb.req_vpn; + r_req_dest <== Bool(false); req_addr <== Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-10)).toUFix; } + + when ((state === s_ready) && req_dtlb_val) { + r_req_vpn <== io.dtlb.req_vpn; + r_req_dest <== Bool(true); + req_addr <== Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-10)).toUFix; + } + when (io.dmem.resp_val) { req_addr <== Cat(io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS), vpn_idx).toUFix; r_resp_perm <== io.dmem.resp_data(9,4); @@ -78,22 +93,33 @@ class rocketPTW extends Component io.dmem.req_type := MT_D; io.dmem.req_addr := req_addr; - io.itlb.req_rdy := (state === s_ready); - io.itlb.resp_val := (state === s_done) || (state === s_l1_fake) || (state === s_l2_fake); - io.itlb.resp_err := (state === s_error); - io.itlb.resp_perm := r_resp_perm; - io.itlb.resp_ppn := - Mux(state === s_l1_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-7), r_req_vpn(VPN_BITS-11, 0)), - Mux(state === s_l2_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-17), r_req_vpn(VPN_BITS-21, 0)), - r_resp_ppn)); - + val resp_val = (state === s_done) || (state === s_l1_fake) || (state === s_l2_fake); + val resp_err = (state === s_error); + val resp_ptd = (io.dmem.resp_data(1,0) === Bits(1,2)); val resp_pte = (io.dmem.resp_data(1,0) === Bits(2,2)); + io.dtlb.req_rdy := (state === s_ready) && !io.itlb.req_val; + io.itlb.req_rdy := (state === s_ready); + io.dtlb.resp_val := r_req_dest && resp_val; + io.itlb.resp_val := !r_req_dest && resp_val; + io.dtlb.resp_err := r_req_dest && resp_err; + io.itlb.resp_err := !r_req_dest && resp_err; + io.dtlb.resp_perm := r_resp_perm; + io.itlb.resp_perm := r_resp_perm; + + val resp_ppn = + Mux(state === s_l1_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-7), r_req_vpn(VPN_BITS-11, 0)), + Mux(state === s_l2_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-17), r_req_vpn(VPN_BITS-21, 0)), + 
r_resp_ppn)); + + io.dtlb.resp_ppn := resp_ppn; + io.itlb.resp_ppn := resp_ppn; + // control state machine switch (state) { is (s_ready) { - when (io.itlb.req_val) { + when (req_val) { state <== s_l1_req; } } From fbfa356d2ac9f8c5455c52ccaa282a7f876274cd Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Thu, 10 Nov 2011 00:37:00 -0800 Subject: [PATCH 0022/1087] fixed eret instruction --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index b1eea3d0..807a80d8 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -350,7 +350,7 @@ class rocketCtrl extends Component // FIXME: dtlb exception handling broken, need to move cause value generation // to mem stage. also should probably move it from dpath to ctrl io.dpath.sel_pc := - Mux(io.dpath.exception || io.dtlb_xcpt, PC_PCR, + Mux(io.dpath.exception || io.dtlb_xcpt || mem_reg_eret, PC_PCR, Mux(replay_ex || replay_mem || mem_reg_privileged, PC_EX, Mux(!ex_reg_btb_hit && br_taken, PC_BR, Mux(ex_reg_btb_hit && !br_taken, PC_EX4, From 36aa4bcc9dc3b8d1055b8cafef343a5c00263e09 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Thu, 10 Nov 2011 00:50:09 -0800 Subject: [PATCH 0023/1087] moved exception handling from ex stage in dpath to mem stage in ctrl --- rocket/src/main/scala/consts.scala | 5 + rocket/src/main/scala/cpu.scala | 32 ++---- rocket/src/main/scala/ctrl.scala | 153 ++++++++++++++++--------- rocket/src/main/scala/dpath.scala | 55 +++------ rocket/src/main/scala/dpath_util.scala | 29 +++-- rocket/src/main/scala/dtlb.scala | 18 +-- rocket/src/main/scala/icache.scala | 4 +- 7 files changed, 153 insertions(+), 143 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 794e54d2..d0d318d3 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -24,6 +24,7 @@ object Constants val 
PC_MEM = UFix(7, 4); val PC_MEM4 = UFix(8, 4); val PC_EX = UFix(9, 4); + val PC_EVEC = UFix(10, 4); val KF_Y = UFix(1, 1); val KF_N = UFix(0, 1); @@ -165,6 +166,10 @@ object Constants val DTLB_ENTRIES = 8; val ITLB_ENTRIES = 8; + // physical memory size (# 4K pages - for proxy kernel at least) + // if you change this value, make sure to also change MEMORY_SIZE variable in memif.h + val MEMSIZE = Bits("h2000", 64); // 32 megs + val HAVE_FPU = Bool(false); val HAVE_VEC = Bool(false); } diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index f2c993ca..0c2d2e0f 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -51,14 +51,11 @@ class rocketProc extends Component val arb = new rocketDmemArbiter(); ctrl.io.dpath <> dpath.io.ctrl; - ctrl.io.host.start ^^ io.host.start; -// ctrl.io.dmem ^^ io.dmem; -// ctrl.io.imem ^^ io.imem; -// dpath.io.dmem ^^ io.dmem; -// dpath.io.imem.req_addr ^^ io.imem.req_addr; - dpath.io.imem.resp_data ^^ io.imem.resp_data; dpath.io.host ^^ io.host; + ctrl.io.host.start := io.host.start; dpath.io.debug ^^ io.debug; +// dpath.io.imem.resp_data ^^ io.imem.resp_data; + // FIXME: make this less verbose // connect ITLB to I$, ctrl, dpath @@ -71,7 +68,8 @@ class rocketProc extends Component io.imem.req_addr := itlb.io.cpu.resp_addr; ctrl.io.imem.req_rdy := itlb.io.cpu.req_rdy && io.imem.req_rdy; ctrl.io.imem.resp_val := io.imem.resp_val; - ctrl.io.itlb_xcpt := itlb.io.cpu.exception; + dpath.io.imem.resp_data := io.imem.resp_data; + ctrl.io.xcpt_itlb := itlb.io.cpu.exception; // connect DTLB to D$ arbiter, ctrl+dpath dtlb.io.cpu.invalidate := Bool(false); // FIXME @@ -80,7 +78,9 @@ class rocketProc extends Component dtlb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; dtlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR dtlb.io.cpu.req_addr := dpath.io.dmem.req_addr; - ctrl.io.dtlb_xcpt := dtlb.io.cpu.exception; + ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu.xcpt_ld; + ctrl.io.xcpt_dtlb_st := 
dtlb.io.cpu.xcpt_st; + ctrl.io.dtlb_miss := dtlb.io.cpu.resp_miss; // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) @@ -90,8 +90,7 @@ class rocketProc extends Component arb.io.ptw <> ptw.io.dmem; arb.io.mem ^^ io.dmem - // FIXME: make this less verbose - // connect arbiter to ctrl+dpath + // connect arbiter to ctrl+dpath+DTLB arb.io.cpu.req_val := dtlb.io.cpu.resp_val; arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; arb.io.cpu.req_type := ctrl.io.dmem.req_type; @@ -104,19 +103,6 @@ class rocketProc extends Component dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; dpath.io.dmem.resp_data := arb.io.cpu.resp_data; -// arb.io.cpu.req_val := ctrl.io.dmem.req_val; -// arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; -// arb.io.cpu.req_type := ctrl.io.dmem.req_type; -// arb.io.cpu.req_addr := dpath.io.dmem.req_addr; -// arb.io.cpu.req_data := dpath.io.dmem.req_data; -// arb.io.cpu.req_tag := dpath.io.dmem.req_tag; -// ctrl.io.dmem.req_rdy := arb.io.cpu.req_rdy; -// ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; -// ctrl.io.dmem.resp_val := arb.io.cpu.resp_val; -// dpath.io.dmem.resp_val := arb.io.cpu.resp_val; -// dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; -// dpath.io.dmem.resp_data := arb.io.cpu.resp_data; - // FIXME: console disconnected // io.console.bits := dpath.io.dpath.rs1(7,0); io.console.bits := Bits(0,8); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 807a80d8..346cb095 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -33,17 +33,11 @@ class ioCtrlDpath extends Bundle() val sel_wb = UFix(3, 'output); val ren_pcr = Bool('output); val wen_pcr = Bool('output); - // FIXME: move exception handling stuff (generating cause value, etc) - // from EX stage of dpath to MEM stage of control - val xcpt_illegal = Bool('output); - val xcpt_privileged = Bool('output); - val xcpt_fpu = 
Bool('output); - val xcpt_syscall = Bool('output); -// val xcpt_dtlb = Bool('output); - val xcpt_itlb = Bool('output); + val exception = Bool('output); + val cause = UFix(5,'output); val eret = Bool('output); val mem_load = Bool('output); - val wen = Bool('output); + val wen = Bool('output); // inputs from datapath val btb_hit = Bool('input); val inst = Bits(32, 'input); @@ -53,10 +47,9 @@ class ioCtrlDpath extends Bundle() val div_rdy = Bool('input); val div_result_val = Bool('input); val mul_result_val = Bool('input); - val ex_waddr = UFix(5,'input); // write addr from execute stage + val ex_waddr = UFix(5,'input); // write addr from execute stage val mem_waddr = UFix(5,'input); // write addr from memory stage - val wb_waddr = UFix(5,'input); // write addr from writeback stage - val exception = Bool('input); + val wb_waddr = UFix(5,'input); // write addr from writeback stage val status = Bits(17, 'input); val sboard_clr0 = Bool('input); val sboard_clr0a = UFix(5, 'input); @@ -71,8 +64,10 @@ class ioCtrlAll extends Bundle() val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss")).flip(); val host = new ioHost(List("start")); - val dtlb_xcpt = Bool('input); - val itlb_xcpt = Bool('input); + val dtlb_miss = Bool('input); + val xcpt_dtlb_ld = Bool('input); + val xcpt_dtlb_st = Bool('input); + val xcpt_itlb = Bool('input); } class rocketCtrl extends Component @@ -97,6 +92,7 @@ class rocketCtrl extends Component JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), + RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, 
A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), @@ -189,9 +185,6 @@ class rocketCtrl extends Component AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), AMOMAXU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - - // miscellaneous - RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), */ )); @@ -210,7 +203,7 @@ class rocketCtrl extends Component io.console.valid := console_out_fire.toBool; val wb_reg_div_mul_val = Reg(){Bool()}; - val dcache_miss = Reg(){Bool()}; + val dcache_miss = Reg(io.dmem.resp_miss); val sboard = new rocketCtrlSboard(); sboard.io.raddra := id_raddr2.toUFix; @@ -232,7 +225,8 @@ class rocketCtrl extends Component val id_stall_ra = sboard.io.stallra; val id_reg_btb_hit = Reg(resetVal = Bool(false)); - val id_reg_itlb_xcpt = Reg(resetVal = Bool(false)); + val id_reg_xcpt_itlb = Reg(resetVal = Bool(false)); + val ex_reg_br_type = Reg(){UFix(width = 4)}; val ex_reg_btb_hit = Reg(){Bool()}; val ex_reg_div_mul_val = Reg(){Bool()}; @@ -241,15 +235,27 @@ class rocketCtrl extends Component val ex_reg_mem_type = Reg(){UFix(width = 3)}; val ex_reg_eret = Reg(resetVal = Bool(false)); val ex_reg_privileged = Reg(resetVal = Bool(false)); -// val ex_reg_itlb_xcpt = Reg(resetVal = Bool(false)); + + val ex_reg_xcpt_itlb = 
Reg(resetVal = Bool(false)); + val ex_reg_xcpt_illegal = Reg(resetVal = Bool(false)); + val ex_reg_xcpt_privileged = Reg(resetVal = Bool(false)); +// val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false)); + val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); + + val mem_reg_xcpt_itlb = Reg(resetVal = Bool(false)); + val mem_reg_xcpt_illegal = Reg(resetVal = Bool(false)); + val mem_reg_xcpt_privileged = Reg(resetVal = Bool(false)); +// val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); + val mem_reg_xcpt_fpu = Bool(false); // FIXME: trap on unimplemented FPU instructions + val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); when (!io.dpath.stalld) { when (io.dpath.killf) { - id_reg_itlb_xcpt <== Bool(false); + id_reg_xcpt_itlb <== Bool(false); id_reg_btb_hit <== Bool(false); } otherwise{ - id_reg_itlb_xcpt <== io.itlb_xcpt; + id_reg_xcpt_itlb <== io.xcpt_itlb; id_reg_btb_hit <== io.dpath.btb_hit; } } @@ -263,7 +269,12 @@ class rocketCtrl extends Component ex_reg_mem_type <== UFix(0, 3); ex_reg_eret <== Bool(false); ex_reg_privileged <== Bool(false); -// ex_reg_itlb_xcpt <== Bool(false); + + ex_reg_xcpt_itlb <== Bool(false); + ex_reg_xcpt_illegal <== Bool(false); + ex_reg_xcpt_privileged <== Bool(false); +// ex_reg_xcpt_fpu <== Bool(false); + ex_reg_xcpt_syscall <== Bool(false); } otherwise { ex_reg_br_type <== id_br_type; @@ -274,7 +285,12 @@ class rocketCtrl extends Component ex_reg_mem_type <== id_mem_type; ex_reg_eret <== id_eret.toBool; ex_reg_privileged <== id_privileged.toBool; -// ex_reg_itlb_xcpt <== id_reg_itlb_xcpt; + + ex_reg_xcpt_itlb <== id_reg_xcpt_itlb; + ex_reg_xcpt_illegal <== ~id_int_val.toBool; + ex_reg_xcpt_privileged <== (id_privileged & ~io.dpath.status(5)).toBool; +// ex_reg_xcpt_fpu <== Bool(false); + ex_reg_xcpt_syscall <== id_syscall.toBool; } val beq = io.dpath.br_eq; @@ -316,6 +332,12 @@ class rocketCtrl extends Component mem_reg_mem_cmd <== UFix(0, 4); mem_reg_mem_type <== UFix(0, 3); mem_reg_privileged <== Bool(false); + + 
mem_reg_xcpt_itlb <== Bool(false); + mem_reg_xcpt_illegal <== Bool(false); + mem_reg_xcpt_privileged <== Bool(false); +// mem_reg_xcpt_fpu <== Bool(false); + mem_reg_xcpt_syscall <== Bool(false); } otherwise { mem_reg_div_mul_val <== ex_reg_div_mul_val; @@ -324,6 +346,12 @@ class rocketCtrl extends Component mem_reg_mem_cmd <== ex_reg_mem_cmd; mem_reg_mem_type <== ex_reg_mem_type; mem_reg_privileged <== ex_reg_privileged; + + mem_reg_xcpt_itlb <== ex_reg_xcpt_itlb; + mem_reg_xcpt_illegal <== mem_reg_xcpt_illegal; + mem_reg_xcpt_privileged <== ex_reg_xcpt_privileged; +// mem_reg_xcpt_fpu <== Bool(false); + mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; } when (reset.toBool || io.dpath.killm) { @@ -333,31 +361,54 @@ class rocketCtrl extends Component wb_reg_div_mul_val <== mem_reg_div_mul_val; } - // replay execute stage PC when the D$ is blocked - val replay_ex = ex_reg_mem_val && !io.dmem.req_rdy; - - // replay execute stage PC on a D$ load miss - val mem_cmd_load = mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); - val replay_mem = io.dmem.resp_miss; - - val kill_ex = replay_ex | replay_mem | mem_reg_privileged; - val kill_mem = io.dpath.exception; // TODO: add load/store related exceptions + // exception handling + val mem_exception = + io.xcpt_dtlb_ld || + io.xcpt_dtlb_st || + mem_reg_xcpt_illegal || + mem_reg_xcpt_privileged || + mem_reg_xcpt_fpu || + mem_reg_xcpt_syscall || + mem_reg_xcpt_itlb; + + val mem_cause = + // instruction address misaligned + Mux(mem_reg_xcpt_itlb, UFix(1,5), // instruction access fault + Mux(mem_reg_xcpt_illegal, UFix(2,5), // illegal instruction + Mux(mem_reg_xcpt_privileged, UFix(3,5), // privileged instruction + Mux(mem_reg_xcpt_fpu, UFix(4,5), // FPU disabled + // interrupt + Mux(mem_reg_xcpt_syscall, UFix(6,5), // system call + // breakpoint + // misaligned load + // misaligned store + Mux(io.xcpt_dtlb_ld, UFix(8,5), // load fault + Mux(io.xcpt_dtlb_st, UFix(9,5), // store fault + UFix(0,5)))))))); + + // write cause to PCR on an 
exception + io.dpath.exception := mem_exception; + io.dpath.cause := mem_cause; - dcache_miss <== io.dmem.resp_miss; + // replay execute stage PC when the D$ is blocked, when the D$ misses, and for privileged instructions + val replay_ex = (ex_reg_mem_val && !io.dmem.req_rdy) || io.dmem.resp_miss || mem_reg_privileged; - io.dpath.mem_load := mem_cmd_load; - - // FIXME: dtlb exception handling broken, need to move cause value generation - // to mem stage. also should probably move it from dpath to ctrl + // replay mem stage PC on a DTLB miss + val replay_mem = io.dtlb_miss; + val kill_ex = replay_ex || replay_mem; + val kill_mem = mem_exception || io.dtlb_miss; + io.dpath.sel_pc := - Mux(io.dpath.exception || io.dtlb_xcpt || mem_reg_eret, PC_PCR, - Mux(replay_ex || replay_mem || mem_reg_privileged, PC_EX, - Mux(!ex_reg_btb_hit && br_taken, PC_BR, - Mux(ex_reg_btb_hit && !br_taken, PC_EX4, - Mux(jr_taken, PC_JR, - Mux(j_taken, PC_J, - Mux(io.dpath.btb_hit, PC_BTB, - PC_4))))))); + Mux(mem_exception, PC_EVEC, // exception + Mux(replay_mem, PC_MEM, // dtlb miss + Mux(mem_reg_eret, PC_PCR, // eret instruction + Mux(replay_ex, PC_EX, // D$ blocked, D$ miss, privileged inst + Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch + Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch + Mux(jr_taken, PC_JR, // jump register + Mux(j_taken, PC_J, // jump + Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB + PC_4))))))))); // PC+4 io.dpath.wen_btb := ~ex_reg_btb_hit & br_taken & ~kill_ex & ~kill_mem; @@ -366,8 +417,7 @@ class rocketCtrl extends Component ex_reg_btb_hit & ~br_taken | jr_taken | j_taken | - io.dpath.exception | - mem_reg_privileged | + mem_exception | mem_reg_eret | replay_ex | replay_mem; @@ -451,6 +501,7 @@ class rocketCtrl extends Component io.dpath.killx := kill_ex.toBool || kill_mem.toBool; io.dpath.killm := kill_mem.toBool; + io.dpath.mem_load := mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); io.dpath.ren2 := 
id_ren2.toBool; io.dpath.ren1 := id_ren1.toBool; io.dpath.sel_alu2 := id_sel_alu2; @@ -469,12 +520,6 @@ class rocketCtrl extends Component io.dpath.ren_pcr := id_ren_pcr.toBool; io.dpath.wen_pcr := id_wen_pcr.toBool; io.dpath.eret := id_eret.toBool; - - io.dpath.xcpt_illegal := ~id_int_val.toBool; - io.dpath.xcpt_privileged := (id_privileged & ~io.dpath.status(5)).toBool; - io.dpath.xcpt_fpu := Bool(false); - io.dpath.xcpt_syscall := id_syscall.toBool; - io.dpath.xcpt_itlb := id_reg_itlb_xcpt; } } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 825c3897..d5a98549 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -26,7 +26,6 @@ class ioDpathAll extends Bundle() val host = new ioHost(); val ctrl = new ioCtrlDpath().flip(); val debug = new ioDebug(); -// val dmem = new ioDmem(List("req_addr", "req_data", "req_tag", "resp_val", "resp_tag", "resp_data")).flip(); val dmem = new ioDpathDmem(); val imem = new ioDpathImem(); val ptbr = UFix(PADDR_BITS, 'output); @@ -91,8 +90,6 @@ class rocketDpath extends Component val ex_reg_ctrl_ren_pcr = Reg(resetVal = Bool(false)); val ex_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val ex_reg_ctrl_eret = Reg(resetVal = Bool(false)); - val ex_reg_ctrl_exception = Reg(resetVal = Bool(false)); - val ex_reg_ctrl_cause = Reg(resetVal = UFix(0,5)); val ex_wdata = Wire() { Bits() }; // memory definitions @@ -103,9 +100,7 @@ class rocketDpath extends Component val mem_reg_wdata = Reg(resetVal = Bits(0,64)); val mem_reg_raddr2 = Reg(resetVal = UFix(0,5)); val mem_reg_pcr = Reg(resetVal = Bits(0,64)); - val mem_reg_ctrl_cause = Reg(resetVal = UFix(0,5)); val mem_reg_ctrl_eret = Reg(resetVal = Bool(false)); - val mem_reg_ctrl_exception = Reg(resetVal = Bool(false)); val mem_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); @@ -147,16 +142,17 @@ class rocketDpath 
extends Component btb.io.correct_target := ex_branch_target; val if_next_pc = - Mux(io.ctrl.sel_pc === PC_4, if_pc_plus4, - Mux(io.ctrl.sel_pc === PC_BTB, if_btb_target, - Mux(io.ctrl.sel_pc === PC_EX, ex_reg_pc, - Mux(io.ctrl.sel_pc === PC_EX4, ex_reg_pc_plus4, - Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, - Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, - Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, - Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_pcr(VADDR_BITS-1,0).toUFix, - Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, - UFix(0, VADDR_BITS)))))))))); + Mux(io.ctrl.sel_pc === PC_4, if_pc_plus4, + Mux(io.ctrl.sel_pc === PC_BTB, if_btb_target, + Mux(io.ctrl.sel_pc === PC_EX, ex_reg_pc, + Mux(io.ctrl.sel_pc === PC_EX4, ex_reg_pc_plus4, + Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, + Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, + Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, + Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_pcr(VADDR_BITS-1,0).toUFix, // only used for ERET + Mux(io.ctrl.sel_pc === PC_EVEC, pcr.io.evec, + Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, + UFix(0, VADDR_BITS))))))))))); when (!io.host.start){ if_reg_pc <== UFix(0, VADDR_BITS); //32'hFFFF_FFFC; @@ -256,16 +252,6 @@ class rocketDpath extends Component Mux(id_raddr2 != UFix(0, 5) && r_dmem_resp_val && id_raddr2 === r_dmem_resp_waddr, dmem_resp_data_final, Mux(id_raddr2 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr2 === wb_reg_waddr, wb_reg_wdata, id_rdata2))))); - - // write value to cause register based on exception type - val id_exception = io.ctrl.xcpt_illegal || io.ctrl.xcpt_privileged || io.ctrl.xcpt_fpu || io.ctrl.xcpt_syscall || io.ctrl.xcpt_itlb; - val id_cause = - Mux(io.ctrl.xcpt_itlb, UFix(1,5), - Mux(io.ctrl.xcpt_illegal, UFix(2,5), - Mux(io.ctrl.xcpt_privileged, UFix(3,5), - Mux(io.ctrl.xcpt_fpu, UFix(4,5), - Mux(io.ctrl.xcpt_syscall, UFix(6,5), - UFix(0,5)))))); io.ctrl.inst := id_reg_inst; @@ -287,7 +273,6 @@ class rocketDpath extends Component ex_reg_ctrl_ll_wb <== 
io.ctrl.div_wb | io.ctrl.mul_wb; // TODO: verify ex_reg_ctrl_sel_wb <== io.ctrl.sel_wb; ex_reg_ctrl_ren_pcr <== io.ctrl.ren_pcr; - ex_reg_ctrl_cause <== id_cause; when(io.ctrl.killd) { ex_reg_valid <== Bool(false); @@ -296,7 +281,6 @@ class rocketDpath extends Component ex_reg_ctrl_wen <== Bool(false); ex_reg_ctrl_wen_pcr <== Bool(false); ex_reg_ctrl_eret <== Bool(false); - ex_reg_ctrl_exception <== Bool(false); } otherwise { ex_reg_valid <== id_reg_valid; @@ -305,7 +289,6 @@ class rocketDpath extends Component ex_reg_ctrl_wen <== io.ctrl.wen; ex_reg_ctrl_wen_pcr <== io.ctrl.wen_pcr; ex_reg_ctrl_eret <== io.ctrl.eret; - ex_reg_ctrl_exception <== id_exception; } val ex_alu_in2 = @@ -358,12 +341,11 @@ class rocketDpath extends Component io.dmem.req_tag := ex_reg_waddr; // processor control regfile read - pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_exception | ex_reg_ctrl_eret; + pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_eret; pcr.io.r.addr := - Mux(ex_reg_ctrl_exception, PCR_EVEC, Mux(ex_reg_ctrl_eret, PCR_EPC, - ex_reg_raddr2)); - + ex_reg_raddr2); + pcr.io.host.from_wen ^^ io.host.from_wen; pcr.io.host.from ^^ io.host.from; pcr.io.host.to ^^ io.host.to; @@ -396,25 +378,20 @@ class rocketDpath extends Component mem_reg_wdata <== ex_wdata; mem_reg_ctrl_ll_wb <== ex_reg_ctrl_ll_wb; mem_reg_raddr2 <== ex_reg_raddr2; - mem_reg_ctrl_cause <== ex_reg_ctrl_cause; when (io.ctrl.killx) { mem_reg_valid <== Bool(false); mem_reg_ctrl_eret <== Bool(false); mem_reg_ctrl_wen <== Bool(false); mem_reg_ctrl_wen_pcr <== Bool(false); - mem_reg_ctrl_exception <== Bool(false); } otherwise { mem_reg_valid <== ex_reg_valid; mem_reg_ctrl_eret <== ex_reg_ctrl_eret; mem_reg_ctrl_wen <== ex_reg_ctrl_wen; mem_reg_ctrl_wen_pcr <== ex_reg_ctrl_wen_pcr; - mem_reg_ctrl_exception <== ex_reg_ctrl_exception; } - // exception signal to control (for NPC select) - io.ctrl.exception := mem_reg_ctrl_exception; // for load/use hazard detection (load byte/halfword) io.ctrl.mem_waddr := 
mem_reg_waddr; @@ -432,9 +409,9 @@ class rocketDpath extends Component wb_reg_wdata <== mem_reg_wdata; wb_reg_ctrl_ll_wb <== mem_reg_ctrl_ll_wb; wb_reg_raddr2 <== mem_reg_raddr2; - wb_reg_ctrl_cause <== mem_reg_ctrl_cause; wb_reg_ctrl_eret <== mem_reg_ctrl_eret; - wb_reg_ctrl_exception <== mem_reg_ctrl_exception; + wb_reg_ctrl_exception <== io.ctrl.exception; + wb_reg_ctrl_cause <== io.ctrl.cause; when (io.ctrl.killm) { wb_reg_valid <== Bool(false); diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index b9de3dab..7b132e3e 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -17,18 +17,15 @@ class ioDpathBTB extends Bundle() class rocketDpathBTB extends Component { - override val io = new ioDpathBTB(); + override val io = new ioDpathBTB(); val rst_lwlr_pf = Mem(4, io.wen, io.correct_pc4(3, 2), UFix(1, 1), resetVal = UFix(0, 1)); val lwlr_pf = Mem(4, io.wen, io.correct_pc4(3, 2), Cat(io.correct_pc4(VADDR_BITS-1,4), io.correct_target(VADDR_BITS-1,2)), resetVal = UFix(0, 1)); -// Cat(io.correct_pc4(31,4), io.correct_target(31,2)), resetVal = UFix(0, 1)); val is_val = rst_lwlr_pf(io.current_pc4(3, 2)); val tag_target = lwlr_pf(io.current_pc4(3, 2)); + io.hit := (is_val & (tag_target(2*VADDR_BITS-7,VADDR_BITS-2) === io.current_pc4(VADDR_BITS-1, 4))).toBool; io.target := Cat(tag_target(VADDR_BITS-3, 0), Bits(0,2)).toUFix; - -// io.hit := (is_val & (tag_target(57,30) === io.current_pc4(31, 4))).toBool; -// io.target := Cat(tag_target(29, 0), Bits(0,2)).toUFix; } class ioDpathPCR extends Bundle() @@ -40,6 +37,7 @@ class ioDpathPCR extends Bundle() val status = Bits(17, 'output); val ptbr = UFix(PADDR_BITS, 'output); + val evec = UFix(VADDR_BITS, 'output); val exception = Bool('input); val cause = UFix(5, 'input); val pc = UFix(VADDR_BITS, 'input); @@ -51,9 +49,9 @@ class rocketDpathPCR extends Component val io = new ioDpathPCR(); val w = 32; - val reg_epc = Reg(resetVal = Bits(0, w)); 
- val reg_badvaddr = Reg(resetVal = Bits(0, w)); - val reg_ebase = Reg(resetVal = Bits(0, w)); + val reg_epc = Reg(resetVal = UFix(0, VADDR_BITS)); + val reg_badvaddr = Reg(resetVal = UFix(0, VADDR_BITS)); + val reg_ebase = Reg(resetVal = UFix(0, VADDR_BITS)); val reg_count = Reg(resetVal = Bits(0, w)); val reg_compare = Reg(resetVal = Bits(0, w)); val reg_cause = Reg(resetVal = Bits(0, 5)); @@ -79,6 +77,7 @@ class rocketDpathPCR extends Component val rdata = Wire() { Bits() }; io.status := Cat(reg_status_vm, reg_status_im, reg_status); + io.evec := reg_ebase; io.ptbr := reg_ptbr; io.host.to := Mux(io.host.from_wen, Bits(0, w), reg_tohost); io.debug.error_mode := reg_error_mode; @@ -125,9 +124,9 @@ class rocketDpathPCR extends Component reg_status_ef <== HAVE_FPU && io.w.data(1).toBool; reg_status_et <== io.w.data(0).toBool; } - when (io.w.addr === PCR_EPC) { reg_epc <== io.w.data(w-1,0); } - when (io.w.addr === PCR_BADVADDR) { reg_badvaddr <== io.w.data(w-1,0); } - when (io.w.addr === PCR_EVEC) { reg_ebase <== io.w.data(w-1,0); } + when (io.w.addr === PCR_EPC) { reg_epc <== io.w.data(VADDR_BITS-1,0).toUFix; } + when (io.w.addr === PCR_BADVADDR) { reg_badvaddr <== io.w.data(VADDR_BITS-1,0).toUFix; } + when (io.w.addr === PCR_EVEC) { reg_ebase <== io.w.data(VADDR_BITS-1,0).toUFix; } when (io.w.addr === PCR_COUNT) { reg_count <== io.w.data(w-1,0); } when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(w-1,0); } when (io.w.addr === PCR_CAUSE) { reg_cause <== io.w.data(4,0); } @@ -141,13 +140,13 @@ class rocketDpathPCR extends Component when (!io.r.en) { rdata <== Bits(0,2*w); } switch (io.r.addr) { is (PCR_STATUS) { rdata <== Cat(Bits(0,w+15), reg_status_vm, reg_status_im, reg_status); } - is (PCR_EPC) { rdata <== Cat(Fill(w, reg_epc(w-1)), reg_epc); } - is (PCR_BADVADDR) { rdata <== Cat(Fill(w, reg_badvaddr(w-1)), reg_badvaddr); } - is (PCR_EVEC) { rdata <== Cat(Fill(w, reg_ebase(w-1)), reg_ebase); } + is (PCR_EPC) { rdata <== Cat(Fill(2*w-VADDR_BITS, 
reg_epc(VADDR_BITS-1)), reg_epc); } + is (PCR_BADVADDR) { rdata <== Cat(Fill(2*w-VADDR_BITS, reg_badvaddr(VADDR_BITS-1)), reg_badvaddr); } + is (PCR_EVEC) { rdata <== Cat(Fill(2*w-VADDR_BITS, reg_ebase(VADDR_BITS-1)), reg_ebase); } is (PCR_COUNT) { rdata <== Cat(Fill(w, reg_count(w-1)), reg_count); } is (PCR_COMPARE) { rdata <== Cat(Fill(w, reg_compare(w-1)), reg_compare); } is (PCR_CAUSE) { rdata <== Cat(Bits(0,w+27), reg_cause); } - is (PCR_MEMSIZE) { rdata <== Bits("h2000", 2*w); } + is (PCR_MEMSIZE) { rdata <== MEMSIZE; } is (PCR_LOG) { rdata <== Cat(Bits(0,63), reg_log_control); } is (PCR_FROMHOST) { rdata <== Cat(Fill(w, reg_fromhost(w-1)), reg_fromhost); } is (PCR_TOHOST) { rdata <== Cat(Fill(w, reg_tohost(w-1)), reg_tohost); } diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index e0cdfe6f..557a7e24 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -6,7 +6,7 @@ import Node._; import Constants._; import scala.math._; -// interface between DTLB and fetch stage of pipeline +// interface between DTLB and pipeline class ioDTLB_CPU(view: List[String] = null) extends Bundle(view) { // status bits (from PCR), to check current permission and whether VM is enabled @@ -20,9 +20,11 @@ class ioDTLB_CPU(view: List[String] = null) extends Bundle(view) val req_asid = Bits(ASID_BITS, 'input); val req_addr = UFix(VADDR_BITS, 'input); // lookup responses + val resp_miss = Bool('output); val resp_val = Bool('output); val resp_addr = UFix(PADDR_BITS, 'output); - val exception = Bool('output); + val xcpt_ld = Bool('output); + val xcpt_st = Bool('output); } class ioDTLB extends Bundle @@ -119,24 +121,22 @@ class rocketDTLB(entries: Int) extends Component } } - val dtlb_st_xcpt = - tag_hit && req_load && + io.cpu.xcpt_ld := + status_vm && tag_hit && req_load && ((status_mode && !sw_array(tag_hit_addr).toBool) || (!status_mode && !uw_array(tag_hit_addr).toBool)); - val dtlb_ld_xcpt = - tag_hit && req_store && + 
io.cpu.xcpt_st := + status_vm && tag_hit && req_store && ((status_mode && !sr_array(tag_hit_addr).toBool) || (!status_mode && !ur_array(tag_hit_addr).toBool)); - val dtlb_exception = dtlb_st_xcpt || dtlb_ld_xcpt; - io.cpu.req_rdy := (state === s_ready); + io.cpu.resp_miss := lookup_miss; io.cpu.resp_val := Mux(status_vm, tag_hit, io.cpu.req_val); io.cpu.resp_addr := Mux(status_vm, Cat(tag_ram(tag_hit_addr), req_idx), io.cpu.req_addr(PADDR_BITS-1,0)).toUFix; - io.cpu.exception := status_vm && dtlb_exception; io.ptw.req_val := (state === s_request); io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index c72c7ca8..36ff768b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -51,11 +51,9 @@ class rocketSRAMsp(entries: Int, width: Int) extends Component { } // basic direct mapped instruction cache +// 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines // parameters : // lines = # cache lines -// addr_bits = address width (word addressable) bits -// 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines - class rocketICacheDM(lines: Int) extends Component { val io = new ioICacheDM(); From 603ede8bfe70cec57b43c3630f50e87883882b75 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Thu, 10 Nov 2011 02:46:09 -0800 Subject: [PATCH 0024/1087] access faults now write badvaddr PCR register with faulting address --- rocket/src/main/scala/ctrl.scala | 11 ++++++++--- rocket/src/main/scala/dpath.scala | 17 +++++++++++++---- rocket/src/main/scala/dpath_util.scala | 9 ++++++++- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 346cb095..f8f7c709 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -33,11 +33,14 @@ class ioCtrlDpath extends Bundle() val sel_wb = UFix(3, 'output); val ren_pcr = Bool('output); val 
wen_pcr = Bool('output); - val exception = Bool('output); - val cause = UFix(5,'output); val eret = Bool('output); val mem_load = Bool('output); val wen = Bool('output); + // exception handling + val exception = Bool('output); + val cause = UFix(5,'output); + val badvaddr_wen = Bool('output); // high for any access fault + val badvaddr_sel = Bool('output); // select between instruction PC or load/store addr // inputs from datapath val btb_hit = Bool('input); val inst = Bits(32, 'input); @@ -389,7 +392,9 @@ class rocketCtrl extends Component // write cause to PCR on an exception io.dpath.exception := mem_exception; io.dpath.cause := mem_cause; - + io.dpath.badvaddr_wen := io.xcpt_dtlb_ld || io.xcpt_dtlb_st || mem_reg_xcpt_itlb; + io.dpath.badvaddr_sel := mem_reg_xcpt_itlb; + // replay execute stage PC when the D$ is blocked, when the D$ misses, and for privileged instructions val replay_ex = (ex_reg_mem_val && !io.dmem.req_rdy) || io.dmem.resp_miss || mem_reg_privileged; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index d5a98549..8e2fa880 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -108,6 +108,7 @@ class rocketDpath extends Component // writeback definitions val wb_reg_valid = Reg(resetVal = Bool(false)); val wb_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); + val wb_reg_mem_req_addr = Reg(resetVal = UFix(0,VADDR_BITS)); val wb_reg_waddr = Reg(resetVal = UFix(0,5)); val wb_reg_wdata = Reg(resetVal = Bits(0,64)); val wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); @@ -117,6 +118,8 @@ class rocketDpath extends Component val wb_reg_ctrl_exception = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); + val wb_reg_badvaddr_sel = Reg(resetVal = Bool(false)); + val wb_reg_badvaddr_wen = Reg(resetVal = Bool(false)); val r_dmem_resp_val = Reg(resetVal = Bool(false)); val r_dmem_resp_waddr = Reg(resetVal = 
UFix(0,5)); @@ -412,6 +415,9 @@ class rocketDpath extends Component wb_reg_ctrl_eret <== mem_reg_ctrl_eret; wb_reg_ctrl_exception <== io.ctrl.exception; wb_reg_ctrl_cause <== io.ctrl.cause; + wb_reg_mem_req_addr <== io.dmem.req_addr; + wb_reg_badvaddr_wen <== io.ctrl.badvaddr_wen; + wb_reg_badvaddr_sel <== io.ctrl.badvaddr_sel; when (io.ctrl.killm) { wb_reg_valid <== Bool(false); @@ -448,10 +454,13 @@ class rocketDpath extends Component pcr.io.w.en := wb_reg_ctrl_wen_pcr; pcr.io.w.data := wb_reg_wdata; - pcr.io.eret := wb_reg_ctrl_eret; - pcr.io.exception := wb_reg_ctrl_exception; - pcr.io.cause := wb_reg_ctrl_cause; - pcr.io.pc := wb_reg_pc; + pcr.io.eret := wb_reg_ctrl_eret; + pcr.io.exception := wb_reg_ctrl_exception; + pcr.io.cause := wb_reg_ctrl_cause; + pcr.io.pc := wb_reg_pc; + pcr.io.ldst_addr := wb_reg_mem_req_addr; + pcr.io.badvaddr_wen := wb_reg_badvaddr_wen; + pcr.io.badvaddr_sel := wb_reg_badvaddr_sel; // temporary debug outputs so things don't get optimized away io.debug.id_valid := id_reg_valid; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 7b132e3e..edcb7895 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -40,7 +40,10 @@ class ioDpathPCR extends Bundle() val evec = UFix(VADDR_BITS, 'output); val exception = Bool('input); val cause = UFix(5, 'input); + val badvaddr_wen = Bool('input); + val badvaddr_sel = Bool('input); val pc = UFix(VADDR_BITS, 'input); + val ldst_addr = UFix(VADDR_BITS, 'input); val eret = Bool('input); } @@ -95,6 +98,10 @@ class rocketDpathPCR extends Component } } + when (io.badvaddr_wen) { + reg_badvaddr <== Mux(io.badvaddr_sel, io.pc, io.ldst_addr); + } + when (io.exception && !reg_status_et) { reg_error_mode <== Bool(true); } @@ -106,7 +113,7 @@ class rocketDpathPCR extends Component reg_epc <== io.pc; reg_cause <== io.cause; } - + when (!io.exception && io.eret) { reg_status_s <== reg_status_ps; reg_status_et <== 
Bool(true); From 4bd0263a4a0559f287154409b5bfe5f188ee3ff0 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Thu, 10 Nov 2011 03:38:59 -0800 Subject: [PATCH 0025/1087] added misaligned instruction check, cleaned up badvaddr handling --- rocket/src/main/scala/consts.scala | 3 +- rocket/src/main/scala/ctrl.scala | 42 ++++++++++++++++---------- rocket/src/main/scala/dpath.scala | 9 +++--- rocket/src/main/scala/dpath_util.scala | 7 ++--- rocket/src/main/scala/itlb.scala | 3 +- 5 files changed, 37 insertions(+), 27 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index d0d318d3..800de56d 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -168,7 +168,8 @@ object Constants // physical memory size (# 4K pages - for proxy kernel at least) // if you change this value, make sure to also change MEMORY_SIZE variable in memif.h - val MEMSIZE = Bits("h2000", 64); // 32 megs + val MEMSIZE_PAGES = 8192; // 32 megs + val MEMSIZE = MEMSIZE_PAGES*4096; val HAVE_FPU = Bool(false); val HAVE_VEC = Bool(false); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f8f7c709..3aff5072 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -39,9 +39,9 @@ class ioCtrlDpath extends Bundle() // exception handling val exception = Bool('output); val cause = UFix(5,'output); - val badvaddr_wen = Bool('output); // high for any access fault - val badvaddr_sel = Bool('output); // select between instruction PC or load/store addr + val badvaddr_wen = Bool('output); // high for a load/store access fault // inputs from datapath + val xcpt_ma_inst = Bool('input); // high on a misaligned/illegal virtual PC val btb_hit = Bool('input); val inst = Bits(32, 'input); val br_eq = Bool('input); @@ -76,7 +76,7 @@ class ioCtrlAll extends Bundle() class rocketCtrl extends Component { val io = new ioCtrlAll(); - + val xpr64 = Y; val cs = 
ListLookup(io.dpath.inst, @@ -191,6 +191,11 @@ class rocketCtrl extends Component */ )); + val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst); + + // FIXME + io.imem.req_val := io.host.start && !io.dpath.xcpt_ma_inst; + val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_sel_alu1 :: id_fn_dw :: id_fn_alu :: csremainder = cs; val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: id_wen_pcr :: id_sync :: id_eret :: id_syscall :: id_privileged :: Nil = csremainder; @@ -227,8 +232,9 @@ class rocketCtrl extends Component val id_stall_waddr = sboard.io.stallc; val id_stall_ra = sboard.io.stallra; - val id_reg_btb_hit = Reg(resetVal = Bool(false)); - val id_reg_xcpt_itlb = Reg(resetVal = Bool(false)); + val id_reg_btb_hit = Reg(resetVal = Bool(false)); + val id_reg_xcpt_itlb = Reg(resetVal = Bool(false)); + val id_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val ex_reg_br_type = Reg(){UFix(width = 4)}; val ex_reg_btb_hit = Reg(){Bool()}; @@ -239,12 +245,14 @@ class rocketCtrl extends Component val ex_reg_eret = Reg(resetVal = Bool(false)); val ex_reg_privileged = Reg(resetVal = Bool(false)); + val ex_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val ex_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val ex_reg_xcpt_illegal = Reg(resetVal = Bool(false)); val ex_reg_xcpt_privileged = Reg(resetVal = Bool(false)); // val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); - + + val mem_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val mem_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val mem_reg_xcpt_illegal = Reg(resetVal = Bool(false)); val mem_reg_xcpt_privileged = Reg(resetVal = Bool(false)); @@ -254,10 +262,12 @@ class rocketCtrl extends Component when (!io.dpath.stalld) { when (io.dpath.killf) { + id_reg_xcpt_ma_inst <== Bool(false); id_reg_xcpt_itlb <== Bool(false); id_reg_btb_hit <== Bool(false); 
} otherwise{ + id_reg_xcpt_ma_inst <== if_reg_xcpt_ma_inst; id_reg_xcpt_itlb <== io.xcpt_itlb; id_reg_btb_hit <== io.dpath.btb_hit; } @@ -272,7 +282,8 @@ class rocketCtrl extends Component ex_reg_mem_type <== UFix(0, 3); ex_reg_eret <== Bool(false); ex_reg_privileged <== Bool(false); - + + ex_reg_xcpt_ma_inst <== Bool(false); ex_reg_xcpt_itlb <== Bool(false); ex_reg_xcpt_illegal <== Bool(false); ex_reg_xcpt_privileged <== Bool(false); @@ -289,6 +300,7 @@ class rocketCtrl extends Component ex_reg_eret <== id_eret.toBool; ex_reg_privileged <== id_privileged.toBool; + ex_reg_xcpt_ma_inst <== id_reg_xcpt_ma_inst; ex_reg_xcpt_itlb <== id_reg_xcpt_itlb; ex_reg_xcpt_illegal <== ~id_int_val.toBool; ex_reg_xcpt_privileged <== (id_privileged & ~io.dpath.status(5)).toBool; @@ -314,9 +326,6 @@ class rocketCtrl extends Component val jr_taken = (ex_reg_br_type === BR_JR); val j_taken = (ex_reg_br_type === BR_J); - io.imem.req_val := io.host.start; // FIXME -// io.imem.req_val := Bool(true); - io.dmem.req_val := ex_reg_mem_val && ~io.dpath.killx; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; @@ -336,6 +345,7 @@ class rocketCtrl extends Component mem_reg_mem_type <== UFix(0, 3); mem_reg_privileged <== Bool(false); + mem_reg_xcpt_ma_inst <== Bool(false); mem_reg_xcpt_itlb <== Bool(false); mem_reg_xcpt_illegal <== Bool(false); mem_reg_xcpt_privileged <== Bool(false); @@ -350,6 +360,7 @@ class rocketCtrl extends Component mem_reg_mem_type <== ex_reg_mem_type; mem_reg_privileged <== ex_reg_privileged; + mem_reg_xcpt_ma_inst <== ex_reg_xcpt_ma_inst; mem_reg_xcpt_itlb <== ex_reg_xcpt_itlb; mem_reg_xcpt_illegal <== mem_reg_xcpt_illegal; mem_reg_xcpt_privileged <== ex_reg_xcpt_privileged; @@ -372,10 +383,10 @@ class rocketCtrl extends Component mem_reg_xcpt_privileged || mem_reg_xcpt_fpu || mem_reg_xcpt_syscall || - mem_reg_xcpt_itlb; + mem_reg_xcpt_itlb || + mem_reg_xcpt_ma_inst; val mem_cause = - // instruction address misaligned Mux(mem_reg_xcpt_itlb, 
UFix(1,5), // instruction access fault Mux(mem_reg_xcpt_illegal, UFix(2,5), // illegal instruction Mux(mem_reg_xcpt_privileged, UFix(3,5), // privileged instruction @@ -387,13 +398,12 @@ class rocketCtrl extends Component // misaligned store Mux(io.xcpt_dtlb_ld, UFix(8,5), // load fault Mux(io.xcpt_dtlb_st, UFix(9,5), // store fault - UFix(0,5)))))))); - + UFix(0,5)))))))); // instruction address misaligned + // write cause to PCR on an exception io.dpath.exception := mem_exception; io.dpath.cause := mem_cause; - io.dpath.badvaddr_wen := io.xcpt_dtlb_ld || io.xcpt_dtlb_st || mem_reg_xcpt_itlb; - io.dpath.badvaddr_sel := mem_reg_xcpt_itlb; + io.dpath.badvaddr_wen := io.xcpt_dtlb_ld || io.xcpt_dtlb_st; // replay execute stage PC when the D$ is blocked, when the D$ misses, and for privileged instructions val replay_ex = (ex_reg_mem_val && !io.dmem.req_rdy) || io.dmem.resp_miss || mem_reg_privileged; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 8e2fa880..a8b0a528 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -118,7 +118,6 @@ class rocketDpath extends Component val wb_reg_ctrl_exception = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); - val wb_reg_badvaddr_sel = Reg(resetVal = Bool(false)); val wb_reg_badvaddr_wen = Reg(resetVal = Bool(false)); val r_dmem_resp_val = Reg(resetVal = Bool(false)); @@ -156,7 +155,7 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_EVEC, pcr.io.evec, Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, UFix(0, VADDR_BITS))))))))))); - + when (!io.host.start){ if_reg_pc <== UFix(0, VADDR_BITS); //32'hFFFF_FFFC; } @@ -164,6 +163,8 @@ class rocketDpath extends Component if_reg_pc <== if_next_pc; } + io.ctrl.xcpt_ma_inst := if_next_pc(1,0) != Bits(0,2) + io.imem.req_addr := Mux(io.ctrl.stallf, if_reg_pc, if_next_pc); @@ -417,7 +418,6 @@ class rocketDpath extends 
Component wb_reg_ctrl_cause <== io.ctrl.cause; wb_reg_mem_req_addr <== io.dmem.req_addr; wb_reg_badvaddr_wen <== io.ctrl.badvaddr_wen; - wb_reg_badvaddr_sel <== io.ctrl.badvaddr_sel; when (io.ctrl.killm) { wb_reg_valid <== Bool(false); @@ -458,9 +458,8 @@ class rocketDpath extends Component pcr.io.exception := wb_reg_ctrl_exception; pcr.io.cause := wb_reg_ctrl_cause; pcr.io.pc := wb_reg_pc; - pcr.io.ldst_addr := wb_reg_mem_req_addr; + pcr.io.badvaddr := wb_reg_mem_req_addr; pcr.io.badvaddr_wen := wb_reg_badvaddr_wen; - pcr.io.badvaddr_sel := wb_reg_badvaddr_sel; // temporary debug outputs so things don't get optimized away io.debug.id_valid := id_reg_valid; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index edcb7895..fc2de9bf 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -41,9 +41,8 @@ class ioDpathPCR extends Bundle() val exception = Bool('input); val cause = UFix(5, 'input); val badvaddr_wen = Bool('input); - val badvaddr_sel = Bool('input); val pc = UFix(VADDR_BITS, 'input); - val ldst_addr = UFix(VADDR_BITS, 'input); + val badvaddr = UFix(VADDR_BITS, 'input); val eret = Bool('input); } @@ -99,7 +98,7 @@ class rocketDpathPCR extends Component } when (io.badvaddr_wen) { - reg_badvaddr <== Mux(io.badvaddr_sel, io.pc, io.ldst_addr); + reg_badvaddr <== io.badvaddr; } when (io.exception && !reg_status_et) { @@ -153,7 +152,7 @@ class rocketDpathPCR extends Component is (PCR_COUNT) { rdata <== Cat(Fill(w, reg_count(w-1)), reg_count); } is (PCR_COMPARE) { rdata <== Cat(Fill(w, reg_compare(w-1)), reg_compare); } is (PCR_CAUSE) { rdata <== Cat(Bits(0,w+27), reg_cause); } - is (PCR_MEMSIZE) { rdata <== MEMSIZE; } + is (PCR_MEMSIZE) { rdata <== Bits(MEMSIZE_PAGES, 64); } is (PCR_LOG) { rdata <== Cat(Bits(0,63), reg_log_control); } is (PCR_FROMHOST) { rdata <== Cat(Fill(w, reg_fromhost(w-1)), reg_fromhost); } is (PCR_TOHOST) { rdata <== Cat(Fill(w, reg_tohost(w-1)), 
reg_tohost); } diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index cf9532c2..9337392c 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -155,7 +155,8 @@ class rocketITLB(entries: Int) extends Component val itlb_exception = io.cpu.req_val && tag_hit && ((status_mode && !sx_array(tag_hit_addr).toBool) || - (!status_mode && !ux_array(tag_hit_addr).toBool)); + (!status_mode && !ux_array(tag_hit_addr).toBool) || + (io.cpu.resp_addr >= MEMSIZE)); io.cpu.req_rdy := (state === s_ready); io.cpu.resp_val := Mux(status_vm, tag_hit, io.cpu.req_val); From f86d5b133481517aef67562424534587f51dd52f Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Thu, 10 Nov 2011 11:26:13 -0800 Subject: [PATCH 0026/1087] cleanup, lots of minor fixes, added more PCR regs (COREID, NUMCORES), parameterized BTB --- rocket/src/main/scala/consts.scala | 6 +- rocket/src/main/scala/cpu.scala | 1 - rocket/src/main/scala/dcache.scala | 2 +- rocket/src/main/scala/dpath.scala | 6 +- rocket/src/main/scala/dpath_util.scala | 95 +++++++++++++++----------- rocket/src/main/scala/dtlb.scala | 26 +++---- rocket/src/main/scala/itlb.scala | 73 ++++++++++---------- rocket/src/main/scala/ptw.scala | 17 +++-- rocket/src/main/scala/util.scala | 2 +- 9 files changed, 119 insertions(+), 109 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 800de56d..9d7d4484 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -146,15 +146,19 @@ object Constants val PCR_COUNT = UFix( 4, 5); val PCR_COMPARE = UFix( 5, 5); val PCR_CAUSE = UFix( 6, 5); + val PCR_IPI = UFix( 7, 5); val PCR_MEMSIZE = UFix( 8, 5); val PCR_PTBR = UFix( 9, 5); - val PCR_LOG = UFix(10, 5); + val PCR_COREID = UFix(10, 5); + val PCR_NUMCORES = UFix(12, 5); val PCR_TOHOST = UFix(16, 5); val PCR_FROMHOST = UFix(17, 5); val PCR_CONSOLE = UFix(18, 5); val PCR_K0 = UFix(24, 5); val PCR_K1 = UFix(25, 5); + 
val COREID = 0; + val NUMCORES = 1; val PADDR_BITS = 40; val VADDR_BITS = 43; val PGIDX_BITS = 13; diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 0c2d2e0f..2f58276f 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -7,7 +7,6 @@ import Constants._; class ioDebug(view: List[String] = null) extends Bundle(view) { val error_mode = Bool('output); - val log_control = Bool('output); val id_valid = Bool('output); val ex_valid = Bool('output); val mem_valid = Bool('output); diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 644a73b3..068ccc6a 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -17,7 +17,7 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { val resp_miss = Bool('output); val resp_val = Bool('output); val resp_data = Bits(64, 'output); - val resp_tag = Bits(13, 'output); + val resp_tag = Bits(12, 'output); } // interface between D$ and next level in memory hierarchy diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a8b0a528..6d611217 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -11,7 +11,7 @@ class ioDpathDmem extends Bundle() val req_tag = UFix(5, 'output); val req_data = Bits(64, 'output); val resp_val = Bool('input); - val resp_tag = Bits(13, 'input); // FIXME: MSB is ignored + val resp_tag = Bits(12, 'input); // FIXME: MSB is ignored val resp_data = Bits(64, 'input); } @@ -35,7 +35,8 @@ class rocketDpath extends Component { val io = new ioDpathAll(); - val btb = new rocketDpathBTB(); + val btb = new rocketDpathBTB(8); // # of entries in BTB + val if_btb_target = btb.io.target; val pcr = new rocketDpathPCR(); @@ -357,7 +358,6 @@ class rocketDpath extends Component io.ctrl.status := pcr.io.status; io.ptbr := pcr.io.ptbr; io.debug.error_mode := pcr.io.debug.error_mode; - io.debug.log_control := 
pcr.io.debug.log_control; // branch resolution logic io.ctrl.br_eq := (ex_reg_rs1 === ex_reg_rs2); diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index fc2de9bf..2ef560d0 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -4,6 +4,7 @@ package Top import Chisel._; import Node._; import Constants._; +import scala.math._; class ioDpathBTB extends Bundle() { @@ -15,17 +16,34 @@ class ioDpathBTB extends Bundle() val correct_target = UFix(VADDR_BITS, 'input); } -class rocketDpathBTB extends Component +// basic direct-mapped branch target buffer +class rocketDpathBTB(entries: Int) extends Component { - override val io = new ioDpathBTB(); - val rst_lwlr_pf = Mem(4, io.wen, io.correct_pc4(3, 2), UFix(1, 1), resetVal = UFix(0, 1)); - val lwlr_pf = Mem(4, io.wen, io.correct_pc4(3, 2), - Cat(io.correct_pc4(VADDR_BITS-1,4), io.correct_target(VADDR_BITS-1,2)), resetVal = UFix(0, 1)); - val is_val = rst_lwlr_pf(io.current_pc4(3, 2)); - val tag_target = lwlr_pf(io.current_pc4(3, 2)); + val io = new ioDpathBTB(); - io.hit := (is_val & (tag_target(2*VADDR_BITS-7,VADDR_BITS-2) === io.current_pc4(VADDR_BITS-1, 4))).toBool; - io.target := Cat(tag_target(VADDR_BITS-3, 0), Bits(0,2)).toUFix; + val addr_bits = ceil(log10(entries)/log10(2)).toInt; + val idxlsb = 2; + val idxmsb = idxlsb+addr_bits-1; + val tagmsb = (VADDR_BITS-idxmsb-1)+(VADDR_BITS-idxlsb)-1; + val taglsb = (VADDR_BITS-idxlsb); + + val rst_lwlr_pf = Mem(entries, io.wen, io.correct_pc4(idxmsb,idxlsb), UFix(1,1), resetVal = UFix(0,1)); + val lwlr_pf = Mem(entries, io.wen, io.correct_pc4(idxmsb,idxlsb), + Cat(io.correct_pc4(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb)), resetVal = UFix(0,1)); + val is_val = rst_lwlr_pf(io.current_pc4(idxmsb,idxlsb)); + val tag_target = lwlr_pf(io.current_pc4(idxmsb, idxlsb)); + + io.hit := (is_val & (tag_target(tagmsb,taglsb) === io.current_pc4(VADDR_BITS-1, idxmsb+1))).toBool; + 
io.target := Cat(tag_target(taglsb-1, 0), Bits(0,idxlsb)).toUFix; + +// val rst_lwlr_pf = Mem(entries, io.wen, io.correct_pc4(3, 2), UFix(1, 1), resetVal = UFix(0, 1)); +// val lwlr_pf = Mem(entries, io.wen, io.correct_pc4(3, 2), +// Cat(io.correct_pc4(VADDR_BITS-1,4), io.correct_target(VADDR_BITS-1,2)), resetVal = UFix(0, 1)); +// val is_val = rst_lwlr_pf(io.current_pc4(3, 2)); +// val tag_target = lwlr_pf(io.current_pc4(3, 2)); +// +// io.hit := (is_val & (tag_target(2*VADDR_BITS-7,VADDR_BITS-2) === io.current_pc4(VADDR_BITS-1, 4))).toBool; +// io.target := Cat(tag_target(VADDR_BITS-3, 0), Bits(0,2)).toUFix; } class ioDpathPCR extends Bundle() @@ -49,22 +67,20 @@ class ioDpathPCR extends Bundle() class rocketDpathPCR extends Component { val io = new ioDpathPCR(); - val w = 32; val reg_epc = Reg(resetVal = UFix(0, VADDR_BITS)); val reg_badvaddr = Reg(resetVal = UFix(0, VADDR_BITS)); val reg_ebase = Reg(resetVal = UFix(0, VADDR_BITS)); - val reg_count = Reg(resetVal = Bits(0, w)); - val reg_compare = Reg(resetVal = Bits(0, w)); + val reg_count = Reg(resetVal = Bits(0, 32)); + val reg_compare = Reg(resetVal = Bits(0, 32)); val reg_cause = Reg(resetVal = Bits(0, 5)); - val reg_tohost = Reg(resetVal = Bits(0, w)); - val reg_fromhost = Reg(resetVal = Bits(0, w)); - val reg_k0 = Reg(resetVal = Bits(0, 2*w)); - val reg_k1 = Reg(resetVal = Bits(0, 2*w)); + val reg_tohost = Reg(resetVal = Bits(0, 32)); + val reg_fromhost = Reg(resetVal = Bits(0, 32)); + val reg_k0 = Reg(resetVal = Bits(0, 64)); + val reg_k1 = Reg(resetVal = Bits(0, 64)); val reg_ptbr = Reg(resetVal = UFix(0, PADDR_BITS)); val reg_error_mode = Reg(resetVal = Bool(false)); - val reg_log_control = Reg(resetVal = Bool(false)); val reg_status_vm = Reg(resetVal = Bool(false)); val reg_status_im = Reg(resetVal = Bits(0,8)); val reg_status_sx = Reg(resetVal = Bool(true)); @@ -81,19 +97,18 @@ class rocketDpathPCR extends Component io.status := Cat(reg_status_vm, reg_status_im, reg_status); io.evec := reg_ebase; 
io.ptbr := reg_ptbr; - io.host.to := Mux(io.host.from_wen, Bits(0, w), reg_tohost); + io.host.to := Mux(io.host.from_wen, Bits(0,32), reg_tohost); io.debug.error_mode := reg_error_mode; - io.debug.log_control := reg_log_control; io.r.data := rdata; when (io.host.from_wen) { - reg_tohost <== Bits(0, w); + reg_tohost <== Bits(0,32); reg_fromhost <== io.host.from; } otherwise { when (!io.exception && io.w.en && (io.w.addr === PCR_TOHOST)) { - reg_tohost <== io.w.data(w-1, 0); - reg_fromhost <== Bits(0, w); + reg_tohost <== io.w.data(31,0); + reg_fromhost <== Bits(0,32); } } @@ -133,33 +148,33 @@ class rocketDpathPCR extends Component when (io.w.addr === PCR_EPC) { reg_epc <== io.w.data(VADDR_BITS-1,0).toUFix; } when (io.w.addr === PCR_BADVADDR) { reg_badvaddr <== io.w.data(VADDR_BITS-1,0).toUFix; } when (io.w.addr === PCR_EVEC) { reg_ebase <== io.w.data(VADDR_BITS-1,0).toUFix; } - when (io.w.addr === PCR_COUNT) { reg_count <== io.w.data(w-1,0); } - when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(w-1,0); } + when (io.w.addr === PCR_COUNT) { reg_count <== io.w.data(31,0); } + when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(31,0); } when (io.w.addr === PCR_CAUSE) { reg_cause <== io.w.data(4,0); } - when (io.w.addr === PCR_LOG) { reg_log_control <== io.w.data(0).toBool; } - when (io.w.addr === PCR_FROMHOST) { reg_fromhost <== io.w.data(w-1,0); } + when (io.w.addr === PCR_FROMHOST) { reg_fromhost <== io.w.data(31,0); } when (io.w.addr === PCR_K0) { reg_k0 <== io.w.data; } when (io.w.addr === PCR_K1) { reg_k1 <== io.w.data; } when (io.w.addr === PCR_PTBR) { reg_ptbr <== Cat(io.w.data(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } } - when (!io.r.en) { rdata <== Bits(0,2*w); } + when (!io.r.en) { rdata <== Bits(0,64); } switch (io.r.addr) { - is (PCR_STATUS) { rdata <== Cat(Bits(0,w+15), reg_status_vm, reg_status_im, reg_status); } - is (PCR_EPC) { rdata <== Cat(Fill(2*w-VADDR_BITS, reg_epc(VADDR_BITS-1)), reg_epc); } - is (PCR_BADVADDR) 
{ rdata <== Cat(Fill(2*w-VADDR_BITS, reg_badvaddr(VADDR_BITS-1)), reg_badvaddr); } - is (PCR_EVEC) { rdata <== Cat(Fill(2*w-VADDR_BITS, reg_ebase(VADDR_BITS-1)), reg_ebase); } - is (PCR_COUNT) { rdata <== Cat(Fill(w, reg_count(w-1)), reg_count); } - is (PCR_COMPARE) { rdata <== Cat(Fill(w, reg_compare(w-1)), reg_compare); } - is (PCR_CAUSE) { rdata <== Cat(Bits(0,w+27), reg_cause); } - is (PCR_MEMSIZE) { rdata <== Bits(MEMSIZE_PAGES, 64); } - is (PCR_LOG) { rdata <== Cat(Bits(0,63), reg_log_control); } - is (PCR_FROMHOST) { rdata <== Cat(Fill(w, reg_fromhost(w-1)), reg_fromhost); } - is (PCR_TOHOST) { rdata <== Cat(Fill(w, reg_tohost(w-1)), reg_tohost); } + is (PCR_STATUS) { rdata <== Cat(Bits(0,47), reg_status_vm, reg_status_im, reg_status); } + is (PCR_EPC) { rdata <== Cat(Fill(64-VADDR_BITS, reg_epc(VADDR_BITS-1)), reg_epc); } + is (PCR_BADVADDR) { rdata <== Cat(Fill(64-VADDR_BITS, reg_badvaddr(VADDR_BITS-1)), reg_badvaddr); } + is (PCR_EVEC) { rdata <== Cat(Fill(64-VADDR_BITS, reg_ebase(VADDR_BITS-1)), reg_ebase); } + is (PCR_COUNT) { rdata <== Cat(Fill(32, reg_count(31)), reg_count); } + is (PCR_COMPARE) { rdata <== Cat(Fill(32, reg_compare(31)), reg_compare); } + is (PCR_CAUSE) { rdata <== Cat(Bits(0,59), reg_cause); } + is (PCR_MEMSIZE) { rdata <== Bits(MEMSIZE_PAGES,64); } + is (PCR_COREID) { rdata <== Bits(COREID,64); } + is (PCR_NUMCORES) { rdata <== Bits(NUMCORES,64); } + is (PCR_FROMHOST) { rdata <== Cat(Fill(32, reg_fromhost(31)), reg_fromhost); } + is (PCR_TOHOST) { rdata <== Cat(Fill(32, reg_tohost(31)), reg_tohost); } is (PCR_K0) { rdata <== reg_k0; } is (PCR_K1) { rdata <== reg_k1; } - is (PCR_PTBR) { rdata <== Cat(Bits(0,2*w-PADDR_BITS), reg_ptbr); } - otherwise { rdata <== Bits(0,2*w); } + is (PCR_PTBR) { rdata <== Cat(Bits(0,64-PADDR_BITS), reg_ptbr); } + otherwise { rdata <== Bits(0,64); } } } diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 557a7e24..d27ffdc8 100644 --- a/rocket/src/main/scala/dtlb.scala 
+++ b/rocket/src/main/scala/dtlb.scala @@ -56,6 +56,7 @@ class rocketDTLB(entries: Int) extends Component val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); + tag_cam.io.clear := io.cpu.invalidate; tag_cam.io.tag := lookup_tag; tag_cam.io.write := io.ptw.resp_val; tag_cam.io.write_tag := r_refill_tag; @@ -72,15 +73,6 @@ class rocketDTLB(entries: Int) extends Component val ptw_perm_sr = io.ptw.resp_perm(4); val ptw_perm_sw = io.ptw.resp_perm(5); - // valid bit array - val vb_array = Reg(resetVal = Bits(0, entries)); - when (io.cpu.invalidate) { - vb_array <== Bits(0, entries); - } - when (io.ptw.resp_val) { - vb_array <== vb_array.bitSet(r_refill_waddr, Bool(true)); - } - // permission bit arrays val ur_array = Reg(resetVal = Bits(0, entries)); // user execute permission val uw_array = Reg(resetVal = Bits(0, entries)); // user execute permission @@ -103,15 +95,15 @@ class rocketDTLB(entries: Int) extends Component } // high if there are any unused (invalid) entries in the TLB - val invalid_entry = ~vb_array.toUFix.orR(); + val invalid_entry = (tag_cam.io.valid_bits != ~Bits(0,entries)); val ie_enc = new priorityEncoder(entries); - ie_enc.io.in := vb_array.toUFix; + ie_enc.io.in := ~tag_cam.io.valid_bits.toUFix; val ie_addr = ie_enc.io.out; val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val tag_hit = tag_cam.io.hit && vb_array(tag_hit_addr).toBool; - val lookup_miss = (state === s_ready) && status_vm && io.cpu.req_val && !tag_hit; + val tag_hit = io.cpu.req_val && tag_cam.io.hit; + val lookup_miss = (state === s_ready) && status_vm && !tag_hit; when (lookup_miss) { r_refill_tag <== lookup_tag; @@ -123,13 +115,13 @@ class rocketDTLB(entries: Int) extends Component io.cpu.xcpt_ld := status_vm && tag_hit && req_load && - ((status_mode && !sw_array(tag_hit_addr).toBool) || - (!status_mode && !uw_array(tag_hit_addr).toBool)); + !((status_mode && sw_array(tag_hit_addr).toBool) || + (!status_mode && 
uw_array(tag_hit_addr).toBool)); io.cpu.xcpt_st := status_vm && tag_hit && req_store && - ((status_mode && !sr_array(tag_hit_addr).toBool) || - (!status_mode && !ur_array(tag_hit_addr).toBool)); + !((status_mode && sr_array(tag_hit_addr).toBool) || + (!status_mode && ur_array(tag_hit_addr).toBool)); io.cpu.req_rdy := (state === s_ready); io.cpu.resp_miss := lookup_miss; diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 9337392c..55620a28 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -6,10 +6,12 @@ import Node._; import Constants._; import scala.math._; -class ioCAM(addr_bits: Int, tag_bits: Int) extends Bundle { +class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { + val clear = Bool('input); val tag = Bits(tag_bits, 'input); val hit = Bool('output); val hit_addr = UFix(addr_bits, 'output); + val valid_bits = Bits(entries, 'output); val write = Bool('input); val write_tag = Bits(tag_bits, 'input); @@ -17,24 +19,33 @@ class ioCAM(addr_bits: Int, tag_bits: Int) extends Bundle { } class rocketCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Component { - val io = new ioCAM(addr_bits, tag_bits); - val cam_tags = Mem(entries, io.write, io.write_addr, io.write_tag); + val io = new ioCAM(entries, addr_bits, tag_bits); + val cam_tags = Mem(entries, io.write, io.write_addr, io.write_tag); - val l_hit = Wire() { Bool() }; - val l_hit_addr = Wire() { UFix() }; + val l_hit = Wire() { Bool() }; + val l_hit_addr = Wire() { UFix() }; - for (i <- 0 to entries-1) { - when (cam_tags(UFix(i)) === io.tag) { - l_hit <== Bool(true); - l_hit_addr <== UFix(i,addr_bits); - } - } + val vb_array = Reg(resetVal = Bits(0, entries)); + when (io.clear) { + vb_array <== Bits(0, entries); + } + when (io.write) { + vb_array <== vb_array.bitSet(io.write_addr, Bool(true)); + } - l_hit <== Bool(false); - l_hit_addr <== UFix(0, addr_bits); - - io.hit := l_hit; - io.hit_addr := l_hit_addr; + for 
(i <- 0 to entries-1) { + when (vb_array(UFix(i)).toBool && (cam_tags(UFix(i)) === io.tag)) { + l_hit <== Bool(true); + l_hit_addr <== UFix(i,addr_bits); + } + } + + l_hit <== Bool(false); + l_hit_addr <== UFix(0, addr_bits); + + io.valid_bits := vb_array; + io.hit := l_hit; + io.hit_addr := l_hit_addr; } // interface between TLB and PTW @@ -94,6 +105,7 @@ class rocketITLB(entries: Int) extends Component val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); + tag_cam.io.clear := io.cpu.invalidate; tag_cam.io.tag := lookup_tag; tag_cam.io.write := io.ptw.resp_val; tag_cam.io.write_tag := r_refill_tag; @@ -108,15 +120,6 @@ class rocketITLB(entries: Int) extends Component val ptw_perm_ux = io.ptw.resp_perm(0); val ptw_perm_sx = io.ptw.resp_perm(3); - // valid bit array - val vb_array = Reg(resetVal = Bits(0, entries)); - when (io.cpu.invalidate) { - vb_array <== Bits(0, entries); - } - when (io.ptw.resp_val) { - vb_array <== vb_array.bitSet(r_refill_waddr, Bool(true)); - } - // permission bit arrays val ux_array = Reg(resetVal = Bits(0, entries)); // user execute permission val sx_array = Reg(resetVal = Bits(0, entries)); // supervisor execute permission @@ -132,17 +135,16 @@ class rocketITLB(entries: Int) extends Component sx_array <== sx_array.bitSet(r_refill_waddr, Bool(false)); } - // high if there are any unused (invalid) entries in the ITLB -// val invalid_entry = orR(~vb_array); - val invalid_entry = ~vb_array.toUFix.orR(); + // high if there are any unused entries in the ITLB + val invalid_entry = (tag_cam.io.valid_bits != ~Bits(0,entries)); val ie_enc = new priorityEncoder(entries); - ie_enc.io.in := vb_array.toUFix; + ie_enc.io.in := ~tag_cam.io.valid_bits.toUFix; val ie_addr = ie_enc.io.out; val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val tag_hit = tag_cam.io.hit && vb_array(tag_hit_addr).toBool; - val lookup_miss = (state === s_ready) && status_vm && io.cpu.req_val && !tag_hit; + val tag_hit = 
io.cpu.req_val && tag_cam.io.hit; + val lookup_miss = (state === s_ready) && status_vm && !tag_hit; when (lookup_miss) { r_refill_tag <== lookup_tag; @@ -153,10 +155,9 @@ class rocketITLB(entries: Int) extends Component } val itlb_exception = - io.cpu.req_val && tag_hit && - ((status_mode && !sx_array(tag_hit_addr).toBool) || - (!status_mode && !ux_array(tag_hit_addr).toBool) || - (io.cpu.resp_addr >= MEMSIZE)); + tag_hit && + !((status_mode && sx_array(tag_hit_addr).toBool) || + (!status_mode && ux_array(tag_hit_addr).toBool)); io.cpu.req_rdy := (state === s_ready); io.cpu.resp_val := Mux(status_vm, tag_hit, io.cpu.req_val); @@ -171,7 +172,7 @@ class rocketITLB(entries: Int) extends Component // control state machine switch (state) { is (s_ready) { - when (status_vm && io.cpu.req_val && !tag_hit) { + when (status_vm && !tag_hit) { state <== s_request; } } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 596f3e43..524984e5 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -27,14 +27,13 @@ class rocketDmemArbiter extends Component io.cpu.req_rdy := io.mem.req_rdy && !io.ptw.req_val; io.cpu.resp_miss := io.mem.resp_miss; // FIXME - io.cpu.resp_val := io.mem.resp_val && !io.mem.resp_tag(12).toBool; - io.ptw.resp_val := io.mem.resp_val && io.mem.resp_tag(12).toBool; + io.cpu.resp_val := io.mem.resp_val && !io.mem.resp_tag(11).toBool; + io.ptw.resp_val := io.mem.resp_val && io.mem.resp_tag(11).toBool; io.ptw.resp_data := io.mem.resp_data; io.cpu.resp_data := io.mem.resp_data; -// io.cpu.resp_tag := io.mem.resp_tag(11,0); - io.cpu.resp_tag := io.mem.resp_tag; // to get rid of warning, MSB of tag is ignored in dpath - +// io.cpu.resp_tag := io.mem.resp_tag(10,0); + io.cpu.resp_tag := io.mem.resp_tag; } class ioPTW extends Bundle @@ -55,7 +54,7 @@ class rocketPTW extends Component val r_req_vpn = Reg(resetVal = Bits(0,VPN_BITS)); val r_req_dest = Reg(resetVal = Bool(false)); // 0 = ITLB, 1 = DTLB - val 
req_addr = Reg(resetVal = UFix(0,PPN_BITS+PGIDX_BITS)); + val req_addr = Reg(resetVal = UFix(0,PADDR_BITS)); val r_resp_ppn = Reg(resetVal = Bits(0,PPN_BITS)); val r_resp_perm = Reg(resetVal = Bits(0,PERM_BITS)); @@ -69,17 +68,17 @@ class rocketPTW extends Component when ((state === s_ready) && req_itlb_val) { r_req_vpn <== io.itlb.req_vpn; r_req_dest <== Bool(false); - req_addr <== Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-10)).toUFix; + req_addr <== Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; } when ((state === s_ready) && req_dtlb_val) { r_req_vpn <== io.dtlb.req_vpn; r_req_dest <== Bool(true); - req_addr <== Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-10)).toUFix; + req_addr <== Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; } when (io.dmem.resp_val) { - req_addr <== Cat(io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS), vpn_idx).toUFix; + req_addr <== Cat(io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)).toUFix; r_resp_perm <== io.dmem.resp_data(9,4); r_resp_ppn <== io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS); } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 9c60d726..edc3173d 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -29,7 +29,7 @@ class priorityDecoder(width: Int) extends Component class ioPriorityEncoder(in_width: Int, out_width: Int) extends Bundle { - val in = Bits(in_width, 'input); + val in = Bits(in_width, 'input); val out = UFix(out_width, 'output); } From e4fa94aa27ed1418842f23e0592a48b127865330 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Thu, 10 Nov 2011 17:41:22 -0800 Subject: [PATCH 0027/1087] checkpoint --- rocket/src/main/scala/consts.scala | 10 ++++++ rocket/src/main/scala/ctrl.scala | 10 +++--- rocket/src/main/scala/dpath_util.scala | 16 ++++----- 
rocket/src/main/scala/dtlb.scala | 45 ++++++++++++++------------ rocket/src/main/scala/itlb.scala | 30 +++++++++-------- rocket/src/main/scala/ptw.scala | 2 +- 6 files changed, 66 insertions(+), 47 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 9d7d4484..777a8de8 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -157,6 +157,16 @@ object Constants val PCR_K0 = UFix(24, 5); val PCR_K1 = UFix(25, 5); + // definition of bits in PCR status reg + val SR_ET = 0; // enable traps + val SR_EF = 1; // enable floating point + val SR_EV = 2; // enable vector unit + val SR_PS = 4; // mode stack bit + val SR_S = 5; // user/supervisor mode + val SR_UX = 6; // 64 bit user mode + val SR_SX = 7; // 64 bit supervisor mode + val SR_VM = 16; // VM enable + val COREID = 0; val NUMCORES = 1; val PADDR_BITS = 40; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 3aff5072..0fa03d4a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -326,7 +326,7 @@ class rocketCtrl extends Component val jr_taken = (ex_reg_br_type === BR_JR); val j_taken = (ex_reg_br_type === BR_J); - io.dmem.req_val := ex_reg_mem_val && ~io.dpath.killx; + io.dmem.req_val := ex_reg_mem_val; // && ~io.dpath.killx; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; @@ -407,15 +407,15 @@ class rocketCtrl extends Component // replay execute stage PC when the D$ is blocked, when the D$ misses, and for privileged instructions val replay_ex = (ex_reg_mem_val && !io.dmem.req_rdy) || io.dmem.resp_miss || mem_reg_privileged; - // replay mem stage PC on a DTLB miss val replay_mem = io.dtlb_miss; +// val replay_mem = Bool(false); val kill_ex = replay_ex || replay_mem; - val kill_mem = mem_exception || io.dtlb_miss; + val kill_mem = mem_exception || replay_mem; io.dpath.sel_pc := - Mux(mem_exception, PC_EVEC, // exception Mux(replay_mem, PC_MEM, 
// dtlb miss + Mux(mem_exception, PC_EVEC, // exception Mux(mem_reg_eret, PC_PCR, // eret instruction Mux(replay_ex, PC_EX, // D$ blocked, D$ miss, privileged inst Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch @@ -513,7 +513,7 @@ class rocketCtrl extends Component io.dpath.killf := take_pc | ~io.imem.resp_val; io.dpath.killd := ctrl_killd.toBool; - io.dpath.killx := kill_ex.toBool || kill_mem.toBool; + io.dpath.killx := kill_ex.toBool; io.dpath.killm := kill_mem.toBool; io.dpath.mem_load := mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 2ef560d0..1c10abf3 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -135,15 +135,15 @@ class rocketDpathPCR extends Component when (!io.exception && !io.eret && io.w.en) { when (io.w.addr === PCR_STATUS) { - reg_status_vm <== io.w.data(16).toBool; + reg_status_vm <== io.w.data(SR_VM).toBool; reg_status_im <== io.w.data(15,8); - reg_status_sx <== io.w.data(7).toBool; - reg_status_ux <== io.w.data(6).toBool; - reg_status_s <== io.w.data(5).toBool; - reg_status_ps <== io.w.data(4).toBool; - reg_status_ev <== HAVE_VEC && io.w.data(2).toBool; - reg_status_ef <== HAVE_FPU && io.w.data(1).toBool; - reg_status_et <== io.w.data(0).toBool; + reg_status_sx <== io.w.data(SR_SX).toBool; + reg_status_ux <== io.w.data(SR_UX).toBool; + reg_status_s <== io.w.data(SR_S).toBool; + reg_status_ps <== io.w.data(SR_PS).toBool; + reg_status_ev <== HAVE_VEC && io.w.data(SR_EV).toBool; + reg_status_ef <== HAVE_FPU && io.w.data(SR_EF).toBool; + reg_status_et <== io.w.data(SR_ET).toBool; } when (io.w.addr === PCR_EPC) { reg_epc <== io.w.data(VADDR_BITS-1,0).toUFix; } when (io.w.addr === PCR_BADVADDR) { reg_badvaddr <== io.w.data(VADDR_BITS-1,0).toUFix; } diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index d27ffdc8..006d54a6 100644 --- 
a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -45,8 +45,8 @@ class rocketDTLB(entries: Int) extends Component val req_vpn = io.cpu.req_addr(VADDR_BITS-1,PGIDX_BITS); val req_idx = io.cpu.req_addr(PGIDX_BITS-1,0); - val req_load = io.cpu.req_val && (io.cpu.req_cmd === M_XRD); - val req_store = io.cpu.req_val && (io.cpu.req_cmd === M_XWR); + val req_load = (io.cpu.req_cmd === M_XRD); + val req_store = (io.cpu.req_cmd === M_XWR); // val req_amo = io.cpu.req_cmd(3).toBool; val lookup_tag = Cat(io.cpu.req_asid, req_vpn); @@ -61,11 +61,12 @@ class rocketDTLB(entries: Int) extends Component tag_cam.io.write := io.ptw.resp_val; tag_cam.io.write_tag := r_refill_tag; tag_cam.io.write_addr := r_refill_waddr; - val tag_hit_addr = tag_cam.io.hit_addr; + val tag_hit_addr = tag_cam.io.hit_addr; // extract fields from status register - val status_mode = io.cpu.status(6).toBool; // user/supervisor mode - val status_vm = io.cpu.status(16).toBool // virtual memory enable + val status_s = io.cpu.status(SR_S).toBool; // user/supervisor mode + val status_u = !status_s; + val status_vm = io.cpu.status(SR_VM).toBool // virtual memory enable // extract fields from PT permission bits val ptw_perm_ur = io.ptw.resp_perm(1); @@ -80,7 +81,7 @@ class rocketDTLB(entries: Int) extends Component val sw_array = Reg(resetVal = Bits(0, entries)); // supervisor execute permission when (io.ptw.resp_val) { ur_array <== ur_array.bitSet(r_refill_waddr, ptw_perm_ur); - uw_array <== ur_array.bitSet(r_refill_waddr, ptw_perm_uw); + uw_array <== uw_array.bitSet(r_refill_waddr, ptw_perm_uw); sr_array <== sr_array.bitSet(r_refill_waddr, ptw_perm_sr); sw_array <== sw_array.bitSet(r_refill_waddr, ptw_perm_sw); } @@ -89,7 +90,7 @@ class rocketDTLB(entries: Int) extends Component // bits to 0 so the next access will cause an exception when (io.ptw.resp_err) { ur_array <== ur_array.bitSet(r_refill_waddr, Bool(false)); - uw_array <== ur_array.bitSet(r_refill_waddr, Bool(false)); + 
uw_array <== uw_array.bitSet(r_refill_waddr, Bool(false)); sr_array <== sr_array.bitSet(r_refill_waddr, Bool(false)); sw_array <== sw_array.bitSet(r_refill_waddr, Bool(false)); } @@ -102,10 +103,13 @@ class rocketDTLB(entries: Int) extends Component val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val tag_hit = io.cpu.req_val && tag_cam.io.hit; - val lookup_miss = (state === s_ready) && status_vm && !tag_hit; - - when (lookup_miss) { + val lookup_hit = (state === s_ready) && io.cpu.req_val && tag_cam.io.hit; + val lookup_miss = (state === s_ready) && io.cpu.req_val && !tag_cam.io.hit; + + val tlb_hit = status_vm && lookup_hit; + val tlb_miss = status_vm && lookup_miss; + + when (tlb_miss) { r_refill_tag <== lookup_tag; r_refill_waddr <== repl_waddr; when (!invalid_entry) { @@ -113,19 +117,20 @@ class rocketDTLB(entries: Int) extends Component } } + // FIXME: add check for out of range physical addresses (>MEMSIZE) io.cpu.xcpt_ld := - status_vm && tag_hit && req_load && - !((status_mode && sw_array(tag_hit_addr).toBool) || - (!status_mode && uw_array(tag_hit_addr).toBool)); + tlb_hit && req_load && + ((status_s && !sr_array(tag_hit_addr).toBool) || + (status_u && !ur_array(tag_hit_addr).toBool)); io.cpu.xcpt_st := - status_vm && tag_hit && req_store && - !((status_mode && sr_array(tag_hit_addr).toBool) || - (!status_mode && ur_array(tag_hit_addr).toBool)); + tlb_hit && req_store && + ((status_s && !sw_array(tag_hit_addr).toBool) || + (status_u && !uw_array(tag_hit_addr).toBool)); io.cpu.req_rdy := (state === s_ready); - io.cpu.resp_miss := lookup_miss; - io.cpu.resp_val := Mux(status_vm, tag_hit, io.cpu.req_val); + io.cpu.resp_miss := tlb_miss; + io.cpu.resp_val := Mux(status_vm, lookup_hit, io.cpu.req_val); io.cpu.resp_addr := Mux(status_vm, Cat(tag_ram(tag_hit_addr), req_idx), io.cpu.req_addr(PADDR_BITS-1,0)).toUFix; @@ -136,7 +141,7 @@ class rocketDTLB(entries: Int) extends Component // control state machine switch (state) { is (s_ready) { - 
when (status_vm && io.cpu.req_val && !tag_hit) { + when (tlb_miss) { state <== s_request; } } diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 55620a28..8068a30f 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -113,8 +113,9 @@ class rocketITLB(entries: Int) extends Component val tag_hit_addr = tag_cam.io.hit_addr; // extract fields from status register - val status_mode = io.cpu.status(6).toBool; // user/supervisor mode - val status_vm = io.cpu.status(16).toBool // virtual memory enable + val status_s = io.cpu.status(SR_S).toBool; // user/supervisor mode + val status_u = !status_s; + val status_vm = io.cpu.status(SR_VM).toBool // virtual memory enable // extract fields from PT permission bits val ptw_perm_ux = io.ptw.resp_perm(0); @@ -143,10 +144,13 @@ class rocketITLB(entries: Int) extends Component val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val tag_hit = io.cpu.req_val && tag_cam.io.hit; - val lookup_miss = (state === s_ready) && status_vm && !tag_hit; + val lookup_hit = (state === s_ready) && io.cpu.req_val && tag_cam.io.hit; + val lookup_miss = (state === s_ready) && io.cpu.req_val && !tag_cam.io.hit; + + val tlb_hit = status_vm && lookup_hit; + val tlb_miss = status_vm && lookup_miss; - when (lookup_miss) { + when (tlb_miss) { r_refill_tag <== lookup_tag; r_refill_waddr <== repl_waddr; when (!invalid_entry) { @@ -154,25 +158,25 @@ class rocketITLB(entries: Int) extends Component } } - val itlb_exception = - tag_hit && - !((status_mode && sx_array(tag_hit_addr).toBool) || - (!status_mode && ux_array(tag_hit_addr).toBool)); + // FIXME: add test for out of range physical addresses (> MEMSIZE) + io.cpu.exception := + tlb_hit && + ((status_s && !sx_array(tag_hit_addr).toBool) || + (status_u && !ux_array(tag_hit_addr).toBool)); io.cpu.req_rdy := (state === s_ready); - io.cpu.resp_val := Mux(status_vm, tag_hit, io.cpu.req_val); + io.cpu.resp_val := Mux(status_vm, 
lookup_hit, io.cpu.req_val); io.cpu.resp_addr := Mux(status_vm, Cat(tag_ram(tag_hit_addr), req_idx), io.cpu.req_addr(PADDR_BITS-1,0)).toUFix; - io.cpu.exception := status_vm && itlb_exception; io.ptw.req_val := (state === s_request); io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); - + // control state machine switch (state) { is (s_ready) { - when (status_vm && !tag_hit) { + when (tlb_miss) { state <== s_request; } } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 524984e5..64cdb76d 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -25,7 +25,7 @@ class rocketDmemArbiter extends Component io.ptw.req_rdy := io.mem.req_rdy; io.cpu.req_rdy := io.mem.req_rdy && !io.ptw.req_val; - io.cpu.resp_miss := io.mem.resp_miss; // FIXME + io.cpu.resp_miss := io.mem.resp_miss && !io.mem.resp_tag(11).toBool; io.cpu.resp_val := io.mem.resp_val && !io.mem.resp_tag(11).toBool; io.ptw.resp_val := io.mem.resp_val && io.mem.resp_tag(11).toBool; From a1ce9085414d7fdf2556d37d584562e253bb9854 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Fri, 11 Nov 2011 18:18:47 -0800 Subject: [PATCH 0028/1087] dcache/dtlb overhaul --- rocket/src/main/scala/cpu.scala | 9 +- rocket/src/main/scala/ctrl.scala | 3 +- rocket/src/main/scala/dcache.scala | 326 +++++++++++++++++------------ rocket/src/main/scala/dtlb.scala | 66 +++--- rocket/src/main/scala/ptw.scala | 11 +- 5 files changed, 242 insertions(+), 173 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 2f58276f..33e7c0a7 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -76,7 +76,7 @@ class rocketProc extends Component dtlb.io.cpu.req_val := ctrl.io.dmem.req_val; dtlb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; dtlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR - dtlb.io.cpu.req_addr := dpath.io.dmem.req_addr; + dtlb.io.cpu.req_vpn := dpath.io.dmem.req_addr(VADDR_BITS-1,PGIDX_BITS); 
ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu.xcpt_ld; ctrl.io.xcpt_dtlb_st := dtlb.io.cpu.xcpt_st; ctrl.io.dtlb_miss := dtlb.io.cpu.resp_miss; @@ -90,10 +90,13 @@ class rocketProc extends Component arb.io.mem ^^ io.dmem // connect arbiter to ctrl+dpath+DTLB - arb.io.cpu.req_val := dtlb.io.cpu.resp_val; +// arb.io.cpu.req_val := dtlb.io.cpu.resp_val; + arb.io.cpu.req_val := ctrl.io.dmem.req_val; arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; arb.io.cpu.req_type := ctrl.io.dmem.req_type; - arb.io.cpu.req_addr := dtlb.io.cpu.resp_addr; +// arb.io.cpu.req_addr := dtlb.io.cpu.resp_addr; + arb.io.cpu.req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0); + arb.io.cpu.req_ppn := dtlb.io.cpu.resp_ppn; arb.io.cpu.req_data := dpath.io.dmem.req_data; arb.io.cpu.req_tag := dpath.io.dmem.req_tag; ctrl.io.dmem.req_rdy := dtlb.io.cpu.req_rdy && arb.io.cpu.req_rdy; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 0fa03d4a..c79f6909 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -326,7 +326,8 @@ class rocketCtrl extends Component val jr_taken = (ex_reg_br_type === BR_JR); val j_taken = (ex_reg_br_type === BR_J); - io.dmem.req_val := ex_reg_mem_val; // && ~io.dpath.killx; +// io.dmem.req_val := ex_reg_mem_val; + io.dmem.req_val := ex_reg_mem_val && ~io.dpath.killx; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 068ccc6a..2bb7dbb2 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -5,13 +5,15 @@ import Node._; import Constants._; import scala.math._; -// interface between D$ and processor pipeline +// interface between D$ and processor/DTLB class ioDmem(view: List[String] = null) extends Bundle(view) { val req_val = Bool('input); val req_rdy = Bool('output); val req_cmd = Bits(4, 'input); val req_type = Bits(3, 'input); - val req_addr = UFix(PADDR_BITS, 'input); + val 
req_idx = Bits(PGIDX_BITS, 'input); + val req_ppn = Bits(PPN_BITS, 'input); +// val req_addr = UFix(PADDR_BITS, 'input); val req_data = Bits(64, 'input); val req_tag = Bits(5, 'input); val resp_miss = Bool('output); @@ -38,6 +40,70 @@ class ioDCacheDM extends Bundle() { val mem = new ioDcache().flip(); } +class rocketDCacheStoreGen extends Component { + val io = new Bundle { + val req_type = Bits(3, 'input); + val req_addr_lsb = Bits(3, 'input); + val req_data = Bits(64, 'input); + val store_wmask = Bits(64, 'output); + val store_data = Bits(64, 'output); + } + + // generate write mask and store data signals based on store type and address LSBs + val wmask_b = + Mux(io.req_addr_lsb === UFix(0, 3), Bits("b0000_0001", 8), + Mux(io.req_addr_lsb === UFix(1, 3), Bits("b0000_0010", 8), + Mux(io.req_addr_lsb === UFix(2, 3), Bits("b0000_0100", 8), + Mux(io.req_addr_lsb === UFix(3, 3), Bits("b0000_1000", 8), + Mux(io.req_addr_lsb === UFix(4, 3), Bits("b0001_0000", 8), + Mux(io.req_addr_lsb === UFix(5, 3), Bits("b0010_0000", 8), + Mux(io.req_addr_lsb === UFix(6, 3), Bits("b0100_0000", 8), + Mux(io.req_addr_lsb === UFix(7, 3), Bits("b1000_0000", 8), + UFix(0, 8))))))))); + + val wmask_h = + Mux(io.req_addr_lsb(2,1) === UFix(0, 2), Bits("b0000_0011", 8), + Mux(io.req_addr_lsb(2,1) === UFix(1, 2), Bits("b0000_1100", 8), + Mux(io.req_addr_lsb(2,1) === UFix(2, 2), Bits("b0011_0000", 8), + Mux(io.req_addr_lsb(2,1) === UFix(3, 2), Bits("b1100_0000", 8), + UFix(0, 8))))); + + val wmask_w = + Mux(io.req_addr_lsb(2) === UFix(0, 1), Bits("b0000_1111", 8), + Mux(io.req_addr_lsb(2) === UFix(1, 1), Bits("b1111_0000", 8), + UFix(0, 8))); + + val wmask_d = + Bits("b1111_1111", 8); + + val store_wmask_byte = + Mux(io.req_type === MT_B, wmask_b, + Mux(io.req_type === MT_H, wmask_h, + Mux(io.req_type === MT_W, wmask_w, + Mux(io.req_type === MT_D, wmask_d, + UFix(0, 8))))); + + io.store_wmask := + Cat(Fill(8, store_wmask_byte(7)), + Fill(8, store_wmask_byte(6)), + Fill(8, store_wmask_byte(5)), 
+ Fill(8, store_wmask_byte(4)), + Fill(8, store_wmask_byte(3)), + Fill(8, store_wmask_byte(2)), + Fill(8, store_wmask_byte(1)), + Fill(8, store_wmask_byte(0))); + + io.store_data := + Mux(io.req_type === MT_B, Fill(8, io.req_data( 7,0)), + Mux(io.req_type === MT_H, Fill(4, io.req_data(15,0)), + Mux(io.req_type === MT_W, Fill(2, io.req_data(31,0)), + Mux(io.req_type === MT_D, io.req_data, + UFix(0, 64))))); + +} + + + // state machine to flush (write back dirty lines, invalidate clean ones) the D$ class rocketDCacheDM_flush(lines: Int) extends Component { val io = new ioDCacheDM(); @@ -48,6 +114,7 @@ class rocketDCacheDM_flush(lines: Int) extends Component { val offsetbits = 6; val tagmsb = addrbits - 1; val taglsb = indexbits+offsetbits; + val tagbits = tagmsb-taglsb+1; val indexmsb = taglsb-1; val indexlsb = offsetbits; val offsetmsb = indexlsb-1; @@ -82,7 +149,11 @@ class rocketDCacheDM_flush(lines: Int) extends Component { dcache.io.cpu.req_val := (io.cpu.req_val && (io.cpu.req_cmd != M_FLA) && !flush_waiting) || flushing; dcache.io.cpu.req_cmd := Mux(flushing, M_FLA, io.cpu.req_cmd); - dcache.io.cpu.req_addr := Mux(flushing, Cat(Bits(0,tagmsb-taglsb+1), flush_count, Bits(0,offsetbits)).toUFix, io.cpu.req_addr); + dcache.io.cpu.req_idx := Mux(flushing, Cat(flush_count, Bits(0,offsetbits)), io.cpu.req_idx); + dcache.io.cpu.req_ppn := Mux(flushing, UFix(0,PPN_BITS), io.cpu.req_ppn); +// dcache.io.cpu.req_addr := +// Mux(flushing, Cat(Bits(0,tagmsb-taglsb+1), flush_count, Bits(0,offsetbits)).toUFix, +// io.cpu.req_addr); dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); dcache.io.cpu.req_type := io.cpu.req_type; dcache.io.cpu.req_data ^^ io.cpu.req_data; @@ -102,9 +173,10 @@ class rocketDCacheDM(lines: Int) extends Component { val addrbits = PADDR_BITS; val indexbits = ceil(log10(lines)/log10(2)).toInt; - val offsetbits = 6; - val tagmsb = addrbits - 1; + val offsetbits = 6; // 64 byte cache lines = 2^6 bytes + val tagmsb = PADDR_BITS-1; val 
taglsb = indexbits+offsetbits; + val tagbits = tagmsb-taglsb+1; val indexmsb = taglsb-1; val indexlsb = offsetbits; val offsetmsb = indexlsb-1; @@ -113,45 +185,48 @@ class rocketDCacheDM(lines: Int) extends Component { val s_reset :: s_ready :: s_replay_load :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(8) { UFix() }; val state = Reg(resetVal = s_reset); - val r_cpu_req_addr = Reg(resetVal = Bits(0, addrbits)); + // idx arrives one clock cycle prior to ppn b/c of DTLB + val r_cpu_req_idx = Reg(resetVal = Bits(0, PGIDX_BITS)); + val r_cpu_req_ppn = Reg(resetVal = Bits(0, PPN_BITS)); val r_cpu_req_val = Reg(resetVal = Bool(false)); val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); val r_cpu_req_type = Reg(resetVal = Bits(0,3)); val r_cpu_req_tag = Reg(resetVal = Bits(0,5)); + val r_cpu_resp_val = Reg(resetVal = Bool(false)); - val p_store_data = Reg(resetVal = Bits(0,64)); - val p_store_addr = Reg(resetVal = Bits(0,64)); - val p_store_cmd = Reg(resetVal = Bits(0,4)); - val p_store_type = Reg(resetVal = Bits(0,3)); - val p_store_valid = Reg(resetVal = Bool(false)); + val p_store_data = Reg(resetVal = Bits(0,64)); + val p_store_idx = Reg(resetVal = Bits(0,PGIDX_BITS)); + val p_store_type = Reg(resetVal = Bits(0,3)); + val p_store_valid = Reg(resetVal = Bool(false)); - val req_load = (r_cpu_req_cmd === M_XRD) || (r_cpu_req_cmd === M_PRD); - val req_store = (r_cpu_req_cmd === M_XWR); - val req_flush = (r_cpu_req_cmd === M_FLA); - val req_ptw_load = (r_cpu_req_cmd === M_PRD); + val req_store = (io.cpu.req_cmd === M_XWR); + val r_req_load = (r_cpu_req_cmd === M_XRD) || (r_cpu_req_cmd === M_PRD); + val r_req_store = (r_cpu_req_cmd === M_XWR); + val r_req_flush = (r_cpu_req_cmd === M_FLA); + val r_req_ptw_load = (r_cpu_req_cmd === M_PRD); - when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_addr <== io.cpu.req_addr; + when (io.cpu.req_val && io.cpu.req_rdy) { + r_cpu_req_idx <== io.cpu.req_idx; r_cpu_req_cmd <== 
io.cpu.req_cmd; r_cpu_req_type <== io.cpu.req_type; r_cpu_req_tag <== io.cpu.req_tag; } - when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_cmd === M_XWR)) { - p_store_data <== io.cpu.req_data; - p_store_addr <== io.cpu.req_addr; - p_store_type <== io.cpu.req_type; - p_store_valid <== Bool(true); + when (state === s_ready) { + r_cpu_req_ppn <== io.cpu.req_ppn; } - when (io.cpu.req_rdy) { r_cpu_req_val <== io.cpu.req_val; } - when ((state === s_resolve_miss) && !req_load) { + when (state === s_resolve_miss) { r_cpu_req_val <== Bool(false); + when (r_req_load) { + r_cpu_resp_val <== Bool(true); + } } + r_cpu_resp_val <== Bool(false); - // counter + // refill counter val rr_count = Reg(resetVal = UFix(0,2)); val rr_count_next = rr_count + UFix(1); when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy)) { @@ -159,146 +234,115 @@ class rocketDCacheDM(lines: Int) extends Component { } // tag array - val tagbits = addrbits-(indexbits+offsetbits); + val tag_addr = + Mux((state === s_ready), io.cpu.req_idx(PGIDX_BITS-1,offsetbits), + r_cpu_req_idx(PGIDX_BITS-1,offsetbits)).toUFix; val tag_we = ((state === s_refill) && io.mem.req_rdy && (rr_count === UFix(3,2))) || - ((state === s_resolve_miss) && req_flush); - val tag_array = new rocketSRAMsp(lines, tagbits); - val tag_raddr = - Mux((state === s_ready), io.cpu.req_addr(indexmsb, indexlsb).toUFix, - r_cpu_req_addr(indexmsb, indexlsb).toUFix); - tag_array.io.a := tag_raddr; - tag_array.io.d := r_cpu_req_addr(tagmsb, taglsb); + ((state === s_resolve_miss) && r_req_flush); + + val tag_array = new rocketSRAMsp(lines, tagbits); + tag_array.io.a := tag_addr; + tag_array.io.d := r_cpu_req_ppn; tag_array.io.we := tag_we; tag_array.io.bweb := ~Bits(0,tagbits); tag_array.io.ce := Bool(true); // FIXME - val tag_rdata = tag_array.io.q; + val tag_rdata = tag_array.io.q; // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); - val vb_rdata = Reg(vb_array(tag_raddr)); - when (tag_we && 
!req_flush) { - vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); +// val vb_rdata = Reg(vb_array(tag_raddr)); + when (tag_we && !r_req_flush) { + vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } - when (tag_we && req_flush) { - vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); + when (tag_we && r_req_flush) { + vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); } - val tag_valid = vb_rdata.toBool; - val tag_match = tag_valid && (tag_rdata === r_cpu_req_addr(tagmsb, taglsb)); + val tag_valid = Reg(vb_array(tag_addr)).toBool; + val tag_match = (tag_rdata === io.cpu.req_ppn); + val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetbits) === p_store_idx(PGIDX_BITS-1,offsetbits)); + val ldst_conflict = r_cpu_req_val && r_req_load && p_store_valid && addr_match; + + // write the pending store data when the cache is idle, when the next command isn't a load + // or when there's a load to the same address (in which case there's a 2 cycle delay: + // once cycle to write the store data and another to read the data back) + val drain_store = p_store_valid && (!io.cpu.req_val || req_store || ldst_conflict); + + // write pending store data from a store which missed + // after the cache line refill has completed + val resolve_store = (state === s_resolve_miss) && r_req_store; - // generate write mask and store data signals based on store type and address LSBs - val wmask_b = - Mux(p_store_addr(2,0) === UFix(0, 3), Bits("b0000_0001", 8), - Mux(p_store_addr(2,0) === UFix(1, 3), Bits("b0000_0010", 8), - Mux(p_store_addr(2,0) === UFix(2, 3), Bits("b0000_0100", 8), - Mux(p_store_addr(2,0) === UFix(3, 3), Bits("b0000_1000", 8), - Mux(p_store_addr(2,0) === UFix(4, 3), Bits("b0001_0000", 8), - Mux(p_store_addr(2,0) === UFix(5, 3), Bits("b0010_0000", 8), - Mux(p_store_addr(2,0) === UFix(6, 3), Bits("b0100_0000", 8), - Mux(p_store_addr(2,0) === 
UFix(7, 3), Bits("b1000_0000", 8), - UFix(0, 8))))))))); - - val wmask_h = - Mux(p_store_addr(2,1) === UFix(0, 2), Bits("b0000_0011", 8), - Mux(p_store_addr(2,1) === UFix(1, 2), Bits("b0000_1100", 8), - Mux(p_store_addr(2,1) === UFix(2, 2), Bits("b0011_0000", 8), - Mux(p_store_addr(2,1) === UFix(3, 2), Bits("b1100_0000", 8), - UFix(0, 8))))); - - val wmask_w = - Mux(p_store_addr(2) === UFix(0, 1), Bits("b0000_1111", 8), - Mux(p_store_addr(2) === UFix(1, 1), Bits("b1111_0000", 8), - UFix(0, 8))); - - val wmask_d = - Bits("b1111_1111", 8); - - val store_wmask = - Mux(p_store_type === MT_B, wmask_b, - Mux(p_store_type === MT_H, wmask_h, - Mux(p_store_type === MT_W, wmask_w, - Mux(p_store_type === MT_D, wmask_d, - UFix(0, 8))))); - - val store_data = - Mux(p_store_type === MT_B, Fill(8, p_store_data( 7,0)), - Mux(p_store_type === MT_H, Fill(4, p_store_data(15,0)), - Mux(p_store_type === MT_W, Fill(2, p_store_data(31,0)), - Mux(p_store_type === MT_D, p_store_data, - UFix(0, 64))))); - - val addr_match = (r_cpu_req_addr(tagmsb, offsetlsb) === p_store_addr(tagmsb, offsetlsb)); - val ldst_conflict = r_cpu_req_val && req_load && p_store_valid && addr_match; - val drain_store = - (state === s_ready) && p_store_valid && - (!(io.cpu.req_val && (io.cpu.req_cmd === M_XRD)) || - r_cpu_req_val && req_load && p_store_valid && addr_match); - - val resolve_store = (state === s_resolve_miss) && req_store; - val do_store = drain_store | resolve_store; - // dirty bit array val db_array = Reg(resetVal = Bits(0, lines)); - val db_rdata = Reg(db_array(tag_raddr)); - val tag_dirty = db_rdata.toBool; +// val db_rdata = Reg(db_array(tag_raddr)); + val tag_dirty = Reg(db_array(tag_addr)).toBool; - when (do_store) { + when (io.cpu.req_val && io.cpu.req_rdy && req_store) { + p_store_idx <== io.cpu.req_idx; + p_store_data <== io.cpu.req_data; + p_store_type <== io.cpu.req_type; + p_store_valid <== Bool(true); + } + when (drain_store) { p_store_valid <== Bool(false); - db_array <== 
db_array.bitSet(p_store_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); + db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); + } + when (resolve_store) { + db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } when (tag_we) { - db_array <== db_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(0,1)); + db_array <== db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); } + + // generate write mask and data signals for stores + val storegen = new rocketDCacheStoreGen(); + storegen.io.req_addr_lsb := p_store_idx(2,0); + storegen.io.req_data := p_store_data; + storegen.io.req_type := p_store_type + val store_data = Fill(2, storegen.io.store_data); + val store_wmask_d = storegen.io.store_wmask; + val store_idx_sel = p_store_idx(offsetlsb).toBool; + val store_wmask = + Mux(store_idx_sel, + Cat(store_wmask_d, Bits(0,64)), + Cat(Bits(0,64), store_wmask_d)); // data array val data_array = new rocketSRAMsp(lines*4, 128); - val store_wmask_expand = - Cat(Fill(8, store_wmask(7)), - Fill(8, store_wmask(6)), - Fill(8, store_wmask(5)), - Fill(8, store_wmask(4)), - Fill(8, store_wmask(3)), - Fill(8, store_wmask(2)), - Fill(8, store_wmask(1)), - Fill(8, store_wmask(0))); - - val da_store_wmask = - Mux(p_store_addr(offsetlsb).toBool, - Cat(store_wmask_expand, Bits(0,64)), - Cat(Bits(0,64), store_wmask_expand)); - data_array.io.a := - Mux(do_store, p_store_addr(indexmsb, offsetmsb-1), - Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count_next), - Mux((state === s_start_writeback) || (state === s_writeback) || (state === s_refill), Cat(r_cpu_req_addr(indexmsb, indexlsb), rr_count), - Mux((state === s_resolve_miss) || (state === s_replay_load), r_cpu_req_addr(indexmsb, offsetmsb-1), - io.cpu.req_addr(indexmsb, offsetmsb-1))))).toUFix; + Mux(drain_store || resolve_store, p_store_idx(PGIDX_BITS-1, offsetmsb-1), + Mux((state === s_writeback) && 
io.mem.req_rdy, Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count_next), + Mux((state === s_start_writeback) || (state === s_writeback) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count), + Mux((state === s_resolve_miss) || (state === s_replay_load), r_cpu_req_idx(PGIDX_BITS-1, offsetmsb-1), + io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1))))).toUFix; data_array.io.d := - Mux((state === s_refill), io.mem.resp_data, - Cat(store_data, store_data)); - - data_array.io.we := ((state === s_refill) && io.mem.resp_val) || do_store; - data_array.io.bweb := - Mux((state === s_refill), ~Bits(0,128), - da_store_wmask); + Mux((state === s_refill), io.mem.resp_data, + Mux((state === s_resolve_miss), Fill(2, p_store_data), + store_data)); + data_array.io.we := ((state === s_refill) && io.mem.resp_val) || drain_store || resolve_store; + data_array.io.bweb := Mux((state === s_refill), ~Bits(0,128), store_wmask); data_array.io.ce := Bool(true); // FIXME val data_array_rdata = data_array.io.q; - val miss = (state === s_ready) && r_cpu_req_val && req_load && (!tag_match || (p_store_valid && addr_match)); + // signal a load miss when the data isn't present in the cache and when it's in the pending store data register + // (causes the cache to block for 2 cycles and the load instruction is replayed) + val hit = tag_valid && tag_match; + val load_miss = (state === s_ready) && r_cpu_req_val && r_req_load && (!hit || (p_store_valid && addr_match)); // output signals - io.cpu.req_rdy := (state === s_ready) && !ldst_conflict && (!r_cpu_req_val || (tag_match && !req_flush)); - - io.cpu.resp_val := ((state === s_ready) && r_cpu_req_val && tag_match && req_load && !(p_store_valid && addr_match)) || - ((state === s_resolve_miss) && req_flush); + // busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush + io.cpu.req_rdy := (state === s_ready) && !ldst_conflict && (!r_cpu_req_val || (hit && !r_req_flush)); + 
io.cpu.resp_val := ((state === s_ready) && r_cpu_req_val && r_req_load && hit && !(p_store_valid && addr_match)) || + ((state === s_resolve_miss) && r_req_flush) || + r_cpu_resp_val; - io.cpu.resp_miss := miss; -// io.cpu.resp_tag := Cat(Bits(0,1), r_cpu_req_type, r_cpu_req_addr(2,0), r_cpu_req_tag); - io.cpu.resp_tag := Cat(req_ptw_load, r_cpu_req_type, r_cpu_req_addr(2,0), r_cpu_req_tag); + io.cpu.resp_miss := load_miss; + // tag MSB distinguishes between loads destined for the PTW and CPU + io.cpu.resp_tag := Cat(r_req_ptw_load, r_cpu_req_type, r_cpu_req_idx(2,0), r_cpu_req_tag); io.cpu.resp_data := - Mux(r_cpu_req_addr(offsetlsb).toBool, data_array_rdata(127, 64), + Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); io.mem.req_val := (state === s_req_refill) || (state === s_writeback); @@ -306,8 +350,8 @@ class rocketDCacheDM(lines: Int) extends Component { io.mem.req_wdata := data_array_rdata; io.mem.req_tag := UFix(0); io.mem.req_addr := - Mux(state === s_writeback, Cat(tag_rdata, r_cpu_req_addr(indexmsb, indexlsb), rr_count).toUFix, - Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix); + Mux(state === s_writeback, Cat(tag_rdata, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count), + Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), Bits(0,2))).toUFix; // control state machine switch (state) { @@ -318,13 +362,13 @@ class rocketDCacheDM(lines: Int) extends Component { when (ldst_conflict) { state <== s_replay_load; } - when (!r_cpu_req_val || tag_match) { + when (!r_cpu_req_val || (hit && !r_req_flush)) { state <== s_ready; } when (tag_valid & tag_dirty) { state <== s_start_writeback; } - when (req_flush) { + when (r_req_flush) { state <== s_resolve_miss; } otherwise { @@ -339,8 +383,12 @@ class rocketDCacheDM(lines: Int) extends Component { } is (s_writeback) { when (io.mem.req_rdy && (rr_count === UFix(3,2))) { - when (req_flush) { state <== s_resolve_miss; } - otherwise { state <== s_req_refill; } + when 
(r_req_flush) { + state <== s_resolve_miss; + } + otherwise { + state <== s_req_refill; + } } } is (s_req_refill) diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 006d54a6..0e7a6489 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -18,11 +18,11 @@ class ioDTLB_CPU(view: List[String] = null) extends Bundle(view) val req_cmd = Bits(4, 'input); // load/store/amo val req_rdy = Bool('output); val req_asid = Bits(ASID_BITS, 'input); - val req_addr = UFix(VADDR_BITS, 'input); + val req_vpn = UFix(VPN_BITS, 'input); // lookup responses val resp_miss = Bool('output); - val resp_val = Bool('output); - val resp_addr = UFix(PADDR_BITS, 'output); +// val resp_val = Bool('output); + val resp_ppn = UFix(PPN_BITS, 'output); val xcpt_ld = Bool('output); val xcpt_st = Bool('output); } @@ -35,25 +35,40 @@ class ioDTLB extends Bundle class rocketDTLB(entries: Int) extends Component { - val addr_bits = ceil(log10(entries)/log10(2)).toInt; val io = new ioDTLB(); + val addr_bits = ceil(log10(entries)/log10(2)).toInt; + val s_ready :: s_request :: s_wait :: Nil = Enum(3) { UFix() }; val state = Reg(resetVal = s_ready); - - val tag_cam = new rocketCAM(entries, addr_bits, ASID_BITS+VPN_BITS); - val req_vpn = io.cpu.req_addr(VADDR_BITS-1,PGIDX_BITS); - val req_idx = io.cpu.req_addr(PGIDX_BITS-1,0); - val req_load = (io.cpu.req_cmd === M_XRD); - val req_store = (io.cpu.req_cmd === M_XWR); + val r_cpu_req_vpn = Reg(resetVal = Bits(0, VPN_BITS)); + val r_cpu_req_val = Reg(resetVal = Bool(false)); + val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); + val r_cpu_req_asid = Reg(resetVal = Bits(0,ASID_BITS)); + val r_refill_tag = Reg(resetVal = Bits(0,ASID_BITS+VPN_BITS)); + val r_refill_waddr = Reg(resetVal = UFix(0,addr_bits)); + val repl_count = Reg(resetVal = UFix(0,addr_bits)); + + when (io.cpu.req_val && io.cpu.req_rdy) { + r_cpu_req_vpn <== io.cpu.req_vpn; + r_cpu_req_cmd <== io.cpu.req_cmd; + r_cpu_req_asid <== 
io.cpu.req_asid; + } + + when (io.cpu.req_rdy) { + r_cpu_req_val <== io.cpu.req_val; + } + +// val req_vpn = r_cpu_req_addr(VADDR_BITS-1,PGIDX_BITS); +// val req_idx = io.cpu.req_addr(PGIDX_BITS-1,0); + val req_load = (r_cpu_req_cmd === M_XRD); + val req_store = (r_cpu_req_cmd === M_XWR); // val req_amo = io.cpu.req_cmd(3).toBool; - val lookup_tag = Cat(io.cpu.req_asid, req_vpn); - val r_refill_tag = Reg(resetVal = Bits(0, ASID_BITS+VPN_BITS)); - val r_refill_waddr = Reg(resetVal = UFix(0, addr_bits)); - val repl_count = Reg(resetVal = UFix(0, addr_bits)); - + val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); + + val tag_cam = new rocketCAM(entries, addr_bits, ASID_BITS+VPN_BITS); val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); tag_cam.io.clear := io.cpu.invalidate; @@ -61,6 +76,7 @@ class rocketDTLB(entries: Int) extends Component tag_cam.io.write := io.ptw.resp_val; tag_cam.io.write_tag := r_refill_tag; tag_cam.io.write_addr := r_refill_waddr; + val tag_hit = tag_cam.io.hit; val tag_hit_addr = tag_cam.io.hit_addr; // extract fields from status register @@ -75,10 +91,10 @@ class rocketDTLB(entries: Int) extends Component val ptw_perm_sw = io.ptw.resp_perm(5); // permission bit arrays - val ur_array = Reg(resetVal = Bits(0, entries)); // user execute permission - val uw_array = Reg(resetVal = Bits(0, entries)); // user execute permission - val sr_array = Reg(resetVal = Bits(0, entries)); // supervisor execute permission - val sw_array = Reg(resetVal = Bits(0, entries)); // supervisor execute permission + val ur_array = Reg(resetVal = Bits(0, entries)); // user read permission + val uw_array = Reg(resetVal = Bits(0, entries)); // user write permission + val sr_array = Reg(resetVal = Bits(0, entries)); // supervisor read permission + val sw_array = Reg(resetVal = Bits(0, entries)); // supervisor write permission when (io.ptw.resp_val) { ur_array <== ur_array.bitSet(r_refill_waddr, ptw_perm_ur); uw_array <== 
uw_array.bitSet(r_refill_waddr, ptw_perm_uw); @@ -103,12 +119,13 @@ class rocketDTLB(entries: Int) extends Component val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val lookup_hit = (state === s_ready) && io.cpu.req_val && tag_cam.io.hit; - val lookup_miss = (state === s_ready) && io.cpu.req_val && !tag_cam.io.hit; - + val lookup_hit = (state === s_ready) && r_cpu_req_val && tag_hit; + val lookup_miss = (state === s_ready) && r_cpu_req_val && !tag_hit; val tlb_hit = status_vm && lookup_hit; val tlb_miss = status_vm && lookup_miss; + // currently replace TLB entries in LIFO order + // TODO: implement LRU replacement policy when (tlb_miss) { r_refill_tag <== lookup_tag; r_refill_waddr <== repl_waddr; @@ -130,10 +147,7 @@ class rocketDTLB(entries: Int) extends Component io.cpu.req_rdy := (state === s_ready); io.cpu.resp_miss := tlb_miss; - io.cpu.resp_val := Mux(status_vm, lookup_hit, io.cpu.req_val); - io.cpu.resp_addr := - Mux(status_vm, Cat(tag_ram(tag_hit_addr), req_idx), - io.cpu.req_addr(PADDR_BITS-1,0)).toUFix; + io.cpu.resp_ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; io.ptw.req_val := (state === s_request); io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 64cdb76d..048816db 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -7,7 +7,7 @@ import scala.math._; class ioDmemArbiter extends Bundle { - val ptw = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_addr", "resp_data", "resp_val")); + val ptw = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_idx", "req_ppn", "resp_data", "resp_val")); val cpu = new ioDmem(); val mem = new ioDmem().flip(); } @@ -19,7 +19,8 @@ class rocketDmemArbiter extends Component io.mem.req_val := io.ptw.req_val || io.cpu.req_val; io.mem.req_cmd := Mux(io.ptw.req_val, io.ptw.req_cmd, io.cpu.req_cmd); io.mem.req_type := 
Mux(io.ptw.req_val, io.ptw.req_type, io.cpu.req_type); - io.mem.req_addr := Mux(io.ptw.req_val, io.ptw.req_addr, io.cpu.req_addr); + io.mem.req_idx := Mux(io.ptw.req_val, io.ptw.req_idx, io.cpu.req_idx); + io.mem.req_ppn := Mux(io.ptw.req_val, io.ptw.req_ppn, io.cpu.req_ppn); io.mem.req_data := io.cpu.req_data; io.mem.req_tag := Mux(io.ptw.req_val, Bits(0,5), io.cpu.req_tag); @@ -40,7 +41,7 @@ class ioPTW extends Bundle { val itlb = new ioTLB_PTW().flip(); val dtlb = new ioTLB_PTW().flip(); - val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_addr", "resp_data", "resp_val")).flip(); + val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_ppn", "req_idx", "resp_data", "resp_val")).flip(); val ptbr = UFix(PADDR_BITS, 'input); } @@ -90,7 +91,9 @@ class rocketPTW extends Component io.dmem.req_cmd := M_PRD; io.dmem.req_type := MT_D; - io.dmem.req_addr := req_addr; +// io.dmem.req_addr := req_addr; + io.dmem.req_idx := req_addr(PGIDX_BITS-1,0); + io.dmem.req_ppn := req_addr(PADDR_BITS-1,PGIDX_BITS); val resp_val = (state === s_done) || (state === s_l1_fake) || (state === s_l2_fake); val resp_err = (state === s_error); From 44926866b79a3b1ce590477ab62512b0507e6e2e Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Fri, 11 Nov 2011 18:48:34 -0800 Subject: [PATCH 0029/1087] updated itlb --- rocket/src/main/scala/cpu.scala | 7 +++--- rocket/src/main/scala/ctrl.scala | 3 ++- rocket/src/main/scala/dtlb.scala | 2 -- rocket/src/main/scala/itlb.scala | 41 +++++++++++++++++++------------- 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 33e7c0a7..9ed7432f 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -62,13 +62,14 @@ class rocketProc extends Component itlb.io.cpu.status := dpath.io.ctrl.status; itlb.io.cpu.req_val := ctrl.io.imem.req_val; itlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR 
- itlb.io.cpu.req_addr := dpath.io.imem.req_addr; - io.imem.req_val := itlb.io.cpu.resp_val; - io.imem.req_addr := itlb.io.cpu.resp_addr; + dtlb.io.cpu.req_vpn := dpath.io.imem.req_addr(VADDR_BITS-1,PGIDX_BITS); + io.imem.req_vpn := itlb.io.cpu.resp_vpn; ctrl.io.imem.req_rdy := itlb.io.cpu.req_rdy && io.imem.req_rdy; ctrl.io.imem.resp_val := io.imem.resp_val; dpath.io.imem.resp_data := io.imem.resp_data; ctrl.io.xcpt_itlb := itlb.io.cpu.exception; + ctrl.io.itlb_miss := itlb.io.cpu.resp_miss; + // connect DTLB to D$ arbiter, ctrl+dpath dtlb.io.cpu.invalidate := Bool(false); // FIXME diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index c79f6909..6985cf3d 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -68,6 +68,7 @@ class ioCtrlAll extends Bundle() val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss")).flip(); val host = new ioHost(List("start")); val dtlb_miss = Bool('input); + val itlb_miss = Bool('input); val xcpt_dtlb_ld = Bool('input); val xcpt_dtlb_st = Bool('input); val xcpt_itlb = Bool('input); @@ -512,7 +513,7 @@ class rocketCtrl extends Component io.dpath.stalld := ctrl_stalld.toBool; - io.dpath.killf := take_pc | ~io.imem.resp_val; + io.dpath.killf := take_pc | io.itlb_miss | ~io.imem.resp_val; io.dpath.killd := ctrl_killd.toBool; io.dpath.killx := kill_ex.toBool; io.dpath.killm := kill_mem.toBool; diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 0e7a6489..9c3f4499 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -60,8 +60,6 @@ class rocketDTLB(entries: Int) extends Component r_cpu_req_val <== io.cpu.req_val; } -// val req_vpn = r_cpu_req_addr(VADDR_BITS-1,PGIDX_BITS); -// val req_idx = io.cpu.req_addr(PGIDX_BITS-1,0); val req_load = (r_cpu_req_cmd === M_XRD); val req_store = (r_cpu_req_cmd === M_XWR); // val req_amo = io.cpu.req_cmd(3).toBool; diff --git 
a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 8068a30f..56f8ccec 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -73,10 +73,11 @@ class ioITLB_CPU(view: List[String] = null) extends Bundle(view) val req_val = Bool('input); val req_rdy = Bool('output); val req_asid = Bits(ASID_BITS, 'input); - val req_addr = UFix(VADDR_BITS, 'input); + val req_vpn = UFix(VPN_BITS, 'input); // lookup responses - val resp_val = Bool('output); - val resp_addr = UFix(PADDR_BITS, 'output); + val resp_miss = Bool('output); +// val resp_val = Bool('output); + val resp_ppn = UFix(PPN_BITS, 'output); val exception = Bool('output); } @@ -93,16 +94,26 @@ class rocketITLB(entries: Int) extends Component val s_ready :: s_request :: s_wait :: Nil = Enum(3) { UFix() }; val state = Reg(resetVal = s_ready); - - val tag_cam = new rocketCAM(entries, addr_bits, ASID_BITS+VPN_BITS); - val req_vpn = io.cpu.req_addr(VADDR_BITS-1,PGIDX_BITS); - val req_idx = io.cpu.req_addr(PGIDX_BITS-1,0); - val lookup_tag = Cat(io.cpu.req_asid, req_vpn); + val r_cpu_req_vpn = Reg(resetVal = Bits(0, VPN_BITS)); + val r_cpu_req_val = Reg(resetVal = Bool(false)); + val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); + val r_cpu_req_asid = Reg(resetVal = Bits(0,ASID_BITS)); val r_refill_tag = Reg(resetVal = Bits(0, ASID_BITS+VPN_BITS)); val r_refill_waddr = Reg(resetVal = UFix(0, addr_bits)); val repl_count = Reg(resetVal = UFix(0, addr_bits)); + when (io.cpu.req_val && io.cpu.req_rdy) { + r_cpu_req_vpn <== io.cpu.req_vpn; + r_cpu_req_asid <== io.cpu.req_asid; + } + when (io.cpu.req_rdy) { + r_cpu_req_val <== io.cpu.req_val; + } + + val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); + + val tag_cam = new rocketCAM(entries, addr_bits, ASID_BITS+VPN_BITS); val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); tag_cam.io.clear := io.cpu.invalidate; @@ -110,7 +121,8 @@ class rocketITLB(entries: Int) extends Component 
tag_cam.io.write := io.ptw.resp_val; tag_cam.io.write_tag := r_refill_tag; tag_cam.io.write_addr := r_refill_waddr; - val tag_hit_addr = tag_cam.io.hit_addr; + val tag_hit = tag_cam.io.hit; + val tag_hit_addr = tag_cam.io.hit_addr; // extract fields from status register val status_s = io.cpu.status(SR_S).toBool; // user/supervisor mode @@ -144,9 +156,8 @@ class rocketITLB(entries: Int) extends Component val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val lookup_hit = (state === s_ready) && io.cpu.req_val && tag_cam.io.hit; - val lookup_miss = (state === s_ready) && io.cpu.req_val && !tag_cam.io.hit; - + val lookup_hit = (state === s_ready) && r_cpu_req_val && tag_hit; + val lookup_miss = (state === s_ready) && r_cpu_req_val && !tag_hit; val tlb_hit = status_vm && lookup_hit; val tlb_miss = status_vm && lookup_miss; @@ -165,10 +176,8 @@ class rocketITLB(entries: Int) extends Component (status_u && !ux_array(tag_hit_addr).toBool)); io.cpu.req_rdy := (state === s_ready); - io.cpu.resp_val := Mux(status_vm, lookup_hit, io.cpu.req_val); - io.cpu.resp_addr := - Mux(status_vm, Cat(tag_ram(tag_hit_addr), req_idx), - io.cpu.req_addr(PADDR_BITS-1,0)).toUFix; + io.cpu.resp_miss := tlb_miss; + io.cpu.resp_ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; io.ptw.req_val := (state === s_request); io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); From 73416f224b3fdeeab412055b62d75222ea3068a0 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sat, 12 Nov 2011 00:25:06 -0800 Subject: [PATCH 0030/1087] more tlb/ptw debugging --- rocket/src/main/scala/cpu.scala | 7 ++- rocket/src/main/scala/dcache.scala | 18 ++---- rocket/src/main/scala/icache.scala | 96 ++++++++++++++++++++---------- rocket/src/main/scala/itlb.scala | 5 +- rocket/src/main/scala/ptw.scala | 7 ++- 5 files changed, 81 insertions(+), 52 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 9ed7432f..2c45e534 100644 --- 
a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -62,13 +62,16 @@ class rocketProc extends Component itlb.io.cpu.status := dpath.io.ctrl.status; itlb.io.cpu.req_val := ctrl.io.imem.req_val; itlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR - dtlb.io.cpu.req_vpn := dpath.io.imem.req_addr(VADDR_BITS-1,PGIDX_BITS); - io.imem.req_vpn := itlb.io.cpu.resp_vpn; + itlb.io.cpu.req_vpn := dpath.io.imem.req_addr(VADDR_BITS-1,PGIDX_BITS); + io.imem.req_idx := dpath.io.imem.req_addr(PGIDX_BITS-1,0); + io.imem.req_ppn := itlb.io.cpu.resp_ppn; + io.imem.req_val := ctrl.io.imem.req_val; ctrl.io.imem.req_rdy := itlb.io.cpu.req_rdy && io.imem.req_rdy; ctrl.io.imem.resp_val := io.imem.resp_val; dpath.io.imem.resp_data := io.imem.resp_data; ctrl.io.xcpt_itlb := itlb.io.cpu.exception; ctrl.io.itlb_miss := itlb.io.cpu.resp_miss; + io.imem.itlb_miss := itlb.io.cpu.resp_miss; // connect DTLB to D$ arbiter, ctrl+dpath diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 2bb7dbb2..a82c8bfc 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -102,8 +102,6 @@ class rocketDCacheStoreGen extends Component { } - - // state machine to flush (write back dirty lines, invalidate clean ones) the D$ class rocketDCacheDM_flush(lines: Int) extends Component { val io = new ioDCacheDM(); @@ -212,7 +210,7 @@ class rocketDCacheDM(lines: Int) extends Component { r_cpu_req_tag <== io.cpu.req_tag; } - when (state === s_ready) { + when ((state === s_ready) && r_cpu_req_val) { r_cpu_req_ppn <== io.cpu.req_ppn; } when (io.cpu.req_rdy) { @@ -240,13 +238,13 @@ class rocketDCacheDM(lines: Int) extends Component { val tag_we = ((state === s_refill) && io.mem.req_rdy && (rr_count === UFix(3,2))) || ((state === s_resolve_miss) && r_req_flush); - + val tag_array = new rocketSRAMsp(lines, tagbits); tag_array.io.a := tag_addr; tag_array.io.d := r_cpu_req_ppn; tag_array.io.we := tag_we; 
tag_array.io.bweb := ~Bits(0,tagbits); - tag_array.io.ce := Bool(true); // FIXME + tag_array.io.ce := (state === s_ready) && io.cpu.req_val; val tag_rdata = tag_array.io.q; // valid bit array @@ -303,10 +301,7 @@ class rocketDCacheDM(lines: Int) extends Component { val store_data = Fill(2, storegen.io.store_data); val store_wmask_d = storegen.io.store_wmask; val store_idx_sel = p_store_idx(offsetlsb).toBool; - val store_wmask = - Mux(store_idx_sel, - Cat(store_wmask_d, Bits(0,64)), - Cat(Bits(0,64), store_wmask_d)); + val store_wmask = Mux(store_idx_sel, Cat(store_wmask_d, Bits(0,64)), Cat(Bits(0,64), store_wmask_d)); // data array val data_array = new rocketSRAMsp(lines*4, 128); @@ -317,10 +312,7 @@ class rocketDCacheDM(lines: Int) extends Component { Mux((state === s_resolve_miss) || (state === s_replay_load), r_cpu_req_idx(PGIDX_BITS-1, offsetmsb-1), io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1))))).toUFix; - data_array.io.d := - Mux((state === s_refill), io.mem.resp_data, - Mux((state === s_resolve_miss), Fill(2, p_store_data), - store_data)); + data_array.io.d := Mux((state === s_refill), io.mem.resp_data, store_data); data_array.io.we := ((state === s_refill) && io.mem.resp_val) || drain_store || resolve_store; data_array.io.bweb := Mux((state === s_refill), ~Bits(0,128), store_wmask); data_array.io.ce := Bool(true); // FIXME diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 36ff768b..7c11c675 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -8,9 +8,11 @@ import scala.math._; // interface between I$ and pipeline/ITLB (32 bits wide) class ioImem(view: List[String] = null) extends Bundle (view) { - val req_addr = UFix(PADDR_BITS, 'input); + val itlb_miss = Bool('input); val req_val = Bool('input); val req_rdy = Bool('output); + val req_idx = Bits(PGIDX_BITS, 'input); + val req_ppn = Bits(PPN_BITS, 'input); val resp_data = Bits(32, 'output); val resp_val = Bool('output); } @@ -45,8 
+47,8 @@ class ioSRAMsp (width: Int, addrbits: Int) extends Bundle { class rocketSRAMsp(entries: Int, width: Int) extends Component { val addrbits = ceil(log10(entries)/log10(2)).toInt; val io = new ioSRAMsp(width, addrbits); - val sram = Mem(entries, io.we && io.ce, io.a, io.d, wrMask = io.bweb, resetVal = null); - val rdata = Reg(sram.read(io.a)); + val sram = Mem(entries, io.we, io.a, io.d, wrMask = io.bweb, resetVal = null); + val rdata = Reg(Mux(io.ce, sram.read(io.a), Bits(0,width))); io.q := rdata; } @@ -62,6 +64,7 @@ class rocketICacheDM(lines: Int) extends Component { val offsetbits = 6; val tagmsb = addrbits - 1; val taglsb = indexbits+offsetbits; + val tagbits = addrbits-taglsb; val indexmsb = taglsb-1; val indexlsb = offsetbits; val offsetmsb = indexlsb-1; @@ -71,18 +74,35 @@ class rocketICacheDM(lines: Int) extends Component { val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: s_resolve_miss :: Nil = Enum(6) { UFix() }; val state = Reg(resetVal = s_reset); - val r_cpu_req_addr = Reg(Bits(0, addrbits)); - when (io.cpu.req_val && ((state === s_ready) || (state === s_resolve_miss))) { - r_cpu_req_addr <== io.cpu.req_addr; - } + val r_cpu_req_idx = Reg(resetVal = Bits(0, PGIDX_BITS)); + val r_cpu_req_ppn = Reg(resetVal = Bits(0, PPN_BITS)); + val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_val = Reg(Bool(false)); - when ((state === s_ready) || (state === s_resolve_miss)) { - r_cpu_req_val <== io.cpu.req_val; + when (io.cpu.req_val && io.cpu.req_rdy) { + r_cpu_req_idx <== io.cpu.req_idx; + } + when (state === s_ready) { + r_cpu_req_ppn <== io.cpu.req_ppn; + } + when (io.cpu.req_rdy) { + r_cpu_req_val <== io.cpu.req_val; } otherwise { r_cpu_req_val <== Bool(false); } + +// val r_cpu_req_addr = Reg(Bits(0, addrbits)); +// when (io.cpu.req_val && ((state === s_ready) || (state === s_resolve_miss))) { +// r_cpu_req_addr <== io.cpu.req_addr; +// } + +// val r_cpu_req_val = Reg(Bool(false)); +// when ((state === s_ready) || 
(state === s_resolve_miss)) { +// r_cpu_req_val <== io.cpu.req_val; +// } +// otherwise { +// r_cpu_req_val <== Bool(false); +// } val refill_count = Reg(resetVal = UFix(0,2)); when (io.mem.resp_val) { @@ -90,30 +110,33 @@ class rocketICacheDM(lines: Int) extends Component { } // tag array - val tagbits = addrbits-(indexbits+offsetbits); val tag_array = new rocketSRAMsp(lines, tagbits); - tag_array.io.a := - Mux((state === s_refill_wait), r_cpu_req_addr(indexmsb, indexlsb).toUFix, io.cpu.req_addr(indexmsb, indexlsb)); - tag_array.io.d := r_cpu_req_addr(tagmsb, taglsb); - tag_array.io.we := (state === s_refill_wait) && io.mem.resp_val; + val tag_addr = + Mux((state === s_refill_wait), r_cpu_req_idx(PGIDX_BITS-1,offsetbits), + io.cpu.req_idx(PGIDX_BITS-1,offsetbits)).toUFix; + val tag_we = (state === s_refill_wait) && io.mem.resp_val; + tag_array.io.a := tag_addr; + tag_array.io.d := r_cpu_req_ppn; + tag_array.io.we := tag_we; tag_array.io.bweb := ~Bits(0,tagbits); - tag_array.io.ce := Bool(true); // FIXME - val tag_lookup = tag_array.io.q; + tag_array.io.ce := (state === s_ready) && io.cpu.req_val; + val tag_rdata = tag_array.io.q; // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); - val vb_rdata = Reg(vb_array(io.cpu.req_addr(indexmsb, indexlsb))); - when ((state === s_refill_wait) && io.mem.resp_val) { - vb_array <== vb_array.bitSet(r_cpu_req_addr(indexmsb, indexlsb).toUFix, UFix(1,1)); +// val vb_rdata = Reg(vb_array(io.cpu.req_addr(indexmsb, indexlsb))); + when (tag_we) { + vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } - val tag_match = vb_rdata.toBool && (tag_lookup === r_cpu_req_addr(tagmsb, taglsb)); - + val tag_valid = Reg(vb_array(tag_addr)).toBool; + val tag_match = (tag_rdata === io.cpu.req_ppn); + // data array val data_array = new rocketSRAMsp(lines*4, 128); data_array.io.a := - Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_addr(indexmsb, indexlsb), refill_count), - 
io.cpu.req_addr(indexmsb, offsetmsb-1)).toUFix; + Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), refill_count), + io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix; data_array.io.d := io.mem.resp_data; data_array.io.we := io.mem.resp_val; data_array.io.bweb := ~Bits(0,128); @@ -121,16 +144,17 @@ class rocketICacheDM(lines: Int) extends Component { val data_array_rdata = data_array.io.q; // output signals - io.cpu.resp_val := (r_cpu_req_val && tag_match && (state === s_ready)); // || (state === s_resolve_miss); - io.cpu.req_rdy := ((state === s_ready) && (!r_cpu_req_val || (r_cpu_req_val && tag_match))); // || (state === s_resolve_miss); - io.cpu.resp_data := - MuxLookup(r_cpu_req_addr(offsetmsb-2, offsetlsb).toUFix, data_array_rdata(127, 96), + io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_valid && tag_match; + io.cpu.req_rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || (tag_valid && tag_match)); + io.cpu.resp_data := + MuxLookup(r_cpu_req_idx(offsetmsb-2, offsetlsb).toUFix, data_array_rdata(127, 96), Array(UFix(2) -> data_array_rdata(95,64), UFix(1) -> data_array_rdata(63,32), UFix(0) -> data_array_rdata(31,0))); io.mem.req_val := (state === s_request); - io.mem.req_addr := Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix; + io.mem.req_addr := Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), Bits(0,2)).toUFix; + // Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix; // control state machine switch (state) { @@ -138,17 +162,23 @@ class rocketICacheDM(lines: Int) extends Component { state <== s_ready; } is (s_ready) { - when (r_cpu_req_val && !tag_match) { state <== s_request; } + when (!io.cpu.itlb_miss && r_cpu_req_val && !(tag_valid && tag_match)) { state <== s_request; } } is (s_request) { - when (io.mem.req_rdy) { state <== s_refill_wait; } + when (io.mem.req_rdy) { + state <== s_refill_wait; + } } is (s_refill_wait) { - when 
(io.mem.resp_val) { state <== s_refill; } + when (io.mem.resp_val) { + state <== s_refill; + } } is (s_refill) { - when (io.mem.resp_val && (refill_count === UFix(3,2))) { state <== s_resolve_miss; } + when (io.mem.resp_val && (refill_count === UFix(3,2))) { + state <== s_resolve_miss; + } } is (s_resolve_miss) { state <== s_ready; diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 56f8ccec..8808c786 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -97,7 +97,6 @@ class rocketITLB(entries: Int) extends Component val r_cpu_req_vpn = Reg(resetVal = Bits(0, VPN_BITS)); val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); val r_cpu_req_asid = Reg(resetVal = Bits(0,ASID_BITS)); val r_refill_tag = Reg(resetVal = Bits(0, ASID_BITS+VPN_BITS)); val r_refill_waddr = Reg(resetVal = UFix(0, addr_bits)); @@ -175,8 +174,8 @@ class rocketITLB(entries: Int) extends Component ((status_s && !sx_array(tag_hit_addr).toBool) || (status_u && !ux_array(tag_hit_addr).toBool)); - io.cpu.req_rdy := (state === s_ready); - io.cpu.resp_miss := tlb_miss; + io.cpu.req_rdy := Mux(status_vm, (state === s_ready) && (!r_cpu_req_val || tag_hit), Bool(true)); + io.cpu.resp_miss := tlb_miss || (state != s_ready); io.cpu.resp_ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; io.ptw.req_val := (state === s_request); diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 048816db..b43a665d 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -16,11 +16,16 @@ class rocketDmemArbiter extends Component { val io = new ioDmemArbiter(); + // must delay ppn part of address from PTW by 1 cycle (to match TLB behavior) + val r_ptw_req_val = Reg(io.ptw.req_val); + val r_ptw_req_ppn = Reg(io.ptw.req_ppn); + io.mem.req_val := io.ptw.req_val || io.cpu.req_val; io.mem.req_cmd := Mux(io.ptw.req_val, 
io.ptw.req_cmd, io.cpu.req_cmd); io.mem.req_type := Mux(io.ptw.req_val, io.ptw.req_type, io.cpu.req_type); io.mem.req_idx := Mux(io.ptw.req_val, io.ptw.req_idx, io.cpu.req_idx); - io.mem.req_ppn := Mux(io.ptw.req_val, io.ptw.req_ppn, io.cpu.req_ppn); +// io.mem.req_ppn := Mux(io.ptw.req_val, io.ptw.req_ppn, io.cpu.req_ppn); + io.mem.req_ppn := Mux(r_ptw_req_val, r_ptw_req_ppn, io.cpu.req_ppn); io.mem.req_data := io.cpu.req_data; io.mem.req_tag := Mux(io.ptw.req_val, Bits(0,5), io.cpu.req_tag); From 83d90c4dab5ff3695ce118dafe76f00c5764934a Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sat, 12 Nov 2011 15:00:45 -0800 Subject: [PATCH 0031/1087] more itlb/dtlb/ptw fixes --- rocket/src/main/scala/cpu.scala | 7 ++++- rocket/src/main/scala/ctrl.scala | 8 +++-- rocket/src/main/scala/dcache.scala | 47 ++++++++++++++++++++---------- rocket/src/main/scala/dtlb.scala | 13 ++++++--- rocket/src/main/scala/icache.scala | 13 +++++++-- rocket/src/main/scala/ptw.scala | 2 ++ 6 files changed, 63 insertions(+), 27 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 2c45e534..3e79bbfe 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -70,7 +70,7 @@ class rocketProc extends Component ctrl.io.imem.resp_val := io.imem.resp_val; dpath.io.imem.resp_data := io.imem.resp_data; ctrl.io.xcpt_itlb := itlb.io.cpu.exception; - ctrl.io.itlb_miss := itlb.io.cpu.resp_miss; +// ctrl.io.itlb_miss := itlb.io.cpu.resp_miss; io.imem.itlb_miss := itlb.io.cpu.resp_miss; @@ -83,7 +83,9 @@ class rocketProc extends Component dtlb.io.cpu.req_vpn := dpath.io.dmem.req_addr(VADDR_BITS-1,PGIDX_BITS); ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu.xcpt_ld; ctrl.io.xcpt_dtlb_st := dtlb.io.cpu.xcpt_st; + ctrl.io.dtlb_busy := dtlb.io.cpu.resp_busy; ctrl.io.dtlb_miss := dtlb.io.cpu.resp_miss; +// io.dmem.dtlb_miss := dtlb.io.cpu.resp_miss; // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects 
between requests from pipeline and PTW, PTW has priority) @@ -98,6 +100,9 @@ class rocketProc extends Component arb.io.cpu.req_val := ctrl.io.dmem.req_val; arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; arb.io.cpu.req_type := ctrl.io.dmem.req_type; +// arb.io.cpu.dtlb_busy := dtlb.io.cpu.resp_busy; + arb.io.cpu.dtlb_miss := dtlb.io.cpu.resp_miss; + // arb.io.cpu.req_addr := dtlb.io.cpu.resp_addr; arb.io.cpu.req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0); arb.io.cpu.req_ppn := dtlb.io.cpu.resp_ppn; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6985cf3d..777a0afe 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -67,8 +67,9 @@ class ioCtrlAll extends Bundle() val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss")).flip(); val host = new ioHost(List("start")); + val dtlb_busy = Bool('input); val dtlb_miss = Bool('input); - val itlb_miss = Bool('input); +// val itlb_miss = Bool('input); val xcpt_dtlb_ld = Bool('input); val xcpt_dtlb_st = Bool('input); val xcpt_itlb = Bool('input); @@ -497,7 +498,7 @@ class rocketCtrl extends Component id_ren1 & id_stall_raddr1 | (id_sel_wa === WA_RD) & id_stall_waddr | (id_sel_wa === WA_RA) & id_stall_ra | - id_mem_val & ~io.dmem.req_rdy | + id_mem_val & (~io.dmem.req_rdy | io.dtlb_busy) | id_sync & ~io.dmem.req_rdy | id_console_out_val & ~io.console.rdy | id_div_val & ~io.dpath.div_rdy | @@ -513,7 +514,8 @@ class rocketCtrl extends Component io.dpath.stalld := ctrl_stalld.toBool; - io.dpath.killf := take_pc | io.itlb_miss | ~io.imem.resp_val; +// io.dpath.killf := take_pc | io.itlb_miss | ~io.imem.resp_val; + io.dpath.killf := take_pc | ~io.imem.resp_val; io.dpath.killd := ctrl_killd.toBool; io.dpath.killx := kill_ex.toBool; io.dpath.killm := kill_mem.toBool; diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 
a82c8bfc..7ab9042a 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -7,6 +7,8 @@ import scala.math._; // interface between D$ and processor/DTLB class ioDmem(view: List[String] = null) extends Bundle(view) { +// val dtlb_busy = Bool('input); + val dtlb_miss = Bool('input); val req_val = Bool('input); val req_rdy = Bool('output); val req_cmd = Bits(4, 'input); @@ -155,6 +157,8 @@ class rocketDCacheDM_flush(lines: Int) extends Component { dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); dcache.io.cpu.req_type := io.cpu.req_type; dcache.io.cpu.req_data ^^ io.cpu.req_data; +// dcache.io.cpu.dtlb_busy := io.cpu.dtlb_busy; + dcache.io.cpu.dtlb_miss := io.cpu.dtlb_miss; dcache.io.mem ^^ io.mem; io.cpu.req_rdy := dcache.io.cpu.req_rdy && !flush_waiting; @@ -210,20 +214,22 @@ class rocketDCacheDM(lines: Int) extends Component { r_cpu_req_tag <== io.cpu.req_tag; } - when ((state === s_ready) && r_cpu_req_val) { + when ((state === s_ready) && r_cpu_req_val && !io.cpu.dtlb_miss) { r_cpu_req_ppn <== io.cpu.req_ppn; } when (io.cpu.req_rdy) { r_cpu_req_val <== io.cpu.req_val; - } - when (state === s_resolve_miss) { - r_cpu_req_val <== Bool(false); - when (r_req_load) { - r_cpu_resp_val <== Bool(true); - } } - r_cpu_resp_val <== Bool(false); - + otherwise { + r_cpu_req_val <== Bool(false); + } + when (((state === s_resolve_miss) && r_req_load) || (state === s_replay_load)) { + r_cpu_resp_val <== Bool(true); + } + otherwise { + r_cpu_resp_val <== Bool(false); + } + // refill counter val rr_count = Reg(resetVal = UFix(0,2)); val rr_count_next = rr_count + UFix(1); @@ -244,7 +250,7 @@ class rocketDCacheDM(lines: Int) extends Component { tag_array.io.d := r_cpu_req_ppn; tag_array.io.we := tag_we; tag_array.io.bweb := ~Bits(0,tagbits); - tag_array.io.ce := (state === s_ready) && io.cpu.req_val; + tag_array.io.ce := (state === s_ready) || (state === s_start_writeback) || (state === s_writeback); val tag_rdata = 
tag_array.io.q; // valid bit array @@ -256,8 +262,9 @@ class rocketDCacheDM(lines: Int) extends Component { when (tag_we && r_req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); } - - val tag_valid = Reg(vb_array(tag_addr)).toBool; + val vb_rdata = Reg(vb_array(tag_addr).toBool); + val tag_valid = r_cpu_req_val && vb_rdata; +// val tag_valid = Reg(vb_array(tag_addr)).toBool; val tag_match = (tag_rdata === io.cpu.req_ppn); val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetbits) === p_store_idx(PGIDX_BITS-1,offsetbits)); val ldst_conflict = r_cpu_req_val && r_req_load && p_store_valid && addr_match; @@ -265,7 +272,7 @@ class rocketDCacheDM(lines: Int) extends Component { // write the pending store data when the cache is idle, when the next command isn't a load // or when there's a load to the same address (in which case there's a 2 cycle delay: // once cycle to write the store data and another to read the data back) - val drain_store = p_store_valid && (!io.cpu.req_val || req_store || ldst_conflict); + val drain_store = !io.cpu.dtlb_miss && p_store_valid && (!io.cpu.req_val || req_store || ldst_conflict); // write pending store data from a store which missed // after the cache line refill has completed @@ -282,6 +289,11 @@ class rocketDCacheDM(lines: Int) extends Component { p_store_type <== io.cpu.req_type; p_store_valid <== Bool(true); } + // cancel store if there's a DTLB miss + when (r_cpu_req_val && r_req_store && io.cpu.dtlb_miss) + { + p_store_valid <== Bool(false); + } when (drain_store) { p_store_valid <== Bool(false); db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); @@ -321,12 +333,12 @@ class rocketDCacheDM(lines: Int) extends Component { // signal a load miss when the data isn't present in the cache and when it's in the pending store data register // (causes the cache to block for 2 cycles and the load instruction is replayed) val hit = tag_valid && tag_match; - val 
load_miss = (state === s_ready) && r_cpu_req_val && r_req_load && (!hit || (p_store_valid && addr_match)); + val load_miss = !io.cpu.dtlb_miss && (state === s_ready) && r_cpu_req_val && r_req_load && (!hit || (p_store_valid && addr_match)); // output signals // busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush - io.cpu.req_rdy := (state === s_ready) && !ldst_conflict && (!r_cpu_req_val || (hit && !r_req_flush)); - io.cpu.resp_val := ((state === s_ready) && r_cpu_req_val && r_req_load && hit && !(p_store_valid && addr_match)) || + io.cpu.req_rdy := !io.cpu.dtlb_miss && (state === s_ready) && !ldst_conflict && (!r_cpu_req_val || (hit && !r_req_flush)); + io.cpu.resp_val := !io.cpu.dtlb_miss && ((state === s_ready) && hit && r_req_load && !(p_store_valid && addr_match)) || ((state === s_resolve_miss) && r_req_flush) || r_cpu_resp_val; @@ -351,6 +363,9 @@ class rocketDCacheDM(lines: Int) extends Component { state <== s_ready; } is (s_ready) { + when (io.cpu.dtlb_miss) { + state <== s_ready; + } when (ldst_conflict) { state <== s_replay_load; } diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 9c3f4499..6b13eeb4 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -20,6 +20,7 @@ class ioDTLB_CPU(view: List[String] = null) extends Bundle(view) val req_asid = Bits(ASID_BITS, 'input); val req_vpn = UFix(VPN_BITS, 'input); // lookup responses + val resp_busy = Bool('output); val resp_miss = Bool('output); // val resp_val = Bool('output); val resp_ppn = UFix(PPN_BITS, 'output); @@ -62,6 +63,7 @@ class rocketDTLB(entries: Int) extends Component val req_load = (r_cpu_req_cmd === M_XRD); val req_store = (r_cpu_req_cmd === M_XWR); + val req_flush = (r_cpu_req_cmd === M_FLA); // val req_amo = io.cpu.req_cmd(3).toBool; val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); @@ -117,8 +119,8 @@ class rocketDTLB(entries: Int) extends Component val 
repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val lookup_hit = (state === s_ready) && r_cpu_req_val && tag_hit; - val lookup_miss = (state === s_ready) && r_cpu_req_val && !tag_hit; + val lookup_hit = (state === s_ready) && r_cpu_req_val && !req_flush && tag_hit; + val lookup_miss = (state === s_ready) && r_cpu_req_val && !req_flush && !tag_hit; val tlb_hit = status_vm && lookup_hit; val tlb_miss = status_vm && lookup_miss; @@ -143,9 +145,12 @@ class rocketDTLB(entries: Int) extends Component ((status_s && !sw_array(tag_hit_addr).toBool) || (status_u && !uw_array(tag_hit_addr).toBool)); - io.cpu.req_rdy := (state === s_ready); + io.cpu.req_rdy := Mux(status_vm, (state === s_ready) && !tlb_miss, Bool(true)); + io.cpu.resp_busy := tlb_miss || (state != s_ready); io.cpu.resp_miss := tlb_miss; - io.cpu.resp_ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; + io.cpu.resp_ppn := + Mux(status_vm, Mux(req_flush, Bits(0,PPN_BITS), tag_ram(tag_hit_addr)), + r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; io.ptw.req_val := (state === s_request); io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 7c11c675..46e1df3a 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -81,7 +81,7 @@ class rocketICacheDM(lines: Int) extends Component { when (io.cpu.req_val && io.cpu.req_rdy) { r_cpu_req_idx <== io.cpu.req_idx; } - when (state === s_ready) { + when (state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss) { r_cpu_req_ppn <== io.cpu.req_ppn; } when (io.cpu.req_rdy) { @@ -140,7 +140,9 @@ class rocketICacheDM(lines: Int) extends Component { data_array.io.d := io.mem.resp_data; data_array.io.we := io.mem.resp_val; data_array.io.bweb := ~Bits(0,128); - data_array.io.ce := Bool(true); // FIXME +// data_array.io.ce := Bool(true); // FIXME + data_array.io.ce := (io.cpu.req_rdy && io.cpu.req_val) || (state === s_resolve_miss); + 
val data_array_rdata = data_array.io.q; // output signals @@ -162,7 +164,12 @@ class rocketICacheDM(lines: Int) extends Component { state <== s_ready; } is (s_ready) { - when (!io.cpu.itlb_miss && r_cpu_req_val && !(tag_valid && tag_match)) { state <== s_request; } + when (io.cpu.itlb_miss) { + state <== s_ready; + } + when (r_cpu_req_val && !(tag_valid && tag_match)) { + state <== s_request; + } } is (s_request) { diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index b43a665d..52bdf8e8 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -28,6 +28,8 @@ class rocketDmemArbiter extends Component io.mem.req_ppn := Mux(r_ptw_req_val, r_ptw_req_ppn, io.cpu.req_ppn); io.mem.req_data := io.cpu.req_data; io.mem.req_tag := Mux(io.ptw.req_val, Bits(0,5), io.cpu.req_tag); +// io.mem.dtlb_busy := io.cpu.dtlb_busy; + io.mem.dtlb_miss := io.cpu.dtlb_miss; io.ptw.req_rdy := io.mem.req_rdy; io.cpu.req_rdy := io.mem.req_rdy && !io.ptw.req_val; From 91c252ad0865495f9be88c44d26d2f80e146c08d Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sat, 12 Nov 2011 15:47:47 -0800 Subject: [PATCH 0032/1087] fixing output enable signals for data/tag SRAMs --- rocket/src/main/scala/dcache.scala | 10 +++++++++- rocket/src/main/scala/dtlb.scala | 2 +- rocket/src/main/scala/itlb.scala | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 7ab9042a..5e850500 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -202,6 +202,7 @@ class rocketDCacheDM(lines: Int) extends Component { val p_store_valid = Reg(resetVal = Bool(false)); val req_store = (io.cpu.req_cmd === M_XWR); + val req_load = (io.cpu.req_cmd === M_XRD) || (io.cpu.req_cmd === M_PRD); val r_req_load = (r_cpu_req_cmd === M_XRD) || (r_cpu_req_cmd === M_PRD); val r_req_store = (r_cpu_req_cmd === M_XWR); val r_req_flush = (r_cpu_req_cmd === M_FLA); 
@@ -327,7 +328,14 @@ class rocketDCacheDM(lines: Int) extends Component { data_array.io.d := Mux((state === s_refill), io.mem.resp_data, store_data); data_array.io.we := ((state === s_refill) && io.mem.resp_val) || drain_store || resolve_store; data_array.io.bweb := Mux((state === s_refill), ~Bits(0,128), store_wmask); - data_array.io.ce := Bool(true); // FIXME +// data_array.io.ce := Bool(true); // FIXME + data_array.io.ce := + (io.cpu.req_val && io.cpu.req_rdy && req_load) || + (state === s_start_writeback) || + (state === s_writeback) || + ((state === s_resolve_miss) && r_req_load) || + (state === s_replay_load); + val data_array_rdata = data_array.io.q; // signal a load miss when the data isn't present in the cache and when it's in the pending store data register diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 6b13eeb4..3c466434 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -73,7 +73,7 @@ class rocketDTLB(entries: Int) extends Component tag_cam.io.clear := io.cpu.invalidate; tag_cam.io.tag := lookup_tag; - tag_cam.io.write := io.ptw.resp_val; + tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; tag_cam.io.write_tag := r_refill_tag; tag_cam.io.write_addr := r_refill_waddr; val tag_hit = tag_cam.io.hit; diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 8808c786..91cca795 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -117,7 +117,7 @@ class rocketITLB(entries: Int) extends Component tag_cam.io.clear := io.cpu.invalidate; tag_cam.io.tag := lookup_tag; - tag_cam.io.write := io.ptw.resp_val; + tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; tag_cam.io.write_tag := r_refill_tag; tag_cam.io.write_addr := r_refill_waddr; val tag_hit = tag_cam.io.hit; From 35af912bd2357110415b8d3ce734e085636fa1ce Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sat, 12 Nov 2011 22:13:29 -0800 Subject: [PATCH 
0033/1087] cache optimizations, cleanup, and testharness improvement --- rocket/src/main/scala/ctrl.scala | 1 + rocket/src/main/scala/dcache.scala | 13 +++++++------ rocket/src/main/scala/icache.scala | 19 ++----------------- 3 files changed, 10 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 777a0afe..779b9328 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -196,6 +196,7 @@ class rocketCtrl extends Component val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst); // FIXME +// io.imem.req_val := io.host.start && !io.dpath.xcpt_ma_inst; io.imem.req_val := io.host.start && !io.dpath.xcpt_ma_inst; val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_sel_alu1 :: id_fn_dw :: id_fn_alu :: csremainder = cs; diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 5e850500..8b4ce89e 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -251,12 +251,14 @@ class rocketDCacheDM(lines: Int) extends Component { tag_array.io.d := r_cpu_req_ppn; tag_array.io.we := tag_we; tag_array.io.bweb := ~Bits(0,tagbits); - tag_array.io.ce := (state === s_ready) || (state === s_start_writeback) || (state === s_writeback); + tag_array.io.ce := + (io.cpu.req_val && io.cpu.req_rdy) || + (state === s_start_writeback) || + (state === s_writeback); val tag_rdata = tag_array.io.q; // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); -// val vb_rdata = Reg(vb_array(tag_raddr)); when (tag_we && !r_req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } @@ -265,9 +267,10 @@ class rocketDCacheDM(lines: Int) extends Component { } val vb_rdata = Reg(vb_array(tag_addr).toBool); val tag_valid = r_cpu_req_val && vb_rdata; -// val tag_valid = Reg(vb_array(tag_addr)).toBool; val tag_match = (tag_rdata === io.cpu.req_ppn); - val addr_match = 
(r_cpu_req_idx(PGIDX_BITS-1,offsetbits) === p_store_idx(PGIDX_BITS-1,offsetbits)); + + // load/store addresses conflict if they are to any part of the same 64 bit word + val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetlsb) === p_store_idx(PGIDX_BITS-1,offsetlsb)); val ldst_conflict = r_cpu_req_val && r_req_load && p_store_valid && addr_match; // write the pending store data when the cache is idle, when the next command isn't a load @@ -281,7 +284,6 @@ class rocketDCacheDM(lines: Int) extends Component { // dirty bit array val db_array = Reg(resetVal = Bits(0, lines)); -// val db_rdata = Reg(db_array(tag_raddr)); val tag_dirty = Reg(db_array(tag_addr)).toBool; when (io.cpu.req_val && io.cpu.req_rdy && req_store) { @@ -328,7 +330,6 @@ class rocketDCacheDM(lines: Int) extends Component { data_array.io.d := Mux((state === s_refill), io.mem.resp_data, store_data); data_array.io.we := ((state === s_refill) && io.mem.resp_val) || drain_store || resolve_store; data_array.io.bweb := Mux((state === s_refill), ~Bits(0,128), store_wmask); -// data_array.io.ce := Bool(true); // FIXME data_array.io.ce := (io.cpu.req_val && io.cpu.req_rdy && req_load) || (state === s_start_writeback) || diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 46e1df3a..5871a165 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -90,20 +90,8 @@ class rocketICacheDM(lines: Int) extends Component { otherwise { r_cpu_req_val <== Bool(false); } - -// val r_cpu_req_addr = Reg(Bits(0, addrbits)); -// when (io.cpu.req_val && ((state === s_ready) || (state === s_resolve_miss))) { -// r_cpu_req_addr <== io.cpu.req_addr; -// } - -// val r_cpu_req_val = Reg(Bool(false)); -// when ((state === s_ready) || (state === s_resolve_miss)) { -// r_cpu_req_val <== io.cpu.req_val; -// } -// otherwise { -// r_cpu_req_val <== Bool(false); -// } + // refill counter val refill_count = Reg(resetVal = UFix(0,2)); when (io.mem.resp_val) { 
refill_count <== refill_count + UFix(1); @@ -119,12 +107,11 @@ class rocketICacheDM(lines: Int) extends Component { tag_array.io.d := r_cpu_req_ppn; tag_array.io.we := tag_we; tag_array.io.bweb := ~Bits(0,tagbits); - tag_array.io.ce := (state === s_ready) && io.cpu.req_val; + tag_array.io.ce := (io.cpu.req_val && io.cpu.req_rdy); val tag_rdata = tag_array.io.q; // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); -// val vb_rdata = Reg(vb_array(io.cpu.req_addr(indexmsb, indexlsb))); when (tag_we) { vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } @@ -140,7 +127,6 @@ class rocketICacheDM(lines: Int) extends Component { data_array.io.d := io.mem.resp_data; data_array.io.we := io.mem.resp_val; data_array.io.bweb := ~Bits(0,128); -// data_array.io.ce := Bool(true); // FIXME data_array.io.ce := (io.cpu.req_rdy && io.cpu.req_val) || (state === s_resolve_miss); val data_array_rdata = data_array.io.q; @@ -156,7 +142,6 @@ class rocketICacheDM(lines: Int) extends Component { io.mem.req_val := (state === s_request); io.mem.req_addr := Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), Bits(0,2)).toUFix; - // Cat(r_cpu_req_addr(tagmsb, indexlsb), Bits(0,2)).toUFix; // control state machine switch (state) { From fbd44ea9363055caf1217b837a75ed65848eacf4 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sat, 12 Nov 2011 23:39:43 -0800 Subject: [PATCH 0034/1087] added checks for addresses > physical memory size, increased memsize to 64M --- rocket/src/main/scala/consts.scala | 6 +++--- rocket/src/main/scala/dtlb.scala | 19 ++++++++++++++----- rocket/src/main/scala/itlb.scala | 14 +++++++++----- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 777a8de8..612a7ec0 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -180,10 +180,10 @@ object Constants val DTLB_ENTRIES = 8; val ITLB_ENTRIES 
= 8; - // physical memory size (# 4K pages - for proxy kernel at least) + // physical memory size (# 8K pages) // if you change this value, make sure to also change MEMORY_SIZE variable in memif.h - val MEMSIZE_PAGES = 8192; // 32 megs - val MEMSIZE = MEMSIZE_PAGES*4096; + val MEMSIZE_PAGES = 8192; // 64 megs + val MEMSIZE_BYTES = MEMSIZE_PAGES*8192; val HAVE_FPU = Bool(false); val HAVE_VEC = Bool(false); diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 3c466434..2d4089e9 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -119,8 +119,9 @@ class rocketDTLB(entries: Int) extends Component val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val lookup_hit = (state === s_ready) && r_cpu_req_val && !req_flush && tag_hit; - val lookup_miss = (state === s_ready) && r_cpu_req_val && !req_flush && !tag_hit; + val lookup = (state === s_ready) && r_cpu_req_val && !req_flush; + val lookup_hit = lookup && tag_hit; + val lookup_miss = lookup && !tag_hit; val tlb_hit = status_vm && lookup_hit; val tlb_miss = status_vm && lookup_miss; @@ -134,17 +135,25 @@ class rocketDTLB(entries: Int) extends Component } } - // FIXME: add check for out of range physical addresses (>MEMSIZE) - io.cpu.xcpt_ld := + // exception check + val outofrange = (io.cpu.resp_ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); + + val access_fault_ld = tlb_hit && req_load && ((status_s && !sr_array(tag_hit_addr).toBool) || (status_u && !ur_array(tag_hit_addr).toBool)); - io.cpu.xcpt_st := + io.cpu.xcpt_ld := + (lookup && req_load && outofrange) || access_fault_ld; + + val access_fault_st = tlb_hit && req_store && ((status_s && !sw_array(tag_hit_addr).toBool) || (status_u && !uw_array(tag_hit_addr).toBool)); + io.cpu.xcpt_st := + (lookup && req_store && outofrange) || access_fault_st; + io.cpu.req_rdy := Mux(status_vm, (state === s_ready) && !tlb_miss, Bool(true)); io.cpu.resp_busy := tlb_miss || (state != s_ready); io.cpu.resp_miss := 
tlb_miss; diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 91cca795..0df38144 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -155,8 +155,9 @@ class rocketITLB(entries: Int) extends Component val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val lookup_hit = (state === s_ready) && r_cpu_req_val && tag_hit; - val lookup_miss = (state === s_ready) && r_cpu_req_val && !tag_hit; + val lookup = (state === s_ready) && r_cpu_req_val; + val lookup_hit = lookup && tag_hit; + val lookup_miss = lookup && !tag_hit; val tlb_hit = status_vm && lookup_hit; val tlb_miss = status_vm && lookup_miss; @@ -168,12 +169,15 @@ class rocketITLB(entries: Int) extends Component } } - // FIXME: add test for out of range physical addresses (> MEMSIZE) - io.cpu.exception := + // exception check + val outofrange = (io.cpu.resp_ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); + + val access_fault = tlb_hit && ((status_s && !sx_array(tag_hit_addr).toBool) || (status_u && !ux_array(tag_hit_addr).toBool)); - + + io.cpu.exception := access_fault || outofrange; io.cpu.req_rdy := Mux(status_vm, (state === s_ready) && (!r_cpu_req_val || tag_hit), Bool(true)); io.cpu.resp_miss := tlb_miss || (state != s_ready); io.cpu.resp_ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; From 5f4b15b809888bd5f9b44cdadfcc0aa40c38688f Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sun, 13 Nov 2011 00:03:17 -0800 Subject: [PATCH 0035/1087] added ld/st misaligned exceptions --- rocket/src/main/scala/cpu.scala | 3 ++- rocket/src/main/scala/ctrl.scala | 14 +++++++++----- rocket/src/main/scala/dcache.scala | 16 ++++++++++++---- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 3e79bbfe..2e777f9f 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -85,7 +85,8 @@ class rocketProc extends 
Component ctrl.io.xcpt_dtlb_st := dtlb.io.cpu.xcpt_st; ctrl.io.dtlb_busy := dtlb.io.cpu.resp_busy; ctrl.io.dtlb_miss := dtlb.io.cpu.resp_miss; -// io.dmem.dtlb_miss := dtlb.io.cpu.resp_miss; + ctrl.io.xcpt_ma_ld := io.dmem.xcpt_ma_ld; + ctrl.io.xcpt_ma_st := io.dmem.xcpt_ma_st; // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 779b9328..c6fe2328 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -73,6 +73,8 @@ class ioCtrlAll extends Bundle() val xcpt_dtlb_ld = Bool('input); val xcpt_dtlb_st = Bool('input); val xcpt_itlb = Bool('input); + val xcpt_ma_ld = Bool('input); + val xcpt_ma_st = Bool('input); } class rocketCtrl extends Component @@ -381,6 +383,8 @@ class rocketCtrl extends Component // exception handling val mem_exception = + io.xcpt_ma_ld || + io.xcpt_ma_st || io.xcpt_dtlb_ld || io.xcpt_dtlb_st || mem_reg_xcpt_illegal || @@ -398,11 +402,11 @@ class rocketCtrl extends Component // interrupt Mux(mem_reg_xcpt_syscall, UFix(6,5), // system call // breakpoint - // misaligned load - // misaligned store - Mux(io.xcpt_dtlb_ld, UFix(8,5), // load fault - Mux(io.xcpt_dtlb_st, UFix(9,5), // store fault - UFix(0,5)))))))); // instruction address misaligned + Mux(io.xcpt_ma_ld, UFix(8,5), // misaligned load + Mux(io.xcpt_ma_st, UFix(9,5), // misaligned store + Mux(io.xcpt_dtlb_ld, UFix(10,5), // load fault + Mux(io.xcpt_dtlb_st, UFix(11,5), // store fault + UFix(0,5)))))))))); // instruction address misaligned // write cause to PCR on an exception io.dpath.exception := mem_exception; diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 8b4ce89e..fe2962d3 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -18,6 +18,8 @@ class ioDmem(view: List[String] = null) 
extends Bundle(view) { // val req_addr = UFix(PADDR_BITS, 'input); val req_data = Bits(64, 'input); val req_tag = Bits(5, 'input); + val xcpt_ma_ld = Bool('output); // misaligned load + val xcpt_ma_st = Bool('output); // misaligned store val resp_miss = Bool('output); val resp_val = Bool('output); val resp_data = Bits(64, 'output); @@ -151,16 +153,14 @@ class rocketDCacheDM_flush(lines: Int) extends Component { dcache.io.cpu.req_cmd := Mux(flushing, M_FLA, io.cpu.req_cmd); dcache.io.cpu.req_idx := Mux(flushing, Cat(flush_count, Bits(0,offsetbits)), io.cpu.req_idx); dcache.io.cpu.req_ppn := Mux(flushing, UFix(0,PPN_BITS), io.cpu.req_ppn); -// dcache.io.cpu.req_addr := -// Mux(flushing, Cat(Bits(0,tagmsb-taglsb+1), flush_count, Bits(0,offsetbits)).toUFix, -// io.cpu.req_addr); dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); dcache.io.cpu.req_type := io.cpu.req_type; dcache.io.cpu.req_data ^^ io.cpu.req_data; -// dcache.io.cpu.dtlb_busy := io.cpu.dtlb_busy; dcache.io.cpu.dtlb_miss := io.cpu.dtlb_miss; dcache.io.mem ^^ io.mem; + io.cpu.xcpt_ma_ld := dcache.io.cpu.xcpt_ma_ld; + io.cpu.xcpt_ma_st := dcache.io.cpu.xcpt_ma_st; io.cpu.req_rdy := dcache.io.cpu.req_rdy && !flush_waiting; io.cpu.resp_miss := dcache.io.cpu.resp_miss; io.cpu.resp_data := dcache.io.cpu.resp_data; @@ -351,6 +351,14 @@ class rocketDCacheDM(lines: Int) extends Component { ((state === s_resolve_miss) && r_req_flush) || r_cpu_resp_val; + val misaligned = + ((r_cpu_req_type === MT_H) && r_cpu_req_idx(0).toBool) || + ((r_cpu_req_type === MT_W) && (r_cpu_req_idx(1,0) != Bits(0,2))) || + ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0,3))); + + io.cpu.xcpt_ma_ld := r_cpu_req_val && r_req_load && misaligned; + io.cpu.xcpt_ma_st := r_cpu_req_val && r_req_store && misaligned; + io.cpu.resp_miss := load_miss; // tag MSB distinguishes between loads destined for the PTW and CPU io.cpu.resp_tag := Cat(r_req_ptw_load, r_cpu_req_type, r_cpu_req_idx(2,0), r_cpu_req_tag); From 
345f950eff7163d2a72423285f285e628f760056 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sun, 13 Nov 2011 00:27:57 -0800 Subject: [PATCH 0036/1087] added timer interrupt support --- rocket/src/main/scala/ctrl.scala | 10 ++++++++-- rocket/src/main/scala/dpath_util.scala | 26 +++++++++++++------------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index c6fe2328..d76a9fb5 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -75,6 +75,7 @@ class ioCtrlAll extends Bundle() val xcpt_itlb = Bool('input); val xcpt_ma_ld = Bool('input); val xcpt_ma_st = Bool('input); + val timer_int = Bool('input); } class rocketCtrl extends Component @@ -382,7 +383,12 @@ class rocketCtrl extends Component } // exception handling + // FIXME: verify PC in MEM stage points to valid, restartable instruction + val interrupt = io.dpath.status(SR_ET).toBool && io.dpath.status(15).toBool && io.timer_int; + val interrupt_cause = UFix(0x17, 5); + val mem_exception = + interrupt || io.xcpt_ma_ld || io.xcpt_ma_st || io.xcpt_dtlb_ld || @@ -395,18 +401,18 @@ class rocketCtrl extends Component mem_reg_xcpt_ma_inst; val mem_cause = + Mux(interrupt, interrupt_cause, // asynchronous interrupt Mux(mem_reg_xcpt_itlb, UFix(1,5), // instruction access fault Mux(mem_reg_xcpt_illegal, UFix(2,5), // illegal instruction Mux(mem_reg_xcpt_privileged, UFix(3,5), // privileged instruction Mux(mem_reg_xcpt_fpu, UFix(4,5), // FPU disabled - // interrupt Mux(mem_reg_xcpt_syscall, UFix(6,5), // system call // breakpoint Mux(io.xcpt_ma_ld, UFix(8,5), // misaligned load Mux(io.xcpt_ma_st, UFix(9,5), // misaligned store Mux(io.xcpt_dtlb_ld, UFix(10,5), // load fault Mux(io.xcpt_dtlb_st, UFix(11,5), // store fault - UFix(0,5)))))))))); // instruction address misaligned + UFix(0,5))))))))))); // instruction address misaligned // write cause to PCR on an exception io.dpath.exception := mem_exception; 
diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 1c10abf3..a1a28287 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -35,15 +35,6 @@ class rocketDpathBTB(entries: Int) extends Component io.hit := (is_val & (tag_target(tagmsb,taglsb) === io.current_pc4(VADDR_BITS-1, idxmsb+1))).toBool; io.target := Cat(tag_target(taglsb-1, 0), Bits(0,idxlsb)).toUFix; - -// val rst_lwlr_pf = Mem(entries, io.wen, io.correct_pc4(3, 2), UFix(1, 1), resetVal = UFix(0, 1)); -// val lwlr_pf = Mem(entries, io.wen, io.correct_pc4(3, 2), -// Cat(io.correct_pc4(VADDR_BITS-1,4), io.correct_target(VADDR_BITS-1,2)), resetVal = UFix(0, 1)); -// val is_val = rst_lwlr_pf(io.current_pc4(3, 2)); -// val tag_target = lwlr_pf(io.current_pc4(3, 2)); -// -// io.hit := (is_val & (tag_target(2*VADDR_BITS-7,VADDR_BITS-2) === io.current_pc4(VADDR_BITS-1, 4))).toBool; -// io.target := Cat(tag_target(VADDR_BITS-3, 0), Bits(0,2)).toUFix; } class ioDpathPCR extends Bundle() @@ -62,6 +53,7 @@ class ioDpathPCR extends Bundle() val pc = UFix(VADDR_BITS, 'input); val badvaddr = UFix(VADDR_BITS, 'input); val eret = Bool('input); + val timer_int = Bool('output); } class rocketDpathPCR extends Component @@ -71,8 +63,8 @@ class rocketDpathPCR extends Component val reg_epc = Reg(resetVal = UFix(0, VADDR_BITS)); val reg_badvaddr = Reg(resetVal = UFix(0, VADDR_BITS)); val reg_ebase = Reg(resetVal = UFix(0, VADDR_BITS)); - val reg_count = Reg(resetVal = Bits(0, 32)); - val reg_compare = Reg(resetVal = Bits(0, 32)); + val reg_count = Reg(resetVal = UFix(0, 32)); + val reg_compare = Reg(resetVal = UFix(0, 32)); val reg_cause = Reg(resetVal = Bits(0, 5)); val reg_tohost = Reg(resetVal = Bits(0, 32)); val reg_fromhost = Reg(resetVal = Bits(0, 32)); @@ -91,6 +83,8 @@ class rocketDpathPCR extends Component val reg_status_ps = Reg(resetVal = Bool(false)); val reg_status_et = Reg(resetVal = Bool(false)); + val timer_interrupt = 
Reg(resetVal = Bool(false)); + val reg_status = Cat(reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, Bits(0,1), reg_status_ev, reg_status_ef, reg_status_et); val rdata = Wire() { Bits() }; @@ -148,8 +142,8 @@ class rocketDpathPCR extends Component when (io.w.addr === PCR_EPC) { reg_epc <== io.w.data(VADDR_BITS-1,0).toUFix; } when (io.w.addr === PCR_BADVADDR) { reg_badvaddr <== io.w.data(VADDR_BITS-1,0).toUFix; } when (io.w.addr === PCR_EVEC) { reg_ebase <== io.w.data(VADDR_BITS-1,0).toUFix; } - when (io.w.addr === PCR_COUNT) { reg_count <== io.w.data(31,0); } - when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(31,0); } + when (io.w.addr === PCR_COUNT) { reg_count <== io.w.data(31,0).toUFix; } + when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(31,0).toUFix; timer_interrupt <== Bool(false); } when (io.w.addr === PCR_CAUSE) { reg_cause <== io.w.data(4,0); } when (io.w.addr === PCR_FROMHOST) { reg_fromhost <== io.w.data(31,0); } when (io.w.addr === PCR_K0) { reg_k0 <== io.w.data; } @@ -157,6 +151,12 @@ class rocketDpathPCR extends Component when (io.w.addr === PCR_PTBR) { reg_ptbr <== Cat(io.w.data(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } } + reg_count <== reg_count + UFix(1); + when (reg_count === reg_compare) { + timer_interrupt <== Bool(true); + } + io.timer_int := timer_interrupt; + when (!io.r.en) { rdata <== Bits(0,64); } switch (io.r.addr) { is (PCR_STATUS) { rdata <== Cat(Bits(0,47), reg_status_vm, reg_status_im, reg_status); } From 44419511b7ef302d1e0b7264f3b1dfe23d0c6f8d Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sun, 13 Nov 2011 00:32:08 -0800 Subject: [PATCH 0037/1087] timer interrupt fixes --- rocket/src/main/scala/ctrl.scala | 9 +++------ rocket/src/main/scala/dpath.scala | 1 + 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index d76a9fb5..9e0a6c12 100644 --- a/rocket/src/main/scala/ctrl.scala +++ 
b/rocket/src/main/scala/ctrl.scala @@ -58,6 +58,7 @@ class ioCtrlDpath extends Bundle() val sboard_clr0a = UFix(5, 'input); val sboard_clr1 = Bool('input); val sboard_clr1a = UFix(5, 'input); + val timer_int = Bool('input); } class ioCtrlAll extends Bundle() @@ -69,13 +70,11 @@ class ioCtrlAll extends Bundle() val host = new ioHost(List("start")); val dtlb_busy = Bool('input); val dtlb_miss = Bool('input); -// val itlb_miss = Bool('input); val xcpt_dtlb_ld = Bool('input); val xcpt_dtlb_st = Bool('input); val xcpt_itlb = Bool('input); val xcpt_ma_ld = Bool('input); val xcpt_ma_st = Bool('input); - val timer_int = Bool('input); } class rocketCtrl extends Component @@ -332,7 +331,6 @@ class rocketCtrl extends Component val jr_taken = (ex_reg_br_type === BR_JR); val j_taken = (ex_reg_br_type === BR_J); -// io.dmem.req_val := ex_reg_mem_val; io.dmem.req_val := ex_reg_mem_val && ~io.dpath.killx; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; @@ -384,7 +382,7 @@ class rocketCtrl extends Component // exception handling // FIXME: verify PC in MEM stage points to valid, restartable instruction - val interrupt = io.dpath.status(SR_ET).toBool && io.dpath.status(15).toBool && io.timer_int; + val interrupt = io.dpath.status(SR_ET).toBool && io.dpath.status(15).toBool && io.dpath.timer_int; val interrupt_cause = UFix(0x17, 5); val mem_exception = @@ -421,9 +419,9 @@ class rocketCtrl extends Component // replay execute stage PC when the D$ is blocked, when the D$ misses, and for privileged instructions val replay_ex = (ex_reg_mem_val && !io.dmem.req_rdy) || io.dmem.resp_miss || mem_reg_privileged; + // replay mem stage PC on a DTLB miss val replay_mem = io.dtlb_miss; -// val replay_mem = Bool(false); val kill_ex = replay_ex || replay_mem; val kill_mem = mem_exception || replay_mem; @@ -525,7 +523,6 @@ class rocketCtrl extends Component io.dpath.stalld := ctrl_stalld.toBool; -// io.dpath.killf := take_pc | io.itlb_miss | ~io.imem.resp_val; io.dpath.killf := 
take_pc | ~io.imem.resp_val; io.dpath.killd := ctrl_killd.toBool; io.dpath.killx := kill_ex.toBool; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 6d611217..b6f63c94 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -355,6 +355,7 @@ class rocketDpath extends Component pcr.io.host.from ^^ io.host.from; pcr.io.host.to ^^ io.host.to; + io.ctrl.timer_int := pcr.io.timer_int; io.ctrl.status := pcr.io.status; io.ptbr := pcr.io.ptbr; io.debug.error_mode := pcr.io.debug.error_mode; From 7b3c34a34144c2a31570ed6eeb63291317279f5b Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sun, 13 Nov 2011 00:59:02 -0800 Subject: [PATCH 0038/1087] regenerated instruction encodings using parse-opcodes --- rocket/src/main/scala/instructions.scala | 394 ++++++++++++++--------- 1 file changed, 242 insertions(+), 152 deletions(-) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 13d822b1..92812bb0 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -4,159 +4,249 @@ import Node._; object Instructions { - val UNIMP = Bits("b00000000000000000000000000000000", 32); - val J = Bits("b?????????????????????????_1100111", 32); - val JAL = Bits("b?????????????????????????_1101111", 32); - val JALR_C = Bits("b?????_?????_????????????_000_1101011", 32); - val JALR_R = Bits("b?????_?????_????????????_001_1101011", 32); - val JALR_J = Bits("b?????_?????_????????????_010_1101011", 32); - val BEQ = Bits("b?????_?????_?????_???????_000_1100011", 32); - val BNE = Bits("b?????_?????_?????_???????_001_1100011", 32); - val BLT = Bits("b?????_?????_?????_???????_100_1100011", 32); - val BGE = Bits("b?????_?????_?????_???????_101_1100011", 32); - val BLTU = Bits("b?????_?????_?????_???????_110_1100011", 32); - val BGEU = Bits("b?????_?????_?????_???????_111_1100011", 32); - val LUI = Bits("b?????_????????????????????_0110111", 
32); - val ADDI = Bits("b?????_?????_????????????_000_0010011", 32); - val SLLI = Bits("b?????_?????_000000_??????_001_0010011", 32); - val SLTI = Bits("b?????_?????_????????????_010_0010011", 32); - val SLTIU = Bits("b?????_?????_????????????_011_0010011", 32); - val XORI = Bits("b?????_?????_????????????_100_0010011", 32); - val SRLI = Bits("b?????_?????_000000_??????_101_0010011", 32); - val SRAI = Bits("b?????_?????_000001_??????_101_0010011", 32); - val ORI = Bits("b?????_?????_????????????_110_0010011", 32); - val ANDI = Bits("b?????_?????_????????????_111_0010011", 32); - val ADD = Bits("b?????_?????_?????_0000000000_0110011", 32); - val SUB = Bits("b?????_?????_?????_1000000000_0110011", 32); - val SLL = Bits("b?????_?????_?????_0000000001_0110011", 32); - val SLT = Bits("b?????_?????_?????_0000000010_0110011", 32); - val SLTU = Bits("b?????_?????_?????_0000000011_0110011", 32); - val riscvXOR = Bits("b?????_?????_?????_0000000100_0110011", 32); - val SRL = Bits("b?????_?????_?????_0000000101_0110011", 32); - val SRA = Bits("b?????_?????_?????_1000000101_0110011", 32); - val riscvOR = Bits("b?????_?????_?????_0000000110_0110011", 32); - val riscvAND = Bits("b?????_?????_?????_0000000111_0110011", 32); - val MUL = Bits("b?????_?????_?????_0000001000_0110011", 32); - val MULH = Bits("b?????_?????_?????_0000001001_0110011", 32); - val MULHSU = Bits("b?????_?????_?????_0000001010_0110011", 32); - val MULHU = Bits("b?????_?????_?????_0000001011_0110011", 32); - val DIV = Bits("b?????_?????_?????_0000001100_0110011", 32); - val DIVU = Bits("b?????_?????_?????_0000001101_0110011", 32); - val REM = Bits("b?????_?????_?????_0000001110_0110011", 32); - val REMU = Bits("b?????_?????_?????_0000001111_0110011", 32); - val ADDIW = Bits("b?????_?????_????????????_000_0011011", 32); - val SLLIW = Bits("b?????_?????_000000_0_?????_001_0011011", 32); - val SRLIW = Bits("b?????_?????_000000_0_?????_101_0011011", 32); - val SRAIW = 
Bits("b?????_?????_000001_0_?????_101_0011011", 32); - val ADDW = Bits("b?????_?????_?????_0000000000_0111011", 32); - val SUBW = Bits("b?????_?????_?????_1000000000_0111011", 32); - val SLLW = Bits("b?????_?????_?????_0000000001_0111011", 32); - val SRLW = Bits("b?????_?????_?????_0000000101_0111011", 32); - val SRAW = Bits("b?????_?????_?????_1000000101_0111011", 32); - val MULW = Bits("b?????_?????_?????_0000001000_0111011", 32); - val DIVW = Bits("b?????_?????_?????_0000001100_0111011", 32); - val DIVUW = Bits("b?????_?????_?????_0000001101_0111011", 32); - val REMW = Bits("b?????_?????_?????_0000001110_0111011", 32); - val REMUW = Bits("b?????_?????_?????_0000001111_0111011", 32); - val LB = Bits("b?????_?????_????????????_000_0000011", 32); - val LH = Bits("b?????_?????_????????????_001_0000011", 32); - val LW = Bits("b?????_?????_????????????_010_0000011", 32); - val LD = Bits("b?????_?????_????????????_011_0000011", 32); - val LBU = Bits("b?????_?????_????????????_100_0000011", 32); - val LHU = Bits("b?????_?????_????????????_101_0000011", 32); - val LWU = Bits("b?????_?????_????????????_110_0000011", 32); - val SB = Bits("b?????_?????_?????_???????_000_0100011", 32); - val SH = Bits("b?????_?????_?????_???????_001_0100011", 32); - val SW = Bits("b?????_?????_?????_???????_010_0100011", 32); - val SD = Bits("b?????_?????_?????_???????_011_0100011", 32); - val AMOADD_W = Bits("b?????_?????_?????_00000_000_10_1000011", 32); - val AMOSWAP_W = Bits("b?????_?????_?????_00000_010_10_1000011", 32); - val AMOAND_W = Bits("b?????_?????_?????_00000_100_10_1000011", 32); - val AMOOR_W = Bits("b?????_?????_?????_00000_110_10_1000011", 32); - val AMOMIN_W = Bits("b?????_?????_?????_00001_000_10_1000011", 32); - val AMOMAX_W = Bits("b?????_?????_?????_00001_010_10_1000011", 32); - val AMOMINU_W = Bits("b?????_?????_?????_00001_100_10_1000011", 32); - val AMOMAXU_W = Bits("b?????_?????_?????_00001_110_10_1000011", 32); - val AMOADD_D = 
Bits("b?????_?????_?????_00000_000_11_1000011", 32); - val AMOSWAP_D = Bits("b?????_?????_?????_00000_010_11_1000011", 32); - val AMOAND_D = Bits("b?????_?????_?????_00000_100_11_1000011", 32); - val AMOOR_D = Bits("b?????_?????_?????_00000_110_11_1000011", 32); - val AMOMIN_D = Bits("b?????_?????_?????_00001_000_11_1000011", 32); - val AMOMAX_D = Bits("b?????_?????_?????_00001_010_11_1000011", 32); - val AMOMINU_D = Bits("b?????_?????_?????_00001_100_11_1000011", 32); - val AMOMAXU_D = Bits("b?????_?????_?????_00001_110_11_1000011", 32); - val FENCE = Bits("b?????_?????_????????????_010_0101111", 32); - val FENCE_I = Bits("b?????_?????_????????????_001_0101111", 32); - val RDNPC = Bits("b?????_00000_00000_0000000000_0010111", 32); - val SYNCI = Bits("b00000_00000_00000_0000000001_0010111", 32); - val SYNC = Bits("b00000_00000_00000_0000000010_0010111", 32); - val SYSCALL = Bits("b00000_00000_00000_0000000000_1110111", 32); - val BREAK = Bits("b00000_00000_00000_0000000001_1110111", 32); - val EI = Bits("b?????_00000_00000_0000000000_1111011", 32); - val DI = Bits("b?????_00000_00000_0000000001_1111011", 32); - val MFPCR = Bits("b?????_00000_?????_0000000010_1111011", 32); - val MTPCR = Bits("b00000_?????_?????_0000000011_1111011", 32); - val ERET = Bits("b00000_00000_00000_0000000100_1111011", 32); - val FADD_S = Bits("b?????_?????_?????_00000_???_00_1010011", 32); - val FSUB_S = Bits("b?????_?????_?????_00001_???_00_1010011", 32); - val FMUL_S = Bits("b?????_?????_?????_00010_???_00_1010011", 32); - val FDIV_S = Bits("b?????_?????_?????_00011_???_00_1010011", 32); - val FSQRT_S = Bits("b?????_?????_00000_00100_???_00_1010011", 32); - val FSGNJ_S = Bits("b?????_?????_?????_0010111100_1010011", 32); - val FSGNJN_S = Bits("b?????_?????_?????_0011011100_1010011", 32); - val FSGNJX_S = Bits("b?????_?????_?????_0011111100_1010011", 32); - val FADD_D = Bits("b?????_?????_?????_00000_???_01_1010011", 32); - val FSUB_D = Bits("b?????_?????_?????_00001_???_01_1010011", 
32); - val FMUL_D = Bits("b?????_?????_?????_00010_???_01_1010011", 32); - val FDIV_D = Bits("b?????_?????_?????_00011_???_01_1010011", 32); - val FSQRT_D = Bits("b?????_?????_00000_00100_???_01_1010011", 32); - val FSGNJ_D = Bits("b?????_?????_?????_0010111101_1010011", 32); - val FSGNJN_D = Bits("b?????_?????_?????_0011011101_1010011", 32); - val FSGNJX_D = Bits("b?????_?????_?????_0011111101_1010011", 32); - val FCVT_L_S = Bits("b?????_?????_00000_01000_???_00_1010011", 32); - val FCVT_LU_S = Bits("b?????_?????_00000_01001_???_00_1010011", 32); - val FCVT_W_S = Bits("b?????_?????_00000_01010_???_00_1010011", 32); - val FCVT_WU_S = Bits("b?????_?????_00000_01011_???_00_1010011", 32); - val FCVT_L_D = Bits("b?????_?????_00000_01000_???_01_1010011", 32); - val FCVT_LU_D = Bits("b?????_?????_00000_01001_???_01_1010011", 32); - val FCVT_W_D = Bits("b?????_?????_00000_01010_???_01_1010011", 32); - val FCVT_WU_D = Bits("b?????_?????_00000_01011_???_01_1010011", 32); - val FCVT_S_L = Bits("b?????_?????_00000_01100_???_00_1010011", 32); - val FCVT_S_LU = Bits("b?????_?????_00000_01101_???_00_1010011", 32); - val FCVT_S_W = Bits("b?????_?????_00000_01110_???_00_1010011", 32); - val FCVT_S_WU = Bits("b?????_?????_00000_01111_???_00_1010011", 32); - val FCVT_D_L = Bits("b?????_?????_00000_01100_???_01_1010011", 32); - val FCVT_D_LU = Bits("b?????_?????_00000_01101_???_01_1010011", 32); - val FCVT_D_W = Bits("b?????_?????_00000_0111011101_1010011", 32); - val FCVT_D_WU = Bits("b?????_?????_00000_0111111101_1010011", 32); - val FCVT_S_D = Bits("b?????_?????_00000_10001_???_00_1010011", 32); - val FCVT_D_S = Bits("b?????_?????_00000_10000_???_01_1010011", 32); - val FEQ_S = Bits("b?????_?????_?????_1010111100_1010011", 32); - val FLT_S = Bits("b?????_?????_?????_1011011100_1010011", 32); - val FLE_S = Bits("b?????_?????_?????_1011111100_1010011", 32); - val FEQ_D = Bits("b?????_?????_?????_1010111101_1010011", 32); - val FLT_D = Bits("b?????_?????_?????_1011011101_1010011", 
32); - val FLE_D = Bits("b?????_?????_?????_1011111101_1010011", 32); - val MFTX_S = Bits("b?????_00000_?????_1100011100_1010011", 32); - val MFTX_D = Bits("b?????_00000_?????_1100011101_1010011", 32); - val MFFSR = Bits("b?????_00000_00000_1101111100_1010011", 32); - val MXTF_S = Bits("b?????_?????_00000_1110011100_1010011", 32); - val MXTF_D = Bits("b?????_?????_00000_1110011101_1010011", 32); - val MTFSR = Bits("b00000_?????_00000_1110111100_1010011", 32); - val FLW = Bits("b?????_?????_????????????_010_0000111", 32); - val FLD = Bits("b?????_?????_????????????_011_0000111", 32); - val FSW = Bits("b?????_?????_?????_???????_010_0100111", 32); - val FSD = Bits("b?????_?????_?????_???????_011_0100111", 32); - val FMADD_S = Bits("b?????_?????_?????_?????_???_00_1000011", 32); - val FMSUB_S = Bits("b?????_?????_?????_?????_???_00_1000111", 32); - val FNMSUB_S = Bits("b?????_?????_?????_?????_???_00_1001011", 32); - val FNMADD_S = Bits("b?????_?????_?????_?????_???_00_1001111", 32); - val FMADD_D = Bits("b?????_?????_?????_?????_???_01_1000011", 32); - val FMSUB_D = Bits("b?????_?????_?????_?????_???_01_1000111", 32); - val FNMSUB_D = Bits("b?????_?????_?????_?????_???_01_1001011", 32); - val FNMADD_D = Bits("b?????_?????_?????_?????_???_01_1001111", 32); + // automatically generated by parse-opcodes + val J = Bits("b?????????????????????????_1100111",32); + val JAL = Bits("b?????????????????????????_1101111",32); + val JALR_C = Bits("b?????_?????_????????????_000_1101011",32); + val JALR_R = Bits("b?????_?????_????????????_001_1101011",32); + val JALR_J = Bits("b?????_?????_????????????_010_1101011",32); + val RDNPC = Bits("b?????_00000_000000000000_100_1101011",32); + val BEQ = Bits("b?????_?????_?????_???????_000_1100011",32); + val BNE = Bits("b?????_?????_?????_???????_001_1100011",32); + val BLT = Bits("b?????_?????_?????_???????_100_1100011",32); + val BGE = Bits("b?????_?????_?????_???????_101_1100011",32); + val BLTU = 
Bits("b?????_?????_?????_???????_110_1100011",32); + val BGEU = Bits("b?????_?????_?????_???????_111_1100011",32); + val LUI = Bits("b?????_????????????????????_0110111",32); + val ADDI = Bits("b?????_?????_????????????_000_0010011",32); + val SLLI = Bits("b?????_?????_000000_??????_001_0010011",32); + val SLTI = Bits("b?????_?????_????????????_010_0010011",32); + val SLTIU = Bits("b?????_?????_????????????_011_0010011",32); + val XORI = Bits("b?????_?????_????????????_100_0010011",32); + val SRLI = Bits("b?????_?????_000000_??????_101_0010011",32); + val SRAI = Bits("b?????_?????_000001_??????_101_0010011",32); + val ORI = Bits("b?????_?????_????????????_110_0010011",32); + val ANDI = Bits("b?????_?????_????????????_111_0010011",32); + val ADD = Bits("b?????_?????_?????_0000000000_0110011",32); + val SUB = Bits("b?????_?????_?????_1000000000_0110011",32); + val SLL = Bits("b?????_?????_?????_0000000001_0110011",32); + val SLT = Bits("b?????_?????_?????_0000000010_0110011",32); + val SLTU = Bits("b?????_?????_?????_0000000011_0110011",32); + val riscvXOR = Bits("b?????_?????_?????_0000000100_0110011",32); + val SRL = Bits("b?????_?????_?????_0000000101_0110011",32); + val SRA = Bits("b?????_?????_?????_1000000101_0110011",32); + val riscvOR = Bits("b?????_?????_?????_0000000110_0110011",32); + val riscvAND = Bits("b?????_?????_?????_0000000111_0110011",32); + val MUL = Bits("b?????_?????_?????_0000001000_0110011",32); + val MULH = Bits("b?????_?????_?????_0000001001_0110011",32); + val MULHSU = Bits("b?????_?????_?????_0000001010_0110011",32); + val MULHU = Bits("b?????_?????_?????_0000001011_0110011",32); + val DIV = Bits("b?????_?????_?????_0000001100_0110011",32); + val DIVU = Bits("b?????_?????_?????_0000001101_0110011",32); + val REM = Bits("b?????_?????_?????_0000001110_0110011",32); + val REMU = Bits("b?????_?????_?????_0000001111_0110011",32); + val ADDIW = Bits("b?????_?????_????????????_000_0011011",32); + val SLLIW = 
Bits("b?????_?????_000000_0_?????_001_0011011",32); + val SRLIW = Bits("b?????_?????_000000_0_?????_101_0011011",32); + val SRAIW = Bits("b?????_?????_000001_0_?????_101_0011011",32); + val ADDW = Bits("b?????_?????_?????_0000000000_0111011",32); + val SUBW = Bits("b?????_?????_?????_1000000000_0111011",32); + val SLLW = Bits("b?????_?????_?????_0000000001_0111011",32); + val SRLW = Bits("b?????_?????_?????_0000000101_0111011",32); + val SRAW = Bits("b?????_?????_?????_1000000101_0111011",32); + val MULW = Bits("b?????_?????_?????_0000001000_0111011",32); + val DIVW = Bits("b?????_?????_?????_0000001100_0111011",32); + val DIVUW = Bits("b?????_?????_?????_0000001101_0111011",32); + val REMW = Bits("b?????_?????_?????_0000001110_0111011",32); + val REMUW = Bits("b?????_?????_?????_0000001111_0111011",32); + val LB = Bits("b?????_?????_????????????_000_0000011",32); + val LH = Bits("b?????_?????_????????????_001_0000011",32); + val LW = Bits("b?????_?????_????????????_010_0000011",32); + val LD = Bits("b?????_?????_????????????_011_0000011",32); + val LBU = Bits("b?????_?????_????????????_100_0000011",32); + val LHU = Bits("b?????_?????_????????????_101_0000011",32); + val LWU = Bits("b?????_?????_????????????_110_0000011",32); + val SB = Bits("b?????_?????_?????_???????_000_0100011",32); + val SH = Bits("b?????_?????_?????_???????_001_0100011",32); + val SW = Bits("b?????_?????_?????_???????_010_0100011",32); + val SD = Bits("b?????_?????_?????_???????_011_0100011",32); + val AMOADD_W = Bits("b?????_?????_?????_0000000010_0101011",32); + val AMOSWAP_W = Bits("b?????_?????_?????_0000001010_0101011",32); + val AMOAND_W = Bits("b?????_?????_?????_0000010010_0101011",32); + val AMOOR_W = Bits("b?????_?????_?????_0000011010_0101011",32); + val AMOMIN_W = Bits("b?????_?????_?????_0000100010_0101011",32); + val AMOMAX_W = Bits("b?????_?????_?????_0000101010_0101011",32); + val AMOMINU_W = Bits("b?????_?????_?????_0000110010_0101011",32); + val AMOMAXU_W = 
Bits("b?????_?????_?????_0000111010_0101011",32); + val AMOADD_D = Bits("b?????_?????_?????_0000000011_0101011",32); + val AMOSWAP_D = Bits("b?????_?????_?????_0000001011_0101011",32); + val AMOAND_D = Bits("b?????_?????_?????_0000010011_0101011",32); + val AMOOR_D = Bits("b?????_?????_?????_0000011011_0101011",32); + val AMOMIN_D = Bits("b?????_?????_?????_0000100011_0101011",32); + val AMOMAX_D = Bits("b?????_?????_?????_0000101011_0101011",32); + val AMOMINU_D = Bits("b?????_?????_?????_0000110011_0101011",32); + val AMOMAXU_D = Bits("b?????_?????_?????_0000111011_0101011",32); + val FENCE_I = Bits("b?????_?????_????????????_001_0101111",32); + val FENCE = Bits("b?????_?????_????????????_010_0101111",32); + val SYSCALL = Bits("b00000_00000_00000_0000000000_1110111",32); + val BREAK = Bits("b00000_00000_00000_0000000001_1110111",32); + val RDCYCLE = Bits("b?????_00000_00000_0000000100_1110111",32); + val RDTIME = Bits("b?????_00000_00000_0000001100_1110111",32); + val RDINSTRET = Bits("b?????_00000_00000_0000010100_1110111",32); + val MOVZ = Bits("b?????_?????_?????_0000000101_1110111",32); + val MOVN = Bits("b?????_?????_?????_0000001101_1110111",32); + val EI = Bits("b?????_00000_00000_0000000000_1111011",32); + val DI = Bits("b?????_00000_00000_0000000001_1111011",32); + val MFPCR = Bits("b?????_00000_?????_0000000010_1111011",32); + val MTPCR = Bits("b00000_?????_?????_0000000011_1111011",32); + val ERET = Bits("b00000_00000_00000_0000000100_1111011",32); + val CFLUSH = Bits("b00000_00000_00000_0000000101_1111011",32); + // floating point instructions + val FMOVZ = Bits("b?????_?????_?????_0000010101_1110111",32); + val FMOVN = Bits("b?????_?????_?????_0000011101_1110111",32); + val FADD_S = Bits("b?????_?????_?????_00000_???_00_1010011",32); + val FSUB_S = Bits("b?????_?????_?????_00001_???_00_1010011",32); + val FMUL_S = Bits("b?????_?????_?????_00010_???_00_1010011",32); + val FDIV_S = Bits("b?????_?????_?????_00011_???_00_1010011",32); + val FSQRT_S = 
Bits("b?????_?????_00000_00100_???_00_1010011",32); + val FSGNJ_S = Bits("b?????_?????_?????_00101_000_00_1010011",32); + val FSGNJN_S = Bits("b?????_?????_?????_00110_000_00_1010011",32); + val FSGNJX_S = Bits("b?????_?????_?????_00111_000_00_1010011",32); + val FADD_D = Bits("b?????_?????_?????_00000_???_01_1010011",32); + val FSUB_D = Bits("b?????_?????_?????_00001_???_01_1010011",32); + val FMUL_D = Bits("b?????_?????_?????_00010_???_01_1010011",32); + val FDIV_D = Bits("b?????_?????_?????_00011_???_01_1010011",32); + val FSQRT_D = Bits("b?????_?????_00000_00100_???_01_1010011",32); + val FSGNJ_D = Bits("b?????_?????_?????_00101_000_01_1010011",32); + val FSGNJN_D = Bits("b?????_?????_?????_00110_000_01_1010011",32); + val FSGNJX_D = Bits("b?????_?????_?????_00111_000_01_1010011",32); + val FCVT_L_S = Bits("b?????_?????_00000_01000_???_00_1010011",32); + val FCVT_LU_S = Bits("b?????_?????_00000_01001_???_00_1010011",32); + val FCVT_W_S = Bits("b?????_?????_00000_01010_???_00_1010011",32); + val FCVT_WU_S = Bits("b?????_?????_00000_01011_???_00_1010011",32); + val FCVT_L_D = Bits("b?????_?????_00000_01000_???_01_1010011",32); + val FCVT_LU_D = Bits("b?????_?????_00000_01001_???_01_1010011",32); + val FCVT_W_D = Bits("b?????_?????_00000_01010_???_01_1010011",32); + val FCVT_WU_D = Bits("b?????_?????_00000_01011_???_01_1010011",32); + val FCVT_S_L = Bits("b?????_?????_00000_01100_???_00_1010011",32); + val FCVT_S_LU = Bits("b?????_?????_00000_01101_???_00_1010011",32); + val FCVT_S_W = Bits("b?????_?????_00000_01110_???_00_1010011",32); + val FCVT_S_WU = Bits("b?????_?????_00000_01111_???_00_1010011",32); + val FCVT_D_L = Bits("b?????_?????_00000_01100_???_01_1010011",32); + val FCVT_D_LU = Bits("b?????_?????_00000_01101_???_01_1010011",32); + val FCVT_D_W = Bits("b?????_?????_00000_01110_???_01_1010011",32); + val FCVT_D_WU = Bits("b?????_?????_00000_01111_???_01_1010011",32); + val FCVT_S_D = Bits("b?????_?????_00000_10001_???_00_1010011",32); + val FCVT_D_S = 
Bits("b?????_?????_00000_10000_???_01_1010011",32); + val FEQ_S = Bits("b?????_?????_?????_10101_000_00_1010011",32); + val FLT_S = Bits("b?????_?????_?????_10110_000_00_1010011",32); + val FLE_S = Bits("b?????_?????_?????_10111_000_00_1010011",32); + val FEQ_D = Bits("b?????_?????_?????_10101_000_01_1010011",32); + val FLT_D = Bits("b?????_?????_?????_10110_000_01_1010011",32); + val FLE_D = Bits("b?????_?????_?????_10111_000_01_1010011",32); + val FMIN_S = Bits("b?????_?????_?????_11000_000_00_1010011",32); + val FMAX_S = Bits("b?????_?????_?????_11001_000_00_1010011",32); + val FMIN_D = Bits("b?????_?????_?????_11000_000_01_1010011",32); + val FMAX_D = Bits("b?????_?????_?????_11001_000_01_1010011",32); + val MFTX_S = Bits("b?????_00000_?????_11100_000_00_1010011",32); + val MFTX_D = Bits("b?????_00000_?????_11100_000_01_1010011",32); + val MFFSR = Bits("b?????_00000_00000_11101_000_00_1010011",32); + val MXTF_S = Bits("b?????_?????_00000_11110_000_00_1010011",32); + val MXTF_D = Bits("b?????_?????_00000_11110_000_01_1010011",32); + val MTFSR = Bits("b?????_?????_00000_11111_000_00_1010011",32); + val FLW = Bits("b?????_?????_????????????_010_0000111",32); + val FLD = Bits("b?????_?????_????????????_011_0000111",32); + val FSW = Bits("b?????_?????_?????_???????_010_0100111",32); + val FSD = Bits("b?????_?????_?????_???????_011_0100111",32); + val FMADD_S = Bits("b?????_?????_?????_?????_???_00_1000011",32); + val FMSUB_S = Bits("b?????_?????_?????_?????_???_00_1000111",32); + val FNMSUB_S = Bits("b?????_?????_?????_?????_???_00_1001011",32); + val FNMADD_S = Bits("b?????_?????_?????_?????_???_00_1001111",32); + val FMADD_D = Bits("b?????_?????_?????_?????_???_01_1000011",32); + val FMSUB_D = Bits("b?????_?????_?????_?????_???_01_1000111",32); + val FNMSUB_D = Bits("b?????_?????_?????_?????_???_01_1001011",32); + val FNMADD_D = Bits("b?????_?????_?????_?????_???_01_1001111",32); + // vector instructions + val FENCE_L_V = 
Bits("b?????_?????_????????????_100_0101111",32); + val FENCE_G_V = Bits("b?????_?????_????????????_101_0101111",32); + val FENCE_L_CV = Bits("b?????_?????_????????????_110_0101111",32); + val FENCE_G_CV = Bits("b?????_?????_????????????_111_0101111",32); + val STOP = Bits("b00000_00000_00000_0000000010_1110111",32); + val UTIDX = Bits("b?????_00000_00000_0000000011_1110111",32); + val VLD = Bits("b?????_?????_00000_0000000011_0001011",32); + val VLW = Bits("b?????_?????_00000_0000000010_0001011",32); + val VLWU = Bits("b?????_?????_00000_0000000110_0001011",32); + val VLH = Bits("b?????_?????_00000_0000000001_0001011",32); + val VLHU = Bits("b?????_?????_00000_0000000101_0001011",32); + val VLB = Bits("b?????_?????_00000_0000000000_0001011",32); + val VLBU = Bits("b?????_?????_00000_0000000100_0001011",32); + val VFLD = Bits("b?????_?????_00000_0000001011_0001011",32); + val VFLW = Bits("b?????_?????_00000_0000001010_0001011",32); + val VLSTD = Bits("b?????_?????_?????_0000100011_0001011",32); + val VLSTW = Bits("b?????_?????_?????_0000100010_0001011",32); + val VLSTWU = Bits("b?????_?????_?????_0000100110_0001011",32); + val VLSTH = Bits("b?????_?????_?????_0000100001_0001011",32); + val VLSTHU = Bits("b?????_?????_?????_0000100101_0001011",32); + val VLSTB = Bits("b?????_?????_?????_0000100000_0001011",32); + val VLSTBU = Bits("b?????_?????_?????_0000100100_0001011",32); + val VFLSTD = Bits("b?????_?????_?????_0000101011_0001011",32); + val VFLSTW = Bits("b?????_?????_?????_0000101010_0001011",32); + val VLSEGD = Bits("b?????_?????_?????_0001000011_0001011",32); + val VLSEGW = Bits("b?????_?????_?????_0001000010_0001011",32); + val VLSEGWU = Bits("b?????_?????_?????_0001000110_0001011",32); + val VLSEGH = Bits("b?????_?????_?????_0001000001_0001011",32); + val VLSEGHU = Bits("b?????_?????_?????_0001000101_0001011",32); + val VLSEGB = Bits("b?????_?????_?????_0001000000_0001011",32); + val VLSEGBU = Bits("b?????_?????_?????_0001000100_0001011",32); + val VFLSEGD 
= Bits("b?????_?????_?????_0001001011_0001011",32); + val VFLSEGW = Bits("b?????_?????_?????_0001001010_0001011",32); + val VLSEGSTD = Bits("b?????_?????_?????_?????_100_11_0001011",32); + val VLSEGSTW = Bits("b?????_?????_?????_?????_100_10_0001011",32); + val VLSEGSTWU = Bits("b?????_?????_?????_?????_101_10_0001011",32); + val VLSEGSTH = Bits("b?????_?????_?????_?????_100_01_0001011",32); + val VLSEGSTHU = Bits("b?????_?????_?????_?????_101_01_0001011",32); + val VLSEGSTB = Bits("b?????_?????_?????_?????_100_00_0001011",32); + val VLSEGSTBU = Bits("b?????_?????_?????_?????_101_00_0001011",32); + val VFLSEGSTD = Bits("b?????_?????_?????_?????_110_11_0001011",32); + val VFLSEGSTW = Bits("b?????_?????_?????_?????_110_10_0001011",32); + val VSD = Bits("b?????_?????_00000_0000000011_0001111",32); + val VSW = Bits("b?????_?????_00000_0000000010_0001111",32); + val VSH = Bits("b?????_?????_00000_0000000001_0001111",32); + val VSB = Bits("b?????_?????_00000_0000000000_0001111",32); + val VFSD = Bits("b?????_?????_00000_0000001011_0001111",32); + val VFSW = Bits("b?????_?????_00000_0000001010_0001111",32); + val VSSTD = Bits("b?????_?????_?????_0000100011_0001111",32); + val VSSTW = Bits("b?????_?????_?????_0000100010_0001111",32); + val VSSTH = Bits("b?????_?????_?????_0000100001_0001111",32); + val VSSTB = Bits("b?????_?????_?????_0000100000_0001111",32); + val VFSSTD = Bits("b?????_?????_?????_0000101011_0001111",32); + val VFSSTW = Bits("b?????_?????_?????_0000101010_0001111",32); + val VSSEGD = Bits("b?????_?????_?????_0001000011_0001111",32); + val VSSEGW = Bits("b?????_?????_?????_0001000010_0001111",32); + val VSSEGH = Bits("b?????_?????_?????_0001000001_0001111",32); + val VSSEGB = Bits("b?????_?????_?????_0001000000_0001111",32); + val VFSSEGD = Bits("b?????_?????_?????_0001001011_0001111",32); + val VFSSEGW = Bits("b?????_?????_?????_0001001010_0001111",32); + val VSSEGSTD = Bits("b?????_?????_?????_?????_100_11_0001111",32); + val VSSEGSTW = 
Bits("b?????_?????_?????_?????_100_10_0001111",32); + val VSSEGSTH = Bits("b?????_?????_?????_?????_100_01_0001111",32); + val VSSEGSTB = Bits("b?????_?????_?????_?????_100_00_0001111",32); + val VFSSEGSTD = Bits("b?????_?????_?????_?????_110_11_0001111",32); + val VFSSEGSTW = Bits("b?????_?????_?????_?????_110_10_0001111",32); + val VMVV = Bits("b?????_?????_00000_0000000000_1110011",32); + val VMSV = Bits("b?????_?????_00000_0000010000_1110011",32); + val VMST = Bits("b?????_?????_?????_0000100000_1110011",32); + val VMTS = Bits("b?????_?????_?????_0000110000_1110011",32); + val VFMVV = Bits("b?????_?????_00000_0000000010_1110011",32); + val VFMSV = Bits("b?????_?????_00000_0000010010_1110011",32); + val VFMST = Bits("b?????_?????_?????_0000100010_1110011",32); + val VFMTS = Bits("b?????_?????_?????_0000110010_1110011",32); + val VVCFGIVL = Bits("b?????_?????_????????????_001_1110011",32); + val VTCFGIVL = Bits("b?????_?????_????????????_011_1110011",32); + val VSETVL = Bits("b?????_?????_000000000000_101_1110011",32); + val VF = Bits("b00000_?????_????????????_111_1110011",32); + val NOP = ADDI & Bits("b00000000000000000000001111111111", 32); - val CFLUSH = Bits("b00000_00000_00000_0000000101_1111011", 32); } } From 29d44b8bc57b74dc3e5a102beb39fc4685272338 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sun, 13 Nov 2011 01:17:33 -0800 Subject: [PATCH 0039/1087] fixed typo that broke illegal instruction exception --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 9e0a6c12..4e8be7df 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -367,7 +367,7 @@ class rocketCtrl extends Component mem_reg_xcpt_ma_inst <== ex_reg_xcpt_ma_inst; mem_reg_xcpt_itlb <== ex_reg_xcpt_itlb; - mem_reg_xcpt_illegal <== mem_reg_xcpt_illegal; + mem_reg_xcpt_illegal <== ex_reg_xcpt_illegal; mem_reg_xcpt_privileged <== 
ex_reg_xcpt_privileged; // mem_reg_xcpt_fpu <== Bool(false); mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; From 67c7e7e28f15331e328d5d0833a581d73b808b62 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sun, 13 Nov 2011 13:06:35 -0800 Subject: [PATCH 0040/1087] cache/tlb bugfixes, increased memory size to 256meg --- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/dcache.scala | 4 ++-- rocket/src/main/scala/itlb.scala | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 612a7ec0..96a73e9f 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -182,7 +182,7 @@ object Constants // physical memory size (# 8K pages) // if you change this value, make sure to also change MEMORY_SIZE variable in memif.h - val MEMSIZE_PAGES = 8192; // 64 megs + val MEMSIZE_PAGES = 0x8000; // 256 megs val MEMSIZE_BYTES = MEMSIZE_PAGES*8192; val HAVE_FPU = Bool(false); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 4e8be7df..2277ad4a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -422,8 +422,8 @@ class rocketCtrl extends Component // replay mem stage PC on a DTLB miss val replay_mem = io.dtlb_miss; - val kill_ex = replay_ex || replay_mem; val kill_mem = mem_exception || replay_mem; + val kill_ex = replay_ex || kill_mem; io.dpath.sel_pc := Mux(replay_mem, PC_MEM, // dtlb miss diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index fe2962d3..863c0855 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -352,8 +352,8 @@ class rocketDCacheDM(lines: Int) extends Component { r_cpu_resp_val; val misaligned = - ((r_cpu_req_type === MT_H) && r_cpu_req_idx(0).toBool) || - ((r_cpu_req_type === MT_W) && (r_cpu_req_idx(1,0) != Bits(0,2))) || + (((r_cpu_req_type === 
MT_H) || (r_cpu_req_type === MT_HU)) && r_cpu_req_idx(0).toBool) || + (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0,2))) || ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0,3))); io.cpu.xcpt_ma_ld := r_cpu_req_val && r_req_load && misaligned; diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 0df38144..b6e82faf 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -177,7 +177,7 @@ class rocketITLB(entries: Int) extends Component ((status_s && !sx_array(tag_hit_addr).toBool) || (status_u && !ux_array(tag_hit_addr).toBool)); - io.cpu.exception := access_fault || outofrange; + io.cpu.exception := access_fault; //|| outofrange; io.cpu.req_rdy := Mux(status_vm, (state === s_ready) && (!r_cpu_req_val || tag_hit), Bool(true)); io.cpu.resp_miss := tlb_miss || (state != s_ready); io.cpu.resp_ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; From 9d3471a5691f243ccd04c1dfb11987786fb7c2b1 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sun, 13 Nov 2011 23:32:18 -0800 Subject: [PATCH 0041/1087] more cache fixes, more test harness debug output --- rocket/src/main/scala/ctrl.scala | 89 +++++++++++++++++++++++++++--- rocket/src/main/scala/dcache.scala | 34 ++++++------ rocket/src/main/scala/dtlb.scala | 2 +- rocket/src/main/scala/itlb.scala | 2 +- 4 files changed, 100 insertions(+), 27 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2277ad4a..3eb1b374 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -81,6 +81,77 @@ class rocketCtrl extends Component { val io = new ioCtrlAll(); + val fp = + ListLookup(io.dpath.inst, + List(Bool(false)), + Array( + FMOVZ -> List(Bool(true)), + FMOVN -> List(Bool(true)), + FADD_S -> List(Bool(true)), + FSUB_S -> List(Bool(true)), + FMUL_S -> List(Bool(true)), + FDIV_S -> List(Bool(true)), + FSQRT_S 
-> List(Bool(true)), + FSGNJ_S -> List(Bool(true)), + FSGNJN_S -> List(Bool(true)), + FSGNJX_S -> List(Bool(true)), + FADD_D -> List(Bool(true)), + FSUB_D -> List(Bool(true)), + FMUL_D -> List(Bool(true)), + FDIV_D -> List(Bool(true)), + FSQRT_D -> List(Bool(true)), + FSGNJ_D -> List(Bool(true)), + FSGNJN_D -> List(Bool(true)), + FSGNJX_D -> List(Bool(true)), + FCVT_L_S -> List(Bool(true)), + FCVT_LU_S -> List(Bool(true)), + FCVT_W_S -> List(Bool(true)), + FCVT_WU_S -> List(Bool(true)), + FCVT_L_D -> List(Bool(true)), + FCVT_LU_D -> List(Bool(true)), + FCVT_W_D -> List(Bool(true)), + FCVT_WU_D -> List(Bool(true)), + FCVT_S_L -> List(Bool(true)), + FCVT_S_LU -> List(Bool(true)), + FCVT_S_W -> List(Bool(true)), + FCVT_S_WU -> List(Bool(true)), + FCVT_D_L -> List(Bool(true)), + FCVT_D_LU -> List(Bool(true)), + FCVT_D_W -> List(Bool(true)), + FCVT_D_WU -> List(Bool(true)), + FCVT_S_D -> List(Bool(true)), + FCVT_D_S -> List(Bool(true)), + FEQ_S -> List(Bool(true)), + FLT_S -> List(Bool(true)), + FLE_S -> List(Bool(true)), + FEQ_D -> List(Bool(true)), + FLT_D -> List(Bool(true)), + FLE_D -> List(Bool(true)), + FMIN_S -> List(Bool(true)), + FMAX_S -> List(Bool(true)), + FMIN_D -> List(Bool(true)), + FMAX_D -> List(Bool(true)), + MFTX_S -> List(Bool(true)), + MFTX_D -> List(Bool(true)), + MFFSR -> List(Bool(true)), + MXTF_S -> List(Bool(true)), + MXTF_D -> List(Bool(true)), + MTFSR -> List(Bool(true)), + FLW -> List(Bool(true)), + FLD -> List(Bool(true)), + FSW -> List(Bool(true)), + FSD -> List(Bool(true)), + FMADD_S -> List(Bool(true)), + FMSUB_S -> List(Bool(true)), + FNMSUB_S -> List(Bool(true)), + FNMADD_S -> List(Bool(true)), + FMADD_D -> List(Bool(true)), + FMSUB_D -> List(Bool(true)), + FNMSUB_D -> List(Bool(true)), + FNMADD_D -> List(Bool(true)) + )); + val id_fp_val :: Nil = fp; + val xpr64 = Y; val cs = ListLookup(io.dpath.inst, @@ -198,7 +269,6 @@ class rocketCtrl extends Component val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst); // FIXME -// 
io.imem.req_val := io.host.start && !io.dpath.xcpt_ma_inst; io.imem.req_val := io.host.start && !io.dpath.xcpt_ma_inst; val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_sel_alu1 :: id_fn_dw :: id_fn_alu :: csremainder = cs; @@ -254,15 +324,14 @@ class rocketCtrl extends Component val ex_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val ex_reg_xcpt_illegal = Reg(resetVal = Bool(false)); val ex_reg_xcpt_privileged = Reg(resetVal = Bool(false)); -// val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false)); + val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val mem_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val mem_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val mem_reg_xcpt_illegal = Reg(resetVal = Bool(false)); val mem_reg_xcpt_privileged = Reg(resetVal = Bool(false)); -// val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); - val mem_reg_xcpt_fpu = Bool(false); // FIXME: trap on unimplemented FPU instructions + val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); when (!io.dpath.stalld) { @@ -278,6 +347,8 @@ class rocketCtrl extends Component } } + val illegal_inst = !id_int_val.toBool && !id_fp_val.toBool; + when (reset.toBool || io.dpath.killd) { ex_reg_br_type <== BR_N; ex_reg_btb_hit <== Bool(false); @@ -292,7 +363,7 @@ class rocketCtrl extends Component ex_reg_xcpt_itlb <== Bool(false); ex_reg_xcpt_illegal <== Bool(false); ex_reg_xcpt_privileged <== Bool(false); -// ex_reg_xcpt_fpu <== Bool(false); + ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== Bool(false); } otherwise { @@ -307,9 +378,9 @@ class rocketCtrl extends Component ex_reg_xcpt_ma_inst <== id_reg_xcpt_ma_inst; ex_reg_xcpt_itlb <== id_reg_xcpt_itlb; - ex_reg_xcpt_illegal <== ~id_int_val.toBool; + ex_reg_xcpt_illegal <== illegal_inst; ex_reg_xcpt_privileged <== (id_privileged & ~io.dpath.status(5)).toBool; -// ex_reg_xcpt_fpu <== Bool(false); + ex_reg_xcpt_fpu 
<== id_fp_val.toBool; ex_reg_xcpt_syscall <== id_syscall.toBool; } @@ -354,7 +425,7 @@ class rocketCtrl extends Component mem_reg_xcpt_itlb <== Bool(false); mem_reg_xcpt_illegal <== Bool(false); mem_reg_xcpt_privileged <== Bool(false); -// mem_reg_xcpt_fpu <== Bool(false); + mem_reg_xcpt_fpu <== Bool(false); mem_reg_xcpt_syscall <== Bool(false); } otherwise { @@ -369,7 +440,7 @@ class rocketCtrl extends Component mem_reg_xcpt_itlb <== ex_reg_xcpt_itlb; mem_reg_xcpt_illegal <== ex_reg_xcpt_illegal; mem_reg_xcpt_privileged <== ex_reg_xcpt_privileged; -// mem_reg_xcpt_fpu <== Bool(false); + mem_reg_xcpt_fpu <== ex_reg_xcpt_fpu; mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; } diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 863c0855..d71b2155 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -15,7 +15,6 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { val req_type = Bits(3, 'input); val req_idx = Bits(PGIDX_BITS, 'input); val req_ppn = Bits(PPN_BITS, 'input); -// val req_addr = UFix(PADDR_BITS, 'input); val req_data = Bits(64, 'input); val req_tag = Bits(5, 'input); val xcpt_ma_ld = Bool('output); // misaligned load @@ -203,6 +202,7 @@ class rocketDCacheDM(lines: Int) extends Component { val req_store = (io.cpu.req_cmd === M_XWR); val req_load = (io.cpu.req_cmd === M_XRD) || (io.cpu.req_cmd === M_PRD); + val req_flush = (io.cpu.req_cmd === M_FLA); val r_req_load = (r_cpu_req_cmd === M_XRD) || (r_cpu_req_cmd === M_PRD); val r_req_store = (r_cpu_req_cmd === M_XWR); val r_req_flush = (r_cpu_req_cmd === M_FLA); @@ -243,7 +243,7 @@ class rocketDCacheDM(lines: Int) extends Component { Mux((state === s_ready), io.cpu.req_idx(PGIDX_BITS-1,offsetbits), r_cpu_req_idx(PGIDX_BITS-1,offsetbits)).toUFix; val tag_we = - ((state === s_refill) && io.mem.req_rdy && (rr_count === UFix(3,2))) || + ((state === s_refill) && io.mem.resp_val && (rr_count === UFix(3,2))) || ((state === 
s_resolve_miss) && r_req_flush); val tag_array = new rocketSRAMsp(lines, tagbits); @@ -268,15 +268,20 @@ class rocketDCacheDM(lines: Int) extends Component { val vb_rdata = Reg(vb_array(tag_addr).toBool); val tag_valid = r_cpu_req_val && vb_rdata; val tag_match = (tag_rdata === io.cpu.req_ppn); - + val tag_hit = tag_valid && tag_match; + val miss = r_cpu_req_val && (!vb_rdata || !tag_match); + // load/store addresses conflict if they are to any part of the same 64 bit word val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetlsb) === p_store_idx(PGIDX_BITS-1,offsetlsb)); - val ldst_conflict = r_cpu_req_val && r_req_load && p_store_valid && addr_match; + val ldst_conflict = tag_valid && tag_match && r_req_load && p_store_valid && addr_match; + val store_hit = r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store ; // write the pending store data when the cache is idle, when the next command isn't a load // or when there's a load to the same address (in which case there's a 2 cycle delay: - // once cycle to write the store data and another to read the data back) - val drain_store = !io.cpu.dtlb_miss && p_store_valid && (!io.cpu.req_val || req_store || ldst_conflict); + // once cycle to write the store data and another to read the data back) + val drain_store = + ((store_hit || p_store_valid) && (!io.cpu.req_val || req_store || req_flush)) || + (p_store_valid && (miss || ldst_conflict)); // write pending store data from a store which missed // after the cache line refill has completed @@ -290,12 +295,10 @@ class rocketDCacheDM(lines: Int) extends Component { p_store_idx <== io.cpu.req_idx; p_store_data <== io.cpu.req_data; p_store_type <== io.cpu.req_type; - p_store_valid <== Bool(true); } - // cancel store if there's a DTLB miss - when (r_cpu_req_val && r_req_store && io.cpu.dtlb_miss) - { - p_store_valid <== Bool(false); + + when (store_hit && !drain_store) { + p_store_valid <== Bool(true); } when (drain_store) { p_store_valid <== Bool(false); @@ -341,13 
+344,12 @@ class rocketDCacheDM(lines: Int) extends Component { // signal a load miss when the data isn't present in the cache and when it's in the pending store data register // (causes the cache to block for 2 cycles and the load instruction is replayed) - val hit = tag_valid && tag_match; - val load_miss = !io.cpu.dtlb_miss && (state === s_ready) && r_cpu_req_val && r_req_load && (!hit || (p_store_valid && addr_match)); + val load_miss = !io.cpu.dtlb_miss && (state === s_ready) && r_cpu_req_val && r_req_load && (!tag_hit || (p_store_valid && addr_match)); // output signals // busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush - io.cpu.req_rdy := !io.cpu.dtlb_miss && (state === s_ready) && !ldst_conflict && (!r_cpu_req_val || (hit && !r_req_flush)); - io.cpu.resp_val := !io.cpu.dtlb_miss && ((state === s_ready) && hit && r_req_load && !(p_store_valid && addr_match)) || + io.cpu.req_rdy := (state === s_ready) && !io.cpu.dtlb_miss && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !r_req_flush)); + io.cpu.resp_val := !io.cpu.dtlb_miss && ((state === s_ready) && tag_hit && r_req_load && !(p_store_valid && addr_match)) || ((state === s_resolve_miss) && r_req_flush) || r_cpu_resp_val; @@ -386,7 +388,7 @@ class rocketDCacheDM(lines: Int) extends Component { when (ldst_conflict) { state <== s_replay_load; } - when (!r_cpu_req_val || (hit && !r_req_flush)) { + when (!r_cpu_req_val || (tag_hit && !r_req_flush)) { state <== s_ready; } when (tag_valid & tag_dirty) { diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 2d4089e9..90e1088b 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -136,7 +136,7 @@ class rocketDTLB(entries: Int) extends Component } // exception check - val outofrange = (io.cpu.resp_ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); + val outofrange = !tlb_miss && (io.cpu.resp_ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); val access_fault_ld = 
tlb_hit && req_load && diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index b6e82faf..dcfb0efb 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -170,7 +170,7 @@ class rocketITLB(entries: Int) extends Component } // exception check - val outofrange = (io.cpu.resp_ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); + val outofrange = !tlb_miss && (io.cpu.resp_ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); val access_fault = tlb_hit && From 5b29765917b3db9da060c121d1d456b512cba008 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Mon, 14 Nov 2011 01:37:20 -0800 Subject: [PATCH 0042/1087] synced up with supervisor mode state in latest ISA simulator --- rocket/src/main/scala/consts.scala | 16 +++++++++------- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/dpath_util.scala | 2 -- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 96a73e9f..a3641272 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -146,17 +146,17 @@ object Constants val PCR_COUNT = UFix( 4, 5); val PCR_COMPARE = UFix( 5, 5); val PCR_CAUSE = UFix( 6, 5); - val PCR_IPI = UFix( 7, 5); - val PCR_MEMSIZE = UFix( 8, 5); - val PCR_PTBR = UFix( 9, 5); + val PCR_PTBR = UFix( 7, 5); + val PCR_SENDIPI = UFix( 8, 5); + val PCR_CLEARIPI = UFix( 9, 5); val PCR_COREID = UFix(10, 5); - val PCR_NUMCORES = UFix(12, 5); + val PCR_K0 = UFix(12, 5); + val PCR_K1 = UFix(13, 5); val PCR_TOHOST = UFix(16, 5); val PCR_FROMHOST = UFix(17, 5); val PCR_CONSOLE = UFix(18, 5); - val PCR_K0 = UFix(24, 5); - val PCR_K1 = UFix(25, 5); - + + // definition of bits in PCR status reg val SR_ET = 0; // enable traps val SR_EF = 1; // enable floating point @@ -185,6 +185,8 @@ object Constants val MEMSIZE_PAGES = 0x8000; // 256 megs val MEMSIZE_BYTES = MEMSIZE_PAGES*8192; + val START_ADDR = 0x2000; + val HAVE_FPU = Bool(false); val HAVE_VEC = 
Bool(false); } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index b6f63c94..e49a1761 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -158,7 +158,7 @@ class rocketDpath extends Component UFix(0, VADDR_BITS))))))))))); when (!io.host.start){ - if_reg_pc <== UFix(0, VADDR_BITS); //32'hFFFF_FFFC; + if_reg_pc <== UFix(START_ADDR, VADDR_BITS); } when (!io.ctrl.stallf) { if_reg_pc <== if_next_pc; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index a1a28287..951ec9c1 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -166,9 +166,7 @@ class rocketDpathPCR extends Component is (PCR_COUNT) { rdata <== Cat(Fill(32, reg_count(31)), reg_count); } is (PCR_COMPARE) { rdata <== Cat(Fill(32, reg_compare(31)), reg_compare); } is (PCR_CAUSE) { rdata <== Cat(Bits(0,59), reg_cause); } - is (PCR_MEMSIZE) { rdata <== Bits(MEMSIZE_PAGES,64); } is (PCR_COREID) { rdata <== Bits(COREID,64); } - is (PCR_NUMCORES) { rdata <== Bits(NUMCORES,64); } is (PCR_FROMHOST) { rdata <== Cat(Fill(32, reg_fromhost(31)), reg_fromhost); } is (PCR_TOHOST) { rdata <== Cat(Fill(32, reg_tohost(31)), reg_tohost); } is (PCR_K0) { rdata <== reg_k0; } From 890bfa7c48dbd5e50b0f94db063e60dd9891f3f3 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Mon, 14 Nov 2011 03:24:02 -0800 Subject: [PATCH 0043/1087] added IPIs and timer interrupts --- rocket/src/main/scala/ctrl.scala | 18 +++++++++++++++--- rocket/src/main/scala/dpath.scala | 6 ++++-- rocket/src/main/scala/dpath_util.scala | 17 +++++++++++------ 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 3eb1b374..dc091ad5 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -58,7 +58,9 @@ class ioCtrlDpath extends Bundle() val sboard_clr0a = UFix(5, 'input); val sboard_clr1 = 
Bool('input); val sboard_clr1a = UFix(5, 'input); - val timer_int = Bool('input); + val mem_valid = Bool('input); // high if there's a valid (not flushed) instruction in mem stage + val irq_timer = Bool('input); + val irq_ipi = Bool('input); } class ioCtrlAll extends Bundle() @@ -234,6 +236,7 @@ class rocketCtrl extends Component DI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,Y), ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,Y,N,Y), FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,Y,N,N,N), + FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,Y,N,N,N), //FIXME CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,Y), MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,N,N,N,Y), MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,N,N,N,Y) @@ -453,8 +456,17 @@ class rocketCtrl extends Component // exception handling // FIXME: verify PC in MEM stage points to valid, restartable instruction - val interrupt = io.dpath.status(SR_ET).toBool && io.dpath.status(15).toBool && io.dpath.timer_int; - val interrupt_cause = UFix(0x17, 5); + val p_irq_timer = (io.dpath.status(15).toBool && io.dpath.irq_timer); + val p_irq_ipi = (io.dpath.status(13).toBool && io.dpath.irq_ipi); + val interrupt = + io.dpath.status(SR_ET).toBool && io.dpath.mem_valid && + ((io.dpath.status(15).toBool && io.dpath.irq_timer) || + (io.dpath.status(13).toBool && io.dpath.irq_ipi)); + + val interrupt_cause = + Mux(p_irq_ipi, UFix(21,5), + Mux(p_irq_timer, UFix(23,5), + UFix(0,5))); val mem_exception = 
interrupt || diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index e49a1761..bfb5eedb 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -354,8 +354,9 @@ class rocketDpath extends Component pcr.io.host.from_wen ^^ io.host.from_wen; pcr.io.host.from ^^ io.host.from; pcr.io.host.to ^^ io.host.to; - - io.ctrl.timer_int := pcr.io.timer_int; + + io.ctrl.irq_timer := pcr.io.irq_timer; + io.ctrl.irq_ipi := pcr.io.irq_ipi; io.ctrl.status := pcr.io.status; io.ptbr := pcr.io.ptbr; io.debug.error_mode := pcr.io.debug.error_mode; @@ -399,6 +400,7 @@ class rocketDpath extends Component // for load/use hazard detection (load byte/halfword) io.ctrl.mem_waddr := mem_reg_waddr; + io.ctrl.mem_valid := mem_reg_valid; // 32/64 bit load handling (moved to earlier in file) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 951ec9c1..97b5e15b 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -53,7 +53,8 @@ class ioDpathPCR extends Bundle() val pc = UFix(VADDR_BITS, 'input); val badvaddr = UFix(VADDR_BITS, 'input); val eret = Bool('input); - val timer_int = Bool('output); + val irq_timer = Bool('output); + val irq_ipi = Bool('output); } class rocketDpathPCR extends Component @@ -83,7 +84,8 @@ class rocketDpathPCR extends Component val reg_status_ps = Reg(resetVal = Bool(false)); val reg_status_et = Reg(resetVal = Bool(false)); - val timer_interrupt = Reg(resetVal = Bool(false)); + val r_irq_timer = Reg(resetVal = Bool(false)); + val r_irq_ipi = Reg(resetVal = Bool(false)); val reg_status = Cat(reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, Bits(0,1), reg_status_ev, reg_status_ef, reg_status_et); val rdata = Wire() { Bits() }; @@ -105,7 +107,7 @@ class rocketDpathPCR extends Component reg_fromhost <== Bits(0,32); } } - + when (io.badvaddr_wen) { reg_badvaddr <== io.badvaddr; } @@ -143,9 +145,11 @@ class 
rocketDpathPCR extends Component when (io.w.addr === PCR_BADVADDR) { reg_badvaddr <== io.w.data(VADDR_BITS-1,0).toUFix; } when (io.w.addr === PCR_EVEC) { reg_ebase <== io.w.data(VADDR_BITS-1,0).toUFix; } when (io.w.addr === PCR_COUNT) { reg_count <== io.w.data(31,0).toUFix; } - when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(31,0).toUFix; timer_interrupt <== Bool(false); } + when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(31,0).toUFix; r_irq_timer <== Bool(false); } when (io.w.addr === PCR_CAUSE) { reg_cause <== io.w.data(4,0); } when (io.w.addr === PCR_FROMHOST) { reg_fromhost <== io.w.data(31,0); } + when (io.w.addr === PCR_SENDIPI) { r_irq_ipi <== Bool(true); } + when (io.w.addr === PCR_CLEARIPI) { r_irq_ipi <== Bool(false); } when (io.w.addr === PCR_K0) { reg_k0 <== io.w.data; } when (io.w.addr === PCR_K1) { reg_k1 <== io.w.data; } when (io.w.addr === PCR_PTBR) { reg_ptbr <== Cat(io.w.data(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } @@ -153,9 +157,10 @@ class rocketDpathPCR extends Component reg_count <== reg_count + UFix(1); when (reg_count === reg_compare) { - timer_interrupt <== Bool(true); + r_irq_timer <== Bool(true); } - io.timer_int := timer_interrupt; + io.irq_timer := r_irq_timer; + io.irq_ipi := r_irq_ipi; when (!io.r.en) { rdata <== Bits(0,64); } switch (io.r.addr) { From b791010bb10119761a9f20f80da50c2a0fc4de59 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Mon, 14 Nov 2011 04:13:13 -0800 Subject: [PATCH 0044/1087] flush.i invalidates I$ & ITLB, writing PTBR invalidates both TLBs --- rocket/src/main/scala/consts.scala | 4 + rocket/src/main/scala/cpu.scala | 6 +- rocket/src/main/scala/ctrl.scala | 215 +++++++++++++------------ rocket/src/main/scala/dpath.scala | 2 + rocket/src/main/scala/dpath_util.scala | 2 + rocket/src/main/scala/icache.scala | 4 + 6 files changed, 128 insertions(+), 105 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 
a3641272..7ef801e6 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -138,6 +138,10 @@ object Constants val M_XA_MAX = Bits("b1101", 4); val M_XA_MINU = Bits("b1110", 4); val M_XA_MAXU = Bits("b1111", 4); + + val SYNC_N = Bits(0,2); + val SYNC_D = Bits(1,2); + val SYNC_I = Bits(2,2); val PCR_STATUS = UFix( 0, 5); val PCR_EPC = UFix( 1, 5); diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 2e777f9f..e543d0e0 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -58,7 +58,7 @@ class rocketProc extends Component // FIXME: make this less verbose // connect ITLB to I$, ctrl, dpath - itlb.io.cpu.invalidate := Bool(false); // FIXME + itlb.io.cpu.invalidate := dpath.io.ptbr_wen || ctrl.io.flush_inst; itlb.io.cpu.status := dpath.io.ctrl.status; itlb.io.cpu.req_val := ctrl.io.imem.req_val; itlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR @@ -66,6 +66,7 @@ class rocketProc extends Component io.imem.req_idx := dpath.io.imem.req_addr(PGIDX_BITS-1,0); io.imem.req_ppn := itlb.io.cpu.resp_ppn; io.imem.req_val := ctrl.io.imem.req_val; + io.imem.invalidate := ctrl.io.flush_inst; ctrl.io.imem.req_rdy := itlb.io.cpu.req_rdy && io.imem.req_rdy; ctrl.io.imem.resp_val := io.imem.resp_val; dpath.io.imem.resp_data := io.imem.resp_data; @@ -75,7 +76,8 @@ class rocketProc extends Component // connect DTLB to D$ arbiter, ctrl+dpath - dtlb.io.cpu.invalidate := Bool(false); // FIXME +// dtlb.io.cpu.invalidate := Bool(false); // FIXME + dtlb.io.cpu.invalidate := dpath.io.ptbr_wen; dtlb.io.cpu.status := dpath.io.ctrl.status; dtlb.io.cpu.req_val := ctrl.io.dmem.req_val; dtlb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index dc091ad5..f9f19194 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -72,6 +72,7 @@ class ioCtrlAll extends Bundle() val host = new 
ioHost(List("start")); val dtlb_busy = Bool('input); val dtlb_miss = Bool('input); + val flush_inst = Bool('output); val xcpt_dtlb_ld = Bool('input); val xcpt_dtlb_st = Bool('input); val xcpt_itlb = Bool('input); @@ -157,115 +158,115 @@ class rocketCtrl extends Component val xpr64 = Y; val cs = ListLookup(io.dpath.inst, - List( N, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + List( N, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), Array( - BNE-> List(Y, BR_NE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), - ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - BEQ-> List(Y, BR_EQ, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), - BLT-> List(Y, BR_LT, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), - BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), - BGE-> List(Y, BR_GE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), - BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + BNE-> List(Y, BR_NE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + BEQ-> List(Y, BR_EQ, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + BLT-> List(Y, BR_LT, 
REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + BGE-> List(Y, BR_GE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - J-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), - JAL-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,N,N,N,N), - JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), - JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), - JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), - RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,N,N,N,N), + J-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + JAL-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,SYNC_N,N,N,N), + JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,SYNC_N,N,N,N), + JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,SYNC_N,N,N,N), + JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, 
A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,SYNC_N,N,N,N), + RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,SYNC_N,N,N,N), - LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - SB-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), - SH-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), - SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), - SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, 
M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + SB-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + SH-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - LUI-> List(Y, BR_N, REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - ORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, 
A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - XORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - riscvAND-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SRA-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, 
N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + LUI-> List(Y, BR_N, REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + ORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + XORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + riscvAND-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, 
A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SRA-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - ADDIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SLLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SRLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SRAIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - ADDW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SUBW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SLLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), - SRAW-> List(xpr64, BR_N, 
REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,N,N,N,N), + ADDIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SLLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SRLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SRAIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + ADDW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SUBW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SLLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + SRAW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64H, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HSU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - MULW-> 
List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_32, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64H, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HSU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_32, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, 
N,MUL_X, Y,DIV_64D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,Y,N), - EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,Y), - DI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,Y), - ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,Y,N,Y), - FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,Y,N,N,N), - FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,Y,N,N,N), //FIXME - CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,Y), - 
MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,N,N,N,Y), - MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,N,N,N,Y) + SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,Y,N), + EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,Y), + DI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,Y), + ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,Y,N,Y), + FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_D,N,N,N), + FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_I,N,N,N), + CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,Y), + MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,SYNC_N,N,N,Y), + MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,SYNC_N,N,N,Y) // Instructions that have not yet been implemented /* // floating point - FLW-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), - FLD-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), - FSW-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, 
REN_N,WEN_N,N,N,N,N), - FSD-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,N,N,N,N), + FLW-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + FLD-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + FSW-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + FSD-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), // atomic memory operations - AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, 
A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOMIN_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), - AMOMAXU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,N,N,N,N), + AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, 
WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOMIN_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOMAXU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), */ )); @@ -322,7 +323,7 @@ class rocketCtrl extends Component val ex_reg_mem_type = Reg(){UFix(width = 3)}; val ex_reg_eret = Reg(resetVal = Bool(false)); val ex_reg_privileged = Reg(resetVal = Bool(false)); - + val ex_reg_flush_inst = Reg(resetVal = Bool(false)); val ex_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val ex_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val ex_reg_xcpt_illegal = Reg(resetVal = 
Bool(false)); @@ -330,6 +331,7 @@ class rocketCtrl extends Component val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); + val mem_reg_flush_inst = Reg(resetVal = Bool(false)); val mem_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val mem_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val mem_reg_xcpt_illegal = Reg(resetVal = Bool(false)); @@ -337,6 +339,8 @@ class rocketCtrl extends Component val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); + val wb_reg_flush_inst = Reg(resetVal = Bool(false)); + when (!io.dpath.stalld) { when (io.dpath.killf) { id_reg_xcpt_ma_inst <== Bool(false); @@ -361,7 +365,7 @@ class rocketCtrl extends Component ex_reg_mem_type <== UFix(0, 3); ex_reg_eret <== Bool(false); ex_reg_privileged <== Bool(false); - + ex_reg_flush_inst <== Bool(false); ex_reg_xcpt_ma_inst <== Bool(false); ex_reg_xcpt_itlb <== Bool(false); ex_reg_xcpt_illegal <== Bool(false); @@ -378,7 +382,7 @@ class rocketCtrl extends Component ex_reg_mem_type <== id_mem_type; ex_reg_eret <== id_eret.toBool; ex_reg_privileged <== id_privileged.toBool; - + ex_reg_flush_inst <== (id_sync === SYNC_I); ex_reg_xcpt_ma_inst <== id_reg_xcpt_ma_inst; ex_reg_xcpt_itlb <== id_reg_xcpt_itlb; ex_reg_xcpt_illegal <== illegal_inst; @@ -423,7 +427,7 @@ class rocketCtrl extends Component mem_reg_mem_cmd <== UFix(0, 4); mem_reg_mem_type <== UFix(0, 3); mem_reg_privileged <== Bool(false); - + mem_reg_flush_inst <== Bool(false); mem_reg_xcpt_ma_inst <== Bool(false); mem_reg_xcpt_itlb <== Bool(false); mem_reg_xcpt_illegal <== Bool(false); @@ -438,7 +442,7 @@ class rocketCtrl extends Component mem_reg_mem_cmd <== ex_reg_mem_cmd; mem_reg_mem_type <== ex_reg_mem_type; mem_reg_privileged <== ex_reg_privileged; - + mem_reg_flush_inst <== ex_reg_flush_inst; mem_reg_xcpt_ma_inst <== ex_reg_xcpt_ma_inst; mem_reg_xcpt_itlb <== ex_reg_xcpt_itlb; mem_reg_xcpt_illegal <== ex_reg_xcpt_illegal; @@ 
-449,9 +453,11 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killm) { wb_reg_div_mul_val <== Bool(false); + wb_reg_flush_inst <== Bool(false); } otherwise { wb_reg_div_mul_val <== mem_reg_div_mul_val; + wb_reg_flush_inst <== mem_reg_flush_inst; } // exception handling @@ -500,8 +506,9 @@ class rocketCtrl extends Component io.dpath.cause := mem_cause; io.dpath.badvaddr_wen := io.xcpt_dtlb_ld || io.xcpt_dtlb_st; - // replay execute stage PC when the D$ is blocked, when the D$ misses, and for privileged instructions - val replay_ex = (ex_reg_mem_val && !io.dmem.req_rdy) || io.dmem.resp_miss || mem_reg_privileged; + // replay execute stage PC when the D$ is blocked, when the D$ misses, + // for privileged instructions, and for fence.i instructions + val replay_ex = (ex_reg_mem_val && !io.dmem.req_rdy) || io.dmem.resp_miss || mem_reg_flush_inst || mem_reg_privileged; // replay mem stage PC on a DTLB miss val replay_mem = io.dtlb_miss; @@ -591,7 +598,7 @@ class rocketCtrl extends Component (id_sel_wa === WA_RD) & id_stall_waddr | (id_sel_wa === WA_RA) & id_stall_ra | id_mem_val & (~io.dmem.req_rdy | io.dtlb_busy) | - id_sync & ~io.dmem.req_rdy | + (id_sync === SYNC_D) & ~io.dmem.req_rdy | id_console_out_val & ~io.console.rdy | id_div_val & ~io.dpath.div_rdy | io.dpath.div_result_val | @@ -604,6 +611,8 @@ class rocketCtrl extends Component val mul_wb = io.dpath.mul_result_val; val div_wb = io.dpath.div_result_val & !mul_wb; + io.flush_inst := wb_reg_flush_inst; + io.dpath.stalld := ctrl_stalld.toBool; io.dpath.killf := take_pc | ~io.imem.resp_val; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index bfb5eedb..641d40d1 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -28,6 +28,7 @@ class ioDpathAll extends Bundle() val debug = new ioDebug(); val dmem = new ioDpathDmem(); val imem = new ioDpathImem(); + val ptbr_wen = Bool('output); val ptbr = UFix(PADDR_BITS, 'output); } @@ 
-359,6 +360,7 @@ class rocketDpath extends Component io.ctrl.irq_ipi := pcr.io.irq_ipi; io.ctrl.status := pcr.io.status; io.ptbr := pcr.io.ptbr; + io.ptbr_wen := pcr.io.ptbr_wen; io.debug.error_mode := pcr.io.debug.error_mode; // branch resolution logic diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 97b5e15b..fbbd1c08 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -53,6 +53,7 @@ class ioDpathPCR extends Bundle() val pc = UFix(VADDR_BITS, 'input); val badvaddr = UFix(VADDR_BITS, 'input); val eret = Bool('input); + val ptbr_wen = Bool('output); val irq_timer = Bool('output); val irq_ipi = Bool('output); } @@ -90,6 +91,7 @@ class rocketDpathPCR extends Component val reg_status = Cat(reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, Bits(0,1), reg_status_ev, reg_status_ef, reg_status_et); val rdata = Wire() { Bits() }; + io.ptbr_wen := reg_status_vm.toBool && !io.exception && io.w.en && (io.w.addr === PCR_PTBR); io.status := Cat(reg_status_vm, reg_status_im, reg_status); io.evec := reg_ebase; io.ptbr := reg_ptbr; diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 5871a165..a7fa954e 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -8,6 +8,7 @@ import scala.math._; // interface between I$ and pipeline/ITLB (32 bits wide) class ioImem(view: List[String] = null) extends Bundle (view) { + val invalidate = Bool('input); val itlb_miss = Bool('input); val req_val = Bool('input); val req_rdy = Bool('output); @@ -112,6 +113,9 @@ class rocketICacheDM(lines: Int) extends Component { // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); + when (io.cpu.invalidate) { + vb_array <== Bits(0,lines); + } when (tag_we) { vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } From cd6e4633204a1754121b0b7956884a398a4d3cf1 Mon Sep 17 00:00:00 2001 
From: Rimas Avizienis Date: Mon, 14 Nov 2011 13:48:49 -0800 Subject: [PATCH 0045/1087] added ei and di instructions --- rocket/src/main/scala/consts.scala | 4 + rocket/src/main/scala/ctrl.scala | 233 ++++++++++++++----------- rocket/src/main/scala/dpath.scala | 2 + rocket/src/main/scala/dpath_util.scala | 12 +- 4 files changed, 145 insertions(+), 106 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 7ef801e6..9c3fece3 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -139,6 +139,10 @@ object Constants val M_XA_MINU = Bits("b1110", 4); val M_XA_MAXU = Bits("b1111", 4); + val I_X = Bits(0,2); + val I_DI = Bits(1,2); + val I_EI = Bits(2,2); + val SYNC_N = Bits(0,2); val SYNC_D = Bits(1,2); val SYNC_I = Bits(2,2); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f9f19194..d8628f63 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -36,6 +36,9 @@ class ioCtrlDpath extends Bundle() val eret = Bool('output); val mem_load = Bool('output); val wen = Bool('output); + // enable/disable interrupts + val irq_enable = Bool('output); + val irq_disable = Bool('output); // exception handling val exception = Bool('output); val cause = UFix(5,'output); @@ -158,115 +161,115 @@ class rocketCtrl extends Component val xpr64 = Y; val cs = ListLookup(io.dpath.inst, - List( N, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + List( N, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), Array( - BNE-> List(Y, BR_NE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - BEQ-> 
List(Y, BR_EQ, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - BLT-> List(Y, BR_LT, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - BGE-> List(Y, BR_GE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + BNE-> List(Y, BR_NE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + BEQ-> List(Y, BR_EQ, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + BLT-> List(Y, BR_LT, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + BGE-> List(Y, BR_GE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - J-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - JAL-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,SYNC_N,N,N,N), - JALR_C-> 
List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,SYNC_N,N,N,N), - JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,SYNC_N,N,N,N), - JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,SYNC_N,N,N,N), - RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,SYNC_N,N,N,N), + J-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + JAL-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, 
N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - SB-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - SH-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LWU-> List(xpr64, BR_N, 
REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SB-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SH-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LUI-> List(Y, BR_N, REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - ORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - XORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - riscvAND-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SRA-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + LUI-> List(Y, BR_N, REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, 
A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + ORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + XORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + riscvAND-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, 
N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SRA-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - ADDIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SLLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SRLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SRAIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - ADDW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SUBW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SLLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), - SRAW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,SYNC_N,N,N,N), + ADDIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SLLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SRLIW-> 
List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SRAIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + ADDW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SUBW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SLLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SRAW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64H, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HSU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_32, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64H, N,DIV_X, 
WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HSU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_32, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, 
Y,DIV_64R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,Y,N), - EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,Y), - DI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,Y), - ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,Y,N,Y), - FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_D,N,N,N), - FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_I,N,N,N), - CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,Y), - MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,SYNC_N,N,N,Y), - MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_N,WA_X, WB_X, REN_N,WEN_Y,SYNC_N,N,N,Y) + SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N), + EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y), + DI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y), + ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y), + FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N), + FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N), + CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y), + MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y), + MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y) // Instructions that have not yet been implemented /* // floating point - FLW-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - FLD-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - FSW-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - FSD-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,SYNC_N,N,N,N), 
+ FLW-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + FLD-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + FSW-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + FSD-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), // atomic memory operations - AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - 
AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOMIN_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), - AMOMAXU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,SYNC_N,N,N,N), + AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X 
,SYNC_N,N,N,N), + AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOMIN_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOMAXU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), */ )); @@ -276,7 +279,7 @@ class rocketCtrl extends Component io.imem.req_val := io.host.start && !io.dpath.xcpt_ma_inst; val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_sel_alu1 :: id_fn_dw :: id_fn_alu :: csremainder = cs; - val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: 
id_wen_pcr :: id_sync :: id_eret :: id_syscall :: id_privileged :: Nil = csremainder; + val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: Nil = csremainder; val id_raddr2 = io.dpath.inst(21,17); val id_raddr1 = io.dpath.inst(26,22); @@ -317,12 +320,14 @@ class rocketCtrl extends Component val ex_reg_br_type = Reg(){UFix(width = 4)}; val ex_reg_btb_hit = Reg(){Bool()}; - val ex_reg_div_mul_val = Reg(){Bool()}; + val ex_reg_inst_div_mul_val = Reg(){Bool()}; val ex_reg_mem_val = Reg(){Bool()}; val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; val ex_reg_mem_type = Reg(){UFix(width = 3)}; val ex_reg_eret = Reg(resetVal = Bool(false)); val ex_reg_privileged = Reg(resetVal = Bool(false)); + val ex_reg_inst_di = Reg(resetVal = Bool(false)); + val ex_reg_inst_ei = Reg(resetVal = Bool(false)); val ex_reg_flush_inst = Reg(resetVal = Bool(false)); val ex_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val ex_reg_xcpt_itlb = Reg(resetVal = Bool(false)); @@ -331,6 +336,8 @@ class rocketCtrl extends Component val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); + val mem_reg_inst_di = Reg(resetVal = Bool(false)); + val mem_reg_inst_ei = Reg(resetVal = Bool(false)); val mem_reg_flush_inst = Reg(resetVal = Bool(false)); val mem_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val mem_reg_xcpt_itlb = Reg(resetVal = Bool(false)); @@ -338,8 +345,10 @@ class rocketCtrl extends Component val mem_reg_xcpt_privileged = Reg(resetVal = Bool(false)); val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); - - val wb_reg_flush_inst = Reg(resetVal = Bool(false)); + + val wb_reg_inst_di = Reg(resetVal = Bool(false)); + val wb_reg_inst_ei = Reg(resetVal = Bool(false)); + val wb_reg_flush_inst = Reg(resetVal = Bool(false)); 
when (!io.dpath.stalld) { when (io.dpath.killf) { @@ -359,12 +368,14 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killd) { ex_reg_br_type <== BR_N; ex_reg_btb_hit <== Bool(false); - ex_reg_div_mul_val <== Bool(false); + ex_reg_inst_div_mul_val <== Bool(false); ex_reg_mem_val <== Bool(false); ex_reg_mem_cmd <== UFix(0, 4); ex_reg_mem_type <== UFix(0, 3); ex_reg_eret <== Bool(false); ex_reg_privileged <== Bool(false); + ex_reg_inst_di <== Bool(false); + ex_reg_inst_ei <== Bool(false); ex_reg_flush_inst <== Bool(false); ex_reg_xcpt_ma_inst <== Bool(false); ex_reg_xcpt_itlb <== Bool(false); @@ -376,12 +387,14 @@ class rocketCtrl extends Component otherwise { ex_reg_br_type <== id_br_type; ex_reg_btb_hit <== id_reg_btb_hit; - ex_reg_div_mul_val <== id_div_val.toBool || id_mul_val.toBool; + ex_reg_inst_div_mul_val <== id_div_val.toBool || id_mul_val.toBool; ex_reg_mem_val <== id_mem_val.toBool; ex_reg_mem_cmd <== id_mem_cmd; ex_reg_mem_type <== id_mem_type; ex_reg_eret <== id_eret.toBool; ex_reg_privileged <== id_privileged.toBool; + ex_reg_inst_di <== (id_irq === I_DI); + ex_reg_inst_ei <== (id_irq === I_EI); ex_reg_flush_inst <== (id_sync === SYNC_I); ex_reg_xcpt_ma_inst <== id_reg_xcpt_ma_inst; ex_reg_xcpt_itlb <== id_reg_xcpt_itlb; @@ -427,6 +440,8 @@ class rocketCtrl extends Component mem_reg_mem_cmd <== UFix(0, 4); mem_reg_mem_type <== UFix(0, 3); mem_reg_privileged <== Bool(false); + mem_reg_inst_di <== Bool(false); + mem_reg_inst_ei <== Bool(false); mem_reg_flush_inst <== Bool(false); mem_reg_xcpt_ma_inst <== Bool(false); mem_reg_xcpt_itlb <== Bool(false); @@ -436,12 +451,14 @@ class rocketCtrl extends Component mem_reg_xcpt_syscall <== Bool(false); } otherwise { - mem_reg_div_mul_val <== ex_reg_div_mul_val; + mem_reg_div_mul_val <== ex_reg_inst_div_mul_val; mem_reg_eret <== ex_reg_eret; mem_reg_mem_val <== ex_reg_mem_val; mem_reg_mem_cmd <== ex_reg_mem_cmd; mem_reg_mem_type <== ex_reg_mem_type; mem_reg_privileged <== ex_reg_privileged; + 
mem_reg_inst_di <== ex_reg_inst_di; + mem_reg_inst_ei <== ex_reg_inst_ei; mem_reg_flush_inst <== ex_reg_flush_inst; mem_reg_xcpt_ma_inst <== ex_reg_xcpt_ma_inst; mem_reg_xcpt_itlb <== ex_reg_xcpt_itlb; @@ -453,10 +470,14 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killm) { wb_reg_div_mul_val <== Bool(false); + wb_reg_inst_di <== Bool(false); + wb_reg_inst_ei <== Bool(false); wb_reg_flush_inst <== Bool(false); } otherwise { wb_reg_div_mul_val <== mem_reg_div_mul_val; + wb_reg_inst_di <== mem_reg_inst_di; + wb_reg_inst_ei <== mem_reg_inst_ei; wb_reg_flush_inst <== mem_reg_flush_inst; } @@ -572,7 +593,7 @@ class rocketCtrl extends Component // check for divide and multiply instructions in ex,mem,wb stages val dm_stall_ex = - ex_reg_div_mul_val && + ex_reg_inst_div_mul_val && ((id_ren1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || (id_ren2.toBool && (id_raddr2 === io.dpath.ex_waddr))); @@ -611,10 +632,10 @@ class rocketCtrl extends Component val mul_wb = io.dpath.mul_result_val; val div_wb = io.dpath.div_result_val & !mul_wb; - io.flush_inst := wb_reg_flush_inst; + + io.flush_inst := wb_reg_flush_inst; io.dpath.stalld := ctrl_stalld.toBool; - io.dpath.killf := take_pc | ~io.imem.resp_val; io.dpath.killd := ctrl_killd.toBool; io.dpath.killx := kill_ex.toBool; @@ -639,6 +660,8 @@ class rocketCtrl extends Component io.dpath.ren_pcr := id_ren_pcr.toBool; io.dpath.wen_pcr := id_wen_pcr.toBool; io.dpath.eret := id_eret.toBool; + io.dpath.irq_disable := wb_reg_inst_di; + io.dpath.irq_enable := wb_reg_inst_ei; } } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 641d40d1..d2340ccb 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -459,6 +459,8 @@ class rocketDpath extends Component pcr.io.w.en := wb_reg_ctrl_wen_pcr; pcr.io.w.data := wb_reg_wdata; + pcr.io.di := io.ctrl.irq_disable; + pcr.io.ei := io.ctrl.irq_enable; pcr.io.eret := wb_reg_ctrl_eret; pcr.io.exception := 
wb_reg_ctrl_exception; pcr.io.cause := wb_reg_ctrl_cause; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index fbbd1c08..edbb22af 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -53,6 +53,8 @@ class ioDpathPCR extends Bundle() val pc = UFix(VADDR_BITS, 'input); val badvaddr = UFix(VADDR_BITS, 'input); val eret = Bool('input); + val ei = Bool('input); + val di = Bool('input); val ptbr_wen = Bool('output); val irq_timer = Bool('output); val irq_ipi = Bool('output); @@ -125,7 +127,15 @@ class rocketDpathPCR extends Component reg_epc <== io.pc; reg_cause <== io.cause; } - + + when (!io.exception && io.di) { + reg_status_et <== Bool(false); + } + + when (!io.exception && io.ei) { + reg_status_et <== Bool(true); + } + when (!io.exception && io.eret) { reg_status_s <== reg_status_ps; reg_status_et <== Bool(true); From db87924fbf64b1e2066f04451d4598fee2534ccf Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Mon, 14 Nov 2011 14:35:10 -0800 Subject: [PATCH 0046/1087] made eret instruction take an illegal inst exception when ET is set --- rocket/src/main/scala/consts.scala | 7 ++++--- rocket/src/main/scala/ctrl.scala | 5 +++-- rocket/src/main/scala/dpath_util.scala | 8 +++++--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 9c3fece3..d27d1003 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -155,8 +155,8 @@ object Constants val PCR_COMPARE = UFix( 5, 5); val PCR_CAUSE = UFix( 6, 5); val PCR_PTBR = UFix( 7, 5); - val PCR_SENDIPI = UFix( 8, 5); - val PCR_CLEARIPI = UFix( 9, 5); + val PCR_SEND_IPI = UFix( 8, 5); + val PCR_CLR_IPI = UFix( 9, 5); val PCR_COREID = UFix(10, 5); val PCR_K0 = UFix(12, 5); val PCR_K1 = UFix(13, 5); @@ -169,6 +169,7 @@ object Constants val SR_ET = 0; // enable traps val SR_EF = 1; // enable floating point val SR_EV = 
2; // enable vector unit + val SR_EC = 3; // enable compressed instruction encoding val SR_PS = 4; // mode stack bit val SR_S = 5; // user/supervisor mode val SR_UX = 6; // 64 bit user mode @@ -176,7 +177,6 @@ object Constants val SR_VM = 16; // VM enable val COREID = 0; - val NUMCORES = 1; val PADDR_BITS = 40; val VADDR_BITS = 43; val PGIDX_BITS = 13; @@ -195,6 +195,7 @@ object Constants val START_ADDR = 0x2000; + val HAVE_RVC = Bool(false); val HAVE_FPU = Bool(false); val HAVE_VEC = Bool(false); } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index d8628f63..9ee4c92d 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -363,7 +363,8 @@ class rocketCtrl extends Component } } - val illegal_inst = !id_int_val.toBool && !id_fp_val.toBool; + // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim) + val illegal_inst = !(id_int_val.toBool || id_fp_val.toBool) || (id_eret.toBool && io.dpath.status(SR_ET).toBool); when (reset.toBool || io.dpath.killd) { ex_reg_br_type <== BR_N; @@ -399,7 +400,7 @@ class rocketCtrl extends Component ex_reg_xcpt_ma_inst <== id_reg_xcpt_ma_inst; ex_reg_xcpt_itlb <== id_reg_xcpt_itlb; ex_reg_xcpt_illegal <== illegal_inst; - ex_reg_xcpt_privileged <== (id_privileged & ~io.dpath.status(5)).toBool; + ex_reg_xcpt_privileged <== (id_privileged & ~io.dpath.status(SR_S)).toBool; ex_reg_xcpt_fpu <== id_fp_val.toBool; ex_reg_xcpt_syscall <== id_syscall.toBool; } diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index edbb22af..2d4f6604 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -81,6 +81,7 @@ class rocketDpathPCR extends Component val reg_status_im = Reg(resetVal = Bits(0,8)); val reg_status_sx = Reg(resetVal = Bool(true)); val reg_status_ux = Reg(resetVal = Bool(true)); + val reg_status_ec = Reg(resetVal = Bool(false)); val reg_status_ef = 
Reg(resetVal = Bool(false)); val reg_status_ev = Reg(resetVal = Bool(false)); val reg_status_s = Reg(resetVal = Bool(true)); @@ -90,7 +91,7 @@ class rocketDpathPCR extends Component val r_irq_timer = Reg(resetVal = Bool(false)); val r_irq_ipi = Reg(resetVal = Bool(false)); - val reg_status = Cat(reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, Bits(0,1), reg_status_ev, reg_status_ef, reg_status_et); + val reg_status = Cat(reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, reg_status_ec, reg_status_ev, reg_status_ef, reg_status_et); val rdata = Wire() { Bits() }; io.ptbr_wen := reg_status_vm.toBool && !io.exception && io.w.en && (io.w.addr === PCR_PTBR); @@ -151,6 +152,7 @@ class rocketDpathPCR extends Component reg_status_ps <== io.w.data(SR_PS).toBool; reg_status_ev <== HAVE_VEC && io.w.data(SR_EV).toBool; reg_status_ef <== HAVE_FPU && io.w.data(SR_EF).toBool; + reg_status_ec <== HAVE_RVC && io.w.data(SR_EC).toBool; reg_status_et <== io.w.data(SR_ET).toBool; } when (io.w.addr === PCR_EPC) { reg_epc <== io.w.data(VADDR_BITS-1,0).toUFix; } @@ -160,8 +162,8 @@ class rocketDpathPCR extends Component when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(31,0).toUFix; r_irq_timer <== Bool(false); } when (io.w.addr === PCR_CAUSE) { reg_cause <== io.w.data(4,0); } when (io.w.addr === PCR_FROMHOST) { reg_fromhost <== io.w.data(31,0); } - when (io.w.addr === PCR_SENDIPI) { r_irq_ipi <== Bool(true); } - when (io.w.addr === PCR_CLEARIPI) { r_irq_ipi <== Bool(false); } + when (io.w.addr === PCR_SEND_IPI) { r_irq_ipi <== Bool(true); } + when (io.w.addr === PCR_CLR_IPI) { r_irq_ipi <== Bool(false); } when (io.w.addr === PCR_K0) { reg_k0 <== io.w.data; } when (io.w.addr === PCR_K1) { reg_k1 <== io.w.data; } when (io.w.addr === PCR_PTBR) { reg_ptbr <== Cat(io.w.data(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } From 48cec01710e300283970e2df22a0ff10cfb0f5ed Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 15 Nov 2011 00:11:22 -0800 
Subject: [PATCH 0047/1087] updated riscv-bmarks and riscv-tests to build with new toolchain --- rocket/src/main/scala/cpu.scala | 12 +-- rocket/src/main/scala/dcache.scala | 139 +++++++++++++++++++++-------- rocket/src/main/scala/dpath.scala | 4 +- 3 files changed, 109 insertions(+), 46 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index e543d0e0..8cde5d50 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -53,12 +53,11 @@ class rocketProc extends Component dpath.io.host ^^ io.host; ctrl.io.host.start := io.host.start; dpath.io.debug ^^ io.debug; -// dpath.io.imem.resp_data ^^ io.imem.resp_data; - - // FIXME: make this less verbose + // FIXME: try to make this more compact + // connect ITLB to I$, ctrl, dpath - itlb.io.cpu.invalidate := dpath.io.ptbr_wen || ctrl.io.flush_inst; + itlb.io.cpu.invalidate := dpath.io.ptbr_wen; itlb.io.cpu.status := dpath.io.ctrl.status; itlb.io.cpu.req_val := ctrl.io.imem.req_val; itlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR @@ -76,7 +75,6 @@ class rocketProc extends Component // connect DTLB to D$ arbiter, ctrl+dpath -// dtlb.io.cpu.invalidate := Bool(false); // FIXME dtlb.io.cpu.invalidate := dpath.io.ptbr_wen; dtlb.io.cpu.status := dpath.io.ctrl.status; dtlb.io.cpu.req_val := ctrl.io.dmem.req_val; @@ -99,14 +97,10 @@ class rocketProc extends Component arb.io.mem ^^ io.dmem // connect arbiter to ctrl+dpath+DTLB -// arb.io.cpu.req_val := dtlb.io.cpu.resp_val; arb.io.cpu.req_val := ctrl.io.dmem.req_val; arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; arb.io.cpu.req_type := ctrl.io.dmem.req_type; -// arb.io.cpu.dtlb_busy := dtlb.io.cpu.resp_busy; arb.io.cpu.dtlb_miss := dtlb.io.cpu.resp_miss; - -// arb.io.cpu.req_addr := dtlb.io.cpu.resp_addr; arb.io.cpu.req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0); arb.io.cpu.req_ppn := dtlb.io.cpu.resp_ppn; arb.io.cpu.req_data := dpath.io.dmem.req_data; diff --git 
a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index d71b2155..e7ac5744 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -48,7 +48,7 @@ class rocketDCacheStoreGen extends Component { val req_type = Bits(3, 'input); val req_addr_lsb = Bits(3, 'input); val req_data = Bits(64, 'input); - val store_wmask = Bits(64, 'output); + val store_wmask = Bits(8, 'output); val store_data = Bits(64, 'output); } @@ -86,16 +86,8 @@ class rocketDCacheStoreGen extends Component { Mux(io.req_type === MT_D, wmask_d, UFix(0, 8))))); - io.store_wmask := - Cat(Fill(8, store_wmask_byte(7)), - Fill(8, store_wmask_byte(6)), - Fill(8, store_wmask_byte(5)), - Fill(8, store_wmask_byte(4)), - Fill(8, store_wmask_byte(3)), - Fill(8, store_wmask_byte(2)), - Fill(8, store_wmask_byte(1)), - Fill(8, store_wmask_byte(0))); - + io.store_wmask := store_wmask_byte; + io.store_data := Mux(io.req_type === MT_B, Fill(8, io.req_data( 7,0)), Mux(io.req_type === MT_H, Fill(4, io.req_data(15,0)), @@ -183,7 +175,7 @@ class rocketDCacheDM(lines: Int) extends Component { val offsetmsb = indexlsb-1; val offsetlsb = 3; - val s_reset :: s_ready :: s_replay_load :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(8) { UFix() }; + val s_reset :: s_ready :: s_replay_load :: s_write_amo :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(9) { UFix() }; val state = Reg(resetVal = s_reset); // idx arrives one clock cycle prior to ppn b/c of DTLB @@ -194,6 +186,7 @@ class rocketDCacheDM(lines: Int) extends Component { val r_cpu_req_type = Reg(resetVal = Bits(0,3)); val r_cpu_req_tag = Reg(resetVal = Bits(0,5)); val r_cpu_resp_val = Reg(resetVal = Bool(false)); + val r_amo_data = Reg(resetVal = Bits(0,64)); val p_store_data = Reg(resetVal = Bits(0,64)); val p_store_idx = Reg(resetVal = Bits(0,PGIDX_BITS)); @@ -203,11 +196,13 @@ class rocketDCacheDM(lines: Int) extends 
Component { val req_store = (io.cpu.req_cmd === M_XWR); val req_load = (io.cpu.req_cmd === M_XRD) || (io.cpu.req_cmd === M_PRD); val req_flush = (io.cpu.req_cmd === M_FLA); + val req_amo = io.cpu.req_cmd(3).toBool; val r_req_load = (r_cpu_req_cmd === M_XRD) || (r_cpu_req_cmd === M_PRD); val r_req_store = (r_cpu_req_cmd === M_XWR); val r_req_flush = (r_cpu_req_cmd === M_FLA); val r_req_ptw_load = (r_cpu_req_cmd === M_PRD); - + val r_req_amo = r_cpu_req_cmd(3).toBool; + when (io.cpu.req_val && io.cpu.req_rdy) { r_cpu_req_idx <== io.cpu.req_idx; r_cpu_req_cmd <== io.cpu.req_cmd; @@ -273,7 +268,7 @@ class rocketDCacheDM(lines: Int) extends Component { // load/store addresses conflict if they are to any part of the same 64 bit word val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetlsb) === p_store_idx(PGIDX_BITS-1,offsetlsb)); - val ldst_conflict = tag_valid && tag_match && r_req_load && p_store_valid && addr_match; + val ldst_conflict = tag_valid && tag_match && (r_req_load || r_req_amo) && p_store_valid && addr_match; val store_hit = r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store ; // write the pending store data when the cache is idle, when the next command isn't a load @@ -297,6 +292,10 @@ class rocketDCacheDM(lines: Int) extends Component { p_store_type <== io.cpu.req_type; } + when (io.cpu.req_val && io.cpu.req_rdy && req_amo) { + r_amo_data <== io.cpu.req_data; + } + when (store_hit && !drain_store) { p_store_valid <== Bool(true); } @@ -304,52 +303,85 @@ class rocketDCacheDM(lines: Int) extends Component { p_store_valid <== Bool(false); db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } - when (resolve_store) { + when (resolve_store || (state === s_write_amo)) { db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } when (tag_we) { db_array <== db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); } - // generate write mask and data signals for stores + // 
generate write mask and data signals for stores and amos val storegen = new rocketDCacheStoreGen(); storegen.io.req_addr_lsb := p_store_idx(2,0); storegen.io.req_data := p_store_data; - storegen.io.req_type := p_store_type + storegen.io.req_type := p_store_type; val store_data = Fill(2, storegen.io.store_data); - val store_wmask_d = storegen.io.store_wmask; + val store_wmask_b = storegen.io.store_wmask; + val store_wmask_d = Cat(Fill(8, store_wmask_b(7)), + Fill(8, store_wmask_b(6)), + Fill(8, store_wmask_b(5)), + Fill(8, store_wmask_b(4)), + Fill(8, store_wmask_b(3)), + Fill(8, store_wmask_b(2)), + Fill(8, store_wmask_b(1)), + Fill(8, store_wmask_b(0))); val store_idx_sel = p_store_idx(offsetlsb).toBool; val store_wmask = Mux(store_idx_sel, Cat(store_wmask_d, Bits(0,64)), Cat(Bits(0,64), store_wmask_d)); // data array val data_array = new rocketSRAMsp(lines*4, 128); + val data_array_rdata = data_array.io.q; + val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); + val r_resp_data = Reg(resp_data); + + // ALU for AMOs + val amo_wmask = + Mux(r_cpu_req_type === MT_D, ~Bits(0,8), + Mux(r_cpu_req_idx(2).toBool, Cat(~Bits(0,4), Bits(0,4)), + Cat(Bits(0,4), ~Bits(0,4)))); + + val amo_alu = new rocketDCacheAmoALU(); + amo_alu.io.cmd := r_cpu_req_cmd; + amo_alu.io.wmask := amo_wmask; + amo_alu.io.lhs := r_resp_data.toUFix; + amo_alu.io.rhs := r_amo_data.toUFix; + val amo_alu_out = amo_alu.io.result; + data_array.io.a := Mux(drain_store || resolve_store, p_store_idx(PGIDX_BITS-1, offsetmsb-1), Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count_next), Mux((state === s_start_writeback) || (state === s_writeback) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count), - Mux((state === s_resolve_miss) || (state === s_replay_load), r_cpu_req_idx(PGIDX_BITS-1, offsetmsb-1), + Mux((state === s_resolve_miss) || (state === s_replay_load) || (state === 
s_write_amo), r_cpu_req_idx(PGIDX_BITS-1, offsetmsb-1), io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1))))).toUFix; - data_array.io.d := Mux((state === s_refill), io.mem.resp_data, store_data); - data_array.io.we := ((state === s_refill) && io.mem.resp_val) || drain_store || resolve_store; + data_array.io.d := + Mux((state === s_refill), io.mem.resp_data, + Mux((state === s_write_amo), amo_alu_out, + store_data)); + data_array.io.we := + ((state === s_refill) && io.mem.resp_val) || + (state === s_write_amo) || + drain_store || resolve_store; + data_array.io.bweb := Mux((state === s_refill), ~Bits(0,128), store_wmask); data_array.io.ce := - (io.cpu.req_val && io.cpu.req_rdy && req_load) || + (io.cpu.req_val && io.cpu.req_rdy && (req_load || req_amo)) || (state === s_start_writeback) || (state === s_writeback) || - ((state === s_resolve_miss) && r_req_load) || + ((state === s_resolve_miss) && (r_req_load || r_req_amo)) || (state === s_replay_load); - - val data_array_rdata = data_array.io.q; // signal a load miss when the data isn't present in the cache and when it's in the pending store data register - // (causes the cache to block for 2 cycles and the load instruction is replayed) - val load_miss = !io.cpu.dtlb_miss && (state === s_ready) && r_cpu_req_val && r_req_load && (!tag_hit || (p_store_valid && addr_match)); + // (causes the cache to block for 2 cycles and the load or amo instruction is replayed) + val load_miss = + !io.cpu.dtlb_miss && + (state === s_ready) && r_cpu_req_val && (r_req_load || r_req_amo) && (!tag_hit || (p_store_valid && addr_match)); // output signals // busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush - io.cpu.req_rdy := (state === s_ready) && !io.cpu.dtlb_miss && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !r_req_flush)); - io.cpu.resp_val := !io.cpu.dtlb_miss && ((state === s_ready) && tag_hit && r_req_load && !(p_store_valid && addr_match)) || + io.cpu.req_rdy := (state === 
s_ready) && !io.cpu.dtlb_miss && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))); + io.cpu.resp_val := !io.cpu.dtlb_miss && + ((state === s_ready) && tag_hit && (r_req_load || r_req_amo) && !(p_store_valid && addr_match)) || ((state === s_resolve_miss) && r_req_flush) || r_cpu_resp_val; @@ -358,15 +390,13 @@ class rocketDCacheDM(lines: Int) extends Component { (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0,2))) || ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0,3))); - io.cpu.xcpt_ma_ld := r_cpu_req_val && r_req_load && misaligned; - io.cpu.xcpt_ma_st := r_cpu_req_val && r_req_store && misaligned; + io.cpu.xcpt_ma_ld := r_cpu_req_val && (r_req_load || r_req_amo) && misaligned; + io.cpu.xcpt_ma_st := r_cpu_req_val && (r_req_store || r_req_amo) && misaligned; io.cpu.resp_miss := load_miss; // tag MSB distinguishes between loads destined for the PTW and CPU io.cpu.resp_tag := Cat(r_req_ptw_load, r_cpu_req_type, r_cpu_req_idx(2,0), r_cpu_req_tag); - io.cpu.resp_data := - Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), - data_array_rdata(63,0)); + io.cpu.resp_data := resp_data; io.mem.req_val := (state === s_req_refill) || (state === s_writeback); io.mem.req_rw := (state === s_writeback); @@ -388,9 +418,12 @@ class rocketDCacheDM(lines: Int) extends Component { when (ldst_conflict) { state <== s_replay_load; } - when (!r_cpu_req_val || (tag_hit && !r_req_flush)) { + when (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))) { state <== s_ready; } + when (tag_hit && r_req_amo) { + state <== s_write_amo; + } when (tag_valid & tag_dirty) { state <== s_start_writeback; } @@ -404,6 +437,9 @@ class rocketDCacheDM(lines: Int) extends Component { is (s_replay_load) { state <== s_ready; } + is (s_write_amo) { + state <== s_ready; + } is (s_start_writeback) { state <== s_writeback; } @@ -425,9 +461,42 @@ class rocketDCacheDM(lines: Int) extends Component { when 
(io.mem.resp_val && (rr_count === UFix(3,2))) { state <== s_resolve_miss; } } is (s_resolve_miss) { + when (r_req_amo) { + state <== s_write_amo; + } state <== s_ready; } } } +class rocketDCacheAmoALU extends Component { + val io = new Bundle { + val cmd = Bits(4, 'input); + val wmask = Bits(8, 'input); + val lhs = UFix(64, 'input); + val rhs = UFix(64, 'input); + val result = UFix(64, 'output); + } + +// val signed_cmp = (op === M_XA_MIN) || (op === M_XA_MAX); +// val sub = (op === M_XA_MIN) || (op === M_XA_MINU) || +// (op === M_XA_MAX) || (op === M_XA_MAXU); + + val adder_lhs = Cat(io.lhs(63,32),io.wmask(3) & io.lhs(31), io.lhs(30,0)).toUFix; + val adder_rhs = Cat(io.rhs(63,32),io.wmask(3) & io.rhs(31), io.rhs(30,0)).toUFix; +// val adder_rhs = Cat(Mux(sub, ~io.rhs, io.rhs), sub).toUFix; +// val sum = adder_lhs + adder_rhs; +// val adder_out = sum(64,1); + val adder_out = adder_lhs + adder_rhs; + val alu_out = Wire() { UFix() }; + switch (io.cmd) { +// is (M_XA_ADD) { alu_out <== adder_out; } + is (M_XA_SWAP) { alu_out <== io.rhs; } + is (M_XA_AND) { alu_out <== io.lhs & io.rhs; } + is (M_XA_OR) { alu_out <== io.lhs | io.rhs; } + } + alu_out <== adder_out; + io.result := alu_out; } + +} \ No newline at end of file diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index d2340ccb..7129fc4e 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -45,7 +45,7 @@ class rocketDpath extends Component val alu = new rocketDpathALU(); val ex_alu_out = alu.io.out; - val ex_jr_target = ex_alu_out(31,0); + val ex_jr_target = ex_alu_out(VADDR_BITS,0); val div = new rocketDivider(64); val div_result = div.io.div_result_bits; @@ -342,7 +342,7 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req_addr := ex_alu_out(PADDR_BITS-1,0); + io.dmem.req_addr := ex_alu_out(VADDR_BITS-1,0); io.dmem.req_data := 
ex_reg_rs2; io.dmem.req_tag := ex_reg_waddr; From 82a636ff5560370ff9e606cc01d9171876bd9690 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 15 Nov 2011 00:51:45 -0800 Subject: [PATCH 0048/1087] AMOADD, AMOAND, AMOOR, AMOSWAP working --- rocket/src/main/scala/ctrl.scala | 17 +++++++++-------- rocket/src/main/scala/dcache.scala | 24 ++++++++++++++++++++---- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 9ee4c92d..5e7e6add 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -190,6 +190,15 @@ class rocketCtrl extends Component SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, 
N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LUI-> List(Y, BR_N, REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), @@ -254,18 +263,10 @@ class rocketCtrl extends Component FSD-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), // atomic memory operations - AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), AMOMAXU_W->List(Y, BR_N, 
REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), AMOMIN_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index e7ac5744..d683ac14 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -219,7 +219,7 @@ class rocketDCacheDM(lines: Int) extends Component { otherwise { r_cpu_req_val <== Bool(false); } - when (((state === s_resolve_miss) && r_req_load) || (state === s_replay_load)) { + when (((state === s_resolve_miss) && (r_req_load || r_req_amo)) || (state === s_replay_load)) { r_cpu_resp_val <== Bool(true); } otherwise { @@ -339,11 +339,23 @@ class rocketDCacheDM(lines: Int) extends Component { Mux(r_cpu_req_type === MT_D, ~Bits(0,8), Mux(r_cpu_req_idx(2).toBool, Cat(~Bits(0,4), Bits(0,4)), Cat(Bits(0,4), ~Bits(0,4)))); - + + val 
amo_store_wmask_d = Cat(Fill(8, amo_wmask(7)), + Fill(8, amo_wmask(6)), + Fill(8, amo_wmask(5)), + Fill(8, amo_wmask(4)), + Fill(8, amo_wmask(3)), + Fill(8, amo_wmask(2)), + Fill(8, amo_wmask(1)), + Fill(8, amo_wmask(0))); + + val amo_store_idx_sel = r_cpu_req_idx(offsetlsb).toBool; + val amo_store_wmask = Mux(amo_store_idx_sel, Cat(amo_store_wmask_d, Bits(0,64)), Cat(Bits(0,64), amo_store_wmask_d)); + val amo_alu = new rocketDCacheAmoALU(); amo_alu.io.cmd := r_cpu_req_cmd; amo_alu.io.wmask := amo_wmask; - amo_alu.io.lhs := r_resp_data.toUFix; + amo_alu.io.lhs := Mux(r_cpu_resp_val, resp_data, r_resp_data).toUFix; amo_alu.io.rhs := r_amo_data.toUFix; val amo_alu_out = amo_alu.io.result; @@ -363,7 +375,11 @@ class rocketDCacheDM(lines: Int) extends Component { (state === s_write_amo) || drain_store || resolve_store; - data_array.io.bweb := Mux((state === s_refill), ~Bits(0,128), store_wmask); + data_array.io.bweb := + Mux((state === s_refill), ~Bits(0,128), + Mux((state === s_write_amo), amo_store_wmask, + store_wmask)); + data_array.io.ce := (io.cpu.req_val && io.cpu.req_rdy && (req_load || req_amo)) || (state === s_start_writeback) || From ae98956e6b60b7daba579ff5f6cdbaf2f6573840 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 15 Nov 2011 02:43:51 -0800 Subject: [PATCH 0049/1087] more amo fixes, added more options to testharness to control debug messages --- rocket/src/main/scala/ctrl.scala | 7 ++++--- rocket/src/main/scala/dtlb.scala | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 5e7e6add..d1adf6dd 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -570,8 +570,9 @@ class rocketCtrl extends Component io.dpath.stalld ); - // check for loads in execute and mem stages to detect load/use hazards - val ex_mem_cmd_load = ex_reg_mem_val && (ex_reg_mem_cmd === M_XRD); + // check for loads and amos in execute and mem stages 
to detect load/use hazards + val ex_mem_cmd_load = + ex_reg_mem_val && ((ex_reg_mem_cmd === M_XRD) || ex_reg_mem_cmd(3).toBool); val lu_stall_ex = ex_mem_cmd_load && @@ -643,7 +644,7 @@ class rocketCtrl extends Component io.dpath.killx := kill_ex.toBool; io.dpath.killm := kill_mem.toBool; - io.dpath.mem_load := mem_reg_mem_val && (mem_reg_mem_cmd === M_XRD); + io.dpath.mem_load := mem_reg_mem_val && ((mem_reg_mem_cmd === M_XRD) || mem_reg_mem_cmd(3).toBool); io.dpath.ren2 := id_ren2.toBool; io.dpath.ren1 := id_ren1.toBool; io.dpath.sel_alu2 := id_sel_alu2; diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 90e1088b..93e3f7b7 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -64,7 +64,7 @@ class rocketDTLB(entries: Int) extends Component val req_load = (r_cpu_req_cmd === M_XRD); val req_store = (r_cpu_req_cmd === M_XWR); val req_flush = (r_cpu_req_cmd === M_FLA); -// val req_amo = io.cpu.req_cmd(3).toBool; + val req_amo = io.cpu.req_cmd(3).toBool; val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); @@ -139,20 +139,20 @@ class rocketDTLB(entries: Int) extends Component val outofrange = !tlb_miss && (io.cpu.resp_ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); val access_fault_ld = - tlb_hit && req_load && + tlb_hit && (req_load || req_amo) && ((status_s && !sr_array(tag_hit_addr).toBool) || (status_u && !ur_array(tag_hit_addr).toBool)); io.cpu.xcpt_ld := - (lookup && req_load && outofrange) || access_fault_ld; + (lookup && (req_load || req_amo) && outofrange) || access_fault_ld; val access_fault_st = - tlb_hit && req_store && + tlb_hit && (req_store || req_amo) && ((status_s && !sw_array(tag_hit_addr).toBool) || (status_u && !uw_array(tag_hit_addr).toBool)); io.cpu.xcpt_st := - (lookup && req_store && outofrange) || access_fault_st; + (lookup && (req_store || req_amo) && outofrange) || access_fault_st; io.cpu.req_rdy := Mux(status_vm, (state === s_ready) && !tlb_miss, Bool(true)); io.cpu.resp_busy := 
tlb_miss || (state != s_ready); From fc0f20643aedab3f33f3e0014f4df5cb4bb921e3 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 15 Nov 2011 18:06:41 -0800 Subject: [PATCH 0050/1087] cleanup --- rocket/src/main/scala/instructions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 92812bb0..ea790740 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -93,8 +93,6 @@ object Instructions val RDCYCLE = Bits("b?????_00000_00000_0000000100_1110111",32); val RDTIME = Bits("b?????_00000_00000_0000001100_1110111",32); val RDINSTRET = Bits("b?????_00000_00000_0000010100_1110111",32); - val MOVZ = Bits("b?????_?????_?????_0000000101_1110111",32); - val MOVN = Bits("b?????_?????_?????_0000001101_1110111",32); val EI = Bits("b?????_00000_00000_0000000000_1111011",32); val DI = Bits("b?????_00000_00000_0000000001_1111011",32); val MFPCR = Bits("b?????_00000_?????_0000000010_1111011",32); @@ -171,6 +169,8 @@ object Instructions val FENCE_G_V = Bits("b?????_?????_????????????_101_0101111",32); val FENCE_L_CV = Bits("b?????_?????_????????????_110_0101111",32); val FENCE_G_CV = Bits("b?????_?????_????????????_111_0101111",32); + val MOVZ = Bits("b?????_?????_?????_0000000101_1110111",32); + val MOVN = Bits("b?????_?????_?????_0000001101_1110111",32); val STOP = Bits("b00000_00000_00000_0000000010_1110111",32); val UTIDX = Bits("b?????_00000_00000_0000000011_1110111",32); val VLD = Bits("b?????_?????_00000_0000000011_0001011",32); From 886857fa47b4a201c6cf90f16f038e9796b442ae Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Tue, 15 Nov 2011 18:07:36 -0800 Subject: [PATCH 0051/1087] writes of PC weren't being sign extended --- rocket/src/main/scala/dpath.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 
7129fc4e..63ab7698 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -373,7 +373,7 @@ class rocketDpath extends Component // writeback select mux ex_wdata := Mux(ex_reg_ctrl_ll_wb || ex_reg_ctrl_wen_pcr, ex_reg_rs1, - Mux(ex_reg_ctrl_sel_wb === WB_PC, ex_reg_pc_plus4, + Mux(ex_reg_ctrl_sel_wb === WB_PC, Cat(Fill(64-VADDR_BITS, ex_reg_pc_plus4(VADDR_BITS-1)), ex_reg_pc_plus4), Mux(ex_reg_ctrl_sel_wb === WB_ALU, ex_alu_out, Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr, Bits(0, 64))))).toBits; From 80b4253318dde231f4d29a597db001e77bfaa7c4 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Wed, 16 Nov 2011 02:04:28 -0800 Subject: [PATCH 0052/1087] fixed dcache amo bug, cleaned up testharness, added RDTIME instruction --- rocket/src/main/scala/consts.scala | 3 +-- rocket/src/main/scala/ctrl.scala | 3 ++- rocket/src/main/scala/dcache.scala | 9 ++++++--- rocket/src/main/scala/dpath.scala | 7 ++++++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index d27d1003..4d28d5aa 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -79,8 +79,7 @@ object Constants val WB_PC = UFix(0, 3); val WB_ALU = UFix(1, 3); val WB_PCR = UFix(2, 3); - val WB_CR = UFix(3, 3); - val WB_MUL = UFix(4, 3); + val WB_TSC = UFix(3, 3); val N = UFix(0, 1); val Y = UFix(1, 1); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index d1adf6dd..a317785e 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -252,7 +252,8 @@ class rocketCtrl extends Component FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N), CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y), MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, 
A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y), - MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y) + MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y), + RDTIME-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N) // Instructions that have not yet been implemented /* diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index d683ac14..ab0f6f02 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -303,9 +303,12 @@ class rocketDCacheDM(lines: Int) extends Component { p_store_valid <== Bool(false); db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } - when (resolve_store || (state === s_write_amo)) { + when (resolve_store) { db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } + when (state === s_write_amo) { + db_array <== db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); + } when (tag_we) { db_array <== db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); } @@ -357,7 +360,7 @@ class rocketDCacheDM(lines: Int) extends Component { amo_alu.io.wmask := amo_wmask; amo_alu.io.lhs := Mux(r_cpu_resp_val, resp_data, r_resp_data).toUFix; amo_alu.io.rhs := r_amo_data.toUFix; - val amo_alu_out = amo_alu.io.result; + val amo_alu_out = Cat(amo_alu.io.result,amo_alu.io.result); data_array.io.a := Mux(drain_store || resolve_store, p_store_idx(PGIDX_BITS-1, offsetmsb-1), @@ -515,4 +518,4 @@ class rocketDCacheAmoALU extends Component { io.result := alu_out; } -} \ No newline at end of file +} diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 
63ab7698..32bc74cd 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -370,13 +370,18 @@ class rocketDpath extends Component (~(ex_reg_rs1(63) ^ ex_reg_rs2(63)) & io.ctrl.br_ltu | ex_reg_rs1(63) & ~ex_reg_rs2(63)).toBool; + // time stamp counter + val tsc_reg = Reg(resetVal = UFix(0,64)); + tsc_reg <== tsc_reg + UFix(1); + // writeback select mux ex_wdata := Mux(ex_reg_ctrl_ll_wb || ex_reg_ctrl_wen_pcr, ex_reg_rs1, Mux(ex_reg_ctrl_sel_wb === WB_PC, Cat(Fill(64-VADDR_BITS, ex_reg_pc_plus4(VADDR_BITS-1)), ex_reg_pc_plus4), Mux(ex_reg_ctrl_sel_wb === WB_ALU, ex_alu_out, Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr, - Bits(0, 64))))).toBits; + Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, + Bits(0, 64)))))).toBits; // memory stage mem_reg_pc <== ex_reg_pc; From 5a322ff00c287b0534067f989019c0b6f199878f Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Thu, 17 Nov 2011 11:17:37 -0800 Subject: [PATCH 0053/1087] fixed dtlb bug (swapped r/w permissions), added fake mtfsr/mffsr/fld/fst instructions --- rocket/src/main/scala/ctrl.scala | 155 ++++++++++++++++--------------- rocket/src/main/scala/dtlb.scala | 16 ++-- 2 files changed, 90 insertions(+), 81 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index a317785e..b9d8e535 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -87,76 +87,76 @@ class rocketCtrl extends Component { val io = new ioCtrlAll(); - val fp = - ListLookup(io.dpath.inst, - List(Bool(false)), - Array( - FMOVZ -> List(Bool(true)), - FMOVN -> List(Bool(true)), - FADD_S -> List(Bool(true)), - FSUB_S -> List(Bool(true)), - FMUL_S -> List(Bool(true)), - FDIV_S -> List(Bool(true)), - FSQRT_S -> List(Bool(true)), - FSGNJ_S -> List(Bool(true)), - FSGNJN_S -> List(Bool(true)), - FSGNJX_S -> List(Bool(true)), - FADD_D -> List(Bool(true)), - FSUB_D -> List(Bool(true)), - FMUL_D -> List(Bool(true)), - FDIV_D -> List(Bool(true)), - FSQRT_D -> 
List(Bool(true)), - FSGNJ_D -> List(Bool(true)), - FSGNJN_D -> List(Bool(true)), - FSGNJX_D -> List(Bool(true)), - FCVT_L_S -> List(Bool(true)), - FCVT_LU_S -> List(Bool(true)), - FCVT_W_S -> List(Bool(true)), - FCVT_WU_S -> List(Bool(true)), - FCVT_L_D -> List(Bool(true)), - FCVT_LU_D -> List(Bool(true)), - FCVT_W_D -> List(Bool(true)), - FCVT_WU_D -> List(Bool(true)), - FCVT_S_L -> List(Bool(true)), - FCVT_S_LU -> List(Bool(true)), - FCVT_S_W -> List(Bool(true)), - FCVT_S_WU -> List(Bool(true)), - FCVT_D_L -> List(Bool(true)), - FCVT_D_LU -> List(Bool(true)), - FCVT_D_W -> List(Bool(true)), - FCVT_D_WU -> List(Bool(true)), - FCVT_S_D -> List(Bool(true)), - FCVT_D_S -> List(Bool(true)), - FEQ_S -> List(Bool(true)), - FLT_S -> List(Bool(true)), - FLE_S -> List(Bool(true)), - FEQ_D -> List(Bool(true)), - FLT_D -> List(Bool(true)), - FLE_D -> List(Bool(true)), - FMIN_S -> List(Bool(true)), - FMAX_S -> List(Bool(true)), - FMIN_D -> List(Bool(true)), - FMAX_D -> List(Bool(true)), - MFTX_S -> List(Bool(true)), - MFTX_D -> List(Bool(true)), - MFFSR -> List(Bool(true)), - MXTF_S -> List(Bool(true)), - MXTF_D -> List(Bool(true)), - MTFSR -> List(Bool(true)), - FLW -> List(Bool(true)), - FLD -> List(Bool(true)), - FSW -> List(Bool(true)), - FSD -> List(Bool(true)), - FMADD_S -> List(Bool(true)), - FMSUB_S -> List(Bool(true)), - FNMSUB_S -> List(Bool(true)), - FNMADD_S -> List(Bool(true)), - FMADD_D -> List(Bool(true)), - FMSUB_D -> List(Bool(true)), - FNMSUB_D -> List(Bool(true)), - FNMADD_D -> List(Bool(true)) - )); - val id_fp_val :: Nil = fp; +// val fp = +// ListLookup(io.dpath.inst, +// List(Bool(false)), +// Array( +// FMOVZ -> List(Bool(true)), +// FMOVN -> List(Bool(true)), +// FADD_S -> List(Bool(true)), +// FSUB_S -> List(Bool(true)), +// FMUL_S -> List(Bool(true)), +// FDIV_S -> List(Bool(true)), +// FSQRT_S -> List(Bool(true)), +// FSGNJ_S -> List(Bool(true)), +// FSGNJN_S -> List(Bool(true)), +// FSGNJX_S -> List(Bool(true)), +// FADD_D -> List(Bool(true)), +// 
FSUB_D -> List(Bool(true)), +// FMUL_D -> List(Bool(true)), +// FDIV_D -> List(Bool(true)), +// FSQRT_D -> List(Bool(true)), +// FSGNJ_D -> List(Bool(true)), +// FSGNJN_D -> List(Bool(true)), +// FSGNJX_D -> List(Bool(true)), +// FCVT_L_S -> List(Bool(true)), +// FCVT_LU_S -> List(Bool(true)), +// FCVT_W_S -> List(Bool(true)), +// FCVT_WU_S -> List(Bool(true)), +// FCVT_L_D -> List(Bool(true)), +// FCVT_LU_D -> List(Bool(true)), +// FCVT_W_D -> List(Bool(true)), +// FCVT_WU_D -> List(Bool(true)), +// FCVT_S_L -> List(Bool(true)), +// FCVT_S_LU -> List(Bool(true)), +// FCVT_S_W -> List(Bool(true)), +// FCVT_S_WU -> List(Bool(true)), +// FCVT_D_L -> List(Bool(true)), +// FCVT_D_LU -> List(Bool(true)), +// FCVT_D_W -> List(Bool(true)), +// FCVT_D_WU -> List(Bool(true)), +// FCVT_S_D -> List(Bool(true)), +// FCVT_D_S -> List(Bool(true)), +// FEQ_S -> List(Bool(true)), +// FLT_S -> List(Bool(true)), +// FLE_S -> List(Bool(true)), +// FEQ_D -> List(Bool(true)), +// FLT_D -> List(Bool(true)), +// FLE_D -> List(Bool(true)), +// FMIN_S -> List(Bool(true)), +// FMAX_S -> List(Bool(true)), +// FMIN_D -> List(Bool(true)), +// FMAX_D -> List(Bool(true)), +// MFTX_S -> List(Bool(true)), +// MFTX_D -> List(Bool(true)), +// MFFSR -> List(Bool(true)), +// MXTF_S -> List(Bool(true)), +// MXTF_D -> List(Bool(true)), +// MTFSR -> List(Bool(true)), +// FLW -> List(Bool(true)), +// FLD -> List(Bool(true)), +// FSW -> List(Bool(true)), +// FSD -> List(Bool(true)), +// FMADD_S -> List(Bool(true)), +// FMSUB_S -> List(Bool(true)), +// FNMSUB_S -> List(Bool(true)), +// FNMADD_S -> List(Bool(true)), +// FMADD_D -> List(Bool(true)), +// FMSUB_D -> List(Bool(true)), +// FNMSUB_D -> List(Bool(true)), +// FNMADD_D -> List(Bool(true)) +// )); +// val id_fp_val :: Nil = fp; val xpr64 = Y; val cs = @@ -253,9 +253,16 @@ class rocketCtrl extends Component CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y), 
MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y), MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y), - RDTIME-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N) + RDTIME-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), // Instructions that have not yet been implemented + // Faking these for now so akaros will boot + MFFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + MTFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + FLW-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + FLD-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + FSW-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + FSD-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N) /* // floating point FLW-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), @@ -366,7 +373,8 @@ class rocketCtrl extends Component } // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim) - val illegal_inst = !(id_int_val.toBool || id_fp_val.toBool) || (id_eret.toBool && io.dpath.status(SR_ET).toBool); +// val illegal_inst = 
!(id_int_val.toBool || id_fp_val.toBool) || (id_eret.toBool && io.dpath.status(SR_ET).toBool); + val illegal_inst = !id_int_val.toBool || (id_eret.toBool && io.dpath.status(SR_ET).toBool); when (reset.toBool || io.dpath.killd) { ex_reg_br_type <== BR_N; @@ -403,7 +411,8 @@ class rocketCtrl extends Component ex_reg_xcpt_itlb <== id_reg_xcpt_itlb; ex_reg_xcpt_illegal <== illegal_inst; ex_reg_xcpt_privileged <== (id_privileged & ~io.dpath.status(SR_S)).toBool; - ex_reg_xcpt_fpu <== id_fp_val.toBool; +// ex_reg_xcpt_fpu <== id_fp_val.toBool; + ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== id_syscall.toBool; } diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 93e3f7b7..dabcc02d 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -85,10 +85,10 @@ class rocketDTLB(entries: Int) extends Component val status_vm = io.cpu.status(SR_VM).toBool // virtual memory enable // extract fields from PT permission bits - val ptw_perm_ur = io.ptw.resp_perm(1); - val ptw_perm_uw = io.ptw.resp_perm(2); - val ptw_perm_sr = io.ptw.resp_perm(4); - val ptw_perm_sw = io.ptw.resp_perm(5); + val ptw_perm_ur = io.ptw.resp_perm(2); + val ptw_perm_uw = io.ptw.resp_perm(1); + val ptw_perm_sr = io.ptw.resp_perm(5); + val ptw_perm_sw = io.ptw.resp_perm(4); // permission bit arrays val ur_array = Reg(resetVal = Bits(0, entries)); // user read permission @@ -143,16 +143,16 @@ class rocketDTLB(entries: Int) extends Component ((status_s && !sr_array(tag_hit_addr).toBool) || (status_u && !ur_array(tag_hit_addr).toBool)); - io.cpu.xcpt_ld := - (lookup && (req_load || req_amo) && outofrange) || access_fault_ld; + io.cpu.xcpt_ld := access_fault_ld; +// (lookup && (req_load || req_amo) && outofrange) || access_fault_ld; val access_fault_st = tlb_hit && (req_store || req_amo) && ((status_s && !sw_array(tag_hit_addr).toBool) || (status_u && !uw_array(tag_hit_addr).toBool)); - io.cpu.xcpt_st := - (lookup && (req_store || req_amo) 
&& outofrange) || access_fault_st; + io.cpu.xcpt_st := access_fault_st; +// (lookup && (req_store || req_amo) && outofrange) || access_fault_st; io.cpu.req_rdy := Mux(status_vm, (state === s_ready) && !tlb_miss, Bool(true)); io.cpu.resp_busy := tlb_miss || (state != s_ready); From c42d8149b7309241a18fb4baee7ea57c4fcc785c Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Thu, 17 Nov 2011 23:50:45 -0800 Subject: [PATCH 0054/1087] moved PCR writeback to end of MEM stage, cleanup of dcache/dpath/ctrl --- rocket/src/main/scala/cpu.scala | 2 - rocket/src/main/scala/ctrl.scala | 36 +++++---------- rocket/src/main/scala/dcache.scala | 57 +++++++++++------------ rocket/src/main/scala/dpath.scala | 64 ++++++++------------------ rocket/src/main/scala/dpath_util.scala | 3 +- 5 files changed, 58 insertions(+), 104 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 8cde5d50..0d46f702 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -10,7 +10,6 @@ class ioDebug(view: List[String] = null) extends Bundle(view) val id_valid = Bool('output); val ex_valid = Bool('output); val mem_valid = Bool('output); - val wb_valid = Bool('output); } class ioHost(view: List[String] = null) extends Bundle(view) @@ -73,7 +72,6 @@ class rocketProc extends Component // ctrl.io.itlb_miss := itlb.io.cpu.resp_miss; io.imem.itlb_miss := itlb.io.cpu.resp_miss; - // connect DTLB to D$ arbiter, ctrl+dpath dtlb.io.cpu.invalidate := dpath.io.ptbr_wen; dtlb.io.cpu.status := dpath.io.ctrl.status; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index b9d8e535..be527aa5 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -33,7 +33,8 @@ class ioCtrlDpath extends Bundle() val sel_wb = UFix(3, 'output); val ren_pcr = Bool('output); val wen_pcr = Bool('output); - val eret = Bool('output); + val id_eret = Bool('output); + val mem_eret = Bool('output); val mem_load = 
Bool('output); val wen = Bool('output); // enable/disable interrupts @@ -247,7 +248,7 @@ class rocketCtrl extends Component SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N), EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y), DI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y), - ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y), + ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y), FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N), FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N), CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y), @@ -354,10 +355,6 @@ class rocketCtrl extends Component val mem_reg_xcpt_privileged = Reg(resetVal = Bool(false)); val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); - - val wb_reg_inst_di = Reg(resetVal = Bool(false)); - val wb_reg_inst_ei = Reg(resetVal = Bool(false)); - val wb_reg_flush_inst = Reg(resetVal = Bool(false)); when (!io.dpath.stalld) { when (io.dpath.killf) { @@ -479,19 +476,8 @@ class rocketCtrl extends Component mem_reg_xcpt_fpu <== ex_reg_xcpt_fpu; mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; } - - when (reset.toBool || io.dpath.killm) { - wb_reg_div_mul_val <== Bool(false); - wb_reg_inst_di <== Bool(false); - wb_reg_inst_ei <== Bool(false); - 
wb_reg_flush_inst <== Bool(false); - } - otherwise { - wb_reg_div_mul_val <== mem_reg_div_mul_val; - wb_reg_inst_di <== mem_reg_inst_di; - wb_reg_inst_ei <== mem_reg_inst_ei; - wb_reg_flush_inst <== mem_reg_flush_inst; - } + + wb_reg_div_mul_val <== mem_reg_div_mul_val; // exception handling // FIXME: verify PC in MEM stage points to valid, restartable instruction @@ -644,9 +630,8 @@ class rocketCtrl extends Component // for divider, multiplier writeback val mul_wb = io.dpath.mul_result_val; val div_wb = io.dpath.div_result_val & !mul_wb; - - - io.flush_inst := wb_reg_flush_inst; + + io.flush_inst := mem_reg_flush_inst; io.dpath.stalld := ctrl_stalld.toBool; io.dpath.killf := take_pc | ~io.imem.resp_val; @@ -672,9 +657,10 @@ class rocketCtrl extends Component io.dpath.sel_wb := id_sel_wb; io.dpath.ren_pcr := id_ren_pcr.toBool; io.dpath.wen_pcr := id_wen_pcr.toBool; - io.dpath.eret := id_eret.toBool; - io.dpath.irq_disable := wb_reg_inst_di; - io.dpath.irq_enable := wb_reg_inst_ei; + io.dpath.id_eret := id_eret.toBool; + io.dpath.mem_eret := mem_reg_eret; + io.dpath.irq_disable := mem_reg_inst_di && !kill_mem; + io.dpath.irq_enable := mem_reg_inst_ei && !kill_mem; } } diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index ab0f6f02..1fcda1a5 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -7,7 +7,6 @@ import scala.math._; // interface between D$ and processor/DTLB class ioDmem(view: List[String] = null) extends Bundle(view) { -// val dtlb_busy = Bool('input); val dtlb_miss = Bool('input); val req_val = Bool('input); val req_rdy = Bool('output); @@ -48,7 +47,7 @@ class rocketDCacheStoreGen extends Component { val req_type = Bits(3, 'input); val req_addr_lsb = Bits(3, 'input); val req_data = Bits(64, 'input); - val store_wmask = Bits(8, 'output); + val store_wmask = Bits(64, 'output); val store_data = Bits(64, 'output); } @@ -86,7 +85,16 @@ class rocketDCacheStoreGen extends Component { 
Mux(io.req_type === MT_D, wmask_d, UFix(0, 8))))); - io.store_wmask := store_wmask_byte; + val store_wmask_d = Cat(Fill(8, store_wmask_byte(7)), + Fill(8, store_wmask_byte(6)), + Fill(8, store_wmask_byte(5)), + Fill(8, store_wmask_byte(4)), + Fill(8, store_wmask_byte(3)), + Fill(8, store_wmask_byte(2)), + Fill(8, store_wmask_byte(1)), + Fill(8, store_wmask_byte(0))); + + io.store_wmask := store_wmask_d; io.store_data := Mux(io.req_type === MT_B, Fill(8, io.req_data( 7,0)), @@ -260,7 +268,7 @@ class rocketDCacheDM(lines: Int) extends Component { when (tag_we && r_req_flush) { vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); } - val vb_rdata = Reg(vb_array(tag_addr).toBool); + val vb_rdata = vb_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; val tag_valid = r_cpu_req_val && vb_rdata; val tag_match = (tag_rdata === io.cpu.req_ppn); val tag_hit = tag_valid && tag_match; @@ -282,28 +290,28 @@ class rocketDCacheDM(lines: Int) extends Component { // after the cache line refill has completed val resolve_store = (state === s_resolve_miss) && r_req_store; - // dirty bit array - val db_array = Reg(resetVal = Bits(0, lines)); - val tag_dirty = Reg(db_array(tag_addr)).toBool; - + // pending store data when (io.cpu.req_val && io.cpu.req_rdy && req_store) { p_store_idx <== io.cpu.req_idx; p_store_data <== io.cpu.req_data; p_store_type <== io.cpu.req_type; } - - when (io.cpu.req_val && io.cpu.req_rdy && req_amo) { - r_amo_data <== io.cpu.req_data; - } - when (store_hit && !drain_store) { p_store_valid <== Bool(true); } when (drain_store) { p_store_valid <== Bool(false); - db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } - when (resolve_store) { + + // AMO operand + when (io.cpu.req_val && io.cpu.req_rdy && req_amo) { + r_amo_data <== io.cpu.req_data; + } + + // dirty bit array + val db_array = Reg(resetVal = Bits(0, lines)); + val tag_dirty = 
db_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; + when ((r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store) || resolve_store) { db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } when (state === s_write_amo) { @@ -319,17 +327,8 @@ class rocketDCacheDM(lines: Int) extends Component { storegen.io.req_data := p_store_data; storegen.io.req_type := p_store_type; val store_data = Fill(2, storegen.io.store_data); - val store_wmask_b = storegen.io.store_wmask; - val store_wmask_d = Cat(Fill(8, store_wmask_b(7)), - Fill(8, store_wmask_b(6)), - Fill(8, store_wmask_b(5)), - Fill(8, store_wmask_b(4)), - Fill(8, store_wmask_b(3)), - Fill(8, store_wmask_b(2)), - Fill(8, store_wmask_b(1)), - Fill(8, store_wmask_b(0))); - val store_idx_sel = p_store_idx(offsetlsb).toBool; - val store_wmask = Mux(store_idx_sel, Cat(store_wmask_d, Bits(0,64)), Cat(Bits(0,64), store_wmask_d)); + val store_wmask_d = storegen.io.store_wmask; + val store_wmask = Mux(p_store_idx(offsetlsb).toBool, Cat(store_wmask_d, Bits(0,64)), Cat(Bits(0,64), store_wmask_d)); // data array val data_array = new rocketSRAMsp(lines*4, 128); @@ -351,9 +350,8 @@ class rocketDCacheDM(lines: Int) extends Component { Fill(8, amo_wmask(2)), Fill(8, amo_wmask(1)), Fill(8, amo_wmask(0))); - - val amo_store_idx_sel = r_cpu_req_idx(offsetlsb).toBool; - val amo_store_wmask = Mux(amo_store_idx_sel, Cat(amo_store_wmask_d, Bits(0,64)), Cat(Bits(0,64), amo_store_wmask_d)); + + val amo_store_wmask = Mux(r_cpu_req_idx(offsetlsb).toBool, Cat(amo_store_wmask_d, Bits(0,64)), Cat(Bits(0,64), amo_store_wmask_d)); val amo_alu = new rocketDCacheAmoALU(); amo_alu.io.cmd := r_cpu_req_cmd; @@ -373,6 +371,7 @@ class rocketDCacheDM(lines: Int) extends Component { Mux((state === s_refill), io.mem.resp_data, Mux((state === s_write_amo), amo_alu_out, store_data)); + data_array.io.we := ((state === s_refill) && io.mem.resp_val) || (state === s_write_amo) || diff --git 
a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 32bc74cd..4192cbd3 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -60,7 +60,7 @@ class rocketDpath extends Component val rfile = new rocketDpathRegfile(); // instruction fetch definitions - val if_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); + val if_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); // instruction decode definitions val id_reg_valid = Reg(resetVal = Bool(false)); @@ -80,6 +80,7 @@ class rocketDpath extends Component val ex_reg_waddr = Reg(resetVal = UFix(0,5)); val ex_reg_ctrl_sel_alu2 = Reg(resetVal = A2_X); val ex_reg_ctrl_sel_alu1 = Reg(resetVal = A1_X); + val ex_reg_ctrl_eret = Reg(resetVal = Bool(false)); val ex_reg_ctrl_fn_dw = Reg(resetVal = DW_X); val ex_reg_ctrl_fn_alu = Reg(resetVal = FN_X); val ex_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); @@ -91,36 +92,23 @@ class rocketDpath extends Component val ex_reg_ctrl_wen = Reg(resetVal = Bool(false)); val ex_reg_ctrl_ren_pcr = Reg(resetVal = Bool(false)); val ex_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); - val ex_reg_ctrl_eret = Reg(resetVal = Bool(false)); val ex_wdata = Wire() { Bits() }; // memory definitions val mem_reg_valid = Reg(resetVal = Bool(false)); val mem_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); - val mem_reg_pc_plus4 = Reg(resetVal = UFix(0,VADDR_BITS)); val mem_reg_waddr = Reg(resetVal = UFix(0,5)); val mem_reg_wdata = Reg(resetVal = Bits(0,64)); val mem_reg_raddr2 = Reg(resetVal = UFix(0,5)); - val mem_reg_pcr = Reg(resetVal = Bits(0,64)); - val mem_reg_ctrl_eret = Reg(resetVal = Bool(false)); val mem_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); // writeback definitions - val wb_reg_valid = Reg(resetVal = Bool(false)); - val wb_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); - val wb_reg_mem_req_addr = Reg(resetVal = UFix(0,VADDR_BITS)); val 
wb_reg_waddr = Reg(resetVal = UFix(0,5)); val wb_reg_wdata = Reg(resetVal = Bits(0,64)); val wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); - val wb_reg_raddr2 = Reg(resetVal = UFix(0,5)); - val wb_reg_ctrl_cause = Reg(resetVal = UFix(0,5)); - val wb_reg_ctrl_eret = Reg(resetVal = Bool(false)); - val wb_reg_ctrl_exception = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); - val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); - val wb_reg_badvaddr_wen = Reg(resetVal = Bool(false)); val r_dmem_resp_val = Reg(resetVal = Bool(false)); val r_dmem_resp_waddr = Reg(resetVal = UFix(0,5)); @@ -153,7 +141,7 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, - Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_pcr(VADDR_BITS-1,0).toUFix, // only used for ERET + Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_wdata, // only used for ERET Mux(io.ctrl.sel_pc === PC_EVEC, pcr.io.evec, Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, UFix(0, VADDR_BITS))))))))))); @@ -162,14 +150,15 @@ class rocketDpath extends Component if_reg_pc <== UFix(START_ADDR, VADDR_BITS); } when (!io.ctrl.stallf) { - if_reg_pc <== if_next_pc; + if_reg_pc <== if_next_pc.toUFix; } - + + // FIXME: make sure PCs are properly sign extended io.ctrl.xcpt_ma_inst := if_next_pc(1,0) != Bits(0,2) io.imem.req_addr := Mux(io.ctrl.stallf, if_reg_pc, - if_next_pc); + if_next_pc.toUFix); btb.io.current_pc4 := if_pc_plus4; btb.io.hit ^^ io.ctrl.btb_hit; @@ -294,7 +283,7 @@ class rocketDpath extends Component ex_reg_ctrl_mul_val <== io.ctrl.mul_val; ex_reg_ctrl_wen <== io.ctrl.wen; ex_reg_ctrl_wen_pcr <== io.ctrl.wen_pcr; - ex_reg_ctrl_eret <== io.ctrl.eret; + ex_reg_ctrl_eret <== io.ctrl.id_eret; } val ex_alu_in2 = @@ -338,7 +327,7 @@ class rocketDpath extends Component io.ctrl.mul_result_val := mul.io.result_val; - io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard 
detection + io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection & bypass control // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module @@ -385,22 +374,18 @@ class rocketDpath extends Component // memory stage mem_reg_pc <== ex_reg_pc; - mem_reg_pc_plus4 <== ex_reg_pc_plus4; - mem_reg_pcr <== ex_pcr; mem_reg_waddr <== ex_reg_waddr; mem_reg_wdata <== ex_wdata; mem_reg_ctrl_ll_wb <== ex_reg_ctrl_ll_wb; mem_reg_raddr2 <== ex_reg_raddr2; - + when (io.ctrl.killx) { mem_reg_valid <== Bool(false); - mem_reg_ctrl_eret <== Bool(false); mem_reg_ctrl_wen <== Bool(false); mem_reg_ctrl_wen_pcr <== Bool(false); } otherwise { mem_reg_valid <== ex_reg_valid; - mem_reg_ctrl_eret <== ex_reg_ctrl_eret; mem_reg_ctrl_wen <== ex_reg_ctrl_wen; mem_reg_ctrl_wen_pcr <== ex_reg_ctrl_wen_pcr; } @@ -418,26 +403,15 @@ class rocketDpath extends Component r_dmem_resp_type <== dmem_resp_type; r_dmem_resp_data <== mem_dmem_resp_data; - wb_reg_pc <== mem_reg_pc; wb_reg_waddr <== mem_reg_waddr; wb_reg_wdata <== mem_reg_wdata; wb_reg_ctrl_ll_wb <== mem_reg_ctrl_ll_wb; - wb_reg_raddr2 <== mem_reg_raddr2; - wb_reg_ctrl_eret <== mem_reg_ctrl_eret; - wb_reg_ctrl_exception <== io.ctrl.exception; - wb_reg_ctrl_cause <== io.ctrl.cause; - wb_reg_mem_req_addr <== io.dmem.req_addr; - wb_reg_badvaddr_wen <== io.ctrl.badvaddr_wen; when (io.ctrl.killm) { - wb_reg_valid <== Bool(false); wb_reg_ctrl_wen <== Bool(false); - wb_reg_ctrl_wen_pcr <== Bool(false); } otherwise { - wb_reg_valid <== mem_reg_valid; wb_reg_ctrl_wen <== mem_reg_ctrl_wen; - wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } // crossbar/sign extension for 8/16 bit loads (moved to earlier in file) @@ -460,24 +434,22 @@ class rocketDpath extends Component io.ctrl.sboard_clr1a := r_dmem_resp_waddr; // processor control regfile write - pcr.io.w.addr := wb_reg_raddr2; - pcr.io.w.en := wb_reg_ctrl_wen_pcr; - pcr.io.w.data := wb_reg_wdata; + pcr.io.w.addr := mem_reg_raddr2; + 
pcr.io.w.en := mem_reg_ctrl_wen_pcr && !io.ctrl.killm; + pcr.io.w.data := mem_reg_wdata; pcr.io.di := io.ctrl.irq_disable; pcr.io.ei := io.ctrl.irq_enable; - pcr.io.eret := wb_reg_ctrl_eret; - pcr.io.exception := wb_reg_ctrl_exception; - pcr.io.cause := wb_reg_ctrl_cause; - pcr.io.pc := wb_reg_pc; - pcr.io.badvaddr := wb_reg_mem_req_addr; - pcr.io.badvaddr_wen := wb_reg_badvaddr_wen; + pcr.io.eret := io.ctrl.mem_eret; + pcr.io.exception := io.ctrl.exception; + pcr.io.cause := io.ctrl.cause; + pcr.io.pc := mem_reg_pc; + pcr.io.badvaddr_wen := io.ctrl.badvaddr_wen; // temporary debug outputs so things don't get optimized away io.debug.id_valid := id_reg_valid; io.debug.ex_valid := ex_reg_valid; io.debug.mem_valid := mem_reg_valid; - io.debug.wb_valid := wb_reg_valid; } diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 2d4f6604..751bf796 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -51,7 +51,6 @@ class ioDpathPCR extends Bundle() val cause = UFix(5, 'input); val badvaddr_wen = Bool('input); val pc = UFix(VADDR_BITS, 'input); - val badvaddr = UFix(VADDR_BITS, 'input); val eret = Bool('input); val ei = Bool('input); val di = Bool('input); @@ -114,7 +113,7 @@ class rocketDpathPCR extends Component } when (io.badvaddr_wen) { - reg_badvaddr <== io.badvaddr; + reg_badvaddr <== io.w.data.toUFix; } when (io.exception && !reg_status_et) { From 11f0e3daf45063ab67968f0bd3db2749124c6776 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Fri, 18 Nov 2011 00:17:30 -0800 Subject: [PATCH 0055/1087] more cleanup --- rocket/src/main/scala/cpu.scala | 3 --- rocket/src/main/scala/ctrl.scala | 10 +++++----- rocket/src/main/scala/dpath.scala | 6 ------ 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 0d46f702..e38f3813 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala 
@@ -7,9 +7,6 @@ import Constants._; class ioDebug(view: List[String] = null) extends Bundle(view) { val error_mode = Bool('output); - val id_valid = Bool('output); - val ex_valid = Bool('output); - val mem_valid = Bool('output); } class ioHost(view: List[String] = null) extends Bundle(view) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index be527aa5..680a68f7 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -330,7 +330,7 @@ class rocketCtrl extends Component val ex_reg_br_type = Reg(){UFix(width = 4)}; val ex_reg_btb_hit = Reg(){Bool()}; - val ex_reg_inst_div_mul_val = Reg(){Bool()}; + val ex_reg_div_mul_val = Reg(){Bool()}; val ex_reg_mem_val = Reg(){Bool()}; val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; val ex_reg_mem_type = Reg(){UFix(width = 3)}; @@ -376,7 +376,7 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killd) { ex_reg_br_type <== BR_N; ex_reg_btb_hit <== Bool(false); - ex_reg_inst_div_mul_val <== Bool(false); + ex_reg_div_mul_val <== Bool(false); ex_reg_mem_val <== Bool(false); ex_reg_mem_cmd <== UFix(0, 4); ex_reg_mem_type <== UFix(0, 3); @@ -395,7 +395,7 @@ class rocketCtrl extends Component otherwise { ex_reg_br_type <== id_br_type; ex_reg_btb_hit <== id_reg_btb_hit; - ex_reg_inst_div_mul_val <== id_div_val.toBool || id_mul_val.toBool; + ex_reg_div_mul_val <== id_div_val.toBool || id_mul_val.toBool; ex_reg_mem_val <== id_mem_val.toBool; ex_reg_mem_cmd <== id_mem_cmd; ex_reg_mem_type <== id_mem_type; @@ -460,7 +460,7 @@ class rocketCtrl extends Component mem_reg_xcpt_syscall <== Bool(false); } otherwise { - mem_reg_div_mul_val <== ex_reg_inst_div_mul_val; + mem_reg_div_mul_val <== ex_reg_div_mul_val; mem_reg_eret <== ex_reg_eret; mem_reg_mem_val <== ex_reg_mem_val; mem_reg_mem_cmd <== ex_reg_mem_cmd; @@ -592,7 +592,7 @@ class rocketCtrl extends Component // check for divide and multiply instructions in ex,mem,wb stages val dm_stall_ex = - 
ex_reg_inst_div_mul_val && + ex_reg_div_mul_val && ((id_ren1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || (id_ren2.toBool && (id_raddr2 === io.dpath.ex_waddr))); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 4192cbd3..97a9af33 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -445,12 +445,6 @@ class rocketDpath extends Component pcr.io.cause := io.ctrl.cause; pcr.io.pc := mem_reg_pc; pcr.io.badvaddr_wen := io.ctrl.badvaddr_wen; - - // temporary debug outputs so things don't get optimized away - io.debug.id_valid := id_reg_valid; - io.debug.ex_valid := ex_reg_valid; - io.debug.mem_valid := mem_reg_valid; - } } From 8f3927fdfacd490a8d6becd5f74394c731a98a20 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 30 Nov 2011 18:07:57 -0800 Subject: [PATCH 0056/1087] queue data type is now templated --- rocket/src/main/scala/icache_prefetch.scala | 4 +- rocket/src/main/scala/queues.scala | 48 +++++++++++---------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index fe7b761c..62fde4f7 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -23,7 +23,7 @@ class ioIPrefetcher extends Bundle() { class rocketIPrefetcher extends Component() { val io = new ioIPrefetcher(); - val pdq = new queueSimplePF(128, 4, 2); + val pdq = (new queueSimplePF(4)) { Bits(width = 128) }; val s_invalid :: s_valid :: s_refilling :: s_req_wait :: s_resp_wait :: s_bad_resp_wait :: Nil = Enum(6) { UFix() }; val state = Reg(resetVal = s_invalid); @@ -89,4 +89,4 @@ class rocketIPrefetcher extends Component() { } } -} \ No newline at end of file +} diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index b2f7aa16..01326b54 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -3,6 +3,7 
@@ package queues import Chisel._ import Node._; +import scala.math._; class ioQueueCtrl(addr_sz: Int) extends Bundle() { @@ -16,8 +17,9 @@ class ioQueueCtrl(addr_sz: Int) extends Bundle() val raddr = UFix(addr_sz, 'output); } -class queueCtrl(entries: Int, addr_sz: Int) extends Component +class queueCtrl(entries: Int) extends Component { + val addr_sz = ceil(log(entries)/log(2)).toInt override val io = new ioQueueCtrl(addr_sz); // Enqueue and dequeue pointers @@ -79,28 +81,28 @@ class queueCtrl(entries: Int, addr_sz: Int) extends Component full <== full_next; } -class ioQueueSimplePF(data_sz: Int) extends Bundle() +class ioQueueSimplePF[T <: Data]()(data: => T) extends Bundle() { val q_reset = Bool('input); val enq_val = Bool('input); val enq_rdy = Bool('output); val deq_val = Bool('output); val deq_rdy = Bool('input); - val enq_bits = Bits(data_sz, 'input); - val deq_bits = Bits(data_sz, 'output); + val enq_bits = data.asInput; + val deq_bits = data.asOutput; } -class queueSimplePF(data_sz: Int, entries: Int, addr_sz: Int) extends Component +class queueSimplePF[T <: Data](entries: Int)(data: => T) extends Component { - override val io = new ioQueueSimplePF(data_sz); - val ctrl = new queueCtrl(entries, addr_sz); + override val io = new ioQueueSimplePF()(data); + val ctrl = new queueCtrl(entries); ctrl.io.q_reset ^^ io.q_reset; ctrl.io.deq_val ^^ io.deq_val; ctrl.io.enq_rdy ^^ io.enq_rdy; ctrl.io.enq_val ^^ io.enq_val; ctrl.io.deq_rdy ^^ io.deq_rdy; val ram = Mem(entries, ctrl.io.wen, ctrl.io.waddr, io.enq_bits); - io.deq_bits := ram(ctrl.io.raddr); + ram.read(ctrl.io.raddr) ^^ io.deq_bits; } // TODO: SHOULD USE INHERITANCE BUT BREAKS INTROSPECTION CODE @@ -117,8 +119,9 @@ class ioQueueCtrlFlow(addr_sz: Int) extends Bundle() /* IOqueueCtrl */ val flowthru = Bool('output); } -class queueCtrlFlow(entries: Int, addr_sz: Int) extends Component +class queueCtrlFlow(entries: Int) extends Component { + val addr_sz = ceil(log(entries)/log(2)).toInt override val io = new 
ioQueueCtrlFlow(addr_sz); // Enqueue and dequeue pointers @@ -175,39 +178,40 @@ class queueCtrlFlow(entries: Int, addr_sz: Int) extends Component full <== full_next; } -class ioQueueDpathFlow(data_sz: Int, addr_sz: Int) extends Bundle() +class ioQueueDpathFlow[T <: Data](addr_sz: Int)(data: => T) extends Bundle() { val wen = Bool('input); val flowthru = Bool('input); - val deq_bits = Bits(data_sz, 'output); - val enq_bits = Bits(data_sz, 'input); + val deq_bits = data.asOutput; + val enq_bits = data.asInput; val waddr = UFix(addr_sz, 'input); val raddr = UFix(addr_sz, 'input); } -class queueDpathFlow(data_sz: Int, entries: Int, addr_sz: Int) extends Component +class queueDpathFlow[T <: Data](entries: Int)(data: => T) extends Component { - override val io = new ioQueueDpathFlow(data_sz, addr_sz); + val addr_sz = ceil(log(entries)/log(2)).toInt + override val io = new ioQueueDpathFlow(addr_sz)(data); val ram = Mem(entries, io.wen, io.waddr, io.enq_bits); val rout = ram(io.raddr); - io.deq_bits := Mux(io.flowthru, io.enq_bits, rout); + Mux(io.flowthru, io.enq_bits, rout) ^^ io.deq_bits; } -class ioQueueFlowPF(data_sz: Int) extends Bundle() +class ioQueueFlowPF[T <: Data](data: => T) extends Bundle() { val enq_val = Bool('input); val enq_rdy = Bool('output); - val enq_bits = Bits(data_sz, 'input); + val enq_bits = data.asInput; val deq_val = Bool('output); val deq_rdy = Bool('input); - val deq_bits = Bits(data_sz, 'output); + val deq_bits = data.asOutput; } -class queueFlowPF(data_sz: Int, entries: Int, addr_sz: Int) extends Component +class queueFlowPF[T <: Data](entries: Int)(data: => T) extends Component { - override val io = new ioQueueFlowPF(data_sz); - val ctrl = new queueCtrlFlow(entries, addr_sz); - val dpath = new queueDpathFlow(data_sz, entries, addr_sz); + override val io = new ioQueueFlowPF(data); + val ctrl = new queueCtrlFlow(entries); + val dpath = new queueDpathFlow(entries)(data); ctrl.io.deq_rdy ^^ io.deq_rdy; ctrl.io.wen <> dpath.io.wen; From 
bc44572d99ea873539c93edbb60183c49d561daa Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Wed, 30 Nov 2011 21:54:55 -0800 Subject: [PATCH 0057/1087] bugfixes due to new hcl jar file --- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/dpath.scala | 13 +++++-------- rocket/src/main/scala/icache_prefetch.scala | 14 +++++++------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 4d28d5aa..5a0ee148 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -199,4 +199,4 @@ object Constants val HAVE_VEC = Bool(false); } -} \ No newline at end of file +} diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 97a9af33..56c044d8 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -45,7 +45,7 @@ class rocketDpath extends Component val alu = new rocketDpathALU(); val ex_alu_out = alu.io.out; - val ex_jr_target = ex_alu_out(VADDR_BITS,0); + val ex_jr_target = ex_alu_out(VADDR_BITS-1,0); val div = new rocketDivider(64); val div_result = div.io.div_result_bits; @@ -57,10 +57,10 @@ class rocketDpath extends Component val mul_result_tag = mul.io.result_tag; val mul_result_val = mul.io.result_val; - val rfile = new rocketDpathRegfile(); + val rfile = new rocketDpathRegfile(); // instruction fetch definitions - val if_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); + val if_reg_pc = Reg(resetVal = UFix(START_ADDR,VADDR_BITS)); // instruction decode definitions val id_reg_valid = Reg(resetVal = Bool(false)); @@ -141,15 +141,12 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, - Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_wdata, // only used for ERET + Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_wdata(VADDR_BITS-1,0), // only used for ERET 
Mux(io.ctrl.sel_pc === PC_EVEC, pcr.io.evec, Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, UFix(0, VADDR_BITS))))))))))); - when (!io.host.start){ - if_reg_pc <== UFix(START_ADDR, VADDR_BITS); - } - when (!io.ctrl.stallf) { + when (!io.ctrl.stallf && io.host.start) { if_reg_pc <== if_next_pc.toUFix; } diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index fe7b761c..a0dd4837 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -40,23 +40,23 @@ class rocketIPrefetcher extends Component() { val ip_mem_resp_val = io.mem.resp_val && io.mem.resp_tag(0).toBool; io.mem.req_val := io.icache.req_val & ~hit | (state === s_req_wait); - io.mem.req_tag := !(io.icache.req_val && !hit); - io.mem.req_addr := Mux(io.mem.req_tag.toBool, prefetch_addr, io.icache.req_addr); + io.mem.req_tag := Cat(Bits(0,2), !(io.icache.req_val && !hit)); + io.mem.req_addr := Mux(io.mem.req_tag(0).toBool, prefetch_addr, io.icache.req_addr); val pdq_reset = Reg(resetVal = Bool(true)); pdq_reset <== demand_miss & ~hit | (state === s_bad_resp_wait); - val fill_cnt = Reg(resetVal = UFix(0, 2)); + val fill_cnt = Reg(resetVal = UFix(0,2)); when (ip_mem_resp_val.toBool) { fill_cnt <== fill_cnt + UFix(1,1); } val fill_done = (fill_cnt === UFix(3,2)) & ip_mem_resp_val; val forward = Reg(resetVal = Bool(false)); - val forward_cnt = Reg(resetVal = UFix(0, 2)); + val forward_cnt = Reg(resetVal = UFix(0,2)); when (forward & pdq.io.deq_val) { forward_cnt <== forward_cnt + UFix(1,1); } - val forward_done = (forward_cnt === UFix(3, 2)) & pdq.io.deq_val; + val forward_done = (forward_cnt === UFix(3,2)) & pdq.io.deq_val; forward <== (demand_miss & hit | forward & ~forward_done); - io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag.toBool) || (forward && pdq.io.deq_val); + io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag(0).toBool) || (forward && pdq.io.deq_val); io.icache.resp_data := 
Mux(forward, pdq.io.deq_bits, io.mem.resp_data); pdq.io.q_reset := pdq_reset; @@ -89,4 +89,4 @@ class rocketIPrefetcher extends Component() { } } -} \ No newline at end of file +} From da2fdf4f854f411b685eff1600264c1445b348eb Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Wed, 30 Nov 2011 22:51:59 -0800 Subject: [PATCH 0058/1087] fixed console i/o --- rocket/src/main/scala/cpu.scala | 6 ++---- rocket/src/main/scala/ctrl.scala | 4 +--- rocket/src/main/scala/dpath.scala | 3 +++ rocket/src/main/scala/dpath_util.scala | 6 ++++++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index e38f3813..602a0716 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -106,10 +106,8 @@ class rocketProc extends Component dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; dpath.io.dmem.resp_data := arb.io.cpu.resp_data; - // FIXME: console disconnected -// io.console.bits := dpath.io.dpath.rs1(7,0); - io.console.bits := Bits(0,8); - io.console.valid := ctrl.io.console.valid; + io.console.bits := dpath.io.console.bits; + io.console.valid := dpath.io.console.valid; ctrl.io.console.rdy := io.console.rdy; } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 680a68f7..8cf3d8f3 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -70,7 +70,7 @@ class ioCtrlDpath extends Bundle() class ioCtrlAll extends Bundle() { val dpath = new ioCtrlDpath(); - val console = new ioConsole(List("rdy", "valid")); + val console = new ioConsole(List("rdy")); val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss")).flip(); val host = new ioHost(List("start")); @@ -299,8 +299,6 @@ class rocketCtrl extends Component val id_ren1 = id_renx1; val id_console_out_val = id_wen_pcr & (id_raddr2 === PCR_CONSOLE); - val console_out_fire = 
id_console_out_val & ~io.dpath.killd; - io.console.valid := console_out_fire.toBool; val wb_reg_div_mul_val = Reg(){Bool()}; val dcache_miss = Reg(io.dmem.resp_miss); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 56c044d8..5041395f 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -25,6 +25,7 @@ class ioDpathAll extends Bundle() { val host = new ioHost(); val ctrl = new ioCtrlDpath().flip(); + val console = new ioConsole(List("valid","bits")); val debug = new ioDebug(); val dmem = new ioDpathDmem(); val imem = new ioDpathImem(); @@ -442,6 +443,8 @@ class rocketDpath extends Component pcr.io.cause := io.ctrl.cause; pcr.io.pc := mem_reg_pc; pcr.io.badvaddr_wen := io.ctrl.badvaddr_wen; + io.console.bits := pcr.io.console_data; + io.console.valid := pcr.io.console_val; } } diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 751bf796..9f6e9cc7 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -57,6 +57,8 @@ class ioDpathPCR extends Bundle() val ptbr_wen = Bool('output); val irq_timer = Bool('output); val irq_ipi = Bool('output); + val console_data = Bits(8, 'output); + val console_val = Bool('output); } class rocketDpathPCR extends Component @@ -101,6 +103,10 @@ class rocketDpathPCR extends Component io.debug.error_mode := reg_error_mode; io.r.data := rdata; + val console_wen = !io.exception && io.w.en && (io.w.addr === PCR_CONSOLE); + io.console_data := Mux(console_wen, io.w.data(7,0), Bits(0,8)); + io.console_val := console_wen; + when (io.host.from_wen) { reg_tohost <== Bits(0,32); reg_fromhost <== io.host.from; From cf1965493bcd5019e908322f5ec3938cf9b68b37 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Thu, 1 Dec 2011 13:14:33 -0800 Subject: [PATCH 0059/1087] renamed SRAM modules to match TSMC65 MC generated SRAMs --- rocket/src/main/scala/dcache.scala | 28 +++++++++-------- 
rocket/src/main/scala/icache.scala | 48 +++++++++++++++++++++++------- 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 1fcda1a5..d9872515 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -249,16 +249,18 @@ class rocketDCacheDM(lines: Int) extends Component { ((state === s_refill) && io.mem.resp_val && (rr_count === UFix(3,2))) || ((state === s_resolve_miss) && r_req_flush); - val tag_array = new rocketSRAMsp(lines, tagbits); +// val tag_array = new rocketSRAMsp(lines, tagbits); + val tag_array = new TS1N65LPA128X27M4; tag_array.io.a := tag_addr; tag_array.io.d := r_cpu_req_ppn; - tag_array.io.we := tag_we; - tag_array.io.bweb := ~Bits(0,tagbits); - tag_array.io.ce := + tag_array.io.web := ~tag_we; + tag_array.io.bweb := Bits(0,tagbits); + tag_array.io.ceb := !( (io.cpu.req_val && io.cpu.req_rdy) || (state === s_start_writeback) || - (state === s_writeback); + (state === s_writeback)); val tag_rdata = tag_array.io.q; + tag_array.io.tsel := Bits(1,2); // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); @@ -331,7 +333,8 @@ class rocketDCacheDM(lines: Int) extends Component { val store_wmask = Mux(p_store_idx(offsetlsb).toBool, Cat(store_wmask_d, Bits(0,64)), Cat(Bits(0,64), store_wmask_d)); // data array - val data_array = new rocketSRAMsp(lines*4, 128); +// val data_array = new rocketSRAMsp(lines*4, 128); + val data_array = new TS1N65LPA512X128M4; val data_array_rdata = data_array.io.q; val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); val r_resp_data = Reg(resp_data); @@ -372,23 +375,24 @@ class rocketDCacheDM(lines: Int) extends Component { Mux((state === s_write_amo), amo_alu_out, store_data)); - data_array.io.we := + data_array.io.web := !( ((state === s_refill) && io.mem.resp_val) || (state === s_write_amo) || - drain_store || resolve_store; + drain_store || 
resolve_store); - data_array.io.bweb := + data_array.io.bweb := ~( Mux((state === s_refill), ~Bits(0,128), Mux((state === s_write_amo), amo_store_wmask, - store_wmask)); + store_wmask))); - data_array.io.ce := + data_array.io.ceb := !( (io.cpu.req_val && io.cpu.req_rdy && (req_load || req_amo)) || (state === s_start_writeback) || (state === s_writeback) || ((state === s_resolve_miss) && (r_req_load || r_req_amo)) || - (state === s_replay_load); + (state === s_replay_load)); + data_array.io.tsel := Bits(1,2); // signal a load miss when the data isn't present in the cache and when it's in the pending store data register // (causes the cache to block for 2 cycles and the load or amo instruction is replayed) val load_miss = diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index a7fa954e..2d332657 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -39,19 +39,41 @@ class ioSRAMsp (width: Int, addrbits: Int) extends Bundle { val a = UFix(addrbits, 'input); // address val d = Bits(width, 'input); // data input val bweb = Bits(width, 'input); // bit write enable mask - val ce = Bool('input); // chip enable - val we = Bool('input); // write enable + val ceb = Bool('input); // chip enable + val web = Bool('input); // write enable val q = Bits(width, 'output); // data out + val tsel = Bits(2, 'input); } // single ported SRAM +class TS1N65LPA128X27M4 extends Component { + val addrbits = 7; + val width = 27; + val entries = 128; + val io = new ioSRAMsp(width, addrbits); + val sram = Mem(entries, ~io.web, io.a, io.d, wrMask = ~io.bweb, resetVal = null); + val rdata = Reg(Mux(~io.ceb, sram.read(io.a), Bits(0,width))); + io.q := rdata; +} + +class TS1N65LPA512X128M4 extends Component { + val addrbits = 9; + val width = 128; + val entries = 512; + val io = new ioSRAMsp(width, addrbits); + val sram = Mem(entries, ~io.web, io.a, io.d, wrMask = ~io.bweb, resetVal = null); + val rdata = Reg(Mux(~io.ceb, 
sram.read(io.a), Bits(0,width))); + io.q := rdata; +} +/* class rocketSRAMsp(entries: Int, width: Int) extends Component { val addrbits = ceil(log10(entries)/log10(2)).toInt; val io = new ioSRAMsp(width, addrbits); val sram = Mem(entries, io.we, io.a, io.d, wrMask = io.bweb, resetVal = null); val rdata = Reg(Mux(io.ce, sram.read(io.a), Bits(0,width))); io.q := rdata; -} +} +*/ // basic direct mapped instruction cache // 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines @@ -99,16 +121,18 @@ class rocketICacheDM(lines: Int) extends Component { } // tag array - val tag_array = new rocketSRAMsp(lines, tagbits); +// val tag_array = new rocketSRAMsp(lines, tagbits); + val tag_array = new TS1N65LPA128X27M4; val tag_addr = Mux((state === s_refill_wait), r_cpu_req_idx(PGIDX_BITS-1,offsetbits), io.cpu.req_idx(PGIDX_BITS-1,offsetbits)).toUFix; val tag_we = (state === s_refill_wait) && io.mem.resp_val; tag_array.io.a := tag_addr; tag_array.io.d := r_cpu_req_ppn; - tag_array.io.we := tag_we; - tag_array.io.bweb := ~Bits(0,tagbits); - tag_array.io.ce := (io.cpu.req_val && io.cpu.req_rdy); + tag_array.io.web := ~tag_we; + tag_array.io.tsel := Bits(1,2); + tag_array.io.bweb := Bits(0,tagbits); + tag_array.io.ceb := !(io.cpu.req_val && io.cpu.req_rdy); val tag_rdata = tag_array.io.q; // valid bit array @@ -124,14 +148,16 @@ class rocketICacheDM(lines: Int) extends Component { val tag_match = (tag_rdata === io.cpu.req_ppn); // data array - val data_array = new rocketSRAMsp(lines*4, 128); +// val data_array = new rocketSRAMsp(lines*4, 128); + val data_array = new TS1N65LPA512X128M4; data_array.io.a := Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), refill_count), io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix; data_array.io.d := io.mem.resp_data; - data_array.io.we := io.mem.resp_val; - data_array.io.bweb := ~Bits(0,128); - data_array.io.ce := (io.cpu.req_rdy && io.cpu.req_val) || (state === s_resolve_miss); + 
data_array.io.web := ~io.mem.resp_val; + data_array.io.bweb := Bits(0,128); + data_array.io.tsel := Bits(1,2); + data_array.io.ceb := !((io.cpu.req_rdy && io.cpu.req_val) || (state === s_resolve_miss)); val data_array_rdata = data_array.io.q; From e70b41241cf363580bcd75ef1184838cebcab07f Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Fri, 2 Dec 2011 01:56:17 -0800 Subject: [PATCH 0060/1087] changed branch addr generation to get it off critical path --- rocket/src/main/scala/ctrl.scala | 6 +++++- rocket/src/main/scala/dpath.scala | 7 +++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 8cf3d8f3..7c229e03 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -37,6 +37,8 @@ class ioCtrlDpath extends Bundle() val mem_eret = Bool('output); val mem_load = Bool('output); val wen = Bool('output); + // instruction in execute is an unconditional jump + val ex_jmp = Bool('output); // enable/disable interrupts val irq_enable = Bool('output); val irq_disable = Bool('output); @@ -411,6 +413,7 @@ class rocketCtrl extends Component ex_reg_xcpt_syscall <== id_syscall.toBool; } + val beq = io.dpath.br_eq; val bne = ~io.dpath.br_eq; val blt = io.dpath.br_lt; @@ -428,6 +431,7 @@ class rocketCtrl extends Component val jr_taken = (ex_reg_br_type === BR_JR); val j_taken = (ex_reg_br_type === BR_J); + io.dpath.ex_jmp := j_taken; io.dmem.req_val := ex_reg_mem_val && ~io.dpath.killx; io.dmem.req_cmd := ex_reg_mem_cmd; @@ -538,9 +542,9 @@ class rocketCtrl extends Component Mux(mem_reg_eret, PC_PCR, // eret instruction Mux(replay_ex, PC_EX, // D$ blocked, D$ miss, privileged inst Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch + Mux(j_taken, PC_BR, // jump Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch Mux(jr_taken, PC_JR, // jump register - Mux(j_taken, PC_J, // jump Mux(io.dpath.btb_hit, PC_BTB, // predicted PC 
from BTB PC_4))))))))); // PC+4 diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 5041395f..ca91e138 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -127,8 +127,8 @@ class rocketDpath extends Component // FIXME: which bits to extract should be calculated based on VADDR_BITS val branch_adder_rhs = - Mux(io.ctrl.sel_pc === PC_BR, Cat(ex_sign_extend_split(41,0), UFix(0, 1)), - Cat(Fill(17, ex_reg_inst(31)), ex_reg_inst(31,7), UFix(0, 1))); + Mux(io.ctrl.ex_jmp, Cat(Fill(17, ex_reg_inst(31)), ex_reg_inst(31,7), UFix(0,1)), + Cat(ex_sign_extend_split(41,0), UFix(0, 1))); val ex_branch_target = ex_reg_pc + branch_adder_rhs.toUFix; @@ -140,12 +140,11 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_EX, ex_reg_pc, Mux(io.ctrl.sel_pc === PC_EX4, ex_reg_pc_plus4, Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, - Mux(io.ctrl.sel_pc === PC_J, ex_branch_target, Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_wdata(VADDR_BITS-1,0), // only used for ERET Mux(io.ctrl.sel_pc === PC_EVEC, pcr.io.evec, Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, - UFix(0, VADDR_BITS))))))))))); + UFix(0, VADDR_BITS)))))))))); when (!io.ctrl.stallf && io.host.start) { if_reg_pc <== if_next_pc.toUFix; From c580180b6614e41e45ef1b242c505a0fef16acbb Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Fri, 2 Dec 2011 02:01:08 -0800 Subject: [PATCH 0061/1087] tweaks to cache/SRAM interface for TSMC65 SRAMs --- rocket/src/main/scala/dcache.scala | 44 ++++++++++++---------- rocket/src/main/scala/icache.scala | 60 ++++++++++++++++-------------- 2 files changed, 58 insertions(+), 46 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index d9872515..7e55b10a 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -251,16 +251,18 @@ class rocketDCacheDM(lines: Int) extends Component { // val 
tag_array = new rocketSRAMsp(lines, tagbits); val tag_array = new TS1N65LPA128X27M4; - tag_array.io.a := tag_addr; - tag_array.io.d := r_cpu_req_ppn; - tag_array.io.web := ~tag_we; - tag_array.io.bweb := Bits(0,tagbits); - tag_array.io.ceb := !( - (io.cpu.req_val && io.cpu.req_rdy) || + val tag_array_ceb = Mux(reset, Bool(true), !( + (io.cpu.req_val && io.cpu.req_rdy) || (state === s_start_writeback) || - (state === s_writeback)); - val tag_rdata = tag_array.io.q; - tag_array.io.tsel := Bits(1,2); + (state === s_writeback))); + val tag_array_web = Mux(reset, Bool(true), !tag_we); + tag_array.io.A := tag_addr; + tag_array.io.D := r_cpu_req_ppn; + tag_array.io.CEB := tag_array_ceb && tag_array_web; + tag_array.io.WEB := tag_array_web; + tag_array.io.BWEB := Bits(0,tagbits); + val tag_rdata = tag_array.io.Q; + tag_array.io.TSEL := Bits(1,2); // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); @@ -335,7 +337,7 @@ class rocketDCacheDM(lines: Int) extends Component { // data array // val data_array = new rocketSRAMsp(lines*4, 128); val data_array = new TS1N65LPA512X128M4; - val data_array_rdata = data_array.io.q; + val data_array_rdata = data_array.io.Q; val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); val r_resp_data = Reg(resp_data); @@ -363,36 +365,40 @@ class rocketDCacheDM(lines: Int) extends Component { amo_alu.io.rhs := r_amo_data.toUFix; val amo_alu_out = Cat(amo_alu.io.result,amo_alu.io.result); - data_array.io.a := + data_array.io.A := Mux(drain_store || resolve_store, p_store_idx(PGIDX_BITS-1, offsetmsb-1), Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count_next), Mux((state === s_start_writeback) || (state === s_writeback) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count), Mux((state === s_resolve_miss) || (state === s_replay_load) || (state === s_write_amo), r_cpu_req_idx(PGIDX_BITS-1, offsetmsb-1), 
io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1))))).toUFix; - data_array.io.d := + data_array.io.D := Mux((state === s_refill), io.mem.resp_data, Mux((state === s_write_amo), amo_alu_out, store_data)); - - data_array.io.web := !( + + val data_array_web = Mux(reset, Bool(true), !( ((state === s_refill) && io.mem.resp_val) || (state === s_write_amo) || - drain_store || resolve_store); + drain_store || resolve_store)); + + data_array.io.WEB := data_array_web; - data_array.io.bweb := ~( + data_array.io.BWEB := ~( Mux((state === s_refill), ~Bits(0,128), Mux((state === s_write_amo), amo_store_wmask, store_wmask))); - data_array.io.ceb := !( + val data_array_ceb = Mux(reset, Bool(true), !( (io.cpu.req_val && io.cpu.req_rdy && (req_load || req_amo)) || (state === s_start_writeback) || (state === s_writeback) || ((state === s_resolve_miss) && (r_req_load || r_req_amo)) || - (state === s_replay_load)); + (state === s_replay_load))); - data_array.io.tsel := Bits(1,2); + data_array.io.CEB := data_array_ceb && data_array_web; + + data_array.io.TSEL := Bits(1,2); // signal a load miss when the data isn't present in the cache and when it's in the pending store data register // (causes the cache to block for 2 cycles and the load or amo instruction is replayed) val load_miss = diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 2d332657..e7a53ee3 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -36,13 +36,13 @@ class ioICacheDM extends Bundle() // single port SRAM i/o class ioSRAMsp (width: Int, addrbits: Int) extends Bundle { - val a = UFix(addrbits, 'input); // address - val d = Bits(width, 'input); // data input - val bweb = Bits(width, 'input); // bit write enable mask - val ceb = Bool('input); // chip enable - val web = Bool('input); // write enable - val q = Bits(width, 'output); // data out - val tsel = Bits(2, 'input); + val A = UFix(addrbits, 'input); // address + val D = Bits(width, 'input); // 
data input + val BWEB = Bits(width, 'input); // bit write enable mask + val CEB = Bool('input); // chip enable + val WEB = Bool('input); // write enable + val Q = Bits(width, 'output); // data out + val TSEL = Bits(2, 'input); } // single ported SRAM @@ -51,9 +51,10 @@ class TS1N65LPA128X27M4 extends Component { val width = 27; val entries = 128; val io = new ioSRAMsp(width, addrbits); - val sram = Mem(entries, ~io.web, io.a, io.d, wrMask = ~io.bweb, resetVal = null); - val rdata = Reg(Mux(~io.ceb, sram.read(io.a), Bits(0,width))); - io.q := rdata; + val wmask = ~io.BWEB; + val sram = Mem(entries, !io.WEB, io.A, io.D, wrMask = wmask, resetVal = null); + val rdata = Reg(Mux(!io.CEB, sram.read(io.A), Bits(0,width))); + io.Q := rdata; } class TS1N65LPA512X128M4 extends Component { @@ -61,9 +62,10 @@ class TS1N65LPA512X128M4 extends Component { val width = 128; val entries = 512; val io = new ioSRAMsp(width, addrbits); - val sram = Mem(entries, ~io.web, io.a, io.d, wrMask = ~io.bweb, resetVal = null); - val rdata = Reg(Mux(~io.ceb, sram.read(io.a), Bits(0,width))); - io.q := rdata; + val wmask = ~io.BWEB; + val sram = Mem(entries, !io.WEB, io.A, io.D, wrMask = wmask, resetVal = null); + val rdata = Reg(Mux(!io.CEB, sram.read(io.A), Bits(0,width))); + io.Q := rdata; } /* class rocketSRAMsp(entries: Int, width: Int) extends Component { @@ -127,13 +129,15 @@ class rocketICacheDM(lines: Int) extends Component { Mux((state === s_refill_wait), r_cpu_req_idx(PGIDX_BITS-1,offsetbits), io.cpu.req_idx(PGIDX_BITS-1,offsetbits)).toUFix; val tag_we = (state === s_refill_wait) && io.mem.resp_val; - tag_array.io.a := tag_addr; - tag_array.io.d := r_cpu_req_ppn; - tag_array.io.web := ~tag_we; - tag_array.io.tsel := Bits(1,2); - tag_array.io.bweb := Bits(0,tagbits); - tag_array.io.ceb := !(io.cpu.req_val && io.cpu.req_rdy); - val tag_rdata = tag_array.io.q; + val tag_array_ceb = Mux(reset, Bool(true), !(io.cpu.req_val && io.cpu.req_rdy)); + val tag_array_web = Mux(reset, Bool(true), 
!tag_we); + tag_array.io.A := tag_addr; + tag_array.io.D := r_cpu_req_ppn; + tag_array.io.CEB := tag_array_ceb && tag_array_web; + tag_array.io.WEB := tag_array_web; + tag_array.io.TSEL := Bits(1,2); + tag_array.io.BWEB := Bits(0,tagbits); + val tag_rdata = tag_array.io.Q; // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); @@ -150,16 +154,18 @@ class rocketICacheDM(lines: Int) extends Component { // data array // val data_array = new rocketSRAMsp(lines*4, 128); val data_array = new TS1N65LPA512X128M4; - data_array.io.a := + val data_array_ceb = Mux(reset, Bool(true), !((io.cpu.req_rdy && io.cpu.req_val) || (state === s_resolve_miss))); + val data_array_web = Mux(reset, Bool(true), ~io.mem.resp_val); + data_array.io.A := Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), refill_count), io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix; - data_array.io.d := io.mem.resp_data; - data_array.io.web := ~io.mem.resp_val; - data_array.io.bweb := Bits(0,128); - data_array.io.tsel := Bits(1,2); - data_array.io.ceb := !((io.cpu.req_rdy && io.cpu.req_val) || (state === s_resolve_miss)); + data_array.io.D := io.mem.resp_data; + data_array.io.CEB := data_array_ceb && data_array_web; + data_array.io.WEB := data_array_web; + data_array.io.BWEB := Bits(0,128); + data_array.io.TSEL := Bits(1,2); - val data_array_rdata = data_array.io.q; + val data_array_rdata = data_array.io.Q; // output signals io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_valid && tag_match; From e894b798707fec4fd1927df79fc1a1a3a9c71203 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sat, 3 Dec 2011 19:41:15 -0800 Subject: [PATCH 0062/1087] caches now use Mem4() memories for tag+data arrays --- rocket/src/main/scala/dcache.scala | 71 ++++++++++--------------- rocket/src/main/scala/icache.scala | 83 +++++------------------------- rocket/src/main/scala/top.scala | 3 +- 3 files changed, 42 insertions(+), 115 
deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 7e55b10a..9c9c680b 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -249,20 +249,11 @@ class rocketDCacheDM(lines: Int) extends Component { ((state === s_refill) && io.mem.resp_val && (rr_count === UFix(3,2))) || ((state === s_resolve_miss) && r_req_flush); -// val tag_array = new rocketSRAMsp(lines, tagbits); - val tag_array = new TS1N65LPA128X27M4; - val tag_array_ceb = Mux(reset, Bool(true), !( - (io.cpu.req_val && io.cpu.req_rdy) || - (state === s_start_writeback) || - (state === s_writeback))); - val tag_array_web = Mux(reset, Bool(true), !tag_we); - tag_array.io.A := tag_addr; - tag_array.io.D := r_cpu_req_ppn; - tag_array.io.CEB := tag_array_ceb && tag_array_web; - tag_array.io.WEB := tag_array_web; - tag_array.io.BWEB := Bits(0,tagbits); - val tag_rdata = tag_array.io.Q; - tag_array.io.TSEL := Bits(1,2); + val tag_array = Mem4(lines, r_cpu_req_ppn); + tag_array.setReadLatency(0); + val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); +// tag_array.write(tag_addr, r_cpu_req_ppn, tag_we); +// val tag_rdata = tag_array(tag_addr); // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); @@ -334,14 +325,9 @@ class rocketDCacheDM(lines: Int) extends Component { val store_wmask_d = storegen.io.store_wmask; val store_wmask = Mux(p_store_idx(offsetlsb).toBool, Cat(store_wmask_d, Bits(0,64)), Cat(Bits(0,64), store_wmask_d)); - // data array -// val data_array = new rocketSRAMsp(lines*4, 128); - val data_array = new TS1N65LPA512X128M4; - val data_array_rdata = data_array.io.Q; - val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); - val r_resp_data = Reg(resp_data); - // ALU for AMOs + val amo_alu = new rocketDCacheAmoALU(); + val amo_alu_out = Cat(amo_alu.io.result,amo_alu.io.result); val amo_wmask = Mux(r_cpu_req_type === MT_D, ~Bits(0,8), 
Mux(r_cpu_req_idx(2).toBool, Cat(~Bits(0,4), Bits(0,4)), @@ -358,47 +344,42 @@ class rocketDCacheDM(lines: Int) extends Component { val amo_store_wmask = Mux(r_cpu_req_idx(offsetlsb).toBool, Cat(amo_store_wmask_d, Bits(0,64)), Cat(Bits(0,64), amo_store_wmask_d)); - val amo_alu = new rocketDCacheAmoALU(); - amo_alu.io.cmd := r_cpu_req_cmd; - amo_alu.io.wmask := amo_wmask; - amo_alu.io.lhs := Mux(r_cpu_resp_val, resp_data, r_resp_data).toUFix; - amo_alu.io.rhs := r_amo_data.toUFix; - val amo_alu_out = Cat(amo_alu.io.result,amo_alu.io.result); - - data_array.io.A := + // data array + val data_addr = Mux(drain_store || resolve_store, p_store_idx(PGIDX_BITS-1, offsetmsb-1), Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count_next), Mux((state === s_start_writeback) || (state === s_writeback) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count), Mux((state === s_resolve_miss) || (state === s_replay_load) || (state === s_write_amo), r_cpu_req_idx(PGIDX_BITS-1, offsetmsb-1), io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1))))).toUFix; - data_array.io.D := + val data_wdata = Mux((state === s_refill), io.mem.resp_data, Mux((state === s_write_amo), amo_alu_out, store_data)); - val data_array_web = Mux(reset, Bool(true), !( + val data_we = ((state === s_refill) && io.mem.resp_val) || (state === s_write_amo) || - drain_store || resolve_store)); - - data_array.io.WEB := data_array_web; + drain_store || resolve_store; - data_array.io.BWEB := ~( + val data_wmask = Mux((state === s_refill), ~Bits(0,128), Mux((state === s_write_amo), amo_store_wmask, - store_wmask))); - - val data_array_ceb = Mux(reset, Bool(true), !( - (io.cpu.req_val && io.cpu.req_rdy && (req_load || req_amo)) || - (state === s_start_writeback) || - (state === s_writeback) || - ((state === s_resolve_miss) && (r_req_load || r_req_amo)) || - (state === s_replay_load))); + store_wmask)); - data_array.io.CEB := data_array_ceb && data_array_web; + val 
data_array = Mem4(lines*4, data_wdata); + data_array.setReadLatency(0); + val data_array_rdata = data_array.rw(data_addr, data_wdata, data_we, data_wmask); +// data_array.write(data_addr, data_wdata, data_we, data_wmask); +// val data_array_rdata = data_array(data_addr); + val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); + val r_resp_data = Reg(resp_data); + + amo_alu.io.cmd := r_cpu_req_cmd; + amo_alu.io.wmask := amo_wmask; + amo_alu.io.lhs := Mux(r_cpu_resp_val, resp_data, r_resp_data).toUFix; + amo_alu.io.rhs := r_amo_data.toUFix; - data_array.io.TSEL := Bits(1,2); // signal a load miss when the data isn't present in the cache and when it's in the pending store data register // (causes the cache to block for 2 cycles and the load or amo instruction is replayed) val load_miss = diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index e7a53ee3..59aa9018 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -34,49 +34,6 @@ class ioICacheDM extends Bundle() val mem = new ioIcache().flip(); } -// single port SRAM i/o -class ioSRAMsp (width: Int, addrbits: Int) extends Bundle { - val A = UFix(addrbits, 'input); // address - val D = Bits(width, 'input); // data input - val BWEB = Bits(width, 'input); // bit write enable mask - val CEB = Bool('input); // chip enable - val WEB = Bool('input); // write enable - val Q = Bits(width, 'output); // data out - val TSEL = Bits(2, 'input); -} - -// single ported SRAM -class TS1N65LPA128X27M4 extends Component { - val addrbits = 7; - val width = 27; - val entries = 128; - val io = new ioSRAMsp(width, addrbits); - val wmask = ~io.BWEB; - val sram = Mem(entries, !io.WEB, io.A, io.D, wrMask = wmask, resetVal = null); - val rdata = Reg(Mux(!io.CEB, sram.read(io.A), Bits(0,width))); - io.Q := rdata; -} - -class TS1N65LPA512X128M4 extends Component { - val addrbits = 9; - val width = 128; - val entries = 512; - 
val io = new ioSRAMsp(width, addrbits); - val wmask = ~io.BWEB; - val sram = Mem(entries, !io.WEB, io.A, io.D, wrMask = wmask, resetVal = null); - val rdata = Reg(Mux(!io.CEB, sram.read(io.A), Bits(0,width))); - io.Q := rdata; -} -/* -class rocketSRAMsp(entries: Int, width: Int) extends Component { - val addrbits = ceil(log10(entries)/log10(2)).toInt; - val io = new ioSRAMsp(width, addrbits); - val sram = Mem(entries, io.we, io.a, io.d, wrMask = io.bweb, resetVal = null); - val rdata = Reg(Mux(io.ce, sram.read(io.a), Bits(0,width))); - io.q := rdata; -} -*/ - // basic direct mapped instruction cache // 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines // parameters : @@ -121,23 +78,17 @@ class rocketICacheDM(lines: Int) extends Component { when (io.mem.resp_val) { refill_count <== refill_count + UFix(1); } - - // tag array -// val tag_array = new rocketSRAMsp(lines, tagbits); - val tag_array = new TS1N65LPA128X27M4; val tag_addr = Mux((state === s_refill_wait), r_cpu_req_idx(PGIDX_BITS-1,offsetbits), io.cpu.req_idx(PGIDX_BITS-1,offsetbits)).toUFix; val tag_we = (state === s_refill_wait) && io.mem.resp_val; - val tag_array_ceb = Mux(reset, Bool(true), !(io.cpu.req_val && io.cpu.req_rdy)); - val tag_array_web = Mux(reset, Bool(true), !tag_we); - tag_array.io.A := tag_addr; - tag_array.io.D := r_cpu_req_ppn; - tag_array.io.CEB := tag_array_ceb && tag_array_web; - tag_array.io.WEB := tag_array_web; - tag_array.io.TSEL := Bits(1,2); - tag_array.io.BWEB := Bits(0,tagbits); - val tag_rdata = tag_array.io.Q; + + val tag_array = Mem4(lines, r_cpu_req_ppn); + tag_array.setReadLatency(0); + val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); + +// tag_array.write(tag_addr, r_cpu_req_ppn, tag_we); +// val tag_rdata = tag_array.read(tag_addr); // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); @@ -152,21 +103,15 @@ class rocketICacheDM(lines: Int) extends Component { val tag_match = (tag_rdata === io.cpu.req_ppn); // data array -// val 
data_array = new rocketSRAMsp(lines*4, 128); - val data_array = new TS1N65LPA512X128M4; - val data_array_ceb = Mux(reset, Bool(true), !((io.cpu.req_rdy && io.cpu.req_val) || (state === s_resolve_miss))); - val data_array_web = Mux(reset, Bool(true), ~io.mem.resp_val); - data_array.io.A := + val data_addr = Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), refill_count), io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix; - data_array.io.D := io.mem.resp_data; - data_array.io.CEB := data_array_ceb && data_array_web; - data_array.io.WEB := data_array_web; - data_array.io.BWEB := Bits(0,128); - data_array.io.TSEL := Bits(1,2); - - val data_array_rdata = data_array.io.Q; - + val data_array = Mem4(lines*4, io.mem.resp_data); + data_array.setReadLatency(0); + val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val); +// data_array.write(data_addr, io.mem.resp_data, io.mem.resp_val); +// val data_array_rdata = data_array.read(data_addr); + // output signals io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_valid && tag_match; io.cpu.req_rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || (tag_valid && tag_match)); diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 26f27010..4cac1465 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -38,7 +38,8 @@ object top_main { def main(args: Array[String]) = { // Can turn off --debug and --vcd when done with debugging to improve emulator performance // val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); - val cpu_args = args ++ Array("--target-dir", "generated-src","--debug"); +// val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug"); + val cpu_args = args ++ Array("--target-dir", "generated-src"); // Set variables based off of command flags // for(a <- args) { // a match { From 
ff95cacb559c274345c238c7637738f8652943ea Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Sun, 4 Dec 2011 01:18:38 -0800 Subject: [PATCH 0063/1087] icache/dcache tag+data arrays now implemented using Mem4() however there seems to be a bug - readLatency needs to be set to 0 for C model to work, and 1 for Verilog model. --- rocket/src/main/scala/dcache.scala | 4 ---- rocket/src/main/scala/icache.scala | 7 +------ 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 9c9c680b..bd12811e 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -252,8 +252,6 @@ class rocketDCacheDM(lines: Int) extends Component { val tag_array = Mem4(lines, r_cpu_req_ppn); tag_array.setReadLatency(0); val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); -// tag_array.write(tag_addr, r_cpu_req_ppn, tag_we); -// val tag_rdata = tag_array(tag_addr); // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); @@ -370,8 +368,6 @@ class rocketDCacheDM(lines: Int) extends Component { val data_array = Mem4(lines*4, data_wdata); data_array.setReadLatency(0); val data_array_rdata = data_array.rw(data_addr, data_wdata, data_we, data_wmask); -// data_array.write(data_addr, data_wdata, data_we, data_wmask); -// val data_array_rdata = data_array(data_addr); val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); val r_resp_data = Reg(resp_data); diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 59aa9018..334fb616 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -86,10 +86,7 @@ class rocketICacheDM(lines: Int) extends Component { val tag_array = Mem4(lines, r_cpu_req_ppn); tag_array.setReadLatency(0); val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); - -// tag_array.write(tag_addr, r_cpu_req_ppn, tag_we); -// val 
tag_rdata = tag_array.read(tag_addr); - + // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); when (io.cpu.invalidate) { @@ -109,8 +106,6 @@ class rocketICacheDM(lines: Int) extends Component { val data_array = Mem4(lines*4, io.mem.resp_data); data_array.setReadLatency(0); val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val); -// data_array.write(data_addr, io.mem.resp_data, io.mem.resp_val); -// val data_array_rdata = data_array.read(data_addr); // output signals io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_valid && tag_match; From fa784d1d7d852d483e7c483f4cc21d7b0056e262 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Mon, 5 Dec 2011 00:33:17 -0800 Subject: [PATCH 0064/1087] made setReadLatency argument a parameter defined in consts.scala --- rocket/src/main/scala/consts.scala | 2 ++ rocket/src/main/scala/dcache.scala | 6 ++++-- rocket/src/main/scala/icache.scala | 6 ++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 5a0ee148..d93eac51 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -197,6 +197,8 @@ object Constants val HAVE_RVC = Bool(false); val HAVE_FPU = Bool(false); val HAVE_VEC = Bool(false); + + val SRAM_READ_LATENCY = 0; } } diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index bd12811e..0c6849a2 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -250,7 +250,8 @@ class rocketDCacheDM(lines: Int) extends Component { ((state === s_resolve_miss) && r_req_flush); val tag_array = Mem4(lines, r_cpu_req_ppn); - tag_array.setReadLatency(0); + tag_array.setReadLatency(SRAM_READ_LATENCY); +// tag_array.setTarget('inst); val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); // valid bit array @@ -366,7 +367,8 @@ class rocketDCacheDM(lines: Int) extends 
Component { store_wmask)); val data_array = Mem4(lines*4, data_wdata); - data_array.setReadLatency(0); + data_array.setReadLatency(SRAM_READ_LATENCY); +// data_array.setTarget('inst); val data_array_rdata = data_array.rw(data_addr, data_wdata, data_we, data_wmask); val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); val r_resp_data = Reg(resp_data); diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 334fb616..c82dda68 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -84,7 +84,8 @@ class rocketICacheDM(lines: Int) extends Component { val tag_we = (state === s_refill_wait) && io.mem.resp_val; val tag_array = Mem4(lines, r_cpu_req_ppn); - tag_array.setReadLatency(0); + tag_array.setReadLatency(SRAM_READ_LATENCY); +// tag_array.setTarget('inst); val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); // valid bit array @@ -104,7 +105,8 @@ class rocketICacheDM(lines: Int) extends Component { Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), refill_count), io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix; val data_array = Mem4(lines*4, io.mem.resp_data); - data_array.setReadLatency(0); + data_array.setReadLatency(SRAM_READ_LATENCY); +// data_array.setTarget('inst); val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val); // output signals From a87ad06780261fb40a8b5507aa28561a9075c916 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 5 Dec 2011 15:45:44 -0800 Subject: [PATCH 0065/1087] Automatically infer rocketCAM address width --- rocket/src/main/scala/dtlb.scala | 4 ++-- rocket/src/main/scala/itlb.scala | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index dabcc02d..23f1563a 100644 --- a/rocket/src/main/scala/dtlb.scala +++ 
b/rocket/src/main/scala/dtlb.scala @@ -68,7 +68,7 @@ class rocketDTLB(entries: Int) extends Component val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); - val tag_cam = new rocketCAM(entries, addr_bits, ASID_BITS+VPN_BITS); + val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); tag_cam.io.clear := io.cpu.invalidate; @@ -183,4 +183,4 @@ class rocketDTLB(entries: Int) extends Component } } } -} \ No newline at end of file +} diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index dcfb0efb..95fde913 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -18,7 +18,8 @@ class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { val write_addr = UFix(addr_bits, 'input); } -class rocketCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Component { +class rocketCAM(entries: Int, tag_bits: Int) extends Component { + val addr_bits = ceil(log(entries)/log(2)).toInt; val io = new ioCAM(entries, addr_bits, tag_bits); val cam_tags = Mem(entries, io.write, io.write_addr, io.write_tag); @@ -112,7 +113,7 @@ class rocketITLB(entries: Int) extends Component val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); - val tag_cam = new rocketCAM(entries, addr_bits, ASID_BITS+VPN_BITS); + val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); tag_cam.io.clear := io.cpu.invalidate; @@ -204,4 +205,4 @@ class rocketITLB(entries: Int) extends Component } } } -} \ No newline at end of file +} From 218f63e66e7b72a11cbe9be2368b3ff90ef7ca46 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 9 Dec 2011 00:42:43 -0800 Subject: [PATCH 0066/1087] code cleanup/parameterization --- rocket/src/main/scala/arbiter.scala | 27 ++++---- rocket/src/main/scala/consts.scala | 26 ++++++- rocket/src/main/scala/dcache.scala | 4 +- 
rocket/src/main/scala/icache.scala | 23 ++++--- rocket/src/main/scala/icache_prefetch.scala | 38 +++++------ rocket/src/main/scala/queues.scala | 26 +++---- rocket/src/main/scala/util.scala | 75 ++++++++++++++++++++- 7 files changed, 153 insertions(+), 66 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 7e2ca8b4..eef8ea6f 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -10,15 +10,15 @@ class ioMem() extends Bundle val req_rdy = Bool('input); val req_rw = Bool('output); val req_addr = UFix(PADDR_BITS, 'output); - val req_wdata = Bits(128, 'output); - val req_tag = Bits(4, 'output); + val req_wdata = Bits(MEM_DATA_BITS, 'output); + val req_tag = Bits(MEM_TAG_BITS, 'output); val resp_val = Bool('input); - val resp_tag = Bits(4, 'input); - val resp_data = Bits(128, 'input); + val resp_tag = Bits(MEM_TAG_BITS, 'input); + val resp_data = Bits(MEM_DATA_BITS, 'input); } -class ioArbiter extends Bundle() { +class ioMemArbiter extends Bundle() { val mem = new ioMem(); val dcache = new ioDcache(); // val icache = new ioIcache(); @@ -26,7 +26,7 @@ class ioArbiter extends Bundle() { } class rocketMemArbiter extends Component { - val io = new ioArbiter(); + val io = new ioMemArbiter(); // ***************************** // Interface to memory @@ -41,11 +41,8 @@ class rocketMemArbiter extends Component { // Give priority to Icache io.mem.req_addr := Mux(io.icache.req_val,io.icache.req_addr,io.dcache.req_addr); - // high bit of tag=0 for I$, tag=0 for D$ -// io.mem.req_tag := Mux(io.icache.req_val,Bits(0,4),Bits(1,4)); - io.mem.req_tag := Mux(io.icache.req_val, - Cat(Bits(0,1), io.icache.req_tag), - Cat(Bits(1,1), io.dcache.req_tag)); + // low bit of tag=0 for I$, 1 for D$ + io.mem.req_tag := Cat(Mux(io.icache.req_val, io.icache.req_tag, io.dcache.req_tag), !io.icache.req_val) // Just pass through write data (only D$ will write) io.mem.req_wdata := io.dcache.req_wdata; @@ -59,15 
+56,15 @@ class rocketMemArbiter extends Component { io.dcache.req_rdy := io.mem.req_rdy && !io.icache.req_val; // Response will only be valid for D$ or I$ not both because of tag bits - io.icache.resp_val := io.mem.resp_val && !io.mem.resp_tag(3).toBool; - io.dcache.resp_val := io.mem.resp_val && io.mem.resp_tag(3).toBool; + io.icache.resp_val := io.mem.resp_val && !io.mem.resp_tag(0).toBool; + io.dcache.resp_val := io.mem.resp_val && io.mem.resp_tag(0).toBool; // Feed through data to both io.icache.resp_data := io.mem.resp_data; io.dcache.resp_data := io.mem.resp_data; - io.icache.resp_tag := io.mem.resp_tag(2,0); -// io.dcache.resp_tag := io.mem.resp_tag(2,0); + io.icache.resp_tag := io.mem.resp_tag >> UFix(1) + io.dcache.resp_tag := io.mem.resp_tag >> UFix(1) } diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index d93eac51..1c4c2996 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -1,6 +1,7 @@ package Top { import Chisel._ +import scala.math._ object Constants { @@ -125,10 +126,12 @@ object Constants val M_X = UFix(0, 4); val M_XRD = Bits("b0000", 4); // int load val M_XWR = Bits("b0001", 4); // int store - val M_FRD = Bits("b0010", 4); // fp load - val M_FWR = Bits("b0011", 4); // fp store - val M_FLA = Bits("b0100", 4); // flush cache + val M_PFR = Bits("b0010", 4); // prefetch with intent to read + val M_PFW = Bits("b0011", 4); // prefetch with intent to write + val M_FLA = Bits("b0100", 4); // write back and invlaidate all lines val M_PRD = Bits("b0101", 4); // PTW load + val M_INV = Bits("b0110", 4); // write back and invalidate line + val M_CLN = Bits("b0111", 4); // write back line val M_XA_ADD = Bits("b1000", 4); val M_XA_SWAP = Bits("b1001", 4); val M_XA_AND = Bits("b1010", 4); @@ -183,6 +186,23 @@ object Constants val VPN_BITS = VADDR_BITS-PGIDX_BITS; val ASID_BITS = 7; val PERM_BITS = 6; + + // rocketNBDCacheDM parameters + val CPU_DATA_BITS = 64; + val CPU_TAG_BITS = 5; 
+ val OFFSET_BITS = 6; // log2(cache line size in bytes) + val NMSHR = 2; // number of primary misses + val NRPQ = 16; // number of secondary misses + val NSDQ = 10; // number of secondary stores/AMOs + val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) + val IDX_BITS = PGIDX_BITS - OFFSET_BITS; + + // external memory interface + val IMEM_TAG_BITS = 1; + val DMEM_TAG_BITS = ceil(log(NMSHR)/log(2)).toInt; + val MEM_TAG_BITS = 1 + max(IMEM_TAG_BITS, DMEM_TAG_BITS); + val MEM_DATA_BITS = 128; + val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS; val DTLB_ENTRIES = 8; val ITLB_ENTRIES = 8; diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 0c6849a2..23d8352a 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -27,13 +27,13 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { // interface between D$ and next level in memory hierarchy class ioDcache(view: List[String] = null) extends Bundle(view) { val req_addr = UFix(PADDR_BITS, 'input); - val req_tag = UFix(3, 'input); + val req_tag = UFix(DMEM_TAG_BITS, 'input); val req_val = Bool('input); val req_rdy = Bool('output); val req_wdata = Bits(128, 'input); val req_rw = Bool('input); val resp_data = Bits(128, 'output); -// val resp_tag = Bits(3, 'output); + val resp_tag = Bits(DMEM_TAG_BITS, 'output); val resp_val = Bool('output); } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index c82dda68..e65591be 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -24,7 +24,7 @@ class ioIcache(view: List[String] = null) extends Bundle (view) val req_addr = UFix(PADDR_BITS, 'input); val req_val = Bool('input); val req_rdy = Bool('output); - val resp_data = Bits(128, 'output); + val resp_data = Bits(MEM_DATA_BITS, 'output); val resp_val = Bool('output); } @@ -50,8 +50,9 @@ class rocketICacheDM(lines: Int) extends Component { val indexmsb = taglsb-1; val 
indexlsb = offsetbits; val offsetmsb = indexlsb-1; - val offsetlsb = 2; val databits = 32; + val offsetlsb = ceil(log(databits/8)/log(2)).toInt; + val rf_cnt_bits = ceil(log(REFILL_CYCLES)/log(2)).toInt; val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: s_resolve_miss :: Nil = Enum(6) { UFix() }; val state = Reg(resetVal = s_reset); @@ -74,7 +75,7 @@ class rocketICacheDM(lines: Int) extends Component { } // refill counter - val refill_count = Reg(resetVal = UFix(0,2)); + val refill_count = Reg(resetVal = UFix(0, rf_cnt_bits)); when (io.mem.resp_val) { refill_count <== refill_count + UFix(1); } @@ -104,7 +105,7 @@ class rocketICacheDM(lines: Int) extends Component { val data_addr = Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), refill_count), io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix; - val data_array = Mem4(lines*4, io.mem.resp_data); + val data_array = Mem4(lines*REFILL_CYCLES, io.mem.resp_data); data_array.setReadLatency(SRAM_READ_LATENCY); // data_array.setTarget('inst); val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val); @@ -112,14 +113,14 @@ class rocketICacheDM(lines: Int) extends Component { // output signals io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_valid && tag_match; io.cpu.req_rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || (tag_valid && tag_match)); - io.cpu.resp_data := - MuxLookup(r_cpu_req_idx(offsetmsb-2, offsetlsb).toUFix, data_array_rdata(127, 96), - Array(UFix(2) -> data_array_rdata(95,64), - UFix(1) -> data_array_rdata(63,32), - UFix(0) -> data_array_rdata(31,0))); + + val word_mux = (new MuxN(REFILL_CYCLES)) { Bits(width = databits) } + word_mux.io.sel := r_cpu_req_idx(offsetmsb - rf_cnt_bits, offsetlsb).toUFix + for (i <- 0 to MEM_DATA_BITS/databits-1) { word_mux.io.in(i) := data_array_rdata((i+1)*databits-1, i*databits) } + io.cpu.resp_data := word_mux.io.out 
io.mem.req_val := (state === s_request); - io.mem.req_addr := Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), Bits(0,2)).toUFix; + io.mem.req_addr := Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), Bits(0, rf_cnt_bits)).toUFix; // control state machine switch (state) { @@ -146,7 +147,7 @@ class rocketICacheDM(lines: Int) extends Component { } } is (s_refill) { - when (io.mem.resp_val && (refill_count === UFix(3,2))) { + when (io.mem.resp_val && (~refill_count === UFix(0))) { state <== s_resolve_miss; } } diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 9e1d1486..540734ec 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -3,17 +3,17 @@ package Top { import Chisel._; import Node._; import Constants._; -import queues._; +import scala.math._; class ioIPrefetcherMem(view: List[String] = null) extends Bundle (view) { val req_addr = UFix(PADDR_BITS, 'output); val req_val = Bool('output); val req_rdy = Bool('input); - val req_tag = Bits(3, 'output); - val resp_data = Bits(128, 'input); + val req_tag = Bits(IMEM_TAG_BITS, 'output); + val resp_data = Bits(MEM_DATA_BITS, 'input); val resp_val = Bool('input); - val resp_tag = Bits(3, 'input); + val resp_tag = Bits(IMEM_TAG_BITS, 'input); } class ioIPrefetcher extends Bundle() { @@ -23,14 +23,14 @@ class ioIPrefetcher extends Bundle() { class rocketIPrefetcher extends Component() { val io = new ioIPrefetcher(); - val pdq = (new queueSimplePF(4)) { Bits(width = 128) }; + val pdq = (new queueSimplePF(REFILL_CYCLES)) { Bits(width = MEM_DATA_BITS) }; val s_invalid :: s_valid :: s_refilling :: s_req_wait :: s_resp_wait :: s_bad_resp_wait :: Nil = Enum(6) { UFix() }; val state = Reg(resetVal = s_invalid); val demand_miss = io.icache.req_val & io.icache.req_rdy; - val prefetch_addr = Reg(resetVal = UFix(0,32)); - when (demand_miss) { prefetch_addr <== io.icache.req_addr + UFix(4); } + val 
prefetch_addr = Reg() { UFix(width = PADDR_BITS) }; + when (demand_miss) { prefetch_addr <== io.icache.req_addr + UFix(REFILL_CYCLES); } val addr_match = (prefetch_addr === io.icache.req_addr); val hit = (state != s_invalid) & (state != s_req_wait) & addr_match; @@ -40,29 +40,29 @@ class rocketIPrefetcher extends Component() { val ip_mem_resp_val = io.mem.resp_val && io.mem.resp_tag(0).toBool; io.mem.req_val := io.icache.req_val & ~hit | (state === s_req_wait); - io.mem.req_tag := Cat(Bits(0,2), !(io.icache.req_val && !hit)); + io.mem.req_tag := !(io.icache.req_val && !hit); io.mem.req_addr := Mux(io.mem.req_tag(0).toBool, prefetch_addr, io.icache.req_addr); val pdq_reset = Reg(resetVal = Bool(true)); pdq_reset <== demand_miss & ~hit | (state === s_bad_resp_wait); - val fill_cnt = Reg(resetVal = UFix(0,2)); - when (ip_mem_resp_val.toBool) { fill_cnt <== fill_cnt + UFix(1,1); } - val fill_done = (fill_cnt === UFix(3,2)) & ip_mem_resp_val; + val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); + when (ip_mem_resp_val.toBool) { fill_cnt <== fill_cnt + UFix(1); } + val fill_done = (~fill_cnt === UFix(0)) & ip_mem_resp_val; val forward = Reg(resetVal = Bool(false)); - val forward_cnt = Reg(resetVal = UFix(0,2)); - when (forward & pdq.io.deq_val) { forward_cnt <== forward_cnt + UFix(1,1); } - val forward_done = (forward_cnt === UFix(3,2)) & pdq.io.deq_val; + val forward_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); + when (forward & pdq.io.deq.valid) { forward_cnt <== forward_cnt + UFix(1); } + val forward_done = (~forward_cnt === UFix(0)) & pdq.io.deq.valid; forward <== (demand_miss & hit | forward & ~forward_done); - io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag(0).toBool) || (forward && pdq.io.deq_val); - io.icache.resp_data := Mux(forward, pdq.io.deq_bits, io.mem.resp_data); + io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag(0).toBool) || (forward && pdq.io.deq.valid); + io.icache.resp_data := 
Mux(forward, pdq.io.deq.bits, io.mem.resp_data); pdq.io.q_reset := pdq_reset; - pdq.io.enq_bits := io.mem.resp_data; - pdq.io.enq_val := ip_mem_resp_val.toBool; - pdq.io.deq_rdy := forward; + pdq.io.enq.bits := io.mem.resp_data; + pdq.io.enq.valid := ip_mem_resp_val.toBool; + pdq.io.deq.ready := forward; switch (state) { is (s_invalid) { diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 01326b54..b81d90a5 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -1,4 +1,4 @@ -package queues +package Top { import Chisel._ @@ -81,15 +81,11 @@ class queueCtrl(entries: Int) extends Component full <== full_next; } -class ioQueueSimplePF[T <: Data]()(data: => T) extends Bundle() +class ioQueueSimplePF[T <: Data]()(data: => T) extends Bundle { - val q_reset = Bool('input); - val enq_val = Bool('input); - val enq_rdy = Bool('output); - val deq_val = Bool('output); - val deq_rdy = Bool('input); - val enq_bits = data.asInput; - val deq_bits = data.asOutput; + val q_reset = Bool('input); + val enq = new ioDecoupled()(data) + val deq = new ioDecoupled()(data).flip } class queueSimplePF[T <: Data](entries: Int)(data: => T) extends Component @@ -97,12 +93,12 @@ class queueSimplePF[T <: Data](entries: Int)(data: => T) extends Component override val io = new ioQueueSimplePF()(data); val ctrl = new queueCtrl(entries); ctrl.io.q_reset ^^ io.q_reset; - ctrl.io.deq_val ^^ io.deq_val; - ctrl.io.enq_rdy ^^ io.enq_rdy; - ctrl.io.enq_val ^^ io.enq_val; - ctrl.io.deq_rdy ^^ io.deq_rdy; - val ram = Mem(entries, ctrl.io.wen, ctrl.io.waddr, io.enq_bits); - ram.read(ctrl.io.raddr) ^^ io.deq_bits; + ctrl.io.deq_val ^^ io.deq.valid; + ctrl.io.enq_rdy ^^ io.enq.ready; + ctrl.io.enq_val ^^ io.enq.valid; + ctrl.io.deq_rdy ^^ io.deq.ready; + val ram = Mem(entries, ctrl.io.wen, ctrl.io.waddr, io.enq.bits); + ram.read(ctrl.io.raddr) ^^ io.deq.bits; } // TODO: SHOULD USE INHERITANCE BUT BREAKS INTROSPECTION CODE diff --git 
a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index edc3173d..4b0d86ec 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -5,6 +5,79 @@ import Chisel._ import Node._; import scala.math._; +class MuxN[T <: Data](n: Int)(data: => T) extends Component { + val io = new Bundle { + val sel = Bits(width = ceil(log(n)/log(2)).toInt) + val in = Vec(n) { data }.asInput() + val out = data.asOutput() + } + + val out = Vec(n) { Wire() { data } } + out(0) <== io.in(0) + for (i <- 1 to n-1) { + out(i) <== Mux(io.sel === UFix(i), io.in(i), out(i-1)) + } + + out(n-1) ^^ io.out +} + +class Mux1H(n: Int, w: Int) extends Component +{ + val io = new Bundle { + val sel = Vec(n) { Bool(dir = 'input) } + val in = Vec(n) { Bits(width = w, dir = 'input) } + val out = Bits(width = w, dir = 'output) + } + + if (n > 1) { + val out = Vec(n) { Wire() { Bits(width = w) } } + out(0) <== io.in(0) & Fill(w, io.sel(0)) + for (i <- 1 to n-1) { + out(i) <== out(i-1) | (io.in(i) & Fill(w, io.sel(i))) + } + + io.out := out(n-1) + } else { + io.out := io.in(0) + } +} + +class ioDecoupled[T <: Data]()(data: => T) extends Bundle +{ + val valid = Bool('input) + val ready = Bool('output) + val bits = data.asInput +} + +class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle { + val in = Vec(n) { (new ioDecoupled()) { data } } + val out = (new ioDecoupled()) { data }.flip() +} + +class Arbiter[T <: Data](n: Int)(data: => T) extends Component { + val io = new ioArbiter(n)(data) + val dout = Vec(n) { Wire() { data } } + val vout = Wire { Bool() } + + io.in(0).ready := io.out.ready + for (i <- 1 to n-1) { + io.in(i).ready := !io.in(i-1).valid && io.in(i-1).ready + } + + dout(n-1) <== io.in(n-1).bits + for (i <- n-2 to 0) { + dout(i) <== Mux(io.in(i).valid, io.in(i).bits, dout(i+1)) + } + + for (i <- 0 to n-2) { + when (io.in(i).valid) { vout <== Bool(true) } + } + vout <== io.in(n-1).valid + + vout ^^ io.out.valid + dout(0) ^^ io.out.bits +} + 
class ioPriorityDecoder(in_width: Int, out_width: Int) extends Bundle { val in = UFix(in_width, 'input); @@ -49,4 +122,4 @@ class priorityEncoder(width: Int) extends Component io.out := l_out; } -} \ No newline at end of file +} From c01e1f1cef9c3098fe792ba150c202fe339d2ce2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 9 Dec 2011 19:42:58 -0800 Subject: [PATCH 0067/1087] Don't replay from EX stage. EX replays are now handled from MEM. We may move them to WB. --- rocket/src/main/scala/consts.scala | 11 +- rocket/src/main/scala/cpu.scala | 6 +- rocket/src/main/scala/ctrl.scala | 116 ++-- rocket/src/main/scala/dcache.scala | 2 +- rocket/src/main/scala/dpath.scala | 4 +- rocket/src/main/scala/dtlb.scala | 7 +- rocket/src/main/scala/nbdcache.scala | 850 +++++++++++++++++++++++++++ 7 files changed, 921 insertions(+), 75 deletions(-) create mode 100644 rocket/src/main/scala/nbdcache.scala diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 1c4c2996..30b98b0c 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -19,13 +19,10 @@ object Constants val PC_BTB = UFix(1, 4); val PC_EX4 = UFix(2, 4); val PC_BR = UFix(3, 4); - val PC_J = UFix(4, 4); - val PC_JR = UFix(5, 4); - val PC_PCR = UFix(6, 4); - val PC_MEM = UFix(7, 4); - val PC_MEM4 = UFix(8, 4); - val PC_EX = UFix(9, 4); - val PC_EVEC = UFix(10, 4); + val PC_JR = UFix(4, 4); + val PC_PCR = UFix(5, 4); + val PC_MEM = UFix(6, 4); + val PC_EVEC = UFix(7, 4); val KF_Y = UFix(1, 1); val KF_N = UFix(0, 1); diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 602a0716..142ee382 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -72,13 +72,13 @@ class rocketProc extends Component // connect DTLB to D$ arbiter, ctrl+dpath dtlb.io.cpu.invalidate := dpath.io.ptbr_wen; dtlb.io.cpu.status := dpath.io.ctrl.status; - dtlb.io.cpu.req_val := ctrl.io.dmem.req_val; + 
dtlb.io.cpu.req_val := ctrl.io.dtlb_val; dtlb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; dtlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR dtlb.io.cpu.req_vpn := dpath.io.dmem.req_addr(VADDR_BITS-1,PGIDX_BITS); ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu.xcpt_ld; ctrl.io.xcpt_dtlb_st := dtlb.io.cpu.xcpt_st; - ctrl.io.dtlb_busy := dtlb.io.cpu.resp_busy; + ctrl.io.dtlb_rdy := dtlb.io.cpu.req_rdy; ctrl.io.dtlb_miss := dtlb.io.cpu.resp_miss; ctrl.io.xcpt_ma_ld := io.dmem.xcpt_ma_ld; ctrl.io.xcpt_ma_st := io.dmem.xcpt_ma_st; @@ -95,7 +95,7 @@ class rocketProc extends Component arb.io.cpu.req_val := ctrl.io.dmem.req_val; arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; arb.io.cpu.req_type := ctrl.io.dmem.req_type; - arb.io.cpu.dtlb_miss := dtlb.io.cpu.resp_miss; + arb.io.cpu.dtlb_miss := ctrl.io.dpath.killm; arb.io.cpu.req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0); arb.io.cpu.req_ppn := dtlb.io.cpu.resp_ppn; arb.io.cpu.req_data := dpath.io.dmem.req_data; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 7c229e03..302ae0fb 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -76,7 +76,8 @@ class ioCtrlAll extends Bundle() val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss")).flip(); val host = new ioHost(List("start")); - val dtlb_busy = Bool('input); + val dtlb_val = Bool('output) + val dtlb_rdy = Bool('input); val dtlb_miss = Bool('input); val flush_inst = Bool('output); val xcpt_dtlb_ld = Bool('input); @@ -297,10 +298,10 @@ class rocketCtrl extends Component val id_raddr1 = io.dpath.inst(26,22); val id_waddr = io.dpath.inst(31,27); - val id_ren2 = id_renx2; - val id_ren1 = id_renx1; + val id_ren2 = id_renx2.toBool; + val id_ren1 = id_renx1.toBool; - val id_console_out_val = id_wen_pcr & (id_raddr2 === PCR_CONSOLE); + val id_console_out_val = id_wen_pcr.toBool && (id_raddr2 === PCR_CONSOLE); 
val wb_reg_div_mul_val = Reg(){Bool()}; val dcache_miss = Reg(io.dmem.resp_miss); @@ -355,6 +356,7 @@ class rocketCtrl extends Component val mem_reg_xcpt_privileged = Reg(resetVal = Bool(false)); val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); + val mem_reg_replay = Reg(resetVal = Bool(false)); when (!io.dpath.stalld) { when (io.dpath.killf) { @@ -432,10 +434,6 @@ class rocketCtrl extends Component val jr_taken = (ex_reg_br_type === BR_JR); val j_taken = (ex_reg_br_type === BR_J); io.dpath.ex_jmp := j_taken; - - io.dmem.req_val := ex_reg_mem_val && ~io.dpath.killx; - io.dmem.req_cmd := ex_reg_mem_cmd; - io.dmem.req_type := ex_reg_mem_type; val mem_reg_div_mul_val = Reg(){Bool()}; val mem_reg_eret = Reg(){Bool()}; @@ -526,40 +524,40 @@ class rocketCtrl extends Component io.dpath.exception := mem_exception; io.dpath.cause := mem_cause; io.dpath.badvaddr_wen := io.xcpt_dtlb_ld || io.xcpt_dtlb_st; + + // replay mem stage PC on a DTLB miss + val mem_hazard = io.dtlb_miss + val replay_mem = mem_hazard || mem_reg_replay; + val kill_mem = mem_hazard || mem_exception; + + // control transfer from ex/mem + val take_pc_ex = (ex_reg_btb_hit != br_taken) || jr_taken || j_taken + val take_pc_mem = mem_exception || mem_reg_eret || replay_mem + val take_pc = take_pc_ex || take_pc_mem // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions - val replay_ex = (ex_reg_mem_val && !io.dmem.req_rdy) || io.dmem.resp_miss || mem_reg_flush_inst || mem_reg_privileged; - - // replay mem stage PC on a DTLB miss - val replay_mem = io.dtlb_miss; - val kill_mem = mem_exception || replay_mem; - val kill_ex = replay_ex || kill_mem; + val ex_hazard = io.dmem.resp_miss || mem_reg_privileged || mem_reg_flush_inst + val mem_kill_ex = kill_mem || take_pc_mem + val kill_ex = mem_kill_ex || ex_hazard || !(io.dmem.req_rdy && io.dtlb_rdy) && ex_reg_mem_val + val kill_dtlb = 
mem_kill_ex || ex_hazard || !io.dmem.req_rdy + val kill_dmem = mem_kill_ex || ex_hazard || !io.dtlb_rdy + + mem_reg_replay <== kill_ex && !mem_kill_ex io.dpath.sel_pc := Mux(replay_mem, PC_MEM, // dtlb miss Mux(mem_exception, PC_EVEC, // exception Mux(mem_reg_eret, PC_PCR, // eret instruction - Mux(replay_ex, PC_EX, // D$ blocked, D$ miss, privileged inst Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch Mux(j_taken, PC_BR, // jump Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch Mux(jr_taken, PC_JR, // jump register Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB - PC_4))))))))); // PC+4 + PC_4)))))))); // PC+4 io.dpath.wen_btb := ~ex_reg_btb_hit & br_taken & ~kill_ex & ~kill_mem; - val take_pc = - ~ex_reg_btb_hit & br_taken | - ex_reg_btb_hit & ~br_taken | - jr_taken | - j_taken | - mem_exception | - mem_reg_eret | - replay_ex | - replay_mem; - io.dpath.stallf := ~take_pc & ( @@ -574,8 +572,8 @@ class rocketCtrl extends Component val lu_stall_ex = ex_mem_cmd_load && - ((id_ren1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || - (id_ren2.toBool && (id_raddr2 === io.dpath.ex_waddr))); + ((id_ren1 && (id_raddr1 === io.dpath.ex_waddr)) || + (id_ren2 && (id_raddr2 === io.dpath.ex_waddr))); val mem_mem_cmd_load_bh = mem_reg_mem_val && @@ -587,47 +585,48 @@ class rocketCtrl extends Component val lu_stall_mem = mem_mem_cmd_load_bh && - ((id_ren1.toBool && (id_raddr1 === io.dpath.mem_waddr)) || - (id_ren2.toBool && (id_raddr2 === io.dpath.mem_waddr))); + ((id_ren1 && (id_raddr1 === io.dpath.mem_waddr)) || + (id_ren2 && (id_raddr2 === io.dpath.mem_waddr))); val lu_stall = lu_stall_ex || lu_stall_mem; // check for divide and multiply instructions in ex,mem,wb stages val dm_stall_ex = ex_reg_div_mul_val && - ((id_ren1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || - (id_ren2.toBool && (id_raddr2 === io.dpath.ex_waddr))); + ((id_ren1 && (id_raddr1 === io.dpath.ex_waddr)) || + (id_ren2 && (id_raddr2 === io.dpath.ex_waddr))); 
val dm_stall_mem = mem_reg_div_mul_val && - ((id_ren1.toBool && (id_raddr1 === io.dpath.mem_waddr)) || - (id_ren2.toBool && (id_raddr2 === io.dpath.mem_waddr))); + ((id_ren1 && (id_raddr1 === io.dpath.mem_waddr)) || + (id_ren2 && (id_raddr2 === io.dpath.mem_waddr))); val dm_stall_wb = wb_reg_div_mul_val && - ((id_ren1.toBool && (id_raddr1 === io.dpath.wb_waddr)) || - (id_ren2.toBool && (id_raddr2 === io.dpath.wb_waddr))); + ((id_ren1 && (id_raddr1 === io.dpath.wb_waddr)) || + (id_ren2 && (id_raddr2 === io.dpath.wb_waddr))); val dm_stall = dm_stall_ex || dm_stall_mem || dm_stall_wb; val ctrl_stalld = - ~take_pc & + !take_pc && ( - dm_stall | - lu_stall | - id_ren2 & id_stall_raddr2 | - id_ren1 & id_stall_raddr1 | - (id_sel_wa === WA_RD) & id_stall_waddr | - (id_sel_wa === WA_RA) & id_stall_ra | - id_mem_val & (~io.dmem.req_rdy | io.dtlb_busy) | - (id_sync === SYNC_D) & ~io.dmem.req_rdy | - id_console_out_val & ~io.console.rdy | - id_div_val & ~io.dpath.div_rdy | - io.dpath.div_result_val | + dm_stall || + lu_stall || + id_ren2 && id_stall_raddr2 || + id_ren1 && id_stall_raddr1 || + (id_sel_wa === WA_RD) && id_stall_waddr || + (id_sel_wa === WA_RA) && id_stall_ra || + id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || + (id_sync === SYNC_D) && !io.dmem.req_rdy || + id_console_out_val && !io.console.rdy || + id_div_val.toBool && !io.dpath.div_rdy || + io.dpath.div_result_val || io.dpath.mul_result_val ); - val ctrl_killd = take_pc | ctrl_stalld; + val ctrl_killd = take_pc || ctrl_stalld; + val ctrl_killf = take_pc || !io.imem.resp_val; // for divider, multiplier writeback val mul_wb = io.dpath.mul_result_val; @@ -635,15 +634,15 @@ class rocketCtrl extends Component io.flush_inst := mem_reg_flush_inst; - io.dpath.stalld := ctrl_stalld.toBool; - io.dpath.killf := take_pc | ~io.imem.resp_val; - io.dpath.killd := ctrl_killd.toBool; - io.dpath.killx := kill_ex.toBool; - io.dpath.killm := kill_mem.toBool; + io.dpath.stalld := ctrl_stalld; + io.dpath.killf := 
ctrl_killf; + io.dpath.killd := ctrl_killd; + io.dpath.killx := kill_ex; + io.dpath.killm := kill_mem; io.dpath.mem_load := mem_reg_mem_val && ((mem_reg_mem_cmd === M_XRD) || mem_reg_mem_cmd(3).toBool); - io.dpath.ren2 := id_ren2.toBool; - io.dpath.ren1 := id_ren1.toBool; + io.dpath.ren2 := id_ren2; + io.dpath.ren1 := id_ren1; io.dpath.sel_alu2 := id_sel_alu2; io.dpath.sel_alu1 := id_sel_alu1.toBool; io.dpath.fn_dw := id_fn_dw.toBool; @@ -663,6 +662,11 @@ class rocketCtrl extends Component io.dpath.mem_eret := mem_reg_eret; io.dpath.irq_disable := mem_reg_inst_di && !kill_mem; io.dpath.irq_enable := mem_reg_inst_ei && !kill_mem; + + io.dtlb_val := ex_reg_mem_val && !kill_dtlb; + io.dmem.req_val := ex_reg_mem_val && !kill_dmem; + io.dmem.req_cmd := ex_reg_mem_cmd; + io.dmem.req_type := ex_reg_mem_type; } } diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 23d8352a..02d538f0 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -155,7 +155,7 @@ class rocketDCacheDM_flush(lines: Int) extends Component { dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); dcache.io.cpu.req_type := io.cpu.req_type; dcache.io.cpu.req_data ^^ io.cpu.req_data; - dcache.io.cpu.dtlb_miss := io.cpu.dtlb_miss; + dcache.io.cpu.dtlb_miss := io.cpu.dtlb_miss && !flush_waiting; dcache.io.mem ^^ io.mem; io.cpu.xcpt_ma_ld := dcache.io.cpu.xcpt_ma_ld; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index ca91e138..1fd32d2f 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -135,16 +135,14 @@ class rocketDpath extends Component btb.io.correct_target := ex_branch_target; val if_next_pc = - Mux(io.ctrl.sel_pc === PC_4, if_pc_plus4, Mux(io.ctrl.sel_pc === PC_BTB, if_btb_target, - Mux(io.ctrl.sel_pc === PC_EX, ex_reg_pc, Mux(io.ctrl.sel_pc === PC_EX4, ex_reg_pc_plus4, Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, Mux(io.ctrl.sel_pc 
=== PC_JR, ex_jr_target.toUFix, Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_wdata(VADDR_BITS-1,0), // only used for ERET Mux(io.ctrl.sel_pc === PC_EVEC, pcr.io.evec, Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, - UFix(0, VADDR_BITS)))))))))); + if_pc_plus4))))))); // PC_4 when (!io.ctrl.stallf && io.host.start) { if_reg_pc <== if_next_pc.toUFix; diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 23f1563a..e10591dd 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -20,7 +20,6 @@ class ioDTLB_CPU(view: List[String] = null) extends Bundle(view) val req_asid = Bits(ASID_BITS, 'input); val req_vpn = UFix(VPN_BITS, 'input); // lookup responses - val resp_busy = Bool('output); val resp_miss = Bool('output); // val resp_val = Bool('output); val resp_ppn = UFix(PPN_BITS, 'output); @@ -154,12 +153,10 @@ class rocketDTLB(entries: Int) extends Component io.cpu.xcpt_st := access_fault_st; // (lookup && (req_store || req_amo) && outofrange) || access_fault_st; - io.cpu.req_rdy := Mux(status_vm, (state === s_ready) && !tlb_miss, Bool(true)); - io.cpu.resp_busy := tlb_miss || (state != s_ready); + io.cpu.req_rdy := (state === s_ready) && !tlb_miss; io.cpu.resp_miss := tlb_miss; io.cpu.resp_ppn := - Mux(status_vm, Mux(req_flush, Bits(0,PPN_BITS), tag_ram(tag_hit_addr)), - r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; + Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; io.ptw.req_val := (state === s_request); io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala new file mode 100644 index 00000000..ecb3eaa5 --- /dev/null +++ b/rocket/src/main/scala/nbdcache.scala @@ -0,0 +1,850 @@ +package Top { + +import Chisel._ +import Node._; +import Constants._; +import scala.math._; + +class rocketNBDCacheStoreGen extends Component { + val io = new Bundle { + val req_type = Bits(3, 'input) + val req_addr_lsb = Bits(3, 'input) + val 
req_data = Bits(64, 'input) + val store_wmask = Bits(8, 'output) + val store_data = Bits(64, 'output) + } + + // generate write mask and store data signals based on store type and address LSBs + val wmask = Wire { Bits(8) } + switch (io.req_type(1,0)) + { + is (MT_B) { wmask <== Bits( 1,1) << io.req_addr_lsb(2,0).toUFix } + is (MT_H) { wmask <== Bits( 3,2) << Cat(io.req_addr_lsb(2,1), Bits(0,1)).toUFix } + is (MT_W) { wmask <== Bits( 15,4) << Cat(io.req_addr_lsb(2,2), Bits(0,2)).toUFix } + otherwise { wmask <== Bits(255,8) } // MT_D + } + + val data = Wire { Bits(64) } + switch (io.req_type(1,0)) + { + is (MT_B) { data <== Fill(8, io.req_data( 7,0)) } + is (MT_H) { data <== Fill(4, io.req_data(15,0)) } + is (MT_W) { data <== Fill(2, io.req_data(31,0)) } + otherwise { data <== io.req_data } // MT_D + } + + io.store_wmask := wmask + io.store_data := data +} + +class DataMemCmd extends Bundle { + val offset = Bits(width = OFFSET_BITS) + val cmd = Bits(width = 4) + val typ = Bits(width = 3) +} + +class RPQEntry extends Bundle { + val cmd = new DataMemCmd() + val sdq_id = UFix(width = ceil(log(NSDQ)/log(2)).toInt) +} + +class Replay extends Bundle { + val idx = Bits(width = IDX_BITS) + val cmd = new DataMemCmd() + val sdq_id = UFix(width = ceil(log(NSDQ)/log(2)).toInt) +} + +class DataReq extends Bundle { + val idx = Bits(width = IDX_BITS) + val cmd = new DataMemCmd() + val data = Bits(width = CPU_DATA_BITS) +} + +class DataArrayReq extends Bundle { + val idx = Bits(width = IDX_BITS) + val offset = Bits(width = ceil(log(REFILL_CYCLES)/log(2)).toInt) + val rw = Bool() + val wmask = Bits(width = MEM_DATA_BITS/8) + val data = Bits(width = MEM_DATA_BITS) +} + +class MemReq extends Bundle { + val rw = Bool() + val addr = Bits(width = PPN_BITS+IDX_BITS) +} + +class WritebackReq extends Bundle { + val ppn = Bits(width = PPN_BITS) + val idx = Bits(width = IDX_BITS) +} + +class MetaData extends Bundle { + val valid = Bool() + val dirty = Bool() + val tag = Bits(width = PPN_BITS) 
+} + +class MetaReq extends Bundle { + val idx = Bits(width = IDX_BITS) + val rw = Bool() + val data = new MetaData() +} + +class MSHR extends Component { + val io = new Bundle { + val req_pri_val = Bool('input) + val req_pri_rdy = Bool('output) + val req_sec_val = Bool('input) + val req_sec_rdy = Bool('output) + val req_ppn = Bits(PPN_BITS, 'input) + val req_idx = Bits(IDX_BITS, 'input) + val req_cmd = new RPQEntry().asInput + val req_tag = Bits(CPU_TAG_BITS, 'input) + + val idx_match = Bool('output) + val tag = Bits(PPN_BITS, 'output) + + val mem_resp_val = Bool('input) + val mem_req = (new ioDecoupled) { new MemReq() }.flip + val meta_req = (new ioDecoupled) { new MetaReq() }.flip + val replay = (new ioDecoupled) { new Replay() }.flip + } + + val valid = Reg(resetVal = Bool(false)) + val dirty = Reg { Bool() } + val requested = Reg { Bool() } + val refilled = Reg { Bool() } + val ppn = Reg { Bits() } + val idx = Reg { Bits() } + + val req_load = (io.req_cmd.cmd.cmd === M_XRD) || (io.req_cmd.cmd.cmd === M_PRD) || (io.req_cmd.cmd.cmd === M_PFR) + val req_use_rpq = (io.req_cmd.cmd.cmd != M_PFR) && (io.req_cmd.cmd.cmd != M_PFW) + val next_dirty = io.req_pri_val && io.req_pri_rdy && !req_load || io.req_sec_val && io.req_sec_rdy && (!req_load || dirty) + val sec_rdy = io.idx_match && !refilled && (dirty || !requested || req_load) + + val rpq = (new queueSimplePF(NRPQ)) { new RPQEntry() } + rpq.io.q_reset := Bool(false) + rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq + io.req_cmd ^^ rpq.io.enq.bits + rpq.io.deq.ready := io.replay.ready && refilled + + when (io.req_pri_val && io.req_pri_rdy) { + valid <== Bool(true) + requested <== Bool(false) + refilled <== Bool(false) + ppn <== io.req_ppn + idx <== io.req_idx + } + when (io.mem_req.valid && io.mem_req.ready) { + requested <== Bool(true) + } + when (io.mem_resp_val) { + refilled <== Bool(true) + } + when (io.meta_req.valid && io.meta_req.ready) { + valid <== 
Bool(false) + } + dirty <== next_dirty + + io.idx_match := valid && (idx === io.req_idx) + io.tag := ppn + io.req_pri_rdy := !valid + io.req_sec_rdy := sec_rdy && rpq.io.enq.ready + + io.meta_req.valid := valid && refilled && !rpq.io.deq.valid + io.meta_req.bits.rw := Bool(true) + io.meta_req.bits.idx := idx + io.meta_req.bits.data.valid := Bool(true) + io.meta_req.bits.data.dirty := dirty + io.meta_req.bits.data.tag := ppn + + io.mem_req.valid := valid && !requested + //io.mem_req.bits.itm := next_dirty + io.mem_req.bits.rw := Bool(false) + io.mem_req.bits.addr := Cat(ppn, idx) + + io.replay.valid := rpq.io.deq.valid && refilled + io.replay.bits.idx := idx + rpq.io.deq.bits.cmd ^^ io.replay.bits.cmd + io.replay.bits.sdq_id := rpq.io.deq.bits.sdq_id +} + +class MSHRFile extends Component { + val io = new Bundle { + val req_val = Bool('input) + val req_rdy = Bool('output) + val req_cmd = (new DataMemCmd).asInput + val req_ppn = Bits(PADDR_BITS, 'input) + val req_idx = Bits(IDX_BITS, 'input) + val req_data = Bits(64, 'input) + val req_tag = Bits(CPU_TAG_BITS, 'input) + + val mem_resp_val = Bool('input) + val mem_resp_tag = Bits(DMEM_TAG_BITS, 'input) + + val mem_req = (new ioDecoupled) { new MemReq() }.flip() + val meta_req = (new ioDecoupled) { new MetaReq() }.flip() + val replay = (new ioDecoupled) { new Replay() }.flip() + } + + val idx_match = Wire { Bool() } + val pri_rdy = Wire { Bool() } + val sec_rdy = Wire { Bool() } + + val tag_mux = new Mux1H(NMSHR, PPN_BITS) + val meta_req_arb = (new Arbiter(NMSHR)) { new MetaReq() } + val mem_req_arb = (new Arbiter(NMSHR)) { new MemReq() } + val replay_arb = (new Arbiter(NMSHR)) { new RPQEntry() } + val alloc_arb = (new Arbiter(NMSHR)) { Bool() } + + val tag_match = tag_mux.io.out === io.req_ppn + + for (i <- 0 to NMSHR-1) { + val mshr = new MSHR() + + val rpqe = new RPQEntry().asInput + rpqe.cmd.offset <== io.req_cmd.offset + rpqe.cmd.cmd <== Mux(io.req_cmd.cmd === M_PRD, M_XRD, io.req_cmd.cmd) + rpqe.cmd.typ <== 
io.req_cmd.typ + rpqe.sdq_id <== UFix(0) + + tag_mux.io.sel(i) := mshr.io.idx_match + tag_mux.io.in(i) := mshr.io.tag + + alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy + mshr.io.req_pri_val := io.req_val && !idx_match && alloc_arb.io.in(i).ready + + mshr.io.req_sec_val := io.req_val && tag_match + mshr.io.req_ppn := io.req_ppn + mshr.io.req_idx := io.req_idx + mshr.io.req_tag := io.req_tag + rpqe ^^ mshr.io.req_cmd + + mshr.io.meta_req <> meta_req_arb.io.in(i) + mshr.io.mem_req <> mem_req_arb.io.in(i) + mshr.io.replay <> replay_arb.io.in(i) + + mshr.io.mem_resp_val := io.mem_resp_val && (UFix(i) === io.mem_resp_tag) + + when (mshr.io.req_pri_rdy) { pri_rdy <== Bool(true) } + when (mshr.io.req_sec_rdy) { sec_rdy <== Bool(true) } + when (mshr.io.idx_match) { idx_match <== Bool(true) } + } + pri_rdy <== Bool(false) + sec_rdy <== Bool(false) + idx_match <== Bool(false) + + meta_req_arb.io.out ^^ io.meta_req + mem_req_arb.io.out ^^ io.mem_req + replay_arb.io.out ^^ io.replay + + io.req_rdy := Mux(idx_match, tag_match && sec_rdy, pri_rdy) +} + +class StoreDataUnit extends Component { + val io = new Bundle { + val sdq_enq = (new ioDecoupled) { Bits(width = CPU_DATA_BITS) } + val sdq_id = UFix(width = ceil(log(NSDQ)/log(2)).toInt, dir = 'output) + val replay = (new ioDecoupled) { new Replay() } + val data_req = (new ioDecoupled) { new DataReq() }.flip() + } + + val cmdq = (new queueSimplePF(2)) { new Replay() } + val dataq = (new queueSimplePF(2)) { Bits(width = CPU_DATA_BITS) } + + val next_dataq_enq_rdy = !dataq.io.deq.valid || dataq.io.enq.ready && (!dataq.io.enq.valid || dataq.io.deq.ready) + val next_dataq_enq_val = io.replay.valid && next_dataq_enq_rdy && (io.replay.bits.cmd.cmd != M_XRD) && cmdq.io.enq.ready + dataq.io.enq.valid := Reg(next_dataq_enq_val, resetVal = Bool(false)) + dataq.io.enq.bits := sdq_dout + dataq.io.deq.ready := io.data_req.ready && (cmdq.io.deq.bits.cmd.cmd != M_XRD) + + cmdq.io.enq.valid := io.replay.valid && ((io.replay.bits.cmd.cmd === 
M_XRD) || next_dataq_enq_rdy) + io.replay.bits ^^ cmdq.io.enq.bits + cmdq.io.deq.ready := io.data_req.ready && ((cmdq.io.deq.bits.cmd.cmd === M_XRD) || dataq.io.deq.valid) + + val sdq = Mem4(NSDQ, io.sdq_enq.bits); + sdq.setReadLatency(1); + sdq.setTarget('inst); + val sdq_addr = Mux(next_dataq_enq_val, io.replay.bits.sdq_id, io.sdq_id) + val sdq_wen = io.sdq_enq.valid && io.sdq_enq.ready + val sdq_dout = sdq.rw(sdq_addr, io.sdq_enq.bits, sdq_wen, cs = next_dataq_enq_val || sdq_wen); + val sdq_val = Reg(resetVal = Bits(0, ceil(log(NSDQ)/log(2)).toInt)) + when (next_dataq_enq_val) { sdq_val <== sdq_val.bitSet(io.replay.bits.sdq_id, Bool(false)) } + when (sdq_wen) { sdq_val <== sdq_val.bitSet(io.sdq_id, Bool(true)) } + + def priority_enc(in: Bits, n: Int = 0): Bits = if (in.width == n-1) UFix(n-1) else if(in(n) == Bool(true)) UFix(n) else priority_enc(in, n+1) + io.sdq_id := priority_enc(~sdq_val) + io.sdq_enq.ready := ((~sdq_val) != UFix(0)) && !next_dataq_enq_val + io.replay.ready := cmdq.io.enq.ready && next_dataq_enq_rdy + io.data_req.valid := cmdq.io.deq.valid && ((cmdq.io.deq.bits.cmd.cmd === M_XRD) || dataq.io.deq.valid) + io.data_req.bits.idx := cmdq.io.deq.bits.idx + cmdq.io.deq.bits.cmd ^^ io.data_req.bits.cmd + io.data_req.bits.data := dataq.io.deq.bits +} + +class WritebackUnit extends Component { + val io = new Bundle { + val wb_req = (new ioDecoupled) { new WritebackReq() } + val data_req = (new ioDecoupled) { new DataReq() }.flip() + val data_resp = Bits(width = MEM_DATA_BITS, dir = 'input) + val mem_req = (new ioDecoupled) { new MemReq() }.flip() + } + + val wbq = (new queueSimplePF(REFILL_CYCLES)) { Bits(width = MEM_DATA_BITS) } + val valid = Reg(resetVal = Bool(false)) + val cnt = Reg() { UFix(ceil(log(REFILL_CYCLES)/log(2)).toInt) } + val addr = Reg() { new WritebackReq() } + + wbq.io.enq.valid := valid && Reg(io.data_req.valid && io.data_req.ready) + wbq.io.enq.bits := io.data_resp + wbq.io.deq.ready := io.mem_req.ready && (~cnt === UFix(0)) + + 
when (io.wb_req.valid && io.wb_req.ready) { valid <== Bool(true); cnt <== UFix(0); addr <== io.wb_req.bits } + when (io.data_req.valid && io.data_req.ready) { cnt <== cnt + UFix(1) } + when ((~cnt === UFix(0)) && !wbq.io.deq.valid) { valid <== Bool(false) } + + io.wb_req.ready := !valid + io.data_req.valid := valid && wbq.io.enq.ready + io.data_req.bits.idx := addr.idx + io.data_req.bits.cmd.offset := cnt * UFix(MEM_DATA_BITS/8) + io.data_req.bits.cmd.cmd := M_XRD + io.data_req.bits.cmd.typ := UFix(0) + io.data_req.bits.data := wbq.io.deq.bits + io.mem_req.valid := wbq.io.deq.valid && (~cnt === UFix(0)) + io.mem_req.bits.rw := Bool(true) + io.mem_req.bits.addr := Cat(addr.ppn, addr.idx) +} + +class FlushUnit extends Component { + val io = new Bundle { + val flush_req = (new ioDecoupled) { Bits(width = CPU_TAG_BITS) } + val flush_resp = (new ioDecoupled) { Bits(width = CPU_TAG_BITS) }.flip() + val meta_req = (new ioDecoupled) { new MetaReq() }.flip() + val meta_resp = (new MetaData).asInput() + + val wb_req_val = Bool(dir = 'output) + val wb_req_rdy = Bool(dir = 'input) + } + + val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: s_writeback :: s_meta_write :: s_done :: Nil = Enum(7) { UFix() } + val state = Reg(resetVal = s_reset) + val tag = Reg() { Bits(width = CPU_TAG_BITS) } + val cnt = Reg() { UFix(ceil(log(REFILL_CYCLES)/log(2)).toInt) } + val next_cnt = cnt + UFix(1) + + switch (state) { + is(s_reset) { when (io.meta_req.ready) { state <== Mux(~cnt === UFix(0), s_ready, s_reset); cnt <== next_cnt } } + is(s_ready) { when (io.flush_req.valid) { state <== s_meta_read; tag <== io.flush_req.bits } } + is(s_meta_read) { when (io.meta_req.ready) { state <== s_meta_wait } } + is(s_meta_wait) { state <== Mux(io.meta_resp.valid && io.meta_resp.dirty, s_writeback, s_meta_write) } + is(s_writeback) { when (io.wb_req_rdy) { state <== s_meta_write } } + is(s_meta_write) { when (io.meta_req.ready) { state <== Mux(~cnt === UFix(0), s_done, s_meta_read); cnt <== next_cnt 
} } + is(s_done) { when (io.flush_resp.ready) { state <== s_ready } } + } + + io.flush_req.ready := state === s_ready + io.flush_resp.valid := state === s_done + io.flush_resp.bits := tag + io.meta_req.valid := (state === s_meta_read) || (state === s_meta_write) || (state === s_reset) + io.meta_req.bits.idx := cnt + io.meta_req.bits.rw := (state === s_meta_write) || (state === s_reset) + io.meta_req.bits.data.valid := Bool(false) + io.meta_req.bits.data.dirty := Bool(false) + io.meta_req.bits.data.tag := UFix(0) + io.wb_req_val := state === s_writeback +} + +class MetaDataArray(lines: Int) extends Component { + val io = new Bundle { + val req = (new ioDecoupled) { new MetaReq() } + val resp = (new MetaData).asOutput() + } + + val array = Mem4(lines, io.resp) + array.setReadLatency(1) + array.setTarget('inst) + val rdata = array.rw(io.req.bits.idx, io.req.bits.data, io.req.valid && io.req.bits.rw, cs = io.req.valid) + rdata ^^ io.resp + io.req.ready := Bool(true) +} + +class DataArray(lines: Int) extends Component { + val io = new Bundle { + val req = (new ioDecoupled) { new DataArrayReq() } + val resp = Bits(width = MEM_DATA_BITS, dir = 'output) + } + + val wmask_array = Vec(MEM_DATA_BITS/8) { Wire() { Bits(width = MEM_DATA_BITS) } } + wmask_array(0) <== Fill(8, io.req.bits.wmask(0)) + for (i <- 1 to MEM_DATA_BITS/8-1) { + wmask_array(i) <== Cat(Fill(8, io.req.bits.wmask(i)), wmask_array(i-1)(8*(i+1)-1, 8*i)) + } + val wmask = wmask_array(MEM_DATA_BITS/8-1) + + val array = Mem4(lines*REFILL_CYCLES, io.resp) + array.setReadLatency(1) + array.setTarget('inst) + val addr = Cat(io.req.bits.idx, io.req.bits.offset) + val rdata = array.rw(addr, io.req.bits.data, io.req.valid && io.req.bits.rw, wmask, cs = io.req.valid) + rdata ^^ io.resp + io.req.ready := Bool(true) +} + +// state machine to flush (write back dirty lines, invalidate clean ones) the D$ +class rocketNBDCacheDM_flush(lines: Int) extends Component { + val io = new ioDCacheDM(); + val dcache = new 
rocketNBDCacheDM(lines); + + val addrbits = PADDR_BITS; + val indexbits = ceil(log10(lines)/log10(2)).toInt; + val offsetbits = 6; + val tagmsb = addrbits - 1; + val taglsb = indexbits+offsetbits; + val tagbits = tagmsb-taglsb+1; + val indexmsb = taglsb-1; + val indexlsb = offsetbits; + val offsetmsb = indexlsb-1; + val offsetlsb = 3; + + val flush_count = Reg(resetVal = UFix(0, indexbits)); + val flush_resp_count = Reg(resetVal = UFix(0, indexbits)); + val flushing = Reg(resetVal = Bool(false)); + val flush_waiting = Reg(resetVal = Bool(false)); + val r_cpu_req_tag = Reg(resetVal = Bits(0, 5)); + + when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_cmd === M_FLA)) + { + r_cpu_req_tag <== io.cpu.req_tag; + flushing <== Bool(true); + flush_waiting <== Bool(true); + } + + when (dcache.io.cpu.req_rdy && (flush_count === ~Bits(0, indexbits))) { + flushing <== Bool(false); + } + when (dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag) && (flush_resp_count === ~Bits(0, indexbits))) { + flush_waiting <== Bool(false); + } + + when (flushing && dcache.io.cpu.req_rdy) { + flush_count <== flush_count + UFix(1,1); + } + when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag(5,0) === r_cpu_req_tag)) { + flush_resp_count <== flush_resp_count + UFix(1,1); + } + + dcache.io.cpu.req_val := (io.cpu.req_val && (io.cpu.req_cmd != M_FLA) && !flush_waiting) || flushing; + dcache.io.cpu.req_cmd := Mux(flushing, M_FLA, io.cpu.req_cmd); + dcache.io.cpu.req_idx := Mux(flushing, Cat(flush_count, Bits(0,offsetbits)), io.cpu.req_idx); + dcache.io.cpu.req_ppn := Mux(flushing, UFix(0,PPN_BITS), io.cpu.req_ppn); + dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); + dcache.io.cpu.req_type := io.cpu.req_type; + dcache.io.cpu.req_data ^^ io.cpu.req_data; + dcache.io.cpu.dtlb_miss := io.cpu.dtlb_miss; + dcache.io.mem ^^ io.mem; + + io.cpu.xcpt_ma_ld := dcache.io.cpu.xcpt_ma_ld; + io.cpu.xcpt_ma_st := dcache.io.cpu.xcpt_ma_st; + 
io.cpu.req_rdy := dcache.io.cpu.req_rdy && !flush_waiting; + io.cpu.resp_miss := dcache.io.cpu.resp_miss; + io.cpu.resp_data := dcache.io.cpu.resp_data; + io.cpu.resp_tag := dcache.io.cpu.resp_tag; + io.cpu.resp_val := dcache.io.cpu.resp_val & + !(flush_waiting && (io.cpu.resp_tag === r_cpu_req_tag) && (flush_count != ~Bits(0, addrbits))); + +} + +class rocketNBDCacheAMOALU extends Component { + val io = new Bundle { + val cmd = Bits(4, 'input) + val wmask = Bits(64/8, 'input) + val lhs = UFix(64, 'input) + val rhs = UFix(64, 'input) + val result = UFix(64, 'output) + } + + val signed = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MAX) + val sub = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) || (io.cmd === M_XA_MAX) || (io.cmd === M_XA_MAXU) + val min = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) + + val addsub_rhs = Mux(sub, ~io.rhs, io.rhs) + val adder_lhs = Cat(io.lhs(63,32), io.wmask(3) & io.lhs(31), io.lhs(30,0)).toUFix; + val adder_rhs = Cat(addsub_rhs(63,32), io.wmask(3) & addsub_rhs(31), addsub_rhs(30,0)).toUFix; + val adder_out = adder_lhs + adder_rhs + sub.toUFix + + val cmp_lhs = Mux(io.wmask(7), io.lhs(63), io.lhs(31)) + val cmp_rhs = Mux(io.wmask(7), io.rhs(63), io.rhs(31)) + val cmp_diff = Mux(io.wmask(7), adder_out(63), adder_out(31)) + val less = Mux(cmp_lhs === cmp_rhs, cmp_diff, Mux(signed, cmp_lhs, cmp_rhs)) + val cmp_out = Mux(min === less, io.lhs, io.rhs) + + val alu_out = Wire() { UFix() }; + switch (io.cmd) { + is (M_XA_ADD) { alu_out <== adder_out } + is (M_XA_SWAP) { alu_out <== io.rhs } + is (M_XA_AND) { alu_out <== io.lhs & io.rhs } + is (M_XA_OR) { alu_out <== io.lhs | io.rhs } + } + alu_out <== cmp_out + + io.result := alu_out +} + +// XXX broken for CPU_DATA_WIDTH != 64 +class AMOUnit extends Component { + val io = new Bundle { + val req = (new ioDecoupled) { new DataReq() } + val lhs = Bits(width = CPU_DATA_BITS) + val rhs = Bits(width = CPU_DATA_BITS) + val wmask = Bits(width = CPU_DATA_BITS/8, dir = 'input) + val data_req = (new 
ioDecoupled) { new DataReq() }.flip() + } + + val valid = Reg(resetVal = Bool(false)) + val r_cmd = Reg() { new DataMemCmd() } + val r_idx = Reg() { Bits(width = IDX_BITS) } + val r_lhs = Reg() { Bits(width = 64) } + val r_rhs = Reg() { Bits(width = 64) } + val r_wmask = Reg() { Bits(width = 64/8) } + when (io.req.valid && io.req.ready) { + valid <== Bool(true); + r_idx <== io.req.bits.idx + r_lhs <== io.lhs; + r_rhs <== io.rhs; + r_cmd <== io.req.bits.cmd; + r_wmask <== io.wmask + } + when (io.data_req.valid && io.data_req.ready) { + valid <== Bool(false) + } + + val alu = new rocketNBDCacheAMOALU + alu.io.cmd := r_cmd.cmd + alu.io.wmask := r_wmask + alu.io.lhs := r_lhs + alu.io.rhs := r_rhs + + io.req.ready := !valid + io.data_req.valid := valid + io.data_req.bits.idx := r_idx + r_cmd ^^ io.data_req.bits.cmd + io.data_req.bits.data := alu.io.result +} + +class rocketNBDCacheDM(lines: Int) extends Component { + val io = new ioDCacheDM(); + + val addrbits = PADDR_BITS; + val indexbits = ceil(log(lines)/log(2)).toInt; + val offsetbits = OFFSET_BITS; + val tagmsb = PADDR_BITS-1; + val taglsb = indexbits+offsetbits; + val tagbits = tagmsb-taglsb+1; + val indexmsb = taglsb-1; + val indexlsb = offsetbits; + val offsetmsb = indexlsb-1; + val offsetlsb = ceil(log(CPU_DATA_BITS/8)/log(2)).toInt; + + val s_reset :: s_ready :: s_replay_load :: s_write_amo :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(9) { UFix() }; + val state = Reg(resetVal = s_reset); + + // idx arrives one clock cycle prior to ppn b/c of DTLB + val r_cpu_req_idx = Reg(resetVal = Bits(0, PGIDX_BITS)); + val r_cpu_req_ppn = Reg(resetVal = Bits(0, PPN_BITS)); + val r_cpu_req_val = Reg(resetVal = Bool(false)); + val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); + val r_cpu_req_type = Reg(resetVal = Bits(0,3)); + val r_cpu_req_tag = Reg(resetVal = Bits(0,5)); + val r_cpu_resp_val = Reg(resetVal = Bool(false)); + val r_amo_data = Reg(resetVal = Bits(0,64)); + + val 
p_store_data = Reg(resetVal = Bits(0,64)); + val p_store_idx = Reg(resetVal = Bits(0,PGIDX_BITS)); + val p_store_type = Reg(resetVal = Bits(0,3)); + val p_store_valid = Reg(resetVal = Bool(false)); + + val req_store = (io.cpu.req_cmd === M_XWR); + val req_load = (io.cpu.req_cmd === M_XRD) || (io.cpu.req_cmd === M_PRD); + val req_flush = (io.cpu.req_cmd === M_FLA); + val req_amo = io.cpu.req_cmd(3).toBool; + val r_req_load = (r_cpu_req_cmd === M_XRD) || (r_cpu_req_cmd === M_PRD); + val r_req_store = (r_cpu_req_cmd === M_XWR); + val r_req_flush = (r_cpu_req_cmd === M_FLA); + val r_req_ptw_load = (r_cpu_req_cmd === M_PRD); + val r_req_amo = r_cpu_req_cmd(3).toBool; + + when (io.cpu.req_val && io.cpu.req_rdy) { + r_cpu_req_idx <== io.cpu.req_idx; + r_cpu_req_cmd <== io.cpu.req_cmd; + r_cpu_req_type <== io.cpu.req_type; + r_cpu_req_tag <== io.cpu.req_tag; + } + + when ((state === s_ready) && r_cpu_req_val && !io.cpu.dtlb_miss) { + r_cpu_req_ppn <== io.cpu.req_ppn; + } + when (io.cpu.req_rdy) { + r_cpu_req_val <== io.cpu.req_val; + } + otherwise { + r_cpu_req_val <== Bool(false); + } + when (((state === s_resolve_miss) && (r_req_load || r_req_amo)) || (state === s_replay_load)) { + r_cpu_resp_val <== Bool(true); + } + otherwise { + r_cpu_resp_val <== Bool(false); + } + + // refill counter + val rr_count = Reg(resetVal = UFix(0,2)); + val rr_count_next = rr_count + UFix(1); + when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy)) { + rr_count <== rr_count_next; + } + + // tag array + val tag_addr = + Mux((state === s_ready), io.cpu.req_idx(PGIDX_BITS-1,offsetbits), + r_cpu_req_idx(PGIDX_BITS-1,offsetbits)).toUFix; + val tag_we = + ((state === s_refill) && io.mem.resp_val && (rr_count === UFix(3,2))) || + ((state === s_resolve_miss) && r_req_flush); + + val tag_array = Mem4(lines, r_cpu_req_ppn); + tag_array.setReadLatency(SRAM_READ_LATENCY); +// tag_array.setTarget('inst); + val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, 
tag_we); + + // valid bit array + val vb_array = Reg(resetVal = Bits(0, lines)); + when (tag_we && !r_req_flush) { + vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); + } + when (tag_we && r_req_flush) { + vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); + } + val vb_rdata = vb_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; + val tag_valid = r_cpu_req_val && vb_rdata; + val tag_match = (tag_rdata === io.cpu.req_ppn); + val tag_hit = tag_valid && tag_match; + val miss = r_cpu_req_val && (!vb_rdata || !tag_match); + + // load/store addresses conflict if they are to any part of the same 64 bit word + val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetlsb) === p_store_idx(PGIDX_BITS-1,offsetlsb)); + val ldst_conflict = tag_valid && tag_match && (r_req_load || r_req_amo) && p_store_valid && addr_match; + val store_hit = r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store ; + + // write the pending store data when the cache is idle, when the next command isn't a load + // or when there's a load to the same address (in which case there's a 2 cycle delay: + // once cycle to write the store data and another to read the data back) + val drain_store = + ((store_hit || p_store_valid) && (!io.cpu.req_val || req_store || req_flush)) || + (p_store_valid && (miss || ldst_conflict)); + + // write pending store data from a store which missed + // after the cache line refill has completed + val resolve_store = (state === s_resolve_miss) && r_req_store; + + // pending store data + when (io.cpu.req_val && io.cpu.req_rdy && req_store) { + p_store_idx <== io.cpu.req_idx; + p_store_data <== io.cpu.req_data; + p_store_type <== io.cpu.req_type; + } + when (store_hit && !drain_store) { + p_store_valid <== Bool(true); + } + when (drain_store) { + p_store_valid <== Bool(false); + } + + // AMO operand + when (io.cpu.req_val && io.cpu.req_rdy && req_amo) { + r_amo_data <== io.cpu.req_data; + } + + 
// dirty bit array + val db_array = Reg(resetVal = Bits(0, lines)); + val tag_dirty = db_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; + when ((r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store) || resolve_store) { + db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); + } + when (state === s_write_amo) { + db_array <== db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); + } + when (tag_we) { + db_array <== db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); + } + + val mshr = new MSHRFile() + mshr.io.req_val := r_cpu_req_val + mshr.io.req_ppn := r_cpu_req_ppn + mshr.io.req_idx := r_cpu_req_idx(PGIDX_BITS-1, offsetbits) + mshr.io.req_cmd.offset := r_cpu_req_idx(offsetbits-1, 0) + mshr.io.req_cmd.cmd := r_cpu_req_cmd + mshr.io.req_cmd.typ := r_cpu_req_type + + // generate write mask and data signals for stores and amos + val storegen = new rocketDCacheStoreGen(); + storegen.io.req_addr_lsb := p_store_idx(2,0); + storegen.io.req_data := p_store_data; + storegen.io.req_type := p_store_type; + val store_data = Fill(2, storegen.io.store_data); + val store_wmask_d = storegen.io.store_wmask; + val store_wmask = Mux(p_store_idx(offsetlsb).toBool, Cat(store_wmask_d, Bits(0,64)), Cat(Bits(0,64), store_wmask_d)); + + // ALU for AMOs + val amo_alu = new rocketNBDCacheAMOALU(); + val amo_alu_out = Cat(amo_alu.io.result,amo_alu.io.result); + val amo_wmask = + Mux(r_cpu_req_type === MT_D, ~Bits(0,8), + Mux(r_cpu_req_idx(2).toBool, Cat(~Bits(0,4), Bits(0,4)), + Cat(Bits(0,4), ~Bits(0,4)))); + + val amo_store_wmask_d = Cat(Fill(8, amo_wmask(7)), + Fill(8, amo_wmask(6)), + Fill(8, amo_wmask(5)), + Fill(8, amo_wmask(4)), + Fill(8, amo_wmask(3)), + Fill(8, amo_wmask(2)), + Fill(8, amo_wmask(1)), + Fill(8, amo_wmask(0))); + + val amo_store_wmask = Mux(r_cpu_req_idx(offsetlsb).toBool, Cat(amo_store_wmask_d, Bits(0,64)), Cat(Bits(0,64), amo_store_wmask_d)); + + // data array + val 
data_addr = + Mux(drain_store || resolve_store, p_store_idx(PGIDX_BITS-1, offsetmsb-1), + Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count_next), + Mux((state === s_start_writeback) || (state === s_writeback) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count), + Mux((state === s_resolve_miss) || (state === s_replay_load) || (state === s_write_amo), r_cpu_req_idx(PGIDX_BITS-1, offsetmsb-1), + io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1))))).toUFix; + + val data_wdata = + Mux((state === s_refill), io.mem.resp_data, + Mux((state === s_write_amo), amo_alu_out, + store_data)); + + val data_we = + ((state === s_refill) && io.mem.resp_val) || + (state === s_write_amo) || + drain_store || resolve_store; + + val data_wmask = + Mux((state === s_refill), ~Bits(0,128), + Mux((state === s_write_amo), amo_store_wmask, + store_wmask)); + + val data_array = Mem4(lines*4, data_wdata); + data_array.setReadLatency(SRAM_READ_LATENCY); +// data_array.setTarget('inst); + val data_array_rdata = data_array.rw(data_addr, data_wdata, data_we, data_wmask); + val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); + val r_resp_data = Reg(resp_data); + + amo_alu.io.cmd := r_cpu_req_cmd; + amo_alu.io.wmask := amo_wmask; + amo_alu.io.lhs := Mux(r_cpu_resp_val, resp_data, r_resp_data).toUFix; + amo_alu.io.rhs := r_amo_data.toUFix; + + // signal a load miss when the data isn't present in the cache and when it's in the pending store data register + // (causes the cache to block for 2 cycles and the load or amo instruction is replayed) + val load_miss = + !io.cpu.dtlb_miss && + (state === s_ready) && r_cpu_req_val && (r_req_load || r_req_amo) && (!tag_hit || (p_store_valid && addr_match)); + + // output signals + // busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush + io.cpu.req_rdy := mshr.io.req_rdy && (state === 
s_ready) && !io.cpu.dtlb_miss && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))); + io.cpu.resp_val := !io.cpu.dtlb_miss && + ((state === s_ready) && tag_hit && (r_req_load || r_req_amo) && !(p_store_valid && addr_match)) || + ((state === s_resolve_miss) && r_req_flush) || + r_cpu_resp_val; + + val misaligned = + (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && r_cpu_req_idx(0).toBool) || + (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0,2))) || + ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0,3))); + + io.cpu.xcpt_ma_ld := r_cpu_req_val && (r_req_load || r_req_amo) && misaligned; + io.cpu.xcpt_ma_st := r_cpu_req_val && (r_req_store || r_req_amo) && misaligned; + + io.cpu.resp_miss := load_miss; + // tag MSB distinguishes between loads destined for the PTW and CPU + io.cpu.resp_tag := Cat(r_req_ptw_load, r_cpu_req_type, r_cpu_req_idx(2,0), r_cpu_req_tag); + io.cpu.resp_data := resp_data; + + io.mem.req_val := (state === s_req_refill) || (state === s_writeback); + io.mem.req_rw := (state === s_writeback); + io.mem.req_wdata := data_array_rdata; + io.mem.req_tag := UFix(0); + io.mem.req_addr := + Mux(state === s_writeback, Cat(tag_rdata, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count), + Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), Bits(0,2))).toUFix; + + // control state machine + switch (state) { + is (s_reset) { + state <== s_ready; + } + is (s_ready) { + when (io.cpu.dtlb_miss) { + state <== s_ready; + } + when (ldst_conflict) { + state <== s_replay_load; + } + when (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))) { + state <== s_ready; + } + when (tag_hit && r_req_amo) { + state <== s_write_amo; + } + when (tag_valid & tag_dirty) { + state <== s_start_writeback; + } + when (r_req_flush) { + state <== s_resolve_miss; + } + otherwise { + state <== s_req_refill; + } + } + is (s_replay_load) { + state <== s_ready; + } + is (s_write_amo) 
{ + state <== s_ready; + } + is (s_start_writeback) { + state <== s_writeback; + } + is (s_writeback) { + when (io.mem.req_rdy && (rr_count === UFix(3,2))) { + when (r_req_flush) { + state <== s_resolve_miss; + } + otherwise { + state <== s_req_refill; + } + } + } + is (s_req_refill) + { + when (io.mem.req_rdy) { state <== s_refill; } + } + is (s_refill) { + when (io.mem.resp_val && (rr_count === UFix(3,2))) { state <== s_resolve_miss; } + } + is (s_resolve_miss) { + when (r_req_amo) { + state <== s_write_amo; + } + state <== s_ready; + } + } +} + + +} From ce201559f318ddbab01c23ce4e91a63bca0e7b64 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 10 Dec 2011 00:42:09 -0800 Subject: [PATCH 0068/1087] Support cache->cpu nacks one cycle after request --- rocket/src/main/scala/consts.scala | 3 ++- rocket/src/main/scala/cpu.scala | 3 ++- rocket/src/main/scala/ctrl.scala | 10 ++++---- rocket/src/main/scala/dcache.scala | 34 +++++++++++++++------------- rocket/src/main/scala/dpath.scala | 6 ++--- rocket/src/main/scala/nbdcache.scala | 16 ++++++------- rocket/src/main/scala/ptw.scala | 33 +++++++++++++++++---------- 7 files changed, 60 insertions(+), 45 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 30b98b0c..f7212ad2 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -186,7 +186,8 @@ object Constants // rocketNBDCacheDM parameters val CPU_DATA_BITS = 64; - val CPU_TAG_BITS = 5; + val CPU_TAG_BITS = 11; + val DCACHE_TAG_BITS = 1 + CPU_TAG_BITS; val OFFSET_BITS = 6; // log2(cache line size in bytes) val NMSHR = 2; // number of primary misses val NRPQ = 16; // number of secondary misses diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 142ee382..eaeba65a 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -95,13 +95,14 @@ class rocketProc extends Component arb.io.cpu.req_val := ctrl.io.dmem.req_val; 
arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; arb.io.cpu.req_type := ctrl.io.dmem.req_type; - arb.io.cpu.dtlb_miss := ctrl.io.dpath.killm; + arb.io.cpu.req_nack := ctrl.io.dpath.killm; arb.io.cpu.req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0); arb.io.cpu.req_ppn := dtlb.io.cpu.resp_ppn; arb.io.cpu.req_data := dpath.io.dmem.req_data; arb.io.cpu.req_tag := dpath.io.dmem.req_tag; ctrl.io.dmem.req_rdy := dtlb.io.cpu.req_rdy && arb.io.cpu.req_rdy; ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; + ctrl.io.dmem.resp_nack := arb.io.cpu.resp_nack; dpath.io.dmem.resp_val := arb.io.cpu.resp_val; dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; dpath.io.dmem.resp_data := arb.io.cpu.resp_data; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 302ae0fb..dc995b01 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -37,6 +37,7 @@ class ioCtrlDpath extends Bundle() val mem_eret = Bool('output); val mem_load = Bool('output); val wen = Bool('output); + val ex_mem_type = UFix(3, 'output) // instruction in execute is an unconditional jump val ex_jmp = Bool('output); // enable/disable interrupts @@ -74,7 +75,7 @@ class ioCtrlAll extends Bundle() val dpath = new ioCtrlDpath(); val console = new ioConsole(List("rdy")); val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); - val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss")).flip(); + val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip(); val host = new ioHost(List("start")); val dtlb_val = Bool('output) val dtlb_rdy = Bool('input); @@ -303,8 +304,8 @@ class rocketCtrl extends Component val id_console_out_val = id_wen_pcr.toBool && (id_raddr2 === PCR_CONSOLE); - val wb_reg_div_mul_val = Reg(){Bool()}; - val dcache_miss = Reg(io.dmem.resp_miss); + val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) + val dcache_miss = Reg(io.dmem.resp_miss, resetVal = Bool(false)); 
val sboard = new rocketCtrlSboard(); sboard.io.raddra := id_raddr2.toUFix; @@ -526,7 +527,7 @@ class rocketCtrl extends Component io.dpath.badvaddr_wen := io.xcpt_dtlb_ld || io.xcpt_dtlb_st; // replay mem stage PC on a DTLB miss - val mem_hazard = io.dtlb_miss + val mem_hazard = io.dtlb_miss || io.dmem.resp_nack val replay_mem = mem_hazard || mem_reg_replay; val kill_mem = mem_hazard || mem_exception; @@ -667,6 +668,7 @@ class rocketCtrl extends Component io.dmem.req_val := ex_reg_mem_val && !kill_dmem; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; + io.dpath.ex_mem_type:= ex_reg_mem_type } } diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 02d538f0..0e61feb4 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -7,7 +7,7 @@ import scala.math._; // interface between D$ and processor/DTLB class ioDmem(view: List[String] = null) extends Bundle(view) { - val dtlb_miss = Bool('input); + val req_nack = Bool('input); val req_val = Bool('input); val req_rdy = Bool('output); val req_cmd = Bits(4, 'input); @@ -15,13 +15,14 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { val req_idx = Bits(PGIDX_BITS, 'input); val req_ppn = Bits(PPN_BITS, 'input); val req_data = Bits(64, 'input); - val req_tag = Bits(5, 'input); + val req_tag = Bits(DCACHE_TAG_BITS, 'input); val xcpt_ma_ld = Bool('output); // misaligned load val xcpt_ma_st = Bool('output); // misaligned store val resp_miss = Bool('output); + val resp_nack = Bool('output); val resp_val = Bool('output); val resp_data = Bits(64, 'output); - val resp_tag = Bits(12, 'output); + val resp_tag = Bits(DCACHE_TAG_BITS, 'output); } // interface between D$ and next level in memory hierarchy @@ -125,7 +126,7 @@ class rocketDCacheDM_flush(lines: Int) extends Component { val flush_resp_count = Reg(resetVal = UFix(0, indexbits)); val flushing = Reg(resetVal = Bool(false)); val flush_waiting = Reg(resetVal = 
Bool(false)); - val r_cpu_req_tag = Reg(resetVal = Bits(0, 5)); + val r_cpu_req_tag = Reg() { Bits() } when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_cmd === M_FLA)) { @@ -144,7 +145,7 @@ class rocketDCacheDM_flush(lines: Int) extends Component { when (flushing && dcache.io.cpu.req_rdy) { flush_count <== flush_count + UFix(1,1); } - when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag(5,0) === r_cpu_req_tag)) { + when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag)) { flush_resp_count <== flush_resp_count + UFix(1,1); } @@ -155,13 +156,14 @@ class rocketDCacheDM_flush(lines: Int) extends Component { dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); dcache.io.cpu.req_type := io.cpu.req_type; dcache.io.cpu.req_data ^^ io.cpu.req_data; - dcache.io.cpu.dtlb_miss := io.cpu.dtlb_miss && !flush_waiting; + dcache.io.cpu.req_nack := io.cpu.req_nack && !flush_waiting; dcache.io.mem ^^ io.mem; io.cpu.xcpt_ma_ld := dcache.io.cpu.xcpt_ma_ld; io.cpu.xcpt_ma_st := dcache.io.cpu.xcpt_ma_st; io.cpu.req_rdy := dcache.io.cpu.req_rdy && !flush_waiting; io.cpu.resp_miss := dcache.io.cpu.resp_miss; + io.cpu.resp_nack := dcache.io.cpu.resp_nack; io.cpu.resp_data := dcache.io.cpu.resp_data; io.cpu.resp_tag := dcache.io.cpu.resp_tag; io.cpu.resp_val := dcache.io.cpu.resp_val & @@ -192,7 +194,7 @@ class rocketDCacheDM(lines: Int) extends Component { val r_cpu_req_val = Reg(resetVal = Bool(false)); val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); val r_cpu_req_type = Reg(resetVal = Bits(0,3)); - val r_cpu_req_tag = Reg(resetVal = Bits(0,5)); + val r_cpu_req_tag = Reg() { Bits() } val r_cpu_resp_val = Reg(resetVal = Bool(false)); val r_amo_data = Reg(resetVal = Bits(0,64)); @@ -218,7 +220,7 @@ class rocketDCacheDM(lines: Int) extends Component { r_cpu_req_tag <== io.cpu.req_tag; } - when ((state === s_ready) && r_cpu_req_val && !io.cpu.dtlb_miss) { + when ((state === s_ready) && r_cpu_req_val && 
!io.cpu.req_nack) { r_cpu_req_ppn <== io.cpu.req_ppn; } when (io.cpu.req_rdy) { @@ -271,7 +273,7 @@ class rocketDCacheDM(lines: Int) extends Component { // load/store addresses conflict if they are to any part of the same 64 bit word val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetlsb) === p_store_idx(PGIDX_BITS-1,offsetlsb)); val ldst_conflict = tag_valid && tag_match && (r_req_load || r_req_amo) && p_store_valid && addr_match; - val store_hit = r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store ; + val store_hit = r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store ; // write the pending store data when the cache is idle, when the next command isn't a load // or when there's a load to the same address (in which case there's a 2 cycle delay: @@ -305,7 +307,7 @@ class rocketDCacheDM(lines: Int) extends Component { // dirty bit array val db_array = Reg(resetVal = Bits(0, lines)); val tag_dirty = db_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; - when ((r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store) || resolve_store) { + when ((r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store) || resolve_store) { db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } when (state === s_write_amo) { @@ -381,13 +383,13 @@ class rocketDCacheDM(lines: Int) extends Component { // signal a load miss when the data isn't present in the cache and when it's in the pending store data register // (causes the cache to block for 2 cycles and the load or amo instruction is replayed) val load_miss = - !io.cpu.dtlb_miss && + !io.cpu.req_nack && (state === s_ready) && r_cpu_req_val && (r_req_load || r_req_amo) && (!tag_hit || (p_store_valid && addr_match)); // output signals // busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush - io.cpu.req_rdy := (state === s_ready) && !io.cpu.dtlb_miss && !ldst_conflict && (!r_cpu_req_val || (tag_hit && 
!(r_req_flush || r_req_amo))); - io.cpu.resp_val := !io.cpu.dtlb_miss && + io.cpu.req_rdy := (state === s_ready) && !io.cpu.req_nack && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))); + io.cpu.resp_val := !io.cpu.req_nack && ((state === s_ready) && tag_hit && (r_req_load || r_req_amo) && !(p_store_valid && addr_match)) || ((state === s_resolve_miss) && r_req_flush) || r_cpu_resp_val; @@ -401,8 +403,8 @@ class rocketDCacheDM(lines: Int) extends Component { io.cpu.xcpt_ma_st := r_cpu_req_val && (r_req_store || r_req_amo) && misaligned; io.cpu.resp_miss := load_miss; - // tag MSB distinguishes between loads destined for the PTW and CPU - io.cpu.resp_tag := Cat(r_req_ptw_load, r_cpu_req_type, r_cpu_req_idx(2,0), r_cpu_req_tag); + io.cpu.resp_nack := Bool(false) + io.cpu.resp_tag := r_cpu_req_tag io.cpu.resp_data := resp_data; io.mem.req_val := (state === s_req_refill) || (state === s_writeback); @@ -419,7 +421,7 @@ class rocketDCacheDM(lines: Int) extends Component { state <== s_ready; } is (s_ready) { - when (io.cpu.dtlb_miss) { + when (io.cpu.req_nack) { state <== s_ready; } when (ldst_conflict) { diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 1fd32d2f..2de40382 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -8,10 +8,10 @@ import Instructions._ class ioDpathDmem extends Bundle() { val req_addr = UFix(VADDR_BITS, 'output); - val req_tag = UFix(5, 'output); + val req_tag = UFix(CPU_TAG_BITS, 'output); val req_data = Bits(64, 'output); val resp_val = Bool('input); - val resp_tag = Bits(12, 'input); // FIXME: MSB is ignored + val resp_tag = Bits(CPU_TAG_BITS, 'input); val resp_data = Bits(64, 'input); } @@ -328,7 +328,7 @@ class rocketDpath extends Component // other signals (req_val, req_rdy) connect to control module io.dmem.req_addr := ex_alu_out(VADDR_BITS-1,0); io.dmem.req_data := ex_reg_rs2; - io.dmem.req_tag := ex_reg_waddr; + io.dmem.req_tag := 
Cat(io.ctrl.ex_mem_type, io.dmem.req_addr(2,0), ex_reg_waddr).toUFix; // processor control regfile read pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_eret; diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index ecb3eaa5..0d498d26 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -440,7 +440,7 @@ class rocketNBDCacheDM_flush(lines: Int) extends Component { dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); dcache.io.cpu.req_type := io.cpu.req_type; dcache.io.cpu.req_data ^^ io.cpu.req_data; - dcache.io.cpu.dtlb_miss := io.cpu.dtlb_miss; + dcache.io.cpu.req_nack := io.cpu.req_nack; dcache.io.mem ^^ io.mem; io.cpu.xcpt_ma_ld := dcache.io.cpu.xcpt_ma_ld; @@ -580,7 +580,7 @@ class rocketNBDCacheDM(lines: Int) extends Component { r_cpu_req_tag <== io.cpu.req_tag; } - when ((state === s_ready) && r_cpu_req_val && !io.cpu.dtlb_miss) { + when ((state === s_ready) && r_cpu_req_val && !io.cpu.req_nack) { r_cpu_req_ppn <== io.cpu.req_ppn; } when (io.cpu.req_rdy) { @@ -633,7 +633,7 @@ class rocketNBDCacheDM(lines: Int) extends Component { // load/store addresses conflict if they are to any part of the same 64 bit word val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetlsb) === p_store_idx(PGIDX_BITS-1,offsetlsb)); val ldst_conflict = tag_valid && tag_match && (r_req_load || r_req_amo) && p_store_valid && addr_match; - val store_hit = r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store ; + val store_hit = r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store ; // write the pending store data when the cache is idle, when the next command isn't a load // or when there's a load to the same address (in which case there's a 2 cycle delay: @@ -667,7 +667,7 @@ class rocketNBDCacheDM(lines: Int) extends Component { // dirty bit array val db_array = Reg(resetVal = Bits(0, lines)); val tag_dirty = db_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; - when 
((r_cpu_req_val && !io.cpu.dtlb_miss && tag_hit && r_req_store) || resolve_store) { + when ((r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store) || resolve_store) { db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } when (state === s_write_amo) { @@ -751,13 +751,13 @@ class rocketNBDCacheDM(lines: Int) extends Component { // signal a load miss when the data isn't present in the cache and when it's in the pending store data register // (causes the cache to block for 2 cycles and the load or amo instruction is replayed) val load_miss = - !io.cpu.dtlb_miss && + !io.cpu.req_nack && (state === s_ready) && r_cpu_req_val && (r_req_load || r_req_amo) && (!tag_hit || (p_store_valid && addr_match)); // output signals // busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush - io.cpu.req_rdy := mshr.io.req_rdy && (state === s_ready) && !io.cpu.dtlb_miss && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))); - io.cpu.resp_val := !io.cpu.dtlb_miss && + io.cpu.req_rdy := mshr.io.req_rdy && (state === s_ready) && !io.cpu.req_nack && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))); + io.cpu.resp_val := !io.cpu.req_nack && ((state === s_ready) && tag_hit && (r_req_load || r_req_amo) && !(p_store_valid && addr_match)) || ((state === s_resolve_miss) && r_req_flush) || r_cpu_resp_val; @@ -789,7 +789,7 @@ class rocketNBDCacheDM(lines: Int) extends Component { state <== s_ready; } is (s_ready) { - when (io.cpu.dtlb_miss) { + when (io.cpu.req_nack) { state <== s_ready; } when (ldst_conflict) { diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 52bdf8e8..8c14e96d 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -7,7 +7,7 @@ import scala.math._; class ioDmemArbiter extends Bundle { - val ptw = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", 
"req_idx", "req_ppn", "resp_data", "resp_val")); + val ptw = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_idx", "req_ppn", "resp_data", "resp_val", "resp_nack")); val cpu = new ioDmem(); val mem = new ioDmem().flip(); } @@ -24,31 +24,31 @@ class rocketDmemArbiter extends Component io.mem.req_cmd := Mux(io.ptw.req_val, io.ptw.req_cmd, io.cpu.req_cmd); io.mem.req_type := Mux(io.ptw.req_val, io.ptw.req_type, io.cpu.req_type); io.mem.req_idx := Mux(io.ptw.req_val, io.ptw.req_idx, io.cpu.req_idx); -// io.mem.req_ppn := Mux(io.ptw.req_val, io.ptw.req_ppn, io.cpu.req_ppn); io.mem.req_ppn := Mux(r_ptw_req_val, r_ptw_req_ppn, io.cpu.req_ppn); io.mem.req_data := io.cpu.req_data; - io.mem.req_tag := Mux(io.ptw.req_val, Bits(0,5), io.cpu.req_tag); -// io.mem.dtlb_busy := io.cpu.dtlb_busy; - io.mem.dtlb_miss := io.cpu.dtlb_miss; + io.mem.req_tag := Cat(io.cpu.req_tag, io.ptw.req_val); + io.mem.req_nack := io.cpu.req_nack; io.ptw.req_rdy := io.mem.req_rdy; io.cpu.req_rdy := io.mem.req_rdy && !io.ptw.req_val; - io.cpu.resp_miss := io.mem.resp_miss && !io.mem.resp_tag(11).toBool; + io.cpu.resp_miss := io.mem.resp_miss && !io.mem.resp_tag(0).toBool; - io.cpu.resp_val := io.mem.resp_val && !io.mem.resp_tag(11).toBool; - io.ptw.resp_val := io.mem.resp_val && io.mem.resp_tag(11).toBool; + io.cpu.resp_nack := io.mem.resp_nack && !r_ptw_req_val + io.ptw.resp_nack := io.mem.resp_nack && r_ptw_req_val + + io.cpu.resp_val := io.mem.resp_val && !io.mem.resp_tag(0).toBool; + io.ptw.resp_val := io.mem.resp_val && io.mem.resp_tag(0).toBool; io.ptw.resp_data := io.mem.resp_data; io.cpu.resp_data := io.mem.resp_data; -// io.cpu.resp_tag := io.mem.resp_tag(10,0); - io.cpu.resp_tag := io.mem.resp_tag; + io.cpu.resp_tag := io.mem.resp_tag >> UFix(1); } class ioPTW extends Bundle { val itlb = new ioTLB_PTW().flip(); val dtlb = new ioTLB_PTW().flip(); - val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_ppn", "req_idx", "resp_data", "resp_val")).flip(); 
+ val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_ppn", "req_idx", "resp_data", "resp_val", "resp_nack")).flip(); val ptbr = UFix(PADDR_BITS, 'input); } @@ -139,6 +139,9 @@ class rocketPTW extends Component } } is (s_l1_wait) { + when (io.dmem.resp_nack) { + state <== s_l1_req + } when (io.dmem.resp_val) { when (resp_ptd) { // page table descriptor state <== s_l2_req; @@ -161,6 +164,9 @@ class rocketPTW extends Component } } is (s_l2_wait) { + when (io.dmem.resp_nack) { + state <== s_l2_req + } when (io.dmem.resp_val) { when (resp_ptd) { // page table descriptor state <== s_l3_req; @@ -183,6 +189,9 @@ class rocketPTW extends Component } } is (s_l3_wait) { + when (io.dmem.resp_nack) { + state <== s_l3_req + } when (io.dmem.resp_val) { when (resp_pte) { // page table entry state <== s_done; @@ -201,4 +210,4 @@ class rocketPTW extends Component } } -} \ No newline at end of file +} From 8308345364fd42e94fb91828a17b4b3538efc1e1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 10 Dec 2011 07:01:47 -0800 Subject: [PATCH 0069/1087] work in progress on hellacache --- rocket/src/main/scala/dcache.scala | 4 +- rocket/src/main/scala/nbdcache.scala | 524 +++++++++------------------ rocket/src/main/scala/top.scala | 2 +- rocket/src/main/scala/util.scala | 10 +- 4 files changed, 172 insertions(+), 368 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 0e61feb4..dcd15167 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -31,9 +31,9 @@ class ioDcache(view: List[String] = null) extends Bundle(view) { val req_tag = UFix(DMEM_TAG_BITS, 'input); val req_val = Bool('input); val req_rdy = Bool('output); - val req_wdata = Bits(128, 'input); + val req_wdata = Bits(MEM_DATA_BITS, 'input); val req_rw = Bool('input); - val resp_data = Bits(128, 'output); + val resp_data = Bits(MEM_DATA_BITS, 'output); val resp_tag = Bits(DMEM_TAG_BITS, 'output); val resp_val = 
Bool('output); } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 0d498d26..2cf6fbe6 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -84,7 +84,7 @@ class MetaData extends Bundle { val tag = Bits(width = PPN_BITS) } -class MetaReq extends Bundle { +class MetaArrayReq extends Bundle { val idx = Bits(width = IDX_BITS) val rw = Bool() val data = new MetaData() @@ -106,7 +106,7 @@ class MSHR extends Component { val mem_resp_val = Bool('input) val mem_req = (new ioDecoupled) { new MemReq() }.flip - val meta_req = (new ioDecoupled) { new MetaReq() }.flip + val meta_req = (new ioDecoupled) { new MetaArrayReq() }.flip val replay = (new ioDecoupled) { new Replay() }.flip } @@ -117,7 +117,7 @@ class MSHR extends Component { val ppn = Reg { Bits() } val idx = Reg { Bits() } - val req_load = (io.req_cmd.cmd.cmd === M_XRD) || (io.req_cmd.cmd.cmd === M_PRD) || (io.req_cmd.cmd.cmd === M_PFR) + val req_load = (io.req_cmd.cmd.cmd === M_XRD) || (io.req_cmd.cmd.cmd === M_PFR) val req_use_rpq = (io.req_cmd.cmd.cmd != M_PFR) && (io.req_cmd.cmd.cmd != M_PFW) val next_dirty = io.req_pri_val && io.req_pri_rdy && !req_load || io.req_sec_val && io.req_sec_rdy && (!req_load || dirty) val sec_rdy = io.idx_match && !refilled && (dirty || !requested || req_load) @@ -183,7 +183,7 @@ class MSHRFile extends Component { val mem_resp_tag = Bits(DMEM_TAG_BITS, 'input) val mem_req = (new ioDecoupled) { new MemReq() }.flip() - val meta_req = (new ioDecoupled) { new MetaReq() }.flip() + val meta_req = (new ioDecoupled) { new MetaArrayReq() }.flip() val replay = (new ioDecoupled) { new Replay() }.flip() } @@ -192,7 +192,7 @@ class MSHRFile extends Component { val sec_rdy = Wire { Bool() } val tag_mux = new Mux1H(NMSHR, PPN_BITS) - val meta_req_arb = (new Arbiter(NMSHR)) { new MetaReq() } + val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() } val mem_req_arb = (new Arbiter(NMSHR)) { new MemReq() } val 
replay_arb = (new Arbiter(NMSHR)) { new RPQEntry() } val alloc_arb = (new Arbiter(NMSHR)) { Bool() } @@ -204,7 +204,7 @@ class MSHRFile extends Component { val rpqe = new RPQEntry().asInput rpqe.cmd.offset <== io.req_cmd.offset - rpqe.cmd.cmd <== Mux(io.req_cmd.cmd === M_PRD, M_XRD, io.req_cmd.cmd) + rpqe.cmd.cmd <== io.req_cmd.cmd rpqe.cmd.typ <== io.req_cmd.typ rpqe.sdq_id <== UFix(0) @@ -284,7 +284,7 @@ class StoreDataUnit extends Component { class WritebackUnit extends Component { val io = new Bundle { - val wb_req = (new ioDecoupled) { new WritebackReq() } + val req = (new ioDecoupled) { new WritebackReq() } val data_req = (new ioDecoupled) { new DataReq() }.flip() val data_resp = Bits(width = MEM_DATA_BITS, dir = 'input) val mem_req = (new ioDecoupled) { new MemReq() }.flip() @@ -292,18 +292,18 @@ class WritebackUnit extends Component { val wbq = (new queueSimplePF(REFILL_CYCLES)) { Bits(width = MEM_DATA_BITS) } val valid = Reg(resetVal = Bool(false)) - val cnt = Reg() { UFix(ceil(log(REFILL_CYCLES)/log(2)).toInt) } + val cnt = Reg() { UFix(width = ceil(log(REFILL_CYCLES)/log(2)).toInt) } val addr = Reg() { new WritebackReq() } wbq.io.enq.valid := valid && Reg(io.data_req.valid && io.data_req.ready) wbq.io.enq.bits := io.data_resp wbq.io.deq.ready := io.mem_req.ready && (~cnt === UFix(0)) - when (io.wb_req.valid && io.wb_req.ready) { valid <== Bool(true); cnt <== UFix(0); addr <== io.wb_req.bits } + when (io.req.valid && io.req.ready) { valid <== Bool(true); cnt <== UFix(0); addr <== io.req.bits } when (io.data_req.valid && io.data_req.ready) { cnt <== cnt + UFix(1) } when ((~cnt === UFix(0)) && !wbq.io.deq.valid) { valid <== Bool(false) } - io.wb_req.ready := !valid + io.req.ready := !valid io.data_req.valid := valid && wbq.io.enq.ready io.data_req.bits.idx := addr.idx io.data_req.bits.cmd.offset := cnt * UFix(MEM_DATA_BITS/8) @@ -315,48 +315,47 @@ class WritebackUnit extends Component { io.mem_req.bits.addr := Cat(addr.ppn, addr.idx) } -class FlushUnit 
extends Component { +class FlushUnit(lines: Int) extends Component { val io = new Bundle { - val flush_req = (new ioDecoupled) { Bits(width = CPU_TAG_BITS) } - val flush_resp = (new ioDecoupled) { Bits(width = CPU_TAG_BITS) }.flip() - val meta_req = (new ioDecoupled) { new MetaReq() }.flip() + val req = (new ioDecoupled) { Bits(width = CPU_TAG_BITS) } + val resp = (new ioDecoupled) { Bits(width = CPU_TAG_BITS) }.flip() + val meta_req = (new ioDecoupled) { new MetaArrayReq() }.flip() val meta_resp = (new MetaData).asInput() - - val wb_req_val = Bool(dir = 'output) - val wb_req_rdy = Bool(dir = 'input) + val wb_req = (new ioDecoupled) { new WritebackReq() }.flip() } - val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: s_writeback :: s_meta_write :: s_done :: Nil = Enum(7) { UFix() } - val state = Reg(resetVal = s_reset) + val s_reset0 :: s_reset :: s_ready :: s_meta_read :: s_meta_wait :: s_meta_write :: s_done :: Nil = Enum(7) { UFix() } + val state = Reg(resetVal = s_reset0) val tag = Reg() { Bits(width = CPU_TAG_BITS) } - val cnt = Reg() { UFix(ceil(log(REFILL_CYCLES)/log(2)).toInt) } + val cnt = Reg() { UFix(width = ceil(log(lines)/log(2)).toInt) } val next_cnt = cnt + UFix(1) switch (state) { + is(s_reset0) { state <== s_reset; cnt <== UFix(0) } is(s_reset) { when (io.meta_req.ready) { state <== Mux(~cnt === UFix(0), s_ready, s_reset); cnt <== next_cnt } } - is(s_ready) { when (io.flush_req.valid) { state <== s_meta_read; tag <== io.flush_req.bits } } + is(s_ready) { when (io.req.valid) { state <== s_meta_read; tag <== io.req.bits } } is(s_meta_read) { when (io.meta_req.ready) { state <== s_meta_wait } } - is(s_meta_wait) { state <== Mux(io.meta_resp.valid && io.meta_resp.dirty, s_writeback, s_meta_write) } - is(s_writeback) { when (io.wb_req_rdy) { state <== s_meta_write } } + is(s_meta_wait) { state <== Mux(io.meta_resp.valid && io.meta_resp.dirty && !io.wb_req.ready, s_meta_read, s_meta_write) } is(s_meta_write) { when (io.meta_req.ready) { state <== 
Mux(~cnt === UFix(0), s_done, s_meta_read); cnt <== next_cnt } } - is(s_done) { when (io.flush_resp.ready) { state <== s_ready } } + is(s_done) { when (io.resp.ready) { state <== s_ready } } } - io.flush_req.ready := state === s_ready - io.flush_resp.valid := state === s_done - io.flush_resp.bits := tag + io.req.ready := state === s_ready + io.resp.valid := state === s_done + io.resp.bits := tag io.meta_req.valid := (state === s_meta_read) || (state === s_meta_write) || (state === s_reset) io.meta_req.bits.idx := cnt io.meta_req.bits.rw := (state === s_meta_write) || (state === s_reset) io.meta_req.bits.data.valid := Bool(false) io.meta_req.bits.data.dirty := Bool(false) io.meta_req.bits.data.tag := UFix(0) - io.wb_req_val := state === s_writeback + io.wb_req.valid := state === s_meta_wait + io.meta_resp ^^ io.wb_req.bits } class MetaDataArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new MetaReq() } + val req = (new ioDecoupled) { new MetaArrayReq() } val resp = (new MetaData).asOutput() } @@ -390,70 +389,6 @@ class DataArray(lines: Int) extends Component { io.req.ready := Bool(true) } -// state machine to flush (write back dirty lines, invalidate clean ones) the D$ -class rocketNBDCacheDM_flush(lines: Int) extends Component { - val io = new ioDCacheDM(); - val dcache = new rocketNBDCacheDM(lines); - - val addrbits = PADDR_BITS; - val indexbits = ceil(log10(lines)/log10(2)).toInt; - val offsetbits = 6; - val tagmsb = addrbits - 1; - val taglsb = indexbits+offsetbits; - val tagbits = tagmsb-taglsb+1; - val indexmsb = taglsb-1; - val indexlsb = offsetbits; - val offsetmsb = indexlsb-1; - val offsetlsb = 3; - - val flush_count = Reg(resetVal = UFix(0, indexbits)); - val flush_resp_count = Reg(resetVal = UFix(0, indexbits)); - val flushing = Reg(resetVal = Bool(false)); - val flush_waiting = Reg(resetVal = Bool(false)); - val r_cpu_req_tag = Reg(resetVal = Bits(0, 5)); - - when (io.cpu.req_val && io.cpu.req_rdy && 
(io.cpu.req_cmd === M_FLA)) - { - r_cpu_req_tag <== io.cpu.req_tag; - flushing <== Bool(true); - flush_waiting <== Bool(true); - } - - when (dcache.io.cpu.req_rdy && (flush_count === ~Bits(0, indexbits))) { - flushing <== Bool(false); - } - when (dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag) && (flush_resp_count === ~Bits(0, indexbits))) { - flush_waiting <== Bool(false); - } - - when (flushing && dcache.io.cpu.req_rdy) { - flush_count <== flush_count + UFix(1,1); - } - when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag(5,0) === r_cpu_req_tag)) { - flush_resp_count <== flush_resp_count + UFix(1,1); - } - - dcache.io.cpu.req_val := (io.cpu.req_val && (io.cpu.req_cmd != M_FLA) && !flush_waiting) || flushing; - dcache.io.cpu.req_cmd := Mux(flushing, M_FLA, io.cpu.req_cmd); - dcache.io.cpu.req_idx := Mux(flushing, Cat(flush_count, Bits(0,offsetbits)), io.cpu.req_idx); - dcache.io.cpu.req_ppn := Mux(flushing, UFix(0,PPN_BITS), io.cpu.req_ppn); - dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); - dcache.io.cpu.req_type := io.cpu.req_type; - dcache.io.cpu.req_data ^^ io.cpu.req_data; - dcache.io.cpu.req_nack := io.cpu.req_nack; - dcache.io.mem ^^ io.mem; - - io.cpu.xcpt_ma_ld := dcache.io.cpu.xcpt_ma_ld; - io.cpu.xcpt_ma_st := dcache.io.cpu.xcpt_ma_st; - io.cpu.req_rdy := dcache.io.cpu.req_rdy && !flush_waiting; - io.cpu.resp_miss := dcache.io.cpu.resp_miss; - io.cpu.resp_data := dcache.io.cpu.resp_data; - io.cpu.resp_tag := dcache.io.cpu.resp_tag; - io.cpu.resp_val := dcache.io.cpu.resp_val & - !(flush_waiting && (io.cpu.resp_tag === r_cpu_req_tag) && (flush_count != ~Bits(0, addrbits))); - -} - class rocketNBDCacheAMOALU extends Component { val io = new Bundle { val cmd = Bits(4, 'input) @@ -531,7 +466,7 @@ class AMOUnit extends Component { io.data_req.bits.data := alu.io.result } -class rocketNBDCacheDM(lines: Int) extends Component { +class HellaCache(lines: Int) extends Component { val io = new 
ioDCacheDM(); val addrbits = PADDR_BITS; @@ -544,306 +479,175 @@ class rocketNBDCacheDM(lines: Int) extends Component { val indexlsb = offsetbits; val offsetmsb = indexlsb-1; val offsetlsb = ceil(log(CPU_DATA_BITS/8)/log(2)).toInt; + val rf_cnt_bits = ceil(log(REFILL_CYCLES)/log(2)).toInt - val s_reset :: s_ready :: s_replay_load :: s_write_amo :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(9) { UFix() }; - val state = Reg(resetVal = s_reset); - - // idx arrives one clock cycle prior to ppn b/c of DTLB - val r_cpu_req_idx = Reg(resetVal = Bits(0, PGIDX_BITS)); - val r_cpu_req_ppn = Reg(resetVal = Bits(0, PPN_BITS)); - val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); - val r_cpu_req_type = Reg(resetVal = Bits(0,3)); - val r_cpu_req_tag = Reg(resetVal = Bits(0,5)); - val r_cpu_resp_val = Reg(resetVal = Bool(false)); - val r_amo_data = Reg(resetVal = Bits(0,64)); + val r_cpu_req_val_ = Reg(io.cpu.req_val, resetVal = Bool(false)) + val r_cpu_req_idx = Reg() { Bits() } + val r_cpu_req_cmd = Reg() { Bits() } + val r_cpu_req_type = Reg() { Bits() } + val r_cpu_req_tag = Reg() { Bits() } - val p_store_data = Reg(resetVal = Bits(0,64)); - val p_store_idx = Reg(resetVal = Bits(0,PGIDX_BITS)); - val p_store_type = Reg(resetVal = Bits(0,3)); - val p_store_valid = Reg(resetVal = Bool(false)); + val p_store_valid = Reg(resetVal = Bool(false)) + val p_store_data = Reg() { Bits() } + val p_store_idx = Reg() { Bits() } + val p_store_type = Reg() { Bits() } - val req_store = (io.cpu.req_cmd === M_XWR); - val req_load = (io.cpu.req_cmd === M_XRD) || (io.cpu.req_cmd === M_PRD); - val req_flush = (io.cpu.req_cmd === M_FLA); - val req_amo = io.cpu.req_cmd(3).toBool; - val r_req_load = (r_cpu_req_cmd === M_XRD) || (r_cpu_req_cmd === M_PRD); - val r_req_store = (r_cpu_req_cmd === M_XWR); - val r_req_flush = (r_cpu_req_cmd === M_FLA); - val r_req_ptw_load = (r_cpu_req_cmd === M_PRD); - val 
r_req_amo = r_cpu_req_cmd(3).toBool; + val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req_nack + val req_store = (io.cpu.req_cmd === M_XWR) + val req_load = (io.cpu.req_cmd === M_XRD) || (io.cpu.req_cmd === M_PRD) + val req_flush = (io.cpu.req_cmd === M_FLA) + val req_amo = io.cpu.req_cmd(3).toBool + val req_read = req_load || req_amo + val req_write = req_store || req_amo + val r_req_load = (r_cpu_req_cmd === M_XRD) + val r_req_store = (r_cpu_req_cmd === M_XWR) + val r_req_flush = (r_cpu_req_cmd === M_FLA) + val r_req_amo = r_cpu_req_cmd(3).toBool + val r_req_read = r_req_load || r_req_amo + val r_req_write = r_req_store || r_req_amo - when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_idx <== io.cpu.req_idx; - r_cpu_req_cmd <== io.cpu.req_cmd; - r_cpu_req_type <== io.cpu.req_type; - r_cpu_req_tag <== io.cpu.req_tag; + when (io.cpu.req_val) { + r_cpu_req_idx <== io.cpu.req_idx + r_cpu_req_cmd <== Mux(req_load, M_XRD, io.cpu.req_cmd) + r_cpu_req_type <== io.cpu.req_type + r_cpu_req_tag <== io.cpu.req_tag } - - when ((state === s_ready) && r_cpu_req_val && !io.cpu.req_nack) { - r_cpu_req_ppn <== io.cpu.req_ppn; - } - when (io.cpu.req_rdy) { - r_cpu_req_val <== io.cpu.req_val; - } - otherwise { - r_cpu_req_val <== Bool(false); - } - when (((state === s_resolve_miss) && (r_req_load || r_req_amo)) || (state === s_replay_load)) { - r_cpu_resp_val <== Bool(true); - } - otherwise { - r_cpu_resp_val <== Bool(false); - } - + + // tags + val meta = new MetaDataArray(lines) + val meta_arb = (new Arbiter(3)) { new MetaArrayReq() } + meta_arb.io.out <> meta.io.req + + // data + val data = new DataArray(lines) + val data_arb = (new Arbiter(3)) { new DataArrayReq() } + data_arb.io.out <> data.io.req + + // writeback unit + val wb = new WritebackUnit + val wb_arb = (new Arbiter(2)) { new WritebackReq() } + wb_arb.io.out <> wb.io.req + + // reset and flush unit + val flusher = new FlushUnit(lines) + flusher.io.req.valid := r_cpu_req_val && r_req_flush + flusher.io.wb_req <> 
wb_arb.io.in(0) + flusher.io.meta_req <> meta_arb.io.in(0) + flusher.io.meta_resp <> meta.io.resp + + // cpu tag check + val meta_req = new MetaArrayReq().asInput + meta_req.idx <== io.cpu.req_idx + meta_req.rw <== Bool(false) + meta_arb.io.in(2).valid := io.cpu.req_val + meta_req ^^ meta_arb.io.in(2).bits + val early_tag_nack = !meta_arb.io.in(2).ready + val tag_match = meta.io.resp.valid && (meta.io.resp.tag === io.cpu.req_ppn) + val hit = r_cpu_req_val && tag_match + val miss = r_cpu_req_val && !tag_match + // refill counter - val rr_count = Reg(resetVal = UFix(0,2)); + val rr_count = Reg(resetVal = UFix(0, rf_cnt_bits)); val rr_count_next = rr_count + UFix(1); - when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy)) { - rr_count <== rr_count_next; - } + when (io.mem.resp_val) { rr_count <== rr_count_next } - // tag array - val tag_addr = - Mux((state === s_ready), io.cpu.req_idx(PGIDX_BITS-1,offsetbits), - r_cpu_req_idx(PGIDX_BITS-1,offsetbits)).toUFix; - val tag_we = - ((state === s_refill) && io.mem.resp_val && (rr_count === UFix(3,2))) || - ((state === s_resolve_miss) && r_req_flush); + // refill response + val rr = new DataArrayReq().asInput + rr.offset <== rr_count + rr.idx <== Bits(0) /* TODO: get this from MSHR file */ + rr.rw <== Bool(true) + rr.wmask <== ~UFix(0) + rr.data <== io.mem.resp_data + data_arb.io.in(0).valid := io.mem.resp_val + rr ^^ data_arb.io.in(0).bits - val tag_array = Mem4(lines, r_cpu_req_ppn); - tag_array.setReadLatency(SRAM_READ_LATENCY); -// tag_array.setTarget('inst); - val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); + // load hits + val load = new DataArrayReq().asInput + load.offset <== io.cpu.req_idx(offsetmsb, offsetlsb+rf_cnt_bits) + load.idx <== io.cpu.req_idx(indexmsb, indexlsb) + load.rw <== Bool(false) + load.wmask <== ~UFix(0) // don't care + load.data <== io.mem.resp_data // don't care + data_arb.io.in(2).valid := io.cpu.req_val && req_read + load ^^ 
data_arb.io.in(2).bits + val early_load_nack = req_read && !data_arb.io.in(2).ready - // valid bit array - val vb_array = Reg(resetVal = Bits(0, lines)); - when (tag_we && !r_req_flush) { - vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); - } - when (tag_we && r_req_flush) { - vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); - } - val vb_rdata = vb_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; - val tag_valid = r_cpu_req_val && vb_rdata; - val tag_match = (tag_rdata === io.cpu.req_ppn); - val tag_hit = tag_valid && tag_match; - val miss = r_cpu_req_val && (!vb_rdata || !tag_match); + // load/store addresses conflict if they are to any part of the same word + p_store_valid <== Bool(false) + val p_store_match = r_req_read && p_store_valid && (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) - // load/store addresses conflict if they are to any part of the same 64 bit word - val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetlsb) === p_store_idx(PGIDX_BITS-1,offsetlsb)); - val ldst_conflict = tag_valid && tag_match && (r_req_load || r_req_amo) && p_store_valid && addr_match; - val store_hit = r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store ; + // store hits. + // we nack new stores if a pending store can't retire for some reason. + // we drain a pending store if the CPU performs a store or a + // conflictig load, or if the cache misses or is idle. 
+ val store = new DataArrayReq().asInput + store.offset <== p_store_idx(offsetmsb, offsetlsb+rf_cnt_bits) + store.idx <== p_store_idx(indexmsb, indexlsb) + store.rw <== Bool(true) + store.wmask <== UFix(0) // TODO + store.data <== UFix(0) // TODO + data_arb.io.in(1).valid := p_store_valid && (miss || !io.cpu.req_val || req_store || (r_cpu_req_val && p_store_match)) + val early_store_nack = req_write && p_store_valid && !data_arb.io.in(1).ready - // write the pending store data when the cache is idle, when the next command isn't a load - // or when there's a load to the same address (in which case there's a 2 cycle delay: - // once cycle to write the store data and another to read the data back) - val drain_store = - ((store_hit || p_store_valid) && (!io.cpu.req_val || req_store || req_flush)) || - (p_store_valid && (miss || ldst_conflict)); - - // write pending store data from a store which missed - // after the cache line refill has completed - val resolve_store = (state === s_resolve_miss) && r_req_store; + // tag update after a miss or a store to an exclusive clean line. + // we don't look at the meta ready signal because the only requester + // with higher precedence is the flush unit, which nacks us anyway. 
+ val meta_update = new MetaArrayReq().asInput + meta_update.idx <== r_cpu_req_idx + meta_update.rw <== Bool(true) + meta_update.data.valid <== tag_match + meta_update.data.dirty <== tag_match + meta_update.data.tag <== io.cpu.req_ppn + meta_req.data <== meta_update.data // don't care + meta_arb.io.in(1).valid := miss && wb_arb.io.in(1).ready || hit && r_req_write + meta_update ^^ meta_arb.io.in(1).bits - // pending store data - when (io.cpu.req_val && io.cpu.req_rdy && req_store) { + // pending store data, also used for AMO RHS + when (io.cpu.req_val && req_store && !early_store_nack) { p_store_idx <== io.cpu.req_idx; - p_store_data <== io.cpu.req_data; p_store_type <== io.cpu.req_type; } - when (store_hit && !drain_store) { - p_store_valid <== Bool(true); - } - when (drain_store) { - p_store_valid <== Bool(false); - } - - // AMO operand - when (io.cpu.req_val && io.cpu.req_rdy && req_amo) { - r_amo_data <== io.cpu.req_data; + when (io.cpu.req_val && req_write && !early_store_nack) { + p_store_data <== io.cpu.req_data } - // dirty bit array - val db_array = Reg(resetVal = Bits(0, lines)); - val tag_dirty = db_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; - when ((r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store) || resolve_store) { - db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); - } - when (state === s_write_amo) { - db_array <== db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); - } - when (tag_we) { - db_array <== db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); - } - - val mshr = new MSHRFile() + /*val mshr = new MSHRFile() mshr.io.req_val := r_cpu_req_val - mshr.io.req_ppn := r_cpu_req_ppn + mshr.io.req_ppn := io.cpu.req_ppn mshr.io.req_idx := r_cpu_req_idx(PGIDX_BITS-1, offsetbits) mshr.io.req_cmd.offset := r_cpu_req_idx(offsetbits-1, 0) mshr.io.req_cmd.cmd := r_cpu_req_cmd - mshr.io.req_cmd.typ := r_cpu_req_type - - // generate write mask and 
data signals for stores and amos - val storegen = new rocketDCacheStoreGen(); - storegen.io.req_addr_lsb := p_store_idx(2,0); - storegen.io.req_data := p_store_data; - storegen.io.req_type := p_store_type; - val store_data = Fill(2, storegen.io.store_data); - val store_wmask_d = storegen.io.store_wmask; - val store_wmask = Mux(p_store_idx(offsetlsb).toBool, Cat(store_wmask_d, Bits(0,64)), Cat(Bits(0,64), store_wmask_d)); - - // ALU for AMOs - val amo_alu = new rocketNBDCacheAMOALU(); - val amo_alu_out = Cat(amo_alu.io.result,amo_alu.io.result); - val amo_wmask = - Mux(r_cpu_req_type === MT_D, ~Bits(0,8), - Mux(r_cpu_req_idx(2).toBool, Cat(~Bits(0,4), Bits(0,4)), - Cat(Bits(0,4), ~Bits(0,4)))); - - val amo_store_wmask_d = Cat(Fill(8, amo_wmask(7)), - Fill(8, amo_wmask(6)), - Fill(8, amo_wmask(5)), - Fill(8, amo_wmask(4)), - Fill(8, amo_wmask(3)), - Fill(8, amo_wmask(2)), - Fill(8, amo_wmask(1)), - Fill(8, amo_wmask(0))); - - val amo_store_wmask = Mux(r_cpu_req_idx(offsetlsb).toBool, Cat(amo_store_wmask_d, Bits(0,64)), Cat(Bits(0,64), amo_store_wmask_d)); - - // data array - val data_addr = - Mux(drain_store || resolve_store, p_store_idx(PGIDX_BITS-1, offsetmsb-1), - Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count_next), - Mux((state === s_start_writeback) || (state === s_writeback) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count), - Mux((state === s_resolve_miss) || (state === s_replay_load) || (state === s_write_amo), r_cpu_req_idx(PGIDX_BITS-1, offsetmsb-1), - io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1))))).toUFix; - - val data_wdata = - Mux((state === s_refill), io.mem.resp_data, - Mux((state === s_write_amo), amo_alu_out, - store_data)); - - val data_we = - ((state === s_refill) && io.mem.resp_val) || - (state === s_write_amo) || - drain_store || resolve_store; - - val data_wmask = - Mux((state === s_refill), ~Bits(0,128), - Mux((state === s_write_amo), amo_store_wmask, - 
store_wmask)); - - val data_array = Mem4(lines*4, data_wdata); - data_array.setReadLatency(SRAM_READ_LATENCY); -// data_array.setTarget('inst); - val data_array_rdata = data_array.rw(data_addr, data_wdata, data_we, data_wmask); - val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); - val r_resp_data = Reg(resp_data); - - amo_alu.io.cmd := r_cpu_req_cmd; - amo_alu.io.wmask := amo_wmask; - amo_alu.io.lhs := Mux(r_cpu_resp_val, resp_data, r_resp_data).toUFix; - amo_alu.io.rhs := r_amo_data.toUFix; + mshr.io.req_cmd.typ := r_cpu_req_type*/ // signal a load miss when the data isn't present in the cache and when it's in the pending store data register // (causes the cache to block for 2 cycles and the load or amo instruction is replayed) - val load_miss = - !io.cpu.req_nack && - (state === s_ready) && r_cpu_req_val && (r_req_load || r_req_amo) && (!tag_hit || (p_store_valid && addr_match)); + val early_nack = early_tag_nack || early_load_nack || early_store_nack + val nack = Reg(early_nack) || p_store_match || !flusher.io.req.ready + val load_miss = !nack && miss && r_req_read + val resp_val = (!nack && hit && r_req_read) || flusher.io.resp.valid - // output signals - // busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush - io.cpu.req_rdy := mshr.io.req_rdy && (state === s_ready) && !io.cpu.req_nack && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))); - io.cpu.resp_val := !io.cpu.req_nack && - ((state === s_ready) && tag_hit && (r_req_load || r_req_amo) && !(p_store_valid && addr_match)) || - ((state === s_resolve_miss) && r_req_flush) || - r_cpu_resp_val; + // report that cache is always ready. we nack instead. 
+ io.cpu.req_rdy := Bool(true) + io.cpu.resp_nack := r_cpu_req_val_ && nack + io.cpu.resp_val := resp_val val misaligned = (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && r_cpu_req_idx(0).toBool) || (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0,2))) || ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0,3))); - io.cpu.xcpt_ma_ld := r_cpu_req_val && (r_req_load || r_req_amo) && misaligned; - io.cpu.xcpt_ma_st := r_cpu_req_val && (r_req_store || r_req_amo) && misaligned; + io.cpu.xcpt_ma_ld := r_cpu_req_val_ && r_req_read && misaligned + io.cpu.xcpt_ma_st := r_cpu_req_val_ && r_req_write && misaligned - io.cpu.resp_miss := load_miss; - // tag MSB distinguishes between loads destined for the PTW and CPU - io.cpu.resp_tag := Cat(r_req_ptw_load, r_cpu_req_type, r_cpu_req_idx(2,0), r_cpu_req_tag); - io.cpu.resp_data := resp_data; + io.cpu.resp_miss := load_miss + io.cpu.resp_tag := flusher.io.resp.bits + io.cpu.resp_data := Bits(0) - io.mem.req_val := (state === s_req_refill) || (state === s_writeback); - io.mem.req_rw := (state === s_writeback); - io.mem.req_wdata := data_array_rdata; - io.mem.req_tag := UFix(0); - io.mem.req_addr := - Mux(state === s_writeback, Cat(tag_rdata, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count), - Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), Bits(0,2))).toUFix; - - // control state machine - switch (state) { - is (s_reset) { - state <== s_ready; - } - is (s_ready) { - when (io.cpu.req_nack) { - state <== s_ready; - } - when (ldst_conflict) { - state <== s_replay_load; - } - when (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))) { - state <== s_ready; - } - when (tag_hit && r_req_amo) { - state <== s_write_amo; - } - when (tag_valid & tag_dirty) { - state <== s_start_writeback; - } - when (r_req_flush) { - state <== s_resolve_miss; - } - otherwise { - state <== s_req_refill; - } - } - is (s_replay_load) { - state <== s_ready; - } - is 
(s_write_amo) { - state <== s_ready; - } - is (s_start_writeback) { - state <== s_writeback; - } - is (s_writeback) { - when (io.mem.req_rdy && (rr_count === UFix(3,2))) { - when (r_req_flush) { - state <== s_resolve_miss; - } - otherwise { - state <== s_req_refill; - } - } - } - is (s_req_refill) - { - when (io.mem.req_rdy) { state <== s_refill; } - } - is (s_refill) { - when (io.mem.resp_val && (rr_count === UFix(3,2))) { state <== s_resolve_miss; } - } - is (s_resolve_miss) { - when (r_req_amo) { - state <== s_write_amo; - } - state <== s_ready; - } - } + io.mem.req_val := Bool(false) + io.mem.req_rw := Bool(false) + io.mem.req_wdata := Bits(0) + io.mem.req_tag := UFix(0) + io.mem.req_addr := UFix(0) } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 4cac1465..d84d333e 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -17,7 +17,7 @@ class Top() extends Component { val cpu = new rocketProc(); val icache = new rocketICacheDM(128); // # 64 byte cache lines val icache_pf = new rocketIPrefetcher(); - val dcache = new rocketDCacheDM_flush(128); + val dcache = new HellaCache(128); val arbiter = new rocketMemArbiter(); arbiter.io.mem ^^ io.mem; diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 4b0d86ec..e013a16c 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -64,9 +64,9 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { io.in(i).ready := !io.in(i-1).valid && io.in(i-1).ready } - dout(n-1) <== io.in(n-1).bits - for (i <- n-2 to 0) { - dout(i) <== Mux(io.in(i).valid, io.in(i).bits, dout(i+1)) + dout(0) <== io.in(n-1).bits + for (i <- 1 to n-1) { + dout(i) <== Mux(io.in(n-1-i).valid, io.in(n-1-i).bits, dout(i-1)) } for (i <- 0 to n-2) { @@ -74,8 +74,8 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { } vout <== io.in(n-1).valid - vout ^^ io.out.valid - dout(0) ^^ io.out.bits + vout ^^ 
io.out.valid + dout(n-1) ^^ io.out.bits } class ioPriorityDecoder(in_width: Int, out_width: Int) extends Bundle From 0ea2704b809a8e2e09636dfdf3c080e3a36b2e68 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 12 Dec 2011 03:23:12 -0800 Subject: [PATCH 0070/1087] new mftx instruction format --- rocket/src/main/scala/instructions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index ea790740..0b23c3cd 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -146,8 +146,8 @@ object Instructions val FMAX_S = Bits("b?????_?????_?????_11001_000_00_1010011",32); val FMIN_D = Bits("b?????_?????_?????_11000_000_01_1010011",32); val FMAX_D = Bits("b?????_?????_?????_11001_000_01_1010011",32); - val MFTX_S = Bits("b?????_00000_?????_11100_000_00_1010011",32); - val MFTX_D = Bits("b?????_00000_?????_11100_000_01_1010011",32); + val MFTX_S = Bits("b?????_?????_00000_11100_000_00_1010011",32); + val MFTX_D = Bits("b?????_?????_00000_11100_000_01_1010011",32); val MFFSR = Bits("b?????_00000_00000_11101_000_00_1010011",32); val MXTF_S = Bits("b?????_?????_00000_11110_000_00_1010011",32); val MXTF_D = Bits("b?????_?????_00000_11110_000_01_1010011",32); From 56c4f44c2abccd92d7f0c9e3e570d59bed466e5c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 12 Dec 2011 06:49:16 -0800 Subject: [PATCH 0071/1087] hellacache returns! but AMOs are unimplemented. 
--- rocket/src/main/scala/arbiter.scala | 2 +- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/cpu.scala | 4 +- rocket/src/main/scala/ctrl.scala | 16 +- rocket/src/main/scala/dcache.scala | 25 +- rocket/src/main/scala/dpath.scala | 51 +- rocket/src/main/scala/icache.scala | 18 +- rocket/src/main/scala/icache_prefetch.scala | 6 +- rocket/src/main/scala/nbdcache.scala | 640 +++++++++++++------- rocket/src/main/scala/ptw.scala | 2 +- rocket/src/main/scala/queues.scala | 6 +- rocket/src/main/scala/util.scala | 57 +- 12 files changed, 487 insertions(+), 342 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index eef8ea6f..01708fe5 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -9,7 +9,7 @@ class ioMem() extends Bundle val req_val = Bool('output); val req_rdy = Bool('input); val req_rw = Bool('output); - val req_addr = UFix(PADDR_BITS, 'output); + val req_addr = UFix(PADDR_BITS - OFFSET_BITS, 'output); val req_wdata = Bits(MEM_DATA_BITS, 'output); val req_tag = Bits(MEM_TAG_BITS, 'output); diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index f7212ad2..23b2f2b6 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -186,7 +186,7 @@ object Constants // rocketNBDCacheDM parameters val CPU_DATA_BITS = 64; - val CPU_TAG_BITS = 11; + val CPU_TAG_BITS = 5; val DCACHE_TAG_BITS = 1 + CPU_TAG_BITS; val OFFSET_BITS = 6; // log2(cache line size in bytes) val NMSHR = 2; // number of primary misses diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index eaeba65a..08e5a0ea 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -66,7 +66,6 @@ class rocketProc extends Component ctrl.io.imem.resp_val := io.imem.resp_val; dpath.io.imem.resp_data := io.imem.resp_data; ctrl.io.xcpt_itlb := itlb.io.cpu.exception; -// ctrl.io.itlb_miss := 
itlb.io.cpu.resp_miss; io.imem.itlb_miss := itlb.io.cpu.resp_miss; // connect DTLB to D$ arbiter, ctrl+dpath @@ -95,7 +94,7 @@ class rocketProc extends Component arb.io.cpu.req_val := ctrl.io.dmem.req_val; arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; arb.io.cpu.req_type := ctrl.io.dmem.req_type; - arb.io.cpu.req_nack := ctrl.io.dpath.killm; + arb.io.cpu.req_kill := ctrl.io.dmem.req_kill; arb.io.cpu.req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0); arb.io.cpu.req_ppn := dtlb.io.cpu.resp_ppn; arb.io.cpu.req_data := dpath.io.dmem.req_data; @@ -106,6 +105,7 @@ class rocketProc extends Component dpath.io.dmem.resp_val := arb.io.cpu.resp_val; dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; dpath.io.dmem.resp_data := arb.io.cpu.resp_data; + dpath.io.dmem.resp_data_subword := io.dmem.resp_data_subword; io.console.bits := dpath.io.console.bits; io.console.valid := dpath.io.console.valid; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index dc995b01..6d8cf5a5 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -75,7 +75,7 @@ class ioCtrlAll extends Bundle() val dpath = new ioCtrlDpath(); val console = new ioConsole(List("rdy")); val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); - val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip(); + val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip(); val host = new ioHost(List("start")); val dtlb_val = Bool('output) val dtlb_rdy = Bool('input); @@ -358,6 +358,7 @@ class rocketCtrl extends Component val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val mem_reg_replay = Reg(resetVal = Bool(false)); + val mem_reg_kill_dmem = Reg(resetVal = Bool(false)); when (!io.dpath.stalld) { when (io.dpath.killf) { @@ -527,7 +528,8 @@ class rocketCtrl extends Component io.dpath.badvaddr_wen := 
io.xcpt_dtlb_ld || io.xcpt_dtlb_st; // replay mem stage PC on a DTLB miss - val mem_hazard = io.dtlb_miss || io.dmem.resp_nack + val mem_hazard = io.dtlb_miss || io.dmem.resp_nack; + val mem_kill_dmem = io.dtlb_miss || mem_exception || mem_reg_kill_dmem; val replay_mem = mem_hazard || mem_reg_replay; val kill_mem = mem_hazard || mem_exception; @@ -541,10 +543,11 @@ class rocketCtrl extends Component val ex_hazard = io.dmem.resp_miss || mem_reg_privileged || mem_reg_flush_inst val mem_kill_ex = kill_mem || take_pc_mem val kill_ex = mem_kill_ex || ex_hazard || !(io.dmem.req_rdy && io.dtlb_rdy) && ex_reg_mem_val - val kill_dtlb = mem_kill_ex || ex_hazard || !io.dmem.req_rdy - val kill_dmem = mem_kill_ex || ex_hazard || !io.dtlb_rdy + val ex_kill_dtlb = mem_kill_ex || ex_hazard || !io.dmem.req_rdy + val ex_kill_dmem = mem_kill_ex || ex_hazard || !io.dtlb_rdy mem_reg_replay <== kill_ex && !mem_kill_ex + mem_reg_kill_dmem <== ex_kill_dmem io.dpath.sel_pc := Mux(replay_mem, PC_MEM, // dtlb miss @@ -664,8 +667,9 @@ class rocketCtrl extends Component io.dpath.irq_disable := mem_reg_inst_di && !kill_mem; io.dpath.irq_enable := mem_reg_inst_ei && !kill_mem; - io.dtlb_val := ex_reg_mem_val && !kill_dtlb; - io.dmem.req_val := ex_reg_mem_val && !kill_dmem; + io.dtlb_val := ex_reg_mem_val && !ex_kill_dtlb; + io.dmem.req_val := ex_reg_mem_val; + io.dmem.req_kill := mem_kill_dmem; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; io.dpath.ex_mem_type:= ex_reg_mem_type diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index dcd15167..3e5774af 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -7,7 +7,7 @@ import scala.math._; // interface between D$ and processor/DTLB class ioDmem(view: List[String] = null) extends Bundle(view) { - val req_nack = Bool('input); + val req_kill = Bool('input); val req_val = Bool('input); val req_rdy = Bool('output); val req_cmd = Bits(4, 'input); @@ -22,12 
+22,13 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { val resp_nack = Bool('output); val resp_val = Bool('output); val resp_data = Bits(64, 'output); + val resp_data_subword = Bits(64, 'output); val resp_tag = Bits(DCACHE_TAG_BITS, 'output); } // interface between D$ and next level in memory hierarchy class ioDcache(view: List[String] = null) extends Bundle(view) { - val req_addr = UFix(PADDR_BITS, 'input); + val req_addr = UFix(PADDR_BITS - OFFSET_BITS, 'input); val req_tag = UFix(DMEM_TAG_BITS, 'input); val req_val = Bool('input); val req_rdy = Bool('output); @@ -156,7 +157,7 @@ class rocketDCacheDM_flush(lines: Int) extends Component { dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); dcache.io.cpu.req_type := io.cpu.req_type; dcache.io.cpu.req_data ^^ io.cpu.req_data; - dcache.io.cpu.req_nack := io.cpu.req_nack && !flush_waiting; + dcache.io.cpu.req_kill := io.cpu.req_kill && !flush_waiting; dcache.io.mem ^^ io.mem; io.cpu.xcpt_ma_ld := dcache.io.cpu.xcpt_ma_ld; @@ -220,7 +221,7 @@ class rocketDCacheDM(lines: Int) extends Component { r_cpu_req_tag <== io.cpu.req_tag; } - when ((state === s_ready) && r_cpu_req_val && !io.cpu.req_nack) { + when ((state === s_ready) && r_cpu_req_val && !io.cpu.req_kill) { r_cpu_req_ppn <== io.cpu.req_ppn; } when (io.cpu.req_rdy) { @@ -273,7 +274,7 @@ class rocketDCacheDM(lines: Int) extends Component { // load/store addresses conflict if they are to any part of the same 64 bit word val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetlsb) === p_store_idx(PGIDX_BITS-1,offsetlsb)); val ldst_conflict = tag_valid && tag_match && (r_req_load || r_req_amo) && p_store_valid && addr_match; - val store_hit = r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store ; + val store_hit = r_cpu_req_val && !io.cpu.req_kill && tag_hit && r_req_store ; // write the pending store data when the cache is idle, when the next command isn't a load // or when there's a load to the same address (in which case 
there's a 2 cycle delay: @@ -307,7 +308,7 @@ class rocketDCacheDM(lines: Int) extends Component { // dirty bit array val db_array = Reg(resetVal = Bits(0, lines)); val tag_dirty = db_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; - when ((r_cpu_req_val && !io.cpu.req_nack && tag_hit && r_req_store) || resolve_store) { + when ((r_cpu_req_val && !io.cpu.req_kill && tag_hit && r_req_store) || resolve_store) { db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } when (state === s_write_amo) { @@ -383,13 +384,13 @@ class rocketDCacheDM(lines: Int) extends Component { // signal a load miss when the data isn't present in the cache and when it's in the pending store data register // (causes the cache to block for 2 cycles and the load or amo instruction is replayed) val load_miss = - !io.cpu.req_nack && + !io.cpu.req_kill && (state === s_ready) && r_cpu_req_val && (r_req_load || r_req_amo) && (!tag_hit || (p_store_valid && addr_match)); // output signals // busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush - io.cpu.req_rdy := (state === s_ready) && !io.cpu.req_nack && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))); - io.cpu.resp_val := !io.cpu.req_nack && + io.cpu.req_rdy := (state === s_ready) && !io.cpu.req_kill && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))); + io.cpu.resp_val := !io.cpu.req_kill && ((state === s_ready) && tag_hit && (r_req_load || r_req_amo) && !(p_store_valid && addr_match)) || ((state === s_resolve_miss) && r_req_flush) || r_cpu_resp_val; @@ -412,8 +413,8 @@ class rocketDCacheDM(lines: Int) extends Component { io.mem.req_wdata := data_array_rdata; io.mem.req_tag := UFix(0); io.mem.req_addr := - Mux(state === s_writeback, Cat(tag_rdata, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count), - Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), Bits(0,2))).toUFix; + 
Mux(state === s_writeback, Cat(tag_rdata, r_cpu_req_idx(PGIDX_BITS-1, offsetbits)), + Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits))).toUFix; // control state machine switch (state) { @@ -421,7 +422,7 @@ class rocketDCacheDM(lines: Int) extends Component { state <== s_ready; } is (s_ready) { - when (io.cpu.req_nack) { + when (io.cpu.req_kill) { state <== s_ready; } when (ldst_conflict) { diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 2de40382..57f2efd6 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -13,6 +13,7 @@ class ioDpathDmem extends Bundle() val resp_val = Bool('input); val resp_tag = Bits(CPU_TAG_BITS, 'input); val resp_data = Bits(64, 'input); + val resp_data_subword = Bits(64, 'input); } class ioDpathImem extends Bundle() @@ -113,9 +114,6 @@ class rocketDpath extends Component val r_dmem_resp_val = Reg(resetVal = Bool(false)); val r_dmem_resp_waddr = Reg(resetVal = UFix(0,5)); - val r_dmem_resp_pos = Reg(resetVal = UFix(0,3)); - val r_dmem_resp_type = Reg(resetVal = Bits(0,3)); - val r_dmem_resp_data = Reg(resetVal = Bits(0,64)); // instruction fetch stage val if_pc_plus4 = if_reg_pc + UFix(4); @@ -194,52 +192,22 @@ class rocketDpath extends Component Mux(io.ctrl.sel_wa === WA_RA, RA, UFix(0, 5))))); - // moved this here to avoid having to do forward declaration - // FIXME: cleanup - // 64/32 bit load handling (in mem stage) - val dmem_resp_pos = io.dmem.resp_tag(7,5).toUFix; - val dmem_resp_type = io.dmem.resp_tag(10,8); - - val mem_dmem_resp_data_w = - Mux(dmem_resp_pos(2).toBool, io.dmem.resp_data(63, 32), io.dmem.resp_data(31, 0)); - - val mem_dmem_resp_data = - Mux(dmem_resp_type === MT_D, io.dmem.resp_data, - Mux(dmem_resp_type === MT_W, Cat(Fill(32, mem_dmem_resp_data_w(31)), mem_dmem_resp_data_w), - Cat(UFix(0,32), mem_dmem_resp_data_w))); - - // crossbar/sign extension for 8/16 bit loads (in writeback stage) - val dmem_resp_data_h = - 
Mux(r_dmem_resp_pos(1).toBool, r_dmem_resp_data(31, 16), r_dmem_resp_data(15, 0)); - val dmem_resp_data_b = - Mux(r_dmem_resp_pos(0).toBool, dmem_resp_data_h(15, 8), dmem_resp_data_h(7, 0)); - - val dmem_resp_data_final = - Mux(r_dmem_resp_type === MT_B, Cat(Fill(56, dmem_resp_data_b(7)), dmem_resp_data_b), - Mux(r_dmem_resp_type === MT_BU, Cat(UFix(0, 56), dmem_resp_data_b), - Mux(r_dmem_resp_type === MT_H, Cat(Fill(48, dmem_resp_data_h(15)), dmem_resp_data_h), - Mux(r_dmem_resp_type === MT_HU, Cat(UFix(0, 48), dmem_resp_data_h), - Mux((r_dmem_resp_type === MT_W) || - (r_dmem_resp_type === MT_WU) || - (r_dmem_resp_type === MT_D), r_dmem_resp_data, - UFix(0,64)))))); - // bypass muxes val id_rs1 = Mux(io.ctrl.div_wb, div_result, Mux(io.ctrl.mul_wb, mul_result, Mux(id_raddr1 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr1 === ex_reg_waddr, ex_wdata, Mux(id_raddr1 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata, - Mux(id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr, mem_dmem_resp_data, - Mux(id_raddr1 != UFix(0, 5) && r_dmem_resp_val && id_raddr1 === r_dmem_resp_waddr, dmem_resp_data_final, + Mux(id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr, io.dmem.resp_data, + Mux(id_raddr1 != UFix(0, 5) && r_dmem_resp_val && id_raddr1 === r_dmem_resp_waddr, io.dmem.resp_data_subword, Mux(id_raddr1 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr1 === wb_reg_waddr, wb_reg_wdata, id_rdata1))))))); val id_rs2 = Mux(id_raddr2 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr2 === ex_reg_waddr, ex_wdata, Mux(id_raddr2 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata, - Mux(id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr, mem_dmem_resp_data, - Mux(id_raddr2 != UFix(0, 5) && r_dmem_resp_val && id_raddr2 === r_dmem_resp_waddr, dmem_resp_data_final, + Mux(id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr, io.dmem.resp_data, + 
Mux(id_raddr2 != UFix(0, 5) && r_dmem_resp_val && id_raddr2 === r_dmem_resp_waddr, io.dmem.resp_data_subword, Mux(id_raddr2 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr2 === wb_reg_waddr, wb_reg_wdata, id_rdata2))))); @@ -328,7 +296,7 @@ class rocketDpath extends Component // other signals (req_val, req_rdy) connect to control module io.dmem.req_addr := ex_alu_out(VADDR_BITS-1,0); io.dmem.req_data := ex_reg_rs2; - io.dmem.req_tag := Cat(io.ctrl.ex_mem_type, io.dmem.req_addr(2,0), ex_reg_waddr).toUFix; + io.dmem.req_tag := ex_reg_waddr; // processor control regfile read pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_eret; @@ -393,10 +361,7 @@ class rocketDpath extends Component // writeback stage r_dmem_resp_val <== io.dmem.resp_val; - r_dmem_resp_waddr <== io.dmem.resp_tag(4,0).toUFix; - r_dmem_resp_pos <== dmem_resp_pos; - r_dmem_resp_type <== dmem_resp_type; - r_dmem_resp_data <== mem_dmem_resp_data; + r_dmem_resp_waddr <== io.dmem.resp_tag.toUFix wb_reg_waddr <== mem_reg_waddr; wb_reg_wdata <== mem_reg_wdata; @@ -418,7 +383,7 @@ class rocketDpath extends Component rfile.io.w1.addr := r_dmem_resp_waddr; rfile.io.w1.en := r_dmem_resp_val; - rfile.io.w1.data := dmem_resp_data_final; + rfile.io.w1.data := io.dmem.resp_data_subword; io.ctrl.wb_waddr := wb_reg_waddr; diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index e65591be..97485fdb 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -21,7 +21,7 @@ class ioImem(view: List[String] = null) extends Bundle (view) // interface between I$ and memory (128 bits wide) class ioIcache(view: List[String] = null) extends Bundle (view) { - val req_addr = UFix(PADDR_BITS, 'input); + val req_addr = UFix(PADDR_BITS - OFFSET_BITS, 'input); val req_val = Bool('input); val req_rdy = Bool('output); val resp_data = Bits(MEM_DATA_BITS, 'output); @@ -43,7 +43,7 @@ class rocketICacheDM(lines: Int) extends Component { val addrbits = PADDR_BITS; val indexbits = 
ceil(log10(lines)/log10(2)).toInt; - val offsetbits = 6; + val offsetbits = OFFSET_BITS; val tagmsb = addrbits - 1; val taglsb = indexbits+offsetbits; val tagbits = addrbits-taglsb; @@ -57,8 +57,8 @@ class rocketICacheDM(lines: Int) extends Component { val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: s_resolve_miss :: Nil = Enum(6) { UFix() }; val state = Reg(resetVal = s_reset); - val r_cpu_req_idx = Reg(resetVal = Bits(0, PGIDX_BITS)); - val r_cpu_req_ppn = Reg(resetVal = Bits(0, PPN_BITS)); + val r_cpu_req_idx = Reg { Bits(width = PGIDX_BITS) } + val r_cpu_req_ppn = Reg { Bits(width = PPN_BITS) } val r_cpu_req_val = Reg(resetVal = Bool(false)); when (io.cpu.req_val && io.cpu.req_rdy) { @@ -114,13 +114,11 @@ class rocketICacheDM(lines: Int) extends Component { io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_valid && tag_match; io.cpu.req_rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || (tag_valid && tag_match)); - val word_mux = (new MuxN(REFILL_CYCLES)) { Bits(width = databits) } - word_mux.io.sel := r_cpu_req_idx(offsetmsb - rf_cnt_bits, offsetlsb).toUFix - for (i <- 0 to MEM_DATA_BITS/databits-1) { word_mux.io.in(i) := data_array_rdata((i+1)*databits-1, i*databits) } - io.cpu.resp_data := word_mux.io.out - + val test = Wire { Bits(width = MEM_DATA_BITS) } + test <== data_array_rdata + io.cpu.resp_data := Slice(MEM_DATA_BITS/databits, test, r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb)) io.mem.req_val := (state === s_request); - io.mem.req_addr := Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits), Bits(0, rf_cnt_bits)).toUFix; + io.mem.req_addr := Cat(r_cpu_req_ppn, r_cpu_req_idx(indexmsb,indexlsb)).toUFix // control state machine switch (state) { diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 540734ec..668c8b83 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ 
b/rocket/src/main/scala/icache_prefetch.scala @@ -7,7 +7,7 @@ import scala.math._; class ioIPrefetcherMem(view: List[String] = null) extends Bundle (view) { - val req_addr = UFix(PADDR_BITS, 'output); + val req_addr = UFix(PADDR_BITS - OFFSET_BITS, 'output); val req_val = Bool('output); val req_rdy = Bool('input); val req_tag = Bits(IMEM_TAG_BITS, 'output); @@ -29,8 +29,8 @@ class rocketIPrefetcher extends Component() { val state = Reg(resetVal = s_invalid); val demand_miss = io.icache.req_val & io.icache.req_rdy; - val prefetch_addr = Reg() { UFix(width = PADDR_BITS) }; - when (demand_miss) { prefetch_addr <== io.icache.req_addr + UFix(REFILL_CYCLES); } + val prefetch_addr = Reg() { UFix(width = io.icache.req_addr.width) }; + when (demand_miss) { prefetch_addr <== io.icache.req_addr + UFix(1); } val addr_match = (prefetch_addr === io.icache.req_addr); val hit = (state != s_invalid) & (state != s_req_wait) & addr_match; diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2cf6fbe6..b43d3187 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -5,64 +5,104 @@ import Node._; import Constants._; import scala.math._; -class rocketNBDCacheStoreGen extends Component { +class StoreMaskGen extends Component { val io = new Bundle { - val req_type = Bits(3, 'input) - val req_addr_lsb = Bits(3, 'input) - val req_data = Bits(64, 'input) - val store_wmask = Bits(8, 'output) - val store_data = Bits(64, 'output) + val typ = Bits(3, 'input) + val addr = Bits(3, 'input) + val wmask = Bits(8, 'output) } - // generate write mask and store data signals based on store type and address LSBs - val wmask = Wire { Bits(8) } - switch (io.req_type(1,0)) + val mask = Wire { Bits(width = io.wmask.width) } + switch (io.typ(1,0)) { - is (MT_B) { wmask <== Bits( 1,1) << io.req_addr_lsb(2,0).toUFix } - is (MT_H) { wmask <== Bits( 3,2) << Cat(io.req_addr_lsb(2,1), Bits(0,1)).toUFix } - is (MT_W) { wmask <== Bits( 
15,4) << Cat(io.req_addr_lsb(2,2), Bits(0,2)).toUFix } - otherwise { wmask <== Bits(255,8) } // MT_D + is (MT_B) { mask <== Bits( 1,1) << io.addr(2,0).toUFix } + is (MT_H) { mask <== Bits( 3,2) << Cat(io.addr(2,1), Bits(0,1)).toUFix } + is (MT_W) { mask <== Bits( 15,4) << Cat(io.addr(2,2), Bits(0,2)).toUFix } + otherwise { mask <== Bits(255,8) } // MT_D } - - val data = Wire { Bits(64) } - switch (io.req_type(1,0)) - { - is (MT_B) { data <== Fill(8, io.req_data( 7,0)) } - is (MT_H) { data <== Fill(4, io.req_data(15,0)) } - is (MT_W) { data <== Fill(2, io.req_data(31,0)) } - otherwise { data <== io.req_data } // MT_D - } - - io.store_wmask := wmask - io.store_data := data + io.wmask := mask } -class DataMemCmd extends Bundle { - val offset = Bits(width = OFFSET_BITS) - val cmd = Bits(width = 4) - val typ = Bits(width = 3) +class StoreDataGen extends Component { + val io = new Bundle { + val typ = Bits(3, 'input) + val addr = Bits(3, 'input) + val din = Bits(64, 'input) + val dout = Bits(64, 'output) + } + + val data = Wire { Bits(width = io.din.width) } + switch (io.typ(1,0)) + { + is (MT_B) { data <== Fill(8, io.din( 7,0)) } + is (MT_H) { data <== Fill(4, io.din(15,0)) } + is (MT_W) { data <== Fill(2, io.din(31,0)) } + otherwise { data <== io.din } // MT_D + } + io.dout := data +} + +class LoadDataGen extends Component { + val io = new Bundle { + val typ = Bits(3, 'input) + val addr = Bits(3, 'input) + val din = Bits(64, 'input) + val dout = Bits(64, 'output) + val dout_subword = Bits(64, 'output) + } + + val shifted = io.din >> Cat(io.addr(2), Bits(0, 5)).toUFix + val extended = Wire { Bits(width = io.din.width) } + switch (io.typ) + { + is(MT_W) { extended <== Cat(Fill(32, shifted(31)), shifted(31,0)) } + is(MT_WU) { extended <== Cat(Bits(0, 32), shifted(31,0)) } + otherwise { extended <== shifted } + } + + val shifted_subword = Reg(extended) >> Cat(Reg(io.addr(1,0)), Bits(0, 3)).toUFix + val extended_subword = Wire { Bits(width = io.din.width) } + switch 
(Reg(io.typ)) + { + is (MT_B) { extended_subword <== Cat(Fill(56, shifted_subword( 7)), shifted_subword( 7, 0)) } + is (MT_BU) { extended_subword <== Cat(Bits(0, 56), shifted_subword( 7, 0)) } + is (MT_H) { extended_subword <== Cat(Fill(48, shifted_subword(15)), shifted_subword(15, 0)) } + is (MT_HU) { extended_subword <== Cat(Bits(0, 48), shifted_subword(15, 0)) } + otherwise { extended_subword <== shifted_subword } + } + + io.dout := extended + io.dout_subword := extended_subword } class RPQEntry extends Bundle { - val cmd = new DataMemCmd() - val sdq_id = UFix(width = ceil(log(NSDQ)/log(2)).toInt) + val offset = Bits(width = OFFSET_BITS) + val cmd = Bits(width = 4) + val typ = Bits(width = 3) + val sdq_id = UFix(width = log2up(NSDQ)) + val tag = Bits(width = CPU_TAG_BITS) } class Replay extends Bundle { val idx = Bits(width = IDX_BITS) - val cmd = new DataMemCmd() - val sdq_id = UFix(width = ceil(log(NSDQ)/log(2)).toInt) + val offset = Bits(width = OFFSET_BITS) + val cmd = Bits(width = 4) + val typ = Bits(width = 3) + val sdq_id = UFix(width = log2up(NSDQ)) + val tag = Bits(width = CPU_TAG_BITS) } class DataReq extends Bundle { - val idx = Bits(width = IDX_BITS) - val cmd = new DataMemCmd() + val idx = Bits(width = IDX_BITS) + val offset = Bits(width = IDX_BITS) + val cmd = Bits(width = 4) + val typ = Bits(width = 3) val data = Bits(width = CPU_DATA_BITS) } class DataArrayReq extends Bundle { val idx = Bits(width = IDX_BITS) - val offset = Bits(width = ceil(log(REFILL_CYCLES)/log(2)).toInt) + val offset = Bits(width = log2up(REFILL_CYCLES)) val rw = Bool() val wmask = Bits(width = MEM_DATA_BITS/8) val data = Bits(width = MEM_DATA_BITS) @@ -70,7 +110,8 @@ class DataArrayReq extends Bundle { class MemReq extends Bundle { val rw = Bool() - val addr = Bits(width = PPN_BITS+IDX_BITS) + val addr = UFix(width = PPN_BITS+IDX_BITS) + val tag = Bits(width = DMEM_TAG_BITS) } class WritebackReq extends Bundle { @@ -90,7 +131,7 @@ class MetaArrayReq extends Bundle { val data 
= new MetaData() } -class MSHR extends Component { +class MSHR(id: Int) extends Component { val io = new Bundle { val req_pri_val = Bool('input) val req_pri_rdy = Bool('output) @@ -98,10 +139,14 @@ class MSHR extends Component { val req_sec_rdy = Bool('output) val req_ppn = Bits(PPN_BITS, 'input) val req_idx = Bits(IDX_BITS, 'input) - val req_cmd = new RPQEntry().asInput + val req_offset = Bits(width = OFFSET_BITS) + val req_cmd = Bits(width = 4) + val req_type = Bits(width = 3) + val req_sdq_id = UFix(width = log2up(NSDQ)) val req_tag = Bits(CPU_TAG_BITS, 'input) val idx_match = Bool('output) + val idx = Bits(IDX_BITS, 'output) val tag = Bits(PPN_BITS, 'output) val mem_resp_val = Bool('input) @@ -115,17 +160,21 @@ class MSHR extends Component { val requested = Reg { Bool() } val refilled = Reg { Bool() } val ppn = Reg { Bits() } - val idx = Reg { Bits() } + val idx_ = Reg { Bits() } - val req_load = (io.req_cmd.cmd.cmd === M_XRD) || (io.req_cmd.cmd.cmd === M_PFR) - val req_use_rpq = (io.req_cmd.cmd.cmd != M_PFR) && (io.req_cmd.cmd.cmd != M_PFW) + val req_load = (io.req_cmd === M_XRD) || (io.req_cmd === M_PFR) + val req_use_rpq = (io.req_cmd != M_PFR) && (io.req_cmd != M_PFW) val next_dirty = io.req_pri_val && io.req_pri_rdy && !req_load || io.req_sec_val && io.req_sec_rdy && (!req_load || dirty) val sec_rdy = io.idx_match && !refilled && (dirty || !requested || req_load) val rpq = (new queueSimplePF(NRPQ)) { new RPQEntry() } rpq.io.q_reset := Bool(false) rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq - io.req_cmd ^^ rpq.io.enq.bits + rpq.io.enq.bits.offset := io.req_offset + rpq.io.enq.bits.cmd := io.req_cmd + rpq.io.enq.bits.typ := io.req_type + rpq.io.enq.bits.sdq_id := io.req_sdq_id + rpq.io.enq.bits.tag := io.req_tag rpq.io.deq.ready := io.replay.ready && refilled when (io.req_pri_val && io.req_pri_rdy) { @@ -133,7 +182,7 @@ class MSHR extends Component { requested <== Bool(false) refilled <== Bool(false) ppn 
<== io.req_ppn - idx <== io.req_idx + idx_ <== io.req_idx } when (io.mem_req.valid && io.mem_req.ready) { requested <== Bool(true) @@ -146,14 +195,15 @@ class MSHR extends Component { } dirty <== next_dirty - io.idx_match := valid && (idx === io.req_idx) + io.idx_match := valid && (idx_ === io.req_idx) + io.idx := idx_ io.tag := ppn io.req_pri_rdy := !valid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready io.meta_req.valid := valid && refilled && !rpq.io.deq.valid io.meta_req.bits.rw := Bool(true) - io.meta_req.bits.idx := idx + io.meta_req.bits.idx := idx_ io.meta_req.bits.data.valid := Bool(true) io.meta_req.bits.data.dirty := dirty io.meta_req.bits.data.tag := ppn @@ -161,26 +211,34 @@ class MSHR extends Component { io.mem_req.valid := valid && !requested //io.mem_req.bits.itm := next_dirty io.mem_req.bits.rw := Bool(false) - io.mem_req.bits.addr := Cat(ppn, idx) + io.mem_req.bits.addr := Cat(ppn, idx_).toUFix + io.mem_req.bits.tag := Bits(id) io.replay.valid := rpq.io.deq.valid && refilled - io.replay.bits.idx := idx - rpq.io.deq.bits.cmd ^^ io.replay.bits.cmd + io.replay.bits.idx := idx_ + io.replay.bits.tag := rpq.io.deq.bits.tag + io.replay.bits.offset := rpq.io.deq.bits.offset + io.replay.bits.cmd := rpq.io.deq.bits.cmd + io.replay.bits.typ := rpq.io.deq.bits.typ io.replay.bits.sdq_id := rpq.io.deq.bits.sdq_id } class MSHRFile extends Component { val io = new Bundle { - val req_val = Bool('input) - val req_rdy = Bool('output) - val req_cmd = (new DataMemCmd).asInput - val req_ppn = Bits(PADDR_BITS, 'input) - val req_idx = Bits(IDX_BITS, 'input) - val req_data = Bits(64, 'input) - val req_tag = Bits(CPU_TAG_BITS, 'input) + val req_val = Bool('input) + val req_rdy = Bool('output) + val req_ppn = Bits(PPN_BITS, 'input) + val req_idx = Bits(IDX_BITS, 'input) + val req_offset = Bits(OFFSET_BITS, 'input) + val req_cmd = Bits(4, 'input) + val req_type = Bits(3, 'input) + val req_data = Bits(CPU_DATA_BITS, 'input) + val req_tag = Bits(CPU_TAG_BITS, 'input) + val 
req_sdq_id = UFix(log2up(NSDQ), 'input) - val mem_resp_val = Bool('input) - val mem_resp_tag = Bits(DMEM_TAG_BITS, 'input) + val mem_resp_val = Bool('input) + val mem_resp_tag = Bits(DMEM_TAG_BITS, 'input) + val mem_resp_idx = Bits(IDX_BITS, 'output) val mem_req = (new ioDecoupled) { new MemReq() }.flip() val meta_req = (new ioDecoupled) { new MetaArrayReq() }.flip() @@ -192,39 +250,42 @@ class MSHRFile extends Component { val sec_rdy = Wire { Bool() } val tag_mux = new Mux1H(NMSHR, PPN_BITS) + val mem_resp_idx_mux = new Mux1H(NMSHR, IDX_BITS) val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() } val mem_req_arb = (new Arbiter(NMSHR)) { new MemReq() } - val replay_arb = (new Arbiter(NMSHR)) { new RPQEntry() } + val replay_arb = (new Arbiter(NMSHR)) { new Replay() } + val alloc_arb = (new Arbiter(NMSHR)) { Bool() } + alloc_arb.io.out.ready := io.req_val && !idx_match val tag_match = tag_mux.io.out === io.req_ppn for (i <- 0 to NMSHR-1) { - val mshr = new MSHR() - - val rpqe = new RPQEntry().asInput - rpqe.cmd.offset <== io.req_cmd.offset - rpqe.cmd.cmd <== io.req_cmd.cmd - rpqe.cmd.typ <== io.req_cmd.typ - rpqe.sdq_id <== UFix(0) + val mshr = new MSHR(i) tag_mux.io.sel(i) := mshr.io.idx_match tag_mux.io.in(i) := mshr.io.tag alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy - mshr.io.req_pri_val := io.req_val && !idx_match && alloc_arb.io.in(i).ready + mshr.io.req_pri_val := alloc_arb.io.in(i).ready mshr.io.req_sec_val := io.req_val && tag_match mshr.io.req_ppn := io.req_ppn - mshr.io.req_idx := io.req_idx mshr.io.req_tag := io.req_tag - rpqe ^^ mshr.io.req_cmd + mshr.io.req_idx := io.req_idx + mshr.io.req_offset := io.req_offset + mshr.io.req_cmd := io.req_cmd + mshr.io.req_type := io.req_type + mshr.io.req_sdq_id := io.req_sdq_id mshr.io.meta_req <> meta_req_arb.io.in(i) mshr.io.mem_req <> mem_req_arb.io.in(i) mshr.io.replay <> replay_arb.io.in(i) - mshr.io.mem_resp_val := io.mem_resp_val && (UFix(i) === io.mem_resp_tag) + val mem_resp_val = io.mem_resp_val 
&& (UFix(i) === io.mem_resp_tag) + mshr.io.mem_resp_val := mem_resp_val + mem_resp_idx_mux.io.sel(i) := mem_resp_val + mem_resp_idx_mux.io.in(i) := mshr.io.idx when (mshr.io.req_pri_rdy) { pri_rdy <== Bool(true) } when (mshr.io.req_sec_rdy) { sec_rdy <== Bool(true) } @@ -239,80 +300,101 @@ class MSHRFile extends Component { replay_arb.io.out ^^ io.replay io.req_rdy := Mux(idx_match, tag_match && sec_rdy, pri_rdy) + io.mem_resp_idx := mem_resp_idx_mux.io.out } -class StoreDataUnit extends Component { +class ReplayUnit extends Component { val io = new Bundle { val sdq_enq = (new ioDecoupled) { Bits(width = CPU_DATA_BITS) } - val sdq_id = UFix(width = ceil(log(NSDQ)/log(2)).toInt, dir = 'output) + val sdq_id = UFix(log2up(NSDQ), 'output) val replay = (new ioDecoupled) { new Replay() } val data_req = (new ioDecoupled) { new DataReq() }.flip() + val cpu_resp_val = Bool('output) + val cpu_resp_tag = Bits(CPU_TAG_BITS, 'output) } - val cmdq = (new queueSimplePF(2)) { new Replay() } - val dataq = (new queueSimplePF(2)) { Bits(width = CPU_DATA_BITS) } + val sdq_val = Reg(resetVal = UFix(0, NSDQ)) + val sdq_allocator = new priorityEncoder(NSDQ) + sdq_allocator.io.in := ~sdq_val + val sdq_alloc_id = sdq_allocator.io.out.toUFix - val next_dataq_enq_rdy = !dataq.io.deq.valid || dataq.io.enq.ready && (!dataq.io.enq.valid || dataq.io.deq.ready) - val next_dataq_enq_val = io.replay.valid && next_dataq_enq_rdy && (io.replay.bits.cmd.cmd != M_XRD) && cmdq.io.enq.ready - dataq.io.enq.valid := Reg(next_dataq_enq_val, resetVal = Bool(false)) - dataq.io.enq.bits := sdq_dout - dataq.io.deq.ready := io.data_req.ready && (cmdq.io.deq.bits.cmd.cmd != M_XRD) + val replay_retry = Wire { Bool() } + val replay_val = Reg(io.replay.valid || replay_retry, resetVal = Bool(false)) + replay_retry <== replay_val && !io.data_req.ready - cmdq.io.enq.valid := io.replay.valid && ((io.replay.bits.cmd.cmd === M_XRD) || next_dataq_enq_rdy) - io.replay.bits ^^ cmdq.io.enq.bits - cmdq.io.deq.ready := 
io.data_req.ready && ((cmdq.io.deq.bits.cmd.cmd === M_XRD) || dataq.io.deq.valid) + val rp = Reg { new Replay() } + when (io.replay.valid && io.replay.ready) { rp <== io.replay.bits } - val sdq = Mem4(NSDQ, io.sdq_enq.bits); - sdq.setReadLatency(1); - sdq.setTarget('inst); - val sdq_addr = Mux(next_dataq_enq_val, io.replay.bits.sdq_id, io.sdq_id) + val rp_amo = rp.cmd(3).toBool + val rp_store = (rp.cmd === M_XWR) + val rp_load = (rp.cmd === M_XRD) + val rp_write = rp_store || rp_amo + val rp_read = rp_load || rp_amo + + val sdq_ren_new = io.replay.valid && (io.replay.bits.cmd != M_XRD) + val sdq_ren_retry = replay_retry && rp_write + val sdq_ren = sdq_ren_new || sdq_ren_retry val sdq_wen = io.sdq_enq.valid && io.sdq_enq.ready - val sdq_dout = sdq.rw(sdq_addr, io.sdq_enq.bits, sdq_wen, cs = next_dataq_enq_val || sdq_wen); - val sdq_val = Reg(resetVal = Bits(0, ceil(log(NSDQ)/log(2)).toInt)) - when (next_dataq_enq_val) { sdq_val <== sdq_val.bitSet(io.replay.bits.sdq_id, Bool(false)) } - when (sdq_wen) { sdq_val <== sdq_val.bitSet(io.sdq_id, Bool(true)) } + val sdq_addr = Mux(sdq_ren_retry, rp.sdq_id, Mux(sdq_ren_new, io.replay.bits.sdq_id, sdq_alloc_id)) - def priority_enc(in: Bits, n: Int = 0): Bits = if (in.width == n-1) UFix(n-1) else if(in(n) == Bool(true)) UFix(n) else priority_enc(in, n+1) - io.sdq_id := priority_enc(~sdq_val) - io.sdq_enq.ready := ((~sdq_val) != UFix(0)) && !next_dataq_enq_val - io.replay.ready := cmdq.io.enq.ready && next_dataq_enq_rdy - io.data_req.valid := cmdq.io.deq.valid && ((cmdq.io.deq.bits.cmd.cmd === M_XRD) || dataq.io.deq.valid) - io.data_req.bits.idx := cmdq.io.deq.bits.idx - cmdq.io.deq.bits.cmd ^^ io.data_req.bits.cmd - io.data_req.bits.data := dataq.io.deq.bits + val sdq = Mem4(NSDQ, io.sdq_enq.bits) + sdq.setReadLatency(0) + sdq.setTarget('inst) + val sdq_dout = sdq.rw(sdq_addr, io.sdq_enq.bits, sdq_wen, cs = sdq_ren || sdq_wen) + + val sdq_free = replay_val && !replay_retry && rp_write + sdq_val <== sdq_val & ~(sdq_free.toUFix 
<< rp.sdq_id) | (sdq_wen.toUFix << sdq_alloc_id) + + io.sdq_enq.ready := (~sdq_val != UFix(0)) && !sdq_ren + io.sdq_id := sdq_alloc_id + + io.replay.ready := !replay_retry + + io.data_req.valid := replay_val + io.data_req.bits.idx := rp.idx + io.data_req.bits.offset := rp.offset + io.data_req.bits.cmd := rp.cmd + io.data_req.bits.typ := rp.typ + io.data_req.bits.data := sdq_dout + + io.cpu_resp_val := Reg(replay_val && !replay_retry && rp_read, resetVal = Bool(false)) + io.cpu_resp_tag := Reg(rp.tag) } class WritebackUnit extends Component { val io = new Bundle { val req = (new ioDecoupled) { new WritebackReq() } - val data_req = (new ioDecoupled) { new DataReq() }.flip() - val data_resp = Bits(width = MEM_DATA_BITS, dir = 'input) + val data_req = (new ioDecoupled) { new DataArrayReq() }.flip() + val data_resp = Bits(MEM_DATA_BITS, 'input) val mem_req = (new ioDecoupled) { new MemReq() }.flip() + val mem_req_data = Bits(MEM_DATA_BITS, 'output) } val wbq = (new queueSimplePF(REFILL_CYCLES)) { Bits(width = MEM_DATA_BITS) } val valid = Reg(resetVal = Bool(false)) - val cnt = Reg() { UFix(width = ceil(log(REFILL_CYCLES)/log(2)).toInt) } + val cnt = Reg() { UFix(width = log2up(REFILL_CYCLES+1)) } val addr = Reg() { new WritebackReq() } wbq.io.enq.valid := valid && Reg(io.data_req.valid && io.data_req.ready) wbq.io.enq.bits := io.data_resp - wbq.io.deq.ready := io.mem_req.ready && (~cnt === UFix(0)) + wbq.io.deq.ready := io.mem_req.ready && (cnt === UFix(REFILL_CYCLES)) when (io.req.valid && io.req.ready) { valid <== Bool(true); cnt <== UFix(0); addr <== io.req.bits } when (io.data_req.valid && io.data_req.ready) { cnt <== cnt + UFix(1) } - when ((~cnt === UFix(0)) && !wbq.io.deq.valid) { valid <== Bool(false) } + when ((cnt === UFix(REFILL_CYCLES)) && !wbq.io.deq.valid) { valid <== Bool(false) } io.req.ready := !valid io.data_req.valid := valid && wbq.io.enq.ready io.data_req.bits.idx := addr.idx - io.data_req.bits.cmd.offset := cnt * UFix(MEM_DATA_BITS/8) - 
io.data_req.bits.cmd.cmd := M_XRD - io.data_req.bits.cmd.typ := UFix(0) - io.data_req.bits.data := wbq.io.deq.bits - io.mem_req.valid := wbq.io.deq.valid && (~cnt === UFix(0)) + io.data_req.bits.offset := cnt + io.data_req.bits.rw := Bool(false) + io.data_req.bits.wmask := Bits(0) + io.data_req.bits.data := Bits(0) + io.mem_req.valid := wbq.io.deq.valid && (cnt === UFix(REFILL_CYCLES)) io.mem_req.bits.rw := Bool(true) - io.mem_req.bits.addr := Cat(addr.ppn, addr.idx) + io.mem_req.bits.addr := Cat(addr.ppn, addr.idx).toUFix + io.mem_req.bits.tag := Bits(0) + io.mem_req_data := wbq.io.deq.bits } class FlushUnit(lines: Int) extends Component { @@ -324,14 +406,13 @@ class FlushUnit(lines: Int) extends Component { val wb_req = (new ioDecoupled) { new WritebackReq() }.flip() } - val s_reset0 :: s_reset :: s_ready :: s_meta_read :: s_meta_wait :: s_meta_write :: s_done :: Nil = Enum(7) { UFix() } - val state = Reg(resetVal = s_reset0) - val tag = Reg() { Bits(width = CPU_TAG_BITS) } - val cnt = Reg() { UFix(width = ceil(log(lines)/log(2)).toInt) } + val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: s_meta_write :: s_done :: Nil = Enum(6) { UFix() } + val state = Reg(resetVal = s_reset) + val tag = Reg() { Bits() } + val cnt = Reg(resetVal = UFix(0, log2up(lines))) val next_cnt = cnt + UFix(1) switch (state) { - is(s_reset0) { state <== s_reset; cnt <== UFix(0) } is(s_reset) { when (io.meta_req.ready) { state <== Mux(~cnt === UFix(0), s_ready, s_reset); cnt <== next_cnt } } is(s_ready) { when (io.req.valid) { state <== s_meta_read; tag <== io.req.bits } } is(s_meta_read) { when (io.meta_req.ready) { state <== s_meta_wait } } @@ -357,14 +438,26 @@ class MetaDataArray(lines: Int) extends Component { val io = new Bundle { val req = (new ioDecoupled) { new MetaArrayReq() } val resp = (new MetaData).asOutput() + + val state_req = (new ioDecoupled) { new MetaArrayReq() } } - val array = Mem4(lines, io.resp) - array.setReadLatency(1) - array.setTarget('inst) - val rdata = 
array.rw(io.req.bits.idx, io.req.bits.data, io.req.valid && io.req.bits.rw, cs = io.req.valid) - rdata ^^ io.resp - io.req.ready := Bool(true) + val vd_array = Mem4(lines, Bits(width = 2)) + vd_array.setReadLatency(0) + val vd_wdata1 = Cat(io.req.bits.data.valid, io.req.bits.data.dirty) + val vd_rdata1 = vd_array.rw(io.req.bits.idx, vd_wdata1, io.req.valid && io.req.bits.rw) + val vd_wdata2 = Cat(io.state_req.bits.data.valid, io.req.bits.data.dirty) + vd_array.write(io.state_req.bits.idx, vd_wdata2, io.state_req.valid && io.state_req.bits.rw) + + val tag_array = Mem4(lines, io.resp.tag) + tag_array.setReadLatency(0) + tag_array.setTarget('inst) + val tag_rdata = tag_array.rw(io.req.bits.idx, io.req.bits.data.tag, io.req.valid && io.req.bits.rw, cs = io.req.valid) + + io.resp.valid := vd_rdata1(1).toBool + io.resp.dirty := vd_rdata1(0).toBool + io.resp.tag := tag_rdata + io.req.ready := Bool(true) } class DataArray(lines: Int) extends Component { @@ -373,19 +466,14 @@ class DataArray(lines: Int) extends Component { val resp = Bits(width = MEM_DATA_BITS, dir = 'output) } - val wmask_array = Vec(MEM_DATA_BITS/8) { Wire() { Bits(width = MEM_DATA_BITS) } } - wmask_array(0) <== Fill(8, io.req.bits.wmask(0)) - for (i <- 1 to MEM_DATA_BITS/8-1) { - wmask_array(i) <== Cat(Fill(8, io.req.bits.wmask(i)), wmask_array(i-1)(8*(i+1)-1, 8*i)) - } - val wmask = wmask_array(MEM_DATA_BITS/8-1) + val wmask = FillInterleaved(8, io.req.bits.wmask) val array = Mem4(lines*REFILL_CYCLES, io.resp) - array.setReadLatency(1) + array.setReadLatency(0) array.setTarget('inst) val addr = Cat(io.req.bits.idx, io.req.bits.offset) val rdata = array.rw(addr, io.req.bits.data, io.req.valid && io.req.bits.rw, wmask, cs = io.req.valid) - rdata ^^ io.resp + io.resp := rdata io.req.ready := Bool(true) } @@ -413,7 +501,7 @@ class rocketNBDCacheAMOALU extends Component { val less = Mux(cmp_lhs === cmp_rhs, cmp_diff, Mux(signed, cmp_lhs, cmp_rhs)) val cmp_out = Mux(min === less, io.lhs, io.rhs) - val alu_out 
= Wire() { UFix() }; + val alu_out = Wire { UFix(width = io.result.width) }; switch (io.cmd) { is (M_XA_ADD) { alu_out <== adder_out } is (M_XA_SWAP) { alu_out <== io.rhs } @@ -425,7 +513,7 @@ class rocketNBDCacheAMOALU extends Component { io.result := alu_out } -// XXX broken for CPU_DATA_WIDTH != 64 +// XXX broken for CPU_DATA_BITS != 64 class AMOUnit extends Component { val io = new Bundle { val req = (new ioDecoupled) { new DataReq() } @@ -435,18 +523,22 @@ class AMOUnit extends Component { val data_req = (new ioDecoupled) { new DataReq() }.flip() } - val valid = Reg(resetVal = Bool(false)) - val r_cmd = Reg() { new DataMemCmd() } - val r_idx = Reg() { Bits(width = IDX_BITS) } - val r_lhs = Reg() { Bits(width = 64) } - val r_rhs = Reg() { Bits(width = 64) } - val r_wmask = Reg() { Bits(width = 64/8) } + val valid = Reg(resetVal = Bool(false)) + val r_cmd = Reg() { Bits() } + val r_offset = Reg() { Bits() } + val r_type = Reg() { Bits() } + val r_idx = Reg() { Bits() } + val r_lhs = Reg() { Bits() } + val r_rhs = Reg() { Bits() } + val r_wmask = Reg() { Bits() } when (io.req.valid && io.req.ready) { valid <== Bool(true); r_idx <== io.req.bits.idx r_lhs <== io.lhs; r_rhs <== io.rhs; r_cmd <== io.req.bits.cmd; + r_type <== io.req.bits.typ; + r_offset <== io.req.bits.offset; r_wmask <== io.wmask } when (io.data_req.valid && io.data_req.ready) { @@ -454,7 +546,7 @@ class AMOUnit extends Component { } val alu = new rocketNBDCacheAMOALU - alu.io.cmd := r_cmd.cmd + alu.io.cmd := r_cmd alu.io.wmask := r_wmask alu.io.lhs := r_lhs alu.io.rhs := r_rhs @@ -462,7 +554,9 @@ class AMOUnit extends Component { io.req.ready := !valid io.data_req.valid := valid io.data_req.bits.idx := r_idx - r_cmd ^^ io.data_req.bits.cmd + io.data_req.bits.cmd := r_cmd + io.data_req.bits.typ := r_type + io.data_req.bits.offset := r_offset io.data_req.bits.data := alu.io.result } @@ -470,7 +564,7 @@ class HellaCache(lines: Int) extends Component { val io = new ioDCacheDM(); val addrbits = 
PADDR_BITS; - val indexbits = ceil(log(lines)/log(2)).toInt; + val indexbits = log2up(lines); val offsetbits = OFFSET_BITS; val tagmsb = PADDR_BITS-1; val taglsb = indexbits+offsetbits; @@ -478,21 +572,33 @@ class HellaCache(lines: Int) extends Component { val indexmsb = taglsb-1; val indexlsb = offsetbits; val offsetmsb = indexlsb-1; - val offsetlsb = ceil(log(CPU_DATA_BITS/8)/log(2)).toInt; - val rf_cnt_bits = ceil(log(REFILL_CYCLES)/log(2)).toInt + val offsetlsb = log2up(CPU_DATA_BITS/8); + val ramindexlsb = log2up(MEM_DATA_BITS/8); + val early_nack = Reg { Bool() } val r_cpu_req_val_ = Reg(io.cpu.req_val, resetVal = Bool(false)) + val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req_kill && !early_nack val r_cpu_req_idx = Reg() { Bits() } val r_cpu_req_cmd = Reg() { Bits() } val r_cpu_req_type = Reg() { Bits() } val r_cpu_req_tag = Reg() { Bits() } + val r_cpu_req_data = Reg() { Bits() } + + val r2_cpu_req_val = Reg(r_cpu_req_val, resetVal = Bool(false)) + val r2_cpu_req_ppn = Reg(io.cpu.req_ppn) + val r2_cpu_req_idx = Reg(r_cpu_req_idx) + val r2_cpu_req_cmd = Reg(r_cpu_req_cmd) + val r2_cpu_req_type = Reg(r_cpu_req_type) + val r2_cpu_req_tag = Reg(r_cpu_req_tag) - val p_store_valid = Reg(resetVal = Bool(false)) - val p_store_data = Reg() { Bits() } - val p_store_idx = Reg() { Bits() } - val p_store_type = Reg() { Bits() } + val p_store_valid = Reg(resetVal = Bool(false)) + val p_store_data = Reg() { Bits() } + val p_store_idx = Reg() { Bits() } + val p_store_type = Reg() { Bits() } + + val store_data_wide = Wire { Bits(width = MEM_DATA_BITS) } + val store_wmask_wide = Wire { Bits(width = MEM_DATA_BITS) } - val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req_nack val req_store = (io.cpu.req_cmd === M_XWR) val req_load = (io.cpu.req_cmd === M_XRD) || (io.cpu.req_cmd === M_PRD) val req_flush = (io.cpu.req_cmd === M_FLA) @@ -505,12 +611,23 @@ class HellaCache(lines: Int) extends Component { val r_req_amo = r_cpu_req_cmd(3).toBool val r_req_read = r_req_load || 
r_req_amo val r_req_write = r_req_store || r_req_amo + val r2_req_load = (r2_cpu_req_cmd === M_XRD) + val r2_req_store = (r2_cpu_req_cmd === M_XWR) + val r2_req_amo = r2_cpu_req_cmd(3).toBool + val r2_req_write = r2_req_store || r2_req_amo + + val nack_wb = Wire { Bool() } + val nack_mshr = Wire { Bool() } + val nack_sdq = Wire { Bool() } when (io.cpu.req_val) { - r_cpu_req_idx <== io.cpu.req_idx - r_cpu_req_cmd <== Mux(req_load, M_XRD, io.cpu.req_cmd) - r_cpu_req_type <== io.cpu.req_type - r_cpu_req_tag <== io.cpu.req_tag + r_cpu_req_idx <== io.cpu.req_idx + r_cpu_req_cmd <== Mux(req_load, M_XRD, io.cpu.req_cmd) + r_cpu_req_type <== io.cpu.req_type + r_cpu_req_tag <== io.cpu.req_tag + when (req_write) { + r_cpu_req_data <== io.cpu.req_data + } } // tags @@ -520,13 +637,17 @@ class HellaCache(lines: Int) extends Component { // data val data = new DataArray(lines) - val data_arb = (new Arbiter(3)) { new DataArrayReq() } + val data_arb = (new Arbiter(5)) { new DataArrayReq() } data_arb.io.out <> data.io.req // writeback unit val wb = new WritebackUnit + val mem_arb = (new Arbiter(2)) { new MemReq() } val wb_arb = (new Arbiter(2)) { new WritebackReq() } wb_arb.io.out <> wb.io.req + wb.io.data_req <> data_arb.io.in(1) + wb.io.data_resp <> data.io.resp + wb.io.mem_req <> mem_arb.io.in(0) // reset and flush unit val flusher = new FlushUnit(lines) @@ -536,119 +657,168 @@ class HellaCache(lines: Int) extends Component { flusher.io.meta_resp <> meta.io.resp // cpu tag check - val meta_req = new MetaArrayReq().asInput - meta_req.idx <== io.cpu.req_idx - meta_req.rw <== Bool(false) meta_arb.io.in(2).valid := io.cpu.req_val - meta_req ^^ meta_arb.io.in(2).bits + meta_arb.io.in(2).bits.idx := io.cpu.req_idx(indexmsb,indexlsb) + meta_arb.io.in(2).bits.rw := Bool(false) + meta_arb.io.in(2).bits.data.valid := Bool(false) // don't care + meta_arb.io.in(2).bits.data.dirty := Bool(false) // don't care + meta_arb.io.in(2).bits.data.tag := UFix(0) // don't care val early_tag_nack = 
!meta_arb.io.in(2).ready val tag_match = meta.io.resp.valid && (meta.io.resp.tag === io.cpu.req_ppn) - val hit = r_cpu_req_val && tag_match - val miss = r_cpu_req_val && !tag_match + val tag_hit = r_cpu_req_val && tag_match + val tag_miss = r_cpu_req_val && !tag_match // refill counter - val rr_count = Reg(resetVal = UFix(0, rf_cnt_bits)); - val rr_count_next = rr_count + UFix(1); + val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val rr_count_next = rr_count + UFix(1) when (io.mem.resp_val) { rr_count <== rr_count_next } // refill response - val rr = new DataArrayReq().asInput - rr.offset <== rr_count - rr.idx <== Bits(0) /* TODO: get this from MSHR file */ - rr.rw <== Bool(true) - rr.wmask <== ~UFix(0) - rr.data <== io.mem.resp_data - data_arb.io.in(0).valid := io.mem.resp_val - rr ^^ data_arb.io.in(0).bits + val block_during_refill = !io.mem.resp_val && (rr_count != UFix(0)) + data_arb.io.in(0).valid := io.mem.resp_val || block_during_refill + data_arb.io.in(0).bits.offset := rr_count + data_arb.io.in(0).bits.rw := !block_during_refill + data_arb.io.in(0).bits.wmask := ~UFix(0, MEM_DATA_BITS/8) + data_arb.io.in(0).bits.data := io.mem.resp_data + + // writeback + val wb_rdy = wb_arb.io.in(1).ready + wb_arb.io.in(1).valid := r_cpu_req_val && !tag_match && meta.io.resp.dirty + wb_arb.io.in(1).bits.ppn := meta.io.resp.tag + wb_arb.io.in(1).bits.idx := r_cpu_req_idx(indexmsb,indexlsb) // load hits - val load = new DataArrayReq().asInput - load.offset <== io.cpu.req_idx(offsetmsb, offsetlsb+rf_cnt_bits) - load.idx <== io.cpu.req_idx(indexmsb, indexlsb) - load.rw <== Bool(false) - load.wmask <== ~UFix(0) // don't care - load.data <== io.mem.resp_data // don't care - data_arb.io.in(2).valid := io.cpu.req_val && req_read - load ^^ data_arb.io.in(2).bits - val early_load_nack = req_read && !data_arb.io.in(2).ready - - // load/store addresses conflict if they are to any part of the same word - p_store_valid <== Bool(false) - val p_store_match = r_req_read && 
p_store_valid && (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) + data_arb.io.in(4).bits.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) + data_arb.io.in(4).bits.idx := io.cpu.req_idx(indexmsb,indexlsb) + data_arb.io.in(4).bits.rw := Bool(false) + data_arb.io.in(4).bits.wmask := UFix(0) // don't care + data_arb.io.in(4).bits.data := io.mem.resp_data // don't care + data_arb.io.in(4).valid := io.cpu.req_val && req_read + val early_load_nack = req_read && !data_arb.io.in(4).ready // store hits. // we nack new stores if a pending store can't retire for some reason. // we drain a pending store if the CPU performs a store or a - // conflictig load, or if the cache misses or is idle. - val store = new DataArrayReq().asInput - store.offset <== p_store_idx(offsetmsb, offsetlsb+rf_cnt_bits) - store.idx <== p_store_idx(indexmsb, indexlsb) - store.rw <== Bool(true) - store.wmask <== UFix(0) // TODO - store.data <== UFix(0) // TODO - data_arb.io.in(1).valid := p_store_valid && (miss || !io.cpu.req_val || req_store || (r_cpu_req_val && p_store_match)) - val early_store_nack = req_write && p_store_valid && !data_arb.io.in(1).ready + // conflictig load, or if the cache is idle, or after a miss. 
+ val p_store_match = r_cpu_req_val && r_req_read && p_store_valid && (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) + val drain_store_val = (p_store_valid && (!io.cpu.req_val || req_store || Reg(tag_miss))) || p_store_match + data_arb.io.in(2).bits.offset := p_store_idx(offsetmsb,ramindexlsb) + data_arb.io.in(2).bits.idx := p_store_idx(indexmsb,indexlsb) + data_arb.io.in(2).bits.rw := Bool(true) + data_arb.io.in(2).bits.wmask := store_wmask_wide + data_arb.io.in(2).bits.data := store_data_wide + data_arb.io.in(2).valid := drain_store_val + val drain_store_rdy = data_arb.io.in(2).ready + val drain_store = drain_store_val && drain_store_rdy + val p_store_notready = p_store_valid && !drain_store + p_store_valid <== p_store_notready || (tag_hit && r_req_store) // tag update after a miss or a store to an exclusive clean line. - // we don't look at the meta ready signal because the only requester - // with higher precedence is the flush unit, which nacks us anyway. 
- val meta_update = new MetaArrayReq().asInput - meta_update.idx <== r_cpu_req_idx - meta_update.rw <== Bool(true) - meta_update.data.valid <== tag_match - meta_update.data.dirty <== tag_match - meta_update.data.tag <== io.cpu.req_ppn - meta_req.data <== meta_update.data // don't care - meta_arb.io.in(1).valid := miss && wb_arb.io.in(1).ready || hit && r_req_write - meta_update ^^ meta_arb.io.in(1).bits + val clear_valid = tag_miss && !r_req_flush && meta.io.resp.valid + val set_dirty = tag_hit && !meta.io.resp.dirty && r_req_write + meta.io.state_req.valid := clear_valid || set_dirty + meta.io.state_req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) + meta.io.state_req.bits.data.valid := tag_match + meta.io.state_req.bits.data.dirty := tag_match // pending store data, also used for AMO RHS - when (io.cpu.req_val && req_store && !early_store_nack) { - p_store_idx <== io.cpu.req_idx; - p_store_type <== io.cpu.req_type; - } - when (io.cpu.req_val && req_write && !early_store_nack) { - p_store_data <== io.cpu.req_data + val storegen = new StoreDataGen + storegen.io.typ := r_cpu_req_type + storegen.io.addr := r_cpu_req_idx(offsetlsb-1, 0) + storegen.io.din := r_cpu_req_data + when (tag_hit && r_req_store && !p_store_notready) { + p_store_idx <== r_cpu_req_idx + p_store_type <== r_cpu_req_type + p_store_data <== storegen.io.dout } - /*val mshr = new MSHRFile() - mshr.io.req_val := r_cpu_req_val + // miss handling + val mshr = new MSHRFile + val replayer = new ReplayUnit + mshr.io.req_val := tag_miss && !r_req_flush && !nack_mshr mshr.io.req_ppn := io.cpu.req_ppn - mshr.io.req_idx := r_cpu_req_idx(PGIDX_BITS-1, offsetbits) - mshr.io.req_cmd.offset := r_cpu_req_idx(offsetbits-1, 0) - mshr.io.req_cmd.cmd := r_cpu_req_cmd - mshr.io.req_cmd.typ := r_cpu_req_type*/ + mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) + mshr.io.req_data := p_store_data + mshr.io.req_tag := r_cpu_req_tag + mshr.io.req_offset := r_cpu_req_idx(offsetmsb,0) + mshr.io.req_cmd := r_cpu_req_cmd + 
mshr.io.req_type := r_cpu_req_type + mshr.io.req_sdq_id := replayer.io.sdq_id + mshr.io.mem_resp_val := io.mem.resp_val + mshr.io.mem_resp_tag := io.mem.resp_tag + mshr.io.mem_req <> mem_arb.io.in(1) + mshr.io.meta_req <> meta_arb.io.in(1) + mshr.io.replay <> replayer.io.replay + replayer.io.sdq_enq.valid := tag_miss && r_req_write && !nack_sdq + replayer.io.sdq_enq.bits := storegen.io.dout + data_arb.io.in(0).bits.idx := mshr.io.mem_resp_idx - // signal a load miss when the data isn't present in the cache and when it's in the pending store data register - // (causes the cache to block for 2 cycles and the load or amo instruction is replayed) - val early_nack = early_tag_nack || early_load_nack || early_store_nack - val nack = Reg(early_nack) || p_store_match || !flusher.io.req.ready - val load_miss = !nack && miss && r_req_read - val resp_val = (!nack && hit && r_req_read) || flusher.io.resp.valid + // replays + val replay = replayer.io.data_req.bits + data_arb.io.in(3).bits.offset := replay.offset(offsetmsb,ramindexlsb) + data_arb.io.in(3).bits.idx := replay.idx + data_arb.io.in(3).bits.rw := replay.cmd === M_XWR + data_arb.io.in(3).bits.wmask := store_wmask_wide + data_arb.io.in(3).bits.data := store_data_wide + data_arb.io.in(3).valid := replayer.io.data_req.valid + replayer.io.data_req.ready := data_arb.io.in(3).ready + + // store write mask generation. + // assumes pending stores are higher-priority than store replays. + val maskgen = new StoreMaskGen + val store_offset = Mux(drain_store_val, p_store_idx(offsetmsb,0), replay.offset) + maskgen.io.typ := Mux(drain_store_val, p_store_type, replay.typ) + maskgen.io.addr := store_offset(offsetlsb-1,0) + store_wmask_wide <== maskgen.io.wmask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2up(CPU_DATA_BITS/8))).toUFix + val store_data = Mux(drain_store_val, p_store_data, replay.data) + store_data_wide <== Fill(MEM_DATA_BITS/CPU_DATA_BITS, store_data) + + // load data subword mux/sign extension. 
+ // assumes load replays are higher-priority than load hits. + // subword loads are delayed by one cycle. + val loadgen = new LoadDataGen + val loadgen_use_replay = Reg(replayer.io.data_req.valid) + loadgen.io.typ := Mux(loadgen_use_replay, Reg(replay.typ), r_cpu_req_type) + loadgen.io.addr := Mux(loadgen_use_replay, Reg(replay.offset), r_cpu_req_idx)(offsetlsb-1,0) + loadgen.io.din := Slice(MEM_DATA_BITS/CPU_DATA_BITS, data.io.resp, r_cpu_req_idx(ramindexlsb-1,offsetlsb).toUFix) + + early_nack <== early_tag_nack || early_load_nack + + val nack_miss_wb = meta.io.resp.dirty && !wb_rdy + val nack_miss_mshr = !mshr.io.req_rdy + val nack_miss_sdq = r_req_write && !replayer.io.sdq_enq.ready + + nack_wb <== nack_miss_mshr || nack_miss_sdq || p_store_notready || p_store_match + nack_mshr <== nack_miss_wb || nack_miss_sdq || p_store_notready || p_store_match + nack_sdq <== nack_miss_wb || nack_miss_mshr || p_store_notready || p_store_match + + val nack_for_flush = r_req_flush && !flusher.io.req.ready + val nack = p_store_match || r_req_store && p_store_notready || early_nack || + tag_miss && !r_req_flush && (nack_miss_wb || nack_miss_mshr || nack_miss_sdq || p_store_notready) // report that cache is always ready. we nack instead. 
io.cpu.req_rdy := Bool(true) - io.cpu.resp_nack := r_cpu_req_val_ && nack - io.cpu.resp_val := resp_val + io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack + io.cpu.resp_val := (tag_hit && !nack && r_req_read) || flusher.io.resp.valid || replayer.io.cpu_resp_val + io.cpu.resp_miss := tag_miss && !nack && r_req_read + io.cpu.resp_tag := Mux(replayer.io.cpu_resp_val, replayer.io.cpu_resp_tag, Mux(flusher.io.resp.valid, flusher.io.resp.bits, r_cpu_req_tag)) + io.cpu.resp_data := loadgen.io.dout + io.cpu.resp_data_subword := loadgen.io.dout_subword val misaligned = - (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && r_cpu_req_idx(0).toBool) || - (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0,2))) || - ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0,3))); + (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || + (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || + ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); - io.cpu.xcpt_ma_ld := r_cpu_req_val_ && r_req_read && misaligned - io.cpu.xcpt_ma_st := r_cpu_req_val_ && r_req_write && misaligned - - io.cpu.resp_miss := load_miss - io.cpu.resp_tag := flusher.io.resp.bits - io.cpu.resp_data := Bits(0) + io.cpu.xcpt_ma_ld := r_cpu_req_val_ && !io.cpu.req_kill && r_req_read && misaligned + io.cpu.xcpt_ma_st := r_cpu_req_val_ && !io.cpu.req_kill && r_req_write && misaligned - io.mem.req_val := Bool(false) - io.mem.req_rw := Bool(false) - io.mem.req_wdata := Bits(0) - io.mem.req_tag := UFix(0) - io.mem.req_addr := UFix(0) + mem_arb.io.out.ready := io.mem.req_rdy + io.mem.req_val := mem_arb.io.out.valid + io.mem.req_rw := mem_arb.io.out.bits.rw + io.mem.req_wdata := wb.io.mem_req_data + io.mem.req_tag := mem_arb.io.out.bits.tag.toUFix + io.mem.req_addr := mem_arb.io.out.bits.addr } - } diff --git a/rocket/src/main/scala/ptw.scala 
b/rocket/src/main/scala/ptw.scala index 8c14e96d..6251e463 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -27,7 +27,7 @@ class rocketDmemArbiter extends Component io.mem.req_ppn := Mux(r_ptw_req_val, r_ptw_req_ppn, io.cpu.req_ppn); io.mem.req_data := io.cpu.req_data; io.mem.req_tag := Cat(io.cpu.req_tag, io.ptw.req_val); - io.mem.req_nack := io.cpu.req_nack; + io.mem.req_kill := io.cpu.req_kill; io.ptw.req_rdy := io.mem.req_rdy; io.cpu.req_rdy := io.mem.req_rdy && !io.ptw.req_val; diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index b81d90a5..d9604271 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -19,7 +19,7 @@ class ioQueueCtrl(addr_sz: Int) extends Bundle() class queueCtrl(entries: Int) extends Component { - val addr_sz = ceil(log(entries)/log(2)).toInt + val addr_sz = log2up(entries) override val io = new ioQueueCtrl(addr_sz); // Enqueue and dequeue pointers @@ -117,7 +117,7 @@ class ioQueueCtrlFlow(addr_sz: Int) extends Bundle() /* IOqueueCtrl */ class queueCtrlFlow(entries: Int) extends Component { - val addr_sz = ceil(log(entries)/log(2)).toInt + val addr_sz = log2up(entries) override val io = new ioQueueCtrlFlow(addr_sz); // Enqueue and dequeue pointers @@ -186,7 +186,7 @@ class ioQueueDpathFlow[T <: Data](addr_sz: Int)(data: => T) extends Bundle() class queueDpathFlow[T <: Data](entries: Int)(data: => T) extends Component { - val addr_sz = ceil(log(entries)/log(2)).toInt + val addr_sz = log2up(entries) override val io = new ioQueueDpathFlow(addr_sz)(data); val ram = Mem(entries, io.wen, io.waddr, io.enq_bits); val rout = ram(io.raddr); diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index e013a16c..6949a9c0 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -5,20 +5,32 @@ import Chisel._ import Node._; import scala.math._; -class MuxN[T <: Data](n: Int)(data: => T) 
extends Component { - val io = new Bundle { - val sel = Bits(width = ceil(log(n)/log(2)).toInt) - val in = Vec(n) { data }.asInput() - val out = data.asOutput() - } +object log2up +{ + def apply(in: Int) = ceil(log(in)/log(2)).toInt +} - val out = Vec(n) { Wire() { data } } - out(0) <== io.in(0) - for (i <- 1 to n-1) { - out(i) <== Mux(io.sel === UFix(i), io.in(i), out(i-1)) +object Slice +{ + def apply(n: Int, in: Bits, sel: Bits) = + { + val w = in.width / n + var out = in(w-1, 0) & Fill(w, sel === UFix(0)) + for (i <- 1 until n) + out = out | (in((i+1)*w-1, i*w) & Fill(w, sel === Bits(i))) + if (n > 1) out else in } +} - out(n-1) ^^ io.out +object FillInterleaved +{ + def apply(n: Int, in: Bits) = + { + var out = Fill(n, in(0)) + for (i <- 1 until in.width) + out = Cat(Fill(n, in(i)), out) + out + } } class Mux1H(n: Int, w: Int) extends Component @@ -30,13 +42,10 @@ class Mux1H(n: Int, w: Int) extends Component } if (n > 1) { - val out = Vec(n) { Wire() { Bits(width = w) } } - out(0) <== io.in(0) & Fill(w, io.sel(0)) - for (i <- 1 to n-1) { - out(i) <== out(i-1) | (io.in(i) & Fill(w, io.sel(i))) - } - - io.out := out(n-1) + var out = io.in(0) & Fill(w, io.sel(0)) + for (i <- 1 to n-1) + out = out | (io.in(i) & Fill(w, io.sel(i))) + io.out := out } else { io.out := io.in(0) } @@ -56,7 +65,6 @@ class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle { class Arbiter[T <: Data](n: Int)(data: => T) extends Component { val io = new ioArbiter(n)(data) - val dout = Vec(n) { Wire() { data } } val vout = Wire { Bool() } io.in(0).ready := io.out.ready @@ -64,18 +72,17 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { io.in(i).ready := !io.in(i-1).valid && io.in(i-1).ready } - dout(0) <== io.in(n-1).bits - for (i <- 1 to n-1) { - dout(i) <== Mux(io.in(n-1-i).valid, io.in(n-1-i).bits, dout(i-1)) - } + var dout = io.in(n-1).bits + for (i <- 1 to n-1) + dout = Mux(io.in(n-1-i).valid, io.in(n-1-i).bits, dout) for (i <- 0 to n-2) { when (io.in(i).valid) { 
vout <== Bool(true) } } vout <== io.in(n-1).valid - vout ^^ io.out.valid - dout(n-1) ^^ io.out.bits + vout ^^ io.out.valid + dout ^^ io.out.bits } class ioPriorityDecoder(in_width: Int, out_width: Int) extends Bundle From a8d0cd95e6ef0630451b06f09c91eacf663c0bee Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Dec 2011 03:26:11 -0800 Subject: [PATCH 0072/1087] hellacache now works --- rocket/src/main/scala/consts.scala | 4 +- rocket/src/main/scala/ctrl.scala | 17 +- rocket/src/main/scala/dcache.scala | 5 +- rocket/src/main/scala/dtlb.scala | 7 +- rocket/src/main/scala/icache.scala | 5 +- rocket/src/main/scala/nbdcache.scala | 314 +++++++++++++-------------- rocket/src/main/scala/ptw.scala | 5 +- rocket/src/main/scala/util.scala | 12 - 8 files changed, 174 insertions(+), 195 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 23b2f2b6..16f7b8a2 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -126,7 +126,7 @@ object Constants val M_PFR = Bits("b0010", 4); // prefetch with intent to read val M_PFW = Bits("b0011", 4); // prefetch with intent to write val M_FLA = Bits("b0100", 4); // write back and invlaidate all lines - val M_PRD = Bits("b0101", 4); // PTW load + val M_FENCE = Bits("b0101", 4); // memory fence val M_INV = Bits("b0110", 4); // write back and invalidate line val M_CLN = Bits("b0111", 4); // write back line val M_XA_ADD = Bits("b1000", 4); @@ -191,7 +191,7 @@ object Constants val OFFSET_BITS = 6; // log2(cache line size in bytes) val NMSHR = 2; // number of primary misses val NRPQ = 16; // number of secondary misses - val NSDQ = 10; // number of secondary stores/AMOs + val NSDQ = 17; // number of secondary stores/AMOs val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) val IDX_BITS = PGIDX_BITS - OFFSET_BITS; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6d8cf5a5..f44ee4d5 100644 --- 
a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -253,8 +253,8 @@ class rocketCtrl extends Component EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y), DI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y), ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y), - FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N), - FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N), + FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N), + FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N), CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y), MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y), MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y), @@ -494,11 +494,14 @@ class rocketCtrl extends Component Mux(p_irq_ipi, UFix(21,5), Mux(p_irq_timer, UFix(23,5), UFix(0,5))); + + val mem_xcpt_ma_ld = io.xcpt_ma_ld && !mem_reg_kill_dmem + val mem_xcpt_ma_st = io.xcpt_ma_st && !mem_reg_kill_dmem val mem_exception = interrupt || - io.xcpt_ma_ld || - io.xcpt_ma_st || + mem_xcpt_ma_ld || + mem_xcpt_ma_st || io.xcpt_dtlb_ld || io.xcpt_dtlb_st || mem_reg_xcpt_illegal || @@ -516,8 +519,8 @@ class 
rocketCtrl extends Component Mux(mem_reg_xcpt_fpu, UFix(4,5), // FPU disabled Mux(mem_reg_xcpt_syscall, UFix(6,5), // system call // breakpoint - Mux(io.xcpt_ma_ld, UFix(8,5), // misaligned load - Mux(io.xcpt_ma_st, UFix(9,5), // misaligned store + Mux(mem_xcpt_ma_ld, UFix(8,5), // misaligned load + Mux(mem_xcpt_ma_st, UFix(9,5), // misaligned store Mux(io.xcpt_dtlb_ld, UFix(10,5), // load fault Mux(io.xcpt_dtlb_st, UFix(11,5), // store fault UFix(0,5))))))))))); // instruction address misaligned @@ -622,7 +625,7 @@ class rocketCtrl extends Component (id_sel_wa === WA_RD) && id_stall_waddr || (id_sel_wa === WA_RA) && id_stall_ra || id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || - (id_sync === SYNC_D) && !io.dmem.req_rdy || + ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || id_console_out_val && !io.console.rdy || id_div_val.toBool && !io.dpath.div_rdy || io.dpath.div_result_val || diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 3e5774af..31266888 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -205,13 +205,12 @@ class rocketDCacheDM(lines: Int) extends Component { val p_store_valid = Reg(resetVal = Bool(false)); val req_store = (io.cpu.req_cmd === M_XWR); - val req_load = (io.cpu.req_cmd === M_XRD) || (io.cpu.req_cmd === M_PRD); + val req_load = (io.cpu.req_cmd === M_XRD); val req_flush = (io.cpu.req_cmd === M_FLA); val req_amo = io.cpu.req_cmd(3).toBool; - val r_req_load = (r_cpu_req_cmd === M_XRD) || (r_cpu_req_cmd === M_PRD); + val r_req_load = (r_cpu_req_cmd === M_XRD); val r_req_store = (r_cpu_req_cmd === M_XWR); val r_req_flush = (r_cpu_req_cmd === M_FLA); - val r_req_ptw_load = (r_cpu_req_cmd === M_PRD); val r_req_amo = r_cpu_req_cmd(3).toBool; when (io.cpu.req_val && io.cpu.req_rdy) { diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index e10591dd..921bbacc 100644 --- a/rocket/src/main/scala/dtlb.scala +++ 
b/rocket/src/main/scala/dtlb.scala @@ -62,8 +62,7 @@ class rocketDTLB(entries: Int) extends Component val req_load = (r_cpu_req_cmd === M_XRD); val req_store = (r_cpu_req_cmd === M_XWR); - val req_flush = (r_cpu_req_cmd === M_FLA); - val req_amo = io.cpu.req_cmd(3).toBool; + val req_amo = r_cpu_req_cmd(3).toBool; val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); @@ -118,7 +117,7 @@ class rocketDTLB(entries: Int) extends Component val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val lookup = (state === s_ready) && r_cpu_req_val && !req_flush; + val lookup = (state === s_ready) && r_cpu_req_val && (req_load || req_store || req_amo); val lookup_hit = lookup && tag_hit; val lookup_miss = lookup && !tag_hit; val tlb_hit = status_vm && lookup_hit; @@ -143,7 +142,6 @@ class rocketDTLB(entries: Int) extends Component (status_u && !ur_array(tag_hit_addr).toBool)); io.cpu.xcpt_ld := access_fault_ld; -// (lookup && (req_load || req_amo) && outofrange) || access_fault_ld; val access_fault_st = tlb_hit && (req_store || req_amo) && @@ -151,7 +149,6 @@ class rocketDTLB(entries: Int) extends Component (status_u && !uw_array(tag_hit_addr).toBool)); io.cpu.xcpt_st := access_fault_st; -// (lookup && (req_store || req_amo) && outofrange) || access_fault_st; io.cpu.req_rdy := (state === s_ready) && !tlb_miss; io.cpu.resp_miss := tlb_miss; diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 97485fdb..870a3ee2 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -113,10 +113,7 @@ class rocketICacheDM(lines: Int) extends Component { // output signals io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_valid && tag_match; io.cpu.req_rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || (tag_valid && tag_match)); - - val test = Wire { Bits(width = MEM_DATA_BITS) } - test <== data_array_rdata - io.cpu.resp_data := Slice(MEM_DATA_BITS/databits, 
test, r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb)) + io.cpu.resp_data := data_array_rdata >> Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2up(databits))).toUFix io.mem.req_val := (state === s_request); io.mem.req_addr := Cat(r_cpu_req_ppn, r_cpu_req_idx(indexmsb,indexlsb)).toUFix diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index b43d3187..d172fa06 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -42,16 +42,18 @@ class StoreDataGen extends Component { io.dout := data } +// this currently requires that CPU_DATA_BITS == 64 class LoadDataGen extends Component { val io = new Bundle { val typ = Bits(3, 'input) - val addr = Bits(3, 'input) - val din = Bits(64, 'input) + val addr = Bits(log2up(MEM_DATA_BITS/8), 'input) + val din = Bits(MEM_DATA_BITS, 'input) val dout = Bits(64, 'output) - val dout_subword = Bits(64, 'output) + val r_dout = Bits(64, 'output) + val r_dout_subword = Bits(64, 'output) } - val shifted = io.din >> Cat(io.addr(2), Bits(0, 5)).toUFix + val shifted = io.din >> Cat(io.addr(io.addr.width-1,2), Bits(0, 5)).toUFix val extended = Wire { Bits(width = io.din.width) } switch (io.typ) { @@ -60,7 +62,8 @@ class LoadDataGen extends Component { otherwise { extended <== shifted } } - val shifted_subword = Reg(extended) >> Cat(Reg(io.addr(1,0)), Bits(0, 3)).toUFix + val r_extended = Reg(extended) + val shifted_subword = r_extended >> Cat(Reg(io.addr(1,0)), Bits(0, 3)).toUFix val extended_subword = Wire { Bits(width = io.din.width) } switch (Reg(io.typ)) { @@ -72,7 +75,8 @@ class LoadDataGen extends Component { } io.dout := extended - io.dout_subword := extended_subword + io.r_dout := r_extended + io.r_dout_subword := extended_subword } class RPQEntry extends Bundle { @@ -80,7 +84,7 @@ class RPQEntry extends Bundle { val cmd = Bits(width = 4) val typ = Bits(width = 3) val sdq_id = UFix(width = log2up(NSDQ)) - val tag = Bits(width = CPU_TAG_BITS) + val 
tag = Bits(width = DCACHE_TAG_BITS) } class Replay extends Bundle { @@ -89,12 +93,12 @@ class Replay extends Bundle { val cmd = Bits(width = 4) val typ = Bits(width = 3) val sdq_id = UFix(width = log2up(NSDQ)) - val tag = Bits(width = CPU_TAG_BITS) + val tag = Bits(width = DCACHE_TAG_BITS) } class DataReq extends Bundle { val idx = Bits(width = IDX_BITS) - val offset = Bits(width = IDX_BITS) + val offset = Bits(width = OFFSET_BITS) val cmd = Bits(width = 4) val typ = Bits(width = 3) val data = Bits(width = CPU_DATA_BITS) @@ -143,7 +147,7 @@ class MSHR(id: Int) extends Component { val req_cmd = Bits(width = 4) val req_type = Bits(width = 3) val req_sdq_id = UFix(width = log2up(NSDQ)) - val req_tag = Bits(CPU_TAG_BITS, 'input) + val req_tag = Bits(DCACHE_TAG_BITS, 'input) val idx_match = Bool('output) val idx = Bits(IDX_BITS, 'output) @@ -164,7 +168,7 @@ class MSHR(id: Int) extends Component { val req_load = (io.req_cmd === M_XRD) || (io.req_cmd === M_PFR) val req_use_rpq = (io.req_cmd != M_PFR) && (io.req_cmd != M_PFW) - val next_dirty = io.req_pri_val && io.req_pri_rdy && !req_load || io.req_sec_val && io.req_sec_rdy && (!req_load || dirty) + val next_dirty = dirty || io.req_sec_val && io.req_sec_rdy && !req_load val sec_rdy = io.idx_match && !refilled && (dirty || !requested || req_load) val rpq = (new queueSimplePF(NRPQ)) { new RPQEntry() } @@ -179,6 +183,7 @@ class MSHR(id: Int) extends Component { when (io.req_pri_val && io.req_pri_rdy) { valid <== Bool(true) + dirty <== !req_load requested <== Bool(false) refilled <== Bool(false) ppn <== io.req_ppn @@ -233,13 +238,15 @@ class MSHRFile extends Component { val req_cmd = Bits(4, 'input) val req_type = Bits(3, 'input) val req_data = Bits(CPU_DATA_BITS, 'input) - val req_tag = Bits(CPU_TAG_BITS, 'input) + val req_tag = Bits(DCACHE_TAG_BITS, 'input) val req_sdq_id = UFix(log2up(NSDQ), 'input) val mem_resp_val = Bool('input) val mem_resp_tag = Bits(DMEM_TAG_BITS, 'input) val mem_resp_idx = Bits(IDX_BITS, 'output) + 
val fence_rdy = Bool('output) + val mem_req = (new ioDecoupled) { new MemReq() }.flip() val meta_req = (new ioDecoupled) { new MetaArrayReq() }.flip() val replay = (new ioDecoupled) { new Replay() }.flip() @@ -247,6 +254,7 @@ class MSHRFile extends Component { val idx_match = Wire { Bool() } val pri_rdy = Wire { Bool() } + val fence = Wire { Bool() } val sec_rdy = Wire { Bool() } val tag_mux = new Mux1H(NMSHR, PPN_BITS) @@ -284,14 +292,16 @@ class MSHRFile extends Component { val mem_resp_val = io.mem_resp_val && (UFix(i) === io.mem_resp_tag) mshr.io.mem_resp_val := mem_resp_val - mem_resp_idx_mux.io.sel(i) := mem_resp_val + mem_resp_idx_mux.io.sel(i) := (UFix(i) === io.mem_resp_tag) mem_resp_idx_mux.io.in(i) := mshr.io.idx when (mshr.io.req_pri_rdy) { pri_rdy <== Bool(true) } + when (!mshr.io.req_pri_rdy) { fence <== Bool(true) } when (mshr.io.req_sec_rdy) { sec_rdy <== Bool(true) } when (mshr.io.idx_match) { idx_match <== Bool(true) } } pri_rdy <== Bool(false) + fence <== Bool(false) sec_rdy <== Bool(false) idx_match <== Bool(false) @@ -301,6 +311,7 @@ class MSHRFile extends Component { io.req_rdy := Mux(idx_match, tag_match && sec_rdy, pri_rdy) io.mem_resp_idx := mem_resp_idx_mux.io.out + io.fence_rdy := !fence } class ReplayUnit extends Component { @@ -310,7 +321,7 @@ class ReplayUnit extends Component { val replay = (new ioDecoupled) { new Replay() } val data_req = (new ioDecoupled) { new DataReq() }.flip() val cpu_resp_val = Bool('output) - val cpu_resp_tag = Bits(CPU_TAG_BITS, 'output) + val cpu_resp_tag = Bits(DCACHE_TAG_BITS, 'output) } val sdq_val = Reg(resetVal = UFix(0, NSDQ)) @@ -366,6 +377,7 @@ class WritebackUnit extends Component { val req = (new ioDecoupled) { new WritebackReq() } val data_req = (new ioDecoupled) { new DataArrayReq() }.flip() val data_resp = Bits(MEM_DATA_BITS, 'input) + val refill_req = (new ioDecoupled) { new MemReq() } val mem_req = (new ioDecoupled) { new MemReq() }.flip() val mem_req_data = Bits(MEM_DATA_BITS, 'output) } @@ 
-375,32 +387,39 @@ class WritebackUnit extends Component { val cnt = Reg() { UFix(width = log2up(REFILL_CYCLES+1)) } val addr = Reg() { new WritebackReq() } + // don't allow memory requests to bypass conflicting writebacks. + // TODO: turn this into a victim buffer. + val block_refill = valid && (io.refill_req.bits.addr(IDX_BITS-1,0) === addr.idx) + val refill_val = io.refill_req.valid && !block_refill + wbq.io.enq.valid := valid && Reg(io.data_req.valid && io.data_req.ready) wbq.io.enq.bits := io.data_resp - wbq.io.deq.ready := io.mem_req.ready && (cnt === UFix(REFILL_CYCLES)) + wbq.io.deq.ready := io.mem_req.ready && !refill_val && (cnt === UFix(REFILL_CYCLES)) when (io.req.valid && io.req.ready) { valid <== Bool(true); cnt <== UFix(0); addr <== io.req.bits } when (io.data_req.valid && io.data_req.ready) { cnt <== cnt + UFix(1) } when ((cnt === UFix(REFILL_CYCLES)) && !wbq.io.deq.valid) { valid <== Bool(false) } io.req.ready := !valid - io.data_req.valid := valid && wbq.io.enq.ready + io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) io.data_req.bits.idx := addr.idx io.data_req.bits.offset := cnt io.data_req.bits.rw := Bool(false) io.data_req.bits.wmask := Bits(0) io.data_req.bits.data := Bits(0) - io.mem_req.valid := wbq.io.deq.valid && (cnt === UFix(REFILL_CYCLES)) - io.mem_req.bits.rw := Bool(true) - io.mem_req.bits.addr := Cat(addr.ppn, addr.idx).toUFix - io.mem_req.bits.tag := Bits(0) + + io.refill_req.ready := io.mem_req.ready && !block_refill + io.mem_req.valid := refill_val || wbq.io.deq.valid && (cnt === UFix(REFILL_CYCLES)) + io.mem_req.bits.rw := !refill_val + io.mem_req.bits.addr := Mux(refill_val, io.refill_req.bits.addr, Cat(addr.ppn, addr.idx).toUFix) + io.mem_req.bits.tag := io.refill_req.bits.tag io.mem_req_data := wbq.io.deq.bits } class FlushUnit(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { Bits(width = CPU_TAG_BITS) } - val resp = (new ioDecoupled) { Bits(width = CPU_TAG_BITS) }.flip() + val req 
= (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) } + val resp = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) }.flip() val meta_req = (new ioDecoupled) { new MetaArrayReq() }.flip() val meta_resp = (new MetaData).asInput() val wb_req = (new ioDecoupled) { new WritebackReq() }.flip() @@ -431,7 +450,8 @@ class FlushUnit(lines: Int) extends Component { io.meta_req.bits.data.dirty := Bool(false) io.meta_req.bits.data.tag := UFix(0) io.wb_req.valid := state === s_meta_wait - io.meta_resp ^^ io.wb_req.bits + io.wb_req.bits.ppn := io.meta_resp.tag + io.wb_req.bits.idx := cnt } class MetaDataArray(lines: Int) extends Component { @@ -444,10 +464,10 @@ class MetaDataArray(lines: Int) extends Component { val vd_array = Mem4(lines, Bits(width = 2)) vd_array.setReadLatency(0) + val vd_wdata2 = Cat(io.state_req.bits.data.valid, io.state_req.bits.data.dirty) + vd_array.write(io.state_req.bits.idx, vd_wdata2, io.state_req.valid && io.state_req.bits.rw) val vd_wdata1 = Cat(io.req.bits.data.valid, io.req.bits.data.dirty) val vd_rdata1 = vd_array.rw(io.req.bits.idx, vd_wdata1, io.req.valid && io.req.bits.rw) - val vd_wdata2 = Cat(io.state_req.bits.data.valid, io.req.bits.data.dirty) - vd_array.write(io.state_req.bits.idx, vd_wdata2, io.state_req.valid && io.state_req.bits.rw) val tag_array = Mem4(lines, io.resp.tag) tag_array.setReadLatency(0) @@ -477,31 +497,29 @@ class DataArray(lines: Int) extends Component { io.req.ready := Bool(true) } -class rocketNBDCacheAMOALU extends Component { +class AMOALU extends Component { val io = new Bundle { - val cmd = Bits(4, 'input) - val wmask = Bits(64/8, 'input) - val lhs = UFix(64, 'input) - val rhs = UFix(64, 'input) - val result = UFix(64, 'output) + val cmd = Bits(4, 'input) + val typ = Bits(3, 'input) + val lhs = UFix(64, 'input) + val rhs = UFix(64, 'input) + val out = UFix(64, 'output) } val signed = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MAX) val sub = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) || (io.cmd === M_XA_MAX) 
|| (io.cmd === M_XA_MAXU) val min = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) + val word = (io.typ === MT_W) || (io.typ === MT_WU) - val addsub_rhs = Mux(sub, ~io.rhs, io.rhs) - val adder_lhs = Cat(io.lhs(63,32), io.wmask(3) & io.lhs(31), io.lhs(30,0)).toUFix; - val adder_rhs = Cat(addsub_rhs(63,32), io.wmask(3) & addsub_rhs(31), addsub_rhs(30,0)).toUFix; - val adder_out = adder_lhs + adder_rhs + sub.toUFix + val adder_out = (Cat(io.lhs, UFix(0,1)).toUFix + Cat(io.rhs ^ Fill(io.rhs.width, sub), sub).toUFix) >> UFix(1) - val cmp_lhs = Mux(io.wmask(7), io.lhs(63), io.lhs(31)) - val cmp_rhs = Mux(io.wmask(7), io.rhs(63), io.rhs(31)) - val cmp_diff = Mux(io.wmask(7), adder_out(63), adder_out(31)) + val cmp_lhs = Mux(word, io.lhs(31), io.lhs(63)) + val cmp_rhs = Mux(word, io.rhs(31), io.rhs(63)) + val cmp_diff = Mux(word, adder_out(31), adder_out(63)) val less = Mux(cmp_lhs === cmp_rhs, cmp_diff, Mux(signed, cmp_lhs, cmp_rhs)) val cmp_out = Mux(min === less, io.lhs, io.rhs) - val alu_out = Wire { UFix(width = io.result.width) }; + val alu_out = Wire { UFix(width = io.out.width) }; switch (io.cmd) { is (M_XA_ADD) { alu_out <== adder_out } is (M_XA_SWAP) { alu_out <== io.rhs } @@ -510,54 +528,7 @@ class rocketNBDCacheAMOALU extends Component { } alu_out <== cmp_out - io.result := alu_out -} - -// XXX broken for CPU_DATA_BITS != 64 -class AMOUnit extends Component { - val io = new Bundle { - val req = (new ioDecoupled) { new DataReq() } - val lhs = Bits(width = CPU_DATA_BITS) - val rhs = Bits(width = CPU_DATA_BITS) - val wmask = Bits(width = CPU_DATA_BITS/8, dir = 'input) - val data_req = (new ioDecoupled) { new DataReq() }.flip() - } - - val valid = Reg(resetVal = Bool(false)) - val r_cmd = Reg() { Bits() } - val r_offset = Reg() { Bits() } - val r_type = Reg() { Bits() } - val r_idx = Reg() { Bits() } - val r_lhs = Reg() { Bits() } - val r_rhs = Reg() { Bits() } - val r_wmask = Reg() { Bits() } - when (io.req.valid && io.req.ready) { - valid <== Bool(true); - r_idx 
<== io.req.bits.idx - r_lhs <== io.lhs; - r_rhs <== io.rhs; - r_cmd <== io.req.bits.cmd; - r_type <== io.req.bits.typ; - r_offset <== io.req.bits.offset; - r_wmask <== io.wmask - } - when (io.data_req.valid && io.data_req.ready) { - valid <== Bool(false) - } - - val alu = new rocketNBDCacheAMOALU - alu.io.cmd := r_cmd - alu.io.wmask := r_wmask - alu.io.lhs := r_lhs - alu.io.rhs := r_rhs - - io.req.ready := !valid - io.data_req.valid := valid - io.data_req.bits.idx := r_idx - io.data_req.bits.cmd := r_cmd - io.data_req.bits.typ := r_type - io.data_req.bits.offset := r_offset - io.data_req.bits.data := alu.io.result + io.out := alu_out } class HellaCache(lines: Int) extends Component { @@ -576,53 +547,54 @@ class HellaCache(lines: Int) extends Component { val ramindexlsb = log2up(MEM_DATA_BITS/8); val early_nack = Reg { Bool() } - val r_cpu_req_val_ = Reg(io.cpu.req_val, resetVal = Bool(false)) + val r_cpu_req_val_ = Reg(io.cpu.req_val && io.cpu.req_rdy, resetVal = Bool(false)) val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req_kill && !early_nack val r_cpu_req_idx = Reg() { Bits() } val r_cpu_req_cmd = Reg() { Bits() } val r_cpu_req_type = Reg() { Bits() } val r_cpu_req_tag = Reg() { Bits() } val r_cpu_req_data = Reg() { Bits() } - - val r2_cpu_req_val = Reg(r_cpu_req_val, resetVal = Bool(false)) - val r2_cpu_req_ppn = Reg(io.cpu.req_ppn) - val r2_cpu_req_idx = Reg(r_cpu_req_idx) - val r2_cpu_req_cmd = Reg(r_cpu_req_cmd) - val r2_cpu_req_type = Reg(r_cpu_req_type) - val r2_cpu_req_tag = Reg(r_cpu_req_tag) val p_store_valid = Reg(resetVal = Bool(false)) val p_store_data = Reg() { Bits() } val p_store_idx = Reg() { Bits() } + val p_store_cmd = Reg() { Bits() } val p_store_type = Reg() { Bits() } val store_data_wide = Wire { Bits(width = MEM_DATA_BITS) } val store_wmask_wide = Wire { Bits(width = MEM_DATA_BITS) } val req_store = (io.cpu.req_cmd === M_XWR) - val req_load = (io.cpu.req_cmd === M_XRD) || (io.cpu.req_cmd === M_PRD) - val req_flush = (io.cpu.req_cmd === 
M_FLA) + val req_load = (io.cpu.req_cmd === M_XRD) val req_amo = io.cpu.req_cmd(3).toBool val req_read = req_load || req_amo val req_write = req_store || req_amo val r_req_load = (r_cpu_req_cmd === M_XRD) val r_req_store = (r_cpu_req_cmd === M_XWR) val r_req_flush = (r_cpu_req_cmd === M_FLA) + val r_req_fence = (r_cpu_req_cmd === M_FENCE) val r_req_amo = r_cpu_req_cmd(3).toBool val r_req_read = r_req_load || r_req_amo val r_req_write = r_req_store || r_req_amo - val r2_req_load = (r2_cpu_req_cmd === M_XRD) - val r2_req_store = (r2_cpu_req_cmd === M_XWR) - val r2_req_amo = r2_cpu_req_cmd(3).toBool - val r2_req_write = r2_req_store || r2_req_amo + val r_req_readwrite = r_req_read || r_req_write val nack_wb = Wire { Bool() } val nack_mshr = Wire { Bool() } val nack_sdq = Wire { Bool() } + + // replay unit + val replayer = new ReplayUnit + val replay_amo_val = replayer.io.data_req.valid && replayer.io.data_req.bits.cmd(3).toBool + val replay_amo_rdy = replayer.io.data_req.ready + val replay_amo = replay_amo_val && replay_amo_rdy + val r_replay_amo = Reg(replay_amo, resetVal = Bool(false)) + when (replay_amo) { + r_cpu_req_data <== replayer.io.data_req.bits.data + } when (io.cpu.req_val) { r_cpu_req_idx <== io.cpu.req_idx - r_cpu_req_cmd <== Mux(req_load, M_XRD, io.cpu.req_cmd) + r_cpu_req_cmd <== io.cpu.req_cmd r_cpu_req_type <== io.cpu.req_type r_cpu_req_tag <== io.cpu.req_tag when (req_write) { @@ -642,19 +614,10 @@ class HellaCache(lines: Int) extends Component { // writeback unit val wb = new WritebackUnit - val mem_arb = (new Arbiter(2)) { new MemReq() } val wb_arb = (new Arbiter(2)) { new WritebackReq() } wb_arb.io.out <> wb.io.req - wb.io.data_req <> data_arb.io.in(1) + wb.io.data_req <> data_arb.io.in(3) wb.io.data_resp <> data.io.resp - wb.io.mem_req <> mem_arb.io.in(0) - - // reset and flush unit - val flusher = new FlushUnit(lines) - flusher.io.req.valid := r_cpu_req_val && r_req_flush - flusher.io.wb_req <> wb_arb.io.in(0) - flusher.io.meta_req <> 
meta_arb.io.in(0) - flusher.io.meta_resp <> meta.io.resp // cpu tag check meta_arb.io.in(2).valid := io.cpu.req_val @@ -667,6 +630,7 @@ class HellaCache(lines: Int) extends Component { val tag_match = meta.io.resp.valid && (meta.io.resp.tag === io.cpu.req_ppn) val tag_hit = r_cpu_req_val && tag_match val tag_miss = r_cpu_req_val && !tag_match + val dirty = meta.io.resp.valid && meta.io.resp.dirty // refill counter val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) @@ -683,7 +647,7 @@ class HellaCache(lines: Int) extends Component { // writeback val wb_rdy = wb_arb.io.in(1).ready - wb_arb.io.in(1).valid := r_cpu_req_val && !tag_match && meta.io.resp.dirty + wb_arb.io.in(1).valid := tag_miss && r_req_readwrite && dirty && !nack_wb wb_arb.io.in(1).bits.ppn := meta.io.resp.tag wb_arb.io.in(1).bits.idx := r_cpu_req_idx(indexmsb,indexlsb) @@ -696,12 +660,12 @@ class HellaCache(lines: Int) extends Component { data_arb.io.in(4).valid := io.cpu.req_val && req_read val early_load_nack = req_read && !data_arb.io.in(4).ready - // store hits. + // store hits and AMO hits and misses use a pending store register. // we nack new stores if a pending store can't retire for some reason. // we drain a pending store if the CPU performs a store or a // conflictig load, or if the cache is idle, or after a miss. 
val p_store_match = r_cpu_req_val && r_req_read && p_store_valid && (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) - val drain_store_val = (p_store_valid && (!io.cpu.req_val || req_store || Reg(tag_miss))) || p_store_match + val drain_store_val = (p_store_valid && (!io.cpu.req_val || !req_read || Reg(tag_miss))) || p_store_match data_arb.io.in(2).bits.offset := p_store_idx(offsetmsb,ramindexlsb) data_arb.io.in(2).bits.idx := p_store_idx(indexmsb,indexlsb) data_arb.io.in(2).bits.rw := Bool(true) @@ -710,32 +674,38 @@ class HellaCache(lines: Int) extends Component { data_arb.io.in(2).valid := drain_store_val val drain_store_rdy = data_arb.io.in(2).ready val drain_store = drain_store_val && drain_store_rdy - val p_store_notready = p_store_valid && !drain_store - p_store_valid <== p_store_notready || (tag_hit && r_req_store) + val p_store_rdy = !p_store_valid || drain_store + val p_amo = Reg(tag_hit && r_req_amo && drain_store_rdy && !p_store_match || r_replay_amo, resetVal = Bool(false)) + p_store_valid <== !p_store_rdy || (tag_hit && r_req_store) || p_amo // tag update after a miss or a store to an exclusive clean line. 
- val clear_valid = tag_miss && !r_req_flush && meta.io.resp.valid + val clear_valid = tag_miss && r_req_readwrite && meta.io.resp.valid && (!dirty || wb_rdy && !nack_wb) val set_dirty = tag_hit && !meta.io.resp.dirty && r_req_write meta.io.state_req.valid := clear_valid || set_dirty + meta.io.state_req.bits.rw := Bool(true) meta.io.state_req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) meta.io.state_req.bits.data.valid := tag_match meta.io.state_req.bits.data.dirty := tag_match // pending store data, also used for AMO RHS val storegen = new StoreDataGen + val amoalu = new AMOALU storegen.io.typ := r_cpu_req_type storegen.io.addr := r_cpu_req_idx(offsetlsb-1, 0) storegen.io.din := r_cpu_req_data - when (tag_hit && r_req_store && !p_store_notready) { - p_store_idx <== r_cpu_req_idx - p_store_type <== r_cpu_req_type + when (p_amo) { + p_store_data <== amoalu.io.out + } + when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) { + p_store_idx <== Mux(r_replay_amo, Reg(Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset)), r_cpu_req_idx) + p_store_type <== Mux(r_replay_amo, Reg(replayer.io.data_req.bits.typ), r_cpu_req_type) + p_store_cmd <== Mux(r_replay_amo, Reg(replayer.io.data_req.bits.cmd), r_cpu_req_cmd) p_store_data <== storegen.io.dout } // miss handling val mshr = new MSHRFile - val replayer = new ReplayUnit - mshr.io.req_val := tag_miss && !r_req_flush && !nack_mshr + mshr.io.req_val := tag_miss && r_req_readwrite && !nack_mshr mshr.io.req_ppn := io.cpu.req_ppn mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) mshr.io.req_data := p_store_data @@ -744,9 +714,9 @@ class HellaCache(lines: Int) extends Component { mshr.io.req_cmd := r_cpu_req_cmd mshr.io.req_type := r_cpu_req_type mshr.io.req_sdq_id := replayer.io.sdq_id - mshr.io.mem_resp_val := io.mem.resp_val + mshr.io.mem_resp_val := io.mem.resp_val && (~rr_count === UFix(0)) mshr.io.mem_resp_tag := io.mem.resp_tag - mshr.io.mem_req <> mem_arb.io.in(1) + mshr.io.mem_req <> 
wb.io.refill_req mshr.io.meta_req <> meta_arb.io.in(1) mshr.io.replay <> replayer.io.replay replayer.io.sdq_enq.valid := tag_miss && r_req_write && !nack_sdq @@ -755,70 +725,94 @@ class HellaCache(lines: Int) extends Component { // replays val replay = replayer.io.data_req.bits - data_arb.io.in(3).bits.offset := replay.offset(offsetmsb,ramindexlsb) - data_arb.io.in(3).bits.idx := replay.idx - data_arb.io.in(3).bits.rw := replay.cmd === M_XWR - data_arb.io.in(3).bits.wmask := store_wmask_wide - data_arb.io.in(3).bits.data := store_data_wide - data_arb.io.in(3).valid := replayer.io.data_req.valid - replayer.io.data_req.ready := data_arb.io.in(3).ready + val stall_replay = r_replay_amo || p_amo || p_store_valid + val replay_val = replayer.io.data_req.valid && !stall_replay + data_arb.io.in(1).bits.offset := replay.offset(offsetmsb,ramindexlsb) + data_arb.io.in(1).bits.idx := replay.idx + data_arb.io.in(1).bits.rw := replay.cmd === M_XWR + data_arb.io.in(1).bits.wmask := store_wmask_wide + data_arb.io.in(1).bits.data := store_data_wide + data_arb.io.in(1).valid := replay_val + replayer.io.data_req.ready := data_arb.io.in(1).ready && !stall_replay // store write mask generation. - // assumes pending stores are higher-priority than store replays. + // assumes store replays are higher-priority than pending stores. 
val maskgen = new StoreMaskGen - val store_offset = Mux(drain_store_val, p_store_idx(offsetmsb,0), replay.offset) - maskgen.io.typ := Mux(drain_store_val, p_store_type, replay.typ) + val store_offset = Mux(!replay_val, p_store_idx(offsetmsb,0), replay.offset) + maskgen.io.typ := Mux(!replay_val, p_store_type, replay.typ) maskgen.io.addr := store_offset(offsetlsb-1,0) store_wmask_wide <== maskgen.io.wmask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2up(CPU_DATA_BITS/8))).toUFix - val store_data = Mux(drain_store_val, p_store_data, replay.data) + val store_data = Mux(!replay_val, p_store_data, replay.data) store_data_wide <== Fill(MEM_DATA_BITS/CPU_DATA_BITS, store_data) // load data subword mux/sign extension. - // assumes load replays are higher-priority than load hits. // subword loads are delayed by one cycle. val loadgen = new LoadDataGen - val loadgen_use_replay = Reg(replayer.io.data_req.valid) + val loadgen_use_replay = Reg(replayer.io.data_req.valid && replayer.io.data_req.ready) loadgen.io.typ := Mux(loadgen_use_replay, Reg(replay.typ), r_cpu_req_type) - loadgen.io.addr := Mux(loadgen_use_replay, Reg(replay.offset), r_cpu_req_idx)(offsetlsb-1,0) - loadgen.io.din := Slice(MEM_DATA_BITS/CPU_DATA_BITS, data.io.resp, r_cpu_req_idx(ramindexlsb-1,offsetlsb).toUFix) + loadgen.io.addr := Mux(loadgen_use_replay, Reg(replay.offset), r_cpu_req_idx)(ramindexlsb-1,0) + loadgen.io.din := data.io.resp - early_nack <== early_tag_nack || early_load_nack + amoalu.io.cmd := p_store_cmd + amoalu.io.typ := p_store_type + amoalu.io.lhs := loadgen.io.r_dout.toUFix + amoalu.io.rhs := p_store_data.toUFix - val nack_miss_wb = meta.io.resp.dirty && !wb_rdy + early_nack <== early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo || r_replay_amo + + val nack_miss_wb = dirty && !wb_rdy val nack_miss_mshr = !mshr.io.req_rdy val nack_miss_sdq = r_req_write && !replayer.io.sdq_enq.ready - nack_wb <== nack_miss_mshr || nack_miss_sdq || p_store_notready || 
p_store_match - nack_mshr <== nack_miss_wb || nack_miss_sdq || p_store_notready || p_store_match - nack_sdq <== nack_miss_wb || nack_miss_mshr || p_store_notready || p_store_match + nack_wb <== nack_miss_mshr || nack_miss_sdq || !p_store_rdy || p_store_match + nack_mshr <== nack_miss_wb || nack_miss_sdq || !p_store_rdy || p_store_match + nack_sdq <== nack_miss_wb || nack_miss_mshr || !p_store_rdy || p_store_match - val nack_for_flush = r_req_flush && !flusher.io.req.ready - val nack = p_store_match || r_req_store && p_store_notready || early_nack || - tag_miss && !r_req_flush && (nack_miss_wb || nack_miss_mshr || nack_miss_sdq || p_store_notready) + // reset and flush unit + val flusher = new FlushUnit(lines) + val flushed = Reg(resetVal = Bool(true)) + val fence_rdy = mshr.io.fence_rdy && wb_rdy && p_store_rdy + flushed <== flushed && !r_cpu_req_val || flusher.io.req.valid && flusher.io.req.ready + flusher.io.req.valid := r_cpu_req_val && r_req_flush && fence_rdy && !flushed + flusher.io.wb_req <> wb_arb.io.in(0) + flusher.io.meta_req <> meta_arb.io.in(0) + flusher.io.meta_resp <> meta.io.resp + flusher.io.resp.ready := Bool(true) // we don't respond to flush requests - // report that cache is always ready. we nack instead. - io.cpu.req_rdy := Bool(true) + // we usually nack rather than reporting that the cache is not ready. + // fences and flushes are the exceptions. 
+ val pending_fence = Reg(resetVal = Bool(false)) + pending_fence <== (r_cpu_req_val && r_req_fence || pending_fence) && !fence_rdy + val nack = p_store_match || + early_nack || + !fence_rdy && (r_req_fence || r_req_flush) || + !p_store_rdy && r_req_write || + !flushed && r_req_flush || + tag_miss && r_req_readwrite && (nack_miss_wb || nack_miss_mshr || nack_miss_sdq || !p_store_rdy) || + !flusher.io.req.ready + + io.cpu.req_rdy := flusher.io.req.ready && !pending_fence io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack - io.cpu.resp_val := (tag_hit && !nack && r_req_read) || flusher.io.resp.valid || replayer.io.cpu_resp_val + io.cpu.resp_val := (tag_hit && !nack && r_req_read) || replayer.io.cpu_resp_val io.cpu.resp_miss := tag_miss && !nack && r_req_read - io.cpu.resp_tag := Mux(replayer.io.cpu_resp_val, replayer.io.cpu_resp_tag, Mux(flusher.io.resp.valid, flusher.io.resp.bits, r_cpu_req_tag)) + io.cpu.resp_tag := Mux(replayer.io.cpu_resp_val, replayer.io.cpu_resp_tag, r_cpu_req_tag) io.cpu.resp_data := loadgen.io.dout - io.cpu.resp_data_subword := loadgen.io.dout_subword + io.cpu.resp_data_subword := loadgen.io.r_dout_subword val misaligned = (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); - io.cpu.xcpt_ma_ld := r_cpu_req_val_ && !io.cpu.req_kill && r_req_read && misaligned - io.cpu.xcpt_ma_st := r_cpu_req_val_ && !io.cpu.req_kill && r_req_write && misaligned + io.cpu.xcpt_ma_ld := r_cpu_req_val_ && r_req_read && misaligned + io.cpu.xcpt_ma_st := r_cpu_req_val_ && r_req_write && misaligned - mem_arb.io.out.ready := io.mem.req_rdy - io.mem.req_val := mem_arb.io.out.valid - io.mem.req_rw := mem_arb.io.out.bits.rw + wb.io.mem_req.ready := io.mem.req_rdy + io.mem.req_val := wb.io.mem_req.valid + io.mem.req_rw := wb.io.mem_req.bits.rw 
io.mem.req_wdata := wb.io.mem_req_data - io.mem.req_tag := mem_arb.io.out.bits.tag.toUFix - io.mem.req_addr := mem_arb.io.out.bits.addr + io.mem.req_tag := wb.io.mem_req.bits.tag.toUFix + io.mem.req_addr := wb.io.mem_req.bits.addr } } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 6251e463..a5b60e1c 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -19,6 +19,7 @@ class rocketDmemArbiter extends Component // must delay ppn part of address from PTW by 1 cycle (to match TLB behavior) val r_ptw_req_val = Reg(io.ptw.req_val); val r_ptw_req_ppn = Reg(io.ptw.req_ppn); + val r_cpu_req_val = Reg(io.cpu.req_val && io.cpu.req_rdy); io.mem.req_val := io.ptw.req_val || io.cpu.req_val; io.mem.req_cmd := Mux(io.ptw.req_val, io.ptw.req_cmd, io.cpu.req_cmd); @@ -27,7 +28,7 @@ class rocketDmemArbiter extends Component io.mem.req_ppn := Mux(r_ptw_req_val, r_ptw_req_ppn, io.cpu.req_ppn); io.mem.req_data := io.cpu.req_data; io.mem.req_tag := Cat(io.cpu.req_tag, io.ptw.req_val); - io.mem.req_kill := io.cpu.req_kill; + io.mem.req_kill := io.cpu.req_kill && r_cpu_req_val; io.ptw.req_rdy := io.mem.req_rdy; io.cpu.req_rdy := io.mem.req_rdy && !io.ptw.req_val; @@ -96,7 +97,7 @@ class rocketPTW extends Component (state === s_l2_req) || (state === s_l3_req); - io.dmem.req_cmd := M_PRD; + io.dmem.req_cmd := M_XRD; io.dmem.req_type := MT_D; // io.dmem.req_addr := req_addr; io.dmem.req_idx := req_addr(PGIDX_BITS-1,0); diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 6949a9c0..07e7ae41 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -10,18 +10,6 @@ object log2up def apply(in: Int) = ceil(log(in)/log(2)).toInt } -object Slice -{ - def apply(n: Int, in: Bits, sel: Bits) = - { - val w = in.width / n - var out = in(w-1, 0) & Fill(w, sel === UFix(0)) - for (i <- 1 until n) - out = out | (in((i+1)*w-1, i*w) & Fill(w, sel === Bits(i))) - if (n > 1) out 
else in - } -} - object FillInterleaved { def apply(n: Int, in: Bits) = From 82700cad7254cf3f84caabdc76b20fdee7d537b4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Dec 2011 07:20:00 -0800 Subject: [PATCH 0073/1087] fix multiplier for rv32 --- rocket/src/main/scala/consts.scala | 12 ++++----- rocket/src/main/scala/ctrl.scala | 12 ++++----- rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/multiplier.scala | 37 ++++++++++++++++---------- 4 files changed, 35 insertions(+), 27 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 16f7b8a2..9906eaa9 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -44,13 +44,11 @@ object Constants val A1_RS1 = UFix(0, 1); val A1_LUI = UFix(1, 1); - val MUL_X = UFix(0, 3); - val MUL_NO = UFix(0, 3); - val MUL_64 = UFix(1, 3); - val MUL_64H = UFix(2, 3); - val MUL_64HU = UFix(3, 3); - val MUL_64HSU = UFix(4, 3); - val MUL_32 = UFix(5, 3); + val MUL_X = UFix(0, 2); + val MUL_LO = UFix(0, 2); + val MUL_HU = UFix(1, 2); + val MUL_HS = UFix(2, 2); + val MUL_HSU = UFix(3, 2); val DIV_X = UFix(0, 4); val DIV_NO = UFix(0, 4); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f44ee4d5..05220c7f 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -24,7 +24,7 @@ class ioCtrlDpath extends Bundle() val fn_dw = Bool('output); val fn_alu = UFix(4, 'output); val mul_val = Bool('output); - val mul_fn = UFix(3, 'output); + val mul_fn = UFix(2, 'output); val mul_wb = Bool('output); val div_val = Bool('output); val div_fn = UFix(4, 'output); @@ -234,11 +234,11 @@ class rocketCtrl extends Component SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), SRAW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64H, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_64HSU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, Y,MUL_32, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 57f2efd6..f394361d 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -283,6 
+283,7 @@ class rocketDpath extends Component // multiplier mul.io.mul_val := ex_reg_ctrl_mul_val && !io.ctrl.killx; + mul.io.dw := ex_reg_ctrl_fn_dw; mul.io.mul_fn := ex_reg_ctrl_mul_fn; mul.io.mul_tag := ex_reg_waddr; mul.io.in0 := ex_reg_rs1; diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 67a53ac3..bbcbef3b 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -7,7 +7,8 @@ import Constants._; class ioMultiplier(width: Int) extends Bundle { // requests val mul_val = Bool('input); - val mul_fn = UFix(3, 'input); + val dw = UFix(1, 'input); + val mul_fn = UFix(2, 'input); val mul_tag = UFix(5, 'input); val in0 = Bits(width, 'input); val in1 = Bits(width, 'input); @@ -22,30 +23,38 @@ class rocketMultiplier extends Component { val io = new ioMultiplier(64); val r_val = Reg(resetVal = Bool(false)); + val r_dw = Reg(resetVal = UFix(0,1)); val r_fn = Reg(resetVal = UFix(0,3)); val r_tag = Reg(resetVal = UFix(0,5)); - val r_in0 = Reg(resetVal = Bits(0,64)); - val r_in1 = Reg(resetVal = Bits(0,64)); + val r_lhs = Reg(resetVal = Bits(0,65)); + val r_rhs = Reg(resetVal = Bits(0,65)); + + val lhs_msb = Mux(io.dw === DW_64, io.in0(63), io.in0(31)).toBool + val lhs_sign = ((io.mul_fn === MUL_HS) || (io.mul_fn === MUL_HSU)) && lhs_msb + val lhs_hi = Mux(io.dw === DW_64, io.in0(63,32), Fill(32, lhs_sign)) + val lhs = Cat(lhs_sign, lhs_hi, io.in0(31,0)) + + val rhs_msb = Mux(io.dw === DW_64, io.in1(63), io.in1(31)).toBool + val rhs_sign = (io.mul_fn === MUL_HS) && rhs_msb + val rhs_hi = Mux(io.dw === DW_64, io.in1(63,32), Fill(32, rhs_sign)) + val rhs = Cat(rhs_sign, rhs_hi, io.in1(31,0)) r_val <== io.mul_val; when (io.mul_val) { + r_dw <== io.dw r_fn <== io.mul_fn; r_tag <== io.mul_tag; - r_in0 <== io.in0; - r_in1 <== io.in1; + r_lhs <== lhs; + r_rhs <== rhs; } - val sxl64 = (r_fn === MUL_64H) || (r_fn === MUL_64HSU); - val sxr64 = (r_fn === MUL_64H); + val mul_result = r_lhs.toFix 
* r_rhs.toFix; - val lhs = Cat(r_in0(63) & sxl64, r_in0); - val rhs = Cat(r_in1(63) & sxr64, r_in1); + val mul_output64 = Mux(r_fn === MUL_LO, mul_result(63,0), mul_result(127,64)) + val mul_output32 = Mux(r_fn === MUL_LO, mul_result(31,0), mul_result(63,31)) + val mul_output32_ext = Cat(Fill(32, mul_output32(31)), mul_output32) - val mul_result = lhs.toFix * rhs.toFix; - - val mul_output = MuxCase(mul_result(63,0), Array( - ((r_fn === MUL_64H) || (r_fn === MUL_64HU) || (r_fn === MUL_64HSU)) -> mul_result(127,64), - (r_fn === MUL_32) -> Cat(Fill(32, mul_result(31)), mul_result(31, 0)))); + val mul_output = Mux(r_dw === DW_64, mul_output64, mul_output32_ext) // just a hack for now, this should be a parameterized number of stages val r_result = Reg(Reg(Reg(mul_output))); From 96c78829b4c7a506bbe4cb974024f1508ce8fb13 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Dec 2011 07:20:32 -0800 Subject: [PATCH 0074/1087] improve ALU and fix revealed emulator bug --- rocket/src/main/scala/dpath_alu.scala | 100 +++++++++----------------- rocket/src/main/scala/util.scala | 13 +++- 2 files changed, 46 insertions(+), 67 deletions(-) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 6c4eadfd..2635f89e 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -19,73 +19,41 @@ class ioALU extends Bundle(){ class rocketDpathALU extends Component { val io = new ioALU(); - - val out64 = - MuxCase(Fix(0, 64), Array( - (io.fn === FN_ADD) -> (io.in1 + io.in2).toFix, - (io.fn === FN_SUB) -> (io.in1 - io.in2).toFix, - (io.fn === FN_SLT) -> (io.in1.toFix < io.in2.toFix), //(io.in1 < io.in2) - (io.fn === FN_SLTU) -> (io.in1 < io.in2).toFix, - (io.fn === FN_AND) -> (io.in1 & io.in2).toFix, - (io.fn === FN_OR) -> (io.in1 | io.in2).toFix, - (io.fn === FN_XOR) -> (io.in1 ^ io.in2).toFix, - (io.fn === FN_SL) -> (io.in1 << io.shamt).toFix, - (io.fn === FN_SR && io.dw === DW_64) -> (io.in1 >> 
io.shamt).toFix, - (io.fn === FN_SR && io.dw === DW_32) -> (Cat(Fix(0, 32),io.in1(31, 0)).toUFix >> io.shamt), - (io.fn === FN_SRA) -> (io.in1.toFix >>> io.shamt))); - - io.out := MuxLookup(io.dw, Fix(0, 64), Array( - DW_64 -> out64(63,0), - DW_32 -> Cat(Fill(32, out64(31)), out64(31,0)).toFix)).toUFix; + // ADD, SUB + val sub = (io.fn === FN_SUB) || (io.fn === FN_SLT) || (io.fn === FN_SLTU) + val adder_rhs = Mux(sub, ~io.in2, io.in2) + val adder_out = (io.in1 + adder_rhs + sub.toUFix)(63,0) + + // SLT, SLTU + val less = Mux(io.in1(63) === io.in2(63), adder_out(63), io.in1(63)) + val lessu = Mux(io.in1(63) === io.in2(63), adder_out(63), io.in2(63)) + + // SLL, SRL, SRA + val sra = (io.fn === FN_SRA) + val shright = sra || (io.fn === FN_SR) + val shin_hi_32 = Mux(sra, Fill(32, io.in1(31)), UFix(0,32)) + val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) + val shin_r = Cat(shin_hi, io.in1(31,0)) + val shin = Mux(shright, shin_r, Reverse(shin_r)) + val shout_r = (Cat(sra & shin_r(63), shin).toFix >>> io.shamt)(63,0) + + val out64 = Wire { Bits(64) } + switch(io.fn) + { + is(FN_ADD) { out64 <== adder_out } + is(FN_SUB) { out64 <== adder_out } + is(FN_SLT) { out64 <== less } + is(FN_SLTU) { out64 <== lessu } + is(FN_AND) { out64 <== io.in1 & io.in2 } + is(FN_OR) { out64 <== io.in1 | io.in2 } + is(FN_XOR) { out64 <== io.in1 ^ io.in2 } + is(FN_SL) { out64 <== Reverse(shout_r) } + } + out64 <== shout_r + + val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) + io.out := Cat(out_hi, out64(31,0)).toUFix } -/* -class IoDpathALU extends Bundle { - val in0 = Bits(32,'input); - val in1 = Bits(32,'input); - val fn = Bits(4,'input); - val out = Bits(32,'output); -} - -class DpathALU extends Component { - val io = new IoDpathALU(); - - val adder_in0 = MuxCase(io.in0,Array( - ((io.fn === FN_SUB) | (io.fn === FN_SLT) | (io.fn === FN_SLTU)) -> (~io.in0))); - - val adder_in1 = io.in1; - val adder_cin = MuxCase(Bits(0),Array( - ((io.fn === FN_SUB) | (io.fn === 
FN_SLT) | (io.fn === FN_SLTU)) -> Bits(1))); - - // Need to make the same width? - val adder_out = Cat(Bits(0,1),adder_in1).toUFix + Cat(Bits(0,1),adder_in0).toUFix + adder_cin.toUFix; - //adder_out := (adder_in1.toUFix + adder_in0.toUFix + adder_cin.toUFix); - - // Determine if there is overflow - val overflow = (io.in0(31) ^ io.in1(31)) & (adder_out(32) != io.in0(31)); - - val compare_yes = MuxLookup(io.fn,Bits(0),Array( - // If unsigned, do subtraction, and if the result is negative, then slt=true - FN_SLTU -> ~adder_out(32), - // If signed, do subtraction, and if the result is negative, then slt=true as well - // But if there is bad overflow (operands same sign and result is a different sign), - // then need to flip - FN_SLT -> ~(adder_out(32) ^ overflow))); - - io.out := MuxLookup(io.fn,Fix(0),Array( - FN_ADD -> adder_out, - FN_SUB -> adder_out, - FN_SLT -> compare_yes, - FN_SLTU -> compare_yes, - FN_AND -> (io.in0 & io.in1), - FN_OR -> (io.in0 | io.in1), - FN_XOR -> (io.in0 ^ io.in1), - FN_SL -> (io.in1 << io.in0(4,0).toUFix), - FN_SR -> (io.in1 >> io.in0(4,0).toUFix), - FN_SRA -> (io.in1.toFix >> io.in0(4,0).toUFix) - )); -} -*/ - } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 07e7ae41..06d3a986 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -15,12 +15,23 @@ object FillInterleaved def apply(n: Int, in: Bits) = { var out = Fill(n, in(0)) - for (i <- 1 until in.width) + for (i <- 1 until in.getWidth) out = Cat(Fill(n, in(i)), out) out } } +object Reverse +{ + def apply(in: Bits) = + { + var out = in(in.getWidth-1) + for (i <- 1 until in.getWidth) + out = Cat(in(in.getWidth-i-1), out) + out + } +} + class Mux1H(n: Int, w: Int) extends Component { val io = new Bundle { From bcceb08373a777ad825519e539f04af7dbb05156 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Dec 2011 07:30:47 -0800 Subject: [PATCH 0075/1087] add dummy mul_rdy signal --- 
rocket/src/main/scala/ctrl.scala | 2 ++ rocket/src/main/scala/dpath.scala | 3 ++- rocket/src/main/scala/multiplier.scala | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 05220c7f..bfc0f148 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -56,6 +56,7 @@ class ioCtrlDpath extends Bundle() val br_ltu = Bool('input); val div_rdy = Bool('input); val div_result_val = Bool('input); + val mul_rdy = Bool('input); val mul_result_val = Bool('input); val ex_waddr = UFix(5,'input); // write addr from execute stage val mem_waddr = UFix(5,'input); // write addr from memory stage @@ -628,6 +629,7 @@ class rocketCtrl extends Component ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || id_console_out_val && !io.console.rdy || id_div_val.toBool && !io.dpath.div_rdy || + id_mul_val.toBool && !io.dpath.mul_rdy || io.dpath.div_result_val || io.dpath.mul_result_val ); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f394361d..1e3ac340 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -288,7 +288,8 @@ class rocketDpath extends Component mul.io.mul_tag := ex_reg_waddr; mul.io.in0 := ex_reg_rs1; mul.io.in1 := ex_reg_rs2; - + + io.ctrl.mul_rdy := mul.io.mul_rdy io.ctrl.mul_result_val := mul.io.result_val; io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection & bypass control diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index bbcbef3b..e1c17d8c 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -7,6 +7,7 @@ import Constants._; class ioMultiplier(width: Int) extends Bundle { // requests val mul_val = Bool('input); + val mul_rdy = Bool('output); val dw = UFix(1, 'input); val mul_fn = UFix(2, 'input); val mul_tag = UFix(5, 'input); @@ -60,7 +61,8 @@ class 
rocketMultiplier extends Component { val r_result = Reg(Reg(Reg(mul_output))); val r_result_tag = Reg(Reg(Reg(r_tag))); val r_result_val = Reg(Reg(Reg(r_val))); - + + io.mul_rdy := Bool(true) io.result := r_result; io.result_tag := r_result_tag; io.result_val := r_result_val; From b5a8b6dc7395e4da924fe4c11c9689c83ab001be Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 19 Dec 2011 16:57:53 -0800 Subject: [PATCH 0076/1087] fix divider for RV32 --- rocket/src/main/scala/consts.scala | 15 +++++---------- rocket/src/main/scala/ctrl.scala | 18 +++++++++--------- rocket/src/main/scala/divider.scala | 24 ++++++++---------------- rocket/src/main/scala/dpath.scala | 1 + 4 files changed, 23 insertions(+), 35 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 9906eaa9..fa7b6ddb 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -50,16 +50,11 @@ object Constants val MUL_HS = UFix(2, 2); val MUL_HSU = UFix(3, 2); - val DIV_X = UFix(0, 4); - val DIV_NO = UFix(0, 4); - val DIV_64D = UFix(1, 4); - val DIV_64DU = UFix(2, 4); - val DIV_64R = UFix(3, 4); - val DIV_64RU = UFix(4, 4); - val DIV_32D = UFix(5, 4); - val DIV_32DU = UFix(6, 4); - val DIV_32R = UFix(7, 4); - val DIV_32RU = UFix(8, 4); + val DIV_X = UFix(0, 2); + val DIV_D = UFix(0, 2); + val DIV_DU = UFix(1, 2); + val DIV_R = UFix(2, 2); + val DIV_RU = UFix(3, 2); val M_N = UFix(0, 1); val M_Y = UFix(1, 1); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index bfc0f148..c4dd282a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -27,7 +27,7 @@ class ioCtrlDpath extends Bundle() val mul_fn = UFix(2, 'output); val mul_wb = Bool('output); val div_val = Bool('output); - val div_fn = UFix(4, 'output); + val div_fn = UFix(2, 'output); val div_wb = Bool('output); val sel_wa = Bool('output); val sel_wb = UFix(3, 'output); @@ -241,14 +241,14 @@ class rocketCtrl 
extends Component MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_64RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_32RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, 
Y,DIV_RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N), EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y), diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 6e383a3e..66c2617f 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -8,7 +8,8 @@ class ioDivider(width: Int) extends Bundle { // requests val div_val = Bool('input); val div_rdy = Bool('output); - val div_fn = UFix(4, 'input); + val dw = UFix(1, 'input); + val div_fn = UFix(2, 'input); val div_waddr = UFix(5, 'input); val dpath_rs1 = Bits(width, 'input); val dpath_rs2 = Bits(width, 'input); @@ -47,26 +48,18 @@ class rocketDivider(width : Int) extends Component { val reg_waddr = Reg(resetVal = UFix(0, 5)); val rem = Reg(resetVal = Bool(false)); val half = Reg(resetVal = Bool(false)); - val tc = Reg(resetVal = Bool(false)); val divisor = Reg(resetVal = UFix(0, width)); val remainder = Reg(resetVal = UFix(0, 2*width+1)); val subtractor = remainder(2*width, width).toUFix - divisor; - val v_tc = ((io.div_fn === DIV_64D) || (io.div_fn === DIV_64R)) || - ((io.div_fn === DIV_32D) || (io.div_fn === DIV_32R)); - - val v_rem 
= ((io.div_fn === DIV_32R) || (io.div_fn === DIV_32RU)) || - ((io.div_fn === DIV_64R) || (io.div_fn === DIV_64RU)); - - val v_half = ((io.div_fn === DIV_32R) || (io.div_fn === DIV_32RU)) || - ((io.div_fn === DIV_32D) || (io.div_fn === DIV_32DU)); + val tc = (io.div_fn === DIV_D) || (io.div_fn === DIV_R); // state machine switch (state) { is (s_ready) { when (!io.div_val) { state <== s_ready; } - when (v_tc) { state <== s_neg_inputs }; + when (tc) { state <== s_neg_inputs }; otherwise { state <== s_busy; } } is (s_neg_inputs) { state <== s_busy; } @@ -83,21 +76,20 @@ class rocketDivider(width : Int) extends Component { // if we're doing 32-bit unsigned division, then we don't want the 32-bit // inputs to be sign-extended. - val in_lhs = Mux((v_half && !v_tc), + val in_lhs = Mux(((io.dw === DW_32) && !tc), Cat(Fill(width/2, UFix(0,1)), io.dpath_rs1(width/2-1, 0)), io.dpath_rs1).toUFix; - val in_rhs = Mux((v_half && !v_tc), + val in_rhs = Mux(((io.dw === DW_32) && !tc), Cat(Fill(width/2, UFix(0,1)), io.dpath_rs2(width/2-1, 0)), io.dpath_rs2).toUFix; when ((state === s_ready) && io.div_val) { count <== UFix(0, count_bits); - half <== v_half; + half <== (io.dw === DW_32); neg_quo <== Bool(false); neg_rem <== Bool(false); - rem <== v_rem; - tc <== v_tc; + rem <== (io.div_fn === DIV_R) || (io.div_fn === DIV_RU); reg_waddr <== io.div_waddr; divby0 <== Bool(true); divisor <== in_rhs; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 1e3ac340..9feaaa16 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -271,6 +271,7 @@ class rocketDpath extends Component alu.io.in1 := ex_alu_in1.toUFix; // divider + div.io.dw := ex_reg_ctrl_fn_dw; div.io.div_fn := ex_reg_ctrl_div_fn; div.io.div_val := ex_reg_ctrl_div_val && !io.ctrl.killx; div.io.div_waddr := ex_reg_waddr; From 733fc8e65edc1d24d7d145f3de6db13b33c8eb81 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 20 Dec 2011 03:49:07 -0800 Subject: 
[PATCH 0077/1087] booth multiplier --- rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/multiplier.scala | 77 ++++++++++++++++---------- 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 9feaaa16..a907faf6 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -292,6 +292,7 @@ class rocketDpath extends Component io.ctrl.mul_rdy := mul.io.mul_rdy io.ctrl.mul_result_val := mul.io.result_val; + mul.io.result_rdy := io.ctrl.mul_wb io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection & bypass control diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index e1c17d8c..f3570bbf 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -10,63 +10,82 @@ class ioMultiplier(width: Int) extends Bundle { val mul_rdy = Bool('output); val dw = UFix(1, 'input); val mul_fn = UFix(2, 'input); - val mul_tag = UFix(5, 'input); + val mul_tag = UFix(CPU_TAG_BITS, 'input); val in0 = Bits(width, 'input); val in1 = Bits(width, 'input); // responses val result = Bits(width, 'output); - val result_tag = UFix(5, 'output); + val result_tag = UFix(CPU_TAG_BITS, 'output); val result_val = Bool('output); + val result_rdy = Bool('input); } class rocketMultiplier extends Component { val io = new ioMultiplier(64); + val width = 64 + 2 + val cycles = width/2 val r_val = Reg(resetVal = Bool(false)); - val r_dw = Reg(resetVal = UFix(0,1)); - val r_fn = Reg(resetVal = UFix(0,3)); - val r_tag = Reg(resetVal = UFix(0,5)); - val r_lhs = Reg(resetVal = Bits(0,65)); - val r_rhs = Reg(resetVal = Bits(0,65)); + val r_dw = Reg { UFix() } + val r_fn = Reg { UFix() } + val r_tag = Reg { UFix() } + val r_lhs = Reg { Bits() } + val r_prod= Reg { Bits(width = width*2) } + val r_lsb = Reg { Bits() } + val r_cnt = Reg { UFix(width = log2up(cycles+1)) } val lhs_msb = Mux(io.dw === DW_64, 
io.in0(63), io.in0(31)).toBool val lhs_sign = ((io.mul_fn === MUL_HS) || (io.mul_fn === MUL_HSU)) && lhs_msb val lhs_hi = Mux(io.dw === DW_64, io.in0(63,32), Fill(32, lhs_sign)) - val lhs = Cat(lhs_sign, lhs_hi, io.in0(31,0)) + val lhs_in = Cat(lhs_sign, lhs_hi, io.in0(31,0)) val rhs_msb = Mux(io.dw === DW_64, io.in1(63), io.in1(31)).toBool val rhs_sign = (io.mul_fn === MUL_HS) && rhs_msb val rhs_hi = Mux(io.dw === DW_64, io.in1(63,32), Fill(32, rhs_sign)) - val rhs = Cat(rhs_sign, rhs_hi, io.in1(31,0)) + val rhs_in = Cat(rhs_sign, rhs_sign, rhs_hi, io.in1(31,0)) - r_val <== io.mul_val; - when (io.mul_val) { + when (io.mul_val && io.mul_rdy) { + r_val <== Bool(true) + r_cnt <== UFix(0, log2up(cycles+1)) r_dw <== io.dw - r_fn <== io.mul_fn; - r_tag <== io.mul_tag; - r_lhs <== lhs; - r_rhs <== rhs; + r_fn <== io.mul_fn + r_tag <== io.mul_tag + r_lhs <== lhs_in + r_prod<== rhs_in + r_lsb <== Bool(false) + } + when (io.result_val && io.result_rdy) { + r_val <== Bool(false) } - - val mul_result = r_lhs.toFix * r_rhs.toFix; - val mul_output64 = Mux(r_fn === MUL_LO, mul_result(63,0), mul_result(127,64)) - val mul_output32 = Mux(r_fn === MUL_LO, mul_result(31,0), mul_result(63,31)) + val lhs_sext = Cat(r_lhs(width-2), r_lhs(width-2), r_lhs).toUFix + val lhs_twice = Cat(r_lhs(width-2), r_lhs, Bits(0,1)).toUFix + + val addend = Mux(r_prod(0) != r_lsb, lhs_sext, + Mux(r_prod(0) != r_prod(1), lhs_twice, + UFix(0))); + val sub = r_prod(1) + val adder_lhs = Cat(r_prod(width*2-1), r_prod(width*2-1,width)).toUFix + val adder_rhs = Mux(sub, ~addend, addend) + val adder_out = (adder_lhs + adder_rhs + sub.toUFix)(width,0) + + when (r_val && (r_cnt != UFix(cycles))) { + r_lsb <== r_prod(1) + r_prod <== Cat(adder_out(width), adder_out, r_prod(width-1,2)) + r_cnt <== r_cnt + UFix(1) + } + + val mul_output64 = Mux(r_fn === MUL_LO, r_prod(63,0), r_prod(127,64)) + val mul_output32 = Mux(r_fn === MUL_LO, r_prod(31,0), r_prod(63,31)) val mul_output32_ext = Cat(Fill(32, mul_output32(31)), 
mul_output32) val mul_output = Mux(r_dw === DW_64, mul_output64, mul_output32_ext) - - // just a hack for now, this should be a parameterized number of stages - val r_result = Reg(Reg(Reg(mul_output))); - val r_result_tag = Reg(Reg(Reg(r_tag))); - val r_result_val = Reg(Reg(Reg(r_val))); - io.mul_rdy := Bool(true) - io.result := r_result; - io.result_tag := r_result_tag; - io.result_val := r_result_val; - + io.mul_rdy := !r_val + io.result := mul_output; + io.result_tag := r_tag; + io.result_val := r_val && (r_cnt === UFix(cycles)) } } From 38ea10a5f4b8546a5320e221ae6dc70574ba34d9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 20 Dec 2011 04:18:28 -0800 Subject: [PATCH 0078/1087] parameterized multiplier unrolling --- rocket/src/main/scala/multiplier.scala | 34 ++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index f3570bbf..ad49a0f9 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -23,8 +23,14 @@ class ioMultiplier(width: Int) extends Bundle { class rocketMultiplier extends Component { val io = new ioMultiplier(64); + // width must be even (booth). + // we need an extra bit to handle signed vs. unsigned, + // so we need to add a second to keep width even. 
val width = 64 + 2 - val cycles = width/2 + // unroll must divide width/2 + val unroll = 3 + + val cycles = width/unroll/2 val r_val = Reg(resetVal = Bool(false)); val r_dw = Reg { UFix() } @@ -62,17 +68,25 @@ class rocketMultiplier extends Component { val lhs_sext = Cat(r_lhs(width-2), r_lhs(width-2), r_lhs).toUFix val lhs_twice = Cat(r_lhs(width-2), r_lhs, Bits(0,1)).toUFix - val addend = Mux(r_prod(0) != r_lsb, lhs_sext, - Mux(r_prod(0) != r_prod(1), lhs_twice, - UFix(0))); - val sub = r_prod(1) - val adder_lhs = Cat(r_prod(width*2-1), r_prod(width*2-1,width)).toUFix - val adder_rhs = Mux(sub, ~addend, addend) - val adder_out = (adder_lhs + adder_rhs + sub.toUFix)(width,0) + var prod = r_prod + var lsb = r_lsb + + for (i <- 0 until unroll) { + val addend = Mux(prod(0) != lsb, lhs_sext, + Mux(prod(0) != prod(1), lhs_twice, + UFix(0))); + val sub = prod(1) + val adder_lhs = Cat(prod(width*2-1), prod(width*2-1,width)).toUFix + val adder_rhs = Mux(sub, ~addend, addend) + val adder_out = (adder_lhs + adder_rhs + sub.toUFix)(width,0) + + lsb = prod(1) + prod = Cat(adder_out(width), adder_out, prod(width-1,2)) + } when (r_val && (r_cnt != UFix(cycles))) { - r_lsb <== r_prod(1) - r_prod <== Cat(adder_out(width), adder_out, r_prod(width-1,2)) + r_lsb <== lsb + r_prod <== prod r_cnt <== r_cnt + UFix(1) } From d65e1a2eee4ca5b4f92104c52a8cda7f119a9431 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 20 Dec 2011 22:08:27 -0800 Subject: [PATCH 0079/1087] vlsi verilog compiles now but doesn't simulate --- rocket/src/main/scala/ctrl.scala | 2 - rocket/src/main/scala/nbdcache.scala | 226 ++++++++++++--------------- rocket/src/main/scala/util.scala | 8 +- 3 files changed, 102 insertions(+), 134 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index c4dd282a..1d5777c4 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -37,7 +37,6 @@ class ioCtrlDpath extends Bundle() val mem_eret = 
Bool('output); val mem_load = Bool('output); val wen = Bool('output); - val ex_mem_type = UFix(3, 'output) // instruction in execute is an unconditional jump val ex_jmp = Bool('output); // enable/disable interrupts @@ -677,7 +676,6 @@ class rocketCtrl extends Component io.dmem.req_kill := mem_kill_dmem; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; - io.dpath.ex_mem_type:= ex_reg_mem_type } } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index d172fa06..12eea831 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -11,35 +11,32 @@ class StoreMaskGen extends Component { val addr = Bits(3, 'input) val wmask = Bits(8, 'output) } - - val mask = Wire { Bits(width = io.wmask.width) } - switch (io.typ(1,0)) - { - is (MT_B) { mask <== Bits( 1,1) << io.addr(2,0).toUFix } - is (MT_H) { mask <== Bits( 3,2) << Cat(io.addr(2,1), Bits(0,1)).toUFix } - is (MT_W) { mask <== Bits( 15,4) << Cat(io.addr(2,2), Bits(0,2)).toUFix } - otherwise { mask <== Bits(255,8) } // MT_D - } - io.wmask := mask + + val word = (io.typ === MT_W) || (io.typ === MT_WU) + val half = (io.typ === MT_H) || (io.typ === MT_HU) + val byte = (io.typ === MT_B) || (io.typ === MT_BU) + + io.wmask := Mux(byte, Bits( 1,1) << io.addr(2,0).toUFix, + Mux(half, Bits( 3,2) << Cat(io.addr(2,1), Bits(0,1)).toUFix, + Mux(word, Bits( 15,4) << Cat(io.addr(2), Bits(0,2)).toUFix, + Bits(255,8)))); } class StoreDataGen extends Component { val io = new Bundle { val typ = Bits(3, 'input) - val addr = Bits(3, 'input) val din = Bits(64, 'input) val dout = Bits(64, 'output) } - val data = Wire { Bits(width = io.din.width) } - switch (io.typ(1,0)) - { - is (MT_B) { data <== Fill(8, io.din( 7,0)) } - is (MT_H) { data <== Fill(4, io.din(15,0)) } - is (MT_W) { data <== Fill(2, io.din(31,0)) } - otherwise { data <== io.din } // MT_D - } - io.dout := data + val word = (io.typ === MT_W) || (io.typ === MT_WU) + val half = (io.typ 
=== MT_H) || (io.typ === MT_HU) + val byte = (io.typ === MT_B) || (io.typ === MT_BU) + + io.dout := Mux(byte, Fill(8, io.din( 7,0)), + Mux(half, Fill(4, io.din(15,0)), + Mux(word, Fill(2, io.din(31,0)), + io.din))) } // this currently requires that CPU_DATA_BITS == 64 @@ -53,26 +50,27 @@ class LoadDataGen extends Component { val r_dout_subword = Bits(64, 'output) } + val sext = (io.typ === MT_B) || (io.typ === MT_H) || + (io.typ === MT_W) || (io.typ === MT_D) + val word = (io.typ === MT_W) || (io.typ === MT_WU) + val half = (io.typ === MT_H) || (io.typ === MT_HU) + val byte = (io.typ === MT_B) || (io.typ === MT_BU) + val shifted = io.din >> Cat(io.addr(io.addr.width-1,2), Bits(0, 5)).toUFix - val extended = Wire { Bits(width = io.din.width) } - switch (io.typ) - { - is(MT_W) { extended <== Cat(Fill(32, shifted(31)), shifted(31,0)) } - is(MT_WU) { extended <== Cat(Bits(0, 32), shifted(31,0)) } - otherwise { extended <== shifted } - } + val extended = + Mux(word, Cat(Fill(32, sext & shifted(31)), shifted(31,0)), shifted) val r_extended = Reg(extended) - val shifted_subword = r_extended >> Cat(Reg(io.addr(1,0)), Bits(0, 3)).toUFix - val extended_subword = Wire { Bits(width = io.din.width) } - switch (Reg(io.typ)) - { - is (MT_B) { extended_subword <== Cat(Fill(56, shifted_subword( 7)), shifted_subword( 7, 0)) } - is (MT_BU) { extended_subword <== Cat(Bits(0, 56), shifted_subword( 7, 0)) } - is (MT_H) { extended_subword <== Cat(Fill(48, shifted_subword(15)), shifted_subword(15, 0)) } - is (MT_HU) { extended_subword <== Cat(Bits(0, 48), shifted_subword(15, 0)) } - otherwise { extended_subword <== shifted_subword } - } + val r_sext = Reg(sext) + val r_half = Reg(half) + val r_byte = Reg(byte) + val r_addr = Reg(io.addr) + + val shifted_subword = r_extended >> Cat(r_addr(1,0), Bits(0, 3)).toUFix + val extended_subword = + Mux(r_byte, Cat(Fill(56, r_sext & shifted_subword( 7)), shifted_subword( 7,0)), + Mux(r_half, Cat(Fill(48, r_sext & shifted_subword(15)), 
shifted_subword(15,0)), + shifted_subword)) io.dout := extended io.r_dout := r_extended @@ -237,7 +235,6 @@ class MSHRFile extends Component { val req_offset = Bits(OFFSET_BITS, 'input) val req_cmd = Bits(4, 'input) val req_type = Bits(3, 'input) - val req_data = Bits(CPU_DATA_BITS, 'input) val req_tag = Bits(DCACHE_TAG_BITS, 'input) val req_sdq_id = UFix(log2up(NSDQ), 'input) @@ -252,11 +249,6 @@ class MSHRFile extends Component { val replay = (new ioDecoupled) { new Replay() }.flip() } - val idx_match = Wire { Bool() } - val pri_rdy = Wire { Bool() } - val fence = Wire { Bool() } - val sec_rdy = Wire { Bool() } - val tag_mux = new Mux1H(NMSHR, PPN_BITS) val mem_resp_idx_mux = new Mux1H(NMSHR, IDX_BITS) val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() } @@ -264,10 +256,14 @@ class MSHRFile extends Component { val replay_arb = (new Arbiter(NMSHR)) { new Replay() } val alloc_arb = (new Arbiter(NMSHR)) { Bool() } - alloc_arb.io.out.ready := io.req_val && !idx_match val tag_match = tag_mux.io.out === io.req_ppn + var idx_match = Bool(false) + var pri_rdy = Bool(false) + var fence = Bool(false) + var sec_rdy = Bool(false) + for (i <- 0 to NMSHR-1) { val mshr = new MSHR(i) @@ -295,15 +291,13 @@ class MSHRFile extends Component { mem_resp_idx_mux.io.sel(i) := (UFix(i) === io.mem_resp_tag) mem_resp_idx_mux.io.in(i) := mshr.io.idx - when (mshr.io.req_pri_rdy) { pri_rdy <== Bool(true) } - when (!mshr.io.req_pri_rdy) { fence <== Bool(true) } - when (mshr.io.req_sec_rdy) { sec_rdy <== Bool(true) } - when (mshr.io.idx_match) { idx_match <== Bool(true) } + pri_rdy = pri_rdy || mshr.io.req_pri_rdy + sec_rdy = sec_rdy || mshr.io.req_sec_rdy + fence = fence || !mshr.io.req_pri_rdy + idx_match = idx_match || mshr.io.idx_match } - pri_rdy <== Bool(false) - fence <== Bool(false) - sec_rdy <== Bool(false) - idx_match <== Bool(false) + + alloc_arb.io.out.ready := io.req_val && !idx_match meta_req_arb.io.out ^^ io.meta_req mem_req_arb.io.out ^^ io.mem_req @@ -329,9 +323,9 @@ 
class ReplayUnit extends Component { sdq_allocator.io.in := ~sdq_val val sdq_alloc_id = sdq_allocator.io.out.toUFix - val replay_retry = Wire { Bool() } - val replay_val = Reg(io.replay.valid || replay_retry, resetVal = Bool(false)) - replay_retry <== replay_val && !io.data_req.ready + val replay_val = Reg(resetVal = Bool(false)) + val replay_retry = replay_val && !io.data_req.ready + replay_val <== io.replay.valid || replay_retry val rp = Reg { new Replay() } when (io.replay.valid && io.replay.ready) { rp <== io.replay.bits } @@ -350,7 +344,7 @@ class ReplayUnit extends Component { val sdq = Mem4(NSDQ, io.sdq_enq.bits) sdq.setReadLatency(0) - sdq.setTarget('inst) +// sdq.setTarget('inst) val sdq_dout = sdq.rw(sdq_addr, io.sdq_enq.bits, sdq_wen, cs = sdq_ren || sdq_wen) val sdq_free = replay_val && !replay_retry && rp_write @@ -392,6 +386,7 @@ class WritebackUnit extends Component { val block_refill = valid && (io.refill_req.bits.addr(IDX_BITS-1,0) === addr.idx) val refill_val = io.refill_req.valid && !block_refill + wbq.io.q_reset := Bool(false) wbq.io.enq.valid := valid && Reg(io.data_req.valid && io.data_req.ready) wbq.io.enq.bits := io.data_resp wbq.io.deq.ready := io.mem_req.ready && !refill_val && (cnt === UFix(REFILL_CYCLES)) @@ -471,7 +466,7 @@ class MetaDataArray(lines: Int) extends Component { val tag_array = Mem4(lines, io.resp.tag) tag_array.setReadLatency(0) - tag_array.setTarget('inst) +// tag_array.setTarget('inst) val tag_rdata = tag_array.rw(io.req.bits.idx, io.req.bits.data.tag, io.req.valid && io.req.bits.rw, cs = io.req.valid) io.resp.valid := vd_rdata1(1).toBool @@ -490,7 +485,7 @@ class DataArray(lines: Int) extends Component { val array = Mem4(lines*REFILL_CYCLES, io.resp) array.setReadLatency(0) - array.setTarget('inst) +// array.setTarget('inst) val addr = Cat(io.req.bits.idx, io.req.bits.offset) val rdata = array.rw(addr, io.req.bits.data, io.req.valid && io.req.bits.rw, wmask, cs = io.req.valid) io.resp := rdata @@ -506,7 +501,7 @@ class 
AMOALU extends Component { val out = UFix(64, 'output) } - val signed = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MAX) + val sgned = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MAX) val sub = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) || (io.cmd === M_XA_MAX) || (io.cmd === M_XA_MAXU) val min = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) val word = (io.typ === MT_W) || (io.typ === MT_WU) @@ -516,19 +511,14 @@ class AMOALU extends Component { val cmp_lhs = Mux(word, io.lhs(31), io.lhs(63)) val cmp_rhs = Mux(word, io.rhs(31), io.rhs(63)) val cmp_diff = Mux(word, adder_out(31), adder_out(63)) - val less = Mux(cmp_lhs === cmp_rhs, cmp_diff, Mux(signed, cmp_lhs, cmp_rhs)) + val less = Mux(cmp_lhs === cmp_rhs, cmp_diff, Mux(sgned, cmp_lhs, cmp_rhs)) val cmp_out = Mux(min === less, io.lhs, io.rhs) - val alu_out = Wire { UFix(width = io.out.width) }; - switch (io.cmd) { - is (M_XA_ADD) { alu_out <== adder_out } - is (M_XA_SWAP) { alu_out <== io.rhs } - is (M_XA_AND) { alu_out <== io.lhs & io.rhs } - is (M_XA_OR) { alu_out <== io.lhs | io.rhs } - } - alu_out <== cmp_out - - io.out := alu_out + io.out := Mux(io.cmd === M_XA_ADD, adder_out, + Mux(io.cmd === M_XA_SWAP, io.rhs, + Mux(io.cmd === M_XA_AND, io.lhs & io.rhs, + Mux(io.cmd === M_XA_OR, io.lhs | io.rhs, + /* MIN[U]/MAX[U] */ cmp_out)))); } class HellaCache(lines: Int) extends Component { @@ -560,9 +550,7 @@ class HellaCache(lines: Int) extends Component { val p_store_idx = Reg() { Bits() } val p_store_cmd = Reg() { Bits() } val p_store_type = Reg() { Bits() } - - val store_data_wide = Wire { Bits(width = MEM_DATA_BITS) } - val store_wmask_wide = Wire { Bits(width = MEM_DATA_BITS) } + val r_replay_amo = Reg(resetVal = Bool(false)) val req_store = (io.cpu.req_cmd === M_XWR) val req_load = (io.cpu.req_cmd === M_XRD) @@ -578,18 +566,11 @@ class HellaCache(lines: Int) extends Component { val r_req_write = r_req_store || r_req_amo val r_req_readwrite = r_req_read || r_req_write - val nack_wb = Wire { Bool() } - val 
nack_mshr = Wire { Bool() } - val nack_sdq = Wire { Bool() } - // replay unit val replayer = new ReplayUnit val replay_amo_val = replayer.io.data_req.valid && replayer.io.data_req.bits.cmd(3).toBool - val replay_amo_rdy = replayer.io.data_req.ready - val replay_amo = replay_amo_val && replay_amo_rdy - val r_replay_amo = Reg(replay_amo, resetVal = Bool(false)) - when (replay_amo) { + when (replay_amo_val) { r_cpu_req_data <== replayer.io.data_req.bits.data } when (io.cpu.req_val) { @@ -645,12 +626,6 @@ class HellaCache(lines: Int) extends Component { data_arb.io.in(0).bits.wmask := ~UFix(0, MEM_DATA_BITS/8) data_arb.io.in(0).bits.data := io.mem.resp_data - // writeback - val wb_rdy = wb_arb.io.in(1).ready - wb_arb.io.in(1).valid := tag_miss && r_req_readwrite && dirty && !nack_wb - wb_arb.io.in(1).bits.ppn := meta.io.resp.tag - wb_arb.io.in(1).bits.idx := r_cpu_req_idx(indexmsb,indexlsb) - // load hits data_arb.io.in(4).bits.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) data_arb.io.in(4).bits.idx := io.cpu.req_idx(indexmsb,indexlsb) @@ -664,22 +639,27 @@ class HellaCache(lines: Int) extends Component { // we nack new stores if a pending store can't retire for some reason. // we drain a pending store if the CPU performs a store or a // conflictig load, or if the cache is idle, or after a miss. 
- val p_store_match = r_cpu_req_val && r_req_read && p_store_valid && (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) + val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb)) + val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) + val p_store_match = r_cpu_req_val && r_req_read && p_store_idx_match && p_store_offset_match val drain_store_val = (p_store_valid && (!io.cpu.req_val || !req_read || Reg(tag_miss))) || p_store_match data_arb.io.in(2).bits.offset := p_store_idx(offsetmsb,ramindexlsb) data_arb.io.in(2).bits.idx := p_store_idx(indexmsb,indexlsb) data_arb.io.in(2).bits.rw := Bool(true) - data_arb.io.in(2).bits.wmask := store_wmask_wide - data_arb.io.in(2).bits.data := store_data_wide data_arb.io.in(2).valid := drain_store_val - val drain_store_rdy = data_arb.io.in(2).ready - val drain_store = drain_store_val && drain_store_rdy + val drain_store = drain_store_val && data_arb.io.in(2).ready val p_store_rdy = !p_store_valid || drain_store - val p_amo = Reg(tag_hit && r_req_amo && drain_store_rdy && !p_store_match || r_replay_amo, resetVal = Bool(false)) + val p_amo = Reg(tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo, resetVal = Bool(false)) p_store_valid <== !p_store_rdy || (tag_hit && r_req_store) || p_amo + // writeback + val wb_rdy = wb_arb.io.in(1).ready && !p_store_idx_match + wb_arb.io.in(1).valid := tag_miss && r_req_readwrite && dirty && !p_store_idx_match + wb_arb.io.in(1).bits.ppn := meta.io.resp.tag + wb_arb.io.in(1).bits.idx := r_cpu_req_idx(indexmsb,indexlsb) + // tag update after a miss or a store to an exclusive clean line. 
- val clear_valid = tag_miss && r_req_readwrite && meta.io.resp.valid && (!dirty || wb_rdy && !nack_wb) + val clear_valid = tag_miss && r_req_readwrite && meta.io.resp.valid && (!dirty || wb_rdy) val set_dirty = tag_hit && !meta.io.resp.dirty && r_req_write meta.io.state_req.valid := clear_valid || set_dirty meta.io.state_req.bits.rw := Bool(true) @@ -691,7 +671,6 @@ class HellaCache(lines: Int) extends Component { val storegen = new StoreDataGen val amoalu = new AMOALU storegen.io.typ := r_cpu_req_type - storegen.io.addr := r_cpu_req_idx(offsetlsb-1, 0) storegen.io.din := r_cpu_req_data when (p_amo) { p_store_data <== amoalu.io.out @@ -705,10 +684,9 @@ class HellaCache(lines: Int) extends Component { // miss handling val mshr = new MSHRFile - mshr.io.req_val := tag_miss && r_req_readwrite && !nack_mshr + mshr.io.req_val := tag_miss && r_req_readwrite && (!dirty || wb_rdy) && (!r_req_write || replayer.io.sdq_enq.ready) mshr.io.req_ppn := io.cpu.req_ppn mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) - mshr.io.req_data := p_store_data mshr.io.req_tag := r_cpu_req_tag mshr.io.req_offset := r_cpu_req_idx(offsetmsb,0) mshr.io.req_cmd := r_cpu_req_cmd @@ -719,7 +697,7 @@ class HellaCache(lines: Int) extends Component { mshr.io.mem_req <> wb.io.refill_req mshr.io.meta_req <> meta_arb.io.in(1) mshr.io.replay <> replayer.io.replay - replayer.io.sdq_enq.valid := tag_miss && r_req_write && !nack_sdq + replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy replayer.io.sdq_enq.bits := storegen.io.dout data_arb.io.in(0).bits.idx := mshr.io.mem_resp_idx @@ -727,13 +705,13 @@ class HellaCache(lines: Int) extends Component { val replay = replayer.io.data_req.bits val stall_replay = r_replay_amo || p_amo || p_store_valid val replay_val = replayer.io.data_req.valid && !stall_replay + val replay_rdy = data_arb.io.in(1).ready data_arb.io.in(1).bits.offset := replay.offset(offsetmsb,ramindexlsb) data_arb.io.in(1).bits.idx := replay.idx 
data_arb.io.in(1).bits.rw := replay.cmd === M_XWR - data_arb.io.in(1).bits.wmask := store_wmask_wide - data_arb.io.in(1).bits.data := store_data_wide data_arb.io.in(1).valid := replay_val - replayer.io.data_req.ready := data_arb.io.in(1).ready && !stall_replay + replayer.io.data_req.ready := replay_rdy && !stall_replay + r_replay_amo <== replay_amo_val && replay_rdy && !stall_replay // store write mask generation. // assumes store replays are higher-priority than pending stores. @@ -741,14 +719,18 @@ class HellaCache(lines: Int) extends Component { val store_offset = Mux(!replay_val, p_store_idx(offsetmsb,0), replay.offset) maskgen.io.typ := Mux(!replay_val, p_store_type, replay.typ) maskgen.io.addr := store_offset(offsetlsb-1,0) - store_wmask_wide <== maskgen.io.wmask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2up(CPU_DATA_BITS/8))).toUFix + val store_wmask_wide = maskgen.io.wmask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2up(CPU_DATA_BITS/8))).toUFix val store_data = Mux(!replay_val, p_store_data, replay.data) - store_data_wide <== Fill(MEM_DATA_BITS/CPU_DATA_BITS, store_data) + val store_data_wide = Fill(MEM_DATA_BITS/CPU_DATA_BITS, store_data) + data_arb.io.in(1).bits.data := store_data_wide + data_arb.io.in(1).bits.wmask := store_wmask_wide + data_arb.io.in(2).bits.data := store_data_wide + data_arb.io.in(2).bits.wmask := store_wmask_wide // load data subword mux/sign extension. // subword loads are delayed by one cycle. 
val loadgen = new LoadDataGen - val loadgen_use_replay = Reg(replayer.io.data_req.valid && replayer.io.data_req.ready) + val loadgen_use_replay = Reg(replay_val && replay_rdy) loadgen.io.typ := Mux(loadgen_use_replay, Reg(replay.typ), r_cpu_req_type) loadgen.io.addr := Mux(loadgen_use_replay, Reg(replay.offset), r_cpu_req_idx)(ramindexlsb-1,0) loadgen.io.din := data.io.resp @@ -758,22 +740,14 @@ class HellaCache(lines: Int) extends Component { amoalu.io.lhs := loadgen.io.r_dout.toUFix amoalu.io.rhs := p_store_data.toUFix - early_nack <== early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo || r_replay_amo - - val nack_miss_wb = dirty && !wb_rdy - val nack_miss_mshr = !mshr.io.req_rdy - val nack_miss_sdq = r_req_write && !replayer.io.sdq_enq.ready - - nack_wb <== nack_miss_mshr || nack_miss_sdq || !p_store_rdy || p_store_match - nack_mshr <== nack_miss_wb || nack_miss_sdq || !p_store_rdy || p_store_match - nack_sdq <== nack_miss_wb || nack_miss_mshr || !p_store_rdy || p_store_match + early_nack <== early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo // reset and flush unit val flusher = new FlushUnit(lines) val flushed = Reg(resetVal = Bool(true)) - val fence_rdy = mshr.io.fence_rdy && wb_rdy && p_store_rdy - flushed <== flushed && !r_cpu_req_val || flusher.io.req.valid && flusher.io.req.ready - flusher.io.req.valid := r_cpu_req_val && r_req_flush && fence_rdy && !flushed + val flush_rdy = mshr.io.fence_rdy && wb_rdy && !p_store_valid + flushed <== flushed && !r_cpu_req_val || r_cpu_req_val && r_req_flush && flush_rdy && flusher.io.req.ready + flusher.io.req.valid := r_cpu_req_val && r_req_flush && flush_rdy && !flushed flusher.io.wb_req <> wb_arb.io.in(0) flusher.io.meta_req <> meta_arb.io.in(0) flusher.io.meta_resp <> meta.io.resp @@ -782,19 +756,17 @@ class HellaCache(lines: Int) extends Component { // we usually nack rather than reporting that the cache is not ready. 
// fences and flushes are the exceptions. val pending_fence = Reg(resetVal = Bool(false)) - pending_fence <== (r_cpu_req_val && r_req_fence || pending_fence) && !fence_rdy - val nack = p_store_match || - early_nack || - !fence_rdy && (r_req_fence || r_req_flush) || - !p_store_rdy && r_req_write || - !flushed && r_req_flush || - tag_miss && r_req_readwrite && (nack_miss_wb || nack_miss_mshr || nack_miss_sdq || !p_store_rdy) || - !flusher.io.req.ready + pending_fence <== (r_cpu_req_val && r_req_fence || pending_fence) && !flush_rdy + val nack_hit = p_store_match || r_req_write && !p_store_rdy + val nack_miss = dirty && !wb_rdy || !mshr.io.req_rdy || r_req_write && !replayer.io.sdq_enq.ready + val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || + !flushed && r_req_flush + val nack = early_nack || r_req_readwrite && Mux(tag_match, nack_hit, nack_miss) || nack_flush - io.cpu.req_rdy := flusher.io.req.ready && !pending_fence + io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack - io.cpu.resp_val := (tag_hit && !nack && r_req_read) || replayer.io.cpu_resp_val - io.cpu.resp_miss := tag_miss && !nack && r_req_read + io.cpu.resp_val := (tag_hit && !nack_hit && r_req_read) || replayer.io.cpu_resp_val + io.cpu.resp_miss := tag_miss && !nack_miss && r_req_read io.cpu.resp_tag := Mux(replayer.io.cpu_resp_val, replayer.io.cpu_resp_tag, r_cpu_req_tag) io.cpu.resp_data := loadgen.io.dout io.cpu.resp_data_subword := loadgen.io.r_dout_subword diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 06d3a986..29023619 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -64,7 +64,6 @@ class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle { class Arbiter[T <: Data](n: Int)(data: => T) extends Component { val io = new ioArbiter(n)(data) - val vout = Wire { Bool() } io.in(0).ready := io.out.ready for (i 
<- 1 to n-1) { @@ -75,10 +74,9 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { for (i <- 1 to n-1) dout = Mux(io.in(n-1-i).valid, io.in(n-1-i).bits, dout) - for (i <- 0 to n-2) { - when (io.in(i).valid) { vout <== Bool(true) } - } - vout <== io.in(n-1).valid + var vout = io.in(0).valid + for (i <- 1 to n-1) + vout = vout || io.in(i).valid vout ^^ io.out.valid dout ^^ io.out.bits From 1028ff7d9bbf9587f03c43a009e6e0b95e79b88c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 29 Dec 2011 23:45:09 -0800 Subject: [PATCH 0080/1087] fix multiplier bug --- rocket/src/main/scala/multiplier.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index ad49a0f9..d7dc114f 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -91,7 +91,7 @@ class rocketMultiplier extends Component { } val mul_output64 = Mux(r_fn === MUL_LO, r_prod(63,0), r_prod(127,64)) - val mul_output32 = Mux(r_fn === MUL_LO, r_prod(31,0), r_prod(63,31)) + val mul_output32 = Mux(r_fn === MUL_LO, r_prod(31,0), r_prod(63,32)) val mul_output32_ext = Cat(Fill(32, mul_output32(31)), mul_output32) val mul_output = Mux(r_dw === DW_64, mul_output64, mul_output32_ext) From f9160c53cf84c9f4f20c3d80da831afb7760abe4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 29 Dec 2011 23:46:21 -0800 Subject: [PATCH 0081/1087] fixes for correct verilog generation --- rocket/src/main/scala/nbdcache.scala | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 12eea831..498d8e44 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -141,10 +141,10 @@ class MSHR(id: Int) extends Component { val req_sec_rdy = Bool('output) val req_ppn = Bits(PPN_BITS, 'input) val req_idx = Bits(IDX_BITS, 'input) - val 
req_offset = Bits(width = OFFSET_BITS) - val req_cmd = Bits(width = 4) - val req_type = Bits(width = 3) - val req_sdq_id = UFix(width = log2up(NSDQ)) + val req_offset = Bits(OFFSET_BITS, 'input) + val req_cmd = Bits(4, 'input) + val req_type = Bits(3, 'input) + val req_sdq_id = UFix(log2up(NSDQ), 'input) val req_tag = Bits(DCACHE_TAG_BITS, 'input) val idx_match = Bool('output) @@ -343,7 +343,7 @@ class ReplayUnit extends Component { val sdq_addr = Mux(sdq_ren_retry, rp.sdq_id, Mux(sdq_ren_new, io.replay.bits.sdq_id, sdq_alloc_id)) val sdq = Mem4(NSDQ, io.sdq_enq.bits) - sdq.setReadLatency(0) + sdq.setReadLatency(SRAM_READ_LATENCY) // sdq.setTarget('inst) val sdq_dout = sdq.rw(sdq_addr, io.sdq_enq.bits, sdq_wen, cs = sdq_ren || sdq_wen) @@ -458,21 +458,25 @@ class MetaDataArray(lines: Int) extends Component { } val vd_array = Mem4(lines, Bits(width = 2)) - vd_array.setReadLatency(0) + vd_array.setReadLatency(SRAM_READ_LATENCY) val vd_wdata2 = Cat(io.state_req.bits.data.valid, io.state_req.bits.data.dirty) vd_array.write(io.state_req.bits.idx, vd_wdata2, io.state_req.valid && io.state_req.bits.rw) val vd_wdata1 = Cat(io.req.bits.data.valid, io.req.bits.data.dirty) val vd_rdata1 = vd_array.rw(io.req.bits.idx, vd_wdata1, io.req.valid && io.req.bits.rw) + // don't allow reading and writing of vd_array in same cycle. + // this could be eliminated if the read port were combinational. 
+ val vd_conflict = io.state_req.valid && (io.req.bits.idx === io.state_req.bits.idx) + val tag_array = Mem4(lines, io.resp.tag) - tag_array.setReadLatency(0) + tag_array.setReadLatency(SRAM_READ_LATENCY) // tag_array.setTarget('inst) val tag_rdata = tag_array.rw(io.req.bits.idx, io.req.bits.data.tag, io.req.valid && io.req.bits.rw, cs = io.req.valid) io.resp.valid := vd_rdata1(1).toBool io.resp.dirty := vd_rdata1(0).toBool io.resp.tag := tag_rdata - io.req.ready := Bool(true) + io.req.ready := !vd_conflict } class DataArray(lines: Int) extends Component { @@ -484,7 +488,7 @@ class DataArray(lines: Int) extends Component { val wmask = FillInterleaved(8, io.req.bits.wmask) val array = Mem4(lines*REFILL_CYCLES, io.resp) - array.setReadLatency(0) + array.setReadLatency(SRAM_READ_LATENCY) // array.setTarget('inst) val addr = Cat(io.req.bits.idx, io.req.bits.offset) val rdata = array.rw(addr, io.req.bits.data, io.req.valid && io.req.bits.rw, wmask, cs = io.req.valid) From 2f8fcebea02a036372f3738a29890f128d7778c9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 1 Jan 2012 16:09:40 -0800 Subject: [PATCH 0082/1087] remove datapath register resets resets --- rocket/src/main/scala/divider.scala | 18 ++++----- rocket/src/main/scala/dpath.scala | 53 +++++++++++++------------- rocket/src/main/scala/dpath_util.scala | 18 ++++----- rocket/src/main/scala/dtlb.scala | 10 ++--- rocket/src/main/scala/itlb.scala | 8 ++-- rocket/src/main/scala/ptw.scala | 10 ++--- 6 files changed, 58 insertions(+), 59 deletions(-) diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 66c2617f..4b446d77 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -41,16 +41,16 @@ class rocketDivider(width : Int) extends Component { val state = Reg(resetVal = s_ready); val count_bits = java.math.BigInteger.valueOf(width).bitLength(); - val count = Reg(resetVal = UFix(0, count_bits)); - val divby0 = Reg(resetVal = 
Bool(false)); - val neg_quo = Reg(resetVal = Bool(false)); - val neg_rem = Reg(resetVal = Bool(false)); - val reg_waddr = Reg(resetVal = UFix(0, 5)); - val rem = Reg(resetVal = Bool(false)); - val half = Reg(resetVal = Bool(false)); + val count = Reg() { UFix() }; + val divby0 = Reg() { Bool() }; + val neg_quo = Reg() { Bool() }; + val neg_rem = Reg() { Bool() }; + val reg_waddr = Reg() { UFix() }; + val rem = Reg() { Bool() }; + val half = Reg() { Bool() }; - val divisor = Reg(resetVal = UFix(0, width)); - val remainder = Reg(resetVal = UFix(0, 2*width+1)); + val divisor = Reg() { UFix() }; + val remainder = Reg() { UFix() }; val subtractor = remainder(2*width, width).toUFix - divisor; val tc = (io.div_fn === DIV_D) || (io.div_fn === DIV_R); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a907faf6..2c183c62 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -66,31 +66,31 @@ class rocketDpath extends Component // instruction decode definitions val id_reg_valid = Reg(resetVal = Bool(false)); - val id_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); - val id_reg_pc_plus4 = Reg(resetVal = UFix(0,VADDR_BITS)); val id_reg_inst = Reg(resetVal = NOP); + val id_reg_pc = Reg() { UFix() }; + val id_reg_pc_plus4 = Reg() { UFix() }; // execute definitions val ex_reg_valid = Reg(resetVal = Bool(false)); - val ex_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); - val ex_reg_pc_plus4 = Reg(resetVal = UFix(0,VADDR_BITS)); - val ex_reg_inst = Reg(resetVal = Bits(0,32)); - val ex_reg_raddr2 = Reg(resetVal = UFix(0,5)); - val ex_reg_raddr1 = Reg(resetVal = UFix(0,5)); - val ex_reg_rs2 = Reg(resetVal = Bits(0,64)); - val ex_reg_rs1 = Reg(resetVal = Bits(0,64)); - val ex_reg_waddr = Reg(resetVal = UFix(0,5)); - val ex_reg_ctrl_sel_alu2 = Reg(resetVal = A2_X); - val ex_reg_ctrl_sel_alu1 = Reg(resetVal = A1_X); + val ex_reg_pc = Reg() { UFix() }; + val ex_reg_pc_plus4 = Reg() { UFix() }; + val ex_reg_inst = Reg() { 
Bits() }; + val ex_reg_raddr2 = Reg() { UFix() }; + val ex_reg_raddr1 = Reg() { UFix() }; + val ex_reg_rs2 = Reg() { Bits() }; + val ex_reg_rs1 = Reg() { Bits() }; + val ex_reg_waddr = Reg() { UFix() }; + val ex_reg_ctrl_sel_alu2 = Reg() { UFix() }; + val ex_reg_ctrl_sel_alu1 = Reg() { UFix() }; val ex_reg_ctrl_eret = Reg(resetVal = Bool(false)); - val ex_reg_ctrl_fn_dw = Reg(resetVal = DW_X); - val ex_reg_ctrl_fn_alu = Reg(resetVal = FN_X); + val ex_reg_ctrl_fn_dw = Reg() { UFix() }; + val ex_reg_ctrl_fn_alu = Reg() { UFix() }; val ex_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val ex_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); - val ex_reg_ctrl_mul_fn = Reg(resetVal = MUL_X); + val ex_reg_ctrl_mul_fn = Reg() { UFix() }; val ex_reg_ctrl_div_val = Reg(resetVal = Bool(false)); - val ex_reg_ctrl_div_fn = Reg(resetVal = DIV_X); - val ex_reg_ctrl_sel_wb = Reg(resetVal = WB_X); + val ex_reg_ctrl_div_fn = Reg() { UFix() }; + val ex_reg_ctrl_sel_wb = Reg() { UFix() }; val ex_reg_ctrl_wen = Reg(resetVal = Bool(false)); val ex_reg_ctrl_ren_pcr = Reg(resetVal = Bool(false)); val ex_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); @@ -98,22 +98,22 @@ class rocketDpath extends Component // memory definitions val mem_reg_valid = Reg(resetVal = Bool(false)); - val mem_reg_pc = Reg(resetVal = UFix(0,VADDR_BITS)); - val mem_reg_waddr = Reg(resetVal = UFix(0,5)); - val mem_reg_wdata = Reg(resetVal = Bits(0,64)); - val mem_reg_raddr2 = Reg(resetVal = UFix(0,5)); + val mem_reg_pc = Reg() { UFix() }; + val mem_reg_waddr = Reg() { UFix() }; + val mem_reg_wdata = Reg() { Bits() }; + val mem_reg_raddr2 = Reg() { UFix() }; val mem_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); // writeback definitions - val wb_reg_waddr = Reg(resetVal = UFix(0,5)); - val wb_reg_wdata = Reg(resetVal = Bits(0,64)); + val wb_reg_waddr = Reg() { UFix() }; + val wb_reg_wdata = Reg() { Bits() }; val 
wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); val r_dmem_resp_val = Reg(resetVal = Bool(false)); - val r_dmem_resp_waddr = Reg(resetVal = UFix(0,5)); + val r_dmem_resp_waddr = Reg() { UFix() }; // instruction fetch stage val if_pc_plus4 = if_reg_pc + UFix(4); @@ -123,10 +123,9 @@ class rocketDpath extends Component val ex_sign_extend_split = Cat(Fill(52, ex_reg_inst(31)), ex_reg_inst(31,27), ex_reg_inst(16,10)); - // FIXME: which bits to extract should be calculated based on VADDR_BITS val branch_adder_rhs = - Mux(io.ctrl.ex_jmp, Cat(Fill(17, ex_reg_inst(31)), ex_reg_inst(31,7), UFix(0,1)), - Cat(ex_sign_extend_split(41,0), UFix(0, 1))); + Mux(io.ctrl.ex_jmp, Cat(Fill(VADDR_BITS-26, ex_reg_inst(31)), ex_reg_inst(31,7), UFix(0,1)), + Cat(ex_sign_extend_split(VADDR_BITS-2,0), UFix(0, 1))); val ex_branch_target = ex_reg_pc + branch_adder_rhs.toUFix; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 9f6e9cc7..3d135748 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -65,17 +65,17 @@ class rocketDpathPCR extends Component { val io = new ioDpathPCR(); - val reg_epc = Reg(resetVal = UFix(0, VADDR_BITS)); - val reg_badvaddr = Reg(resetVal = UFix(0, VADDR_BITS)); - val reg_ebase = Reg(resetVal = UFix(0, VADDR_BITS)); - val reg_count = Reg(resetVal = UFix(0, 32)); - val reg_compare = Reg(resetVal = UFix(0, 32)); - val reg_cause = Reg(resetVal = Bits(0, 5)); + val reg_epc = Reg() { UFix() }; + val reg_badvaddr = Reg() { UFix() }; + val reg_ebase = Reg() { UFix() }; + val reg_count = Reg() { UFix() }; + val reg_compare = Reg() { UFix() }; + val reg_cause = Reg() { Bits() }; val reg_tohost = Reg(resetVal = Bits(0, 32)); val reg_fromhost = Reg(resetVal = Bits(0, 32)); - val reg_k0 = Reg(resetVal = Bits(0, 64)); - val reg_k1 = Reg(resetVal = Bits(0, 64)); - val reg_ptbr = Reg(resetVal = UFix(0, PADDR_BITS)); + val reg_k0 = Reg() 
{ Bits() }; + val reg_k1 = Reg() { Bits() }; + val reg_ptbr = Reg() { UFix() }; val reg_error_mode = Reg(resetVal = Bool(false)); val reg_status_vm = Reg(resetVal = Bool(false)); diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 921bbacc..fe965fc3 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -42,12 +42,12 @@ class rocketDTLB(entries: Int) extends Component val s_ready :: s_request :: s_wait :: Nil = Enum(3) { UFix() }; val state = Reg(resetVal = s_ready); - val r_cpu_req_vpn = Reg(resetVal = Bits(0, VPN_BITS)); val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); - val r_cpu_req_asid = Reg(resetVal = Bits(0,ASID_BITS)); - val r_refill_tag = Reg(resetVal = Bits(0,ASID_BITS+VPN_BITS)); - val r_refill_waddr = Reg(resetVal = UFix(0,addr_bits)); + val r_cpu_req_vpn = Reg() { Bits() } + val r_cpu_req_cmd = Reg() { Bits() } + val r_cpu_req_asid = Reg() { Bits() } + val r_refill_tag = Reg() { Bits() } + val r_refill_waddr = Reg() { UFix() } val repl_count = Reg(resetVal = UFix(0,addr_bits)); when (io.cpu.req_val && io.cpu.req_rdy) { diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 95fde913..827d731c 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -96,11 +96,11 @@ class rocketITLB(entries: Int) extends Component val s_ready :: s_request :: s_wait :: Nil = Enum(3) { UFix() }; val state = Reg(resetVal = s_ready); - val r_cpu_req_vpn = Reg(resetVal = Bits(0, VPN_BITS)); val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_asid = Reg(resetVal = Bits(0,ASID_BITS)); - val r_refill_tag = Reg(resetVal = Bits(0, ASID_BITS+VPN_BITS)); - val r_refill_waddr = Reg(resetVal = UFix(0, addr_bits)); + val r_cpu_req_vpn = Reg() { Bits() }; + val r_cpu_req_asid = Reg() { Bits() }; + val r_refill_tag = Reg() { Bits() }; + val r_refill_waddr = Reg() { UFix() }; val repl_count = 
Reg(resetVal = UFix(0, addr_bits)); when (io.cpu.req_val && io.cpu.req_rdy) { diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index a5b60e1c..eadb2643 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -60,12 +60,12 @@ class rocketPTW extends Component val s_ready :: s_l1_req :: s_l1_wait :: s_l1_fake :: s_l2_req :: s_l2_wait :: s_l2_fake:: s_l3_req :: s_l3_wait :: s_done :: s_error :: Nil = Enum(11) { UFix() }; val state = Reg(resetVal = s_ready); - val r_req_vpn = Reg(resetVal = Bits(0,VPN_BITS)); - val r_req_dest = Reg(resetVal = Bool(false)); // 0 = ITLB, 1 = DTLB + val r_req_vpn = Reg() { Bits() } + val r_req_dest = Reg() { Bool() } - val req_addr = Reg(resetVal = UFix(0,PADDR_BITS)); - val r_resp_ppn = Reg(resetVal = Bits(0,PPN_BITS)); - val r_resp_perm = Reg(resetVal = Bits(0,PERM_BITS)); + val req_addr = Reg() { UFix() }; + val r_resp_ppn = Reg() { Bits() }; + val r_resp_perm = Reg() { Bits() }; val vpn_idx = Mux(state === s_l2_wait, r_req_vpn(9,0), r_req_vpn(19,10)); val req_val = io.itlb.req_val || io.dtlb.req_val; From efc623cc366f7788f7181e0a28a2d7b5223395a4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 1 Jan 2012 17:04:14 -0800 Subject: [PATCH 0083/1087] validate BTB address and use BTB for J/JAL/JR/JALR even if we weren't using the BTB for JR/JALR, we'd need to flush the BTB on FENCE.I and on context switches, but validating its result suffices instead. 
--- rocket/src/main/scala/ctrl.scala | 37 +++++++++++++++------------ rocket/src/main/scala/dpath.scala | 8 +++--- rocket/src/main/scala/dpath_alu.scala | 2 ++ 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 1d5777c4..33474c2c 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -39,6 +39,7 @@ class ioCtrlDpath extends Bundle() val wen = Bool('output); // instruction in execute is an unconditional jump val ex_jmp = Bool('output); + val ex_jr = Bool('output); // enable/disable interrupts val irq_enable = Bool('output); val irq_disable = Bool('output); @@ -49,6 +50,7 @@ class ioCtrlDpath extends Bundle() // inputs from datapath val xcpt_ma_inst = Bool('input); // high on a misaligned/illegal virtual PC val btb_hit = Bool('input); + val btb_match = Bool('input); val inst = Bits(32, 'input); val br_eq = Bool('input); val br_lt = Bool('input); @@ -418,6 +420,11 @@ class rocketCtrl extends Component } + val jr_taken = (ex_reg_br_type === BR_JR); + val j_taken = (ex_reg_br_type === BR_J); + io.dpath.ex_jmp := j_taken; + io.dpath.ex_jr := jr_taken; + val beq = io.dpath.br_eq; val bne = ~io.dpath.br_eq; val blt = io.dpath.br_lt; @@ -431,11 +438,8 @@ class rocketCtrl extends Component (ex_reg_br_type === BR_LT) & blt | (ex_reg_br_type === BR_LTU) & bltu | (ex_reg_br_type === BR_GE) & bge | - (ex_reg_br_type === BR_GEU) & bgeu; - - val jr_taken = (ex_reg_br_type === BR_JR); - val j_taken = (ex_reg_br_type === BR_J); - io.dpath.ex_jmp := j_taken; + (ex_reg_br_type === BR_GEU) & bgeu | + j_taken; // treat J/JAL like a taken branch val mem_reg_div_mul_val = Reg(){Bool()}; val mem_reg_eret = Reg(){Bool()}; @@ -537,7 +541,9 @@ class rocketCtrl extends Component val kill_mem = mem_hazard || mem_exception; // control transfer from ex/mem - val take_pc_ex = (ex_reg_btb_hit != br_taken) || jr_taken || j_taken + val ex_btb_match = ex_reg_btb_hit && 
io.dpath.btb_match + val br_jr_taken = br_taken || jr_taken + val take_pc_ex = !ex_btb_match && br_jr_taken || ex_reg_btb_hit && !br_jr_taken val take_pc_mem = mem_exception || mem_reg_eret || replay_mem val take_pc = take_pc_ex || take_pc_mem @@ -553,17 +559,16 @@ class rocketCtrl extends Component mem_reg_kill_dmem <== ex_kill_dmem io.dpath.sel_pc := - Mux(replay_mem, PC_MEM, // dtlb miss - Mux(mem_exception, PC_EVEC, // exception - Mux(mem_reg_eret, PC_PCR, // eret instruction - Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch - Mux(j_taken, PC_BR, // jump - Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch - Mux(jr_taken, PC_JR, // jump register - Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB - PC_4)))))))); // PC+4 + Mux(replay_mem, PC_MEM, // dtlb miss + Mux(mem_exception, PC_EVEC, // exception + Mux(mem_reg_eret, PC_PCR, // eret instruction + Mux(ex_reg_btb_hit && !br_jr_taken, PC_EX4, // mispredicted not taken branch + Mux(!ex_btb_match && br_taken, PC_BR, // mispredicted taken branch + Mux(!ex_btb_match && jr_taken, PC_JR, // mispredicted jump register + Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB + PC_4))))))); // PC+4 - io.dpath.wen_btb := ~ex_reg_btb_hit & br_taken & ~kill_ex & ~kill_mem; + io.dpath.wen_btb := !ex_btb_match && br_jr_taken && !kill_ex; io.dpath.stallf := ~take_pc & diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 2c183c62..a2ce0aa6 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -47,7 +47,8 @@ class rocketDpath extends Component val alu = new rocketDpathALU(); val ex_alu_out = alu.io.out; - val ex_jr_target = ex_alu_out(VADDR_BITS-1,0); + val ex_alu_adder_out = alu.io.adder_out; + val ex_jr_target = ex_alu_adder_out(VADDR_BITS-1,0); val div = new rocketDivider(64); val div_result = div.io.div_result_bits; @@ -129,7 +130,7 @@ class rocketDpath extends Component val ex_branch_target = ex_reg_pc + 
branch_adder_rhs.toUFix; - btb.io.correct_target := ex_branch_target; + btb.io.correct_target := Mux(io.ctrl.ex_jr, ex_jr_target, ex_branch_target); val if_next_pc = Mux(io.ctrl.sel_pc === PC_BTB, if_btb_target, @@ -156,6 +157,7 @@ class rocketDpath extends Component btb.io.hit ^^ io.ctrl.btb_hit; btb.io.wen ^^ io.ctrl.wen_btb; btb.io.correct_pc4 := ex_reg_pc_plus4; + io.ctrl.btb_match := id_reg_pc === btb.io.correct_target; // instruction decode stage when (!io.ctrl.stalld) { @@ -297,7 +299,7 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req_addr := ex_alu_out(VADDR_BITS-1,0); + io.dmem.req_addr := ex_alu_adder_out(VADDR_BITS-1,0); io.dmem.req_data := ex_reg_rs2; io.dmem.req_tag := ex_reg_waddr; diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 2635f89e..35b72eac 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -14,6 +14,7 @@ class ioALU extends Bundle(){ val in2 = UFix(64, 'input); val in1 = UFix(64, 'input); val out = UFix(64, 'output); + val adder_out = UFix(64, 'output); } class rocketDpathALU extends Component @@ -54,6 +55,7 @@ class rocketDpathALU extends Component val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) io.out := Cat(out_hi, out64(31,0)).toUFix + io.adder_out := adder_out } } From eb657dd2504b39bdbc7e349baf0d480ba1b2f669 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 1 Jan 2012 21:28:38 -0800 Subject: [PATCH 0084/1087] reduce superfluous replays we only replay after a cache miss if we mis-scheduled the use of a load. 
--- rocket/src/main/scala/consts.scala | 7 ++++--- rocket/src/main/scala/ctrl.scala | 12 ++++++++++-- rocket/src/main/scala/dpath.scala | 20 ++++++++++++-------- rocket/src/main/scala/dpath_alu.scala | 12 ++++++------ 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index fa7b6ddb..6d0b92ce 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -68,9 +68,10 @@ object Constants val WB_X = UFix(0, 3); val WB_PC = UFix(0, 3); - val WB_ALU = UFix(1, 3); - val WB_PCR = UFix(2, 3); - val WB_TSC = UFix(3, 3); + val WB_PCR = UFix(1, 3); + val WB_ALU = UFix(2, 3); + val WB_TSC = UFix(4, 3); + val WB_IRT = UFix(5, 3); val N = UFix(0, 1); val Y = UFix(1, 1); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 33474c2c..ee18bc63 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -59,6 +59,7 @@ class ioCtrlDpath extends Bundle() val div_result_val = Bool('input); val mul_rdy = Bool('input); val mul_result_val = Bool('input); + val mem_lu_bypass = Bool('input); val ex_waddr = UFix(5,'input); // write addr from execute stage val mem_waddr = UFix(5,'input); // write addr from memory stage val wb_waddr = UFix(5,'input); // write addr from writeback stage @@ -261,6 +262,8 @@ class rocketCtrl extends Component MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y), MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y), RDTIME-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + RDCYCLE-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + 
RDINSTRET->List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), // Instructions that have not yet been implemented // Faking these for now so akaros will boot @@ -549,7 +552,7 @@ class rocketCtrl extends Component // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions - val ex_hazard = io.dmem.resp_miss || mem_reg_privileged || mem_reg_flush_inst + val ex_hazard = dcache_miss && Reg(io.dpath.mem_lu_bypass) || mem_reg_privileged || mem_reg_flush_inst val mem_kill_ex = kill_mem || take_pc_mem val kill_ex = mem_kill_ex || ex_hazard || !(io.dmem.req_rdy && io.dtlb_rdy) && ex_reg_mem_val val ex_kill_dtlb = mem_kill_ex || ex_hazard || !io.dmem.req_rdy @@ -600,7 +603,12 @@ class rocketCtrl extends Component ((id_ren1 && (id_raddr1 === io.dpath.mem_waddr)) || (id_ren2 && (id_raddr2 === io.dpath.mem_waddr))); - val lu_stall = lu_stall_ex || lu_stall_mem; + val lu_stall_wb = + dcache_miss && + ((id_ren1 && (id_raddr1 === io.dpath.wb_waddr)) || + (id_ren2 && (id_raddr2 === io.dpath.wb_waddr))); + + val lu_stall = lu_stall_ex || lu_stall_mem || lu_stall_wb; // check for divide and multiply instructions in ex,mem,wb stages val dm_stall_ex = diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a2ce0aa6..42afd337 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -194,24 +194,27 @@ class rocketDpath extends Component UFix(0, 5))))); // bypass muxes + val rs1_mem_lu_bypass = id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr; val id_rs1 = Mux(io.ctrl.div_wb, div_result, Mux(io.ctrl.mul_wb, mul_result, Mux(id_raddr1 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr1 === ex_reg_waddr, ex_wdata, Mux(id_raddr1 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata, - Mux(id_raddr1 != UFix(0, 5) && 
io.ctrl.mem_load && id_raddr1 === mem_reg_waddr, io.dmem.resp_data, + Mux(rs1_mem_lu_bypass, io.dmem.resp_data, Mux(id_raddr1 != UFix(0, 5) && r_dmem_resp_val && id_raddr1 === r_dmem_resp_waddr, io.dmem.resp_data_subword, Mux(id_raddr1 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr1 === wb_reg_waddr, wb_reg_wdata, id_rdata1))))))); + val rs2_mem_lu_bypass = id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr; val id_rs2 = Mux(id_raddr2 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr2 === ex_reg_waddr, ex_wdata, Mux(id_raddr2 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata, - Mux(id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr, io.dmem.resp_data, + Mux(rs2_mem_lu_bypass, io.dmem.resp_data, Mux(id_raddr2 != UFix(0, 5) && r_dmem_resp_val && id_raddr2 === r_dmem_resp_waddr, io.dmem.resp_data_subword, Mux(id_raddr2 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr2 === wb_reg_waddr, wb_reg_wdata, id_rdata2))))); + io.ctrl.mem_lu_bypass := rs1_mem_lu_bypass || rs2_mem_lu_bypass; io.ctrl.inst := id_reg_inst; // execute stage @@ -251,16 +254,14 @@ class rocketDpath extends Component } val ex_alu_in2 = - Mux(ex_reg_ctrl_sel_alu2 === A2_0, UFix(0, 64), Mux(ex_reg_ctrl_sel_alu2 === A2_SEXT, ex_sign_extend, Mux(ex_reg_ctrl_sel_alu2 === A2_SPLIT, ex_sign_extend_split, Mux(ex_reg_ctrl_sel_alu2 === A2_RS2, ex_reg_rs2, - UFix(0, 64))))); + UFix(0, 64)))); // A2_0 val ex_alu_in1 = Mux(ex_reg_ctrl_sel_alu1 === A1_RS1, ex_reg_rs1, - Mux(ex_reg_ctrl_sel_alu1 === A1_LUI, Cat(Fill(32, ex_reg_inst(26)),ex_reg_inst(26,7),UFix(0, 12)), - UFix(0, 64))); + Cat(Fill(32, ex_reg_inst(26)),ex_reg_inst(26,7),UFix(0, 12))); // A1_LUI val ex_alu_shamt = Cat(ex_alu_in2(5) & ex_reg_ctrl_fn_dw === DW_64, ex_alu_in2(4,0)).toUFix; @@ -330,15 +331,18 @@ class rocketDpath extends Component // time stamp counter val tsc_reg = Reg(resetVal = UFix(0,64)); tsc_reg <== tsc_reg + UFix(1); + // instructions retired counter + val irt_reg = 
Reg(resetVal = UFix(0,64)); + when (mem_reg_valid) { irt_reg <== irt_reg + UFix(1); } // writeback select mux ex_wdata := Mux(ex_reg_ctrl_ll_wb || ex_reg_ctrl_wen_pcr, ex_reg_rs1, Mux(ex_reg_ctrl_sel_wb === WB_PC, Cat(Fill(64-VADDR_BITS, ex_reg_pc_plus4(VADDR_BITS-1)), ex_reg_pc_plus4), - Mux(ex_reg_ctrl_sel_wb === WB_ALU, ex_alu_out, Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, - Bits(0, 64)))))).toBits; + Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, + ex_alu_out))))).toBits; // WB_ALU // memory stage mem_reg_pc <== ex_reg_pc; diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 35b72eac..114acd57 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -24,11 +24,11 @@ class rocketDpathALU extends Component // ADD, SUB val sub = (io.fn === FN_SUB) || (io.fn === FN_SLT) || (io.fn === FN_SLTU) val adder_rhs = Mux(sub, ~io.in2, io.in2) - val adder_out = (io.in1 + adder_rhs + sub.toUFix)(63,0) + val sum = (io.in1 + adder_rhs + sub.toUFix)(63,0) // SLT, SLTU - val less = Mux(io.in1(63) === io.in2(63), adder_out(63), io.in1(63)) - val lessu = Mux(io.in1(63) === io.in2(63), adder_out(63), io.in2(63)) + val less = Mux(io.in1(63) === io.in2(63), sum(63), io.in1(63)) + val lessu = Mux(io.in1(63) === io.in2(63), sum(63), io.in2(63)) // SLL, SRL, SRA val sra = (io.fn === FN_SRA) @@ -42,8 +42,8 @@ class rocketDpathALU extends Component val out64 = Wire { Bits(64) } switch(io.fn) { - is(FN_ADD) { out64 <== adder_out } - is(FN_SUB) { out64 <== adder_out } + is(FN_ADD) { out64 <== sum } + is(FN_SUB) { out64 <== sum } is(FN_SLT) { out64 <== less } is(FN_SLTU) { out64 <== lessu } is(FN_AND) { out64 <== io.in1 & io.in2 } @@ -55,7 +55,7 @@ class rocketDpathALU extends Component val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) io.out := Cat(out_hi, out64(31,0)).toUFix - io.adder_out := adder_out + io.adder_out := sum } } From 
ffe23a1ee815329fa613c14fa6765747be13bef2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 2 Jan 2012 00:25:11 -0800 Subject: [PATCH 0085/1087] fix WAW hazard handling --- rocket/src/main/scala/cpu.scala | 1 + rocket/src/main/scala/ctrl.scala | 82 +++++++++++---------------- rocket/src/main/scala/ctrl_util.scala | 2 - rocket/src/main/scala/dcache.scala | 1 + rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/nbdcache.scala | 1 + 6 files changed, 37 insertions(+), 51 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 08e5a0ea..950adc54 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -103,6 +103,7 @@ class rocketProc extends Component ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; ctrl.io.dmem.resp_nack := arb.io.cpu.resp_nack; dpath.io.dmem.resp_val := arb.io.cpu.resp_val; + dpath.io.dmem.resp_replay := io.dmem.resp_replay; dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; dpath.io.dmem.resp_data := arb.io.cpu.resp_data; dpath.io.dmem.resp_data_subword := io.dmem.resp_data_subword; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ee18bc63..e6406f81 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -302,10 +302,7 @@ class rocketCtrl extends Component val id_raddr2 = io.dpath.inst(21,17); val id_raddr1 = io.dpath.inst(26,22); - val id_waddr = io.dpath.inst(31,27); - - val id_ren2 = id_renx2.toBool; - val id_ren1 = id_renx1.toBool; + val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27)); val id_console_out_val = id_wen_pcr.toBool && (id_raddr2 === PCR_CONSOLE); @@ -329,7 +326,6 @@ class rocketCtrl extends Component val id_stall_raddr2 = sboard.io.stalla; val id_stall_raddr1 = sboard.io.stallb; val id_stall_waddr = sboard.io.stallc; - val id_stall_ra = sboard.io.stallra; val id_reg_btb_hit = Reg(resetVal = Bool(false)); val id_reg_xcpt_itlb = Reg(resetVal = Bool(false)); @@ -581,62 
+577,50 @@ class rocketCtrl extends Component io.dpath.stalld ); - // check for loads and amos in execute and mem stages to detect load/use hazards + // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. val ex_mem_cmd_load = ex_reg_mem_val && ((ex_reg_mem_cmd === M_XRD) || ex_reg_mem_cmd(3).toBool); - - val lu_stall_ex = - ex_mem_cmd_load && - ((id_ren1 && (id_raddr1 === io.dpath.ex_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.ex_waddr))); + val data_hazard_ex = + (ex_mem_cmd_load || ex_reg_div_mul_val) && + ((id_renx1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || + (id_renx2.toBool && (id_raddr2 === io.dpath.ex_waddr)) || + (id_wen.toBool && (id_waddr === io.dpath.ex_waddr))); + // stall for RAW/WAW hazards on LB/LH and mul/div in memory stage. + // stall for WAW-but-not-RAW hazards on LW/LD/AMO. + val mem_mem_cmd_load = + mem_reg_mem_val && ((mem_reg_mem_cmd === M_XRD) || mem_reg_mem_cmd(3).toBool); val mem_mem_cmd_load_bh = - mem_reg_mem_val && - (mem_reg_mem_cmd === M_XRD) && + mem_mem_cmd_load && ((mem_reg_mem_type === MT_B) || (mem_reg_mem_type === MT_BU) || (mem_reg_mem_type === MT_H) || (mem_reg_mem_type === MT_HU)); - - val lu_stall_mem = - mem_mem_cmd_load_bh && - ((id_ren1 && (id_raddr1 === io.dpath.mem_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.mem_waddr))); + val raw_hazard_mem = + (id_renx1.toBool && (id_raddr1 === io.dpath.mem_waddr)) || + (id_renx2.toBool && (id_raddr2 === io.dpath.mem_waddr)); + val waw_hazard_mem = + (id_wen.toBool && (id_waddr === io.dpath.mem_waddr)); + val data_hazard_mem = + (mem_mem_cmd_load_bh || mem_reg_div_mul_val) && (raw_hazard_mem || waw_hazard_mem) || + mem_mem_cmd_load && (!raw_hazard_mem && waw_hazard_mem) - val lu_stall_wb = - dcache_miss && - ((id_ren1 && (id_raddr1 === io.dpath.wb_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.wb_waddr))); + // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
+ val data_hazard_wb = + (dcache_miss || wb_reg_div_mul_val) && + ((id_renx1.toBool && (id_raddr1 === io.dpath.wb_waddr)) || + (id_renx2.toBool && (id_raddr2 === io.dpath.wb_waddr)) || + (id_wen.toBool && (id_waddr === io.dpath.wb_waddr))); - val lu_stall = lu_stall_ex || lu_stall_mem || lu_stall_wb; - - // check for divide and multiply instructions in ex,mem,wb stages - val dm_stall_ex = - ex_reg_div_mul_val && - ((id_ren1 && (id_raddr1 === io.dpath.ex_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.ex_waddr))); - - val dm_stall_mem = - mem_reg_div_mul_val && - ((id_ren1 && (id_raddr1 === io.dpath.mem_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.mem_waddr))); - - val dm_stall_wb = - wb_reg_div_mul_val && - ((id_ren1 && (id_raddr1 === io.dpath.wb_waddr)) || - (id_ren2 && (id_raddr2 === io.dpath.wb_waddr))); - - val dm_stall = dm_stall_ex || dm_stall_mem || dm_stall_wb; + val data_hazard = data_hazard_ex || data_hazard_mem || data_hazard_wb; val ctrl_stalld = !take_pc && ( - dm_stall || - lu_stall || - id_ren2 && id_stall_raddr2 || - id_ren1 && id_stall_raddr1 || - (id_sel_wa === WA_RD) && id_stall_waddr || - (id_sel_wa === WA_RA) && id_stall_ra || + data_hazard || + id_renx2.toBool && id_stall_raddr2 || + id_renx1.toBool && id_stall_raddr1 || + id_wen.toBool && id_stall_waddr || id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || id_console_out_val && !io.console.rdy || @@ -662,8 +646,8 @@ class rocketCtrl extends Component io.dpath.killm := kill_mem; io.dpath.mem_load := mem_reg_mem_val && ((mem_reg_mem_cmd === M_XRD) || mem_reg_mem_cmd(3).toBool); - io.dpath.ren2 := id_ren2; - io.dpath.ren1 := id_ren1; + io.dpath.ren2 := id_renx2.toBool; + io.dpath.ren1 := id_renx1.toBool; io.dpath.sel_alu2 := id_sel_alu2; io.dpath.sel_alu1 := id_sel_alu1.toBool; io.dpath.fn_dw := id_fn_dw.toBool; diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index 
73661fa6..55744650 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -18,7 +18,6 @@ class ioCtrlSboard extends Bundle() val stalla = Bool('output); val stallb = Bool('output); val stallc = Bool('output); - val stallra = Bool('output); } class rocketCtrlSboard extends Component @@ -34,7 +33,6 @@ class rocketCtrlSboard extends Component io.stalla := reg_busy(io.raddra).toBool; io.stallb := reg_busy(io.raddrb).toBool; io.stallc := reg_busy(io.raddrc).toBool; - io.stallra := reg_busy(RA).toBool; } class ioCtrlCnt extends Bundle() diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 31266888..fd7bfdea 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -21,6 +21,7 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { val resp_miss = Bool('output); val resp_nack = Bool('output); val resp_val = Bool('output); + val resp_replay = Bool('output); val resp_data = Bits(64, 'output); val resp_data_subword = Bits(64, 'output); val resp_tag = Bits(DCACHE_TAG_BITS, 'output); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 42afd337..4f506cf3 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -11,6 +11,7 @@ class ioDpathDmem extends Bundle() val req_tag = UFix(CPU_TAG_BITS, 'output); val req_data = Bits(64, 'output); val resp_val = Bool('input); + val resp_replay = Bool('input); val resp_tag = Bits(CPU_TAG_BITS, 'input); val resp_data = Bits(64, 'input); val resp_data_subword = Bits(64, 'input); diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 498d8e44..70977b4d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -770,6 +770,7 @@ class HellaCache(lines: Int) extends Component { io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence 
io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack io.cpu.resp_val := (tag_hit && !nack_hit && r_req_read) || replayer.io.cpu_resp_val + io.cpu.resp_replay := replayer.io.cpu_resp_val io.cpu.resp_miss := tag_miss && !nack_miss && r_req_read io.cpu.resp_tag := Mux(replayer.io.cpu_resp_val, replayer.io.cpu_resp_tag, r_cpu_req_tag) io.cpu.resp_data := loadgen.io.dout From 3045b33460c6d6a0ccd784e37f6df8a13b3b47e9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 2 Jan 2012 02:51:30 -0800 Subject: [PATCH 0086/1087] remove second RF write port load miss writebacks are treated like mul/div now. --- rocket/src/main/scala/cpu.scala | 4 +- rocket/src/main/scala/ctrl.scala | 56 +++++++++++++------------- rocket/src/main/scala/ctrl_util.scala | 13 +++--- rocket/src/main/scala/dpath.scala | 49 +++++++++++----------- rocket/src/main/scala/dpath_util.scala | 2 - rocket/src/main/scala/ptw.scala | 5 ++- 6 files changed, 63 insertions(+), 66 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 950adc54..bb59e054 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -101,9 +101,11 @@ class rocketProc extends Component arb.io.cpu.req_tag := dpath.io.dmem.req_tag; ctrl.io.dmem.req_rdy := dtlb.io.cpu.req_rdy && arb.io.cpu.req_rdy; ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; + ctrl.io.dmem.resp_replay:= arb.io.cpu.resp_replay; ctrl.io.dmem.resp_nack := arb.io.cpu.resp_nack; dpath.io.dmem.resp_val := arb.io.cpu.resp_val; - dpath.io.dmem.resp_replay := io.dmem.resp_replay; + dpath.io.dmem.resp_miss := arb.io.cpu.resp_miss; + dpath.io.dmem.resp_replay := arb.io.cpu.resp_replay; dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; dpath.io.dmem.resp_data := arb.io.cpu.resp_data; dpath.io.dmem.resp_data_subword := io.dmem.resp_data_subword; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index e6406f81..579220f2 100644 --- a/rocket/src/main/scala/ctrl.scala +++ 
b/rocket/src/main/scala/ctrl.scala @@ -64,10 +64,8 @@ class ioCtrlDpath extends Bundle() val mem_waddr = UFix(5,'input); // write addr from memory stage val wb_waddr = UFix(5,'input); // write addr from writeback stage val status = Bits(17, 'input); - val sboard_clr0 = Bool('input); - val sboard_clr0a = UFix(5, 'input); - val sboard_clr1 = Bool('input); - val sboard_clr1a = UFix(5, 'input); + val sboard_clr = Bool('input); + val sboard_clra = UFix(5, 'input); val mem_valid = Bool('input); // high if there's a valid (not flushed) instruction in mem stage val irq_timer = Bool('input); val irq_ipi = Bool('input); @@ -78,7 +76,7 @@ class ioCtrlAll extends Bundle() val dpath = new ioCtrlDpath(); val console = new ioConsole(List("rdy")); val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); - val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip(); + val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_replay", "resp_nack")).flip(); val host = new ioHost(List("start")); val dtlb_val = Bool('output) val dtlb_rdy = Bool('input); @@ -186,26 +184,26 @@ class rocketCtrl extends Component JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LD-> List(xpr64, BR_N, 
REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), SB-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), SH-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, 
M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, 
A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), LUI-> List(Y, BR_N, REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), @@ -318,10 +316,8 @@ class rocketCtrl extends Component sboard.io.set := wb_reg_div_mul_val | dcache_miss; sboard.io.seta := io.dpath.wb_waddr; - sboard.io.clr0 := io.dpath.sboard_clr0; - sboard.io.clr0a := io.dpath.sboard_clr0a; - sboard.io.clr1 := io.dpath.sboard_clr1; - sboard.io.clr1a := io.dpath.sboard_clr1a; + sboard.io.clr := io.dpath.sboard_clr; + sboard.io.clra := io.dpath.sboard_clra; val id_stall_raddr2 = sboard.io.stalla; val id_stall_raddr1 = sboard.io.stallb; @@ -613,6 +609,11 @@ class rocketCtrl extends Component (id_wen.toBool && (id_waddr === io.dpath.wb_waddr))); val data_hazard = data_hazard_ex || data_hazard_mem || data_hazard_wb; + + // for divider, multiplier, load miss writeback + val mem_wb = Reg(io.dmem.resp_replay, resetVal = Bool(false)) // delayed for subword extension + val mul_wb = io.dpath.mul_result_val && !io.dmem.resp_replay; + val div_wb = io.dpath.div_result_val && !io.dpath.mul_result_val && !io.dmem.resp_replay; 
val ctrl_stalld = !take_pc && @@ -627,15 +628,12 @@ class rocketCtrl extends Component id_div_val.toBool && !io.dpath.div_rdy || id_mul_val.toBool && !io.dpath.mul_rdy || io.dpath.div_result_val || - io.dpath.mul_result_val + io.dpath.mul_result_val || + mem_wb ); val ctrl_killd = take_pc || ctrl_stalld; val ctrl_killf = take_pc || !io.imem.resp_val; - - // for divider, multiplier writeback - val mul_wb = io.dpath.mul_result_val; - val div_wb = io.dpath.div_result_val & !mul_wb; io.flush_inst := mem_reg_flush_inst; diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index 55744650..bed2984a 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -6,10 +6,8 @@ import Constants._; class ioCtrlSboard extends Bundle() { - val clr0 = Bool('input); - val clr0a = UFix(5, 'input); - val clr1 = Bool('input); - val clr1a = UFix(5, 'input); + val clr = Bool('input); + val clra = UFix(5, 'input); val set = Bool('input); val seta = UFix(5, 'input); val raddra = UFix(5, 'input); @@ -25,10 +23,9 @@ class rocketCtrlSboard extends Component override val io = new ioCtrlSboard(); val reg_busy = Reg(width = 32, resetVal = Bits(0, 32)); - val set_mask = Mux(io.set, UFix(1,1) << io.seta, UFix(0,32)); - val clr0_mask = Mux(io.clr0, ~(UFix(1,1) << io.clr0a), ~UFix(0,32)); - val clr1_mask = Mux(io.clr1, ~(UFix(1,1) << io.clr1a), ~UFix(0,32)); - reg_busy <== ((reg_busy | set_mask) & clr0_mask) & clr1_mask; + val set_mask = io.set.toUFix << io.seta; + val clr_mask = ~(io.clr.toUFix << io.clra); + reg_busy <== (reg_busy | set_mask) & clr_mask io.stalla := reg_busy(io.raddra).toBool; io.stallb := reg_busy(io.raddrb).toBool; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 4f506cf3..68996f56 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -11,6 +11,7 @@ class ioDpathDmem extends Bundle() val req_tag = UFix(CPU_TAG_BITS, 'output); val 
req_data = Bits(64, 'output); val resp_val = Bool('input); + val resp_miss = Bool('input); val resp_replay = Bool('input); val resp_tag = Bits(CPU_TAG_BITS, 'input); val resp_data = Bits(64, 'input); @@ -78,7 +79,6 @@ class rocketDpath extends Component val ex_reg_pc_plus4 = Reg() { UFix() }; val ex_reg_inst = Reg() { Bits() }; val ex_reg_raddr2 = Reg() { UFix() }; - val ex_reg_raddr1 = Reg() { UFix() }; val ex_reg_rs2 = Reg() { Bits() }; val ex_reg_rs1 = Reg() { Bits() }; val ex_reg_waddr = Reg() { UFix() }; @@ -115,6 +115,7 @@ class rocketDpath extends Component val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); val r_dmem_resp_val = Reg(resetVal = Bool(false)); + val r_dmem_resp_replay = Reg(resetVal = Bool(false)); val r_dmem_resp_waddr = Reg() { UFix() }; // instruction fetch stage @@ -131,7 +132,8 @@ class rocketDpath extends Component val ex_branch_target = ex_reg_pc + branch_adder_rhs.toUFix; - btb.io.correct_target := Mux(io.ctrl.ex_jr, ex_jr_target, ex_branch_target); + val jr_br_target = Mux(io.ctrl.ex_jr, ex_jr_target, ex_branch_target); + btb.io.correct_target := jr_br_target val if_next_pc = Mux(io.ctrl.sel_pc === PC_BTB, if_btb_target, @@ -158,7 +160,7 @@ class rocketDpath extends Component btb.io.hit ^^ io.ctrl.btb_hit; btb.io.wen ^^ io.ctrl.wen_btb; btb.io.correct_pc4 := ex_reg_pc_plus4; - io.ctrl.btb_match := id_reg_pc === btb.io.correct_target; + io.ctrl.btb_match := id_reg_pc === jr_br_target; // instruction decode stage when (!io.ctrl.stalld) { @@ -187,32 +189,35 @@ class rocketDpath extends Component val id_rdata1 = rfile.io.r1.data; // destination register selection + val id_ctrl_ll_wb = io.ctrl.div_wb || io.ctrl.mul_wb || r_dmem_resp_replay; val id_waddr = - Mux(io.ctrl.div_wb, div_result_tag, + Mux(r_dmem_resp_replay, r_dmem_resp_waddr, Mux(io.ctrl.mul_wb, mul_result_tag, + Mux(io.ctrl.div_wb, div_result_tag, Mux(io.ctrl.sel_wa === WA_RD, id_reg_inst(31,27).toUFix, Mux(io.ctrl.sel_wa === WA_RA, RA, - UFix(0, 5))))); + UFix(0, 5)))))); // 
bypass muxes val rs1_mem_lu_bypass = id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr; val id_rs1 = + Mux(r_dmem_resp_replay, io.dmem.resp_data_subword, Mux(io.ctrl.div_wb, div_result, Mux(io.ctrl.mul_wb, mul_result, - Mux(id_raddr1 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr1 === ex_reg_waddr, ex_wdata, - Mux(id_raddr1 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata, + Mux(id_raddr1 != UFix(0, 5) && (ex_reg_ctrl_wen || ex_reg_ctrl_ll_wb) && id_raddr1 === ex_reg_waddr, ex_wdata, Mux(rs1_mem_lu_bypass, io.dmem.resp_data, + Mux(id_raddr1 != UFix(0, 5) && (mem_reg_ctrl_wen || mem_reg_ctrl_ll_wb) && id_raddr1 === mem_reg_waddr, mem_reg_wdata, Mux(id_raddr1 != UFix(0, 5) && r_dmem_resp_val && id_raddr1 === r_dmem_resp_waddr, io.dmem.resp_data_subword, - Mux(id_raddr1 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr1 === wb_reg_waddr, wb_reg_wdata, - id_rdata1))))))); + Mux(id_raddr1 != UFix(0, 5) && (wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb) && id_raddr1 === wb_reg_waddr, wb_reg_wdata, + id_rdata1)))))))); val rs2_mem_lu_bypass = id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr; val id_rs2 = - Mux(id_raddr2 != UFix(0, 5) && ex_reg_ctrl_wen && id_raddr2 === ex_reg_waddr, ex_wdata, - Mux(id_raddr2 != UFix(0, 5) && mem_reg_ctrl_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata, + Mux(id_raddr2 != UFix(0, 5) && (ex_reg_ctrl_wen || ex_reg_ctrl_ll_wb) && id_raddr2 === ex_reg_waddr, ex_wdata, Mux(rs2_mem_lu_bypass, io.dmem.resp_data, + Mux(id_raddr2 != UFix(0, 5) && (mem_reg_ctrl_wen || mem_reg_ctrl_ll_wb) && id_raddr2 === mem_reg_waddr, mem_reg_wdata, Mux(id_raddr2 != UFix(0, 5) && r_dmem_resp_val && id_raddr2 === r_dmem_resp_waddr, io.dmem.resp_data_subword, - Mux(id_raddr2 != UFix(0, 5) && wb_reg_ctrl_wen && id_raddr2 === wb_reg_waddr, wb_reg_wdata, + Mux(id_raddr2 != UFix(0, 5) && (wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb) && id_raddr2 === wb_reg_waddr, wb_reg_wdata, id_rdata2))))); 
io.ctrl.mem_lu_bypass := rs1_mem_lu_bypass || rs2_mem_lu_bypass; @@ -223,7 +228,6 @@ class rocketDpath extends Component ex_reg_pc_plus4 <== id_reg_pc_plus4; ex_reg_inst <== id_reg_inst; ex_reg_raddr2 <== id_raddr2; - ex_reg_raddr1 <== id_raddr1; ex_reg_rs2 <== id_rs2; ex_reg_rs1 <== id_rs1; ex_reg_waddr <== id_waddr; @@ -233,7 +237,7 @@ class rocketDpath extends Component ex_reg_ctrl_fn_alu <== io.ctrl.fn_alu; ex_reg_ctrl_mul_fn <== io.ctrl.mul_fn; ex_reg_ctrl_div_fn <== io.ctrl.div_fn; - ex_reg_ctrl_ll_wb <== io.ctrl.div_wb | io.ctrl.mul_wb; // TODO: verify + ex_reg_ctrl_ll_wb <== id_ctrl_ll_wb; ex_reg_ctrl_sel_wb <== io.ctrl.sel_wb; ex_reg_ctrl_ren_pcr <== io.ctrl.ren_pcr; @@ -371,6 +375,7 @@ class rocketDpath extends Component // writeback stage r_dmem_resp_val <== io.dmem.resp_val; + r_dmem_resp_replay <== io.dmem.resp_replay; r_dmem_resp_waddr <== io.dmem.resp_tag.toUFix wb_reg_waddr <== mem_reg_waddr; @@ -381,27 +386,21 @@ class rocketDpath extends Component wb_reg_ctrl_wen <== Bool(false); } otherwise { - wb_reg_ctrl_wen <== mem_reg_ctrl_wen; + wb_reg_ctrl_wen <== mem_reg_ctrl_wen && !io.dmem.resp_miss; } // crossbar/sign extension for 8/16 bit loads (moved to earlier in file) // regfile write rfile.io.w0.addr := wb_reg_waddr; - rfile.io.w0.en := wb_reg_ctrl_wen | wb_reg_ctrl_ll_wb; - rfile.io.w0.data := wb_reg_wdata; - - rfile.io.w1.addr := r_dmem_resp_waddr; - rfile.io.w1.en := r_dmem_resp_val; - rfile.io.w1.data := io.dmem.resp_data_subword; + rfile.io.w0.en := wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb; + rfile.io.w0.data := Mux(Reg(io.ctrl.mem_load), io.dmem.resp_data_subword, wb_reg_wdata); io.ctrl.wb_waddr := wb_reg_waddr; // scoreboard clear (for div/mul and D$ load miss writebacks) - io.ctrl.sboard_clr0 := wb_reg_ctrl_ll_wb; - io.ctrl.sboard_clr0a := wb_reg_waddr; - io.ctrl.sboard_clr1 := r_dmem_resp_val; - io.ctrl.sboard_clr1a := r_dmem_resp_waddr; + io.ctrl.sboard_clr := id_ctrl_ll_wb; + io.ctrl.sboard_clra := id_waddr; // processor control regfile 
write pcr.io.w.addr := mem_reg_raddr2; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 3d135748..f01227ed 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -219,14 +219,12 @@ class ioRegfile extends Bundle() val r0 = new ioReadPort(); val r1 = new ioReadPort(); val w0 = new ioWritePort(); - val w1 = new ioWritePort(); } class rocketDpathRegfile extends Component { override val io = new ioRegfile(); val regfile = Mem(32, io.w0.en && (io.w0.addr != UFix(0,5)), io.w0.addr, io.w0.data); - regfile.write(io.w1.en && (io.w1.addr != UFix(0,5)), io.w1.addr, io.w1.data); io.r0.data := Mux((io.r0.addr === UFix(0, 5)) || !io.r0.en, Bits(0, 64), regfile(io.r0.addr)); io.r1.data := Mux((io.r1.addr === UFix(0, 5)) || !io.r1.en, Bits(0, 64), regfile(io.r1.addr)); } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index eadb2643..43bbaaab 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -7,7 +7,7 @@ import scala.math._; class ioDmemArbiter extends Bundle { - val ptw = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_idx", "req_ppn", "resp_data", "resp_val", "resp_nack")); + val ptw = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_idx", "req_ppn", "resp_data", "resp_val", "resp_replay", "resp_nack")); val cpu = new ioDmem(); val mem = new ioDmem().flip(); } @@ -40,6 +40,9 @@ class rocketDmemArbiter extends Component io.cpu.resp_val := io.mem.resp_val && !io.mem.resp_tag(0).toBool; io.ptw.resp_val := io.mem.resp_val && io.mem.resp_tag(0).toBool; + io.cpu.resp_replay := io.mem.resp_replay && !io.mem.resp_tag(0).toBool; + io.ptw.resp_replay := io.mem.resp_replay && io.mem.resp_tag(0).toBool; + io.ptw.resp_data := io.mem.resp_data; io.cpu.resp_data := io.mem.resp_data; io.cpu.resp_tag := io.mem.resp_tag >> UFix(1); From 20aee36c961638aeafa95136abcafe9eb1adf608 Mon Sep 17 00:00:00 2001 
From: Andrew Waterman Date: Mon, 2 Jan 2012 15:42:39 -0800 Subject: [PATCH 0087/1087] move PCR writes to WB stage --- rocket/src/main/scala/ctrl.scala | 36 +++++++++++++++++++++----- rocket/src/main/scala/divider.scala | 5 ++++ rocket/src/main/scala/dpath.scala | 27 ++++++++++++++----- rocket/src/main/scala/multiplier.scala | 3 ++- 4 files changed, 56 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 579220f2..ade5a80e 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -34,7 +34,7 @@ class ioCtrlDpath extends Bundle() val ren_pcr = Bool('output); val wen_pcr = Bool('output); val id_eret = Bool('output); - val mem_eret = Bool('output); + val wb_eret = Bool('output); val mem_load = Bool('output); val wen = Bool('output); // instruction in execute is an unconditional jump @@ -357,6 +357,13 @@ class rocketCtrl extends Component val mem_reg_replay = Reg(resetVal = Bool(false)); val mem_reg_kill_dmem = Reg(resetVal = Bool(false)); + val wb_reg_inst_di = Reg(resetVal = Bool(false)); + val wb_reg_inst_ei = Reg(resetVal = Bool(false)); + val wb_reg_eret = Reg(resetVal = Bool(false)); + val wb_reg_exception = Reg(resetVal = Bool(false)); + val wb_reg_badvaddr_wen = Reg(resetVal = Bool(false)); + val wb_reg_cause = Reg(){UFix()}; + when (!io.dpath.stalld) { when (io.dpath.killf) { id_reg_xcpt_ma_inst <== Bool(false); @@ -477,6 +484,17 @@ class rocketCtrl extends Component mem_reg_xcpt_fpu <== ex_reg_xcpt_fpu; mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; } + + when (io.dpath.killm) { + wb_reg_eret <== Bool(false); + wb_reg_inst_di <== Bool(false); + wb_reg_inst_ei <== Bool(false); + } + otherwise { + wb_reg_eret <== mem_reg_eret; + wb_reg_inst_di <== mem_reg_inst_di; + wb_reg_inst_ei <== mem_reg_inst_ei; + } wb_reg_div_mul_val <== mem_reg_div_mul_val; @@ -524,10 +542,14 @@ class rocketCtrl extends Component Mux(io.xcpt_dtlb_st, UFix(11,5), // store fault 
UFix(0,5))))))))))); // instruction address misaligned + wb_reg_exception <== mem_exception; + wb_reg_badvaddr_wen <== io.xcpt_dtlb_ld || io.xcpt_dtlb_st; + wb_reg_cause <== mem_cause; + // write cause to PCR on an exception - io.dpath.exception := mem_exception; - io.dpath.cause := mem_cause; - io.dpath.badvaddr_wen := io.xcpt_dtlb_ld || io.xcpt_dtlb_st; + io.dpath.exception := wb_reg_exception; + io.dpath.cause := wb_reg_cause; + io.dpath.badvaddr_wen := wb_reg_badvaddr_wen; // replay mem stage PC on a DTLB miss val mem_hazard = io.dtlb_miss || io.dmem.resp_nack; @@ -662,9 +684,9 @@ class rocketCtrl extends Component io.dpath.ren_pcr := id_ren_pcr.toBool; io.dpath.wen_pcr := id_wen_pcr.toBool; io.dpath.id_eret := id_eret.toBool; - io.dpath.mem_eret := mem_reg_eret; - io.dpath.irq_disable := mem_reg_inst_di && !kill_mem; - io.dpath.irq_enable := mem_reg_inst_ei && !kill_mem; + io.dpath.wb_eret := wb_reg_eret; + io.dpath.irq_disable := wb_reg_inst_di; + io.dpath.irq_enable := wb_reg_inst_ei; io.dtlb_val := ex_reg_mem_val && !ex_kill_dtlb; io.dmem.req_val := ex_reg_mem_val; diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 4b446d77..7e2b3955 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -7,6 +7,7 @@ import Constants._; class ioDivider(width: Int) extends Bundle { // requests val div_val = Bool('input); + val div_kill = Bool('input); val div_rdy = Bool('output); val dw = UFix(1, 'input); val div_fn = UFix(2, 'input); @@ -54,6 +55,10 @@ class rocketDivider(width : Int) extends Component { val subtractor = remainder(2*width, width).toUFix - divisor; val tc = (io.div_fn === DIV_D) || (io.div_fn === DIV_R); + + when (io.div_kill) { + state <== s_ready; + } // state machine switch (state) { diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 68996f56..99d4e6f0 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala 
@@ -105,14 +105,19 @@ class rocketDpath extends Component val mem_reg_wdata = Reg() { Bits() }; val mem_reg_raddr2 = Reg() { UFix() }; val mem_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); + val mem_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); + val mem_reg_ctrl_div_val = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); // writeback definitions + val wb_reg_pc = Reg() { UFix() }; val wb_reg_waddr = Reg() { UFix() }; val wb_reg_wdata = Reg() { Bits() }; + val wb_reg_raddr2 = Reg() { UFix() }; val wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); + val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val r_dmem_resp_val = Reg(resetVal = Bool(false)); val r_dmem_resp_replay = Reg(resetVal = Bool(false)); @@ -280,7 +285,8 @@ class rocketDpath extends Component // divider div.io.dw := ex_reg_ctrl_fn_dw; div.io.div_fn := ex_reg_ctrl_div_fn; - div.io.div_val := ex_reg_ctrl_div_val && !io.ctrl.killx; + div.io.div_val := ex_reg_ctrl_div_val; + div.io.div_kill := mem_reg_ctrl_div_val && io.ctrl.killm; div.io.div_waddr := ex_reg_waddr; div.io.dpath_rs1 := ex_reg_rs1; div.io.dpath_rs2 := ex_reg_rs2; @@ -290,7 +296,8 @@ class rocketDpath extends Component io.ctrl.div_result_val := div.io.div_result_val; // multiplier - mul.io.mul_val := ex_reg_ctrl_mul_val && !io.ctrl.killx; + mul.io.mul_val := ex_reg_ctrl_mul_val; + mul.io.mul_kill:= mem_reg_ctrl_mul_val && io.ctrl.killm; mul.io.dw := ex_reg_ctrl_fn_dw; mul.io.mul_fn := ex_reg_ctrl_mul_fn; mul.io.mul_tag := ex_reg_waddr; @@ -355,6 +362,8 @@ class rocketDpath extends Component mem_reg_wdata <== ex_wdata; mem_reg_ctrl_ll_wb <== ex_reg_ctrl_ll_wb; mem_reg_raddr2 <== ex_reg_raddr2; + mem_reg_ctrl_mul_val <== ex_reg_ctrl_mul_val; + mem_reg_ctrl_div_val <== ex_reg_ctrl_div_val; when (io.ctrl.killx) { mem_reg_valid <== Bool(false); @@ -378,15 +387,19 @@ class rocketDpath extends Component 
r_dmem_resp_replay <== io.dmem.resp_replay; r_dmem_resp_waddr <== io.dmem.resp_tag.toUFix + wb_reg_pc <== mem_reg_pc; wb_reg_waddr <== mem_reg_waddr; wb_reg_wdata <== mem_reg_wdata; wb_reg_ctrl_ll_wb <== mem_reg_ctrl_ll_wb; + wb_reg_raddr2 <== mem_reg_raddr2; when (io.ctrl.killm) { wb_reg_ctrl_wen <== Bool(false); + wb_reg_ctrl_wen_pcr <== Bool(false); } otherwise { wb_reg_ctrl_wen <== mem_reg_ctrl_wen && !io.dmem.resp_miss; + wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } // crossbar/sign extension for 8/16 bit loads (moved to earlier in file) @@ -403,16 +416,16 @@ class rocketDpath extends Component io.ctrl.sboard_clra := id_waddr; // processor control regfile write - pcr.io.w.addr := mem_reg_raddr2; - pcr.io.w.en := mem_reg_ctrl_wen_pcr && !io.ctrl.killm; - pcr.io.w.data := mem_reg_wdata; + pcr.io.w.addr := wb_reg_raddr2; + pcr.io.w.en := wb_reg_ctrl_wen_pcr; + pcr.io.w.data := wb_reg_wdata; pcr.io.di := io.ctrl.irq_disable; pcr.io.ei := io.ctrl.irq_enable; - pcr.io.eret := io.ctrl.mem_eret; + pcr.io.eret := io.ctrl.wb_eret; pcr.io.exception := io.ctrl.exception; pcr.io.cause := io.ctrl.cause; - pcr.io.pc := mem_reg_pc; + pcr.io.pc := wb_reg_pc; pcr.io.badvaddr_wen := io.ctrl.badvaddr_wen; io.console.bits := pcr.io.console_data; io.console.valid := pcr.io.console_val; diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index d7dc114f..44a6eb7e 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -7,6 +7,7 @@ import Constants._; class ioMultiplier(width: Int) extends Bundle { // requests val mul_val = Bool('input); + val mul_kill= Bool('input); val mul_rdy = Bool('output); val dw = UFix(1, 'input); val mul_fn = UFix(2, 'input); @@ -61,7 +62,7 @@ class rocketMultiplier extends Component { r_prod<== rhs_in r_lsb <== Bool(false) } - when (io.result_val && io.result_rdy) { + when (io.result_val && io.result_rdy || io.mul_kill) { r_val <== Bool(false) } From 
142dfc6e07f05412412654d22653486d9779da39 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 3 Jan 2012 15:09:08 -0800 Subject: [PATCH 0088/1087] made tohost/fromhost 64 bits wide --- rocket/src/main/scala/cpu.scala | 4 ++-- rocket/src/main/scala/dpath_util.scala | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index bb59e054..332a471f 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -13,8 +13,8 @@ class ioHost(view: List[String] = null) extends Bundle(view) { val start = Bool('input); val from_wen = Bool('input); - val from = Bits(32, 'input); - val to = Bits(32, 'output); + val from = Bits(64, 'input); + val to = Bits(64, 'output); } class ioConsole(view: List[String] = null) extends Bundle(view) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index f01227ed..1dd0f3b0 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -71,8 +71,8 @@ class rocketDpathPCR extends Component val reg_count = Reg() { UFix() }; val reg_compare = Reg() { UFix() }; val reg_cause = Reg() { Bits() }; - val reg_tohost = Reg(resetVal = Bits(0, 32)); - val reg_fromhost = Reg(resetVal = Bits(0, 32)); + val reg_tohost = Reg(resetVal = Bits(0, 64)); + val reg_fromhost = Reg(resetVal = Bits(0, 64)); val reg_k0 = Reg() { Bits() }; val reg_k1 = Reg() { Bits() }; val reg_ptbr = Reg() { UFix() }; @@ -99,7 +99,7 @@ class rocketDpathPCR extends Component io.status := Cat(reg_status_vm, reg_status_im, reg_status); io.evec := reg_ebase; io.ptbr := reg_ptbr; - io.host.to := Mux(io.host.from_wen, Bits(0,32), reg_tohost); + io.host.to := Mux(io.host.from_wen, Bits(0), reg_tohost); io.debug.error_mode := reg_error_mode; io.r.data := rdata; @@ -108,13 +108,13 @@ class rocketDpathPCR extends Component io.console_val := console_wen; when (io.host.from_wen) { - reg_tohost <== 
Bits(0,32); + reg_tohost <== Bits(0); reg_fromhost <== io.host.from; } otherwise { when (!io.exception && io.w.en && (io.w.addr === PCR_TOHOST)) { - reg_tohost <== io.w.data(31,0); - reg_fromhost <== Bits(0,32); + reg_tohost <== io.w.data; + reg_fromhost <== Bits(0); } } @@ -166,7 +166,7 @@ class rocketDpathPCR extends Component when (io.w.addr === PCR_COUNT) { reg_count <== io.w.data(31,0).toUFix; } when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(31,0).toUFix; r_irq_timer <== Bool(false); } when (io.w.addr === PCR_CAUSE) { reg_cause <== io.w.data(4,0); } - when (io.w.addr === PCR_FROMHOST) { reg_fromhost <== io.w.data(31,0); } + when (io.w.addr === PCR_FROMHOST) { reg_fromhost <== io.w.data; } when (io.w.addr === PCR_SEND_IPI) { r_irq_ipi <== Bool(true); } when (io.w.addr === PCR_CLR_IPI) { r_irq_ipi <== Bool(false); } when (io.w.addr === PCR_K0) { reg_k0 <== io.w.data; } @@ -191,8 +191,8 @@ class rocketDpathPCR extends Component is (PCR_COMPARE) { rdata <== Cat(Fill(32, reg_compare(31)), reg_compare); } is (PCR_CAUSE) { rdata <== Cat(Bits(0,59), reg_cause); } is (PCR_COREID) { rdata <== Bits(COREID,64); } - is (PCR_FROMHOST) { rdata <== Cat(Fill(32, reg_fromhost(31)), reg_fromhost); } - is (PCR_TOHOST) { rdata <== Cat(Fill(32, reg_tohost(31)), reg_tohost); } + is (PCR_FROMHOST) { rdata <== reg_fromhost; } + is (PCR_TOHOST) { rdata <== reg_tohost; } is (PCR_K0) { rdata <== reg_k0; } is (PCR_K1) { rdata <== reg_k1; } is (PCR_PTBR) { rdata <== Cat(Bits(0,64-PADDR_BITS), reg_ptbr); } From 938b142d64d22b1bca1b01537918c973a6cf725e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 3 Jan 2012 18:41:53 -0800 Subject: [PATCH 0089/1087] require writes to memory to be uninterrupted --- rocket/src/main/scala/arbiter.scala | 13 +++++++------ rocket/src/main/scala/nbdcache.scala | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 01708fe5..1cf2aa9d 100644 --- 
a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -36,13 +36,13 @@ class rocketMemArbiter extends Component { io.mem.req_val := (io.icache.req_val || io.dcache.req_val); // Set read/write bit. Icache always reads - io.mem.req_rw := Mux(io.icache.req_val,Bool(false),io.dcache.req_rw); + io.mem.req_rw := Mux(io.dcache.req_val, io.dcache.req_rw, Bool(false)); // Give priority to Icache - io.mem.req_addr := Mux(io.icache.req_val,io.icache.req_addr,io.dcache.req_addr); + io.mem.req_addr := Mux(io.dcache.req_val, io.dcache.req_addr, io.icache.req_addr); // low bit of tag=0 for I$, 1 for D$ - io.mem.req_tag := Cat(Mux(io.icache.req_val, io.icache.req_tag, io.dcache.req_tag), !io.icache.req_val) + io.mem.req_tag := Cat(Mux(io.dcache.req_val, io.dcache.req_tag, io.icache.req_tag), io.dcache.req_val) // Just pass through write data (only D$ will write) io.mem.req_wdata := io.dcache.req_wdata; @@ -51,9 +51,10 @@ class rocketMemArbiter extends Component { // Interface to caches // ***************************** - // Read for request from cache if the memory is ready. Give priority to I$ - io.icache.req_rdy := io.mem.req_rdy; - io.dcache.req_rdy := io.mem.req_rdy && !io.icache.req_val; + // Read for request from cache if the memory is ready. Give priority to D$. + // This way, writebacks will never be interrupted by I$ refills. + io.dcache.req_rdy := io.mem.req_rdy; + io.icache.req_rdy := io.mem.req_rdy && !io.dcache.req_val; // Response will only be valid for D$ or I$ not both because of tag bits io.icache.resp_val := io.mem.resp_val && !io.mem.resp_tag(0).toBool; diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 70977b4d..5557fdb3 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -382,8 +382,9 @@ class WritebackUnit extends Component { val addr = Reg() { new WritebackReq() } // don't allow memory requests to bypass conflicting writebacks. 
+ // also don't allow a refill request once a writeback has started. // TODO: turn this into a victim buffer. - val block_refill = valid && (io.refill_req.bits.addr(IDX_BITS-1,0) === addr.idx) + val block_refill = valid && ((io.refill_req.bits.addr(IDX_BITS-1,0) === addr.idx) || (cnt === UFix(REFILL_CYCLES))) val refill_val = io.refill_req.valid && !block_refill wbq.io.q_reset := Bool(false) From 92dda102b6371f5482d05ca4a458c15281c14fc5 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 11 Jan 2012 16:56:40 -0800 Subject: [PATCH 0090/1087] slight control logic cleanup --- rocket/src/main/scala/cpu.scala | 1 + rocket/src/main/scala/ctrl.scala | 38 +++++++++++++++----------------- rocket/src/main/scala/dtlb.scala | 3 ++- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 332a471f..6e6e74a3 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -72,6 +72,7 @@ class rocketProc extends Component dtlb.io.cpu.invalidate := dpath.io.ptbr_wen; dtlb.io.cpu.status := dpath.io.ctrl.status; dtlb.io.cpu.req_val := ctrl.io.dtlb_val; + dtlb.io.cpu.req_kill := ctrl.io.dtlb_kill; dtlb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; dtlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR dtlb.io.cpu.req_vpn := dpath.io.dmem.req_addr(VADDR_BITS-1,PGIDX_BITS); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ade5a80e..f88da327 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -78,7 +78,8 @@ class ioCtrlAll extends Bundle() val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_replay", "resp_nack")).flip(); val host = new ioHost(List("start")); - val dtlb_val = Bool('output) + val dtlb_val = Bool('output); + val dtlb_kill = Bool('output); val dtlb_rdy = Bool('input); val 
dtlb_miss = Bool('input); val flush_inst = Bool('output); @@ -386,8 +387,6 @@ class rocketCtrl extends Component ex_reg_btb_hit <== Bool(false); ex_reg_div_mul_val <== Bool(false); ex_reg_mem_val <== Bool(false); - ex_reg_mem_cmd <== UFix(0, 4); - ex_reg_mem_type <== UFix(0, 3); ex_reg_eret <== Bool(false); ex_reg_privileged <== Bool(false); ex_reg_inst_di <== Bool(false); @@ -405,8 +404,6 @@ class rocketCtrl extends Component ex_reg_btb_hit <== id_reg_btb_hit; ex_reg_div_mul_val <== id_div_val.toBool || id_mul_val.toBool; ex_reg_mem_val <== id_mem_val.toBool; - ex_reg_mem_cmd <== id_mem_cmd; - ex_reg_mem_type <== id_mem_type; ex_reg_eret <== id_eret.toBool; ex_reg_privileged <== id_privileged.toBool; ex_reg_inst_di <== (id_irq === I_DI); @@ -420,6 +417,8 @@ class rocketCtrl extends Component ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== id_syscall.toBool; } + ex_reg_mem_cmd <== id_mem_cmd; + ex_reg_mem_type <== id_mem_type; val jr_taken = (ex_reg_br_type === BR_JR); @@ -454,8 +453,6 @@ class rocketCtrl extends Component mem_reg_div_mul_val <== Bool(false); mem_reg_eret <== Bool(false); mem_reg_mem_val <== Bool(false); - mem_reg_mem_cmd <== UFix(0, 4); - mem_reg_mem_type <== UFix(0, 3); mem_reg_privileged <== Bool(false); mem_reg_inst_di <== Bool(false); mem_reg_inst_ei <== Bool(false); @@ -471,8 +468,6 @@ class rocketCtrl extends Component mem_reg_div_mul_val <== ex_reg_div_mul_val; mem_reg_eret <== ex_reg_eret; mem_reg_mem_val <== ex_reg_mem_val; - mem_reg_mem_cmd <== ex_reg_mem_cmd; - mem_reg_mem_type <== ex_reg_mem_type; mem_reg_privileged <== ex_reg_privileged; mem_reg_inst_di <== ex_reg_inst_di; mem_reg_inst_ei <== ex_reg_inst_ei; @@ -484,6 +479,8 @@ class rocketCtrl extends Component mem_reg_xcpt_fpu <== ex_reg_xcpt_fpu; mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; } + mem_reg_mem_cmd <== ex_reg_mem_cmd; + mem_reg_mem_type <== ex_reg_mem_type; when (io.dpath.killm) { wb_reg_eret <== Bool(false); @@ -514,13 +511,15 @@ class rocketCtrl extends 
Component val mem_xcpt_ma_ld = io.xcpt_ma_ld && !mem_reg_kill_dmem val mem_xcpt_ma_st = io.xcpt_ma_st && !mem_reg_kill_dmem + val mem_xcpt_dtlb_ld = io.xcpt_dtlb_ld && !mem_reg_kill_dmem + val mem_xcpt_dtlb_st = io.xcpt_dtlb_st && !mem_reg_kill_dmem val mem_exception = interrupt || mem_xcpt_ma_ld || mem_xcpt_ma_st || - io.xcpt_dtlb_ld || - io.xcpt_dtlb_st || + mem_xcpt_dtlb_ld || + mem_xcpt_dtlb_st || mem_reg_xcpt_illegal || mem_reg_xcpt_privileged || mem_reg_xcpt_fpu || @@ -536,14 +535,14 @@ class rocketCtrl extends Component Mux(mem_reg_xcpt_fpu, UFix(4,5), // FPU disabled Mux(mem_reg_xcpt_syscall, UFix(6,5), // system call // breakpoint - Mux(mem_xcpt_ma_ld, UFix(8,5), // misaligned load - Mux(mem_xcpt_ma_st, UFix(9,5), // misaligned store - Mux(io.xcpt_dtlb_ld, UFix(10,5), // load fault - Mux(io.xcpt_dtlb_st, UFix(11,5), // store fault + Mux(mem_xcpt_ma_ld, UFix(8,5), // misaligned load + Mux(mem_xcpt_ma_st, UFix(9,5), // misaligned store + Mux(mem_xcpt_dtlb_ld, UFix(10,5), // load fault + Mux(mem_xcpt_dtlb_st, UFix(11,5), // store fault UFix(0,5))))))))))); // instruction address misaligned wb_reg_exception <== mem_exception; - wb_reg_badvaddr_wen <== io.xcpt_dtlb_ld || io.xcpt_dtlb_st; + wb_reg_badvaddr_wen <== mem_xcpt_dtlb_ld || mem_xcpt_dtlb_st; wb_reg_cause <== mem_cause; // write cause to PCR on an exception @@ -569,11 +568,9 @@ class rocketCtrl extends Component val ex_hazard = dcache_miss && Reg(io.dpath.mem_lu_bypass) || mem_reg_privileged || mem_reg_flush_inst val mem_kill_ex = kill_mem || take_pc_mem val kill_ex = mem_kill_ex || ex_hazard || !(io.dmem.req_rdy && io.dtlb_rdy) && ex_reg_mem_val - val ex_kill_dtlb = mem_kill_ex || ex_hazard || !io.dmem.req_rdy - val ex_kill_dmem = mem_kill_ex || ex_hazard || !io.dtlb_rdy mem_reg_replay <== kill_ex && !mem_kill_ex - mem_reg_kill_dmem <== ex_kill_dmem + mem_reg_kill_dmem <== kill_ex io.dpath.sel_pc := Mux(replay_mem, PC_MEM, // dtlb miss @@ -688,7 +685,8 @@ class rocketCtrl extends Component 
io.dpath.irq_disable := wb_reg_inst_di; io.dpath.irq_enable := wb_reg_inst_ei; - io.dtlb_val := ex_reg_mem_val && !ex_kill_dtlb; + io.dtlb_val := ex_reg_mem_val; + io.dtlb_kill := mem_reg_kill_dmem; io.dmem.req_val := ex_reg_mem_val; io.dmem.req_kill := mem_kill_dmem; io.dmem.req_cmd := ex_reg_mem_cmd; diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index fe965fc3..cea13c60 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -15,6 +15,7 @@ class ioDTLB_CPU(view: List[String] = null) extends Bundle(view) val invalidate = Bool('input); // lookup requests val req_val = Bool('input); + val req_kill = Bool('input); val req_cmd = Bits(4, 'input); // load/store/amo val req_rdy = Bool('output); val req_asid = Bits(ASID_BITS, 'input); @@ -117,7 +118,7 @@ class rocketDTLB(entries: Int) extends Component val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val lookup = (state === s_ready) && r_cpu_req_val && (req_load || req_store || req_amo); + val lookup = (state === s_ready) && r_cpu_req_val && !io.cpu.req_kill && (req_load || req_store || req_amo); val lookup_hit = lookup && tag_hit; val lookup_miss = lookup && !tag_hit; val tlb_hit = status_vm && lookup_hit; From bcb55e581a4efc9a42719202078dbd93af0dc0c8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 11 Jan 2012 17:49:32 -0800 Subject: [PATCH 0091/1087] remove host.start signal, use reset instead --- rocket/src/main/scala/cpu.scala | 2 -- rocket/src/main/scala/ctrl.scala | 3 +-- rocket/src/main/scala/dpath.scala | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 6e6e74a3..5497f727 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -11,7 +11,6 @@ class ioDebug(view: List[String] = null) extends Bundle(view) class ioHost(view: List[String] = null) extends Bundle(view) { - val start = Bool('input); val 
from_wen = Bool('input); val from = Bits(64, 'input); val to = Bits(64, 'output); @@ -47,7 +46,6 @@ class rocketProc extends Component ctrl.io.dpath <> dpath.io.ctrl; dpath.io.host ^^ io.host; - ctrl.io.host.start := io.host.start; dpath.io.debug ^^ io.debug; // FIXME: try to make this more compact diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f88da327..6db925fe 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -77,7 +77,6 @@ class ioCtrlAll extends Bundle() val console = new ioConsole(List("rdy")); val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_replay", "resp_nack")).flip(); - val host = new ioHost(List("start")); val dtlb_val = Bool('output); val dtlb_kill = Bool('output); val dtlb_rdy = Bool('input); @@ -294,7 +293,7 @@ class rocketCtrl extends Component val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst); // FIXME - io.imem.req_val := io.host.start && !io.dpath.xcpt_ma_inst; + io.imem.req_val := !io.dpath.xcpt_ma_inst; val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_sel_alu1 :: id_fn_dw :: id_fn_alu :: csremainder = cs; val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: Nil = csremainder; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 99d4e6f0..bb5aae43 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -150,7 +150,7 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, if_pc_plus4))))))); // PC_4 - when (!io.ctrl.stallf && io.host.start) { + when (!io.ctrl.stallf) { if_reg_pc <== if_next_pc.toUFix; } From 1a7bfd4350c0ea2aa944a30f3bd5871a69179fae Mon 
Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 11 Jan 2012 18:27:11 -0800 Subject: [PATCH 0092/1087] remove icache req_rdy signal --- rocket/src/main/scala/cpu.scala | 1 - rocket/src/main/scala/ctrl.scala | 3 +-- rocket/src/main/scala/icache.scala | 10 ++++------ 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 5497f727..fc7e0494 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -60,7 +60,6 @@ class rocketProc extends Component io.imem.req_ppn := itlb.io.cpu.resp_ppn; io.imem.req_val := ctrl.io.imem.req_val; io.imem.invalidate := ctrl.io.flush_inst; - ctrl.io.imem.req_rdy := itlb.io.cpu.req_rdy && io.imem.req_rdy; ctrl.io.imem.resp_val := io.imem.resp_val; dpath.io.imem.resp_data := io.imem.resp_data; ctrl.io.xcpt_itlb := itlb.io.cpu.exception; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6db925fe..1333c65e 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -75,7 +75,7 @@ class ioCtrlAll extends Bundle() { val dpath = new ioCtrlDpath(); val console = new ioConsole(List("rdy")); - val imem = new ioImem(List("req_val", "req_rdy", "resp_val")).flip(); + val imem = new ioImem(List("req_val", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_replay", "resp_nack")).flip(); val dtlb_val = Bool('output); val dtlb_kill = Bool('output); @@ -586,7 +586,6 @@ class rocketCtrl extends Component io.dpath.stallf := ~take_pc & ( - ~io.imem.req_rdy | ~io.imem.resp_val | io.dpath.stalld ); diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 870a3ee2..462cbd4c 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -54,12 +54,13 @@ class rocketICacheDM(lines: Int) extends Component { val offsetlsb = ceil(log(databits/8)/log(2)).toInt; 
val rf_cnt_bits = ceil(log(REFILL_CYCLES)/log(2)).toInt; - val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: s_resolve_miss :: Nil = Enum(6) { UFix() }; + val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_reset); val r_cpu_req_idx = Reg { Bits(width = PGIDX_BITS) } val r_cpu_req_ppn = Reg { Bits(width = PPN_BITS) } val r_cpu_req_val = Reg(resetVal = Bool(false)); + val r_rdy = Reg(io.cpu.req_rdy) when (io.cpu.req_val && io.cpu.req_rdy) { r_cpu_req_idx <== io.cpu.req_idx; @@ -111,7 +112,7 @@ class rocketICacheDM(lines: Int) extends Component { val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val); // output signals - io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_valid && tag_match; + io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_rdy && r_cpu_req_val && tag_valid && tag_match; io.cpu.req_rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || (tag_valid && tag_match)); io.cpu.resp_data := data_array_rdata >> Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2up(databits))).toUFix io.mem.req_val := (state === s_request); @@ -143,12 +144,9 @@ class rocketICacheDM(lines: Int) extends Component { } is (s_refill) { when (io.mem.resp_val && (~refill_count === UFix(0))) { - state <== s_resolve_miss; + state <== s_ready; } } - is (s_resolve_miss) { - state <== s_ready; - } } } From 4807d7222bda714b7e225319c529045a307a2b26 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 11 Jan 2012 19:20:20 -0800 Subject: [PATCH 0093/1087] use replay to handle I$ misses this eliminates a long path in the fetch stage --- rocket/src/main/scala/ctrl.scala | 25 +++++++++++++++---------- rocket/src/main/scala/icache.scala | 12 ++++++------ rocket/src/main/scala/top.scala | 4 ++-- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala 
b/rocket/src/main/scala/ctrl.scala index 1333c65e..1d08b408 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -290,10 +290,10 @@ class rocketCtrl extends Component */ )); - val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst); + val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst, resetVal = Bool(false)); // FIXME - io.imem.req_val := !io.dpath.xcpt_ma_inst; + io.imem.req_val := Bool(true) val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_sel_alu1 :: id_fn_dw :: id_fn_alu :: csremainder = cs; val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: Nil = csremainder; @@ -326,6 +326,7 @@ class rocketCtrl extends Component val id_reg_btb_hit = Reg(resetVal = Bool(false)); val id_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val id_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); + val id_reg_icmiss = Reg(resetVal = Bool(false)); val ex_reg_br_type = Reg(){UFix(width = 4)}; val ex_reg_btb_hit = Reg(){Bool()}; @@ -344,6 +345,7 @@ class rocketCtrl extends Component val ex_reg_xcpt_privileged = Reg(resetVal = Bool(false)); val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); + val ex_reg_icmiss = Reg(resetVal = Bool(false)); val mem_reg_inst_di = Reg(resetVal = Bool(false)); val mem_reg_inst_ei = Reg(resetVal = Bool(false)); @@ -356,6 +358,7 @@ class rocketCtrl extends Component val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val mem_reg_replay = Reg(resetVal = Bool(false)); val mem_reg_kill_dmem = Reg(resetVal = Bool(false)); + val mem_reg_icmiss = Reg(resetVal = Bool(false)); val wb_reg_inst_di = Reg(resetVal = Bool(false)); val wb_reg_inst_ei = Reg(resetVal = Bool(false)); @@ -364,6 +367,8 @@ class rocketCtrl extends Component val wb_reg_badvaddr_wen = Reg(resetVal = Bool(false)); 
val wb_reg_cause = Reg(){UFix()}; + val take_pc = Wire() { Bool() }; + when (!io.dpath.stalld) { when (io.dpath.killf) { id_reg_xcpt_ma_inst <== Bool(false); @@ -375,6 +380,7 @@ class rocketCtrl extends Component id_reg_xcpt_itlb <== io.xcpt_itlb; id_reg_btb_hit <== io.dpath.btb_hit; } + id_reg_icmiss <== !take_pc && !io.imem.resp_val; } // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim) @@ -397,6 +403,7 @@ class rocketCtrl extends Component ex_reg_xcpt_privileged <== Bool(false); ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== Bool(false); + ex_reg_icmiss <== Bool(false); } otherwise { ex_reg_br_type <== id_br_type; @@ -415,6 +422,7 @@ class rocketCtrl extends Component // ex_reg_xcpt_fpu <== id_fp_val.toBool; ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== id_syscall.toBool; + ex_reg_icmiss <== id_reg_icmiss; } ex_reg_mem_cmd <== id_mem_cmd; ex_reg_mem_type <== id_mem_type; @@ -462,6 +470,7 @@ class rocketCtrl extends Component mem_reg_xcpt_privileged <== Bool(false); mem_reg_xcpt_fpu <== Bool(false); mem_reg_xcpt_syscall <== Bool(false); + mem_reg_icmiss <== Bool(false); } otherwise { mem_reg_div_mul_val <== ex_reg_div_mul_val; @@ -477,6 +486,7 @@ class rocketCtrl extends Component mem_reg_xcpt_privileged <== ex_reg_xcpt_privileged; mem_reg_xcpt_fpu <== ex_reg_xcpt_fpu; mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; + mem_reg_icmiss <== ex_reg_icmiss; } mem_reg_mem_cmd <== ex_reg_mem_cmd; mem_reg_mem_type <== ex_reg_mem_type; @@ -550,7 +560,7 @@ class rocketCtrl extends Component io.dpath.badvaddr_wen := wb_reg_badvaddr_wen; // replay mem stage PC on a DTLB miss - val mem_hazard = io.dtlb_miss || io.dmem.resp_nack; + val mem_hazard = io.dtlb_miss || io.dmem.resp_nack || mem_reg_icmiss; val mem_kill_dmem = io.dtlb_miss || mem_exception || mem_reg_kill_dmem; val replay_mem = mem_hazard || mem_reg_replay; val kill_mem = mem_hazard || mem_exception; @@ -560,7 +570,7 @@ class rocketCtrl extends Component 
val br_jr_taken = br_taken || jr_taken val take_pc_ex = !ex_btb_match && br_jr_taken || ex_reg_btb_hit && !br_jr_taken val take_pc_mem = mem_exception || mem_reg_eret || replay_mem - val take_pc = take_pc_ex || take_pc_mem + take_pc <== take_pc_ex || take_pc_mem // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions @@ -583,12 +593,7 @@ class rocketCtrl extends Component io.dpath.wen_btb := !ex_btb_match && br_jr_taken && !kill_ex; - io.dpath.stallf := - ~take_pc & - ( - ~io.imem.resp_val | - io.dpath.stalld - ); + io.dpath.stallf := io.dpath.stalld; // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. val ex_mem_cmd_load = diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 462cbd4c..83a2067e 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -11,7 +11,6 @@ class ioImem(view: List[String] = null) extends Bundle (view) val invalidate = Bool('input); val itlb_miss = Bool('input); val req_val = Bool('input); - val req_rdy = Bool('output); val req_idx = Bits(PGIDX_BITS, 'input); val req_ppn = Bits(PPN_BITS, 'input); val resp_data = Bits(32, 'output); @@ -60,15 +59,16 @@ class rocketICacheDM(lines: Int) extends Component { val r_cpu_req_idx = Reg { Bits(width = PGIDX_BITS) } val r_cpu_req_ppn = Reg { Bits(width = PPN_BITS) } val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_rdy = Reg(io.cpu.req_rdy) + + val rdy = Wire() { Bool() } - when (io.cpu.req_val && io.cpu.req_rdy) { + when (io.cpu.req_val && rdy) { r_cpu_req_idx <== io.cpu.req_idx; } when (state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss) { r_cpu_req_ppn <== io.cpu.req_ppn; } - when (io.cpu.req_rdy) { + when (rdy) { r_cpu_req_val <== io.cpu.req_val; } otherwise { @@ -112,8 +112,8 @@ class rocketICacheDM(lines: Int) extends Component { val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val); // 
output signals - io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_rdy && r_cpu_req_val && tag_valid && tag_match; - io.cpu.req_rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || (tag_valid && tag_match)); + io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && Reg(rdy) && r_cpu_req_val && tag_valid && tag_match; + rdy <== !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || (tag_valid && tag_match)); io.cpu.resp_data := data_array_rdata >> Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2up(databits))).toUFix io.mem.req_val := (state === s_request); io.mem.req_addr := Cat(r_cpu_req_ppn, r_cpu_req_idx(indexmsb,indexlsb)).toUFix diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index d84d333e..de63bc2f 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -37,9 +37,9 @@ class Top() extends Component { object top_main { def main(args: Array[String]) = { // Can turn off --debug and --vcd when done with debugging to improve emulator performance -// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); + val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); // val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug"); - val cpu_args = args ++ Array("--target-dir", "generated-src"); +// val cpu_args = args ++ Array("--target-dir", "generated-src"); // Set variables based off of command flags // for(a <- args) { // a match { From acf3134e80d5ab4b098362d958a8072bf481f53f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 12 Jan 2012 14:19:18 -0800 Subject: [PATCH 0094/1087] minor control logic cleanup --- rocket/src/main/scala/ctrl.scala | 34 ++++++++++++++------------------ 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 1d08b408..d544c9d1 100644 --- 
a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -357,8 +357,7 @@ class rocketCtrl extends Component val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val mem_reg_replay = Reg(resetVal = Bool(false)); - val mem_reg_kill_dmem = Reg(resetVal = Bool(false)); - val mem_reg_icmiss = Reg(resetVal = Bool(false)); + val mem_reg_kill = Reg(resetVal = Bool(false)); val wb_reg_inst_di = Reg(resetVal = Bool(false)); val wb_reg_inst_ei = Reg(resetVal = Bool(false)); @@ -470,7 +469,6 @@ class rocketCtrl extends Component mem_reg_xcpt_privileged <== Bool(false); mem_reg_xcpt_fpu <== Bool(false); mem_reg_xcpt_syscall <== Bool(false); - mem_reg_icmiss <== Bool(false); } otherwise { mem_reg_div_mul_val <== ex_reg_div_mul_val; @@ -486,7 +484,6 @@ class rocketCtrl extends Component mem_reg_xcpt_privileged <== ex_reg_xcpt_privileged; mem_reg_xcpt_fpu <== ex_reg_xcpt_fpu; mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; - mem_reg_icmiss <== ex_reg_icmiss; } mem_reg_mem_cmd <== ex_reg_mem_cmd; mem_reg_mem_type <== ex_reg_mem_type; @@ -518,10 +515,10 @@ class rocketCtrl extends Component Mux(p_irq_timer, UFix(23,5), UFix(0,5))); - val mem_xcpt_ma_ld = io.xcpt_ma_ld && !mem_reg_kill_dmem - val mem_xcpt_ma_st = io.xcpt_ma_st && !mem_reg_kill_dmem - val mem_xcpt_dtlb_ld = io.xcpt_dtlb_ld && !mem_reg_kill_dmem - val mem_xcpt_dtlb_st = io.xcpt_dtlb_st && !mem_reg_kill_dmem + val mem_xcpt_ma_ld = io.xcpt_ma_ld && !mem_reg_kill + val mem_xcpt_ma_st = io.xcpt_ma_st && !mem_reg_kill + val mem_xcpt_dtlb_ld = io.xcpt_dtlb_ld && !mem_reg_kill + val mem_xcpt_dtlb_st = io.xcpt_dtlb_st && !mem_reg_kill val mem_exception = interrupt || @@ -560,10 +557,9 @@ class rocketCtrl extends Component io.dpath.badvaddr_wen := wb_reg_badvaddr_wen; // replay mem stage PC on a DTLB miss - val mem_hazard = io.dtlb_miss || io.dmem.resp_nack || mem_reg_icmiss; - val mem_kill_dmem = io.dtlb_miss || mem_exception || mem_reg_kill_dmem; - val 
replay_mem = mem_hazard || mem_reg_replay; - val kill_mem = mem_hazard || mem_exception; + val replay_mem = io.dtlb_miss || io.dmem.resp_nack || mem_reg_replay; + val kill_mem = io.dtlb_miss || io.dmem.resp_nack || mem_exception; + val kill_dcache = io.dtlb_miss || mem_reg_kill || mem_exception; // control transfer from ex/mem val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match @@ -574,12 +570,12 @@ class rocketCtrl extends Component // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions - val ex_hazard = dcache_miss && Reg(io.dpath.mem_lu_bypass) || mem_reg_privileged || mem_reg_flush_inst - val mem_kill_ex = kill_mem || take_pc_mem - val kill_ex = mem_kill_ex || ex_hazard || !(io.dmem.req_rdy && io.dtlb_rdy) && ex_reg_mem_val + val replay_ex = dcache_miss && Reg(io.dpath.mem_lu_bypass) || mem_reg_privileged || mem_reg_flush_inst || + ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) + val kill_ex = take_pc_mem || replay_ex - mem_reg_replay <== kill_ex && !mem_kill_ex - mem_reg_kill_dmem <== kill_ex + mem_reg_replay <== (ex_reg_icmiss || replay_ex) && !take_pc_mem; + mem_reg_kill <== kill_ex io.dpath.sel_pc := Mux(replay_mem, PC_MEM, // dtlb miss @@ -689,9 +685,9 @@ class rocketCtrl extends Component io.dpath.irq_enable := wb_reg_inst_ei; io.dtlb_val := ex_reg_mem_val; - io.dtlb_kill := mem_reg_kill_dmem; + io.dtlb_kill := mem_reg_kill; io.dmem.req_val := ex_reg_mem_val; - io.dmem.req_kill := mem_kill_dmem; + io.dmem.req_kill := kill_dcache; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; } From addfe55735f4008bb5fd26388d39dbc058041548 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 13 Jan 2012 18:18:48 -0800 Subject: [PATCH 0095/1087] add FPGA memory generator script --- rocket/src/main/scala/icache.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 83a2067e..477e0490 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -87,7 +87,7 @@ class rocketICacheDM(lines: Int) extends Component { val tag_array = Mem4(lines, r_cpu_req_ppn); tag_array.setReadLatency(SRAM_READ_LATENCY); -// tag_array.setTarget('inst); + tag_array.setTarget('inst); val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); // valid bit array @@ -108,7 +108,7 @@ class rocketICacheDM(lines: Int) extends Component { io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix; val data_array = Mem4(lines*REFILL_CYCLES, io.mem.resp_data); data_array.setReadLatency(SRAM_READ_LATENCY); -// data_array.setTarget('inst); + data_array.setTarget('inst); val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val); // output signals diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 5557fdb3..4497ad83 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -344,7 +344,7 @@ class ReplayUnit extends Component { val sdq = Mem4(NSDQ, io.sdq_enq.bits) sdq.setReadLatency(SRAM_READ_LATENCY) -// sdq.setTarget('inst) + sdq.setTarget('inst) val sdq_dout = sdq.rw(sdq_addr, io.sdq_enq.bits, sdq_wen, cs = sdq_ren || sdq_wen) val sdq_free = replay_val && !replay_retry && rp_write @@ -471,7 +471,7 @@ class MetaDataArray(lines: Int) extends Component { val tag_array = Mem4(lines, io.resp.tag) tag_array.setReadLatency(SRAM_READ_LATENCY) -// tag_array.setTarget('inst) + tag_array.setTarget('inst) val tag_rdata = tag_array.rw(io.req.bits.idx, io.req.bits.data.tag, io.req.valid && io.req.bits.rw, cs = io.req.valid) io.resp.valid := vd_rdata1(1).toBool @@ -490,7 +490,7 @@ class DataArray(lines: Int) extends Component { val array = Mem4(lines*REFILL_CYCLES, io.resp) array.setReadLatency(SRAM_READ_LATENCY) -// array.setTarget('inst) + array.setTarget('inst) val 
addr = Cat(io.req.bits.idx, io.req.bits.offset) val rdata = array.rw(addr, io.req.bits.data, io.req.valid && io.req.bits.rw, wmask, cs = io.req.valid) io.resp := rdata From 1c8f49681185f8db36cb0b8160c4116c90a8e527 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 13 Jan 2012 20:04:11 -0800 Subject: [PATCH 0096/1087] fix fpga build --- rocket/src/main/scala/ctrl.scala | 5 +++-- rocket/src/main/scala/dpath_util.scala | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index d544c9d1..7d9ba6d9 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -589,8 +589,6 @@ class rocketCtrl extends Component io.dpath.wen_btb := !ex_btb_match && br_jr_taken && !kill_ex; - io.dpath.stallf := io.dpath.stalld; - // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. val ex_mem_cmd_load = ex_reg_mem_val && ((ex_reg_mem_cmd === M_XRD) || ex_reg_mem_cmd(3).toBool); @@ -649,12 +647,15 @@ class rocketCtrl extends Component io.dpath.mul_result_val || mem_wb ); + val ctrl_stallf = ctrl_stalld; val ctrl_killd = take_pc || ctrl_stalld; val ctrl_killf = take_pc || !io.imem.resp_val; io.flush_inst := mem_reg_flush_inst; + + io.dpath.stallf := ctrl_stallf; io.dpath.stalld := ctrl_stalld; io.dpath.killf := ctrl_killf; io.dpath.killd := ctrl_killd; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 1dd0f3b0..bf13d0c0 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -224,9 +224,21 @@ class ioRegfile extends Bundle() class rocketDpathRegfile extends Component { override val io = new ioRegfile(); - val regfile = Mem(32, io.w0.en && (io.w0.addr != UFix(0,5)), io.w0.addr, io.w0.data); - io.r0.data := Mux((io.r0.addr === UFix(0, 5)) || !io.r0.en, Bits(0, 64), regfile(io.r0.addr)); - io.r1.data := Mux((io.r1.addr === UFix(0, 5)) || !io.r1.en, 
Bits(0, 64), regfile(io.r1.addr)); + + // FIXME: remove the first "if" case once Mem4 C backend bug is fixed + if (SRAM_READ_LATENCY == 0) { + val regfile = Mem(32, io.w0.en && (io.w0.addr != UFix(0,5)), io.w0.addr, io.w0.data); + io.r0.data := Mux((io.r0.addr === UFix(0, 5)) || !io.r0.en, Bits(0, 64), regfile(io.r0.addr)); + io.r1.data := Mux((io.r1.addr === UFix(0, 5)) || !io.r1.en, Bits(0, 64), regfile(io.r1.addr)); + } + else { + val regfile = Mem4(32, io.w0.data); + regfile.setReadLatency(0); + regfile.setTarget('inst); + regfile.write(io.w0.addr, io.w0.data, io.w0.en); + io.r0.data := Mux((io.r0.addr === UFix(0, 5)) || !io.r0.en, Bits(0, 64), regfile(io.r0.addr)); + io.r1.data := Mux((io.r1.addr === UFix(0, 5)) || !io.r1.en, Bits(0, 64), regfile(io.r1.addr)); + } } } From 0369b05deb41f2eef3c4edd031d5d16c3f18664e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 17 Jan 2012 21:12:31 -0800 Subject: [PATCH 0097/1087] move replays to writeback stage --- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/ctrl.scala | 59 ++++++++++++++------------ rocket/src/main/scala/dpath.scala | 10 +++-- rocket/src/main/scala/dpath_util.scala | 13 +++--- rocket/src/main/scala/dtlb.scala | 6 +-- rocket/src/main/scala/itlb.scala | 5 ++- 6 files changed, 54 insertions(+), 41 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 6d0b92ce..bada957b 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -21,7 +21,7 @@ object Constants val PC_BR = UFix(3, 4); val PC_JR = UFix(4, 4); val PC_PCR = UFix(5, 4); - val PC_MEM = UFix(6, 4); + val PC_WB = UFix(6, 4); val PC_EVEC = UFix(7, 4); val KF_Y = UFix(1, 1); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 7d9ba6d9..8955aa6a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -11,6 +11,7 @@ class ioCtrlDpath extends Bundle() // outputs to datapath val 
sel_pc = UFix(4, 'output); val wen_btb = Bool('output); + val clr_btb = Bool('output); val stallf = Bool('output); val stalld = Bool('output); val killf = Bool('output); @@ -327,6 +328,7 @@ class rocketCtrl extends Component val id_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val id_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val id_reg_icmiss = Reg(resetVal = Bool(false)); + val id_reg_replay = Reg(resetVal = Bool(false)); val ex_reg_br_type = Reg(){UFix(width = 4)}; val ex_reg_btb_hit = Reg(){Bool()}; @@ -345,7 +347,7 @@ class rocketCtrl extends Component val ex_reg_xcpt_privileged = Reg(resetVal = Bool(false)); val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); - val ex_reg_icmiss = Reg(resetVal = Bool(false)); + val ex_reg_replay = Reg(resetVal = Bool(false)); val mem_reg_inst_di = Reg(resetVal = Bool(false)); val mem_reg_inst_ei = Reg(resetVal = Bool(false)); @@ -364,22 +366,24 @@ class rocketCtrl extends Component val wb_reg_eret = Reg(resetVal = Bool(false)); val wb_reg_exception = Reg(resetVal = Bool(false)); val wb_reg_badvaddr_wen = Reg(resetVal = Bool(false)); + val wb_reg_replay = Reg(resetVal = Bool(false)); val wb_reg_cause = Reg(){UFix()}; val take_pc = Wire() { Bool() }; when (!io.dpath.stalld) { when (io.dpath.killf) { + id_reg_btb_hit <== Bool(false); id_reg_xcpt_ma_inst <== Bool(false); id_reg_xcpt_itlb <== Bool(false); - id_reg_btb_hit <== Bool(false); } otherwise{ + id_reg_btb_hit <== io.dpath.btb_hit; id_reg_xcpt_ma_inst <== if_reg_xcpt_ma_inst; id_reg_xcpt_itlb <== io.xcpt_itlb; - id_reg_btb_hit <== io.dpath.btb_hit; } - id_reg_icmiss <== !take_pc && !io.imem.resp_val; + id_reg_icmiss <== !io.imem.resp_val; + id_reg_replay <== !take_pc && !io.imem.resp_val; } // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim) @@ -402,7 +406,7 @@ class rocketCtrl extends Component ex_reg_xcpt_privileged <== Bool(false); ex_reg_xcpt_fpu <== Bool(false); 
ex_reg_xcpt_syscall <== Bool(false); - ex_reg_icmiss <== Bool(false); + ex_reg_replay <== Bool(false); } otherwise { ex_reg_br_type <== id_br_type; @@ -421,7 +425,7 @@ class rocketCtrl extends Component // ex_reg_xcpt_fpu <== id_fp_val.toBool; ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== id_syscall.toBool; - ex_reg_icmiss <== id_reg_icmiss; + ex_reg_replay <== id_reg_replay; } ex_reg_mem_cmd <== id_mem_cmd; ex_reg_mem_type <== id_mem_type; @@ -492,14 +496,14 @@ class rocketCtrl extends Component wb_reg_eret <== Bool(false); wb_reg_inst_di <== Bool(false); wb_reg_inst_ei <== Bool(false); + wb_reg_div_mul_val <== Bool(false); } otherwise { wb_reg_eret <== mem_reg_eret; wb_reg_inst_di <== mem_reg_inst_di; wb_reg_inst_ei <== mem_reg_inst_ei; + wb_reg_div_mul_val <== mem_reg_div_mul_val; } - - wb_reg_div_mul_val <== mem_reg_div_mul_val; // exception handling // FIXME: verify PC in MEM stage points to valid, restartable instruction @@ -547,40 +551,42 @@ class rocketCtrl extends Component Mux(mem_xcpt_dtlb_st, UFix(11,5), // store fault UFix(0,5))))))))))); // instruction address misaligned - wb_reg_exception <== mem_exception; - wb_reg_badvaddr_wen <== mem_xcpt_dtlb_ld || mem_xcpt_dtlb_st; - wb_reg_cause <== mem_cause; - // write cause to PCR on an exception io.dpath.exception := wb_reg_exception; io.dpath.cause := wb_reg_cause; io.dpath.badvaddr_wen := wb_reg_badvaddr_wen; - // replay mem stage PC on a DTLB miss - val replay_mem = io.dtlb_miss || io.dmem.resp_nack || mem_reg_replay; - val kill_mem = io.dtlb_miss || io.dmem.resp_nack || mem_exception; - val kill_dcache = io.dtlb_miss || mem_reg_kill || mem_exception; - // control transfer from ex/mem val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match val br_jr_taken = br_taken || jr_taken val take_pc_ex = !ex_btb_match && br_jr_taken || ex_reg_btb_hit && !br_jr_taken - val take_pc_mem = mem_exception || mem_reg_eret || replay_mem - take_pc <== take_pc_ex || take_pc_mem + val take_pc_mem = Bool(false) 
//mem_exception || mem_reg_eret; + val take_pc_wb = wb_reg_replay || wb_reg_exception || wb_reg_eret; + take_pc <== take_pc_ex || take_pc_mem || take_pc_wb; + + // replay mem stage PC on a DTLB miss + val replay_mem = io.dtlb_miss || io.dmem.resp_nack || mem_reg_replay; + val kill_mem = io.dtlb_miss || io.dmem.resp_nack || take_pc_wb || mem_exception || mem_reg_kill; + val kill_dcache = io.dtlb_miss || take_pc_wb || mem_exception || mem_reg_kill; // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions val replay_ex = dcache_miss && Reg(io.dpath.mem_lu_bypass) || mem_reg_privileged || mem_reg_flush_inst || - ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) - val kill_ex = take_pc_mem || replay_ex + ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) + val kill_ex = take_pc_mem || take_pc_wb || replay_ex - mem_reg_replay <== (ex_reg_icmiss || replay_ex) && !take_pc_mem; - mem_reg_kill <== kill_ex + mem_reg_replay <== replay_ex && !(take_pc_mem || take_pc_wb); + mem_reg_kill <== kill_ex; + + wb_reg_replay <== replay_mem && !take_pc_wb; + wb_reg_exception <== mem_exception && !take_pc_wb; + wb_reg_badvaddr_wen <== (mem_xcpt_dtlb_ld || mem_xcpt_dtlb_st) && !take_pc_wb; + wb_reg_cause <== mem_cause; io.dpath.sel_pc := - Mux(replay_mem, PC_MEM, // dtlb miss - Mux(mem_exception, PC_EVEC, // exception - Mux(mem_reg_eret, PC_PCR, // eret instruction + Mux(wb_reg_exception, PC_EVEC, // exception + Mux(wb_reg_replay, PC_WB, // replay + Mux(wb_reg_eret, PC_PCR, // eret instruction Mux(ex_reg_btb_hit && !br_jr_taken, PC_EX4, // mispredicted not taken branch Mux(!ex_btb_match && br_taken, PC_BR, // mispredicted taken branch Mux(!ex_btb_match && jr_taken, PC_JR, // mispredicted jump register @@ -588,6 +594,7 @@ class rocketCtrl extends Component PC_4))))))); // PC+4 io.dpath.wen_btb := !ex_btb_match && br_jr_taken && !kill_ex; + io.dpath.clr_btb := ex_reg_btb_hit && !br_jr_taken || 
id_reg_icmiss; // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. val ex_mem_cmd_load = diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index bb5aae43..4ef112f6 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -111,6 +111,7 @@ class rocketDpath extends Component val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); // writeback definitions + val wb_reg_valid = Reg(resetVal = Bool(false)); val wb_reg_pc = Reg() { UFix() }; val wb_reg_waddr = Reg() { UFix() }; val wb_reg_wdata = Reg() { Bits() }; @@ -145,9 +146,9 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_EX4, ex_reg_pc_plus4, Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, - Mux(io.ctrl.sel_pc === PC_PCR, mem_reg_wdata(VADDR_BITS-1,0), // only used for ERET + Mux(io.ctrl.sel_pc === PC_PCR, wb_reg_wdata(VADDR_BITS-1,0), // only used for ERET Mux(io.ctrl.sel_pc === PC_EVEC, pcr.io.evec, - Mux(io.ctrl.sel_pc === PC_MEM, mem_reg_pc, + Mux(io.ctrl.sel_pc === PC_WB, wb_reg_pc, if_pc_plus4))))))); // PC_4 when (!io.ctrl.stallf) { @@ -164,6 +165,7 @@ class rocketDpath extends Component btb.io.current_pc4 := if_pc_plus4; btb.io.hit ^^ io.ctrl.btb_hit; btb.io.wen ^^ io.ctrl.wen_btb; + btb.io.clr ^^ io.ctrl.clr_btb; btb.io.correct_pc4 := ex_reg_pc_plus4; io.ctrl.btb_match := id_reg_pc === jr_br_target; @@ -345,7 +347,7 @@ class rocketDpath extends Component tsc_reg <== tsc_reg + UFix(1); // instructions retired counter val irt_reg = Reg(resetVal = UFix(0,64)); - when (mem_reg_valid) { irt_reg <== irt_reg + UFix(1); } + when (wb_reg_valid) { irt_reg <== irt_reg + UFix(1); } // writeback select mux ex_wdata := @@ -394,10 +396,12 @@ class rocketDpath extends Component wb_reg_raddr2 <== mem_reg_raddr2; when (io.ctrl.killm) { + wb_reg_valid <== Bool(false); wb_reg_ctrl_wen <== Bool(false); wb_reg_ctrl_wen_pcr <== Bool(false); } otherwise { + wb_reg_valid <== 
mem_reg_valid; wb_reg_ctrl_wen <== mem_reg_ctrl_wen && !io.dmem.resp_miss; wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index bf13d0c0..c011b614 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -12,6 +12,7 @@ class ioDpathBTB extends Bundle() val hit = Bool('output); val target = UFix(VADDR_BITS, 'output); val wen = Bool('input); + val clr = Bool('input); val correct_pc4 = UFix(VADDR_BITS, 'input); val correct_target = UFix(VADDR_BITS, 'input); } @@ -27,13 +28,13 @@ class rocketDpathBTB(entries: Int) extends Component val tagmsb = (VADDR_BITS-idxmsb-1)+(VADDR_BITS-idxlsb)-1; val taglsb = (VADDR_BITS-idxlsb); - val rst_lwlr_pf = Mem(entries, io.wen, io.correct_pc4(idxmsb,idxlsb), UFix(1,1), resetVal = UFix(0,1)); - val lwlr_pf = Mem(entries, io.wen, io.correct_pc4(idxmsb,idxlsb), - Cat(io.correct_pc4(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb)), resetVal = UFix(0,1)); - val is_val = rst_lwlr_pf(io.current_pc4(idxmsb,idxlsb)); - val tag_target = lwlr_pf(io.current_pc4(idxmsb, idxlsb)); + val vb_array = Mem(entries, io.wen || io.clr, io.correct_pc4(idxmsb,idxlsb), !io.clr, resetVal = Bool(false)); + val tag_target_array = Mem(entries, io.wen, io.correct_pc4(idxmsb,idxlsb), + Cat(io.correct_pc4(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb))) + val is_val = vb_array(io.current_pc4(idxmsb,idxlsb)); + val tag_target = tag_target_array(io.current_pc4(idxmsb, idxlsb)); - io.hit := (is_val & (tag_target(tagmsb,taglsb) === io.current_pc4(VADDR_BITS-1, idxmsb+1))).toBool; + io.hit := is_val && (tag_target(tagmsb,taglsb) === io.current_pc4(VADDR_BITS-1, idxmsb+1)); io.target := Cat(tag_target(taglsb-1, 0), Bits(0,idxlsb)).toUFix; } diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index cea13c60..372cc4fd 100644 --- a/rocket/src/main/scala/dtlb.scala +++ 
b/rocket/src/main/scala/dtlb.scala @@ -55,10 +55,10 @@ class rocketDTLB(entries: Int) extends Component r_cpu_req_vpn <== io.cpu.req_vpn; r_cpu_req_cmd <== io.cpu.req_cmd; r_cpu_req_asid <== io.cpu.req_asid; + r_cpu_req_val <== Bool(true); } - - when (io.cpu.req_rdy) { - r_cpu_req_val <== io.cpu.req_val; + otherwise { + r_cpu_req_val <== Bool(false); } val req_load = (r_cpu_req_cmd === M_XRD); diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 827d731c..f63fd1c7 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -106,9 +106,10 @@ class rocketITLB(entries: Int) extends Component when (io.cpu.req_val && io.cpu.req_rdy) { r_cpu_req_vpn <== io.cpu.req_vpn; r_cpu_req_asid <== io.cpu.req_asid; + r_cpu_req_val <== Bool(true); } - when (io.cpu.req_rdy) { - r_cpu_req_val <== io.cpu.req_val; + otherwise { + r_cpu_req_val <== Bool(false); } val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); From e4cf6391d70bf62dadc5ac14d43a9a52657a4f31 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 17 Jan 2012 23:47:35 -0800 Subject: [PATCH 0098/1087] fix i$ miss pathology and badvaddr bug --- rocket/src/main/scala/ctrl.scala | 52 +++++++++++++++----------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 8955aa6a..c4dd5581 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -185,26 +185,26 @@ class rocketCtrl extends Component JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LH-> 
List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SB-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SH-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, 
A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SB-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SH-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, 
A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), LUI-> List(Y, BR_N, REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), @@ -292,9 +292,6 @@ 
class rocketCtrl extends Component )); val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst, resetVal = Bool(false)); - - // FIXME - io.imem.req_val := Bool(true) val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_sel_alu1 :: id_fn_dw :: id_fn_alu :: csremainder = cs; val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: Nil = csremainder; @@ -560,9 +557,8 @@ class rocketCtrl extends Component val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match val br_jr_taken = br_taken || jr_taken val take_pc_ex = !ex_btb_match && br_jr_taken || ex_reg_btb_hit && !br_jr_taken - val take_pc_mem = Bool(false) //mem_exception || mem_reg_eret; val take_pc_wb = wb_reg_replay || wb_reg_exception || wb_reg_eret; - take_pc <== take_pc_ex || take_pc_mem || take_pc_wb; + take_pc <== take_pc_ex || take_pc_wb; // replay mem stage PC on a DTLB miss val replay_mem = io.dtlb_miss || io.dmem.resp_nack || mem_reg_replay; @@ -573,9 +569,9 @@ class rocketCtrl extends Component // for privileged instructions, and for fence.i instructions val replay_ex = dcache_miss && Reg(io.dpath.mem_lu_bypass) || mem_reg_privileged || mem_reg_flush_inst || ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) - val kill_ex = take_pc_mem || take_pc_wb || replay_ex + val kill_ex = take_pc_wb || replay_ex - mem_reg_replay <== replay_ex && !(take_pc_mem || take_pc_wb); + mem_reg_replay <== replay_ex && !take_pc_wb; mem_reg_kill <== kill_ex; wb_reg_replay <== replay_mem && !take_pc_wb; @@ -593,8 +589,10 @@ class rocketCtrl extends Component Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB PC_4))))))); // PC+4 - io.dpath.wen_btb := !ex_btb_match && br_jr_taken && !kill_ex; + io.dpath.wen_btb := !ex_btb_match && br_jr_taken; io.dpath.clr_btb := ex_reg_btb_hit && !br_jr_taken || id_reg_icmiss; + + 
io.imem.req_val := take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_icmiss) // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. val ex_mem_cmd_load = From 1d76255dc198990ea3f8c6bd0dcf2a7002f274f4 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 18 Jan 2012 10:28:48 -0800 Subject: [PATCH 0099/1087] new chisel version jar and find and replace INPUT and OUTPUT --- rocket/src/main/scala/arbiter.scala | 18 +-- rocket/src/main/scala/cpu.scala | 14 +- rocket/src/main/scala/ctrl.scala | 134 ++++++++++---------- rocket/src/main/scala/ctrl_util.scala | 28 ++-- rocket/src/main/scala/dcache.scala | 74 +++++------ rocket/src/main/scala/divider.scala | 42 +++--- rocket/src/main/scala/dpath.scala | 26 ++-- rocket/src/main/scala/dpath_alu.scala | 14 +- rocket/src/main/scala/dpath_util.scala | 56 ++++---- rocket/src/main/scala/dtlb.scala | 26 ++-- rocket/src/main/scala/icache.scala | 24 ++-- rocket/src/main/scala/icache_prefetch.scala | 14 +- rocket/src/main/scala/itlb.scala | 50 ++++---- rocket/src/main/scala/multiplier.scala | 24 ++-- rocket/src/main/scala/nbdcache.scala | 116 +++++++++-------- rocket/src/main/scala/ptw.scala | 2 +- rocket/src/main/scala/queues.scala | 50 ++++---- rocket/src/main/scala/util.scala | 18 +-- 18 files changed, 372 insertions(+), 358 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 1cf2aa9d..d82b2050 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -6,16 +6,16 @@ import Constants._; class ioMem() extends Bundle { - val req_val = Bool('output); - val req_rdy = Bool('input); - val req_rw = Bool('output); - val req_addr = UFix(PADDR_BITS - OFFSET_BITS, 'output); - val req_wdata = Bits(MEM_DATA_BITS, 'output); - val req_tag = Bits(MEM_TAG_BITS, 'output); + val req_val = Bool(OUTPUT); + val req_rdy = Bool(INPUT); + val req_rw = Bool(OUTPUT); + val req_addr = UFix(PADDR_BITS - OFFSET_BITS, OUTPUT); + val 
req_wdata = Bits(MEM_DATA_BITS, OUTPUT); + val req_tag = Bits(MEM_TAG_BITS, OUTPUT); - val resp_val = Bool('input); - val resp_tag = Bits(MEM_TAG_BITS, 'input); - val resp_data = Bits(MEM_DATA_BITS, 'input); + val resp_val = Bool(INPUT); + val resp_tag = Bits(MEM_TAG_BITS, INPUT); + val resp_data = Bits(MEM_DATA_BITS, INPUT); } class ioMemArbiter extends Bundle() { diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index fc7e0494..b93392ff 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -6,21 +6,21 @@ import Constants._; class ioDebug(view: List[String] = null) extends Bundle(view) { - val error_mode = Bool('output); + val error_mode = Bool(OUTPUT); } class ioHost(view: List[String] = null) extends Bundle(view) { - val from_wen = Bool('input); - val from = Bits(64, 'input); - val to = Bits(64, 'output); + val from_wen = Bool(INPUT); + val from = Bits(64, INPUT); + val to = Bits(64, OUTPUT); } class ioConsole(view: List[String] = null) extends Bundle(view) { - val rdy = Bool('input); - val valid = Bool('output); - val bits = Bits(8, 'output); + val rdy = Bool(INPUT); + val valid = Bool(OUTPUT); + val bits = Bits(8, OUTPUT); } class ioRocket extends Bundle() diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index c4dd5581..ad880c65 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -9,67 +9,67 @@ import Instructions._ class ioCtrlDpath extends Bundle() { // outputs to datapath - val sel_pc = UFix(4, 'output); - val wen_btb = Bool('output); - val clr_btb = Bool('output); - val stallf = Bool('output); - val stalld = Bool('output); - val killf = Bool('output); - val killd = Bool('output); - val killx = Bool('output); - val killm = Bool('output); - val ren2 = Bool('output); - val ren1 = Bool('output); - val sel_alu2 = UFix(2, 'output); - val sel_alu1 = Bool('output); - val fn_dw = Bool('output); - val fn_alu = UFix(4, 'output); - val 
mul_val = Bool('output); - val mul_fn = UFix(2, 'output); - val mul_wb = Bool('output); - val div_val = Bool('output); - val div_fn = UFix(2, 'output); - val div_wb = Bool('output); - val sel_wa = Bool('output); - val sel_wb = UFix(3, 'output); - val ren_pcr = Bool('output); - val wen_pcr = Bool('output); - val id_eret = Bool('output); - val wb_eret = Bool('output); - val mem_load = Bool('output); - val wen = Bool('output); + val sel_pc = UFix(4, OUTPUT); + val wen_btb = Bool(OUTPUT); + val clr_btb = Bool(OUTPUT); + val stallf = Bool(OUTPUT); + val stalld = Bool(OUTPUT); + val killf = Bool(OUTPUT); + val killd = Bool(OUTPUT); + val killx = Bool(OUTPUT); + val killm = Bool(OUTPUT); + val ren2 = Bool(OUTPUT); + val ren1 = Bool(OUTPUT); + val sel_alu2 = UFix(2, OUTPUT); + val sel_alu1 = Bool(OUTPUT); + val fn_dw = Bool(OUTPUT); + val fn_alu = UFix(4, OUTPUT); + val mul_val = Bool(OUTPUT); + val mul_fn = UFix(2, OUTPUT); + val mul_wb = Bool(OUTPUT); + val div_val = Bool(OUTPUT); + val div_fn = UFix(2, OUTPUT); + val div_wb = Bool(OUTPUT); + val sel_wa = Bool(OUTPUT); + val sel_wb = UFix(3, OUTPUT); + val ren_pcr = Bool(OUTPUT); + val wen_pcr = Bool(OUTPUT); + val id_eret = Bool(OUTPUT); + val wb_eret = Bool(OUTPUT); + val mem_load = Bool(OUTPUT); + val wen = Bool(OUTPUT); // instruction in execute is an unconditional jump - val ex_jmp = Bool('output); - val ex_jr = Bool('output); + val ex_jmp = Bool(OUTPUT); + val ex_jr = Bool(OUTPUT); // enable/disable interrupts - val irq_enable = Bool('output); - val irq_disable = Bool('output); + val irq_enable = Bool(OUTPUT); + val irq_disable = Bool(OUTPUT); // exception handling - val exception = Bool('output); - val cause = UFix(5,'output); - val badvaddr_wen = Bool('output); // high for a load/store access fault + val exception = Bool(OUTPUT); + val cause = UFix(5,OUTPUT); + val badvaddr_wen = Bool(OUTPUT); // high for a load/store access fault // inputs from datapath - val xcpt_ma_inst = Bool('input); // high on a 
misaligned/illegal virtual PC - val btb_hit = Bool('input); - val btb_match = Bool('input); - val inst = Bits(32, 'input); - val br_eq = Bool('input); - val br_lt = Bool('input); - val br_ltu = Bool('input); - val div_rdy = Bool('input); - val div_result_val = Bool('input); - val mul_rdy = Bool('input); - val mul_result_val = Bool('input); - val mem_lu_bypass = Bool('input); - val ex_waddr = UFix(5,'input); // write addr from execute stage - val mem_waddr = UFix(5,'input); // write addr from memory stage - val wb_waddr = UFix(5,'input); // write addr from writeback stage - val status = Bits(17, 'input); - val sboard_clr = Bool('input); - val sboard_clra = UFix(5, 'input); - val mem_valid = Bool('input); // high if there's a valid (not flushed) instruction in mem stage - val irq_timer = Bool('input); - val irq_ipi = Bool('input); + val xcpt_ma_inst = Bool(INPUT); // high on a misaligned/illegal virtual PC + val btb_hit = Bool(INPUT); + val btb_match = Bool(INPUT); + val inst = Bits(32, INPUT); + val br_eq = Bool(INPUT); + val br_lt = Bool(INPUT); + val br_ltu = Bool(INPUT); + val div_rdy = Bool(INPUT); + val div_result_val = Bool(INPUT); + val mul_rdy = Bool(INPUT); + val mul_result_val = Bool(INPUT); + val mem_lu_bypass = Bool(INPUT); + val ex_waddr = UFix(5,INPUT); // write addr from execute stage + val mem_waddr = UFix(5,INPUT); // write addr from memory stage + val wb_waddr = UFix(5,INPUT); // write addr from writeback stage + val status = Bits(17, INPUT); + val sboard_clr = Bool(INPUT); + val sboard_clra = UFix(5, INPUT); + val mem_valid = Bool(INPUT); // high if there's a valid (not flushed) instruction in mem stage + val irq_timer = Bool(INPUT); + val irq_ipi = Bool(INPUT); } class ioCtrlAll extends Bundle() @@ -78,16 +78,16 @@ class ioCtrlAll extends Bundle() val console = new ioConsole(List("rdy")); val imem = new ioImem(List("req_val", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", 
"resp_replay", "resp_nack")).flip(); - val dtlb_val = Bool('output); - val dtlb_kill = Bool('output); - val dtlb_rdy = Bool('input); - val dtlb_miss = Bool('input); - val flush_inst = Bool('output); - val xcpt_dtlb_ld = Bool('input); - val xcpt_dtlb_st = Bool('input); - val xcpt_itlb = Bool('input); - val xcpt_ma_ld = Bool('input); - val xcpt_ma_st = Bool('input); + val dtlb_val = Bool(OUTPUT); + val dtlb_kill = Bool(OUTPUT); + val dtlb_rdy = Bool(INPUT); + val dtlb_miss = Bool(INPUT); + val flush_inst = Bool(OUTPUT); + val xcpt_dtlb_ld = Bool(INPUT); + val xcpt_dtlb_st = Bool(INPUT); + val xcpt_itlb = Bool(INPUT); + val xcpt_ma_ld = Bool(INPUT); + val xcpt_ma_st = Bool(INPUT); } class rocketCtrl extends Component diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index bed2984a..61cc3bda 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -6,16 +6,16 @@ import Constants._; class ioCtrlSboard extends Bundle() { - val clr = Bool('input); - val clra = UFix(5, 'input); - val set = Bool('input); - val seta = UFix(5, 'input); - val raddra = UFix(5, 'input); - val raddrb = UFix(5, 'input); - val raddrc = UFix(5, 'input); - val stalla = Bool('output); - val stallb = Bool('output); - val stallc = Bool('output); + val clr = Bool(INPUT); + val clra = UFix(5, INPUT); + val set = Bool(INPUT); + val seta = UFix(5, INPUT); + val raddra = UFix(5, INPUT); + val raddrb = UFix(5, INPUT); + val raddrc = UFix(5, INPUT); + val stalla = Bool(OUTPUT); + val stallb = Bool(OUTPUT); + val stallc = Bool(OUTPUT); } class rocketCtrlSboard extends Component @@ -34,10 +34,10 @@ class rocketCtrlSboard extends Component class ioCtrlCnt extends Bundle() { - val enq = Bool('input); - val deq = Bool('input); - val empty = Bool('output); - val full = Bool('output); + val enq = Bool(INPUT); + val deq = Bool(INPUT); + val empty = Bool(OUTPUT); + val full = Bool(OUTPUT); } class rocketCtrlCnt(n_bits: Int, limit: Int) 
extends Component diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index fd7bfdea..e33f4865 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -7,37 +7,37 @@ import scala.math._; // interface between D$ and processor/DTLB class ioDmem(view: List[String] = null) extends Bundle(view) { - val req_kill = Bool('input); - val req_val = Bool('input); - val req_rdy = Bool('output); - val req_cmd = Bits(4, 'input); - val req_type = Bits(3, 'input); - val req_idx = Bits(PGIDX_BITS, 'input); - val req_ppn = Bits(PPN_BITS, 'input); - val req_data = Bits(64, 'input); - val req_tag = Bits(DCACHE_TAG_BITS, 'input); - val xcpt_ma_ld = Bool('output); // misaligned load - val xcpt_ma_st = Bool('output); // misaligned store - val resp_miss = Bool('output); - val resp_nack = Bool('output); - val resp_val = Bool('output); - val resp_replay = Bool('output); - val resp_data = Bits(64, 'output); - val resp_data_subword = Bits(64, 'output); - val resp_tag = Bits(DCACHE_TAG_BITS, 'output); + val req_kill = Bool(INPUT); + val req_val = Bool(INPUT); + val req_rdy = Bool(OUTPUT); + val req_cmd = Bits(4, INPUT); + val req_type = Bits(3, INPUT); + val req_idx = Bits(PGIDX_BITS, INPUT); + val req_ppn = Bits(PPN_BITS, INPUT); + val req_data = Bits(64, INPUT); + val req_tag = Bits(DCACHE_TAG_BITS, INPUT); + val xcpt_ma_ld = Bool(OUTPUT); // misaligned load + val xcpt_ma_st = Bool(OUTPUT); // misaligned store + val resp_miss = Bool(OUTPUT); + val resp_nack = Bool(OUTPUT); + val resp_val = Bool(OUTPUT); + val resp_replay = Bool(OUTPUT); + val resp_data = Bits(64, OUTPUT); + val resp_data_subword = Bits(64, OUTPUT); + val resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT); } // interface between D$ and next level in memory hierarchy class ioDcache(view: List[String] = null) extends Bundle(view) { - val req_addr = UFix(PADDR_BITS - OFFSET_BITS, 'input); - val req_tag = UFix(DMEM_TAG_BITS, 'input); - val req_val = Bool('input); - val 
req_rdy = Bool('output); - val req_wdata = Bits(MEM_DATA_BITS, 'input); - val req_rw = Bool('input); - val resp_data = Bits(MEM_DATA_BITS, 'output); - val resp_tag = Bits(DMEM_TAG_BITS, 'output); - val resp_val = Bool('output); + val req_addr = UFix(PADDR_BITS - OFFSET_BITS, INPUT); + val req_tag = UFix(DMEM_TAG_BITS, INPUT); + val req_val = Bool(INPUT); + val req_rdy = Bool(OUTPUT); + val req_wdata = Bits(MEM_DATA_BITS, INPUT); + val req_rw = Bool(INPUT); + val resp_data = Bits(MEM_DATA_BITS, OUTPUT); + val resp_tag = Bits(DMEM_TAG_BITS, OUTPUT); + val resp_val = Bool(OUTPUT); } class ioDCacheDM extends Bundle() { @@ -47,11 +47,11 @@ class ioDCacheDM extends Bundle() { class rocketDCacheStoreGen extends Component { val io = new Bundle { - val req_type = Bits(3, 'input); - val req_addr_lsb = Bits(3, 'input); - val req_data = Bits(64, 'input); - val store_wmask = Bits(64, 'output); - val store_data = Bits(64, 'output); + val req_type = Bits(3, INPUT); + val req_addr_lsb = Bits(3, INPUT); + val req_data = Bits(64, INPUT); + val store_wmask = Bits(64, OUTPUT); + val store_data = Bits(64, OUTPUT); } // generate write mask and store data signals based on store type and address LSBs @@ -481,11 +481,11 @@ class rocketDCacheDM(lines: Int) extends Component { class rocketDCacheAmoALU extends Component { val io = new Bundle { - val cmd = Bits(4, 'input); - val wmask = Bits(8, 'input); - val lhs = UFix(64, 'input); - val rhs = UFix(64, 'input); - val result = UFix(64, 'output); + val cmd = Bits(4, INPUT); + val wmask = Bits(8, INPUT); + val lhs = UFix(64, INPUT); + val rhs = UFix(64, INPUT); + val result = UFix(64, OUTPUT); } // val signed_cmp = (op === M_XA_MIN) || (op === M_XA_MAX); diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 7e2b3955..1d1c8d57 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -6,33 +6,33 @@ import Constants._; class ioDivider(width: Int) extends Bundle { // 
requests - val div_val = Bool('input); - val div_kill = Bool('input); - val div_rdy = Bool('output); - val dw = UFix(1, 'input); - val div_fn = UFix(2, 'input); - val div_waddr = UFix(5, 'input); - val dpath_rs1 = Bits(width, 'input); - val dpath_rs2 = Bits(width, 'input); + val div_val = Bool(INPUT); + val div_kill = Bool(INPUT); + val div_rdy = Bool(OUTPUT); + val dw = UFix(1, INPUT); + val div_fn = UFix(2, INPUT); + val div_waddr = UFix(5, INPUT); + val dpath_rs1 = Bits(width, INPUT); + val dpath_rs2 = Bits(width, INPUT); // responses - val div_result_bits = Bits(width, 'output); - val div_result_tag = UFix(5, 'output); - val div_result_val = Bool('output); - val div_result_rdy = Bool('input); + val div_result_bits = Bits(width, OUTPUT); + val div_result_tag = UFix(5, OUTPUT); + val div_result_val = Bool(OUTPUT); + val div_result_rdy = Bool(INPUT); } // class ioDivider extends Bundle { // // requests -// val req_val = Bool('input); -// val req_rdy = Bool('output); -// val req_fn = UFix(3, 'input); -// val req_tag = UFix(5, 'input); -// val req_rs1 = Bits(64, 'input); -// val req_rs2 = Bits(64, 'input); +// val req_val = Bool(INPUT); +// val req_rdy = Bool(OUTPUT); +// val req_fn = UFix(3, INPUT); +// val req_tag = UFix(5, INPUT); +// val req_rs1 = Bits(64, INPUT); +// val req_rs2 = Bits(64, INPUT); // // responses -// val resp_val = Bool('output); -// val resp_data = Bits(64, 'output); -// val resp_tag = UFix(5, 'output); +// val resp_val = Bool(OUTPUT); +// val resp_data = Bits(64, OUTPUT); +// val resp_tag = UFix(5, OUTPUT); // } class rocketDivider(width : Int) extends Component { diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 4ef112f6..b0bab419 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -7,21 +7,21 @@ import Instructions._ class ioDpathDmem extends Bundle() { - val req_addr = UFix(VADDR_BITS, 'output); - val req_tag = UFix(CPU_TAG_BITS, 'output); - val req_data = 
Bits(64, 'output); - val resp_val = Bool('input); - val resp_miss = Bool('input); - val resp_replay = Bool('input); - val resp_tag = Bits(CPU_TAG_BITS, 'input); - val resp_data = Bits(64, 'input); - val resp_data_subword = Bits(64, 'input); + val req_addr = UFix(VADDR_BITS, OUTPUT); + val req_tag = UFix(CPU_TAG_BITS, OUTPUT); + val req_data = Bits(64, OUTPUT); + val resp_val = Bool(INPUT); + val resp_miss = Bool(INPUT); + val resp_replay = Bool(INPUT); + val resp_tag = Bits(CPU_TAG_BITS, INPUT); + val resp_data = Bits(64, INPUT); + val resp_data_subword = Bits(64, INPUT); } class ioDpathImem extends Bundle() { - val req_addr = UFix(VADDR_BITS, 'output); - val resp_data = Bits(32, 'input); + val req_addr = UFix(VADDR_BITS, OUTPUT); + val resp_data = Bits(32, INPUT); } class ioDpathAll extends Bundle() @@ -32,8 +32,8 @@ class ioDpathAll extends Bundle() val debug = new ioDebug(); val dmem = new ioDpathDmem(); val imem = new ioDpathImem(); - val ptbr_wen = Bool('output); - val ptbr = UFix(PADDR_BITS, 'output); + val ptbr_wen = Bool(OUTPUT); + val ptbr = UFix(PADDR_BITS, OUTPUT); } class rocketDpath extends Component diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 114acd57..cee39ba2 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -8,13 +8,13 @@ import Constants._ import Instructions._ class ioALU extends Bundle(){ - val dw = UFix(1, 'input); - val fn = UFix(4, 'input); - val shamt = UFix(6, 'input); - val in2 = UFix(64, 'input); - val in1 = UFix(64, 'input); - val out = UFix(64, 'output); - val adder_out = UFix(64, 'output); + val dw = UFix(1, INPUT); + val fn = UFix(4, INPUT); + val shamt = UFix(6, INPUT); + val in2 = UFix(64, INPUT); + val in1 = UFix(64, INPUT); + val out = UFix(64, OUTPUT); + val adder_out = UFix(64, OUTPUT); } class rocketDpathALU extends Component diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 
c011b614..81198efb 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -8,13 +8,13 @@ import scala.math._; class ioDpathBTB extends Bundle() { - val current_pc4 = UFix(VADDR_BITS, 'input); - val hit = Bool('output); - val target = UFix(VADDR_BITS, 'output); - val wen = Bool('input); - val clr = Bool('input); - val correct_pc4 = UFix(VADDR_BITS, 'input); - val correct_target = UFix(VADDR_BITS, 'input); + val current_pc4 = UFix(VADDR_BITS, INPUT); + val hit = Bool(OUTPUT); + val target = UFix(VADDR_BITS, OUTPUT); + val wen = Bool(INPUT); + val clr = Bool(INPUT); + val correct_pc4 = UFix(VADDR_BITS, INPUT); + val correct_target = UFix(VADDR_BITS, INPUT); } // basic direct-mapped branch target buffer @@ -45,21 +45,21 @@ class ioDpathPCR extends Bundle() val r = new ioReadPort(); val w = new ioWritePort(); - val status = Bits(17, 'output); - val ptbr = UFix(PADDR_BITS, 'output); - val evec = UFix(VADDR_BITS, 'output); - val exception = Bool('input); - val cause = UFix(5, 'input); - val badvaddr_wen = Bool('input); - val pc = UFix(VADDR_BITS, 'input); - val eret = Bool('input); - val ei = Bool('input); - val di = Bool('input); - val ptbr_wen = Bool('output); - val irq_timer = Bool('output); - val irq_ipi = Bool('output); - val console_data = Bits(8, 'output); - val console_val = Bool('output); + val status = Bits(17, OUTPUT); + val ptbr = UFix(PADDR_BITS, OUTPUT); + val evec = UFix(VADDR_BITS, OUTPUT); + val exception = Bool(INPUT); + val cause = UFix(5, INPUT); + val badvaddr_wen = Bool(INPUT); + val pc = UFix(VADDR_BITS, INPUT); + val eret = Bool(INPUT); + val ei = Bool(INPUT); + val di = Bool(INPUT); + val ptbr_wen = Bool(OUTPUT); + val irq_timer = Bool(OUTPUT); + val irq_ipi = Bool(OUTPUT); + val console_data = Bits(8, OUTPUT); + val console_val = Bool(OUTPUT); } class rocketDpathPCR extends Component @@ -203,16 +203,16 @@ class rocketDpathPCR extends Component class ioReadPort extends Bundle() { - val addr = UFix(5, 
'input); - val en = Bool('input); - val data = Bits(64, 'output); + val addr = UFix(5, INPUT); + val en = Bool(INPUT); + val data = Bits(64, OUTPUT); } class ioWritePort extends Bundle() { - val addr = UFix(5, 'input); - val en = Bool('input); - val data = Bits(64, 'input); + val addr = UFix(5, INPUT); + val en = Bool(INPUT); + val data = Bits(64, INPUT); } class ioRegfile extends Bundle() diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 372cc4fd..214f05df 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -10,22 +10,22 @@ import scala.math._; class ioDTLB_CPU(view: List[String] = null) extends Bundle(view) { // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(17, 'input); + val status = Bits(17, INPUT); // invalidate all TLB entries - val invalidate = Bool('input); + val invalidate = Bool(INPUT); // lookup requests - val req_val = Bool('input); - val req_kill = Bool('input); - val req_cmd = Bits(4, 'input); // load/store/amo - val req_rdy = Bool('output); - val req_asid = Bits(ASID_BITS, 'input); - val req_vpn = UFix(VPN_BITS, 'input); + val req_val = Bool(INPUT); + val req_kill = Bool(INPUT); + val req_cmd = Bits(4, INPUT); // load/store/amo + val req_rdy = Bool(OUTPUT); + val req_asid = Bits(ASID_BITS, INPUT); + val req_vpn = UFix(VPN_BITS, INPUT); // lookup responses - val resp_miss = Bool('output); -// val resp_val = Bool('output); - val resp_ppn = UFix(PPN_BITS, 'output); - val xcpt_ld = Bool('output); - val xcpt_st = Bool('output); + val resp_miss = Bool(OUTPUT); +// val resp_val = Bool(OUTPUT); + val resp_ppn = UFix(PPN_BITS, OUTPUT); + val xcpt_ld = Bool(OUTPUT); + val xcpt_st = Bool(OUTPUT); } class ioDTLB extends Bundle diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 477e0490..a8fd2e3a 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -8,23 +8,23 @@ 
import scala.math._; // interface between I$ and pipeline/ITLB (32 bits wide) class ioImem(view: List[String] = null) extends Bundle (view) { - val invalidate = Bool('input); - val itlb_miss = Bool('input); - val req_val = Bool('input); - val req_idx = Bits(PGIDX_BITS, 'input); - val req_ppn = Bits(PPN_BITS, 'input); - val resp_data = Bits(32, 'output); - val resp_val = Bool('output); + val invalidate = Bool(INPUT); + val itlb_miss = Bool(INPUT); + val req_val = Bool(INPUT); + val req_idx = Bits(PGIDX_BITS, INPUT); + val req_ppn = Bits(PPN_BITS, INPUT); + val resp_data = Bits(32, OUTPUT); + val resp_val = Bool(OUTPUT); } // interface between I$ and memory (128 bits wide) class ioIcache(view: List[String] = null) extends Bundle (view) { - val req_addr = UFix(PADDR_BITS - OFFSET_BITS, 'input); - val req_val = Bool('input); - val req_rdy = Bool('output); - val resp_data = Bits(MEM_DATA_BITS, 'output); - val resp_val = Bool('output); + val req_addr = UFix(PADDR_BITS - OFFSET_BITS, INPUT); + val req_val = Bool(INPUT); + val req_rdy = Bool(OUTPUT); + val resp_data = Bits(MEM_DATA_BITS, OUTPUT); + val resp_val = Bool(OUTPUT); } class ioICacheDM extends Bundle() diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 668c8b83..cdcf9a00 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -7,13 +7,13 @@ import scala.math._; class ioIPrefetcherMem(view: List[String] = null) extends Bundle (view) { - val req_addr = UFix(PADDR_BITS - OFFSET_BITS, 'output); - val req_val = Bool('output); - val req_rdy = Bool('input); - val req_tag = Bits(IMEM_TAG_BITS, 'output); - val resp_data = Bits(MEM_DATA_BITS, 'input); - val resp_val = Bool('input); - val resp_tag = Bits(IMEM_TAG_BITS, 'input); + val req_addr = UFix(PADDR_BITS - OFFSET_BITS, OUTPUT); + val req_val = Bool(OUTPUT); + val req_rdy = Bool(INPUT); + val req_tag = Bits(IMEM_TAG_BITS, OUTPUT); + val resp_data = 
Bits(MEM_DATA_BITS, INPUT); + val resp_val = Bool(INPUT); + val resp_tag = Bits(IMEM_TAG_BITS, INPUT); } class ioIPrefetcher extends Bundle() { diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index f63fd1c7..73f63b00 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -7,15 +7,15 @@ import Constants._; import scala.math._; class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { - val clear = Bool('input); - val tag = Bits(tag_bits, 'input); - val hit = Bool('output); - val hit_addr = UFix(addr_bits, 'output); - val valid_bits = Bits(entries, 'output); + val clear = Bool(INPUT); + val tag = Bits(tag_bits, INPUT); + val hit = Bool(OUTPUT); + val hit_addr = UFix(addr_bits, OUTPUT); + val valid_bits = Bits(entries, OUTPUT); - val write = Bool('input); - val write_tag = Bits(tag_bits, 'input); - val write_addr = UFix(addr_bits, 'input); + val write = Bool(INPUT); + val write_tag = Bits(tag_bits, INPUT); + val write_addr = UFix(addr_bits, INPUT); } class rocketCAM(entries: Int, tag_bits: Int) extends Component { @@ -53,33 +53,33 @@ class rocketCAM(entries: Int, tag_bits: Int) extends Component { class ioTLB_PTW extends Bundle { // requests - val req_val = Bool('output); - val req_rdy = Bool('input); - val req_vpn = Bits(VPN_BITS, 'output); + val req_val = Bool(OUTPUT); + val req_rdy = Bool(INPUT); + val req_vpn = Bits(VPN_BITS, OUTPUT); // responses - val resp_val = Bool('input); - val resp_err = Bool('input); - val resp_ppn = Bits(PPN_BITS, 'input); - val resp_perm = Bits(PERM_BITS, 'input); + val resp_val = Bool(INPUT); + val resp_err = Bool(INPUT); + val resp_ppn = Bits(PPN_BITS, INPUT); + val resp_perm = Bits(PERM_BITS, INPUT); } // interface between ITLB and fetch stage of pipeline class ioITLB_CPU(view: List[String] = null) extends Bundle(view) { // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(17, 'input); + val status = 
Bits(17, INPUT); // invalidate all TLB entries - val invalidate = Bool('input); + val invalidate = Bool(INPUT); // lookup requests - val req_val = Bool('input); - val req_rdy = Bool('output); - val req_asid = Bits(ASID_BITS, 'input); - val req_vpn = UFix(VPN_BITS, 'input); + val req_val = Bool(INPUT); + val req_rdy = Bool(OUTPUT); + val req_asid = Bits(ASID_BITS, INPUT); + val req_vpn = UFix(VPN_BITS, INPUT); // lookup responses - val resp_miss = Bool('output); -// val resp_val = Bool('output); - val resp_ppn = UFix(PPN_BITS, 'output); - val exception = Bool('output); + val resp_miss = Bool(OUTPUT); +// val resp_val = Bool(OUTPUT); + val resp_ppn = UFix(PPN_BITS, OUTPUT); + val exception = Bool(OUTPUT); } class ioITLB extends Bundle diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 44a6eb7e..a3ce7bd8 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -6,20 +6,20 @@ import Constants._; class ioMultiplier(width: Int) extends Bundle { // requests - val mul_val = Bool('input); - val mul_kill= Bool('input); - val mul_rdy = Bool('output); - val dw = UFix(1, 'input); - val mul_fn = UFix(2, 'input); - val mul_tag = UFix(CPU_TAG_BITS, 'input); - val in0 = Bits(width, 'input); - val in1 = Bits(width, 'input); + val mul_val = Bool(INPUT); + val mul_kill= Bool(INPUT); + val mul_rdy = Bool(OUTPUT); + val dw = UFix(1, INPUT); + val mul_fn = UFix(2, INPUT); + val mul_tag = UFix(CPU_TAG_BITS, INPUT); + val in0 = Bits(width, INPUT); + val in1 = Bits(width, INPUT); // responses - val result = Bits(width, 'output); - val result_tag = UFix(CPU_TAG_BITS, 'output); - val result_val = Bool('output); - val result_rdy = Bool('input); + val result = Bits(width, OUTPUT); + val result_tag = UFix(CPU_TAG_BITS, OUTPUT); + val result_val = Bool(OUTPUT); + val result_rdy = Bool(INPUT); } class rocketMultiplier extends Component { diff --git a/rocket/src/main/scala/nbdcache.scala 
b/rocket/src/main/scala/nbdcache.scala index 4497ad83..70d5fade 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -5,11 +5,25 @@ import Node._; import Constants._; import scala.math._; +class ReplacementWayGen (width: Int, ways: Int) extends Component { + val io = new Bundle { + val ways_en = Bits(width = width, dir = INPUT) + val way_id = UFix(width = log2up(ways), dir = OUTPUT) + } +} + +class RandomReplacementWayGen (width: Int, ways: Int) extends ReplacementWayGen(width, ways) { + val lfsr = Reg(resetVal = UFix(1, width)) + when (io.ways_en.orR) { lfsr <== Cat(lfsr(0)^lfsr(2)^lfsr(3)^lfsr(5), lfsr(width-1,1)) } + //TODO: Actually limit selection based on which ways are available (io.ways_en) + io.way_id := lfsr(log2up(ways)-1,0).toUFix +} + class StoreMaskGen extends Component { val io = new Bundle { - val typ = Bits(3, 'input) - val addr = Bits(3, 'input) - val wmask = Bits(8, 'output) + val typ = Bits(3, INPUT) + val addr = Bits(3, INPUT) + val wmask = Bits(8, OUTPUT) } val word = (io.typ === MT_W) || (io.typ === MT_WU) @@ -24,9 +38,9 @@ class StoreMaskGen extends Component { class StoreDataGen extends Component { val io = new Bundle { - val typ = Bits(3, 'input) - val din = Bits(64, 'input) - val dout = Bits(64, 'output) + val typ = Bits(3, INPUT) + val din = Bits(64, INPUT) + val dout = Bits(64, OUTPUT) } val word = (io.typ === MT_W) || (io.typ === MT_WU) @@ -42,12 +56,12 @@ class StoreDataGen extends Component { // this currently requires that CPU_DATA_BITS == 64 class LoadDataGen extends Component { val io = new Bundle { - val typ = Bits(3, 'input) - val addr = Bits(log2up(MEM_DATA_BITS/8), 'input) - val din = Bits(MEM_DATA_BITS, 'input) - val dout = Bits(64, 'output) - val r_dout = Bits(64, 'output) - val r_dout_subword = Bits(64, 'output) + val typ = Bits(3, INPUT) + val addr = Bits(log2up(MEM_DATA_BITS/8), INPUT) + val din = Bits(MEM_DATA_BITS, INPUT) + val dout = Bits(64, OUTPUT) + val r_dout = Bits(64, 
OUTPUT) + val r_dout_subword = Bits(64, OUTPUT) } val sext = (io.typ === MT_B) || (io.typ === MT_H) || @@ -135,23 +149,23 @@ class MetaArrayReq extends Bundle { class MSHR(id: Int) extends Component { val io = new Bundle { - val req_pri_val = Bool('input) - val req_pri_rdy = Bool('output) - val req_sec_val = Bool('input) - val req_sec_rdy = Bool('output) - val req_ppn = Bits(PPN_BITS, 'input) - val req_idx = Bits(IDX_BITS, 'input) - val req_offset = Bits(OFFSET_BITS, 'input) - val req_cmd = Bits(4, 'input) - val req_type = Bits(3, 'input) - val req_sdq_id = UFix(log2up(NSDQ), 'input) - val req_tag = Bits(DCACHE_TAG_BITS, 'input) + val req_pri_val = Bool(INPUT) + val req_pri_rdy = Bool(OUTPUT) + val req_sec_val = Bool(INPUT) + val req_sec_rdy = Bool(OUTPUT) + val req_ppn = Bits(PPN_BITS, INPUT) + val req_idx = Bits(IDX_BITS, INPUT) + val req_offset = Bits(OFFSET_BITS, INPUT) + val req_cmd = Bits(4, INPUT) + val req_type = Bits(3, INPUT) + val req_sdq_id = UFix(log2up(NSDQ), INPUT) + val req_tag = Bits(DCACHE_TAG_BITS, INPUT) - val idx_match = Bool('output) - val idx = Bits(IDX_BITS, 'output) - val tag = Bits(PPN_BITS, 'output) + val idx_match = Bool(OUTPUT) + val idx = Bits(IDX_BITS, OUTPUT) + val tag = Bits(PPN_BITS, OUTPUT) - val mem_resp_val = Bool('input) + val mem_resp_val = Bool(INPUT) val mem_req = (new ioDecoupled) { new MemReq() }.flip val meta_req = (new ioDecoupled) { new MetaArrayReq() }.flip val replay = (new ioDecoupled) { new Replay() }.flip @@ -228,21 +242,21 @@ class MSHR(id: Int) extends Component { class MSHRFile extends Component { val io = new Bundle { - val req_val = Bool('input) - val req_rdy = Bool('output) - val req_ppn = Bits(PPN_BITS, 'input) - val req_idx = Bits(IDX_BITS, 'input) - val req_offset = Bits(OFFSET_BITS, 'input) - val req_cmd = Bits(4, 'input) - val req_type = Bits(3, 'input) - val req_tag = Bits(DCACHE_TAG_BITS, 'input) - val req_sdq_id = UFix(log2up(NSDQ), 'input) + val req_val = Bool(INPUT) + val req_rdy = Bool(OUTPUT) + 
val req_ppn = Bits(PPN_BITS, INPUT) + val req_idx = Bits(IDX_BITS, INPUT) + val req_offset = Bits(OFFSET_BITS, INPUT) + val req_cmd = Bits(4, INPUT) + val req_type = Bits(3, INPUT) + val req_tag = Bits(DCACHE_TAG_BITS, INPUT) + val req_sdq_id = UFix(log2up(NSDQ), INPUT) - val mem_resp_val = Bool('input) - val mem_resp_tag = Bits(DMEM_TAG_BITS, 'input) - val mem_resp_idx = Bits(IDX_BITS, 'output) + val mem_resp_val = Bool(INPUT) + val mem_resp_tag = Bits(DMEM_TAG_BITS, INPUT) + val mem_resp_idx = Bits(IDX_BITS, OUTPUT) - val fence_rdy = Bool('output) + val fence_rdy = Bool(OUTPUT) val mem_req = (new ioDecoupled) { new MemReq() }.flip() val meta_req = (new ioDecoupled) { new MetaArrayReq() }.flip() @@ -311,11 +325,11 @@ class MSHRFile extends Component { class ReplayUnit extends Component { val io = new Bundle { val sdq_enq = (new ioDecoupled) { Bits(width = CPU_DATA_BITS) } - val sdq_id = UFix(log2up(NSDQ), 'output) + val sdq_id = UFix(log2up(NSDQ), OUTPUT) val replay = (new ioDecoupled) { new Replay() } val data_req = (new ioDecoupled) { new DataReq() }.flip() - val cpu_resp_val = Bool('output) - val cpu_resp_tag = Bits(DCACHE_TAG_BITS, 'output) + val cpu_resp_val = Bool(OUTPUT) + val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) } val sdq_val = Reg(resetVal = UFix(0, NSDQ)) @@ -370,10 +384,10 @@ class WritebackUnit extends Component { val io = new Bundle { val req = (new ioDecoupled) { new WritebackReq() } val data_req = (new ioDecoupled) { new DataArrayReq() }.flip() - val data_resp = Bits(MEM_DATA_BITS, 'input) + val data_resp = Bits(MEM_DATA_BITS, INPUT) val refill_req = (new ioDecoupled) { new MemReq() } val mem_req = (new ioDecoupled) { new MemReq() }.flip() - val mem_req_data = Bits(MEM_DATA_BITS, 'output) + val mem_req_data = Bits(MEM_DATA_BITS, OUTPUT) } val wbq = (new queueSimplePF(REFILL_CYCLES)) { Bits(width = MEM_DATA_BITS) } @@ -483,7 +497,7 @@ class MetaDataArray(lines: Int) extends Component { class DataArray(lines: Int) extends Component { val io = 
new Bundle { val req = (new ioDecoupled) { new DataArrayReq() } - val resp = Bits(width = MEM_DATA_BITS, dir = 'output) + val resp = Bits(width = MEM_DATA_BITS, dir = OUTPUT) } val wmask = FillInterleaved(8, io.req.bits.wmask) @@ -499,11 +513,11 @@ class DataArray(lines: Int) extends Component { class AMOALU extends Component { val io = new Bundle { - val cmd = Bits(4, 'input) - val typ = Bits(3, 'input) - val lhs = UFix(64, 'input) - val rhs = UFix(64, 'input) - val out = UFix(64, 'output) + val cmd = Bits(4, INPUT) + val typ = Bits(3, INPUT) + val lhs = UFix(64, INPUT) + val rhs = UFix(64, INPUT) + val out = UFix(64, OUTPUT) } val sgned = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MAX) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 43bbaaab..ff62acdd 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -53,7 +53,7 @@ class ioPTW extends Bundle val itlb = new ioTLB_PTW().flip(); val dtlb = new ioTLB_PTW().flip(); val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_ppn", "req_idx", "resp_data", "resp_val", "resp_nack")).flip(); - val ptbr = UFix(PADDR_BITS, 'input); + val ptbr = UFix(PADDR_BITS, INPUT); } class rocketPTW extends Component diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index d9604271..04fd9d23 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -7,14 +7,14 @@ import scala.math._; class ioQueueCtrl(addr_sz: Int) extends Bundle() { - val q_reset = Bool('input); - val enq_val = Bool('input); - val enq_rdy = Bool('output); - val deq_val = Bool('output); - val deq_rdy = Bool('input); - val wen = Bool('output); - val waddr = UFix(addr_sz, 'output); - val raddr = UFix(addr_sz, 'output); + val q_reset = Bool(INPUT); + val enq_val = Bool(INPUT); + val enq_rdy = Bool(OUTPUT); + val deq_val = Bool(OUTPUT); + val deq_rdy = Bool(INPUT); + val wen = Bool(OUTPUT); + val waddr = UFix(addr_sz, 
OUTPUT); + val raddr = UFix(addr_sz, OUTPUT); } class queueCtrl(entries: Int) extends Component @@ -83,7 +83,7 @@ class queueCtrl(entries: Int) extends Component class ioQueueSimplePF[T <: Data]()(data: => T) extends Bundle { - val q_reset = Bool('input); + val q_reset = Bool(INPUT); val enq = new ioDecoupled()(data) val deq = new ioDecoupled()(data).flip } @@ -105,14 +105,14 @@ class queueSimplePF[T <: Data](entries: Int)(data: => T) extends Component // class IOqueueCtrlFlow extends IOqueueCtrl class ioQueueCtrlFlow(addr_sz: Int) extends Bundle() /* IOqueueCtrl */ { - val enq_val = Bool('input); - val enq_rdy = Bool('output); - val deq_val = Bool('output); - val deq_rdy = Bool('input); - val wen = Bool('output); - val waddr = UFix(addr_sz, 'output); - val raddr = UFix(addr_sz, 'output); - val flowthru = Bool('output); + val enq_val = Bool(INPUT); + val enq_rdy = Bool(OUTPUT); + val deq_val = Bool(OUTPUT); + val deq_rdy = Bool(INPUT); + val wen = Bool(OUTPUT); + val waddr = UFix(addr_sz, OUTPUT); + val raddr = UFix(addr_sz, OUTPUT); + val flowthru = Bool(OUTPUT); } class queueCtrlFlow(entries: Int) extends Component @@ -176,12 +176,12 @@ class queueCtrlFlow(entries: Int) extends Component class ioQueueDpathFlow[T <: Data](addr_sz: Int)(data: => T) extends Bundle() { - val wen = Bool('input); - val flowthru = Bool('input); + val wen = Bool(INPUT); + val flowthru = Bool(INPUT); val deq_bits = data.asOutput; val enq_bits = data.asInput; - val waddr = UFix(addr_sz, 'input); - val raddr = UFix(addr_sz, 'input); + val waddr = UFix(addr_sz, INPUT); + val raddr = UFix(addr_sz, INPUT); } class queueDpathFlow[T <: Data](entries: Int)(data: => T) extends Component @@ -195,11 +195,11 @@ class queueDpathFlow[T <: Data](entries: Int)(data: => T) extends Component class ioQueueFlowPF[T <: Data](data: => T) extends Bundle() { - val enq_val = Bool('input); - val enq_rdy = Bool('output); + val enq_val = Bool(INPUT); + val enq_rdy = Bool(OUTPUT); val enq_bits = data.asInput; - val 
deq_val = Bool('output); - val deq_rdy = Bool('input); + val deq_val = Bool(OUTPUT); + val deq_rdy = Bool(INPUT); val deq_bits = data.asOutput; } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 29023619..ac182310 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -35,9 +35,9 @@ object Reverse class Mux1H(n: Int, w: Int) extends Component { val io = new Bundle { - val sel = Vec(n) { Bool(dir = 'input) } - val in = Vec(n) { Bits(width = w, dir = 'input) } - val out = Bits(width = w, dir = 'output) + val sel = Vec(n) { Bool(dir = INPUT) } + val in = Vec(n) { Bits(width = w, dir = INPUT) } + val out = Bits(width = w, dir = OUTPUT) } if (n > 1) { @@ -52,8 +52,8 @@ class Mux1H(n: Int, w: Int) extends Component class ioDecoupled[T <: Data]()(data: => T) extends Bundle { - val valid = Bool('input) - val ready = Bool('output) + val valid = Bool(INPUT) + val ready = Bool(OUTPUT) val bits = data.asInput } @@ -84,8 +84,8 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { class ioPriorityDecoder(in_width: Int, out_width: Int) extends Bundle { - val in = UFix(in_width, 'input); - val out = Bits(out_width, 'output); + val in = UFix(in_width, INPUT); + val out = Bits(out_width, OUTPUT); } class priorityDecoder(width: Int) extends Component @@ -106,8 +106,8 @@ class priorityDecoder(width: Int) extends Component class ioPriorityEncoder(in_width: Int, out_width: Int) extends Bundle { - val in = Bits(in_width, 'input); - val out = UFix(out_width, 'output); + val in = Bits(in_width, INPUT); + val out = UFix(out_width, OUTPUT); } class priorityEncoder(width: Int) extends Component From 07f184df2f4b431e12c4e0b052633449f9d44485 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 18 Jan 2012 15:23:21 -0800 Subject: [PATCH 0100/1087] adhere to new chisel c naming convention --- rocket/src/main/scala/top.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index de63bc2f..d84d333e 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -37,9 +37,9 @@ class Top() extends Component { object top_main { def main(args: Array[String]) = { // Can turn off --debug and --vcd when done with debugging to improve emulator performance - val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); +// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); // val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug"); -// val cpu_args = args ++ Array("--target-dir", "generated-src"); + val cpu_args = args ++ Array("--target-dir", "generated-src"); // Set variables based off of command flags // for(a <- args) { // a match { From 7e257495813ec14a55b403211862e400b8b8c426 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 13 Jan 2012 15:55:56 -0800 Subject: [PATCH 0101/1087] Groundwork for assoc cache implementation --- rocket/src/main/scala/dcache.scala | 5 +++++ rocket/src/main/scala/nbdcache.scala | 31 +++++++++++++++------------- rocket/src/main/scala/top.scala | 2 +- rocket/src/main/scala/util.scala | 10 +++++++-- 4 files changed, 31 insertions(+), 17 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index e33f4865..73fd4f61 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -45,6 +45,11 @@ class ioDCacheDM extends Bundle() { val mem = new ioDcache().flip(); } +class ioDCacheHella extends Bundle() { + val cpu = new ioDmem(); + val mem = new ioDcache().flip(); +} + class rocketDCacheStoreGen extends Component { val io = new Bundle { val req_type = Bits(3, INPUT); diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 70d5fade..9d16d98e 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -541,7 +541,7 @@ 
class AMOALU extends Component { } class HellaCache(lines: Int) extends Component { - val io = new ioDCacheDM(); + val io = new ioDCacheHella(); val addrbits = PADDR_BITS; val indexbits = log2up(lines); @@ -602,6 +602,22 @@ class HellaCache(lines: Int) extends Component { } } + // refill counter + val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val rr_count_next = rr_count + UFix(1) + when (io.mem.resp_val) { rr_count <== rr_count_next } + + val misaligned = + (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || + (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || + ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); + + io.cpu.xcpt_ma_ld := r_cpu_req_val_ && r_req_read && misaligned + io.cpu.xcpt_ma_st := r_cpu_req_val_ && r_req_write && misaligned + +} + +class HellaCacheDM(lines: Int) extends HellaCache(lines) { // tags val meta = new MetaDataArray(lines) val meta_arb = (new Arbiter(3)) { new MetaArrayReq() } @@ -632,11 +648,6 @@ class HellaCache(lines: Int) extends Component { val tag_miss = r_cpu_req_val && !tag_match val dirty = meta.io.resp.valid && meta.io.resp.dirty - // refill counter - val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) - val rr_count_next = rr_count + UFix(1) - when (io.mem.resp_val) { rr_count <== rr_count_next } - // refill response val block_during_refill = !io.mem.resp_val && (rr_count != UFix(0)) data_arb.io.in(0).valid := io.mem.resp_val || block_during_refill @@ -791,14 +802,6 @@ class HellaCache(lines: Int) extends Component { io.cpu.resp_data := loadgen.io.dout io.cpu.resp_data_subword := loadgen.io.r_dout_subword - val misaligned = - (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || - (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || - ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); - - 
io.cpu.xcpt_ma_ld := r_cpu_req_val_ && r_req_read && misaligned - io.cpu.xcpt_ma_st := r_cpu_req_val_ && r_req_write && misaligned - wb.io.mem_req.ready := io.mem.req_rdy io.mem.req_val := wb.io.mem_req.valid io.mem.req_rw := wb.io.mem_req.bits.rw diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index d84d333e..be9bb311 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -17,7 +17,7 @@ class Top() extends Component { val cpu = new rocketProc(); val icache = new rocketICacheDM(128); // # 64 byte cache lines val icache_pf = new rocketIPrefetcher(); - val dcache = new HellaCache(128); + val dcache = new HellaCacheDM(128); val arbiter = new rocketMemArbiter(); arbiter.io.mem ^^ io.mem; diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index ac182310..1cbc7674 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -2,8 +2,14 @@ package Top { import Chisel._ -import Node._; -import scala.math._; +import Node._ +import scala.math._ + +object foldR +{ + def apply[T <: Bits](x: Seq[T], f: (T, T) => T): T = + if (x.length == 1) x(0) else f(x(0), foldR(x.slice(1, x.length), f)) +} object log2up { From 29ed8eb31a698bf4e1e4d271e84a53c980e2e3cf Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 17 Jan 2012 23:49:32 -0800 Subject: [PATCH 0102/1087] More utils for nbdcache --- rocket/src/main/scala/util.scala | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 1cbc7674..2e2dd20b 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -13,7 +13,7 @@ object foldR object log2up { - def apply(in: Int) = ceil(log(in)/log(2)).toInt + def apply(in: Int) = if (in == 1) 1 else ceil(log(in)/log(2)).toInt } object FillInterleaved @@ -38,6 +38,27 @@ object Reverse } } +object OHToUFix +{ + def apply(in: Bits): UFix = + { + 
var out = UFix(0) + for(i <- 0 until in.getWidth) + if(in(i) == Bits(1)) + out = UFix(i) + out + } +} + +object UFixToOH +{ + def apply(in: UFix, width: Int): Bits = + { + val out = Bits(1, width) + out << in + } +} + class Mux1H(n: Int, w: Int) extends Component { val io = new Bundle { From 8623d587245fe02b00af923c871a006e12b94a95 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 18 Jan 2012 15:07:36 -0800 Subject: [PATCH 0103/1087] split into two caches, compiles --- rocket/src/main/scala/consts.scala | 1 + rocket/src/main/scala/nbdcache.scala | 488 ++++++++++++++++++++++++--- 2 files changed, 437 insertions(+), 52 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index bada957b..2caa7d43 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -188,6 +188,7 @@ object Constants val NSDQ = 17; // number of secondary stores/AMOs val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) val IDX_BITS = PGIDX_BITS - OFFSET_BITS; + val NWAYS = 1; // external memory interface val IMEM_TAG_BITS = 1; diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9d16d98e..9959089f 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -5,18 +5,19 @@ import Node._; import Constants._; import scala.math._; -class ReplacementWayGen (width: Int, ways: Int) extends Component { +class ReplacementWayGen extends Component { val io = new Bundle { - val ways_en = Bits(width = width, dir = INPUT) - val way_id = UFix(width = log2up(ways), dir = OUTPUT) + val way_en = Bits(width = width, dir = INPUT) + val way_id = UFix(width = log2up(NWAYS), dir = OUTPUT) } } -class RandomReplacementWayGen (width: Int, ways: Int) extends ReplacementWayGen(width, ways) { - val lfsr = Reg(resetVal = UFix(1, width)) - when (io.ways_en.orR) { lfsr <== Cat(lfsr(0)^lfsr(2)^lfsr(3)^lfsr(5), lfsr(width-1,1)) } +class RandomReplacementWayGen extends 
ReplacementWayGen() { + val width = max(6,log2up(NWAYS)) + val lfsr = Reg(resetVal = UFix(1, width)) + when (io.way_en.orR) { lfsr <== Cat(lfsr(0)^lfsr(2)^lfsr(3)^lfsr(5), lfsr(width-1,1)) } //TODO: Actually limit selection based on which ways are available (io.ways_en) - io.way_id := lfsr(log2up(ways)-1,0).toUFix + io.way_id := lfsr(log2up(NWAYS)-1,0).toUFix } class StoreMaskGen extends Component { @@ -106,6 +107,7 @@ class Replay extends Bundle { val typ = Bits(width = 3) val sdq_id = UFix(width = log2up(NSDQ)) val tag = Bits(width = DCACHE_TAG_BITS) + val way_id = UFix(width = log2up(NWAYS)) } class DataReq extends Bundle { @@ -124,6 +126,11 @@ class DataArrayReq extends Bundle { val data = Bits(width = MEM_DATA_BITS) } +class DataArrayArrayReq extends Bundle { + val inner_req = new DataArrayReq() + val way_en = Bits(width = NWAYS) +} + class MemReq extends Bundle { val rw = Bool() val addr = UFix(width = PPN_BITS+IDX_BITS) @@ -133,6 +140,7 @@ class MemReq extends Bundle { class WritebackReq extends Bundle { val ppn = Bits(width = PPN_BITS) val idx = Bits(width = IDX_BITS) + val way_id = UFix(width = log2up(NWAYS)) } class MetaData extends Bundle { @@ -147,6 +155,11 @@ class MetaArrayReq extends Bundle { val data = new MetaData() } +class MetaArrayArrayReq extends Bundle { + val inner_req = new MetaArrayReq() + val way_en = Bits(width = NWAYS) +} + class MSHR(id: Int) extends Component { val io = new Bundle { val req_pri_val = Bool(INPUT) @@ -160,14 +173,16 @@ class MSHR(id: Int) extends Component { val req_type = Bits(3, INPUT) val req_sdq_id = UFix(log2up(NSDQ), INPUT) val req_tag = Bits(DCACHE_TAG_BITS, INPUT) + val req_way_id = UFix(log2up(NWAYS),INPUT) val idx_match = Bool(OUTPUT) val idx = Bits(IDX_BITS, OUTPUT) val tag = Bits(PPN_BITS, OUTPUT) + val way_id = Bits(log2up(NWAYS), OUTPUT) val mem_resp_val = Bool(INPUT) val mem_req = (new ioDecoupled) { new MemReq() }.flip - val meta_req = (new ioDecoupled) { new MetaArrayReq() }.flip + val meta_req = (new 
ioDecoupled) { new MetaArrayArrayReq() }.flip val replay = (new ioDecoupled) { new Replay() }.flip } @@ -177,6 +192,7 @@ class MSHR(id: Int) extends Component { val refilled = Reg { Bool() } val ppn = Reg { Bits() } val idx_ = Reg { Bits() } + val way_id_ = Reg { Bits() } val req_load = (io.req_cmd === M_XRD) || (io.req_cmd === M_PFR) val req_use_rpq = (io.req_cmd != M_PFR) && (io.req_cmd != M_PFW) @@ -200,6 +216,7 @@ class MSHR(id: Int) extends Component { refilled <== Bool(false) ppn <== io.req_ppn idx_ <== io.req_idx + way_id_ <== io.req_way_id } when (io.mem_req.valid && io.mem_req.ready) { requested <== Bool(true) @@ -215,15 +232,17 @@ class MSHR(id: Int) extends Component { io.idx_match := valid && (idx_ === io.req_idx) io.idx := idx_ io.tag := ppn + io.way_id := way_id_ io.req_pri_rdy := !valid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready io.meta_req.valid := valid && refilled && !rpq.io.deq.valid - io.meta_req.bits.rw := Bool(true) - io.meta_req.bits.idx := idx_ - io.meta_req.bits.data.valid := Bool(true) - io.meta_req.bits.data.dirty := dirty - io.meta_req.bits.data.tag := ppn + io.meta_req.bits.inner_req.rw := Bool(true) + io.meta_req.bits.inner_req.idx := idx_ + io.meta_req.bits.inner_req.data.valid := Bool(true) + io.meta_req.bits.inner_req.data.dirty := dirty + io.meta_req.bits.inner_req.data.tag := ppn + io.meta_req.bits.way_en := UFixToOH(way_id_.toUFix, NWAYS) io.mem_req.valid := valid && !requested //io.mem_req.bits.itm := next_dirty @@ -238,6 +257,7 @@ class MSHR(id: Int) extends Component { io.replay.bits.cmd := rpq.io.deq.bits.cmd io.replay.bits.typ := rpq.io.deq.bits.typ io.replay.bits.sdq_id := rpq.io.deq.bits.sdq_id + io.replay.bits.way_id := way_id_.toUFix } class MSHRFile extends Component { @@ -251,21 +271,24 @@ class MSHRFile extends Component { val req_type = Bits(3, INPUT) val req_tag = Bits(DCACHE_TAG_BITS, INPUT) val req_sdq_id = UFix(log2up(NSDQ), INPUT) + val req_way_id = UFix(log2up(NWAYS), INPUT) val mem_resp_val = Bool(INPUT) 
val mem_resp_tag = Bits(DMEM_TAG_BITS, INPUT) val mem_resp_idx = Bits(IDX_BITS, OUTPUT) + val mem_resp_way_id = UFix(log2up(NWAYS), OUTPUT) val fence_rdy = Bool(OUTPUT) val mem_req = (new ioDecoupled) { new MemReq() }.flip() - val meta_req = (new ioDecoupled) { new MetaArrayReq() }.flip() + val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip() val replay = (new ioDecoupled) { new Replay() }.flip() } val tag_mux = new Mux1H(NMSHR, PPN_BITS) val mem_resp_idx_mux = new Mux1H(NMSHR, IDX_BITS) - val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() } + val mem_resp_way_id_mux = new Mux1H(NMSHR, log2up(NWAYS)) + val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } val mem_req_arb = (new Arbiter(NMSHR)) { new MemReq() } val replay_arb = (new Arbiter(NMSHR)) { new Replay() } @@ -295,6 +318,7 @@ class MSHRFile extends Component { mshr.io.req_cmd := io.req_cmd mshr.io.req_type := io.req_type mshr.io.req_sdq_id := io.req_sdq_id + mshr.io.req_way_id := io.req_way_id mshr.io.meta_req <> meta_req_arb.io.in(i) mshr.io.mem_req <> mem_req_arb.io.in(i) @@ -304,6 +328,8 @@ class MSHRFile extends Component { mshr.io.mem_resp_val := mem_resp_val mem_resp_idx_mux.io.sel(i) := (UFix(i) === io.mem_resp_tag) mem_resp_idx_mux.io.in(i) := mshr.io.idx + mem_resp_way_id_mux.io.sel(i) := (UFix(i) === io.mem_resp_tag) + mem_resp_way_id_mux.io.in(i) := mshr.io.way_id pri_rdy = pri_rdy || mshr.io.req_pri_rdy sec_rdy = sec_rdy || mshr.io.req_sec_rdy @@ -319,6 +345,7 @@ class MSHRFile extends Component { io.req_rdy := Mux(idx_match, tag_match && sec_rdy, pri_rdy) io.mem_resp_idx := mem_resp_idx_mux.io.out + io.mem_resp_way_id := mem_resp_way_id_mux.io.out.toUFix io.fence_rdy := !fence } @@ -326,6 +353,7 @@ class ReplayUnit extends Component { val io = new Bundle { val sdq_enq = (new ioDecoupled) { Bits(width = CPU_DATA_BITS) } val sdq_id = UFix(log2up(NSDQ), OUTPUT) + val way_id = UFix(log2up(NWAYS), OUTPUT) val replay = (new ioDecoupled) { new Replay() } val 
data_req = (new ioDecoupled) { new DataReq() }.flip() val cpu_resp_val = Bool(OUTPUT) @@ -370,6 +398,7 @@ class ReplayUnit extends Component { io.replay.ready := !replay_retry io.data_req.valid := replay_val + io.way_id := rp.way_id io.data_req.bits.idx := rp.idx io.data_req.bits.offset := rp.offset io.data_req.bits.cmd := rp.cmd @@ -383,7 +412,7 @@ class ReplayUnit extends Component { class WritebackUnit extends Component { val io = new Bundle { val req = (new ioDecoupled) { new WritebackReq() } - val data_req = (new ioDecoupled) { new DataArrayReq() }.flip() + val data_req = (new ioDecoupled) { new DataArrayArrayReq() }.flip() val data_resp = Bits(MEM_DATA_BITS, INPUT) val refill_req = (new ioDecoupled) { new MemReq() } val mem_req = (new ioDecoupled) { new MemReq() }.flip() @@ -412,11 +441,12 @@ class WritebackUnit extends Component { io.req.ready := !valid io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) - io.data_req.bits.idx := addr.idx - io.data_req.bits.offset := cnt - io.data_req.bits.rw := Bool(false) - io.data_req.bits.wmask := Bits(0) - io.data_req.bits.data := Bits(0) + io.data_req.bits.way_en := UFixToOH(addr.way_id, NWAYS) + io.data_req.bits.inner_req.idx := addr.idx + io.data_req.bits.inner_req.offset := cnt + io.data_req.bits.inner_req.rw := Bool(false) + io.data_req.bits.inner_req.wmask := Bits(0) + io.data_req.bits.inner_req.data := Bits(0) io.refill_req.ready := io.mem_req.ready && !block_refill io.mem_req.valid := refill_val || wbq.io.deq.valid && (cnt === UFix(REFILL_CYCLES)) @@ -430,7 +460,7 @@ class FlushUnit(lines: Int) extends Component { val io = new Bundle { val req = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) } val resp = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) }.flip() - val meta_req = (new ioDecoupled) { new MetaArrayReq() }.flip() + val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip() val meta_resp = (new MetaData).asInput() val wb_req = (new ioDecoupled) { new WritebackReq() }.flip() } @@ 
-438,15 +468,23 @@ class FlushUnit(lines: Int) extends Component { val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: s_meta_write :: s_done :: Nil = Enum(6) { UFix() } val state = Reg(resetVal = s_reset) val tag = Reg() { Bits() } - val cnt = Reg(resetVal = UFix(0, log2up(lines))) - val next_cnt = cnt + UFix(1) + val idx_cnt = Reg(resetVal = UFix(0, log2up(lines))) + val next_idx_cnt = idx_cnt + UFix(1) + val way_cnt = Reg(resetVal = UFix(0, log2up(NWAYS))) + val next_way_cnt = way_cnt + UFix(1) switch (state) { - is(s_reset) { when (io.meta_req.ready) { state <== Mux(~cnt === UFix(0), s_ready, s_reset); cnt <== next_cnt } } + is(s_reset) { + when (io.meta_req.ready) { + state <== Mux(~way_cnt === UFix(0) && ~idx_cnt === UFix(0), s_ready, s_reset); + when (~way_cnt === UFix(0)) { idx_cnt <== next_idx_cnt }; + way_cnt <== next_way_cnt; + } + } is(s_ready) { when (io.req.valid) { state <== s_meta_read; tag <== io.req.bits } } is(s_meta_read) { when (io.meta_req.ready) { state <== s_meta_wait } } is(s_meta_wait) { state <== Mux(io.meta_resp.valid && io.meta_resp.dirty && !io.wb_req.ready, s_meta_read, s_meta_write) } - is(s_meta_write) { when (io.meta_req.ready) { state <== Mux(~cnt === UFix(0), s_done, s_meta_read); cnt <== next_cnt } } + is(s_meta_write) { when (io.meta_req.ready) { state <== Mux(~idx_cnt === UFix(0), s_done, s_meta_read); idx_cnt <== next_idx_cnt } } is(s_done) { when (io.resp.ready) { state <== s_ready } } } @@ -454,21 +492,48 @@ class FlushUnit(lines: Int) extends Component { io.resp.valid := state === s_done io.resp.bits := tag io.meta_req.valid := (state === s_meta_read) || (state === s_meta_write) || (state === s_reset) - io.meta_req.bits.idx := cnt - io.meta_req.bits.rw := (state === s_meta_write) || (state === s_reset) - io.meta_req.bits.data.valid := Bool(false) - io.meta_req.bits.data.dirty := Bool(false) - io.meta_req.bits.data.tag := UFix(0) + io.meta_req.bits.way_en := UFixToOH(way_cnt, NWAYS) + io.meta_req.bits.inner_req.idx := 
idx_cnt + io.meta_req.bits.inner_req.rw := (state === s_meta_write) || (state === s_reset) + io.meta_req.bits.inner_req.data.valid := Bool(false) + io.meta_req.bits.inner_req.data.dirty := Bool(false) + io.meta_req.bits.inner_req.data.tag := UFix(0) io.wb_req.valid := state === s_meta_wait io.wb_req.bits.ppn := io.meta_resp.tag - io.wb_req.bits.idx := cnt + io.wb_req.bits.idx := idx_cnt + io.wb_req.bits.way_id := way_cnt } +class MetaDataArrayArray(lines: Int) extends Component { + val io = new Bundle { + val req = (new ioDecoupled) { new MetaArrayArrayReq() } + val resp = Vec(NWAYS){ (new MetaData).asOutput } + val state_req = (new ioDecoupled) { new MetaArrayArrayReq() } + } + + val way_arr = List.fill(NWAYS){ new MetaDataArray(lines) } + val tag_ready_arr = Bits(width = NWAYS) + val state_ready_arr = Bits(width = NWAYS) + + for(w <- 0 until NWAYS) { + way_arr(w).io.req.bits ^^ io.req.bits.inner_req + way_arr(w).io.req.ready := tag_ready_arr(w) + way_arr(w).io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool + way_arr(w).io.state_req.bits ^^ io.req.bits.inner_req + way_arr(w).io.state_req.ready := state_ready_arr(w) + way_arr(w).io.state_req.valid := io.req.valid && io.req.bits.way_en(w).toBool + io.resp(w) ^^ way_arr(w).io.resp + } + + io.req.ready := tag_ready_arr.andR.toBool + io.state_req.ready := state_ready_arr.andR.toBool +} + + class MetaDataArray(lines: Int) extends Component { val io = new Bundle { val req = (new ioDecoupled) { new MetaArrayReq() } val resp = (new MetaData).asOutput() - val state_req = (new ioDecoupled) { new MetaArrayReq() } } @@ -494,6 +559,32 @@ class MetaDataArray(lines: Int) extends Component { io.req.ready := !vd_conflict } +class DataArrayArray(lines: Int) extends Component { + val io = new Bundle { + val req = (new ioDecoupled) { new DataArrayArrayReq() } + val resp = Vec(NWAYS){ Bits(width = MEM_DATA_BITS, dir = OUTPUT) } + val way_en = Bits(width = NWAYS, dir = OUTPUT) + } + + val way_en_ = Reg { Bits() } + when 
(io.req.valid && io.req.ready) { + way_en_ <== io.req.bits.way_en + } + + val way_arr = List.fill(NWAYS){ new DataArray(lines) } + val data_ready_arr = Bits(width = NWAYS) + + for(w <- 0 until NWAYS) { + way_arr(w).io.req.bits ^^ io.req.bits.inner_req + way_arr(w).io.req.ready := data_ready_arr(w) + way_arr(w).io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool + io.resp(w) ^^ way_arr(w).io.resp + } + + io.way_en := way_en_ + io.req.ready := data_ready_arr.andR.toBool +} + class DataArray(lines: Int) extends Component { val io = new Bundle { val req = (new ioDecoupled) { new DataArrayReq() } @@ -540,20 +631,24 @@ class AMOALU extends Component { /* MIN[U]/MAX[U] */ cmp_out)))); } -class HellaCache(lines: Int) extends Component { - val io = new ioDCacheHella(); +//class HellaCache(lines: Int, ways: Int) extends Component { +// +//} + +class HellaCacheDM(lines: Int) extends Component { + val io = new ioDCacheHella() - val addrbits = PADDR_BITS; - val indexbits = log2up(lines); - val offsetbits = OFFSET_BITS; - val tagmsb = PADDR_BITS-1; - val taglsb = indexbits+offsetbits; - val tagbits = tagmsb-taglsb+1; - val indexmsb = taglsb-1; - val indexlsb = offsetbits; - val offsetmsb = indexlsb-1; - val offsetlsb = log2up(CPU_DATA_BITS/8); - val ramindexlsb = log2up(MEM_DATA_BITS/8); + val addrbits = PADDR_BITS + val indexbits = log2up(lines) + val offsetbits = OFFSET_BITS + val tagmsb = PADDR_BITS-1 + val taglsb = indexbits+offsetbits + val tagbits = tagmsb-taglsb+1 + val indexmsb = taglsb-1 + val indexlsb = offsetbits + val offsetmsb = indexlsb-1 + val offsetlsb = log2up(CPU_DATA_BITS/8) + val ramindexlsb = log2up(MEM_DATA_BITS/8) val early_nack = Reg { Bool() } val r_cpu_req_val_ = Reg(io.cpu.req_val && io.cpu.req_rdy, resetVal = Bool(false)) @@ -569,6 +664,7 @@ class HellaCache(lines: Int) extends Component { val p_store_idx = Reg() { Bits() } val p_store_cmd = Reg() { Bits() } val p_store_type = Reg() { Bits() } + val p_store_way_id = Reg() { Bits() } val 
r_replay_amo = Reg(resetVal = Bool(false)) val req_store = (io.cpu.req_cmd === M_XWR) @@ -586,7 +682,7 @@ class HellaCache(lines: Int) extends Component { val r_req_readwrite = r_req_read || r_req_write // replay unit - val replayer = new ReplayUnit + val replayer = new ReplayUnit() val replay_amo_val = replayer.io.data_req.valid && replayer.io.data_req.bits.cmd(3).toBool when (replay_amo_val) { @@ -615,9 +711,6 @@ class HellaCache(lines: Int) extends Component { io.cpu.xcpt_ma_ld := r_cpu_req_val_ && r_req_read && misaligned io.cpu.xcpt_ma_st := r_cpu_req_val_ && r_req_write && misaligned -} - -class HellaCacheDM(lines: Int) extends HellaCache(lines) { // tags val meta = new MetaDataArray(lines) val meta_arb = (new Arbiter(3)) { new MetaArrayReq() } @@ -632,7 +725,10 @@ class HellaCacheDM(lines: Int) extends HellaCache(lines) { val wb = new WritebackUnit val wb_arb = (new Arbiter(2)) { new WritebackReq() } wb_arb.io.out <> wb.io.req - wb.io.data_req <> data_arb.io.in(3) + wb.io.data_req.bits.inner_req <> data_arb.io.in(3).bits + wb.io.data_req.ready <> data_arb.io.in(3).ready + wb.io.data_req.valid <> data_arb.io.in(3).valid + wb.io.data_resp <> data.io.resp // cpu tag check @@ -713,7 +809,7 @@ class HellaCacheDM(lines: Int) extends HellaCache(lines) { } // miss handling - val mshr = new MSHRFile + val mshr = new MSHRFile() mshr.io.req_val := tag_miss && r_req_readwrite && (!dirty || wb_rdy) && (!r_req_write || replayer.io.sdq_enq.ready) mshr.io.req_ppn := io.cpu.req_ppn mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) @@ -810,4 +906,292 @@ class HellaCacheDM(lines: Int) extends HellaCache(lines) { io.mem.req_addr := wb.io.mem_req.bits.addr } +class HellaCacheAssoc(lines: Int) extends Component { + val io = new ioDCacheHella() + + val addrbits = PADDR_BITS + val indexbits = log2up(lines) + val offsetbits = OFFSET_BITS + val tagmsb = PADDR_BITS-1 + val taglsb = indexbits+offsetbits + val tagbits = tagmsb-taglsb+1 + val indexmsb = taglsb-1 + val indexlsb = 
offsetbits + val offsetmsb = indexlsb-1 + val offsetlsb = log2up(CPU_DATA_BITS/8) + val ramindexlsb = log2up(MEM_DATA_BITS/8) + + val early_nack = Reg { Bool() } + val r_cpu_req_val_ = Reg(io.cpu.req_val && io.cpu.req_rdy, resetVal = Bool(false)) + val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req_kill && !early_nack + val r_cpu_req_idx = Reg() { Bits() } + val r_cpu_req_cmd = Reg() { Bits() } + val r_cpu_req_type = Reg() { Bits() } + val r_cpu_req_tag = Reg() { Bits() } + val r_cpu_req_data = Reg() { Bits() } + + val p_store_valid = Reg(resetVal = Bool(false)) + val p_store_data = Reg() { Bits() } + val p_store_idx = Reg() { Bits() } + val p_store_cmd = Reg() { Bits() } + val p_store_type = Reg() { Bits() } + val p_store_way_id = Reg() { Bits() } + val r_replay_amo = Reg(resetVal = Bool(false)) + + val req_store = (io.cpu.req_cmd === M_XWR) + val req_load = (io.cpu.req_cmd === M_XRD) + val req_amo = io.cpu.req_cmd(3).toBool + val req_read = req_load || req_amo + val req_write = req_store || req_amo + val r_req_load = (r_cpu_req_cmd === M_XRD) + val r_req_store = (r_cpu_req_cmd === M_XWR) + val r_req_flush = (r_cpu_req_cmd === M_FLA) + val r_req_fence = (r_cpu_req_cmd === M_FENCE) + val r_req_amo = r_cpu_req_cmd(3).toBool + val r_req_read = r_req_load || r_req_amo + val r_req_write = r_req_store || r_req_amo + val r_req_readwrite = r_req_read || r_req_write + + // replay unit + val replayer = new ReplayUnit() + val replay_amo_val = replayer.io.data_req.valid && replayer.io.data_req.bits.cmd(3).toBool + + when (replay_amo_val) { + r_cpu_req_data <== replayer.io.data_req.bits.data + } + when (io.cpu.req_val) { + r_cpu_req_idx <== io.cpu.req_idx + r_cpu_req_cmd <== io.cpu.req_cmd + r_cpu_req_type <== io.cpu.req_type + r_cpu_req_tag <== io.cpu.req_tag + when (req_write) { + r_cpu_req_data <== io.cpu.req_data + } + } + + // refill counter + val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val rr_count_next = rr_count + UFix(1) + when (io.mem.resp_val) { 
rr_count <== rr_count_next } + + val misaligned = + (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || + (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || + ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); + + io.cpu.xcpt_ma_ld := r_cpu_req_val_ && r_req_read && misaligned + io.cpu.xcpt_ma_st := r_cpu_req_val_ && r_req_write && misaligned + // tags + val meta = new MetaDataArrayArray(lines) + val meta_arb = (new Arbiter(3)) { new MetaArrayArrayReq() } + meta_arb.io.out <> meta.io.req + + // data + val data = new DataArrayArray(lines) + val data_arb = (new Arbiter(5)) { new DataArrayArrayReq() } + data_arb.io.out <> data.io.req + + // cpu tag check + meta_arb.io.in(2).valid := io.cpu.req_val + meta_arb.io.in(2).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) + meta_arb.io.in(2).bits.inner_req.rw := Bool(false) + meta_arb.io.in(2).bits.inner_req.data.valid := Bool(false) // don't care + meta_arb.io.in(2).bits.inner_req.data.dirty := Bool(false) // don't care + meta_arb.io.in(2).bits.inner_req.data.tag := UFix(0) // don't care + meta_arb.io.in(2).bits.way_en := ~UFix(0, NWAYS) + val early_tag_nack = !meta_arb.io.in(2).ready + //val tag_match_arr = meta.io.resp.map(r => r.valid && (r.tag === io.cpu_req_ppn)) + val tag_match_arr = (0 until NWAYS).map( w => meta.io.resp(w).valid && (meta.io.resp(w).tag === io.cpu.req_ppn)) + val tag_match = Cat(Bits(0),tag_match_arr:_*).orR + val tag_hit = r_cpu_req_val && tag_match + val tag_miss = r_cpu_req_val && !tag_match + val hit_way_id = OHToUFix(Cat(Bits(0),tag_match_arr:_*)) + val meta_hit_mux = meta.io.resp(hit_way_id) + + // writeback unit + val wb = new WritebackUnit + val wb_arb = (new Arbiter(2)) { new WritebackReq() } + wb_arb.io.out <> wb.io.req + wb.io.data_req <> data_arb.io.in(3) + val data_resp_way_id = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_id, OHToUFix(data.io.way_en)) + val data_resp_mux = 
data.io.resp(data_resp_way_id) + wb.io.data_resp <> data_resp_mux + + // replacement policy + val replacer = new RandomReplacementWayGen() + replacer.io.way_en := tag_miss & ~UFix(0, NWAYS) + val replaced_way_id = replacer.io.way_id + val meta_wb_mux = meta.io.resp(replaced_way_id) + val dirty = meta_wb_mux.valid && meta_wb_mux.dirty //TODO: check all dirty uses + + // refill response + val block_during_refill = !io.mem.resp_val && (rr_count != UFix(0)) + data_arb.io.in(0).bits.inner_req.offset := rr_count + data_arb.io.in(0).bits.inner_req.rw := !block_during_refill + data_arb.io.in(0).bits.inner_req.wmask := ~UFix(0, MEM_DATA_BITS/8) + data_arb.io.in(0).bits.inner_req.data := io.mem.resp_data + data_arb.io.in(0).valid := io.mem.resp_val || block_during_refill + + // load hits + data_arb.io.in(4).bits.inner_req.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) + data_arb.io.in(4).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) + data_arb.io.in(4).bits.inner_req.rw := Bool(false) + data_arb.io.in(4).bits.inner_req.wmask := UFix(0) // don't care + data_arb.io.in(4).bits.inner_req.data := io.mem.resp_data // don't care + data_arb.io.in(4).valid := io.cpu.req_val && req_read + data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check + val early_load_nack = req_read && !data_arb.io.in(4).ready + + // store hits and AMO hits and misses use a pending store register. + // we nack new stores if a pending store can't retire for some reason. + // we drain a pending store if the CPU performs a store or a + // conflictig load, or if the cache is idle, or after a miss. 
+ val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb)) + val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) + val p_store_match = r_cpu_req_val && r_req_read && p_store_idx_match && p_store_offset_match + val drain_store_val = (p_store_valid && (!io.cpu.req_val || !req_read || Reg(tag_miss))) || p_store_match + data_arb.io.in(2).bits.inner_req.offset := p_store_idx(offsetmsb,ramindexlsb) + data_arb.io.in(2).bits.inner_req.idx := p_store_idx(indexmsb,indexlsb) + data_arb.io.in(2).bits.inner_req.rw := Bool(true) + data_arb.io.in(2).valid := drain_store_val + data_arb.io.in(2).bits.way_en := UFixToOH(p_store_way_id.toUFix, NWAYS) + val drain_store = drain_store_val && data_arb.io.in(2).ready + val p_store_rdy = !p_store_valid || drain_store + val p_amo = Reg(tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo, resetVal = Bool(false)) + p_store_valid <== !p_store_rdy || (tag_hit && r_req_store) || p_amo + + // writeback + val wb_rdy = wb_arb.io.in(1).ready && !p_store_idx_match + wb_arb.io.in(1).valid := tag_miss && r_req_readwrite && dirty && !p_store_idx_match + wb_arb.io.in(1).bits.ppn := meta_wb_mux.tag + wb_arb.io.in(1).bits.idx := r_cpu_req_idx(indexmsb,indexlsb) + wb_arb.io.in(1).bits.way_id := replaced_way_id + + // tag update after a miss or a store to an exclusive clean line. 
+ val clear_valid = tag_miss && r_req_readwrite && meta_hit_mux.valid && (!dirty || wb_rdy) + val set_dirty = tag_hit && !meta_hit_mux.dirty && r_req_write + meta.io.state_req.bits.inner_req.rw := Bool(true) + meta.io.state_req.bits.inner_req.idx := r_cpu_req_idx(indexmsb,indexlsb) + meta.io.state_req.bits.inner_req.data.valid := tag_match + meta.io.state_req.bits.inner_req.data.dirty := tag_match + meta.io.state_req.valid := clear_valid || set_dirty + meta.io.state_req.bits.way_en := Cat(Bits(0),tag_match_arr:_*) + + // pending store data, also used for AMO RHS + val storegen = new StoreDataGen + val amoalu = new AMOALU + storegen.io.typ := r_cpu_req_type + storegen.io.din := r_cpu_req_data + when (p_amo) { + p_store_data <== amoalu.io.out + } + when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) { + p_store_idx <== Mux(r_replay_amo, Reg(Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset)), r_cpu_req_idx) + p_store_way_id <== Mux(r_replay_amo, Reg(replayer.io.replay.bits.way_id), hit_way_id) + p_store_type <== Mux(r_replay_amo, Reg(replayer.io.data_req.bits.typ), r_cpu_req_type) + p_store_cmd <== Mux(r_replay_amo, Reg(replayer.io.data_req.bits.cmd), r_cpu_req_cmd) + p_store_data <== storegen.io.dout + } + + // miss handling + val mshr = new MSHRFile() + mshr.io.req_val := tag_miss && r_req_readwrite && (!dirty || wb_rdy) && (!r_req_write || replayer.io.sdq_enq.ready) + mshr.io.req_ppn := io.cpu.req_ppn + mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) + mshr.io.req_tag := r_cpu_req_tag + mshr.io.req_offset := r_cpu_req_idx(offsetmsb,0) + mshr.io.req_cmd := r_cpu_req_cmd + mshr.io.req_type := r_cpu_req_type + mshr.io.req_sdq_id := replayer.io.sdq_id + mshr.io.req_way_id := replaced_way_id + mshr.io.mem_resp_val := io.mem.resp_val && (~rr_count === UFix(0)) + mshr.io.mem_resp_tag := io.mem.resp_tag + mshr.io.mem_req <> wb.io.refill_req + mshr.io.meta_req <> meta_arb.io.in(1) + mshr.io.replay <> replayer.io.replay + 
replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy + replayer.io.sdq_enq.bits := storegen.io.dout + data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx + data_arb.io.in(0).bits.way_en := UFixToOH(mshr.io.mem_resp_way_id.toUFix, NWAYS) + + // replays + val replay = replayer.io.data_req.bits + val stall_replay = r_replay_amo || p_amo || p_store_valid + val replay_val = replayer.io.data_req.valid && !stall_replay + val replay_rdy = data_arb.io.in(1).ready + data_arb.io.in(1).bits.inner_req.offset := replay.offset(offsetmsb,ramindexlsb) + data_arb.io.in(1).bits.inner_req.idx := replay.idx + data_arb.io.in(1).bits.inner_req.rw := replay.cmd === M_XWR + data_arb.io.in(1).valid := replay_val + data_arb.io.in(1).bits.way_en := UFixToOH(replayer.io.way_id, NWAYS) + replayer.io.data_req.ready := replay_rdy && !stall_replay + r_replay_amo <== replay_amo_val && replay_rdy && !stall_replay + + // store write mask generation. + // assumes store replays are higher-priority than pending stores. + val maskgen = new StoreMaskGen + val store_offset = Mux(!replay_val, p_store_idx(offsetmsb,0), replay.offset) + maskgen.io.typ := Mux(!replay_val, p_store_type, replay.typ) + maskgen.io.addr := store_offset(offsetlsb-1,0) + val store_wmask_wide = maskgen.io.wmask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2up(CPU_DATA_BITS/8))).toUFix + val store_data = Mux(!replay_val, p_store_data, replay.data) + val store_data_wide = Fill(MEM_DATA_BITS/CPU_DATA_BITS, store_data) + data_arb.io.in(1).bits.inner_req.data := store_data_wide + data_arb.io.in(1).bits.inner_req.wmask := store_wmask_wide + data_arb.io.in(2).bits.inner_req.data := store_data_wide + data_arb.io.in(2).bits.inner_req.wmask := store_wmask_wide + + // load data subword mux/sign extension. + // subword loads are delayed by one cycle. 
+ val loadgen = new LoadDataGen + val loadgen_use_replay = Reg(replay_val && replay_rdy) + loadgen.io.typ := Mux(loadgen_use_replay, Reg(replay.typ), r_cpu_req_type) + loadgen.io.addr := Mux(loadgen_use_replay, Reg(replay.offset), r_cpu_req_idx)(ramindexlsb-1,0) + loadgen.io.din := data_resp_mux + + amoalu.io.cmd := p_store_cmd + amoalu.io.typ := p_store_type + amoalu.io.lhs := loadgen.io.r_dout.toUFix + amoalu.io.rhs := p_store_data.toUFix + + early_nack <== early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo + + // reset and flush unit + val flusher = new FlushUnit(lines) + val flushed = Reg(resetVal = Bool(true)) + val flush_rdy = mshr.io.fence_rdy && wb_rdy && !p_store_valid + flushed <== flushed && !r_cpu_req_val || r_cpu_req_val && r_req_flush && flush_rdy && flusher.io.req.ready + flusher.io.req.valid := r_cpu_req_val && r_req_flush && flush_rdy && !flushed + flusher.io.wb_req <> wb_arb.io.in(0) + flusher.io.meta_req <> meta_arb.io.in(0) + flusher.io.meta_resp <> meta.io.resp + flusher.io.resp.ready := Bool(true) // we don't respond to flush requests + + // we usually nack rather than reporting that the cache is not ready. + // fences and flushes are the exceptions. 
+ val pending_fence = Reg(resetVal = Bool(false)) + pending_fence <== (r_cpu_req_val && r_req_fence || pending_fence) && !flush_rdy + val nack_hit = p_store_match || r_req_write && !p_store_rdy + val nack_miss = dirty && !wb_rdy || !mshr.io.req_rdy || r_req_write && !replayer.io.sdq_enq.ready + val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || + !flushed && r_req_flush + val nack = early_nack || r_req_readwrite && Mux(tag_match, nack_hit, nack_miss) || nack_flush + + io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence + io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack + io.cpu.resp_val := (tag_hit && !nack_hit && r_req_read) || replayer.io.cpu_resp_val + io.cpu.resp_replay := replayer.io.cpu_resp_val + io.cpu.resp_miss := tag_miss && !nack_miss && r_req_read + io.cpu.resp_tag := Mux(replayer.io.cpu_resp_val, replayer.io.cpu_resp_tag, r_cpu_req_tag) + io.cpu.resp_data := loadgen.io.dout + io.cpu.resp_data_subword := loadgen.io.r_dout_subword + + wb.io.mem_req.ready := io.mem.req_rdy + io.mem.req_val := wb.io.mem_req.valid + io.mem.req_rw := wb.io.mem_req.bits.rw + io.mem.req_wdata := wb.io.mem_req_data + io.mem.req_tag := wb.io.mem_req.bits.tag.toUFix + io.mem.req_addr := wb.io.mem_req.bits.addr +} + } From 97f0852b17a15dc4fe00edce94ab1740d81460f0 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 18 Jan 2012 17:53:26 -0800 Subject: [PATCH 0104/1087] DM cache with assoc-aware subunits passes all asm and bmarks --- rocket/src/main/scala/nbdcache.scala | 15 +++++++++------ rocket/src/main/scala/util.scala | 7 ++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9959089f..2d0d1229 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -725,10 +725,9 @@ class HellaCacheDM(lines: Int) extends Component { val wb = new WritebackUnit val wb_arb = (new Arbiter(2)) { 
new WritebackReq() } wb_arb.io.out <> wb.io.req - wb.io.data_req.bits.inner_req <> data_arb.io.in(3).bits - wb.io.data_req.ready <> data_arb.io.in(3).ready - wb.io.data_req.valid <> data_arb.io.in(3).valid - + wb.io.data_req.bits.inner_req <> data_arb.io.in(3).bits //TODO + wb.io.data_req.ready := data_arb.io.in(3).ready + data_arb.io.in(3).valid := wb.io.data_req.valid wb.io.data_resp <> data.io.resp // cpu tag check @@ -821,7 +820,9 @@ class HellaCacheDM(lines: Int) extends Component { mshr.io.mem_resp_val := io.mem.resp_val && (~rr_count === UFix(0)) mshr.io.mem_resp_tag := io.mem.resp_tag mshr.io.mem_req <> wb.io.refill_req - mshr.io.meta_req <> meta_arb.io.in(1) + mshr.io.meta_req.bits.inner_req <> meta_arb.io.in(1).bits //TODO + mshr.io.meta_req.ready := meta_arb.io.in(1).ready + meta_arb.io.in(1).valid := mshr.io.meta_req.valid mshr.io.replay <> replayer.io.replay replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy replayer.io.sdq_enq.bits := storegen.io.dout @@ -875,7 +876,9 @@ class HellaCacheDM(lines: Int) extends Component { flushed <== flushed && !r_cpu_req_val || r_cpu_req_val && r_req_flush && flush_rdy && flusher.io.req.ready flusher.io.req.valid := r_cpu_req_val && r_req_flush && flush_rdy && !flushed flusher.io.wb_req <> wb_arb.io.in(0) - flusher.io.meta_req <> meta_arb.io.in(0) + flusher.io.meta_req.bits.inner_req <> meta_arb.io.in(0).bits //TODO + flusher.io.meta_req.ready := meta_arb.io.in(0).ready + meta_arb.io.in(0).valid := flusher.io.meta_req.valid flusher.io.meta_resp <> meta.io.resp flusher.io.resp.ready := Bool(true) // we don't respond to flush requests diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 2e2dd20b..f9ddab80 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -42,11 +42,8 @@ object OHToUFix { def apply(in: Bits): UFix = { - var out = UFix(0) - for(i <- 0 until in.getWidth) - if(in(i) == Bits(1)) - out = UFix(i) 
- out + val out = MuxCase( UFix(0), (0 until in.getWidth).map( i => (in(i).toBool, UFix(i)))) + out.toUFix } } From 31c56228e2b49778c80c120d0e988f416b4db184 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 21 Jan 2012 20:13:15 -0800 Subject: [PATCH 0105/1087] add missing "otherwise" --- rocket/src/main/scala/nbdcache.scala | 4 +++- rocket/src/main/scala/queues.scala | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2d0d1229..86583854 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -227,7 +227,9 @@ class MSHR(id: Int) extends Component { when (io.meta_req.valid && io.meta_req.ready) { valid <== Bool(false) } - dirty <== next_dirty + otherwise { + dirty <== next_dirty + } io.idx_match := valid && (idx_ === io.req_idx) io.idx := idx_ diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 04fd9d23..3f6892f9 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -27,12 +27,6 @@ class queueCtrl(entries: Int) extends Component val enq_ptr = Reg(width = addr_sz, resetVal = UFix(0, addr_sz)); val deq_ptr = Reg(width = addr_sz, resetVal = UFix(0, addr_sz)); val full = Reg(width = 1, resetVal = Bool(false)); - - when (io.q_reset) { - enq_ptr <== UFix(0, addr_sz); - deq_ptr <== UFix(0, addr_sz); - full <== Bool(false); - } io.waddr := enq_ptr; io.raddr := deq_ptr; @@ -76,9 +70,16 @@ class queueCtrl(entries: Int) extends Component Mux(do_deq && full, Bool(false), full)); - enq_ptr <== enq_ptr_next; - deq_ptr <== deq_ptr_next; - full <== full_next; + when (io.q_reset) { + enq_ptr <== UFix(0, addr_sz); + deq_ptr <== UFix(0, addr_sz); + full <== Bool(false); + } + otherwise { + enq_ptr <== enq_ptr_next; + deq_ptr <== deq_ptr_next; + full <== full_next; + } } class ioQueueSimplePF[T <: Data]()(data: => T) extends Bundle From 
d59bddfbf18048cf53ccf4dcdb08c7dd341f8fa1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 21 Jan 2012 20:42:13 -0800 Subject: [PATCH 0106/1087] fix I$ miss replay bug --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ad880c65..a33412a1 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -592,7 +592,7 @@ class rocketCtrl extends Component io.dpath.wen_btb := !ex_btb_match && br_jr_taken; io.dpath.clr_btb := ex_reg_btb_hit && !br_jr_taken || id_reg_icmiss; - io.imem.req_val := take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_icmiss) + io.imem.req_val := take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay) // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. val ex_mem_cmd_load = From e7bf07d55ec124f30d2ac1e7b9f4120ed4810042 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 23 Jan 2012 15:35:53 -0800 Subject: [PATCH 0107/1087] fix AMO replay bug --- rocket/src/main/scala/nbdcache.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 86583854..e75ea972 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -688,6 +688,9 @@ class HellaCacheDM(lines: Int) extends Component { val replay_amo_val = replayer.io.data_req.valid && replayer.io.data_req.bits.cmd(3).toBool when (replay_amo_val) { + r_cpu_req_idx <== Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) + r_cpu_req_cmd <== replayer.io.data_req.bits.cmd + r_cpu_req_type <== replayer.io.data_req.bits.typ r_cpu_req_data <== replayer.io.data_req.bits.data } when (io.cpu.req_val) { @@ -803,9 +806,9 @@ class HellaCacheDM(lines: Int) extends Component { p_store_data <== amoalu.io.out } when (tag_hit && 
r_req_write && p_store_rdy || r_replay_amo) { - p_store_idx <== Mux(r_replay_amo, Reg(Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset)), r_cpu_req_idx) - p_store_type <== Mux(r_replay_amo, Reg(replayer.io.data_req.bits.typ), r_cpu_req_type) - p_store_cmd <== Mux(r_replay_amo, Reg(replayer.io.data_req.bits.cmd), r_cpu_req_cmd) + p_store_idx <== r_cpu_req_idx + p_store_type <== r_cpu_req_type + p_store_cmd <== r_cpu_req_cmd p_store_data <== storegen.io.dout } From 8766438bb98684632eb3950997256bf7471ba101 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 23 Jan 2012 09:51:35 -0800 Subject: [PATCH 0108/1087] Updated chisel removes ^^ from language. Removed from rocket source, updated jar. --- rocket/src/main/scala/cpu.scala | 6 +++--- rocket/src/main/scala/dcache.scala | 4 ++-- rocket/src/main/scala/dpath.scala | 16 ++++++++-------- rocket/src/main/scala/nbdcache.scala | 7 ++++--- rocket/src/main/scala/queues.scala | 26 +++++++++++++------------- rocket/src/main/scala/top.scala | 8 ++++---- rocket/src/main/scala/util.scala | 4 ++-- 7 files changed, 36 insertions(+), 35 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index b93392ff..e8a99954 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -45,8 +45,8 @@ class rocketProc extends Component val arb = new rocketDmemArbiter(); ctrl.io.dpath <> dpath.io.ctrl; - dpath.io.host ^^ io.host; - dpath.io.debug ^^ io.debug; + dpath.io.host <> io.host; + dpath.io.debug <> io.debug; // FIXME: try to make this more compact @@ -86,7 +86,7 @@ class rocketProc extends Component ptw.io.itlb <> itlb.io.ptw; ptw.io.ptbr := dpath.io.ptbr; arb.io.ptw <> ptw.io.dmem; - arb.io.mem ^^ io.dmem + arb.io.mem <> io.dmem // connect arbiter to ctrl+dpath+DTLB arb.io.cpu.req_val := ctrl.io.dmem.req_val; diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 73fd4f61..8f0ac992 100644 --- 
a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -162,9 +162,9 @@ class rocketDCacheDM_flush(lines: Int) extends Component { dcache.io.cpu.req_ppn := Mux(flushing, UFix(0,PPN_BITS), io.cpu.req_ppn); dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); dcache.io.cpu.req_type := io.cpu.req_type; - dcache.io.cpu.req_data ^^ io.cpu.req_data; + dcache.io.cpu.req_data <> io.cpu.req_data; dcache.io.cpu.req_kill := io.cpu.req_kill && !flush_waiting; - dcache.io.mem ^^ io.mem; + dcache.io.mem <> io.mem; io.cpu.xcpt_ma_ld := dcache.io.cpu.xcpt_ma_ld; io.cpu.xcpt_ma_st := dcache.io.cpu.xcpt_ma_st; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index b0bab419..06b38a0d 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -163,9 +163,9 @@ class rocketDpath extends Component if_next_pc.toUFix); btb.io.current_pc4 := if_pc_plus4; - btb.io.hit ^^ io.ctrl.btb_hit; - btb.io.wen ^^ io.ctrl.wen_btb; - btb.io.clr ^^ io.ctrl.clr_btb; + btb.io.hit <> io.ctrl.btb_hit; + btb.io.wen <> io.ctrl.wen_btb; + btb.io.clr <> io.ctrl.clr_btb; btb.io.correct_pc4 := ex_reg_pc_plus4; io.ctrl.btb_match := id_reg_pc === jr_br_target; @@ -187,11 +187,11 @@ class rocketDpath extends Component val id_raddr2 = id_reg_inst(21,17).toUFix; // regfile read - rfile.io.r0.en ^^ io.ctrl.ren2; + rfile.io.r0.en <> io.ctrl.ren2; rfile.io.r0.addr := id_raddr2; val id_rdata2 = rfile.io.r0.data; - rfile.io.r1.en ^^ io.ctrl.ren1; + rfile.io.r1.en <> io.ctrl.ren1; rfile.io.r1.addr := id_raddr1; val id_rdata1 = rfile.io.r1.data; @@ -324,9 +324,9 @@ class rocketDpath extends Component Mux(ex_reg_ctrl_eret, PCR_EPC, ex_reg_raddr2); - pcr.io.host.from_wen ^^ io.host.from_wen; - pcr.io.host.from ^^ io.host.from; - pcr.io.host.to ^^ io.host.to; + pcr.io.host.from_wen <> io.host.from_wen; + pcr.io.host.from <> io.host.from; + pcr.io.host.to <> io.host.to; io.ctrl.irq_timer := pcr.io.irq_timer; 
io.ctrl.irq_ipi := pcr.io.irq_ipi; diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index e75ea972..29334429 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -341,9 +341,9 @@ class MSHRFile extends Component { alloc_arb.io.out.ready := io.req_val && !idx_match - meta_req_arb.io.out ^^ io.meta_req - mem_req_arb.io.out ^^ io.mem_req - replay_arb.io.out ^^ io.replay + meta_req_arb.io.out <> io.meta_req + mem_req_arb.io.out <> io.mem_req + replay_arb.io.out <> io.replay io.req_rdy := Mux(idx_match, tag_match && sec_rdy, pri_rdy) io.mem_resp_idx := mem_resp_idx_mux.io.out @@ -810,6 +810,7 @@ class HellaCacheDM(lines: Int) extends Component { p_store_type <== r_cpu_req_type p_store_cmd <== r_cpu_req_cmd p_store_data <== storegen.io.dout + p_store_way_id <== UFix(0) } // miss handling diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 3f6892f9..90732f2e 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -93,13 +93,13 @@ class queueSimplePF[T <: Data](entries: Int)(data: => T) extends Component { override val io = new ioQueueSimplePF()(data); val ctrl = new queueCtrl(entries); - ctrl.io.q_reset ^^ io.q_reset; - ctrl.io.deq_val ^^ io.deq.valid; - ctrl.io.enq_rdy ^^ io.enq.ready; - ctrl.io.enq_val ^^ io.enq.valid; - ctrl.io.deq_rdy ^^ io.deq.ready; + ctrl.io.q_reset <> io.q_reset; + ctrl.io.deq_val <> io.deq.valid; + ctrl.io.enq_rdy <> io.enq.ready; + ctrl.io.enq_val <> io.enq.valid; + ctrl.io.deq_rdy <> io.deq.ready; val ram = Mem(entries, ctrl.io.wen, ctrl.io.waddr, io.enq.bits); - ram.read(ctrl.io.raddr) ^^ io.deq.bits; + ram.read(ctrl.io.raddr) <> io.deq.bits; } // TODO: SHOULD USE INHERITANCE BUT BREAKS INTROSPECTION CODE @@ -191,7 +191,7 @@ class queueDpathFlow[T <: Data](entries: Int)(data: => T) extends Component override val io = new ioQueueDpathFlow(addr_sz)(data); val ram = Mem(entries, io.wen, 
io.waddr, io.enq_bits); val rout = ram(io.raddr); - Mux(io.flowthru, io.enq_bits, rout) ^^ io.deq_bits; + Mux(io.flowthru, io.enq_bits, rout) <> io.deq_bits; } class ioQueueFlowPF[T <: Data](data: => T) extends Bundle() @@ -210,17 +210,17 @@ class queueFlowPF[T <: Data](entries: Int)(data: => T) extends Component val ctrl = new queueCtrlFlow(entries); val dpath = new queueDpathFlow(entries)(data); - ctrl.io.deq_rdy ^^ io.deq_rdy; + ctrl.io.deq_rdy <> io.deq_rdy; ctrl.io.wen <> dpath.io.wen; ctrl.io.raddr <> dpath.io.raddr; ctrl.io.waddr <> dpath.io.waddr; ctrl.io.flowthru <> dpath.io.flowthru; - ctrl.io.enq_val ^^ io.enq_val; - dpath.io.enq_bits ^^ io.enq_bits; + ctrl.io.enq_val <> io.enq_val; + dpath.io.enq_bits <> io.enq_bits; - ctrl.io.deq_val ^^ io.deq_val; - ctrl.io.enq_rdy ^^ io.enq_rdy; - dpath.io.deq_bits ^^ io.deq_bits; + ctrl.io.deq_val <> io.deq_val; + ctrl.io.enq_rdy <> io.enq_rdy; + dpath.io.deq_bits <> io.deq_bits; } } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index be9bb311..daa049be 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -20,13 +20,13 @@ class Top() extends Component { val dcache = new HellaCacheDM(128); val arbiter = new rocketMemArbiter(); - arbiter.io.mem ^^ io.mem; + arbiter.io.mem <> io.mem; arbiter.io.dcache <> dcache.io.mem; arbiter.io.icache <> icache_pf.io.mem; - cpu.io.host ^^ io.host; - cpu.io.debug ^^ io.debug; - cpu.io.console ^^ io.console; + cpu.io.host <> io.host; + cpu.io.debug <> io.debug; + cpu.io.console <> io.console; icache.io.mem <> icache_pf.io.icache; cpu.io.imem <> icache.io.cpu; diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index f9ddab80..837af57a 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -102,8 +102,8 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { for (i <- 1 to n-1) vout = vout || io.in(i).valid - vout ^^ io.out.valid - dout ^^ 
io.out.bits + vout <> io.out.valid + dout <> io.out.bits } class ioPriorityDecoder(in_width: Int, out_width: Int) extends Bundle From a5a020f97b52a995a735a4f429370185b4be9503 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 23 Jan 2012 20:59:38 -0800 Subject: [PATCH 0109/1087] update chisel and remove SRAM_READ_LATENCY --- rocket/src/main/scala/consts.scala | 2 -- rocket/src/main/scala/dcache.scala | 8 ++++---- rocket/src/main/scala/dpath_util.scala | 26 ++++++++++---------------- rocket/src/main/scala/icache.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 8 ++++---- 5 files changed, 20 insertions(+), 28 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 2caa7d43..67ffbee3 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -210,8 +210,6 @@ object Constants val HAVE_RVC = Bool(false); val HAVE_FPU = Bool(false); val HAVE_VEC = Bool(false); - - val SRAM_READ_LATENCY = 0; } } diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 8f0ac992..a1b24ddd 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -258,8 +258,8 @@ class rocketDCacheDM(lines: Int) extends Component { ((state === s_resolve_miss) && r_req_flush); val tag_array = Mem4(lines, r_cpu_req_ppn); - tag_array.setReadLatency(SRAM_READ_LATENCY); -// tag_array.setTarget('inst); + tag_array.setReadLatency(1); + tag_array.setTarget('inst); val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); // valid bit array @@ -375,8 +375,8 @@ class rocketDCacheDM(lines: Int) extends Component { store_wmask)); val data_array = Mem4(lines*4, data_wdata); - data_array.setReadLatency(SRAM_READ_LATENCY); -// data_array.setTarget('inst); + data_array.setReadLatency(1); + data_array.setTarget('inst); val data_array_rdata = data_array.rw(data_addr, data_wdata, data_we, data_wmask); val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, 
data_array_rdata(127, 64), data_array_rdata(63,0)); val r_resp_data = Reg(resp_data); diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 81198efb..17a8b39f 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -29,8 +29,10 @@ class rocketDpathBTB(entries: Int) extends Component val taglsb = (VADDR_BITS-idxlsb); val vb_array = Mem(entries, io.wen || io.clr, io.correct_pc4(idxmsb,idxlsb), !io.clr, resetVal = Bool(false)); - val tag_target_array = Mem(entries, io.wen, io.correct_pc4(idxmsb,idxlsb), - Cat(io.correct_pc4(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb))) + val tag_target_array = Mem4(entries, io.wen, io.correct_pc4(idxmsb,idxlsb), + Cat(io.correct_pc4(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb))) + tag_target_array.setReadLatency(0); + tag_target_array.setTarget('inst); val is_val = vb_array(io.current_pc4(idxmsb,idxlsb)); val tag_target = tag_target_array(io.current_pc4(idxmsb, idxlsb)); @@ -226,20 +228,12 @@ class rocketDpathRegfile extends Component { override val io = new ioRegfile(); - // FIXME: remove the first "if" case once Mem4 C backend bug is fixed - if (SRAM_READ_LATENCY == 0) { - val regfile = Mem(32, io.w0.en && (io.w0.addr != UFix(0,5)), io.w0.addr, io.w0.data); - io.r0.data := Mux((io.r0.addr === UFix(0, 5)) || !io.r0.en, Bits(0, 64), regfile(io.r0.addr)); - io.r1.data := Mux((io.r1.addr === UFix(0, 5)) || !io.r1.en, Bits(0, 64), regfile(io.r1.addr)); - } - else { - val regfile = Mem4(32, io.w0.data); - regfile.setReadLatency(0); - regfile.setTarget('inst); - regfile.write(io.w0.addr, io.w0.data, io.w0.en); - io.r0.data := Mux((io.r0.addr === UFix(0, 5)) || !io.r0.en, Bits(0, 64), regfile(io.r0.addr)); - io.r1.data := Mux((io.r1.addr === UFix(0, 5)) || !io.r1.en, Bits(0, 64), regfile(io.r1.addr)); - } + val regfile = Mem4(32, io.w0.data); + regfile.setReadLatency(0); + regfile.setTarget('inst); + 
regfile.write(io.w0.addr, io.w0.data, io.w0.en); + io.r0.data := Mux((io.r0.addr === UFix(0, 5)) || !io.r0.en, Bits(0, 64), regfile(io.r0.addr)); + io.r1.data := Mux((io.r1.addr === UFix(0, 5)) || !io.r1.en, Bits(0, 64), regfile(io.r1.addr)); } } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index a8fd2e3a..f06992d1 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -86,7 +86,7 @@ class rocketICacheDM(lines: Int) extends Component { val tag_we = (state === s_refill_wait) && io.mem.resp_val; val tag_array = Mem4(lines, r_cpu_req_ppn); - tag_array.setReadLatency(SRAM_READ_LATENCY); + tag_array.setReadLatency(1); tag_array.setTarget('inst); val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); @@ -107,7 +107,7 @@ class rocketICacheDM(lines: Int) extends Component { Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), refill_count), io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix; val data_array = Mem4(lines*REFILL_CYCLES, io.mem.resp_data); - data_array.setReadLatency(SRAM_READ_LATENCY); + data_array.setReadLatency(1); data_array.setTarget('inst); val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val); diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 29334429..8d1e7dc1 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -387,7 +387,7 @@ class ReplayUnit extends Component { val sdq_addr = Mux(sdq_ren_retry, rp.sdq_id, Mux(sdq_ren_new, io.replay.bits.sdq_id, sdq_alloc_id)) val sdq = Mem4(NSDQ, io.sdq_enq.bits) - sdq.setReadLatency(SRAM_READ_LATENCY) + sdq.setReadLatency(1); sdq.setTarget('inst) val sdq_dout = sdq.rw(sdq_addr, io.sdq_enq.bits, sdq_wen, cs = sdq_ren || sdq_wen) @@ -540,7 +540,7 @@ class MetaDataArray(lines: Int) extends Component { } val vd_array = Mem4(lines, Bits(width = 2)) - 
vd_array.setReadLatency(SRAM_READ_LATENCY) + vd_array.setReadLatency(1); val vd_wdata2 = Cat(io.state_req.bits.data.valid, io.state_req.bits.data.dirty) vd_array.write(io.state_req.bits.idx, vd_wdata2, io.state_req.valid && io.state_req.bits.rw) val vd_wdata1 = Cat(io.req.bits.data.valid, io.req.bits.data.dirty) @@ -551,7 +551,7 @@ class MetaDataArray(lines: Int) extends Component { val vd_conflict = io.state_req.valid && (io.req.bits.idx === io.state_req.bits.idx) val tag_array = Mem4(lines, io.resp.tag) - tag_array.setReadLatency(SRAM_READ_LATENCY) + tag_array.setReadLatency(1); tag_array.setTarget('inst) val tag_rdata = tag_array.rw(io.req.bits.idx, io.req.bits.data.tag, io.req.valid && io.req.bits.rw, cs = io.req.valid) @@ -596,7 +596,7 @@ class DataArray(lines: Int) extends Component { val wmask = FillInterleaved(8, io.req.bits.wmask) val array = Mem4(lines*REFILL_CYCLES, io.resp) - array.setReadLatency(SRAM_READ_LATENCY) + array.setReadLatency(1); array.setTarget('inst) val addr = Cat(io.req.bits.idx, io.req.bits.offset) val rdata = array.rw(addr, io.req.bits.data, io.req.valid && io.req.bits.rw, wmask, cs = io.req.valid) From f1c355e3cdcda715cd1bf26caa213498484977e1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Jan 2012 00:15:17 -0800 Subject: [PATCH 0110/1087] check pc/effective address sign extension --- rocket/src/main/scala/cpu.scala | 4 +-- rocket/src/main/scala/dpath.scala | 36 +++++++++++++------------- rocket/src/main/scala/dpath_util.scala | 13 +++++----- rocket/src/main/scala/dtlb.scala | 13 ++++++---- rocket/src/main/scala/itlb.scala | 17 ++++++------ 5 files changed, 43 insertions(+), 40 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index e8a99954..70fd9b56 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -55,7 +55,7 @@ class rocketProc extends Component itlb.io.cpu.status := dpath.io.ctrl.status; itlb.io.cpu.req_val := ctrl.io.imem.req_val; 
itlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR - itlb.io.cpu.req_vpn := dpath.io.imem.req_addr(VADDR_BITS-1,PGIDX_BITS); + itlb.io.cpu.req_vpn := dpath.io.imem.req_addr(VADDR_BITS,PGIDX_BITS); io.imem.req_idx := dpath.io.imem.req_addr(PGIDX_BITS-1,0); io.imem.req_ppn := itlb.io.cpu.resp_ppn; io.imem.req_val := ctrl.io.imem.req_val; @@ -72,7 +72,7 @@ class rocketProc extends Component dtlb.io.cpu.req_kill := ctrl.io.dtlb_kill; dtlb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; dtlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR - dtlb.io.cpu.req_vpn := dpath.io.dmem.req_addr(VADDR_BITS-1,PGIDX_BITS); + dtlb.io.cpu.req_vpn := dpath.io.dmem.req_addr(VADDR_BITS,PGIDX_BITS); ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu.xcpt_ld; ctrl.io.xcpt_dtlb_st := dtlb.io.cpu.xcpt_st; ctrl.io.dtlb_rdy := dtlb.io.cpu.req_rdy; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 06b38a0d..a19d0fd7 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -7,7 +7,7 @@ import Instructions._ class ioDpathDmem extends Bundle() { - val req_addr = UFix(VADDR_BITS, OUTPUT); + val req_addr = UFix(VADDR_BITS+1, OUTPUT); val req_tag = UFix(CPU_TAG_BITS, OUTPUT); val req_data = Bits(64, OUTPUT); val resp_val = Bool(INPUT); @@ -20,7 +20,7 @@ class ioDpathDmem extends Bundle() class ioDpathImem extends Bundle() { - val req_addr = UFix(VADDR_BITS, OUTPUT); + val req_addr = UFix(VADDR_BITS+1, OUTPUT); val resp_data = Bits(32, INPUT); } @@ -50,7 +50,6 @@ class rocketDpath extends Component val alu = new rocketDpathALU(); val ex_alu_out = alu.io.out; val ex_alu_adder_out = alu.io.adder_out; - val ex_jr_target = ex_alu_adder_out(VADDR_BITS-1,0); val div = new rocketDivider(64); val div_result = div.io.div_result_bits; @@ -65,13 +64,13 @@ class rocketDpath extends Component val rfile = new rocketDpathRegfile(); // instruction fetch definitions - val if_reg_pc = Reg(resetVal = UFix(START_ADDR,VADDR_BITS)); + val 
if_reg_pc = Reg(resetVal = UFix(START_ADDR,VADDR_BITS+1)); // instruction decode definitions val id_reg_valid = Reg(resetVal = Bool(false)); val id_reg_inst = Reg(resetVal = NOP); - val id_reg_pc = Reg() { UFix() }; - val id_reg_pc_plus4 = Reg() { UFix() }; + val id_reg_pc = Reg() { UFix(width = VADDR_BITS+1) }; + val id_reg_pc_plus4 = Reg() { UFix(width = VADDR_BITS+1) }; // execute definitions val ex_reg_valid = Reg(resetVal = Bool(false)); @@ -133,21 +132,23 @@ class rocketDpath extends Component Cat(Fill(52, ex_reg_inst(31)), ex_reg_inst(31,27), ex_reg_inst(16,10)); val branch_adder_rhs = - Mux(io.ctrl.ex_jmp, Cat(Fill(VADDR_BITS-26, ex_reg_inst(31)), ex_reg_inst(31,7), UFix(0,1)), - Cat(ex_sign_extend_split(VADDR_BITS-2,0), UFix(0, 1))); - + Mux(io.ctrl.ex_jmp, Cat(Fill(VADDR_BITS-25, ex_reg_inst(31)), ex_reg_inst(31,7), UFix(0,1)), + Cat(ex_sign_extend_split(VADDR_BITS-1,0), UFix(0, 1))); val ex_branch_target = ex_reg_pc + branch_adder_rhs.toUFix; - val jr_br_target = Mux(io.ctrl.ex_jr, ex_jr_target, ex_branch_target); + val ex_jr_target_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0)) + val ex_jr_target_extended = Cat(ex_jr_target_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix + + val jr_br_target = Mux(io.ctrl.ex_jr, ex_jr_target_extended, ex_branch_target); btb.io.correct_target := jr_br_target val if_next_pc = - Mux(io.ctrl.sel_pc === PC_BTB, if_btb_target, + Mux(io.ctrl.sel_pc === PC_BTB, Cat(if_btb_target(VADDR_BITS-1), if_btb_target), Mux(io.ctrl.sel_pc === PC_EX4, ex_reg_pc_plus4, Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, - Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target.toUFix, - Mux(io.ctrl.sel_pc === PC_PCR, wb_reg_wdata(VADDR_BITS-1,0), // only used for ERET - Mux(io.ctrl.sel_pc === PC_EVEC, pcr.io.evec, + Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target_extended, + Mux(io.ctrl.sel_pc === PC_PCR, wb_reg_wdata(VADDR_BITS,0), // only used for ERET + Mux(io.ctrl.sel_pc === 
PC_EVEC, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec), Mux(io.ctrl.sel_pc === PC_WB, wb_reg_pc, if_pc_plus4))))))); // PC_4 @@ -155,8 +156,7 @@ class rocketDpath extends Component if_reg_pc <== if_next_pc.toUFix; } - // FIXME: make sure PCs are properly sign extended - io.ctrl.xcpt_ma_inst := if_next_pc(1,0) != Bits(0,2) + io.ctrl.xcpt_ma_inst := if_next_pc(1,0) != Bits(0) io.imem.req_addr := Mux(io.ctrl.stallf, if_reg_pc, @@ -172,7 +172,7 @@ class rocketDpath extends Component // instruction decode stage when (!io.ctrl.stalld) { id_reg_pc <== if_reg_pc; - id_reg_pc_plus4 <== if_pc_plus4; + id_reg_pc_plus4 <== if_pc_plus4; when(io.ctrl.killf) { id_reg_inst <== NOP; id_reg_valid <== Bool(false); @@ -314,7 +314,7 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req_addr := ex_alu_adder_out(VADDR_BITS-1,0); + io.dmem.req_addr := ex_jr_target_extended.toUFix; io.dmem.req_data := ex_reg_rs2; io.dmem.req_tag := ex_reg_waddr; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 17a8b39f..e12d6d5d 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -53,7 +53,7 @@ class ioDpathPCR extends Bundle() val exception = Bool(INPUT); val cause = UFix(5, INPUT); val badvaddr_wen = Bool(INPUT); - val pc = UFix(VADDR_BITS, INPUT); + val pc = UFix(VADDR_BITS+1, INPUT); val eret = Bool(INPUT); val ei = Bool(INPUT); val di = Bool(INPUT); @@ -121,8 +121,9 @@ class rocketDpathPCR extends Component } } + val badvaddr_sign = Mux(io.w.data(VADDR_BITS-1), ~io.w.data(63,VADDR_BITS) === UFix(0), io.w.data(63,VADDR_BITS) != UFix(0)) when (io.badvaddr_wen) { - reg_badvaddr <== io.w.data.toUFix; + reg_badvaddr <== Cat(badvaddr_sign, io.w.data(VADDR_BITS-1,0)).toUFix; } when (io.exception && !reg_status_et) { @@ -163,8 +164,8 @@ class rocketDpathPCR extends Component reg_status_ec <== HAVE_RVC 
&& io.w.data(SR_EC).toBool; reg_status_et <== io.w.data(SR_ET).toBool; } - when (io.w.addr === PCR_EPC) { reg_epc <== io.w.data(VADDR_BITS-1,0).toUFix; } - when (io.w.addr === PCR_BADVADDR) { reg_badvaddr <== io.w.data(VADDR_BITS-1,0).toUFix; } + when (io.w.addr === PCR_EPC) { reg_epc <== io.w.data(VADDR_BITS,0).toUFix; } + when (io.w.addr === PCR_BADVADDR) { reg_badvaddr <== io.w.data(VADDR_BITS,0).toUFix; } when (io.w.addr === PCR_EVEC) { reg_ebase <== io.w.data(VADDR_BITS-1,0).toUFix; } when (io.w.addr === PCR_COUNT) { reg_count <== io.w.data(31,0).toUFix; } when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(31,0).toUFix; r_irq_timer <== Bool(false); } @@ -187,8 +188,8 @@ class rocketDpathPCR extends Component when (!io.r.en) { rdata <== Bits(0,64); } switch (io.r.addr) { is (PCR_STATUS) { rdata <== Cat(Bits(0,47), reg_status_vm, reg_status_im, reg_status); } - is (PCR_EPC) { rdata <== Cat(Fill(64-VADDR_BITS, reg_epc(VADDR_BITS-1)), reg_epc); } - is (PCR_BADVADDR) { rdata <== Cat(Fill(64-VADDR_BITS, reg_badvaddr(VADDR_BITS-1)), reg_badvaddr); } + is (PCR_EPC) { rdata <== Cat(Fill(64-VADDR_BITS-1, reg_epc(VADDR_BITS)), reg_epc); } + is (PCR_BADVADDR) { rdata <== Cat(Fill(64-VADDR_BITS-1, reg_badvaddr(VADDR_BITS)), reg_badvaddr); } is (PCR_EVEC) { rdata <== Cat(Fill(64-VADDR_BITS, reg_ebase(VADDR_BITS-1)), reg_ebase); } is (PCR_COUNT) { rdata <== Cat(Fill(32, reg_count(31)), reg_count); } is (PCR_COMPARE) { rdata <== Cat(Fill(32, reg_compare(31)), reg_compare); } diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 214f05df..de1098eb 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -19,7 +19,7 @@ class ioDTLB_CPU(view: List[String] = null) extends Bundle(view) val req_cmd = Bits(4, INPUT); // load/store/amo val req_rdy = Bool(OUTPUT); val req_asid = Bits(ASID_BITS, INPUT); - val req_vpn = UFix(VPN_BITS, INPUT); + val req_vpn = UFix(VPN_BITS+1, INPUT); // lookup responses val 
resp_miss = Bool(OUTPUT); // val resp_val = Bool(OUTPUT); @@ -65,17 +65,18 @@ class rocketDTLB(entries: Int) extends Component val req_store = (r_cpu_req_cmd === M_XWR); val req_amo = r_cpu_req_cmd(3).toBool; - val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); + val bad_va = r_cpu_req_vpn(VPN_BITS) != r_cpu_req_vpn(VPN_BITS-1); val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); + val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); tag_cam.io.clear := io.cpu.invalidate; tag_cam.io.tag := lookup_tag; tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; tag_cam.io.write_tag := r_refill_tag; tag_cam.io.write_addr := r_refill_waddr; - val tag_hit = tag_cam.io.hit; + val tag_hit = tag_cam.io.hit || bad_va; val tag_hit_addr = tag_cam.io.hit_addr; // extract fields from status register @@ -140,14 +141,16 @@ class rocketDTLB(entries: Int) extends Component val access_fault_ld = tlb_hit && (req_load || req_amo) && ((status_s && !sr_array(tag_hit_addr).toBool) || - (status_u && !ur_array(tag_hit_addr).toBool)); + (status_u && !ur_array(tag_hit_addr).toBool) || + bad_va); io.cpu.xcpt_ld := access_fault_ld; val access_fault_st = tlb_hit && (req_store || req_amo) && ((status_s && !sw_array(tag_hit_addr).toBool) || - (status_u && !uw_array(tag_hit_addr).toBool)); + (status_u && !uw_array(tag_hit_addr).toBool) || + bad_va); io.cpu.xcpt_st := access_fault_st; diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 73f63b00..1b6aa7ed 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -74,7 +74,7 @@ class ioITLB_CPU(view: List[String] = null) extends Bundle(view) val req_val = Bool(INPUT); val req_rdy = Bool(OUTPUT); val req_asid = Bits(ASID_BITS, INPUT); - val req_vpn = UFix(VPN_BITS, INPUT); + val req_vpn = UFix(VPN_BITS+1, INPUT); // lookup responses val resp_miss = Bool(OUTPUT); // val resp_val = Bool(OUTPUT); @@ 
-111,18 +111,19 @@ class rocketITLB(entries: Int) extends Component otherwise { r_cpu_req_val <== Bool(false); } - - val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); + + val bad_va = r_cpu_req_vpn(VPN_BITS) != r_cpu_req_vpn(VPN_BITS-1); val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); + val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); tag_cam.io.clear := io.cpu.invalidate; tag_cam.io.tag := lookup_tag; tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; tag_cam.io.write_tag := r_refill_tag; tag_cam.io.write_addr := r_refill_waddr; - val tag_hit = tag_cam.io.hit; + val tag_hit = tag_cam.io.hit || bad_va; val tag_hit_addr = tag_cam.io.hit_addr; // extract fields from status register @@ -171,15 +172,13 @@ class rocketITLB(entries: Int) extends Component } } - // exception check - val outofrange = !tlb_miss && (io.cpu.resp_ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); - val access_fault = tlb_hit && ((status_s && !sx_array(tag_hit_addr).toBool) || - (status_u && !ux_array(tag_hit_addr).toBool)); + (status_u && !ux_array(tag_hit_addr).toBool) || + bad_va); - io.cpu.exception := access_fault; //|| outofrange; + io.cpu.exception := access_fault; io.cpu.req_rdy := Mux(status_vm, (state === s_ready) && (!r_cpu_req_val || tag_hit), Bool(true)); io.cpu.resp_miss := tlb_miss || (state != s_ready); io.cpu.resp_ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; From 06fdf79dabd1aae492bff6a701d20fa5be123c4c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Jan 2012 00:56:47 -0800 Subject: [PATCH 0111/1087] fix long-latency writeback arbitration bug --- rocket/src/main/scala/ctrl.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index a33412a1..2386fbf1 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -633,8 
+633,8 @@ class rocketCtrl extends Component // for divider, multiplier, load miss writeback val mem_wb = Reg(io.dmem.resp_replay, resetVal = Bool(false)) // delayed for subword extension - val mul_wb = io.dpath.mul_result_val && !io.dmem.resp_replay; - val div_wb = io.dpath.div_result_val && !io.dpath.mul_result_val && !io.dmem.resp_replay; + val mul_wb = io.dpath.mul_result_val && !mem_wb; + val div_wb = io.dpath.div_result_val && !io.dpath.mul_result_val && !mem_wb; val ctrl_stalld = !take_pc && From 9e6b86fe85c60f0f275ca28c157562bc84cffff7 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Jan 2012 03:40:01 -0800 Subject: [PATCH 0112/1087] Fix a nasty replay bug If a mispredicted branch was followed by an instruction dependent on a load that missed in the cache, the mispredicted path would be executed rather than the correct path. Fail. Example broken code: lw x2, 0(x2) # cache miss beq x3, x0, somewhere # mispredicted branch move x4, x2 # wrong-path instruction dependent on load miss --- rocket/src/main/scala/ctrl.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2386fbf1..1546597d 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -345,6 +345,7 @@ class rocketCtrl extends Component val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val ex_reg_replay = Reg(resetVal = Bool(false)); + val ex_reg_lu_bypass = Reg(resetVal = Bool(false)); val mem_reg_inst_di = Reg(resetVal = Bool(false)); val mem_reg_inst_ei = Reg(resetVal = Bool(false)); @@ -404,6 +405,7 @@ class rocketCtrl extends Component ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== Bool(false); ex_reg_replay <== Bool(false); + ex_reg_lu_bypass <== Bool(false); } otherwise { ex_reg_br_type <== id_br_type; @@ -423,6 +425,7 @@ class rocketCtrl extends Component ex_reg_xcpt_fpu <== Bool(false); 
ex_reg_xcpt_syscall <== id_syscall.toBool; ex_reg_replay <== id_reg_replay; + ex_reg_lu_bypass <== io.dpath.mem_lu_bypass; } ex_reg_mem_cmd <== id_mem_cmd; ex_reg_mem_type <== id_mem_type; @@ -567,7 +570,7 @@ class rocketCtrl extends Component // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions - val replay_ex = dcache_miss && Reg(io.dpath.mem_lu_bypass) || mem_reg_privileged || mem_reg_flush_inst || + val replay_ex = dcache_miss && ex_reg_lu_bypass || mem_reg_privileged || mem_reg_flush_inst || ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) val kill_ex = take_pc_wb || replay_ex From 8229d65adfa2b74f3f2c49d3fe6ea6a0d6ec73ab Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 24 Jan 2012 11:41:44 -0800 Subject: [PATCH 0113/1087] Associative cache passes asm tests and bmarks with power of 2 associativities (including 1) --- rocket/src/main/scala/nbdcache.scala | 132 +++++++++++++++------------ 1 file changed, 76 insertions(+), 56 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 8d1e7dc1..f803ac3a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -7,17 +7,22 @@ import scala.math._; class ReplacementWayGen extends Component { val io = new Bundle { - val way_en = Bits(width = width, dir = INPUT) + val way_en = Bits(width = NWAYS, dir = INPUT) val way_id = UFix(width = log2up(NWAYS), dir = OUTPUT) } } -class RandomReplacementWayGen extends ReplacementWayGen() { +class RandomReplacementWayGen extends Component { + val io = new Bundle { + val way_en = Bits(width = NWAYS, dir = INPUT) + val way_id = UFix(width = log2up(NWAYS), dir = OUTPUT) + } val width = max(6,log2up(NWAYS)) val lfsr = Reg(resetVal = UFix(1, width)) - when (io.way_en.orR) { lfsr <== Cat(lfsr(0)^lfsr(2)^lfsr(3)^lfsr(5), lfsr(width-1,1)) } + when (io.way_en.orR) { lfsr <== 
Cat(lfsr(0)^lfsr(2)^lfsr(3)^lfsr(5), lfsr(width-1,1)).toUFix } //TODO: Actually limit selection based on which ways are available (io.ways_en) - io.way_id := lfsr(log2up(NWAYS)-1,0).toUFix + if(NWAYS > 1) io.way_id := lfsr(log2up(NWAYS)-1,0).toUFix + else io.way_id := UFix(0) } class StoreMaskGen extends Component { @@ -506,32 +511,6 @@ class FlushUnit(lines: Int) extends Component { io.wb_req.bits.way_id := way_cnt } -class MetaDataArrayArray(lines: Int) extends Component { - val io = new Bundle { - val req = (new ioDecoupled) { new MetaArrayArrayReq() } - val resp = Vec(NWAYS){ (new MetaData).asOutput } - val state_req = (new ioDecoupled) { new MetaArrayArrayReq() } - } - - val way_arr = List.fill(NWAYS){ new MetaDataArray(lines) } - val tag_ready_arr = Bits(width = NWAYS) - val state_ready_arr = Bits(width = NWAYS) - - for(w <- 0 until NWAYS) { - way_arr(w).io.req.bits ^^ io.req.bits.inner_req - way_arr(w).io.req.ready := tag_ready_arr(w) - way_arr(w).io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool - way_arr(w).io.state_req.bits ^^ io.req.bits.inner_req - way_arr(w).io.state_req.ready := state_ready_arr(w) - way_arr(w).io.state_req.valid := io.req.valid && io.req.bits.way_en(w).toBool - io.resp(w) ^^ way_arr(w).io.resp - } - - io.req.ready := tag_ready_arr.andR.toBool - io.state_req.ready := state_ready_arr.andR.toBool -} - - class MetaDataArray(lines: Int) extends Component { val io = new Bundle { val req = (new ioDecoupled) { new MetaArrayReq() } @@ -561,30 +540,38 @@ class MetaDataArray(lines: Int) extends Component { io.req.ready := !vd_conflict } -class DataArrayArray(lines: Int) extends Component { +class MetaDataArrayArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new DataArrayArrayReq() } - val resp = Vec(NWAYS){ Bits(width = MEM_DATA_BITS, dir = OUTPUT) } + val req = (new ioDecoupled) { new MetaArrayArrayReq() } + val resp = Vec(NWAYS){ (new MetaData).asOutput } + val state_req = (new ioDecoupled) 
{ new MetaArrayArrayReq() } val way_en = Bits(width = NWAYS, dir = OUTPUT) } - val way_en_ = Reg { Bits() } + val way_en_ = Reg { Bits(width=NWAYS) } + when (io.state_req.valid && io.state_req.ready) { + way_en_ <== io.state_req.bits.way_en + } when (io.req.valid && io.req.ready) { way_en_ <== io.req.bits.way_en } - val way_arr = List.fill(NWAYS){ new DataArray(lines) } - val data_ready_arr = Bits(width = NWAYS) - + var tag_ready = Bool(true) + var state_ready = Bool(true) for(w <- 0 until NWAYS) { - way_arr(w).io.req.bits ^^ io.req.bits.inner_req - way_arr(w).io.req.ready := data_ready_arr(w) - way_arr(w).io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool - io.resp(w) ^^ way_arr(w).io.resp + val way = new MetaDataArray(lines) + way.io.req.bits <> io.req.bits.inner_req + tag_ready = tag_ready && way.io.req.ready + way.io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool + way.io.state_req.bits <> io.state_req.bits.inner_req + state_ready = state_ready && way.io.state_req.ready + way.io.state_req.valid := io.state_req.valid && io.state_req.bits.way_en(w).toBool + way.io.resp <> io.resp(w) } io.way_en := way_en_ - io.req.ready := data_ready_arr.andR.toBool + io.req.ready := tag_ready + io.state_req.ready := state_ready } class DataArray(lines: Int) extends Component { @@ -604,6 +591,34 @@ class DataArray(lines: Int) extends Component { io.req.ready := Bool(true) } +class DataArrayArray(lines: Int) extends Component { + val io = new Bundle { + val req = (new ioDecoupled) { new DataArrayArrayReq() } + val resp = Vec(NWAYS){ Bits(width = MEM_DATA_BITS, dir = OUTPUT) } + val way_en = Bits(width = NWAYS, dir = OUTPUT) + } + + val way_en_ = Reg { Bits(width=NWAYS) } + when (io.req.valid && io.req.ready) { + way_en_ <== io.req.bits.way_en + } + + //val data_ready_arr = Vec(NWAYS){ Bool() } + var data_ready = Bool(true) + for(w <- 0 until NWAYS) { + val way = new DataArray(lines) + way.io.req.bits <> io.req.bits.inner_req + //data_ready_arr(w) := 
way.io.req.ready + data_ready = data_ready && way.io.req.ready + way.io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool + way.io.resp <> io.resp(w) + } + + io.way_en := way_en_ + //io.req.ready := Cat(data_ready_arr).andR.toBool + io.req.ready := data_ready +} + class AMOALU extends Component { val io = new Bundle { val cmd = Bits(4, INPUT) @@ -730,7 +745,7 @@ class HellaCacheDM(lines: Int) extends Component { val wb = new WritebackUnit val wb_arb = (new Arbiter(2)) { new WritebackReq() } wb_arb.io.out <> wb.io.req - wb.io.data_req.bits.inner_req <> data_arb.io.in(3).bits //TODO + wb.io.data_req.bits.inner_req <> data_arb.io.in(3).bits wb.io.data_req.ready := data_arb.io.in(3).ready data_arb.io.in(3).valid := wb.io.data_req.valid wb.io.data_resp <> data.io.resp @@ -794,6 +809,7 @@ class HellaCacheDM(lines: Int) extends Component { meta.io.state_req.valid := clear_valid || set_dirty meta.io.state_req.bits.rw := Bool(true) meta.io.state_req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) + meta.io.state_req.bits.data.tag := UFix(0) // don't care meta.io.state_req.bits.data.valid := tag_match meta.io.state_req.bits.data.dirty := tag_match @@ -826,7 +842,7 @@ class HellaCacheDM(lines: Int) extends Component { mshr.io.mem_resp_val := io.mem.resp_val && (~rr_count === UFix(0)) mshr.io.mem_resp_tag := io.mem.resp_tag mshr.io.mem_req <> wb.io.refill_req - mshr.io.meta_req.bits.inner_req <> meta_arb.io.in(1).bits //TODO + mshr.io.meta_req.bits.inner_req <> meta_arb.io.in(1).bits mshr.io.meta_req.ready := meta_arb.io.in(1).ready meta_arb.io.in(1).valid := mshr.io.meta_req.valid mshr.io.replay <> replayer.io.replay @@ -882,7 +898,7 @@ class HellaCacheDM(lines: Int) extends Component { flushed <== flushed && !r_cpu_req_val || r_cpu_req_val && r_req_flush && flush_rdy && flusher.io.req.ready flusher.io.req.valid := r_cpu_req_val && r_req_flush && flush_rdy && !flushed flusher.io.wb_req <> wb_arb.io.in(0) - flusher.io.meta_req.bits.inner_req <> meta_arb.io.in(0).bits 
//TODO + flusher.io.meta_req.bits.inner_req <> meta_arb.io.in(0).bits flusher.io.meta_req.ready := meta_arb.io.in(0).ready meta_arb.io.in(0).valid := flusher.io.meta_req.valid flusher.io.meta_resp <> meta.io.resp @@ -966,6 +982,9 @@ class HellaCacheAssoc(lines: Int) extends Component { val replay_amo_val = replayer.io.data_req.valid && replayer.io.data_req.bits.cmd(3).toBool when (replay_amo_val) { + r_cpu_req_idx <== Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) + r_cpu_req_cmd <== replayer.io.data_req.bits.cmd + r_cpu_req_type <== replayer.io.data_req.bits.typ r_cpu_req_data <== replayer.io.data_req.bits.data } when (io.cpu.req_val) { @@ -1014,22 +1033,23 @@ class HellaCacheAssoc(lines: Int) extends Component { val tag_match = Cat(Bits(0),tag_match_arr:_*).orR val tag_hit = r_cpu_req_val && tag_match val tag_miss = r_cpu_req_val && !tag_match - val hit_way_id = OHToUFix(Cat(Bits(0),tag_match_arr:_*)) - val meta_hit_mux = meta.io.resp(hit_way_id) + val hit_way_id = OHToUFix(Cat(Bits(0),tag_match_arr.reverse:_*)) //TODO + val meta_resp_way_id = Mux(meta.io.way_en === ~UFix(0, NWAYS), hit_way_id, OHToUFix(meta.io.way_en)) + val meta_resp_mux = meta.io.resp(meta_resp_way_id) + val data_resp_way_id = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_id, OHToUFix(data.io.way_en)) + val data_resp_mux = data.io.resp(data_resp_way_id) // writeback unit val wb = new WritebackUnit val wb_arb = (new Arbiter(2)) { new WritebackReq() } wb_arb.io.out <> wb.io.req wb.io.data_req <> data_arb.io.in(3) - val data_resp_way_id = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_id, OHToUFix(data.io.way_en)) - val data_resp_mux = data.io.resp(data_resp_way_id) wb.io.data_resp <> data_resp_mux // replacement policy val replacer = new RandomReplacementWayGen() replacer.io.way_en := tag_miss & ~UFix(0, NWAYS) - val replaced_way_id = replacer.io.way_id + val replaced_way_id = replacer.io.way_id //TODO val meta_wb_mux = meta.io.resp(replaced_way_id) val dirty = 
meta_wb_mux.valid && meta_wb_mux.dirty //TODO: check all dirty uses @@ -1077,8 +1097,8 @@ class HellaCacheAssoc(lines: Int) extends Component { wb_arb.io.in(1).bits.way_id := replaced_way_id // tag update after a miss or a store to an exclusive clean line. - val clear_valid = tag_miss && r_req_readwrite && meta_hit_mux.valid && (!dirty || wb_rdy) - val set_dirty = tag_hit && !meta_hit_mux.dirty && r_req_write + val clear_valid = tag_miss && r_req_readwrite && meta_resp_mux.valid && (!dirty || wb_rdy) + val set_dirty = tag_hit && !meta_resp_mux.dirty && r_req_write meta.io.state_req.bits.inner_req.rw := Bool(true) meta.io.state_req.bits.inner_req.idx := r_cpu_req_idx(indexmsb,indexlsb) meta.io.state_req.bits.inner_req.data.valid := tag_match @@ -1095,10 +1115,10 @@ class HellaCacheAssoc(lines: Int) extends Component { p_store_data <== amoalu.io.out } when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) { - p_store_idx <== Mux(r_replay_amo, Reg(Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset)), r_cpu_req_idx) + p_store_idx <== r_cpu_req_idx + p_store_type <== r_cpu_req_type + p_store_cmd <== r_cpu_req_cmd p_store_way_id <== Mux(r_replay_amo, Reg(replayer.io.replay.bits.way_id), hit_way_id) - p_store_type <== Mux(r_replay_amo, Reg(replayer.io.data_req.bits.typ), r_cpu_req_type) - p_store_cmd <== Mux(r_replay_amo, Reg(replayer.io.data_req.bits.cmd), r_cpu_req_cmd) p_store_data <== storegen.io.dout } @@ -1173,7 +1193,7 @@ class HellaCacheAssoc(lines: Int) extends Component { flusher.io.req.valid := r_cpu_req_val && r_req_flush && flush_rdy && !flushed flusher.io.wb_req <> wb_arb.io.in(0) flusher.io.meta_req <> meta_arb.io.in(0) - flusher.io.meta_resp <> meta.io.resp + flusher.io.meta_resp <> meta_resp_mux flusher.io.resp.ready := Bool(true) // we don't respond to flush requests // we usually nack rather than reporting that the cache is not ready. 
From 7f26fe2c44ea182775f7736d205c4016b1a3def8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Jan 2012 15:13:49 -0800 Subject: [PATCH 0114/1087] make icache size parameterizable --- rocket/src/main/scala/icache.scala | 53 ++++++++++++++++-------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index f06992d1..c0890738 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -41,7 +41,7 @@ class rocketICacheDM(lines: Int) extends Component { val io = new ioICacheDM(); val addrbits = PADDR_BITS; - val indexbits = ceil(log10(lines)/log10(2)).toInt; + val indexbits = log2up(lines); val offsetbits = OFFSET_BITS; val tagmsb = addrbits - 1; val taglsb = indexbits+offsetbits; @@ -50,8 +50,10 @@ class rocketICacheDM(lines: Int) extends Component { val indexlsb = offsetbits; val offsetmsb = indexlsb-1; val databits = 32; - val offsetlsb = ceil(log(databits/8)/log(2)).toInt; - val rf_cnt_bits = ceil(log(REFILL_CYCLES)/log(2)).toInt; + val offsetlsb = log2up(databits/8); + val rf_cnt_bits = log2up(REFILL_CYCLES); + + require(PGIDX_BITS >= taglsb); // virtually-indexed, physically-tagged constraint val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_reset); @@ -63,17 +65,20 @@ class rocketICacheDM(lines: Int) extends Component { val rdy = Wire() { Bool() } when (io.cpu.req_val && rdy) { - r_cpu_req_idx <== io.cpu.req_idx; - } - when (state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss) { - r_cpu_req_ppn <== io.cpu.req_ppn; - } - when (rdy) { - r_cpu_req_val <== io.cpu.req_val; + r_cpu_req_val <== Bool(true) + r_cpu_req_idx <== io.cpu.req_idx } otherwise { - r_cpu_req_val <== Bool(false); + r_cpu_req_val <== Bool(false) } + when (state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss) { + r_cpu_req_ppn <== io.cpu.req_ppn + } + + val r_cpu_hit_addr = 
Cat(io.cpu.req_ppn, r_cpu_req_idx) + val r_cpu_hit_tag = r_cpu_hit_addr(tagmsb,taglsb) + val r_cpu_miss_addr = Cat(r_cpu_req_ppn, r_cpu_req_idx) + val r_cpu_miss_tag = r_cpu_miss_addr(tagmsb,taglsb) // refill counter val refill_count = Reg(resetVal = UFix(0, rf_cnt_bits)); @@ -81,14 +86,14 @@ class rocketICacheDM(lines: Int) extends Component { refill_count <== refill_count + UFix(1); } val tag_addr = - Mux((state === s_refill_wait), r_cpu_req_idx(PGIDX_BITS-1,offsetbits), - io.cpu.req_idx(PGIDX_BITS-1,offsetbits)).toUFix; + Mux((state === s_refill_wait), r_cpu_req_idx(indexmsb,indexlsb), + io.cpu.req_idx(indexmsb,indexlsb)).toUFix; val tag_we = (state === s_refill_wait) && io.mem.resp_val; - val tag_array = Mem4(lines, r_cpu_req_ppn); + val tag_array = Mem4(lines, r_cpu_miss_tag); tag_array.setReadLatency(1); tag_array.setTarget('inst); - val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); + val tag_rdata = tag_array.rw(tag_addr, r_cpu_miss_tag, tag_we); // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); @@ -96,27 +101,27 @@ class rocketICacheDM(lines: Int) extends Component { vb_array <== Bits(0,lines); } when (tag_we) { - vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); + vb_array <== vb_array.bitSet(r_cpu_req_idx(indexmsb,indexlsb).toUFix, UFix(1,1)); } - val tag_valid = Reg(vb_array(tag_addr)).toBool; - val tag_match = (tag_rdata === io.cpu.req_ppn); + val tag_valid = vb_array(r_cpu_req_idx(indexmsb,indexlsb)).toBool; + val tag_hit = tag_valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) // data array val data_addr = - Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), refill_count), - io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1)).toUFix; + Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), + io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; val data_array = 
Mem4(lines*REFILL_CYCLES, io.mem.resp_data); data_array.setReadLatency(1); data_array.setTarget('inst); val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val); // output signals - io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && Reg(rdy) && r_cpu_req_val && tag_valid && tag_match; - rdy <== !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || (tag_valid && tag_match)); + io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_hit; + rdy <== !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); io.cpu.resp_data := data_array_rdata >> Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2up(databits))).toUFix io.mem.req_val := (state === s_request); - io.mem.req_addr := Cat(r_cpu_req_ppn, r_cpu_req_idx(indexmsb,indexlsb)).toUFix + io.mem.req_addr := r_cpu_miss_addr(tagmsb,indexlsb).toUFix // control state machine switch (state) { @@ -127,7 +132,7 @@ class rocketICacheDM(lines: Int) extends Component { when (io.cpu.itlb_miss) { state <== s_ready; } - when (r_cpu_req_val && !(tag_valid && tag_match)) { + when (r_cpu_req_val && !tag_hit) { state <== s_request; } } From aa3465699bf88f164e7a9661abf08acba5cdb18b Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 24 Jan 2012 14:39:52 -0800 Subject: [PATCH 0115/1087] LFSR now a util --- rocket/src/main/scala/nbdcache.scala | 7 ++----- rocket/src/main/scala/util.scala | 11 +++++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index f803ac3a..c908d788 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -17,11 +17,8 @@ class RandomReplacementWayGen extends Component { val way_en = Bits(width = NWAYS, dir = INPUT) val way_id = UFix(width = log2up(NWAYS), dir = OUTPUT) } - val width = max(6,log2up(NWAYS)) - val lfsr = Reg(resetVal = UFix(1, width)) - when (io.way_en.orR) { lfsr <== 
Cat(lfsr(0)^lfsr(2)^lfsr(3)^lfsr(5), lfsr(width-1,1)).toUFix } - //TODO: Actually limit selection based on which ways are available (io.ways_en) - if(NWAYS > 1) io.way_id := lfsr(log2up(NWAYS)-1,0).toUFix + //TODO: Actually limit selection based on which ways are allowed (io.ways_en) + if(NWAYS > 1) io.way_id := LFSR16(io.way_en.orR) else io.way_id := UFix(0) } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 837af57a..04ef0382 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -56,6 +56,17 @@ object UFixToOH } } +object LFSR16 +{ + def apply(increment: Bool) = + { + val width = 16 + val lfsr = Reg(resetVal = UFix(1, width)) + when (increment) { lfsr <== Cat(lfsr(0)^lfsr(2)^lfsr(3)^lfsr(5), lfsr(width-1,1)).toUFix } + lfsr + } +} + class Mux1H(n: Int, w: Int) extends Component { val io = new Bundle { From 97c379f1d7a5df150d021162d1c239ec7a96baba Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Jan 2012 16:51:30 -0800 Subject: [PATCH 0116/1087] made I$ associative --- rocket/src/main/scala/icache.scala | 80 ++++++++++++++++++------------ rocket/src/main/scala/top.scala | 2 +- rocket/src/main/scala/util.scala | 7 ++- 3 files changed, 56 insertions(+), 33 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index c0890738..a461be19 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -27,7 +27,7 @@ class ioIcache(view: List[String] = null) extends Bundle (view) val resp_val = Bool(OUTPUT); } -class ioICacheDM extends Bundle() +class ioICache extends Bundle() { val cpu = new ioImem(); val mem = new ioIcache().flip(); @@ -37,11 +37,12 @@ class ioICacheDM extends Bundle() // 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines // parameters : // lines = # cache lines -class rocketICacheDM(lines: Int) extends Component { - val io = new ioICacheDM(); +class rocketICache(sets: Int, assoc: 
Int) extends Component { + val io = new ioICache(); + val lines = sets * assoc; val addrbits = PADDR_BITS; - val indexbits = log2up(lines); + val indexbits = log2up(sets); val offsetbits = OFFSET_BITS; val tagmsb = addrbits - 1; val taglsb = indexbits+offsetbits; @@ -54,15 +55,17 @@ class rocketICacheDM(lines: Int) extends Component { val rf_cnt_bits = log2up(REFILL_CYCLES); require(PGIDX_BITS >= taglsb); // virtually-indexed, physically-tagged constraint + require(ispow2(sets) && ispow2(assoc)); val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_reset); - val r_cpu_req_idx = Reg { Bits(width = PGIDX_BITS) } - val r_cpu_req_ppn = Reg { Bits(width = PPN_BITS) } + val r_cpu_req_idx = Reg { Bits() } + val r_cpu_req_ppn = Reg { Bits() } val r_cpu_req_val = Reg(resetVal = Bool(false)); val rdy = Wire() { Bool() } + val tag_hit = Wire() { Bool() } when (io.cpu.req_val && rdy) { r_cpu_req_val <== Bool(true) @@ -85,41 +88,56 @@ class rocketICacheDM(lines: Int) extends Component { when (io.mem.resp_val) { refill_count <== refill_count + UFix(1); } + + val repl_way = LFSR16(state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss && !tag_hit)(log2up(assoc)-1,0) + val word_shift = Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2up(databits))).toUFix val tag_addr = Mux((state === s_refill_wait), r_cpu_req_idx(indexmsb,indexlsb), io.cpu.req_idx(indexmsb,indexlsb)).toUFix; val tag_we = (state === s_refill_wait) && io.mem.resp_val; - - val tag_array = Mem4(lines, r_cpu_miss_tag); - tag_array.setReadLatency(1); - tag_array.setTarget('inst); - val tag_rdata = tag_array.rw(tag_addr, r_cpu_miss_tag, tag_we); - - // valid bit array - val vb_array = Reg(resetVal = Bits(0, lines)); - when (io.cpu.invalidate) { - vb_array <== Bits(0,lines); - } - when (tag_we) { - vb_array <== vb_array.bitSet(r_cpu_req_idx(indexmsb,indexlsb).toUFix, UFix(1,1)); - } - - val tag_valid = 
vb_array(r_cpu_req_idx(indexmsb,indexlsb)).toBool; - val tag_hit = tag_valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) - - // data array val data_addr = Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; - val data_array = Mem4(lines*REFILL_CYCLES, io.mem.resp_data); - data_array.setReadLatency(1); - data_array.setTarget('inst); - val data_array_rdata = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val); + + val data_mux = new Mux1H(assoc, MEM_DATA_BITS) + var any_hit = Bool(false) + for (i <- 0 until assoc) + { + val repl_me = (repl_way === UFix(i)) + val tag_array = Mem4(lines, r_cpu_miss_tag); + tag_array.setReadLatency(1); + tag_array.setTarget('inst); + val tag_rdata = tag_array.rw(tag_addr, r_cpu_miss_tag, tag_we && repl_me); + + // valid bit array + val vb_array = Reg(resetVal = Bits(0, lines)); + when (io.cpu.invalidate) { + vb_array <== Bits(0,lines); + } + when (tag_we && repl_me) { + vb_array <== vb_array.bitSet(r_cpu_req_idx(indexmsb,indexlsb).toUFix, UFix(1,1)); + } + + val valid = vb_array(r_cpu_req_idx(indexmsb,indexlsb)).toBool; + val hit = valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) + + // data array + val data_array = Mem4(lines*REFILL_CYCLES, io.mem.resp_data); + data_array.setReadLatency(1); + data_array.setTarget('inst); + val data_out = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val && repl_me) + + data_mux.io.sel(i) := hit + data_mux.io.in(i) := (data_out >> word_shift)(databits-1,0); + + any_hit = any_hit || hit + } + tag_hit := any_hit // output signals io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_hit; - rdy <== !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); - io.cpu.resp_data := data_array_rdata >> Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2up(databits))).toUFix + rdy := !io.cpu.itlb_miss && (state === 
s_ready) && (!r_cpu_req_val || tag_hit); + io.cpu.resp_data := data_mux.io.out io.mem.req_val := (state === s_request); io.mem.req_addr := r_cpu_miss_addr(tagmsb,indexlsb).toUFix diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index daa049be..b38c9ddf 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -15,7 +15,7 @@ class Top() extends Component { val io = new ioTop(); val cpu = new rocketProc(); - val icache = new rocketICacheDM(128); // # 64 byte cache lines + val icache = new rocketICache(128, 2); // 128 sets x 2 ways val icache_pf = new rocketIPrefetcher(); val dcache = new HellaCacheDM(128); val arbiter = new rocketMemArbiter(); diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 04ef0382..cf2d05d8 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -16,6 +16,11 @@ object log2up def apply(in: Int) = if (in == 1) 1 else ceil(log(in)/log(2)).toInt } +object ispow2 +{ + def apply(in: Int) = in > 0 && ((in & (in-1)) == 0) +} + object FillInterleaved { def apply(n: Int, in: Bits) = @@ -58,7 +63,7 @@ object UFixToOH object LFSR16 { - def apply(increment: Bool) = + def apply(increment: Bool = Bool(true)) = { val width = 16 val lfsr = Reg(resetVal = UFix(1, width)) From 7172ddd05073a9928e63e0639cedd755ebf602cd Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Jan 2012 18:40:08 -0800 Subject: [PATCH 0117/1087] don't flush pipeline after MFPCR --- rocket/src/main/scala/ctrl.scala | 199 +++++++++++++++---------------- 1 file changed, 98 insertions(+), 101 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 1546597d..9dbb1ec0 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -168,110 +168,110 @@ class rocketCtrl extends Component val xpr64 = Y; val cs = ListLookup(io.dpath.inst, - List( N, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, 
MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + List( N, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), Array( - BNE-> List(Y, BR_NE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - BEQ-> List(Y, BR_EQ, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - BLT-> List(Y, BR_LT, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - BGE-> List(Y, BR_GE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + BNE-> List(Y, BR_NE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BEQ-> List(Y, BR_EQ, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BLT-> List(Y, BR_LT, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BGE-> List(Y, 
BR_GE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - J-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - JAL-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + J-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JAL-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, 
MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SB-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SH-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SB-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SH-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, 
N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LUI-> List(Y, BR_N, 
REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - ORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - XORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - riscvAND-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_AND, M_N,M_X, 
MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SRA-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + LUI-> List(Y, BR_N, REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + XORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvAND-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRA-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SLLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SRLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SRAIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - ADDW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SUBW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SLLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - SRAW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + ADDIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADDW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SUBW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIV-> List(Y, BR_N, 
REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_N,WA_RD,WB_X, 
REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N), - EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y), - DI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y), - ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y), - FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N), - FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N), - CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y), - MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y), - MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y), - RDTIME-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - RDCYCLE-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - RDINSTRET->List(Y, BR_N, REN_N,REN_Y,A2_SEXT, 
A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,N), + EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y,Y), + DI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y,Y), + ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y,N), + FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N), + FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N), + CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y), + MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y,N), + MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y,Y), + RDTIME-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDCYCLE-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDINSTRET->List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), // Instructions that have not yet been implemented // Faking these for now so akaros will boot - MFFSR-> List(Y, 
BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - MTFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - FLW-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - FLD-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - FSW-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - FSD-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N) + MFFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MTFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLW-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLD-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSW-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSD-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N) /* // floating point FLW-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), @@ -294,7 +294,7 @@ class rocketCtrl extends Component val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst, resetVal = Bool(false)); 
val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_sel_alu1 :: id_fn_dw :: id_fn_alu :: csremainder = cs; - val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: Nil = csremainder; + val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = csremainder; val id_raddr2 = io.dpath.inst(21,17); val id_raddr1 = io.dpath.inst(26,22); @@ -334,7 +334,7 @@ class rocketCtrl extends Component val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; val ex_reg_mem_type = Reg(){UFix(width = 3)}; val ex_reg_eret = Reg(resetVal = Bool(false)); - val ex_reg_privileged = Reg(resetVal = Bool(false)); + val ex_reg_replay_next = Reg(resetVal = Bool(false)); val ex_reg_inst_di = Reg(resetVal = Bool(false)); val ex_reg_inst_ei = Reg(resetVal = Bool(false)); val ex_reg_flush_inst = Reg(resetVal = Bool(false)); @@ -394,7 +394,7 @@ class rocketCtrl extends Component ex_reg_div_mul_val <== Bool(false); ex_reg_mem_val <== Bool(false); ex_reg_eret <== Bool(false); - ex_reg_privileged <== Bool(false); + ex_reg_replay_next <== Bool(false); ex_reg_inst_di <== Bool(false); ex_reg_inst_ei <== Bool(false); ex_reg_flush_inst <== Bool(false); @@ -413,7 +413,7 @@ class rocketCtrl extends Component ex_reg_div_mul_val <== id_div_val.toBool || id_mul_val.toBool; ex_reg_mem_val <== id_mem_val.toBool; ex_reg_eret <== id_eret.toBool; - ex_reg_privileged <== id_privileged.toBool; + ex_reg_replay_next <== id_replay_next.toBool; ex_reg_inst_di <== (id_irq === I_DI); ex_reg_inst_ei <== (id_irq === I_EI); ex_reg_flush_inst <== (id_sync === SYNC_I); @@ -424,7 +424,7 @@ class rocketCtrl extends Component // ex_reg_xcpt_fpu <== 
id_fp_val.toBool; ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== id_syscall.toBool; - ex_reg_replay <== id_reg_replay; + ex_reg_replay <== id_reg_replay || ex_reg_replay_next; ex_reg_lu_bypass <== io.dpath.mem_lu_bypass; } ex_reg_mem_cmd <== id_mem_cmd; @@ -457,13 +457,11 @@ class rocketCtrl extends Component val mem_reg_mem_val = Reg(){Bool()}; val mem_reg_mem_cmd = Reg(){UFix(width = 4)}; val mem_reg_mem_type = Reg(){UFix(width = 3)}; - val mem_reg_privileged = Reg(){Bool()}; when (reset.toBool || io.dpath.killx) { mem_reg_div_mul_val <== Bool(false); mem_reg_eret <== Bool(false); mem_reg_mem_val <== Bool(false); - mem_reg_privileged <== Bool(false); mem_reg_inst_di <== Bool(false); mem_reg_inst_ei <== Bool(false); mem_reg_flush_inst <== Bool(false); @@ -478,7 +476,6 @@ class rocketCtrl extends Component mem_reg_div_mul_val <== ex_reg_div_mul_val; mem_reg_eret <== ex_reg_eret; mem_reg_mem_val <== ex_reg_mem_val; - mem_reg_privileged <== ex_reg_privileged; mem_reg_inst_di <== ex_reg_inst_di; mem_reg_inst_ei <== ex_reg_inst_ei; mem_reg_flush_inst <== ex_reg_flush_inst; @@ -570,7 +567,7 @@ class rocketCtrl extends Component // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions - val replay_ex = dcache_miss && ex_reg_lu_bypass || mem_reg_privileged || mem_reg_flush_inst || + val replay_ex = dcache_miss && ex_reg_lu_bypass || mem_reg_flush_inst || ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) val kill_ex = take_pc_wb || replay_ex From 41855a6d4773463e5c0f63b446a40daff4e910ab Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 26 Jan 2012 19:33:55 -0800 Subject: [PATCH 0118/1087] fix missing "otherwise" in PCR file this fixes timer interrupts for VLSI backend. 
--- rocket/src/main/scala/dpath_util.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index e12d6d5d..8cbe948d 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -178,7 +178,9 @@ class rocketDpathPCR extends Component when (io.w.addr === PCR_PTBR) { reg_ptbr <== Cat(io.w.data(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } } - reg_count <== reg_count + UFix(1); + otherwise { + reg_count <== reg_count + UFix(1); + } when (reg_count === reg_compare) { r_irq_timer <== Bool(true); } From a7999d4525ef1ba15a59ed57cf70ea72f2ee161d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 26 Jan 2012 20:36:31 -0800 Subject: [PATCH 0119/1087] don't flush I$ unless fence.i commits otherwise, we might not make forward progress. --- rocket/src/main/scala/ctrl.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 9dbb1ec0..c91480f0 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -361,6 +361,7 @@ class rocketCtrl extends Component val wb_reg_inst_di = Reg(resetVal = Bool(false)); val wb_reg_inst_ei = Reg(resetVal = Bool(false)); + val wb_reg_flush_inst = Reg(resetVal = Bool(false)); val wb_reg_eret = Reg(resetVal = Bool(false)); val wb_reg_exception = Reg(resetVal = Bool(false)); val wb_reg_badvaddr_wen = Reg(resetVal = Bool(false)); @@ -493,12 +494,14 @@ class rocketCtrl extends Component wb_reg_eret <== Bool(false); wb_reg_inst_di <== Bool(false); wb_reg_inst_ei <== Bool(false); + wb_reg_flush_inst <== Bool(false); wb_reg_div_mul_val <== Bool(false); } otherwise { wb_reg_eret <== mem_reg_eret; wb_reg_inst_di <== mem_reg_inst_di; wb_reg_inst_ei <== mem_reg_inst_ei; + wb_reg_flush_inst <== mem_reg_flush_inst; wb_reg_div_mul_val <== mem_reg_div_mul_val; } @@ -657,7 +660,7 @@ class 
rocketCtrl extends Component val ctrl_killd = take_pc || ctrl_stalld; val ctrl_killf = take_pc || !io.imem.resp_val; - io.flush_inst := mem_reg_flush_inst; + io.flush_inst := wb_reg_flush_inst; io.dpath.stallf := ctrl_stallf; From a96c92f58d34d078e42d98100a83c33c99d0ae28 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 26 Jan 2012 20:45:04 -0800 Subject: [PATCH 0120/1087] enable amomin[u]/amomax[u --- rocket/src/main/scala/ctrl.scala | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index c91480f0..74c6ef41 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -201,10 +201,18 @@ class rocketCtrl extends Component AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMIN_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAXU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), LUI-> List(Y, BR_N, REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), @@ -278,16 +286,6 @@ class rocketCtrl extends Component FLD-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), FSW-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), FSD-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - - // 
atomic memory operations - AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOMIN_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - AMOMAXU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), */ )); From bd241ea2375cffb1c1db05b176f200895ad9f2a0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 30 Jan 2012 17:15:42 -0800 Subject: [PATCH 0121/1087] fix when badvaddr is set --- rocket/src/main/scala/ctrl.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 74c6ef41..3dbbb11e 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -362,7 +362,6 @@ class rocketCtrl extends Component val wb_reg_flush_inst = Reg(resetVal = Bool(false)); val wb_reg_eret = Reg(resetVal = Bool(false)); val wb_reg_exception = Reg(resetVal = Bool(false)); - val wb_reg_badvaddr_wen 
= Reg(resetVal = Bool(false)); val wb_reg_replay = Reg(resetVal = Bool(false)); val wb_reg_cause = Reg(){UFix()}; @@ -549,11 +548,6 @@ class rocketCtrl extends Component Mux(mem_xcpt_dtlb_st, UFix(11,5), // store fault UFix(0,5))))))))))); // instruction address misaligned - // write cause to PCR on an exception - io.dpath.exception := wb_reg_exception; - io.dpath.cause := wb_reg_cause; - io.dpath.badvaddr_wen := wb_reg_badvaddr_wen; - // control transfer from ex/mem val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match val br_jr_taken = br_taken || jr_taken @@ -577,9 +571,15 @@ class rocketCtrl extends Component wb_reg_replay <== replay_mem && !take_pc_wb; wb_reg_exception <== mem_exception && !take_pc_wb; - wb_reg_badvaddr_wen <== (mem_xcpt_dtlb_ld || mem_xcpt_dtlb_st) && !take_pc_wb; wb_reg_cause <== mem_cause; + val wb_badvaddr_wen = wb_reg_exception && ((wb_reg_cause === UFix(10)) || (wb_reg_cause === UFix(11))) + + // write cause to PCR on an exception + io.dpath.exception := wb_reg_exception; + io.dpath.cause := wb_reg_cause; + io.dpath.badvaddr_wen := wb_badvaddr_wen; + io.dpath.sel_pc := Mux(wb_reg_exception, PC_EVEC, // exception Mux(wb_reg_replay, PC_WB, // replay From 38c9105ea1ff753655cdd43814cf99311a63342d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 30 Jan 2012 21:14:28 -0800 Subject: [PATCH 0122/1087] fix mul/div deadlock bug If independent multiplies or independent divides were issued back-to-back, the second wouldn't execute, causing the register to be busy forever. 
--- rocket/src/main/scala/ctrl.scala | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 3dbbb11e..dc7dcbd0 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -327,7 +327,8 @@ class rocketCtrl extends Component val ex_reg_br_type = Reg(){UFix(width = 4)}; val ex_reg_btb_hit = Reg(){Bool()}; - val ex_reg_div_mul_val = Reg(){Bool()}; + val ex_reg_div_val = Reg(){Bool()}; + val ex_reg_mul_val = Reg(){Bool()}; val ex_reg_mem_val = Reg(){Bool()}; val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; val ex_reg_mem_type = Reg(){UFix(width = 3)}; @@ -389,7 +390,8 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killd) { ex_reg_br_type <== BR_N; ex_reg_btb_hit <== Bool(false); - ex_reg_div_mul_val <== Bool(false); + ex_reg_div_val <== Bool(false); + ex_reg_mul_val <== Bool(false); ex_reg_mem_val <== Bool(false); ex_reg_eret <== Bool(false); ex_reg_replay_next <== Bool(false); @@ -408,7 +410,8 @@ class rocketCtrl extends Component otherwise { ex_reg_br_type <== id_br_type; ex_reg_btb_hit <== id_reg_btb_hit; - ex_reg_div_mul_val <== id_div_val.toBool || id_mul_val.toBool; + ex_reg_div_val <== id_div_val.toBool; + ex_reg_mul_val <== id_mul_val.toBool; ex_reg_mem_val <== id_mem_val.toBool; ex_reg_eret <== id_eret.toBool; ex_reg_replay_next <== id_replay_next.toBool; @@ -471,7 +474,7 @@ class rocketCtrl extends Component mem_reg_xcpt_syscall <== Bool(false); } otherwise { - mem_reg_div_mul_val <== ex_reg_div_mul_val; + mem_reg_div_mul_val <== ex_reg_div_val || ex_reg_mul_val; mem_reg_eret <== ex_reg_eret; mem_reg_mem_val <== ex_reg_mem_val; mem_reg_inst_di <== ex_reg_inst_di; @@ -563,7 +566,9 @@ class rocketCtrl extends Component // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions val replay_ex = dcache_miss && ex_reg_lu_bypass || 
mem_reg_flush_inst || - ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) + ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) || + ex_reg_div_val && !io.dpath.div_rdy || + ex_reg_mul_val && !io.dpath.mul_rdy val kill_ex = take_pc_wb || replay_ex mem_reg_replay <== replay_ex && !take_pc_wb; @@ -599,7 +604,7 @@ class rocketCtrl extends Component val ex_mem_cmd_load = ex_reg_mem_val && ((ex_reg_mem_cmd === M_XRD) || ex_reg_mem_cmd(3).toBool); val data_hazard_ex = - (ex_mem_cmd_load || ex_reg_div_mul_val) && + (ex_mem_cmd_load || ex_reg_div_val || ex_reg_mul_val) && ((id_renx1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || (id_renx2.toBool && (id_raddr2 === io.dpath.ex_waddr)) || (id_wen.toBool && (id_waddr === io.dpath.ex_waddr))); @@ -647,8 +652,8 @@ class rocketCtrl extends Component id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || id_console_out_val && !io.console.rdy || - id_div_val.toBool && !io.dpath.div_rdy || - id_mul_val.toBool && !io.dpath.mul_rdy || + id_div_val.toBool && (!io.dpath.div_rdy || ex_reg_div_val) || + id_mul_val.toBool && (!io.dpath.mul_rdy || ex_reg_mul_val) || io.dpath.div_result_val || io.dpath.mul_result_val || mem_wb From 281abfbccb110dc0261e2f309915cdceff27ed92 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 1 Feb 2012 13:24:28 -0800 Subject: [PATCH 0123/1087] New Mux1H constructor --- rocket/src/main/scala/nbdcache.scala | 6 +-- rocket/src/main/scala/util.scala | 55 +++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c908d788..ab73063a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -289,9 +289,9 @@ class MSHRFile extends Component { val replay = (new ioDecoupled) { new Replay() }.flip() } - val tag_mux = new Mux1H(NMSHR, PPN_BITS) - val 
mem_resp_idx_mux = new Mux1H(NMSHR, IDX_BITS) - val mem_resp_way_id_mux = new Mux1H(NMSHR, log2up(NWAYS)) + val tag_mux = (new Mux1H(NMSHR)){ Bits(width = PPN_BITS) } + val mem_resp_idx_mux = (new Mux1H(NMSHR)){ Bits(width = IDX_BITS) } + val mem_resp_way_oh_mux = (new Mux1H(NMSHR)){ Bits(width = NWAYS) } val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } val mem_req_arb = (new Arbiter(NMSHR)) { new MemReq() } val replay_arb = (new Arbiter(NMSHR)) { new Replay() } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index cf2d05d8..5e33e17f 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -72,24 +72,67 @@ object LFSR16 } } -class Mux1H(n: Int, w: Int) extends Component +object Mux1H +{ +//TODO: cloning in(0) is unsafe if other elements have different widths, but +//is that even allowable? + def apply [T <: Data](n: Int, sel: Vec[Bool], in: Vec[T]): T = { + MuxCase(in(0), (0 until n).map( i => (sel(i), in(i)))) +// val mux = (new Mux1H(n)){ in(0).clone } +// mux.io.sel <> sel +// mux.io.in <> in +// mux.io.out.asInstanceOf[T] + } + + def apply [T <: Data](n: Int, sel: Seq[Bool], in: Vec[T]): T = { + MuxCase(in(0), (0 until n).map( i => (sel(i), in(i)))) +// val mux = (new Mux1H(n)){ in(0).clone } +// for(i <- 0 until n) { +// mux.io.sel(i) := sel(i) +// } +// mux.io.in <> in.asOutput +// mux.io.out.asInstanceOf[T] + } + + def apply [T <: Data](n: Int, sel: Bits, in: Vec[T]): T = { + MuxCase(in(0), (0 until n).map( i => (sel(i).toBool, in(i)))) +// val mux = (new Mux1H(n)){ in(0).clone } +// for(i <- 0 until n) { +// mux.io.sel(i) := sel(i).toBool +// } +// mux.io.in := in +// mux.io.out + } +} + +class Mux1H [T <: Data](n: Int)(gen: => T) extends Component { val io = new Bundle { val sel = Vec(n) { Bool(dir = INPUT) } - val in = Vec(n) { Bits(width = w, dir = INPUT) } - val out = Bits(width = w, dir = OUTPUT) + val in = Vec(n) { gen }.asInput + val out = gen.asOutput } - if (n > 1) { 
- var out = io.in(0) & Fill(w, io.sel(0)) + if (n > 2) { + var out = io.in(0).toBits & Fill(gen.getWidth, io.sel(0)) for (i <- 1 to n-1) - out = out | (io.in(i) & Fill(w, io.sel(i))) + out = out | (io.in(i).toBits & Fill(gen.getWidth, io.sel(i))) io.out := out + } else if (n == 2) { + io.out := Mux(io.sel(1), io.in(1), io.in(0)) } else { io.out := io.in(0) } } + + + + + + + + class ioDecoupled[T <: Data]()(data: => T) extends Bundle { val valid = Bool(INPUT) From c5a4eaa0a1ddb14f544629b5e3ecdd120d8d8916 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 1 Feb 2012 13:26:04 -0800 Subject: [PATCH 0124/1087] Associative cache, boots kernel --- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 105 ++++++++++++++------------- rocket/src/main/scala/top.scala | 6 +- rocket/src/main/scala/util.scala | 3 +- 4 files changed, 58 insertions(+), 58 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index a461be19..857adc15 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -99,7 +99,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; - val data_mux = new Mux1H(assoc, MEM_DATA_BITS) + val data_mux = (new Mux1H(assoc)){Bits(width = MEM_DATA_BITS)} var any_hit = Bool(false) for (i <- 0 until assoc) { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index ab73063a..2101ecc1 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -5,20 +5,16 @@ import Node._; import Constants._; import scala.math._; -class ReplacementWayGen extends Component { - val io = new Bundle { - val way_en = Bits(width = NWAYS, dir = INPUT) - val way_id = UFix(width = log2up(NWAYS), dir = OUTPUT) - } +class ioReplacementWayGen 
extends Bundle { + val pick_new_way = Bool(dir = INPUT) + val way_en = Bits(width = NWAYS, dir = INPUT) + val way_id = UFix(width = log2up(NWAYS), dir = OUTPUT) } class RandomReplacementWayGen extends Component { - val io = new Bundle { - val way_en = Bits(width = NWAYS, dir = INPUT) - val way_id = UFix(width = log2up(NWAYS), dir = OUTPUT) - } + val io = new ioReplacementWayGen() //TODO: Actually limit selection based on which ways are allowed (io.ways_en) - if(NWAYS > 1) io.way_id := LFSR16(io.way_en.orR) + if(NWAYS > 1) io.way_id := LFSR16(io.pick_new_way) else io.way_id := UFix(0) } @@ -109,7 +105,7 @@ class Replay extends Bundle { val typ = Bits(width = 3) val sdq_id = UFix(width = log2up(NSDQ)) val tag = Bits(width = DCACHE_TAG_BITS) - val way_id = UFix(width = log2up(NWAYS)) + val way_oh = Bits(width = NWAYS) } class DataReq extends Bundle { @@ -142,7 +138,7 @@ class MemReq extends Bundle { class WritebackReq extends Bundle { val ppn = Bits(width = PPN_BITS) val idx = Bits(width = IDX_BITS) - val way_id = UFix(width = log2up(NWAYS)) + val way_oh = Bits(width = NWAYS) } class MetaData extends Bundle { @@ -175,12 +171,12 @@ class MSHR(id: Int) extends Component { val req_type = Bits(3, INPUT) val req_sdq_id = UFix(log2up(NSDQ), INPUT) val req_tag = Bits(DCACHE_TAG_BITS, INPUT) - val req_way_id = UFix(log2up(NWAYS),INPUT) + val req_way_oh = Bits(NWAYS, INPUT) val idx_match = Bool(OUTPUT) val idx = Bits(IDX_BITS, OUTPUT) val tag = Bits(PPN_BITS, OUTPUT) - val way_id = Bits(log2up(NWAYS), OUTPUT) + val way_oh = Bits(NWAYS, OUTPUT) val mem_resp_val = Bool(INPUT) val mem_req = (new ioDecoupled) { new MemReq() }.flip @@ -194,7 +190,7 @@ class MSHR(id: Int) extends Component { val refilled = Reg { Bool() } val ppn = Reg { Bits() } val idx_ = Reg { Bits() } - val way_id_ = Reg { Bits() } + val way_oh_ = Reg { Bits() } val req_load = (io.req_cmd === M_XRD) || (io.req_cmd === M_PFR) val req_use_rpq = (io.req_cmd != M_PFR) && (io.req_cmd != M_PFW) @@ -218,7 +214,7 @@ 
class MSHR(id: Int) extends Component { refilled <== Bool(false) ppn <== io.req_ppn idx_ <== io.req_idx - way_id_ <== io.req_way_id + way_oh_ <== io.req_way_oh } when (io.mem_req.valid && io.mem_req.ready) { requested <== Bool(true) @@ -236,7 +232,7 @@ class MSHR(id: Int) extends Component { io.idx_match := valid && (idx_ === io.req_idx) io.idx := idx_ io.tag := ppn - io.way_id := way_id_ + io.way_oh := way_oh_ io.req_pri_rdy := !valid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready @@ -246,7 +242,7 @@ class MSHR(id: Int) extends Component { io.meta_req.bits.inner_req.data.valid := Bool(true) io.meta_req.bits.inner_req.data.dirty := dirty io.meta_req.bits.inner_req.data.tag := ppn - io.meta_req.bits.way_en := UFixToOH(way_id_.toUFix, NWAYS) + io.meta_req.bits.way_en := way_oh_ io.mem_req.valid := valid && !requested //io.mem_req.bits.itm := next_dirty @@ -261,7 +257,7 @@ class MSHR(id: Int) extends Component { io.replay.bits.cmd := rpq.io.deq.bits.cmd io.replay.bits.typ := rpq.io.deq.bits.typ io.replay.bits.sdq_id := rpq.io.deq.bits.sdq_id - io.replay.bits.way_id := way_id_.toUFix + io.replay.bits.way_oh := way_oh_ } class MSHRFile extends Component { @@ -275,12 +271,12 @@ class MSHRFile extends Component { val req_type = Bits(3, INPUT) val req_tag = Bits(DCACHE_TAG_BITS, INPUT) val req_sdq_id = UFix(log2up(NSDQ), INPUT) - val req_way_id = UFix(log2up(NWAYS), INPUT) + val req_way_oh = Bits(NWAYS, INPUT) val mem_resp_val = Bool(INPUT) val mem_resp_tag = Bits(DMEM_TAG_BITS, INPUT) val mem_resp_idx = Bits(IDX_BITS, OUTPUT) - val mem_resp_way_id = UFix(log2up(NWAYS), OUTPUT) + val mem_resp_way_oh = Bits(NWAYS, OUTPUT) val fence_rdy = Bool(OUTPUT) @@ -322,7 +318,7 @@ class MSHRFile extends Component { mshr.io.req_cmd := io.req_cmd mshr.io.req_type := io.req_type mshr.io.req_sdq_id := io.req_sdq_id - mshr.io.req_way_id := io.req_way_id + mshr.io.req_way_oh := io.req_way_oh mshr.io.meta_req <> meta_req_arb.io.in(i) mshr.io.mem_req <> mem_req_arb.io.in(i) @@ -332,8 +328,8 @@ 
class MSHRFile extends Component { mshr.io.mem_resp_val := mem_resp_val mem_resp_idx_mux.io.sel(i) := (UFix(i) === io.mem_resp_tag) mem_resp_idx_mux.io.in(i) := mshr.io.idx - mem_resp_way_id_mux.io.sel(i) := (UFix(i) === io.mem_resp_tag) - mem_resp_way_id_mux.io.in(i) := mshr.io.way_id + mem_resp_way_oh_mux.io.sel(i) := (UFix(i) === io.mem_resp_tag) + mem_resp_way_oh_mux.io.in(i) := mshr.io.way_oh pri_rdy = pri_rdy || mshr.io.req_pri_rdy sec_rdy = sec_rdy || mshr.io.req_sec_rdy @@ -349,7 +345,7 @@ class MSHRFile extends Component { io.req_rdy := Mux(idx_match, tag_match && sec_rdy, pri_rdy) io.mem_resp_idx := mem_resp_idx_mux.io.out - io.mem_resp_way_id := mem_resp_way_id_mux.io.out.toUFix + io.mem_resp_way_oh := mem_resp_way_oh_mux.io.out io.fence_rdy := !fence } @@ -357,7 +353,7 @@ class ReplayUnit extends Component { val io = new Bundle { val sdq_enq = (new ioDecoupled) { Bits(width = CPU_DATA_BITS) } val sdq_id = UFix(log2up(NSDQ), OUTPUT) - val way_id = UFix(log2up(NWAYS), OUTPUT) + val way_oh = Bits(NWAYS, OUTPUT) val replay = (new ioDecoupled) { new Replay() } val data_req = (new ioDecoupled) { new DataReq() }.flip() val cpu_resp_val = Bool(OUTPUT) @@ -402,7 +398,7 @@ class ReplayUnit extends Component { io.replay.ready := !replay_retry io.data_req.valid := replay_val - io.way_id := rp.way_id + io.way_oh := rp.way_oh io.data_req.bits.idx := rp.idx io.data_req.bits.offset := rp.offset io.data_req.bits.cmd := rp.cmd @@ -445,7 +441,7 @@ class WritebackUnit extends Component { io.req.ready := !valid io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) - io.data_req.bits.way_en := UFixToOH(addr.way_id, NWAYS) + io.data_req.bits.way_en := addr.way_oh io.data_req.bits.inner_req.idx := addr.idx io.data_req.bits.inner_req.offset := cnt io.data_req.bits.inner_req.rw := Bool(false) @@ -488,7 +484,13 @@ class FlushUnit(lines: Int) extends Component { is(s_ready) { when (io.req.valid) { state <== s_meta_read; tag <== io.req.bits } } is(s_meta_read) { when 
(io.meta_req.ready) { state <== s_meta_wait } } is(s_meta_wait) { state <== Mux(io.meta_resp.valid && io.meta_resp.dirty && !io.wb_req.ready, s_meta_read, s_meta_write) } - is(s_meta_write) { when (io.meta_req.ready) { state <== Mux(~idx_cnt === UFix(0), s_done, s_meta_read); idx_cnt <== next_idx_cnt } } + is(s_meta_write) { + when (io.meta_req.ready) { + state <== Mux(~way_cnt === UFix(0) && ~idx_cnt === UFix(0), s_done, s_meta_read); + when (~way_cnt === UFix(0)) { idx_cnt <== next_idx_cnt }; + way_cnt <== next_way_cnt; + } + } is(s_done) { when (io.resp.ready) { state <== s_ready } } } @@ -505,7 +507,7 @@ class FlushUnit(lines: Int) extends Component { io.wb_req.valid := state === s_meta_wait io.wb_req.bits.ppn := io.meta_resp.tag io.wb_req.bits.idx := idx_cnt - io.wb_req.bits.way_id := way_cnt + io.wb_req.bits.way_oh := UFixToOH(way_cnt, NWAYS) } class MetaDataArray(lines: Int) extends Component { @@ -546,9 +548,6 @@ class MetaDataArrayArray(lines: Int) extends Component { } val way_en_ = Reg { Bits(width=NWAYS) } - when (io.state_req.valid && io.state_req.ready) { - way_en_ <== io.state_req.bits.way_en - } when (io.req.valid && io.req.ready) { way_en_ <== io.req.bits.way_en } @@ -678,7 +677,6 @@ class HellaCacheDM(lines: Int) extends Component { val p_store_idx = Reg() { Bits() } val p_store_cmd = Reg() { Bits() } val p_store_type = Reg() { Bits() } - val p_store_way_id = Reg() { Bits() } val r_replay_amo = Reg(resetVal = Bool(false)) val req_store = (io.cpu.req_cmd === M_XWR) @@ -823,7 +821,6 @@ class HellaCacheDM(lines: Int) extends Component { p_store_type <== r_cpu_req_type p_store_cmd <== r_cpu_req_cmd p_store_data <== storegen.io.dout - p_store_way_id <== UFix(0) } // miss handling @@ -957,7 +954,7 @@ class HellaCacheAssoc(lines: Int) extends Component { val p_store_idx = Reg() { Bits() } val p_store_cmd = Reg() { Bits() } val p_store_type = Reg() { Bits() } - val p_store_way_id = Reg() { Bits() } + val p_store_way_oh = Reg() { Bits() } val r_replay_amo 
= Reg(resetVal = Bool(false)) val req_store = (io.cpu.req_cmd === M_XWR) @@ -1030,11 +1027,13 @@ class HellaCacheAssoc(lines: Int) extends Component { val tag_match = Cat(Bits(0),tag_match_arr:_*).orR val tag_hit = r_cpu_req_val && tag_match val tag_miss = r_cpu_req_val && !tag_match - val hit_way_id = OHToUFix(Cat(Bits(0),tag_match_arr.reverse:_*)) //TODO - val meta_resp_way_id = Mux(meta.io.way_en === ~UFix(0, NWAYS), hit_way_id, OHToUFix(meta.io.way_en)) - val meta_resp_mux = meta.io.resp(meta_resp_way_id) - val data_resp_way_id = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_id, OHToUFix(data.io.way_en)) - val data_resp_mux = data.io.resp(data_resp_way_id) + val hit_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*) //TODO: use GenArray + val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), hit_way_oh, meta.io.way_en) + val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_oh, data.io.way_en) + val meta_resp_mux = Mux1H(NWAYS, meta_resp_way_oh, meta.io.resp) + //val meta_resp_mux = MuxCase(meta.io.resp(0), (0 until NWAYS).map(i => (meta_resp_way_oh(i).toBool, meta.io.resp(i))))// + val data_resp_mux = Mux1H(NWAYS, data_resp_way_oh, data.io.resp) + //val data_resp_mux = MuxCase(data.io.resp(0), (0 until NWAYS).map(i => (data_resp_way_oh(i).toBool, data.io.resp(i))))// // writeback unit val wb = new WritebackUnit @@ -1045,10 +1044,11 @@ class HellaCacheAssoc(lines: Int) extends Component { // replacement policy val replacer = new RandomReplacementWayGen() - replacer.io.way_en := tag_miss & ~UFix(0, NWAYS) - val replaced_way_id = replacer.io.way_id //TODO + replacer.io.way_en := ~UFix(0, NWAYS) + val replaced_way_id = replacer.io.way_id + val replaced_way_oh = UFixToOH(replaced_way_id, NWAYS) val meta_wb_mux = meta.io.resp(replaced_way_id) - val dirty = meta_wb_mux.valid && meta_wb_mux.dirty //TODO: check all dirty uses + val dirty = meta_wb_mux.valid && meta_wb_mux.dirty // refill response val block_during_refill = !io.mem.resp_val && 
(rr_count != UFix(0)) @@ -1080,7 +1080,7 @@ class HellaCacheAssoc(lines: Int) extends Component { data_arb.io.in(2).bits.inner_req.idx := p_store_idx(indexmsb,indexlsb) data_arb.io.in(2).bits.inner_req.rw := Bool(true) data_arb.io.in(2).valid := drain_store_val - data_arb.io.in(2).bits.way_en := UFixToOH(p_store_way_id.toUFix, NWAYS) + data_arb.io.in(2).bits.way_en := p_store_way_oh val drain_store = drain_store_val && data_arb.io.in(2).ready val p_store_rdy = !p_store_valid || drain_store val p_amo = Reg(tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo, resetVal = Bool(false)) @@ -1091,17 +1091,17 @@ class HellaCacheAssoc(lines: Int) extends Component { wb_arb.io.in(1).valid := tag_miss && r_req_readwrite && dirty && !p_store_idx_match wb_arb.io.in(1).bits.ppn := meta_wb_mux.tag wb_arb.io.in(1).bits.idx := r_cpu_req_idx(indexmsb,indexlsb) - wb_arb.io.in(1).bits.way_id := replaced_way_id + wb_arb.io.in(1).bits.way_oh := replaced_way_oh // tag update after a miss or a store to an exclusive clean line. 
- val clear_valid = tag_miss && r_req_readwrite && meta_resp_mux.valid && (!dirty || wb_rdy) + val clear_valid = tag_miss && r_req_readwrite && meta_wb_mux.valid && (!dirty || wb_rdy) val set_dirty = tag_hit && !meta_resp_mux.dirty && r_req_write meta.io.state_req.bits.inner_req.rw := Bool(true) meta.io.state_req.bits.inner_req.idx := r_cpu_req_idx(indexmsb,indexlsb) meta.io.state_req.bits.inner_req.data.valid := tag_match meta.io.state_req.bits.inner_req.data.dirty := tag_match meta.io.state_req.valid := clear_valid || set_dirty - meta.io.state_req.bits.way_en := Cat(Bits(0),tag_match_arr:_*) + meta.io.state_req.bits.way_en := Mux(clear_valid, replaced_way_oh, hit_way_oh) // pending store data, also used for AMO RHS val storegen = new StoreDataGen @@ -1115,7 +1115,7 @@ class HellaCacheAssoc(lines: Int) extends Component { p_store_idx <== r_cpu_req_idx p_store_type <== r_cpu_req_type p_store_cmd <== r_cpu_req_cmd - p_store_way_id <== Mux(r_replay_amo, Reg(replayer.io.replay.bits.way_id), hit_way_id) + p_store_way_oh <== Mux(r_replay_amo, replayer.io.way_oh, hit_way_oh) p_store_data <== storegen.io.dout } @@ -1129,7 +1129,7 @@ class HellaCacheAssoc(lines: Int) extends Component { mshr.io.req_cmd := r_cpu_req_cmd mshr.io.req_type := r_cpu_req_type mshr.io.req_sdq_id := replayer.io.sdq_id - mshr.io.req_way_id := replaced_way_id + mshr.io.req_way_oh := replaced_way_oh mshr.io.mem_resp_val := io.mem.resp_val && (~rr_count === UFix(0)) mshr.io.mem_resp_tag := io.mem.resp_tag mshr.io.mem_req <> wb.io.refill_req @@ -1138,7 +1138,8 @@ class HellaCacheAssoc(lines: Int) extends Component { replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy replayer.io.sdq_enq.bits := storegen.io.dout data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx - data_arb.io.in(0).bits.way_en := UFixToOH(mshr.io.mem_resp_way_id.toUFix, NWAYS) + data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh + replacer.io.pick_new_way := !io.cpu.req_kill && 
mshr.io.req_val && mshr.io.req_rdy // replays val replay = replayer.io.data_req.bits @@ -1149,7 +1150,7 @@ class HellaCacheAssoc(lines: Int) extends Component { data_arb.io.in(1).bits.inner_req.idx := replay.idx data_arb.io.in(1).bits.inner_req.rw := replay.cmd === M_XWR data_arb.io.in(1).valid := replay_val - data_arb.io.in(1).bits.way_en := UFixToOH(replayer.io.way_id, NWAYS) + data_arb.io.in(1).bits.way_en := replayer.io.way_oh replayer.io.data_req.ready := replay_rdy && !stall_replay r_replay_amo <== replay_amo_val && replay_rdy && !stall_replay diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index b38c9ddf..48305588 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -17,7 +17,7 @@ class Top() extends Component { val cpu = new rocketProc(); val icache = new rocketICache(128, 2); // 128 sets x 2 ways val icache_pf = new rocketIPrefetcher(); - val dcache = new HellaCacheDM(128); + val dcache = new HellaCacheAssoc(128); val arbiter = new rocketMemArbiter(); arbiter.io.mem <> io.mem; @@ -37,9 +37,9 @@ class Top() extends Component { object top_main { def main(args: Array[String]) = { // Can turn off --debug and --vcd when done with debugging to improve emulator performance -// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); + val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); // val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug"); - val cpu_args = args ++ Array("--target-dir", "generated-src"); +// val cpu_args = args ++ Array("--target-dir", "generated-src"); // Set variables based off of command flags // for(a <- args) { // a match { diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 5e33e17f..f2c19ee4 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -56,8 +56,7 @@ object UFixToOH { def apply(in: UFix, width: Int): Bits = { - val out = 
Bits(1, width) - out << in + (UFix(1) << in(log2up(width)-1,0)) } } From b1bbf56b74787b8067068e26f9e2038eedb440f0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 1 Feb 2012 15:36:01 -0800 Subject: [PATCH 0125/1087] clean up wb->id bypass --- rocket/src/main/scala/dpath.scala | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a19d0fd7..4ba96384 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -118,6 +118,7 @@ class rocketDpath extends Component val wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); + val wb_wdata = Wire() { Bits() }; val r_dmem_resp_val = Reg(resetVal = Bool(false)); val r_dmem_resp_replay = Reg(resetVal = Bool(false)); @@ -214,18 +215,16 @@ class rocketDpath extends Component Mux(id_raddr1 != UFix(0, 5) && (ex_reg_ctrl_wen || ex_reg_ctrl_ll_wb) && id_raddr1 === ex_reg_waddr, ex_wdata, Mux(rs1_mem_lu_bypass, io.dmem.resp_data, Mux(id_raddr1 != UFix(0, 5) && (mem_reg_ctrl_wen || mem_reg_ctrl_ll_wb) && id_raddr1 === mem_reg_waddr, mem_reg_wdata, - Mux(id_raddr1 != UFix(0, 5) && r_dmem_resp_val && id_raddr1 === r_dmem_resp_waddr, io.dmem.resp_data_subword, - Mux(id_raddr1 != UFix(0, 5) && (wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb) && id_raddr1 === wb_reg_waddr, wb_reg_wdata, - id_rdata1)))))))); + Mux(id_raddr1 != UFix(0, 5) && (wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, + id_rdata1))))))); val rs2_mem_lu_bypass = id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr; val id_rs2 = Mux(id_raddr2 != UFix(0, 5) && (ex_reg_ctrl_wen || ex_reg_ctrl_ll_wb) && id_raddr2 === ex_reg_waddr, ex_wdata, Mux(rs2_mem_lu_bypass, io.dmem.resp_data, Mux(id_raddr2 != UFix(0, 5) && (mem_reg_ctrl_wen || mem_reg_ctrl_ll_wb) && id_raddr2 === mem_reg_waddr, 
mem_reg_wdata, - Mux(id_raddr2 != UFix(0, 5) && r_dmem_resp_val && id_raddr2 === r_dmem_resp_waddr, io.dmem.resp_data_subword, - Mux(id_raddr2 != UFix(0, 5) && (wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb) && id_raddr2 === wb_reg_waddr, wb_reg_wdata, - id_rdata2))))); + Mux(id_raddr2 != UFix(0, 5) && (wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, + id_rdata2)))); io.ctrl.mem_lu_bypass := rs1_mem_lu_bypass || rs2_mem_lu_bypass; io.ctrl.inst := id_reg_inst; @@ -406,12 +405,11 @@ class rocketDpath extends Component wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } - // crossbar/sign extension for 8/16 bit loads (moved to earlier in file) - // regfile write + wb_wdata := Mux(Reg(io.ctrl.mem_load), io.dmem.resp_data_subword, wb_reg_wdata) rfile.io.w0.addr := wb_reg_waddr; rfile.io.w0.en := wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb; - rfile.io.w0.data := Mux(Reg(io.ctrl.mem_load), io.dmem.resp_data_subword, wb_reg_wdata); + rfile.io.w0.data := wb_wdata io.ctrl.wb_waddr := wb_reg_waddr; From 01a156eb98c7cde22aa78bb8f05116d6225cf7ef Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 1 Feb 2012 21:11:45 -0800 Subject: [PATCH 0126/1087] make # of dcache lines configurable --- rocket/src/main/scala/consts.scala | 4 ++- rocket/src/main/scala/nbdcache.scala | 39 ++++++++++++++-------------- rocket/src/main/scala/top.scala | 2 +- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 67ffbee3..28c43867 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -187,8 +187,10 @@ object Constants val NRPQ = 16; // number of secondary misses val NSDQ = 17; // number of secondary stores/AMOs val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) - val IDX_BITS = PGIDX_BITS - OFFSET_BITS; + val IDX_BITS = 7; + val TAG_BITS = PADDR_BITS - OFFSET_BITS - IDX_BITS; val NWAYS = 1; + require(IDX_BITS+OFFSET_BITS <= PGIDX_BITS); // external 
memory interface val IMEM_TAG_BITS = 1; diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2101ecc1..7d69c19f 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -131,12 +131,12 @@ class DataArrayArrayReq extends Bundle { class MemReq extends Bundle { val rw = Bool() - val addr = UFix(width = PPN_BITS+IDX_BITS) + val addr = UFix(width = PADDR_BITS-OFFSET_BITS) val tag = Bits(width = DMEM_TAG_BITS) } class WritebackReq extends Bundle { - val ppn = Bits(width = PPN_BITS) + val ppn = Bits(width = TAG_BITS) val idx = Bits(width = IDX_BITS) val way_oh = Bits(width = NWAYS) } @@ -144,7 +144,7 @@ class WritebackReq extends Bundle { class MetaData extends Bundle { val valid = Bool() val dirty = Bool() - val tag = Bits(width = PPN_BITS) + val tag = Bits(width = TAG_BITS) } class MetaArrayReq extends Bundle { @@ -164,7 +164,7 @@ class MSHR(id: Int) extends Component { val req_pri_rdy = Bool(OUTPUT) val req_sec_val = Bool(INPUT) val req_sec_rdy = Bool(OUTPUT) - val req_ppn = Bits(PPN_BITS, INPUT) + val req_ppn = Bits(TAG_BITS, INPUT) val req_idx = Bits(IDX_BITS, INPUT) val req_offset = Bits(OFFSET_BITS, INPUT) val req_cmd = Bits(4, INPUT) @@ -175,7 +175,7 @@ class MSHR(id: Int) extends Component { val idx_match = Bool(OUTPUT) val idx = Bits(IDX_BITS, OUTPUT) - val tag = Bits(PPN_BITS, OUTPUT) + val tag = Bits(TAG_BITS, OUTPUT) val way_oh = Bits(NWAYS, OUTPUT) val mem_resp_val = Bool(INPUT) @@ -264,7 +264,7 @@ class MSHRFile extends Component { val io = new Bundle { val req_val = Bool(INPUT) val req_rdy = Bool(OUTPUT) - val req_ppn = Bits(PPN_BITS, INPUT) + val req_ppn = Bits(TAG_BITS, INPUT) val req_idx = Bits(IDX_BITS, INPUT) val req_offset = Bits(OFFSET_BITS, INPUT) val req_cmd = Bits(4, INPUT) @@ -285,7 +285,7 @@ class MSHRFile extends Component { val replay = (new ioDecoupled) { new Replay() }.flip() } - val tag_mux = (new Mux1H(NMSHR)){ Bits(width = PPN_BITS) } + val tag_mux = (new 
Mux1H(NMSHR)){ Bits(width = TAG_BITS) } val mem_resp_idx_mux = (new Mux1H(NMSHR)){ Bits(width = IDX_BITS) } val mem_resp_way_oh_mux = (new Mux1H(NMSHR)){ Bits(width = NWAYS) } val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } @@ -644,13 +644,10 @@ class AMOALU extends Component { /* MIN[U]/MAX[U] */ cmp_out)))); } -//class HellaCache(lines: Int, ways: Int) extends Component { -// -//} - -class HellaCacheDM(lines: Int) extends Component { +class HellaCacheDM extends Component { val io = new ioDCacheHella() - + + val lines = 1 << IDX_BITS val addrbits = PADDR_BITS val indexbits = log2up(lines) val offsetbits = OFFSET_BITS @@ -753,7 +750,8 @@ class HellaCacheDM(lines: Int) extends Component { meta_arb.io.in(2).bits.data.dirty := Bool(false) // don't care meta_arb.io.in(2).bits.data.tag := UFix(0) // don't care val early_tag_nack = !meta_arb.io.in(2).ready - val tag_match = meta.io.resp.valid && (meta.io.resp.tag === io.cpu.req_ppn) + val cpu_req_tag = Cat(io.cpu.req_ppn, r_cpu_req_idx)(tagmsb,taglsb) + val tag_match = meta.io.resp.valid && (meta.io.resp.tag === cpu_req_tag) val tag_hit = r_cpu_req_val && tag_match val tag_miss = r_cpu_req_val && !tag_match val dirty = meta.io.resp.valid && meta.io.resp.dirty @@ -826,7 +824,7 @@ class HellaCacheDM(lines: Int) extends Component { // miss handling val mshr = new MSHRFile() mshr.io.req_val := tag_miss && r_req_readwrite && (!dirty || wb_rdy) && (!r_req_write || replayer.io.sdq_enq.ready) - mshr.io.req_ppn := io.cpu.req_ppn + mshr.io.req_ppn := cpu_req_tag mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) mshr.io.req_tag := r_cpu_req_tag mshr.io.req_offset := r_cpu_req_idx(offsetmsb,0) @@ -925,9 +923,10 @@ class HellaCacheDM(lines: Int) extends Component { io.mem.req_addr := wb.io.mem_req.bits.addr } -class HellaCacheAssoc(lines: Int) extends Component { +class HellaCacheAssoc extends Component { val io = new ioDCacheHella() - + + val lines = 1 << IDX_BITS val addrbits = PADDR_BITS val indexbits = 
log2up(lines) val offsetbits = OFFSET_BITS @@ -1022,8 +1021,8 @@ class HellaCacheAssoc(lines: Int) extends Component { meta_arb.io.in(2).bits.inner_req.data.tag := UFix(0) // don't care meta_arb.io.in(2).bits.way_en := ~UFix(0, NWAYS) val early_tag_nack = !meta_arb.io.in(2).ready - //val tag_match_arr = meta.io.resp.map(r => r.valid && (r.tag === io.cpu_req_ppn)) - val tag_match_arr = (0 until NWAYS).map( w => meta.io.resp(w).valid && (meta.io.resp(w).tag === io.cpu.req_ppn)) + val cpu_req_tag = Cat(io.cpu.req_ppn, r_cpu_req_idx)(tagmsb,taglsb) + val tag_match_arr = (0 until NWAYS).map( w => meta.io.resp(w).valid && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR val tag_hit = r_cpu_req_val && tag_match val tag_miss = r_cpu_req_val && !tag_match @@ -1122,7 +1121,7 @@ class HellaCacheAssoc(lines: Int) extends Component { // miss handling val mshr = new MSHRFile() mshr.io.req_val := tag_miss && r_req_readwrite && (!dirty || wb_rdy) && (!r_req_write || replayer.io.sdq_enq.ready) - mshr.io.req_ppn := io.cpu.req_ppn + mshr.io.req_ppn := cpu_req_tag mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) mshr.io.req_tag := r_cpu_req_tag mshr.io.req_offset := r_cpu_req_idx(offsetmsb,0) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 48305588..a85b7245 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -17,7 +17,7 @@ class Top() extends Component { val cpu = new rocketProc(); val icache = new rocketICache(128, 2); // 128 sets x 2 ways val icache_pf = new rocketIPrefetcher(); - val dcache = new HellaCacheAssoc(128); + val dcache = new HellaCacheAssoc(); val arbiter = new rocketMemArbiter(); arbiter.io.mem <> io.mem; From 99a959e6b18d2d8ffa1e4a2b6a923b55fa6ea118 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 2 Feb 2012 13:33:27 -0800 Subject: [PATCH 0127/1087] remove pc+4 piperegs and add new ex pc+4 adder --- rocket/src/main/scala/dpath.scala | 11 
++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 4ba96384..36def484 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -70,12 +70,10 @@ class rocketDpath extends Component val id_reg_valid = Reg(resetVal = Bool(false)); val id_reg_inst = Reg(resetVal = NOP); val id_reg_pc = Reg() { UFix(width = VADDR_BITS+1) }; - val id_reg_pc_plus4 = Reg() { UFix(width = VADDR_BITS+1) }; // execute definitions val ex_reg_valid = Reg(resetVal = Bool(false)); val ex_reg_pc = Reg() { UFix() }; - val ex_reg_pc_plus4 = Reg() { UFix() }; val ex_reg_inst = Reg() { Bits() }; val ex_reg_raddr2 = Reg() { UFix() }; val ex_reg_rs2 = Reg() { Bits() }; @@ -127,6 +125,7 @@ class rocketDpath extends Component // instruction fetch stage val if_pc_plus4 = if_reg_pc + UFix(4); + val ex_pc_plus4 = ex_reg_pc + UFix(4); val ex_sign_extend = Cat(Fill(52, ex_reg_inst(21)), ex_reg_inst(21,10)); val ex_sign_extend_split = @@ -145,7 +144,7 @@ class rocketDpath extends Component val if_next_pc = Mux(io.ctrl.sel_pc === PC_BTB, Cat(if_btb_target(VADDR_BITS-1), if_btb_target), - Mux(io.ctrl.sel_pc === PC_EX4, ex_reg_pc_plus4, + Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target_extended, Mux(io.ctrl.sel_pc === PC_PCR, wb_reg_wdata(VADDR_BITS,0), // only used for ERET @@ -167,13 +166,12 @@ class rocketDpath extends Component btb.io.hit <> io.ctrl.btb_hit; btb.io.wen <> io.ctrl.wen_btb; btb.io.clr <> io.ctrl.clr_btb; - btb.io.correct_pc4 := ex_reg_pc_plus4; + btb.io.correct_pc4 := ex_pc_plus4; io.ctrl.btb_match := id_reg_pc === jr_br_target; // instruction decode stage when (!io.ctrl.stalld) { id_reg_pc <== if_reg_pc; - id_reg_pc_plus4 <== if_pc_plus4; when(io.ctrl.killf) { id_reg_inst <== NOP; id_reg_valid <== Bool(false); @@ -231,7 +229,6 @@ class rocketDpath extends Component // execute 
stage ex_reg_pc <== id_reg_pc; - ex_reg_pc_plus4 <== id_reg_pc_plus4; ex_reg_inst <== id_reg_inst; ex_reg_raddr2 <== id_raddr2; ex_reg_rs2 <== id_rs2; @@ -351,7 +348,7 @@ class rocketDpath extends Component // writeback select mux ex_wdata := Mux(ex_reg_ctrl_ll_wb || ex_reg_ctrl_wen_pcr, ex_reg_rs1, - Mux(ex_reg_ctrl_sel_wb === WB_PC, Cat(Fill(64-VADDR_BITS, ex_reg_pc_plus4(VADDR_BITS-1)), ex_reg_pc_plus4), + Mux(ex_reg_ctrl_sel_wb === WB_PC, Cat(Fill(64-VADDR_BITS, ex_pc_plus4(VADDR_BITS-1)), ex_pc_plus4), Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, From 41c4e10c37b622504de982c5cb652a4415e4b1b5 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 2 Feb 2012 21:53:39 -0800 Subject: [PATCH 0128/1087] Workaround for another frakking extraction error in the C backend. C and VLSI backends now both boot kernel with associativity on --- rocket/src/main/scala/nbdcache.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 7d69c19f..130de659 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -14,7 +14,7 @@ class ioReplacementWayGen extends Bundle { class RandomReplacementWayGen extends Component { val io = new ioReplacementWayGen() //TODO: Actually limit selection based on which ways are allowed (io.ways_en) - if(NWAYS > 1) io.way_id := LFSR16(io.pick_new_way) + if(NWAYS > 1) io.way_id := LFSR16(io.pick_new_way)(log2up(NWAYS)-1,0) else io.way_id := UFix(0) } @@ -928,7 +928,7 @@ class HellaCacheAssoc extends Component { val lines = 1 << IDX_BITS val addrbits = PADDR_BITS - val indexbits = log2up(lines) + val indexbits = IDX_BITS val offsetbits = OFFSET_BITS val tagmsb = PADDR_BITS-1 val taglsb = indexbits+offsetbits @@ -1030,9 +1030,7 @@ class HellaCacheAssoc extends Component { val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), 
hit_way_oh, meta.io.way_en) val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_oh, data.io.way_en) val meta_resp_mux = Mux1H(NWAYS, meta_resp_way_oh, meta.io.resp) - //val meta_resp_mux = MuxCase(meta.io.resp(0), (0 until NWAYS).map(i => (meta_resp_way_oh(i).toBool, meta.io.resp(i))))// val data_resp_mux = Mux1H(NWAYS, data_resp_way_oh, data.io.resp) - //val data_resp_mux = MuxCase(data.io.resp(0), (0 until NWAYS).map(i => (data_resp_way_oh(i).toBool, data.io.resp(i))))// // writeback unit val wb = new WritebackUnit From fde8e3b696170c2dfa8862f025d6842eb12fa8ab Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 6 Feb 2012 17:26:45 -0800 Subject: [PATCH 0129/1087] clean up bypassing/hazard checking a bit --- rocket/src/main/scala/ctrl.scala | 33 +++++++++++++------------------ rocket/src/main/scala/dpath.scala | 20 +++++++++---------- 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index dc7dcbd0..7f6b035d 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -60,7 +60,6 @@ class ioCtrlDpath extends Bundle() val div_result_val = Bool(INPUT); val mul_rdy = Bool(INPUT); val mul_result_val = Bool(INPUT); - val mem_lu_bypass = Bool(INPUT); val ex_waddr = UFix(5,INPUT); // write addr from execute stage val mem_waddr = UFix(5,INPUT); // write addr from memory stage val wb_waddr = UFix(5,INPUT); // write addr from writeback stage @@ -280,7 +279,7 @@ class rocketCtrl extends Component FLD-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), FSW-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), FSD-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N) -/* +/* // floating point FLW-> 
List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), FLD-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), @@ -324,6 +323,7 @@ class rocketCtrl extends Component val id_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val id_reg_icmiss = Reg(resetVal = Bool(false)); val id_reg_replay = Reg(resetVal = Bool(false)); + val id_load_use = Wire(){Bool()}; val ex_reg_br_type = Reg(){UFix(width = 4)}; val ex_reg_btb_hit = Reg(){Bool()}; @@ -344,7 +344,7 @@ class rocketCtrl extends Component val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val ex_reg_replay = Reg(resetVal = Bool(false)); - val ex_reg_lu_bypass = Reg(resetVal = Bool(false)); + val ex_reg_load_use = Reg(resetVal = Bool(false)); val mem_reg_inst_di = Reg(resetVal = Bool(false)); val mem_reg_inst_ei = Reg(resetVal = Bool(false)); @@ -405,7 +405,7 @@ class rocketCtrl extends Component ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== Bool(false); ex_reg_replay <== Bool(false); - ex_reg_lu_bypass <== Bool(false); + ex_reg_load_use <== Bool(false); } otherwise { ex_reg_br_type <== id_br_type; @@ -426,7 +426,7 @@ class rocketCtrl extends Component ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== id_syscall.toBool; ex_reg_replay <== id_reg_replay || ex_reg_replay_next; - ex_reg_lu_bypass <== io.dpath.mem_lu_bypass; + ex_reg_load_use <== id_load_use; } ex_reg_mem_cmd <== id_mem_cmd; ex_reg_mem_type <== id_mem_type; @@ -565,7 +565,7 @@ class rocketCtrl extends Component // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions - val replay_ex = dcache_miss && ex_reg_lu_bypass || mem_reg_flush_inst || + val replay_ex = dcache_miss && ex_reg_load_use || mem_reg_flush_inst || ex_reg_replay 
|| ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) || ex_reg_div_val && !io.dpath.div_rdy || ex_reg_mul_val && !io.dpath.mul_rdy @@ -604,13 +604,12 @@ class rocketCtrl extends Component val ex_mem_cmd_load = ex_reg_mem_val && ((ex_reg_mem_cmd === M_XRD) || ex_reg_mem_cmd(3).toBool); val data_hazard_ex = - (ex_mem_cmd_load || ex_reg_div_val || ex_reg_mul_val) && ((id_renx1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || (id_renx2.toBool && (id_raddr2 === io.dpath.ex_waddr)) || (id_wen.toBool && (id_waddr === io.dpath.ex_waddr))); + val id_ex_hazard = data_hazard_ex && (ex_mem_cmd_load || ex_reg_div_val || ex_reg_mul_val) // stall for RAW/WAW hazards on LB/LH and mul/div in memory stage. - // stall for WAW-but-not-RAW hazards on LW/LD/AMO. val mem_mem_cmd_load = mem_reg_mem_val && ((mem_reg_mem_cmd === M_XRD) || mem_reg_mem_cmd(3).toBool); val mem_mem_cmd_load_bh = @@ -619,24 +618,20 @@ class rocketCtrl extends Component (mem_reg_mem_type === MT_BU) || (mem_reg_mem_type === MT_H) || (mem_reg_mem_type === MT_HU)); - val raw_hazard_mem = - (id_renx1.toBool && (id_raddr1 === io.dpath.mem_waddr)) || - (id_renx2.toBool && (id_raddr2 === io.dpath.mem_waddr)); - val waw_hazard_mem = - (id_wen.toBool && (id_waddr === io.dpath.mem_waddr)); val data_hazard_mem = - (mem_mem_cmd_load_bh || mem_reg_div_mul_val) && (raw_hazard_mem || waw_hazard_mem) || - mem_mem_cmd_load && (!raw_hazard_mem && waw_hazard_mem) + (id_renx1.toBool && (id_raddr1 === io.dpath.mem_waddr)) || + (id_renx2.toBool && (id_raddr2 === io.dpath.mem_waddr)) || + (id_wen.toBool && (id_waddr === io.dpath.mem_waddr)); + val id_mem_hazard = data_hazard_mem && (mem_mem_cmd_load_bh || mem_reg_div_mul_val) + id_load_use := mem_mem_cmd_load && data_hazard_mem // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
val data_hazard_wb = - (dcache_miss || wb_reg_div_mul_val) && ((id_renx1.toBool && (id_raddr1 === io.dpath.wb_waddr)) || (id_renx2.toBool && (id_raddr2 === io.dpath.wb_waddr)) || (id_wen.toBool && (id_waddr === io.dpath.wb_waddr))); + val id_wb_hazard = data_hazard_wb && (dcache_miss || wb_reg_div_mul_val) - val data_hazard = data_hazard_ex || data_hazard_mem || data_hazard_wb; - // for divider, multiplier, load miss writeback val mem_wb = Reg(io.dmem.resp_replay, resetVal = Bool(false)) // delayed for subword extension val mul_wb = io.dpath.mul_result_val && !mem_wb; @@ -645,7 +640,7 @@ class rocketCtrl extends Component val ctrl_stalld = !take_pc && ( - data_hazard || + id_ex_hazard || id_mem_hazard || id_wb_hazard || id_renx2.toBool && id_stall_raddr2 || id_renx1.toBool && id_stall_raddr1 || id_wen.toBool && id_stall_waddr || diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 36def484..b2b82385 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -106,6 +106,7 @@ class rocketDpath extends Component val mem_reg_ctrl_div_val = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); + val mem_wdata = Wire() { Bits() }; // writeback definitions val wb_reg_valid = Reg(resetVal = Bool(false)); @@ -205,26 +206,21 @@ class rocketDpath extends Component UFix(0, 5)))))); // bypass muxes - val rs1_mem_lu_bypass = id_raddr1 != UFix(0, 5) && io.ctrl.mem_load && id_raddr1 === mem_reg_waddr; val id_rs1 = Mux(r_dmem_resp_replay, io.dmem.resp_data_subword, Mux(io.ctrl.div_wb, div_result, Mux(io.ctrl.mul_wb, mul_result, - Mux(id_raddr1 != UFix(0, 5) && (ex_reg_ctrl_wen || ex_reg_ctrl_ll_wb) && id_raddr1 === ex_reg_waddr, ex_wdata, - Mux(rs1_mem_lu_bypass, io.dmem.resp_data, - Mux(id_raddr1 != UFix(0, 5) && (mem_reg_ctrl_wen || mem_reg_ctrl_ll_wb) && id_raddr1 === mem_reg_waddr, mem_reg_wdata, + Mux(id_raddr1 != UFix(0, 5) && 
(ex_reg_ctrl_wen || ex_reg_ctrl_ll_wb) && id_raddr1 === ex_reg_waddr, ex_wdata, + Mux(id_raddr1 != UFix(0, 5) && (mem_reg_ctrl_wen || mem_reg_ctrl_ll_wb) && id_raddr1 === mem_reg_waddr, mem_wdata, Mux(id_raddr1 != UFix(0, 5) && (wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, - id_rdata1))))))); + id_rdata1)))))); - val rs2_mem_lu_bypass = id_raddr2 != UFix(0, 5) && io.ctrl.mem_load && id_raddr2 === mem_reg_waddr; val id_rs2 = - Mux(id_raddr2 != UFix(0, 5) && (ex_reg_ctrl_wen || ex_reg_ctrl_ll_wb) && id_raddr2 === ex_reg_waddr, ex_wdata, - Mux(rs2_mem_lu_bypass, io.dmem.resp_data, - Mux(id_raddr2 != UFix(0, 5) && (mem_reg_ctrl_wen || mem_reg_ctrl_ll_wb) && id_raddr2 === mem_reg_waddr, mem_reg_wdata, + Mux(id_raddr2 != UFix(0, 5) && (ex_reg_ctrl_wen || ex_reg_ctrl_ll_wb) && id_raddr2 === ex_reg_waddr, ex_wdata, + Mux(id_raddr2 != UFix(0, 5) && (mem_reg_ctrl_wen || mem_reg_ctrl_ll_wb) && id_raddr2 === mem_reg_waddr, mem_wdata, Mux(id_raddr2 != UFix(0, 5) && (wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, - id_rdata2)))); + id_rdata2))); - io.ctrl.mem_lu_bypass := rs1_mem_lu_bypass || rs2_mem_lu_bypass; io.ctrl.inst := id_reg_inst; // execute stage @@ -378,6 +374,8 @@ class rocketDpath extends Component io.ctrl.mem_waddr := mem_reg_waddr; io.ctrl.mem_valid := mem_reg_valid; + mem_wdata := Mux(io.ctrl.mem_load, io.dmem.resp_data, mem_reg_wdata) + // 32/64 bit load handling (moved to earlier in file) // writeback stage From 1be9d159443b0ecc8b10a1d34be60c3ebc7b1edf Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Tue, 7 Feb 2012 14:07:42 -0800 Subject: [PATCH 0130/1087] Fixed bug regarding case sensitivity regarding ioICache,ioDCache --- rocket/src/main/scala/arbiter.scala | 8 ++++---- rocket/src/main/scala/dcache.scala | 6 +++--- rocket/src/main/scala/icache.scala | 8 ++++---- rocket/src/main/scala/icache_prefetch.scala | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git 
a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index d82b2050..b0c51155 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -20,8 +20,8 @@ class ioMem() extends Bundle class ioMemArbiter extends Bundle() { val mem = new ioMem(); - val dcache = new ioDcache(); -// val icache = new ioIcache(); + val dcache = new ioDCache(); +// val icache = new ioICache(); val icache = new ioIPrefetcherMem().flip(); } @@ -35,10 +35,10 @@ class rocketMemArbiter extends Component { // Memory request is valid if either icache or dcache have a valid request io.mem.req_val := (io.icache.req_val || io.dcache.req_val); - // Set read/write bit. Icache always reads + // Set read/write bit. ICache always reads io.mem.req_rw := Mux(io.dcache.req_val, io.dcache.req_rw, Bool(false)); - // Give priority to Icache + // Give priority to ICache io.mem.req_addr := Mux(io.dcache.req_val, io.dcache.req_addr, io.icache.req_addr); // low bit of tag=0 for I$, 1 for D$ diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index a1b24ddd..7355d6d4 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -28,7 +28,7 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { } // interface between D$ and next level in memory hierarchy -class ioDcache(view: List[String] = null) extends Bundle(view) { +class ioDCache(view: List[String] = null) extends Bundle(view) { val req_addr = UFix(PADDR_BITS - OFFSET_BITS, INPUT); val req_tag = UFix(DMEM_TAG_BITS, INPUT); val req_val = Bool(INPUT); @@ -42,12 +42,12 @@ class ioDcache(view: List[String] = null) extends Bundle(view) { class ioDCacheDM extends Bundle() { val cpu = new ioDmem(); - val mem = new ioDcache().flip(); + val mem = new ioDCache().flip(); } class ioDCacheHella extends Bundle() { val cpu = new ioDmem(); - val mem = new ioDcache().flip(); + val mem = new ioDCache().flip(); } class rocketDCacheStoreGen extends 
Component { diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 857adc15..5a1b8aaa 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -18,7 +18,7 @@ class ioImem(view: List[String] = null) extends Bundle (view) } // interface between I$ and memory (128 bits wide) -class ioIcache(view: List[String] = null) extends Bundle (view) +class ioICache(view: List[String] = null) extends Bundle (view) { val req_addr = UFix(PADDR_BITS - OFFSET_BITS, INPUT); val req_val = Bool(INPUT); @@ -27,10 +27,10 @@ class ioIcache(view: List[String] = null) extends Bundle (view) val resp_val = Bool(OUTPUT); } -class ioICache extends Bundle() +class ioRocketICache extends Bundle() { val cpu = new ioImem(); - val mem = new ioIcache().flip(); + val mem = new ioICache().flip(); } // basic direct mapped instruction cache @@ -38,7 +38,7 @@ class ioICache extends Bundle() // parameters : // lines = # cache lines class rocketICache(sets: Int, assoc: Int) extends Component { - val io = new ioICache(); + val io = new ioRocketICache(); val lines = sets * assoc; val addrbits = PADDR_BITS; diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index cdcf9a00..07b2fb57 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -17,7 +17,7 @@ class ioIPrefetcherMem(view: List[String] = null) extends Bundle (view) } class ioIPrefetcher extends Bundle() { - val icache = new ioIcache(); + val icache = new ioICache(); val mem = new ioIPrefetcherMem(); } From 5403d069e9ccd8206a38313c32e6ad7405687348 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 7 Feb 2012 23:54:25 -0800 Subject: [PATCH 0131/1087] add fp loads/stores --- rocket/src/main/scala/consts.scala | 14 +- rocket/src/main/scala/cpu.scala | 9 + rocket/src/main/scala/ctrl.scala | 266 ++++++++++++------------- rocket/src/main/scala/ctrl_util.scala | 3 + 
rocket/src/main/scala/dpath.scala | 46 +++-- rocket/src/main/scala/dpath_util.scala | 6 +- rocket/src/main/scala/fpu.scala | 148 ++++++++++++++ 7 files changed, 319 insertions(+), 173 deletions(-) create mode 100644 rocket/src/main/scala/fpu.scala diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 28c43867..228fa655 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -77,9 +77,6 @@ object Constants val Y = UFix(1, 1); val Y_SH = UFix(1, 1); -// val FPU_N = UFix(0, 1); -// val FPU_Y = FPU_N; - val FWBQ_N = UFix(0, 1); val FWBQ_Y = UFix(1, 1); @@ -180,7 +177,7 @@ object Constants // rocketNBDCacheDM parameters val CPU_DATA_BITS = 64; - val CPU_TAG_BITS = 5; + val CPU_TAG_BITS = 6; val DCACHE_TAG_BITS = 1 + CPU_TAG_BITS; val OFFSET_BITS = 6; // log2(cache line size in bytes) val NMSHR = 2; // number of primary misses @@ -209,9 +206,12 @@ object Constants val START_ADDR = 0x2000; - val HAVE_RVC = Bool(false); - val HAVE_FPU = Bool(false); - val HAVE_VEC = Bool(false); + val HAVE_RVC = false + val HAVE_FPU = true + val HAVE_VEC = false + + val FPU_N = UFix(0, 1); + val FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N; } } diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 70fd9b56..2dd0b062 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -111,6 +111,15 @@ class rocketProc extends Component io.console.bits := dpath.io.console.bits; io.console.valid := dpath.io.console.valid; ctrl.io.console.rdy := io.console.rdy; + + if (HAVE_FPU) + { + val fpu = new rocketFPU + fpu.io.dmem.resp_val := arb.io.cpu.resp_val; + fpu.io.dmem.resp_tag := arb.io.cpu.resp_tag; + fpu.io.dmem.resp_data := arb.io.cpu.resp_data; + dpath.io.fpu <> fpu.io.dpath + } } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 7f6b035d..c78e0878 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala 
@@ -37,7 +37,10 @@ class ioCtrlDpath extends Bundle() val id_eret = Bool(OUTPUT); val wb_eret = Bool(OUTPUT); val mem_load = Bool(OUTPUT); - val wen = Bool(OUTPUT); + val ex_fp_val= Bool(OUTPUT); + val ex_wen = Bool(OUTPUT); + val mem_wen = Bool(OUTPUT); + val wb_wen = Bool(OUTPUT); // instruction in execute is an unconditional jump val ex_jmp = Bool(OUTPUT); val ex_jr = Bool(OUTPUT); @@ -60,12 +63,15 @@ class ioCtrlDpath extends Bundle() val div_result_val = Bool(INPUT); val mul_rdy = Bool(INPUT); val mul_result_val = Bool(INPUT); + val mem_wb = Bool(INPUT); val ex_waddr = UFix(5,INPUT); // write addr from execute stage val mem_waddr = UFix(5,INPUT); // write addr from memory stage val wb_waddr = UFix(5,INPUT); // write addr from writeback stage val status = Bits(17, INPUT); val sboard_clr = Bool(INPUT); val sboard_clra = UFix(5, INPUT); + val fp_sboard_clr = Bool(INPUT); + val fp_sboard_clra = UFix(5, INPUT); val mem_valid = Bool(INPUT); // high if there's a valid (not flushed) instruction in mem stage val irq_timer = Bool(INPUT); val irq_ipi = Bool(INPUT); @@ -76,7 +82,7 @@ class ioCtrlAll extends Bundle() val dpath = new ioCtrlDpath(); val console = new ioConsole(List("rdy")); val imem = new ioImem(List("req_val", "resp_val")).flip(); - val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_replay", "resp_nack")).flip(); + val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip(); val dtlb_val = Bool(OUTPUT); val dtlb_kill = Bool(OUTPUT); val dtlb_rdy = Bool(INPUT); @@ -92,77 +98,9 @@ class ioCtrlAll extends Bundle() class rocketCtrl extends Component { val io = new ioCtrlAll(); - -// val fp = -// ListLookup(io.dpath.inst, -// List(Bool(false)), -// Array( -// FMOVZ -> List(Bool(true)), -// FMOVN -> List(Bool(true)), -// FADD_S -> List(Bool(true)), -// FSUB_S -> List(Bool(true)), -// FMUL_S -> List(Bool(true)), -// FDIV_S -> List(Bool(true)), -// FSQRT_S 
-> List(Bool(true)), -// FSGNJ_S -> List(Bool(true)), -// FSGNJN_S -> List(Bool(true)), -// FSGNJX_S -> List(Bool(true)), -// FADD_D -> List(Bool(true)), -// FSUB_D -> List(Bool(true)), -// FMUL_D -> List(Bool(true)), -// FDIV_D -> List(Bool(true)), -// FSQRT_D -> List(Bool(true)), -// FSGNJ_D -> List(Bool(true)), -// FSGNJN_D -> List(Bool(true)), -// FSGNJX_D -> List(Bool(true)), -// FCVT_L_S -> List(Bool(true)), -// FCVT_LU_S -> List(Bool(true)), -// FCVT_W_S -> List(Bool(true)), -// FCVT_WU_S -> List(Bool(true)), -// FCVT_L_D -> List(Bool(true)), -// FCVT_LU_D -> List(Bool(true)), -// FCVT_W_D -> List(Bool(true)), -// FCVT_WU_D -> List(Bool(true)), -// FCVT_S_L -> List(Bool(true)), -// FCVT_S_LU -> List(Bool(true)), -// FCVT_S_W -> List(Bool(true)), -// FCVT_S_WU -> List(Bool(true)), -// FCVT_D_L -> List(Bool(true)), -// FCVT_D_LU -> List(Bool(true)), -// FCVT_D_W -> List(Bool(true)), -// FCVT_D_WU -> List(Bool(true)), -// FCVT_S_D -> List(Bool(true)), -// FCVT_D_S -> List(Bool(true)), -// FEQ_S -> List(Bool(true)), -// FLT_S -> List(Bool(true)), -// FLE_S -> List(Bool(true)), -// FEQ_D -> List(Bool(true)), -// FLT_D -> List(Bool(true)), -// FLE_D -> List(Bool(true)), -// FMIN_S -> List(Bool(true)), -// FMAX_S -> List(Bool(true)), -// FMIN_D -> List(Bool(true)), -// FMAX_D -> List(Bool(true)), -// MFTX_S -> List(Bool(true)), -// MFTX_D -> List(Bool(true)), -// MFFSR -> List(Bool(true)), -// MXTF_S -> List(Bool(true)), -// MXTF_D -> List(Bool(true)), -// MTFSR -> List(Bool(true)), -// FLW -> List(Bool(true)), -// FLD -> List(Bool(true)), -// FSW -> List(Bool(true)), -// FSD -> List(Bool(true)), -// FMADD_S -> List(Bool(true)), -// FMSUB_S -> List(Bool(true)), -// FNMSUB_S -> List(Bool(true)), -// FNMADD_S -> List(Bool(true)), -// FMADD_D -> List(Bool(true)), -// FMSUB_D -> List(Bool(true)), -// FNMSUB_D -> List(Bool(true)), -// FNMADD_D -> List(Bool(true)) -// )); -// val id_fp_val :: Nil = fp; + + val fpdec = new rocketFPUDecoder + fpdec.io.inst := io.dpath.inst 
val xpr64 = Y; val cs = @@ -273,12 +211,12 @@ class rocketCtrl extends Component // Instructions that have not yet been implemented // Faking these for now so akaros will boot - MFFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MTFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLW-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLD-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSW-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSD-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N) + //MFFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + //MTFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLD-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSW-> List(Y, BR_N, REN_N,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSD-> List(Y, BR_N, REN_N,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N) /* // floating point FLW-> List(FPU_Y, BR_N, 
REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), @@ -293,6 +231,7 @@ class rocketCtrl extends Component val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_sel_alu1 :: id_fn_dw :: id_fn_alu :: csremainder = cs; val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = csremainder; + val id_raddr3 = io.dpath.inst(16,12); val id_raddr2 = io.dpath.inst(21,17); val id_raddr1 = io.dpath.inst(26,22); val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27)); @@ -300,23 +239,7 @@ class rocketCtrl extends Component val id_console_out_val = id_wen_pcr.toBool && (id_raddr2 === PCR_CONSOLE); val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) - val dcache_miss = Reg(io.dmem.resp_miss, resetVal = Bool(false)); - - val sboard = new rocketCtrlSboard(); - sboard.io.raddra := id_raddr2.toUFix; - sboard.io.raddrb := id_raddr1.toUFix; - sboard.io.raddrc := id_waddr.toUFix; - - // scoreboard set (for D$ misses, div, mul) - sboard.io.set := wb_reg_div_mul_val | dcache_miss; - sboard.io.seta := io.dpath.wb_waddr; - - sboard.io.clr := io.dpath.sboard_clr; - sboard.io.clra := io.dpath.sboard_clra; - - val id_stall_raddr2 = sboard.io.stalla; - val id_stall_raddr1 = sboard.io.stallb; - val id_stall_waddr = sboard.io.stallc; + val wb_reg_dcache_miss = Reg(io.dmem.resp_miss, resetVal = Bool(false)); val id_reg_btb_hit = Reg(resetVal = Bool(false)); val id_reg_xcpt_itlb = Reg(resetVal = Bool(false)); @@ -332,6 +255,8 @@ class rocketCtrl extends Component val ex_reg_mem_val = Reg(){Bool()}; val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; val ex_reg_mem_type = Reg(){UFix(width = 3)}; + val ex_reg_wen = Reg(resetVal = Bool(false)); + val ex_reg_fp_wen = Reg(resetVal = Bool(false)); val ex_reg_eret = 
Reg(resetVal = Bool(false)); val ex_reg_replay_next = Reg(resetVal = Bool(false)); val ex_reg_inst_di = Reg(resetVal = Bool(false)); @@ -341,11 +266,13 @@ class rocketCtrl extends Component val ex_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val ex_reg_xcpt_illegal = Reg(resetVal = Bool(false)); val ex_reg_xcpt_privileged = Reg(resetVal = Bool(false)); - val ex_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); + val ex_reg_fp_val = Reg(resetVal = Bool(false)); val ex_reg_replay = Reg(resetVal = Bool(false)); val ex_reg_load_use = Reg(resetVal = Bool(false)); + val mem_reg_wen = Reg(resetVal = Bool(false)); + val mem_reg_fp_wen = Reg(resetVal = Bool(false)); val mem_reg_inst_di = Reg(resetVal = Bool(false)); val mem_reg_inst_ei = Reg(resetVal = Bool(false)); val mem_reg_flush_inst = Reg(resetVal = Bool(false)); @@ -358,6 +285,8 @@ class rocketCtrl extends Component val mem_reg_replay = Reg(resetVal = Bool(false)); val mem_reg_kill = Reg(resetVal = Bool(false)); + val wb_reg_wen = Reg(resetVal = Bool(false)); + val wb_reg_fp_wen = Reg(resetVal = Bool(false)); val wb_reg_inst_di = Reg(resetVal = Bool(false)); val wb_reg_inst_ei = Reg(resetVal = Bool(false)); val wb_reg_flush_inst = Reg(resetVal = Bool(false)); @@ -384,8 +313,7 @@ class rocketCtrl extends Component } // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim) -// val illegal_inst = !(id_int_val.toBool || id_fp_val.toBool) || (id_eret.toBool && io.dpath.status(SR_ET).toBool); - val illegal_inst = !id_int_val.toBool || (id_eret.toBool && io.dpath.status(SR_ET).toBool); + val illegal_inst = !(id_int_val.toBool || fpdec.io.valid) || (id_eret.toBool && io.dpath.status(SR_ET).toBool); when (reset.toBool || io.dpath.killd) { ex_reg_br_type <== BR_N; @@ -393,6 +321,8 @@ class rocketCtrl extends Component ex_reg_div_val <== Bool(false); ex_reg_mul_val <== Bool(false); ex_reg_mem_val <== Bool(false); + ex_reg_wen <== 
Bool(false); + ex_reg_fp_wen <== Bool(false); ex_reg_eret <== Bool(false); ex_reg_replay_next <== Bool(false); ex_reg_inst_di <== Bool(false); @@ -402,8 +332,8 @@ class rocketCtrl extends Component ex_reg_xcpt_itlb <== Bool(false); ex_reg_xcpt_illegal <== Bool(false); ex_reg_xcpt_privileged <== Bool(false); - ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== Bool(false); + ex_reg_fp_val <== Bool(false); ex_reg_replay <== Bool(false); ex_reg_load_use <== Bool(false); } @@ -413,6 +343,8 @@ class rocketCtrl extends Component ex_reg_div_val <== id_div_val.toBool; ex_reg_mul_val <== id_mul_val.toBool; ex_reg_mem_val <== id_mem_val.toBool; + ex_reg_wen <== id_wen.toBool; + ex_reg_fp_wen <== fpdec.io.wen; ex_reg_eret <== id_eret.toBool; ex_reg_replay_next <== id_replay_next.toBool; ex_reg_inst_di <== (id_irq === I_DI); @@ -422,9 +354,8 @@ class rocketCtrl extends Component ex_reg_xcpt_itlb <== id_reg_xcpt_itlb; ex_reg_xcpt_illegal <== illegal_inst; ex_reg_xcpt_privileged <== (id_privileged & ~io.dpath.status(SR_S)).toBool; -// ex_reg_xcpt_fpu <== id_fp_val.toBool; - ex_reg_xcpt_fpu <== Bool(false); ex_reg_xcpt_syscall <== id_syscall.toBool; + ex_reg_fp_val <== fpdec.io.valid; ex_reg_replay <== id_reg_replay || ex_reg_replay_next; ex_reg_load_use <== id_load_use; } @@ -461,6 +392,8 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killx) { mem_reg_div_mul_val <== Bool(false); + mem_reg_wen <== Bool(false); + mem_reg_fp_wen <== Bool(false); mem_reg_eret <== Bool(false); mem_reg_mem_val <== Bool(false); mem_reg_inst_di <== Bool(false); @@ -475,6 +408,8 @@ class rocketCtrl extends Component } otherwise { mem_reg_div_mul_val <== ex_reg_div_val || ex_reg_mul_val; + mem_reg_wen <== ex_reg_wen; + mem_reg_fp_wen <== ex_reg_fp_wen; mem_reg_eret <== ex_reg_eret; mem_reg_mem_val <== ex_reg_mem_val; mem_reg_inst_di <== ex_reg_inst_di; @@ -484,13 +419,15 @@ class rocketCtrl extends Component mem_reg_xcpt_itlb <== ex_reg_xcpt_itlb; mem_reg_xcpt_illegal <== 
ex_reg_xcpt_illegal; mem_reg_xcpt_privileged <== ex_reg_xcpt_privileged; - mem_reg_xcpt_fpu <== ex_reg_xcpt_fpu; + mem_reg_xcpt_fpu <== ex_reg_fp_val && !io.dpath.status(SR_EF).toBool; mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; } mem_reg_mem_cmd <== ex_reg_mem_cmd; mem_reg_mem_type <== ex_reg_mem_type; when (io.dpath.killm) { + wb_reg_wen <== Bool(false); + wb_reg_fp_wen <== Bool(false); wb_reg_eret <== Bool(false); wb_reg_inst_di <== Bool(false); wb_reg_inst_ei <== Bool(false); @@ -498,6 +435,8 @@ class rocketCtrl extends Component wb_reg_div_mul_val <== Bool(false); } otherwise { + wb_reg_wen <== mem_reg_wen; + wb_reg_fp_wen <== mem_reg_fp_wen; wb_reg_eret <== mem_reg_eret; wb_reg_inst_di <== mem_reg_inst_di; wb_reg_inst_ei <== mem_reg_inst_ei; @@ -505,6 +444,42 @@ class rocketCtrl extends Component wb_reg_div_mul_val <== mem_reg_div_mul_val; } + val sboard = new rocketCtrlSboard(); + sboard.io.raddra := id_raddr2.toUFix; + sboard.io.raddrb := id_raddr1.toUFix; + sboard.io.raddrc := id_waddr.toUFix; + + // scoreboard set (for D$ misses, div, mul) + sboard.io.set := wb_reg_div_mul_val || wb_reg_dcache_miss && wb_reg_wen; + sboard.io.seta := io.dpath.wb_waddr; + + sboard.io.clr := io.dpath.sboard_clr; + sboard.io.clra := io.dpath.sboard_clra; + + val id_stall_raddr2 = id_renx2.toBool && sboard.io.stalla; + val id_stall_raddr1 = id_renx1.toBool && sboard.io.stallb; + val id_stall_waddr = id_wen.toBool && sboard.io.stallc; + + var id_stall_fpu = Bool(false) + if (HAVE_FPU) { + val fp_sboard = new rocketCtrlSboard(); + fp_sboard.io.raddra := id_raddr1.toUFix; + fp_sboard.io.raddrb := id_raddr2.toUFix; + fp_sboard.io.raddrc := id_raddr3.toUFix; + fp_sboard.io.raddrd := id_waddr.toUFix; + + fp_sboard.io.set := wb_reg_dcache_miss && wb_reg_fp_wen; + fp_sboard.io.seta := io.dpath.wb_waddr; + + fp_sboard.io.clr := io.dpath.fp_sboard_clr; + fp_sboard.io.clra := io.dpath.fp_sboard_clra; + + id_stall_fpu = fpdec.io.ren1 && fp_sboard.io.stalla || + fpdec.io.ren2 && 
fp_sboard.io.stallb || + fpdec.io.ren3 && fp_sboard.io.stallc || + fpdec.io.wen && fp_sboard.io.stalld + } + // exception handling // FIXME: verify PC in MEM stage points to valid, restartable instruction val p_irq_timer = (io.dpath.status(15).toBool && io.dpath.irq_timer); @@ -565,7 +540,7 @@ class rocketCtrl extends Component // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions - val replay_ex = dcache_miss && ex_reg_load_use || mem_reg_flush_inst || + val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) || ex_reg_div_val && !io.dpath.div_rdy || ex_reg_mul_val && !io.dpath.mul_rdy @@ -601,57 +576,63 @@ class rocketCtrl extends Component io.imem.req_val := take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay) // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. - val ex_mem_cmd_load = - ex_reg_mem_val && ((ex_reg_mem_cmd === M_XRD) || ex_reg_mem_cmd(3).toBool); - val data_hazard_ex = - ((id_renx1.toBool && (id_raddr1 === io.dpath.ex_waddr)) || - (id_renx2.toBool && (id_raddr2 === io.dpath.ex_waddr)) || - (id_wen.toBool && (id_waddr === io.dpath.ex_waddr))); - val id_ex_hazard = data_hazard_ex && (ex_mem_cmd_load || ex_reg_div_val || ex_reg_mul_val) + val data_hazard_ex = ex_reg_wen && + (id_renx1.toBool && id_raddr1 === io.dpath.ex_waddr || + id_renx2.toBool && id_raddr2 === io.dpath.ex_waddr || + id_wen.toBool && id_waddr === io.dpath.ex_waddr) + val fp_data_hazard_ex = ex_reg_fp_wen && + (fpdec.io.ren1 && id_raddr1 === io.dpath.ex_waddr || + fpdec.io.ren2 && id_raddr2 === io.dpath.ex_waddr || + fpdec.io.ren3 && id_raddr3 === io.dpath.ex_waddr || + fpdec.io.wen && id_waddr === io.dpath.ex_waddr) + val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_val || ex_reg_mul_val) || + fp_data_hazard_ex && ex_reg_mem_val // stall for 
RAW/WAW hazards on LB/LH and mul/div in memory stage. - val mem_mem_cmd_load = - mem_reg_mem_val && ((mem_reg_mem_cmd === M_XRD) || mem_reg_mem_cmd(3).toBool); - val mem_mem_cmd_load_bh = - mem_mem_cmd_load && - ((mem_reg_mem_type === MT_B) || - (mem_reg_mem_type === MT_BU) || - (mem_reg_mem_type === MT_H) || - (mem_reg_mem_type === MT_HU)); - val data_hazard_mem = - (id_renx1.toBool && (id_raddr1 === io.dpath.mem_waddr)) || - (id_renx2.toBool && (id_raddr2 === io.dpath.mem_waddr)) || - (id_wen.toBool && (id_waddr === io.dpath.mem_waddr)); - val id_mem_hazard = data_hazard_mem && (mem_mem_cmd_load_bh || mem_reg_div_mul_val) - id_load_use := mem_mem_cmd_load && data_hazard_mem + val mem_mem_cmd_bh = + (mem_reg_mem_type === MT_B) || (mem_reg_mem_type === MT_BU) || + (mem_reg_mem_type === MT_H) || (mem_reg_mem_type === MT_HU) + val data_hazard_mem = mem_reg_wen && + (id_renx1.toBool && id_raddr1 === io.dpath.mem_waddr || + id_renx2.toBool && id_raddr2 === io.dpath.mem_waddr || + id_wen.toBool && id_waddr === io.dpath.mem_waddr) + val fp_data_hazard_mem = mem_reg_fp_wen && + (fpdec.io.ren1 && id_raddr1 === io.dpath.mem_waddr || + fpdec.io.ren2 && id_raddr2 === io.dpath.mem_waddr || + fpdec.io.ren3 && id_raddr3 === io.dpath.mem_waddr || + fpdec.io.wen && id_waddr === io.dpath.mem_waddr) + val id_mem_hazard = data_hazard_mem && (mem_mem_cmd_bh || mem_reg_div_mul_val) + id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
- val data_hazard_wb = - ((id_renx1.toBool && (id_raddr1 === io.dpath.wb_waddr)) || - (id_renx2.toBool && (id_raddr2 === io.dpath.wb_waddr)) || - (id_wen.toBool && (id_waddr === io.dpath.wb_waddr))); - val id_wb_hazard = data_hazard_wb && (dcache_miss || wb_reg_div_mul_val) + val data_hazard_wb = wb_reg_wen && + (id_renx1.toBool && id_raddr1 === io.dpath.wb_waddr || + id_renx2.toBool && id_raddr2 === io.dpath.wb_waddr || + id_wen.toBool && id_waddr === io.dpath.wb_waddr) + val fp_data_hazard_wb = wb_reg_fp_wen && + (fpdec.io.ren1 && id_raddr1 === io.dpath.wb_waddr || + fpdec.io.ren2 && id_raddr2 === io.dpath.wb_waddr || + fpdec.io.ren3 && id_raddr3 === io.dpath.wb_waddr || + fpdec.io.wen && id_waddr === io.dpath.wb_waddr) + val id_wb_hazard = data_hazard_wb && (wb_reg_dcache_miss || wb_reg_div_mul_val) || + fp_data_hazard_wb && wb_reg_dcache_miss // for divider, multiplier, load miss writeback - val mem_wb = Reg(io.dmem.resp_replay, resetVal = Bool(false)) // delayed for subword extension - val mul_wb = io.dpath.mul_result_val && !mem_wb; - val div_wb = io.dpath.div_result_val && !io.dpath.mul_result_val && !mem_wb; + val mul_wb = io.dpath.mul_result_val && !io.dpath.mem_wb; + val div_wb = io.dpath.div_result_val && !io.dpath.mul_result_val && !io.dpath.mem_wb; val ctrl_stalld = !take_pc && ( id_ex_hazard || id_mem_hazard || id_wb_hazard || - id_renx2.toBool && id_stall_raddr2 || - id_renx1.toBool && id_stall_raddr1 || - id_wen.toBool && id_stall_waddr || + id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || + id_stall_fpu || id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || id_console_out_val && !io.console.rdy || - id_div_val.toBool && (!io.dpath.div_rdy || ex_reg_div_val) || - id_mul_val.toBool && (!io.dpath.mul_rdy || ex_reg_mul_val) || io.dpath.div_result_val || io.dpath.mul_result_val || - mem_wb + io.dpath.mem_wb ); val ctrl_stallf = ctrl_stalld; @@ -668,7 +649,7 @@ class 
rocketCtrl extends Component io.dpath.killx := kill_ex; io.dpath.killm := kill_mem; - io.dpath.mem_load := mem_reg_mem_val && ((mem_reg_mem_cmd === M_XRD) || mem_reg_mem_cmd(3).toBool); + io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen io.dpath.ren2 := id_renx2.toBool; io.dpath.ren1 := id_renx1.toBool; io.dpath.sel_alu2 := id_sel_alu2; @@ -681,7 +662,10 @@ class rocketCtrl extends Component io.dpath.mul_fn := id_mul_fn; io.dpath.mul_val := id_mul_val.toBool; io.dpath.mul_wb := mul_wb; - io.dpath.wen := id_wen.toBool; + io.dpath.ex_fp_val:= ex_reg_fp_val; + io.dpath.ex_wen := ex_reg_wen; + io.dpath.mem_wen := mem_reg_wen; + io.dpath.wb_wen := wb_reg_wen; io.dpath.sel_wa := id_sel_wa.toBool; io.dpath.sel_wb := id_sel_wb; io.dpath.ren_pcr := id_ren_pcr.toBool; diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index 61cc3bda..09f0ba88 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -13,9 +13,11 @@ class ioCtrlSboard extends Bundle() val raddra = UFix(5, INPUT); val raddrb = UFix(5, INPUT); val raddrc = UFix(5, INPUT); + val raddrd = UFix(5, INPUT); val stalla = Bool(OUTPUT); val stallb = Bool(OUTPUT); val stallc = Bool(OUTPUT); + val stalld = Bool(OUTPUT); } class rocketCtrlSboard extends Component @@ -30,6 +32,7 @@ class rocketCtrlSboard extends Component io.stalla := reg_busy(io.raddra).toBool; io.stallb := reg_busy(io.raddrb).toBool; io.stallc := reg_busy(io.raddrc).toBool; + io.stalld := reg_busy(io.raddrd).toBool; } class ioCtrlCnt extends Bundle() diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index b2b82385..7593ea52 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -34,6 +34,7 @@ class ioDpathAll extends Bundle() val imem = new ioDpathImem(); val ptbr_wen = Bool(OUTPUT); val ptbr = UFix(PADDR_BITS, OUTPUT); + val fpu = new ioDpathFPU(); } class rocketDpath extends Component @@ -90,7 +91,6 @@ 
class rocketDpath extends Component val ex_reg_ctrl_div_val = Reg(resetVal = Bool(false)); val ex_reg_ctrl_div_fn = Reg() { UFix() }; val ex_reg_ctrl_sel_wb = Reg() { UFix() }; - val ex_reg_ctrl_wen = Reg(resetVal = Bool(false)); val ex_reg_ctrl_ren_pcr = Reg(resetVal = Bool(false)); val ex_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val ex_wdata = Wire() { Bits() }; @@ -104,7 +104,6 @@ class rocketDpath extends Component val mem_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val mem_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); val mem_reg_ctrl_div_val = Reg(resetVal = Bool(false)); - val mem_reg_ctrl_wen = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val mem_wdata = Wire() { Bits() }; @@ -115,12 +114,11 @@ class rocketDpath extends Component val wb_reg_wdata = Reg() { Bits() }; val wb_reg_raddr2 = Reg() { UFix() }; val wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); - val wb_reg_ctrl_wen = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val wb_wdata = Wire() { Bits() }; - val r_dmem_resp_val = Reg(resetVal = Bool(false)); val r_dmem_resp_replay = Reg(resetVal = Bool(false)); + val r_dmem_fp_replay = Reg(resetVal = Bool(false)); val r_dmem_resp_waddr = Reg() { UFix() }; // instruction fetch stage @@ -210,15 +208,15 @@ class rocketDpath extends Component Mux(r_dmem_resp_replay, io.dmem.resp_data_subword, Mux(io.ctrl.div_wb, div_result, Mux(io.ctrl.mul_wb, mul_result, - Mux(id_raddr1 != UFix(0, 5) && (ex_reg_ctrl_wen || ex_reg_ctrl_ll_wb) && id_raddr1 === ex_reg_waddr, ex_wdata, - Mux(id_raddr1 != UFix(0, 5) && (mem_reg_ctrl_wen || mem_reg_ctrl_ll_wb) && id_raddr1 === mem_reg_waddr, mem_wdata, - Mux(id_raddr1 != UFix(0, 5) && (wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, + Mux(id_raddr1 != UFix(0, 5) && (io.ctrl.ex_wen || ex_reg_ctrl_ll_wb) && id_raddr1 === ex_reg_waddr, ex_wdata, + Mux(id_raddr1 != UFix(0, 5) && (io.ctrl.mem_wen || mem_reg_ctrl_ll_wb) 
&& id_raddr1 === mem_reg_waddr, mem_wdata, + Mux(id_raddr1 != UFix(0, 5) && (io.ctrl.wb_wen || wb_reg_ctrl_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, id_rdata1)))))); val id_rs2 = - Mux(id_raddr2 != UFix(0, 5) && (ex_reg_ctrl_wen || ex_reg_ctrl_ll_wb) && id_raddr2 === ex_reg_waddr, ex_wdata, - Mux(id_raddr2 != UFix(0, 5) && (mem_reg_ctrl_wen || mem_reg_ctrl_ll_wb) && id_raddr2 === mem_reg_waddr, mem_wdata, - Mux(id_raddr2 != UFix(0, 5) && (wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, + Mux(id_raddr2 != UFix(0, 5) && (io.ctrl.ex_wen || ex_reg_ctrl_ll_wb) && id_raddr2 === ex_reg_waddr, ex_wdata, + Mux(id_raddr2 != UFix(0, 5) && (io.ctrl.mem_wen || mem_reg_ctrl_ll_wb) && id_raddr2 === mem_reg_waddr, mem_wdata, + Mux(id_raddr2 != UFix(0, 5) && (io.ctrl.wb_wen || wb_reg_ctrl_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, id_rdata2))); io.ctrl.inst := id_reg_inst; @@ -244,7 +242,6 @@ class rocketDpath extends Component ex_reg_valid <== Bool(false); ex_reg_ctrl_div_val <== Bool(false); ex_reg_ctrl_mul_val <== Bool(false); - ex_reg_ctrl_wen <== Bool(false); ex_reg_ctrl_wen_pcr <== Bool(false); ex_reg_ctrl_eret <== Bool(false); } @@ -252,7 +249,6 @@ class rocketDpath extends Component ex_reg_valid <== id_reg_valid; ex_reg_ctrl_div_val <== io.ctrl.div_val; ex_reg_ctrl_mul_val <== io.ctrl.mul_val; - ex_reg_ctrl_wen <== io.ctrl.wen; ex_reg_ctrl_wen_pcr <== io.ctrl.wen_pcr; ex_reg_ctrl_eret <== io.ctrl.id_eret; } @@ -307,8 +303,14 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module io.dmem.req_addr := ex_jr_target_extended.toUFix; - io.dmem.req_data := ex_reg_rs2; - io.dmem.req_tag := ex_reg_waddr; + if (HAVE_FPU) { + io.dmem.req_data := Mux(io.ctrl.ex_fp_val, io.fpu.store_data, ex_reg_rs2) + io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val).toUFix + } + else { + io.dmem.req_data := ex_reg_rs2 + io.dmem.req_tag := Cat(ex_reg_waddr, 
Bool(false)).toUFix + } // processor control regfile read pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_eret; @@ -361,12 +363,10 @@ class rocketDpath extends Component when (io.ctrl.killx) { mem_reg_valid <== Bool(false); - mem_reg_ctrl_wen <== Bool(false); mem_reg_ctrl_wen_pcr <== Bool(false); } otherwise { mem_reg_valid <== ex_reg_valid; - mem_reg_ctrl_wen <== ex_reg_ctrl_wen; mem_reg_ctrl_wen_pcr <== ex_reg_ctrl_wen_pcr; } @@ -379,9 +379,10 @@ class rocketDpath extends Component // 32/64 bit load handling (moved to earlier in file) // writeback stage - r_dmem_resp_val <== io.dmem.resp_val; - r_dmem_resp_replay <== io.dmem.resp_replay; - r_dmem_resp_waddr <== io.dmem.resp_tag.toUFix + val dmem_resp_fpu = if (HAVE_FPU) io.dmem.resp_tag(0).toBool else Bool(false) + r_dmem_resp_replay <== io.dmem.resp_replay && !dmem_resp_fpu; + r_dmem_fp_replay <== io.dmem.resp_replay && dmem_resp_fpu; + r_dmem_resp_waddr <== io.dmem.resp_tag.toUFix >> UFix(1) wb_reg_pc <== mem_reg_pc; wb_reg_waddr <== mem_reg_waddr; @@ -391,26 +392,27 @@ class rocketDpath extends Component when (io.ctrl.killm) { wb_reg_valid <== Bool(false); - wb_reg_ctrl_wen <== Bool(false); wb_reg_ctrl_wen_pcr <== Bool(false); } otherwise { wb_reg_valid <== mem_reg_valid; - wb_reg_ctrl_wen <== mem_reg_ctrl_wen && !io.dmem.resp_miss; wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } // regfile write wb_wdata := Mux(Reg(io.ctrl.mem_load), io.dmem.resp_data_subword, wb_reg_wdata) rfile.io.w0.addr := wb_reg_waddr; - rfile.io.w0.en := wb_reg_ctrl_wen || wb_reg_ctrl_ll_wb; + rfile.io.w0.en := io.ctrl.wb_wen || wb_reg_ctrl_ll_wb; rfile.io.w0.data := wb_wdata io.ctrl.wb_waddr := wb_reg_waddr; + io.ctrl.mem_wb := r_dmem_resp_replay; // scoreboard clear (for div/mul and D$ load miss writebacks) io.ctrl.sboard_clr := id_ctrl_ll_wb; io.ctrl.sboard_clra := id_waddr; + io.ctrl.fp_sboard_clr := r_dmem_fp_replay; + io.ctrl.fp_sboard_clra := r_dmem_resp_waddr; // processor control regfile write pcr.io.w.addr := wb_reg_raddr2; 
diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 8cbe948d..ced09299 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -159,9 +159,9 @@ class rocketDpathPCR extends Component reg_status_ux <== io.w.data(SR_UX).toBool; reg_status_s <== io.w.data(SR_S).toBool; reg_status_ps <== io.w.data(SR_PS).toBool; - reg_status_ev <== HAVE_VEC && io.w.data(SR_EV).toBool; - reg_status_ef <== HAVE_FPU && io.w.data(SR_EF).toBool; - reg_status_ec <== HAVE_RVC && io.w.data(SR_EC).toBool; + reg_status_ev <== Bool(HAVE_VEC) && io.w.data(SR_EV).toBool; + reg_status_ef <== Bool(HAVE_FPU) && io.w.data(SR_EF).toBool; + reg_status_ec <== Bool(HAVE_RVC) && io.w.data(SR_EC).toBool; reg_status_et <== io.w.data(SR_ET).toBool; } when (io.w.addr === PCR_EPC) { reg_epc <== io.w.data(VADDR_BITS,0).toUFix; } diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala new file mode 100644 index 00000000..6d354d21 --- /dev/null +++ b/rocket/src/main/scala/fpu.scala @@ -0,0 +1,148 @@ +package Top + +import Chisel._ +import Node._; +import Constants._ +import Instructions._ + +class rocketFPUDecoder extends Component +{ + val io = new Bundle { + val inst = Bits(32, INPUT) + val valid = Bool(OUTPUT) + val wen = Bool(OUTPUT) + val ren1 = Bool(OUTPUT) + val ren2 = Bool(OUTPUT) + val ren3 = Bool(OUTPUT) + } +// val fp = +// ListLookup(io.dpath.inst, +// List(FPU_N, FPU_N, FPU_N, FPU_N, FPU_N), +// Array( +// FMOVZ -> List(Bool(true)), +// FMOVN -> List(Bool(true)), +// FADD_S -> List(Bool(true)), +// FSUB_S -> List(Bool(true)), +// FMUL_S -> List(Bool(true)), +// FDIV_S -> List(Bool(true)), +// FSQRT_S -> List(Bool(true)), +// FSGNJ_S -> List(Bool(true)), +// FSGNJN_S -> List(Bool(true)), +// FSGNJX_S -> List(Bool(true)), +// FADD_D -> List(Bool(true)), +// FSUB_D -> List(Bool(true)), +// FMUL_D -> List(Bool(true)), +// FDIV_D -> List(Bool(true)), +// FSQRT_D -> List(Bool(true)), +// FSGNJ_D 
-> List(Bool(true)), +// FSGNJN_D -> List(Bool(true)), +// FSGNJX_D -> List(Bool(true)), +// FCVT_L_S -> List(Bool(true)), +// FCVT_LU_S -> List(Bool(true)), +// FCVT_W_S -> List(Bool(true)), +// FCVT_WU_S -> List(Bool(true)), +// FCVT_L_D -> List(Bool(true)), +// FCVT_LU_D -> List(Bool(true)), +// FCVT_W_D -> List(Bool(true)), +// FCVT_WU_D -> List(Bool(true)), +// FCVT_S_L -> List(Bool(true)), +// FCVT_S_LU -> List(Bool(true)), +// FCVT_S_W -> List(Bool(true)), +// FCVT_S_WU -> List(Bool(true)), +// FCVT_D_L -> List(Bool(true)), +// FCVT_D_LU -> List(Bool(true)), +// FCVT_D_W -> List(Bool(true)), +// FCVT_D_WU -> List(Bool(true)), +// FCVT_S_D -> List(Bool(true)), +// FCVT_D_S -> List(Bool(true)), +// FEQ_S -> List(Bool(true)), +// FLT_S -> List(Bool(true)), +// FLE_S -> List(Bool(true)), +// FEQ_D -> List(Bool(true)), +// FLT_D -> List(Bool(true)), +// FLE_D -> List(Bool(true)), +// FMIN_S -> List(Bool(true)), +// FMAX_S -> List(Bool(true)), +// FMIN_D -> List(Bool(true)), +// FMAX_D -> List(Bool(true)), +// MFTX_S -> List(Bool(true)), +// MFTX_D -> List(Bool(true)), +// MFFSR -> List(Bool(true)), +// MXTF_S -> List(Bool(true)), +// MXTF_D -> List(Bool(true)), +// MTFSR -> List(Bool(true)), +// FLW -> List(FPU_Y, FPU_Y, FPU_N, FPU_N, FPU_N), +// FLD -> List(FPU_Y, FPU_Y, FPU_N, FPU_N, FPU_N), +// FSW -> List(FPU_Y, FPU_N, FPU_N, FPU_Y, FPU_N), +// FSD -> List(FPU_Y, FPU_N, FPU_N, FPU_Y, FPU_N) +// FMADD_S -> List(Bool(true)), +// FMSUB_S -> List(Bool(true)), +// FNMSUB_S -> List(Bool(true)), +// FNMADD_S -> List(Bool(true)), +// FMADD_D -> List(Bool(true)), +// FMSUB_D -> List(Bool(true)), +// FNMSUB_D -> List(Bool(true)), +// FNMADD_D -> List(Bool(true)) +// )); + + val N = Bool(false) + val Y = Bool(true) + val decoder = ListLookup(io.inst, + List (N, N, N, N, N), + Array(FLW -> List(Y, Y, N, N, N), + FLD -> List(Y, Y, N, N, N), + FSW -> List(Y, N, N, Y, N), + FSD -> List(Y, N, N, Y, N))) + val valid :: wen :: ren1 :: ren2 :: ren3 :: Nil = decoder + + io.valid 
:= valid.toBool + io.wen := wen.toBool + io.ren1 := ren1.toBool + io.ren2 := ren2.toBool + io.ren3 := ren3.toBool +} + +class ioDpathFPU extends Bundle { + val store_data = Bits(64, INPUT) +} + +class rocketFPU extends Component +{ + val io = new Bundle { + val req_valid = Bool(INPUT) + val req_ready = Bool(OUTPUT) + val req_cmd = Bits(6, INPUT) + val req_inst = Bits(32, INPUT) + + val killx = Bool(INPUT) + val killm = Bool(INPUT) + + val dmem = new ioDmem(List("resp_val", "resp_tag", "resp_data")) + val dpath = new ioDpathFPU().flip() + } + + val ex_reg_inst = Reg() { Bits() } + when (io.req_valid) { + ex_reg_inst <== io.req_inst + } + + // load response + val dmem_resp_val_fpu = io.dmem.resp_val && io.dmem.resp_tag(0).toBool + val load_wb = Reg(dmem_resp_val_fpu, resetVal = Bool(false)) + val load_wb_data = Reg() { Bits() } + val load_wb_tag = Reg() { UFix() } + when (dmem_resp_val_fpu) { + load_wb_data <== io.dmem.resp_data + load_wb_tag <== io.dmem.resp_tag.toUFix >> UFix(1) + } + + // regfile + val regfile = Mem4(32, load_wb_data); + regfile.setReadLatency(0); + regfile.setTarget('inst); + regfile.write(load_wb_tag, load_wb_data, load_wb); + + io.req_ready := Bool(true) + + io.dpath.store_data := regfile(ex_reg_inst(21,17)) +} From ebed56500e3ed46ce72e30204b0e300e15825934 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Feb 2012 01:56:11 -0800 Subject: [PATCH 0132/1087] fix mul/wb hazard checks I erroneously assumed that those instructions set id_wen. 
--- rocket/src/main/scala/ctrl.scala | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index c78e0878..a9761601 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -181,20 +181,20 @@ class rocketCtrl extends Component SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), SRAW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + 
MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, 
Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,N), EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y,Y), @@ -601,7 +601,7 @@ class rocketCtrl extends Component fpdec.io.ren2 && id_raddr2 === io.dpath.mem_waddr || fpdec.io.ren3 && id_raddr3 === io.dpath.mem_waddr || fpdec.io.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = data_hazard_mem && (mem_mem_cmd_bh || mem_reg_div_mul_val) + val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val) id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
From d471a8b2dac6789993a34bbe00ec486f675739da Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Feb 2012 04:21:05 -0800 Subject: [PATCH 0133/1087] arbitrate for LLFU writebacks in MEM stage --- rocket/src/main/scala/ctrl.scala | 24 +++------- rocket/src/main/scala/dpath.scala | 76 ++++++++++++++++--------------- rocket/src/main/scala/fpu.scala | 5 +- 3 files changed, 50 insertions(+), 55 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index a9761601..937148ec 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -26,10 +26,8 @@ class ioCtrlDpath extends Bundle() val fn_alu = UFix(4, OUTPUT); val mul_val = Bool(OUTPUT); val mul_fn = UFix(2, OUTPUT); - val mul_wb = Bool(OUTPUT); val div_val = Bool(OUTPUT); val div_fn = UFix(2, OUTPUT); - val div_wb = Bool(OUTPUT); val sel_wa = Bool(OUTPUT); val sel_wb = UFix(3, OUTPUT); val ren_pcr = Bool(OUTPUT); @@ -343,7 +341,7 @@ class rocketCtrl extends Component ex_reg_div_val <== id_div_val.toBool; ex_reg_mul_val <== id_mul_val.toBool; ex_reg_mem_val <== id_mem_val.toBool; - ex_reg_wen <== id_wen.toBool; + ex_reg_wen <== id_wen.toBool && id_waddr != UFix(0); ex_reg_fp_wen <== fpdec.io.wen; ex_reg_eret <== id_eret.toBool; ex_reg_replay_next <== id_replay_next.toBool; @@ -533,10 +531,11 @@ class rocketCtrl extends Component val take_pc_wb = wb_reg_replay || wb_reg_exception || wb_reg_eret; take_pc <== take_pc_ex || take_pc_wb; - // replay mem stage PC on a DTLB miss - val replay_mem = io.dtlb_miss || io.dmem.resp_nack || mem_reg_replay; - val kill_mem = io.dtlb_miss || io.dmem.resp_nack || take_pc_wb || mem_exception || mem_reg_kill; - val kill_dcache = io.dtlb_miss || take_pc_wb || mem_exception || mem_reg_kill; + // replay mem stage PC on a DTLB miss or a long-latency writeback + val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val + val replay_mem = io.dtlb_miss || mem_reg_wen && mem_ll_wb || 
io.dmem.resp_nack || mem_reg_replay + val kill_mem = io.dtlb_miss || mem_reg_wen && mem_ll_wb || io.dmem.resp_nack || take_pc_wb || mem_exception || mem_reg_kill + val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions @@ -617,10 +616,6 @@ class rocketCtrl extends Component val id_wb_hazard = data_hazard_wb && (wb_reg_dcache_miss || wb_reg_div_mul_val) || fp_data_hazard_wb && wb_reg_dcache_miss - // for divider, multiplier, load miss writeback - val mul_wb = io.dpath.mul_result_val && !io.dpath.mem_wb; - val div_wb = io.dpath.div_result_val && !io.dpath.mul_result_val && !io.dpath.mem_wb; - val ctrl_stalld = !take_pc && ( @@ -629,10 +624,7 @@ class rocketCtrl extends Component id_stall_fpu || id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || - id_console_out_val && !io.console.rdy || - io.dpath.div_result_val || - io.dpath.mul_result_val || - io.dpath.mem_wb + id_console_out_val && !io.console.rdy ); val ctrl_stallf = ctrl_stalld; @@ -658,10 +650,8 @@ class rocketCtrl extends Component io.dpath.fn_alu := id_fn_alu; io.dpath.div_fn := id_div_fn; io.dpath.div_val := id_div_val.toBool; - io.dpath.div_wb := div_wb; io.dpath.mul_fn := id_mul_fn; io.dpath.mul_val := id_mul_val.toBool; - io.dpath.mul_wb := mul_wb; io.dpath.ex_fp_val:= ex_reg_fp_val; io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 7593ea52..d2b74c5c 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -85,7 +85,6 @@ class rocketDpath extends Component val ex_reg_ctrl_eret = Reg(resetVal = Bool(false)); val ex_reg_ctrl_fn_dw = Reg() { UFix() }; val ex_reg_ctrl_fn_alu = Reg() { UFix() }; - val ex_reg_ctrl_ll_wb = 
Reg(resetVal = Bool(false)); val ex_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); val ex_reg_ctrl_mul_fn = Reg() { UFix() }; val ex_reg_ctrl_div_val = Reg(resetVal = Bool(false)); @@ -101,7 +100,6 @@ class rocketDpath extends Component val mem_reg_waddr = Reg() { UFix() }; val mem_reg_wdata = Reg() { Bits() }; val mem_reg_raddr2 = Reg() { UFix() }; - val mem_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val mem_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); val mem_reg_ctrl_div_val = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); @@ -113,10 +111,11 @@ class rocketDpath extends Component val wb_reg_waddr = Reg() { UFix() }; val wb_reg_wdata = Reg() { Bits() }; val wb_reg_raddr2 = Reg() { UFix() }; - val wb_reg_ctrl_ll_wb = Reg(resetVal = Bool(false)); val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); + val wb_reg_ll_wb = Reg(resetVal = Bool(false)); val wb_wdata = Wire() { Bits() }; + val dmem_resp_replay = Wire() { Bool() } val r_dmem_resp_replay = Reg(resetVal = Bool(false)); val r_dmem_fp_replay = Reg(resetVal = Bool(false)); val r_dmem_resp_waddr = Reg() { UFix() }; @@ -194,29 +193,21 @@ class rocketDpath extends Component val id_rdata1 = rfile.io.r1.data; // destination register selection - val id_ctrl_ll_wb = io.ctrl.div_wb || io.ctrl.mul_wb || r_dmem_resp_replay; val id_waddr = - Mux(r_dmem_resp_replay, r_dmem_resp_waddr, - Mux(io.ctrl.mul_wb, mul_result_tag, - Mux(io.ctrl.div_wb, div_result_tag, Mux(io.ctrl.sel_wa === WA_RD, id_reg_inst(31,27).toUFix, - Mux(io.ctrl.sel_wa === WA_RA, RA, - UFix(0, 5)))))); + RA); // WA_RA // bypass muxes val id_rs1 = - Mux(r_dmem_resp_replay, io.dmem.resp_data_subword, - Mux(io.ctrl.div_wb, div_result, - Mux(io.ctrl.mul_wb, mul_result, - Mux(id_raddr1 != UFix(0, 5) && (io.ctrl.ex_wen || ex_reg_ctrl_ll_wb) && id_raddr1 === ex_reg_waddr, ex_wdata, - Mux(id_raddr1 != UFix(0, 5) && (io.ctrl.mem_wen || mem_reg_ctrl_ll_wb) && id_raddr1 === mem_reg_waddr, mem_wdata, - Mux(id_raddr1 != 
UFix(0, 5) && (io.ctrl.wb_wen || wb_reg_ctrl_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, - id_rdata1)))))); + Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, ex_wdata, + Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, mem_wdata, + Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, + id_rdata1))); val id_rs2 = - Mux(id_raddr2 != UFix(0, 5) && (io.ctrl.ex_wen || ex_reg_ctrl_ll_wb) && id_raddr2 === ex_reg_waddr, ex_wdata, - Mux(id_raddr2 != UFix(0, 5) && (io.ctrl.mem_wen || mem_reg_ctrl_ll_wb) && id_raddr2 === mem_reg_waddr, mem_wdata, - Mux(id_raddr2 != UFix(0, 5) && (io.ctrl.wb_wen || wb_reg_ctrl_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, + Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, ex_wdata, + Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, mem_wdata, + Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, id_rdata2))); io.ctrl.inst := id_reg_inst; @@ -234,7 +225,6 @@ class rocketDpath extends Component ex_reg_ctrl_fn_alu <== io.ctrl.fn_alu; ex_reg_ctrl_mul_fn <== io.ctrl.mul_fn; ex_reg_ctrl_div_fn <== io.ctrl.div_fn; - ex_reg_ctrl_ll_wb <== id_ctrl_ll_wb; ex_reg_ctrl_sel_wb <== io.ctrl.sel_wb; ex_reg_ctrl_ren_pcr <== io.ctrl.ren_pcr; @@ -280,7 +270,7 @@ class rocketDpath extends Component div.io.div_waddr := ex_reg_waddr; div.io.dpath_rs1 := ex_reg_rs1; div.io.dpath_rs2 := ex_reg_rs2; - div.io.div_result_rdy := io.ctrl.div_wb; + div.io.div_result_rdy := !dmem_resp_replay io.ctrl.div_rdy := div.io.div_rdy; io.ctrl.div_result_val := div.io.div_result_val; @@ -296,7 +286,7 @@ class rocketDpath extends Component io.ctrl.mul_rdy := mul.io.mul_rdy io.ctrl.mul_result_val := mul.io.result_val; - mul.io.result_rdy := io.ctrl.mul_wb + mul.io.result_rdy := !dmem_resp_replay && !div.io.div_result_val io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection & bypass control @@ -345,7 +335,7 @@ class rocketDpath extends Component // writeback select mux ex_wdata := - Mux(ex_reg_ctrl_ll_wb || 
ex_reg_ctrl_wen_pcr, ex_reg_rs1, + Mux(ex_reg_ctrl_wen_pcr, ex_reg_rs1, Mux(ex_reg_ctrl_sel_wb === WB_PC, Cat(Fill(64-VADDR_BITS, ex_pc_plus4(VADDR_BITS-1)), ex_pc_plus4), Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, @@ -356,7 +346,6 @@ class rocketDpath extends Component mem_reg_pc <== ex_reg_pc; mem_reg_waddr <== ex_reg_waddr; mem_reg_wdata <== ex_wdata; - mem_reg_ctrl_ll_wb <== ex_reg_ctrl_ll_wb; mem_reg_raddr2 <== ex_reg_raddr2; mem_reg_ctrl_mul_val <== ex_reg_ctrl_mul_val; mem_reg_ctrl_div_val <== ex_reg_ctrl_div_val; @@ -380,14 +369,26 @@ class rocketDpath extends Component // writeback stage val dmem_resp_fpu = if (HAVE_FPU) io.dmem.resp_tag(0).toBool else Bool(false) - r_dmem_resp_replay <== io.dmem.resp_replay && !dmem_resp_fpu; + val dmem_resp_waddr = io.dmem.resp_tag.toUFix >> UFix(1) + dmem_resp_replay := io.dmem.resp_replay && !dmem_resp_fpu; + r_dmem_resp_replay <== dmem_resp_replay + r_dmem_resp_waddr <== dmem_resp_waddr r_dmem_fp_replay <== io.dmem.resp_replay && dmem_resp_fpu; - r_dmem_resp_waddr <== io.dmem.resp_tag.toUFix >> UFix(1) + + val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr, + Mux(div_result_val, div_result_tag, + Mux(mul_result_val, mul_result_tag, + mem_reg_waddr))) + val mem_ll_wdata = Mux(div_result_val, div_result, + Mux(mul_result_val, mul_result, + mem_reg_wdata)) + val mem_ll_wb = mem_ll_waddr != UFix(0) && + (dmem_resp_replay || div_result_val || mul_result_val) wb_reg_pc <== mem_reg_pc; - wb_reg_waddr <== mem_reg_waddr; - wb_reg_wdata <== mem_reg_wdata; - wb_reg_ctrl_ll_wb <== mem_reg_ctrl_ll_wb; + wb_reg_ll_wb <== mem_ll_wb + wb_reg_waddr <== mem_ll_waddr + wb_reg_wdata <== mem_ll_wdata wb_reg_raddr2 <== mem_reg_raddr2; when (io.ctrl.killm) { @@ -400,19 +401,20 @@ class rocketDpath extends Component } // regfile write - wb_wdata := Mux(Reg(io.ctrl.mem_load), io.dmem.resp_data_subword, wb_reg_wdata) - rfile.io.w0.addr := wb_reg_waddr; - rfile.io.w0.en := io.ctrl.wb_wen || 
wb_reg_ctrl_ll_wb; + val wb_src_dmem = Reg(io.ctrl.mem_load) && wb_reg_valid || r_dmem_resp_replay + wb_wdata := Mux(wb_src_dmem, io.dmem.resp_data_subword, wb_reg_wdata) + rfile.io.w0.addr := wb_reg_waddr + rfile.io.w0.en := io.ctrl.wb_wen || wb_reg_ll_wb rfile.io.w0.data := wb_wdata io.ctrl.wb_waddr := wb_reg_waddr; - io.ctrl.mem_wb := r_dmem_resp_replay; + io.ctrl.mem_wb := dmem_resp_replay; // scoreboard clear (for div/mul and D$ load miss writebacks) - io.ctrl.sboard_clr := id_ctrl_ll_wb; - io.ctrl.sboard_clra := id_waddr; - io.ctrl.fp_sboard_clr := r_dmem_fp_replay; - io.ctrl.fp_sboard_clra := r_dmem_resp_waddr; + io.ctrl.sboard_clr := mem_ll_wb + io.ctrl.sboard_clra := mem_ll_waddr + io.ctrl.fp_sboard_clr := r_dmem_fp_replay + io.ctrl.fp_sboard_clra := r_dmem_resp_waddr // processor control regfile write pcr.io.w.addr := wb_reg_raddr2; diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 6d354d21..528d46b5 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -92,7 +92,10 @@ class rocketFPUDecoder extends Component Array(FLW -> List(Y, Y, N, N, N), FLD -> List(Y, Y, N, N, N), FSW -> List(Y, N, N, Y, N), - FSD -> List(Y, N, N, Y, N))) + FSD -> List(Y, N, N, Y, N), + MTFSR -> List(Y, N, N, N, N), + MFFSR -> List(Y, N, N, N, N) + )) val valid :: wen :: ren1 :: ren2 :: ren3 :: Nil = decoder io.valid := valid.toBool From e9da2cf66ae15b1c2e310aa17674bfa539d74eb7 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Feb 2012 06:47:26 -0800 Subject: [PATCH 0134/1087] improve id/ex datapath move operand selection into decode stage; simplify bypassing --- rocket/src/main/scala/consts.scala | 36 ++-- rocket/src/main/scala/ctrl.scala | 240 ++++++++++++-------------- rocket/src/main/scala/dpath.scala | 77 ++++----- rocket/src/main/scala/dpath_alu.scala | 33 ++-- 4 files changed, 179 insertions(+), 207 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala 
index 228fa655..76f50896 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -15,14 +15,13 @@ object Constants val BR_J = UFix(7, 4); val BR_JR = UFix(8, 4); - val PC_4 = UFix(0, 4); - val PC_BTB = UFix(1, 4); - val PC_EX4 = UFix(2, 4); - val PC_BR = UFix(3, 4); - val PC_JR = UFix(4, 4); - val PC_PCR = UFix(5, 4); - val PC_WB = UFix(6, 4); - val PC_EVEC = UFix(7, 4); + val PC_4 = UFix(0, 3); + val PC_BTB = UFix(1, 3); + val PC_EX4 = UFix(2, 3); + val PC_BR = UFix(3, 3); + val PC_PCR = UFix(4, 3); + val PC_WB = UFix(5, 3); + val PC_EVEC = UFix(6, 3); val KF_Y = UFix(1, 1); val KF_N = UFix(0, 1); @@ -30,19 +29,13 @@ object Constants val REN_Y = UFix(1, 1); val REN_N = UFix(0, 1); - val AS_X = UFix(0, 1); - val AS_IMM = UFix(0, 1); - val AS_RS2 = UFix(1, 1); - - val A2_X = UFix(0, 2); - val A2_0 = UFix(0, 2); - val A2_SEXT = UFix(1, 2); - val A2_RS2 = UFix(2, 2); - val A2_SPLIT = UFix(3, 2); - - val A1_X = UFix(0, 1); - val A1_RS1 = UFix(0, 1); - val A1_LUI = UFix(1, 1); + val A2_X = UFix(0, 3); + val A2_BTYPE = UFix(0, 3); + val A2_LTYPE = UFix(1, 3); + val A2_ITYPE = UFix(2, 3); + val A2_ZERO = UFix(4, 3); + val A2_JTYPE = UFix(5, 3); + val A2_RTYPE = UFix(6, 3); val MUL_X = UFix(0, 2); val MUL_LO = UFix(0, 2); @@ -94,6 +87,7 @@ object Constants val FN_SL = UFix(7, 4); val FN_SR = UFix(8, 4); val FN_SRA = UFix(9, 4); + val FN_OP2 = UFix(10, 4); val DW_X = UFix(0, 1); val DW_32 = UFix(0, 1); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 937148ec..d4b323b3 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -9,7 +9,7 @@ import Instructions._ class ioCtrlDpath extends Bundle() { // outputs to datapath - val sel_pc = UFix(4, OUTPUT); + val sel_pc = UFix(3, OUTPUT); val wen_btb = Bool(OUTPUT); val clr_btb = Bool(OUTPUT); val stallf = Bool(OUTPUT); @@ -20,8 +20,7 @@ class ioCtrlDpath extends Bundle() val killm = Bool(OUTPUT); val ren2 = Bool(OUTPUT); val 
ren1 = Bool(OUTPUT); - val sel_alu2 = UFix(2, OUTPUT); - val sel_alu1 = Bool(OUTPUT); + val sel_alu2 = UFix(3, OUTPUT); val fn_dw = Bool(OUTPUT); val fn_alu = UFix(4, OUTPUT); val mul_val = Bool(OUTPUT); @@ -39,9 +38,6 @@ class ioCtrlDpath extends Bundle() val ex_wen = Bool(OUTPUT); val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); - // instruction in execute is an unconditional jump - val ex_jmp = Bool(OUTPUT); - val ex_jr = Bool(OUTPUT); // enable/disable interrupts val irq_enable = Bool(OUTPUT); val irq_disable = Bool(OUTPUT); @@ -103,130 +99,130 @@ class rocketCtrl extends Component val xpr64 = Y; val cs = ListLookup(io.dpath.inst, - List( N, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + List( N, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), Array( - BNE-> List(Y, BR_NE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BEQ-> List(Y, BR_EQ, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BLT-> List(Y, BR_LT, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BGE-> List(Y, BR_GE, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_RS2, A1_RS1,DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BNE-> List(Y, BR_NE, 
REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BEQ-> List(Y, BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BLT-> List(Y, BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BGE-> List(Y, BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - J-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JAL-> List(Y, BR_J, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + J-> List(Y, BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JAL-> List(Y, BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, 
REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LB-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LH-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LBU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LHU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SB-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SH-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X 
,SYNC_N,N,N,N,N), - SW-> List(Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LB-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LH-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LW-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LBU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LHU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SB-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SH-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SW-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, 
A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMIN_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAXU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_0, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, 
M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMIN_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAXU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LUI-> List(Y, BR_N, REN_N,REN_Y,A2_0, A1_LUI,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - XORI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, 
A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvAND-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRA-> List(Y, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LUI-> List(Y, BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, 
MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ORI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + XORI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvAND-> List(Y, BR_N, 
REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRA-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SUBW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SR, 
M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RS2, A1_RS1,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADDIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADDW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SUBW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X 
,SYNC_N,N,N,N,N), - MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, 
Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, A1_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,N), - EI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y,Y), - DI-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y,Y), - ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y,N), - FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, 
N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N), - FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N), - CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, A1_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y), - MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y,N), - MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y,Y), - RDTIME-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDCYCLE-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDINSTRET->List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,N), + EI-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y,Y), + DI-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y,Y), + ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y,N), + FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N), + FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N), + CFLUSH-> List(Y, BR_N, 
REN_Y,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y), + MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y,N), + MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y,Y), + RDTIME-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDCYCLE-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDINSTRET->List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), // Instructions that have not yet been implemented // Faking these for now so akaros will boot - //MFFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - //MTFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, A1_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLW-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLD-> List(Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSW-> List(Y, BR_N, REN_N,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSD-> List(Y, BR_N, REN_N,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N) + //MFFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + //MTFSR-> List(Y, BR_N, 
REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLW-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLD-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSW-> List(Y, BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSD-> List(Y, BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N) /* // floating point - FLW-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - FLD-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_SEXT, A1_RS1,DW_XPR,FN_ADD, M_Y,M_FRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - FSW-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - FSD-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_SPLIT,A1_RS1,DW_XPR,FN_ADD, M_Y,M_FWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + FLW-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + FLD-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_FRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + FSW-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_FWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), + FSD-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_FWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), */ )); val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst, resetVal = 
Bool(false)); - val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_sel_alu1 :: id_fn_dw :: id_fn_alu :: csremainder = cs; + val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: csremainder = cs; val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = csremainder; val id_raddr3 = io.dpath.inst(16,12); @@ -360,12 +356,6 @@ class rocketCtrl extends Component ex_reg_mem_cmd <== id_mem_cmd; ex_reg_mem_type <== id_mem_type; - - val jr_taken = (ex_reg_br_type === BR_JR); - val j_taken = (ex_reg_br_type === BR_J); - io.dpath.ex_jmp := j_taken; - io.dpath.ex_jr := jr_taken; - val beq = io.dpath.br_eq; val bne = ~io.dpath.br_eq; val blt = io.dpath.br_lt; @@ -380,7 +370,8 @@ class rocketCtrl extends Component (ex_reg_br_type === BR_LTU) & bltu | (ex_reg_br_type === BR_GE) & bge | (ex_reg_br_type === BR_GEU) & bgeu | - j_taken; // treat J/JAL like a taken branch + (ex_reg_br_type === BR_J) | + (ex_reg_br_type === BR_JR); // treat J/JAL/JALR like a taken branch val mem_reg_div_mul_val = Reg(){Bool()}; val mem_reg_eret = Reg(){Bool()}; @@ -526,8 +517,7 @@ class rocketCtrl extends Component // control transfer from ex/mem val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match - val br_jr_taken = br_taken || jr_taken - val take_pc_ex = !ex_btb_match && br_jr_taken || ex_reg_btb_hit && !br_jr_taken + val take_pc_ex = !ex_btb_match && br_taken || ex_reg_btb_hit && !br_taken val take_pc_wb = wb_reg_replay || wb_reg_exception || wb_reg_eret; take_pc <== take_pc_ex || take_pc_wb; @@ -563,14 +553,13 @@ class rocketCtrl extends Component Mux(wb_reg_exception, PC_EVEC, // exception Mux(wb_reg_replay, PC_WB, // replay Mux(wb_reg_eret, PC_PCR, // eret instruction - Mux(ex_reg_btb_hit && !br_jr_taken, PC_EX4, // mispredicted not 
taken branch + Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch Mux(!ex_btb_match && br_taken, PC_BR, // mispredicted taken branch - Mux(!ex_btb_match && jr_taken, PC_JR, // mispredicted jump register Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB - PC_4))))))); // PC+4 + PC_4)))))); // PC+4 - io.dpath.wen_btb := !ex_btb_match && br_jr_taken; - io.dpath.clr_btb := ex_reg_btb_hit && !br_jr_taken || id_reg_icmiss; + io.dpath.wen_btb := !ex_btb_match && br_taken; + io.dpath.clr_btb := ex_reg_btb_hit && !br_taken || id_reg_icmiss; io.imem.req_val := take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay) @@ -645,7 +634,6 @@ class rocketCtrl extends Component io.dpath.ren2 := id_renx2.toBool; io.dpath.ren1 := id_renx1.toBool; io.dpath.sel_alu2 := id_sel_alu2; - io.dpath.sel_alu1 := id_sel_alu1.toBool; io.dpath.fn_dw := id_fn_dw.toBool; io.dpath.fn_alu := id_fn_alu; io.dpath.div_fn := id_div_fn; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index d2b74c5c..7acf7dea 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -75,13 +75,11 @@ class rocketDpath extends Component // execute definitions val ex_reg_valid = Reg(resetVal = Bool(false)); val ex_reg_pc = Reg() { UFix() }; - val ex_reg_inst = Reg() { Bits() }; val ex_reg_raddr2 = Reg() { UFix() }; + val ex_reg_op2 = Reg() { Bits() }; val ex_reg_rs2 = Reg() { Bits() }; val ex_reg_rs1 = Reg() { Bits() }; val ex_reg_waddr = Reg() { UFix() }; - val ex_reg_ctrl_sel_alu2 = Reg() { UFix() }; - val ex_reg_ctrl_sel_alu1 = Reg() { UFix() }; val ex_reg_ctrl_eret = Reg(resetVal = Bool(false)); val ex_reg_ctrl_fn_dw = Reg() { UFix() }; val ex_reg_ctrl_fn_alu = Reg() { UFix() }; @@ -124,31 +122,23 @@ class rocketDpath extends Component val if_pc_plus4 = if_reg_pc + UFix(4); val ex_pc_plus4 = ex_reg_pc + UFix(4); - val ex_sign_extend = - Cat(Fill(52, ex_reg_inst(21)), ex_reg_inst(21,10)); - val 
ex_sign_extend_split = - Cat(Fill(52, ex_reg_inst(31)), ex_reg_inst(31,27), ex_reg_inst(16,10)); + val ex_branch_target = ex_reg_pc + Cat(ex_reg_op2, Bits(0,1)).toUFix - val branch_adder_rhs = - Mux(io.ctrl.ex_jmp, Cat(Fill(VADDR_BITS-25, ex_reg_inst(31)), ex_reg_inst(31,7), UFix(0,1)), - Cat(ex_sign_extend_split(VADDR_BITS-1,0), UFix(0, 1))); - val ex_branch_target = ex_reg_pc + branch_adder_rhs.toUFix; - - val ex_jr_target_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0)) - val ex_jr_target_extended = Cat(ex_jr_target_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix - - val jr_br_target = Mux(io.ctrl.ex_jr, ex_jr_target_extended, ex_branch_target); - btb.io.correct_target := jr_br_target + val ex_ea_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0)) + val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix + + val ex_br_target_sel = Reg(io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE) + val ex_br_target = Mux(ex_br_target_sel, ex_branch_target, ex_effective_address) + btb.io.correct_target := ex_br_target val if_next_pc = Mux(io.ctrl.sel_pc === PC_BTB, Cat(if_btb_target(VADDR_BITS-1), if_btb_target), Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, - Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, - Mux(io.ctrl.sel_pc === PC_JR, ex_jr_target_extended, + Mux(io.ctrl.sel_pc === PC_BR, ex_br_target, Mux(io.ctrl.sel_pc === PC_PCR, wb_reg_wdata(VADDR_BITS,0), // only used for ERET Mux(io.ctrl.sel_pc === PC_EVEC, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec), Mux(io.ctrl.sel_pc === PC_WB, wb_reg_pc, - if_pc_plus4))))))); // PC_4 + if_pc_plus4)))))); // PC_4 when (!io.ctrl.stallf) { if_reg_pc <== if_next_pc.toUFix; @@ -165,7 +155,7 @@ class rocketDpath extends Component btb.io.wen <> io.ctrl.wen_btb; btb.io.clr <> io.ctrl.clr_btb; btb.io.correct_pc4 := ex_pc_plus4; - 
io.ctrl.btb_match := id_reg_pc === jr_br_target; + io.ctrl.btb_match := id_reg_pc === ex_br_target; // instruction decode stage when (!io.ctrl.stalld) { @@ -210,17 +200,33 @@ class rocketDpath extends Component Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, id_rdata2))); + // immediate generation + val id_imm_bj = io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE + val id_imm_l = io.ctrl.sel_alu2 === A2_LTYPE + val id_imm_zero = io.ctrl.sel_alu2 === A2_ZERO || io.ctrl.sel_alu2 === A2_RTYPE + val id_imm_ibz = io.ctrl.sel_alu2 === A2_ITYPE || io.ctrl.sel_alu2 === A2_BTYPE || id_imm_zero + val id_imm_sign = Mux(id_imm_bj, id_reg_inst(31), + Mux(id_imm_l, id_reg_inst(26), + Mux(id_imm_zero, Bits(0,1), + id_reg_inst(21)))) // IMM_ITYPE + val id_imm_small = Mux(id_imm_zero, Bits(0,12), + Cat(Mux(id_imm_bj, id_reg_inst(31,27), id_reg_inst(21,17)), id_reg_inst(16,10))) + val id_imm = Cat(Fill(32, id_imm_sign), + Mux(id_imm_l, Cat(id_reg_inst(26,7), Bits(0,12)), + Mux(id_imm_ibz, Cat(Fill(20, id_imm_sign), id_imm_small), + Cat(Fill(7, id_imm_sign), id_reg_inst(31,7))))) // A2_JTYPE + + val id_op2 = Mux(io.ctrl.sel_alu2 === A2_RTYPE, id_rs2, id_imm) + io.ctrl.inst := id_reg_inst; // execute stage ex_reg_pc <== id_reg_pc; - ex_reg_inst <== id_reg_inst; ex_reg_raddr2 <== id_raddr2; + ex_reg_op2 <== id_op2; ex_reg_rs2 <== id_rs2; ex_reg_rs1 <== id_rs1; ex_reg_waddr <== id_waddr; - ex_reg_ctrl_sel_alu2 <== io.ctrl.sel_alu2; - ex_reg_ctrl_sel_alu1 <== io.ctrl.sel_alu1.toUFix; ex_reg_ctrl_fn_dw <== io.ctrl.fn_dw.toUFix; ex_reg_ctrl_fn_alu <== io.ctrl.fn_alu; ex_reg_ctrl_mul_fn <== io.ctrl.mul_fn; @@ -243,24 +249,10 @@ class rocketDpath extends Component ex_reg_ctrl_eret <== io.ctrl.id_eret; } - val ex_alu_in2 = - Mux(ex_reg_ctrl_sel_alu2 === A2_SEXT, ex_sign_extend, - Mux(ex_reg_ctrl_sel_alu2 === A2_SPLIT, ex_sign_extend_split, - Mux(ex_reg_ctrl_sel_alu2 === A2_RS2, ex_reg_rs2, - UFix(0, 64)))); // A2_0 - - val ex_alu_in1 = - 
Mux(ex_reg_ctrl_sel_alu1 === A1_RS1, ex_reg_rs1, - Cat(Fill(32, ex_reg_inst(26)),ex_reg_inst(26,7),UFix(0, 12))); // A1_LUI - - val ex_alu_shamt = - Cat(ex_alu_in2(5) & ex_reg_ctrl_fn_dw === DW_64, ex_alu_in2(4,0)).toUFix; - alu.io.dw := ex_reg_ctrl_fn_dw; alu.io.fn := ex_reg_ctrl_fn_alu; - alu.io.shamt := ex_alu_shamt.toUFix; - alu.io.in2 := ex_alu_in2.toUFix; - alu.io.in1 := ex_alu_in1.toUFix; + alu.io.in2 := ex_reg_op2.toUFix; + alu.io.in1 := ex_reg_rs1.toUFix; // divider div.io.dw := ex_reg_ctrl_fn_dw; @@ -292,7 +284,7 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req_addr := ex_jr_target_extended.toUFix; + io.dmem.req_addr := ex_effective_address.toUFix; if (HAVE_FPU) { io.dmem.req_data := Mux(io.ctrl.ex_fp_val, io.fpu.store_data, ex_reg_rs2) io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val).toUFix @@ -335,12 +327,11 @@ class rocketDpath extends Component // writeback select mux ex_wdata := - Mux(ex_reg_ctrl_wen_pcr, ex_reg_rs1, Mux(ex_reg_ctrl_sel_wb === WB_PC, Cat(Fill(64-VADDR_BITS, ex_pc_plus4(VADDR_BITS-1)), ex_pc_plus4), Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, - ex_alu_out))))).toBits; // WB_ALU + ex_alu_out)))).toBits; // WB_ALU // memory stage mem_reg_pc <== ex_reg_pc; diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index cee39ba2..947723df 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -10,7 +10,6 @@ import Instructions._ class ioALU extends Bundle(){ val dw = UFix(1, INPUT); val fn = UFix(4, INPUT); - val shamt = UFix(6, INPUT); val in2 = UFix(64, INPUT); val in1 = UFix(64, INPUT); val out = UFix(64, OUTPUT); @@ -27,31 +26,31 @@ class rocketDpathALU extends Component val sum = (io.in1 + adder_rhs + sub.toUFix)(63,0) // SLT, SLTU - val less = 
Mux(io.in1(63) === io.in2(63), sum(63), io.in1(63)) - val lessu = Mux(io.in1(63) === io.in2(63), sum(63), io.in2(63)) + val less = Mux(io.in1(63) === io.in2(63), sum(63), + Mux(io.fn === FN_SLT, io.in1(63), io.in2(63))) // SLL, SRL, SRA val sra = (io.fn === FN_SRA) + val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0)).toUFix val shright = sra || (io.fn === FN_SR) val shin_hi_32 = Mux(sra, Fill(32, io.in1(31)), UFix(0,32)) val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) val shin_r = Cat(shin_hi, io.in1(31,0)) val shin = Mux(shright, shin_r, Reverse(shin_r)) - val shout_r = (Cat(sra & shin_r(63), shin).toFix >>> io.shamt)(63,0) + val shout_r = (Cat(sra & shin_r(63), shin).toFix >>> shamt)(63,0) - val out64 = Wire { Bits(64) } - switch(io.fn) - { - is(FN_ADD) { out64 <== sum } - is(FN_SUB) { out64 <== sum } - is(FN_SLT) { out64 <== less } - is(FN_SLTU) { out64 <== lessu } - is(FN_AND) { out64 <== io.in1 & io.in2 } - is(FN_OR) { out64 <== io.in1 | io.in2 } - is(FN_XOR) { out64 <== io.in1 ^ io.in2 } - is(FN_SL) { out64 <== Reverse(shout_r) } - } - out64 <== shout_r + val logic = + Mux(io.fn === FN_AND, io.in1 & io.in2, + Mux(io.fn === FN_OR, io.in1 | io.in2, + Mux(io.fn === FN_XOR, io.in1 ^ io.in2, + io.in2))) // FN_OP2 + + val out64 = + Mux(io.fn === FN_ADD || io.fn === FN_SUB, sum, + Mux(io.fn === FN_SLT || io.fn === FN_SLTU, less, + Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, + Mux(io.fn === FN_SL, Reverse(shout_r), + logic)))) val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) io.out := Cat(out_hi, out64(31,0)).toUFix From b3f6f9a5fd6ef016b08d24b65083806d56f5e2b5 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Feb 2012 15:03:59 -0800 Subject: [PATCH 0135/1087] fix BTB misprediction check for negative addresses also index BTB with PC, not PC+4 --- rocket/src/main/scala/dpath.scala | 6 +++--- rocket/src/main/scala/dpath_util.scala | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff 
--git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 7acf7dea..ac308504 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -122,7 +122,7 @@ class rocketDpath extends Component val if_pc_plus4 = if_reg_pc + UFix(4); val ex_pc_plus4 = ex_reg_pc + UFix(4); - val ex_branch_target = ex_reg_pc + Cat(ex_reg_op2, Bits(0,1)).toUFix + val ex_branch_target = ex_reg_pc + Cat(ex_reg_op2(VADDR_BITS-1,0), Bits(0,1)).toUFix val ex_ea_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0)) val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix @@ -150,11 +150,11 @@ class rocketDpath extends Component Mux(io.ctrl.stallf, if_reg_pc, if_next_pc.toUFix); - btb.io.current_pc4 := if_pc_plus4; + btb.io.current_pc := if_reg_pc; btb.io.hit <> io.ctrl.btb_hit; btb.io.wen <> io.ctrl.wen_btb; btb.io.clr <> io.ctrl.clr_btb; - btb.io.correct_pc4 := ex_pc_plus4; + btb.io.correct_pc := ex_reg_pc; io.ctrl.btb_match := id_reg_pc === ex_br_target; // instruction decode stage diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index ced09299..e821889b 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -8,12 +8,12 @@ import scala.math._; class ioDpathBTB extends Bundle() { - val current_pc4 = UFix(VADDR_BITS, INPUT); + val current_pc = UFix(VADDR_BITS, INPUT); val hit = Bool(OUTPUT); val target = UFix(VADDR_BITS, OUTPUT); val wen = Bool(INPUT); val clr = Bool(INPUT); - val correct_pc4 = UFix(VADDR_BITS, INPUT); + val correct_pc = UFix(VADDR_BITS, INPUT); val correct_target = UFix(VADDR_BITS, INPUT); } @@ -28,15 +28,15 @@ class rocketDpathBTB(entries: Int) extends Component val tagmsb = (VADDR_BITS-idxmsb-1)+(VADDR_BITS-idxlsb)-1; val taglsb = (VADDR_BITS-idxlsb); - val vb_array = Mem(entries, io.wen || io.clr, io.correct_pc4(idxmsb,idxlsb), 
!io.clr, resetVal = Bool(false)); - val tag_target_array = Mem4(entries, io.wen, io.correct_pc4(idxmsb,idxlsb), - Cat(io.correct_pc4(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb))) + val vb_array = Mem(entries, io.wen || io.clr, io.correct_pc(idxmsb,idxlsb), !io.clr, resetVal = Bool(false)); + val tag_target_array = Mem4(entries, io.wen, io.correct_pc(idxmsb,idxlsb), + Cat(io.correct_pc(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb))) tag_target_array.setReadLatency(0); tag_target_array.setTarget('inst); - val is_val = vb_array(io.current_pc4(idxmsb,idxlsb)); - val tag_target = tag_target_array(io.current_pc4(idxmsb, idxlsb)); + val is_val = vb_array(io.current_pc(idxmsb,idxlsb)); + val tag_target = tag_target_array(io.current_pc(idxmsb, idxlsb)); - io.hit := is_val && (tag_target(tagmsb,taglsb) === io.current_pc4(VADDR_BITS-1, idxmsb+1)); + io.hit := is_val && (tag_target(tagmsb,taglsb) === io.current_pc(VADDR_BITS-1, idxmsb+1)); io.target := Cat(tag_target(taglsb-1, 0), Bits(0,idxlsb)).toUFix; } From 990e3a1b342e12fbe6bb242127dbaa928afb673f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Feb 2012 15:19:08 -0800 Subject: [PATCH 0136/1087] fix fpu port direction bug --- rocket/src/main/scala/fpu.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 528d46b5..f46d9d25 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -120,7 +120,7 @@ class rocketFPU extends Component val killx = Bool(INPUT) val killm = Bool(INPUT) - val dmem = new ioDmem(List("resp_val", "resp_tag", "resp_data")) + val dmem = new ioDmem(List("resp_val", "resp_tag", "resp_data")).flip() val dpath = new ioDpathFPU().flip() } From a1855b12c220042c5f54cb23941d0f72ee773731 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Feb 2012 17:55:05 -0800 Subject: [PATCH 0137/1087] clean up queues --- 
rocket/src/main/scala/icache_prefetch.scala | 7 +- rocket/src/main/scala/nbdcache.scala | 6 +- rocket/src/main/scala/queues.scala | 239 +++----------------- 3 files changed, 33 insertions(+), 219 deletions(-) diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 07b2fb57..54007fc9 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -23,7 +23,7 @@ class ioIPrefetcher extends Bundle() { class rocketIPrefetcher extends Component() { val io = new ioIPrefetcher(); - val pdq = (new queueSimplePF(REFILL_CYCLES)) { Bits(width = MEM_DATA_BITS) }; + val pdq = (new queue(REFILL_CYCLES, flushable = true)) { Bits(width = MEM_DATA_BITS) }; val s_invalid :: s_valid :: s_refilling :: s_req_wait :: s_resp_wait :: s_bad_resp_wait :: Nil = Enum(6) { UFix() }; val state = Reg(resetVal = s_invalid); @@ -43,9 +43,6 @@ class rocketIPrefetcher extends Component() { io.mem.req_tag := !(io.icache.req_val && !hit); io.mem.req_addr := Mux(io.mem.req_tag(0).toBool, prefetch_addr, io.icache.req_addr); - val pdq_reset = Reg(resetVal = Bool(true)); - pdq_reset <== demand_miss & ~hit | (state === s_bad_resp_wait); - val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); when (ip_mem_resp_val.toBool) { fill_cnt <== fill_cnt + UFix(1); } val fill_done = (~fill_cnt === UFix(0)) & ip_mem_resp_val; @@ -59,7 +56,7 @@ class rocketIPrefetcher extends Component() { io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag(0).toBool) || (forward && pdq.io.deq.valid); io.icache.resp_data := Mux(forward, pdq.io.deq.bits, io.mem.resp_data); - pdq.io.q_reset := pdq_reset; + pdq.io.flush := Reg(demand_miss && !hit || (state === s_bad_resp_wait), resetVal = Bool(false)) pdq.io.enq.bits := io.mem.resp_data; pdq.io.enq.valid := ip_mem_resp_val.toBool; pdq.io.deq.ready := forward; diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 
130de659..dec60832 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -197,8 +197,7 @@ class MSHR(id: Int) extends Component { val next_dirty = dirty || io.req_sec_val && io.req_sec_rdy && !req_load val sec_rdy = io.idx_match && !refilled && (dirty || !requested || req_load) - val rpq = (new queueSimplePF(NRPQ)) { new RPQEntry() } - rpq.io.q_reset := Bool(false) + val rpq = (new queue(NRPQ)) { new RPQEntry() } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq rpq.io.enq.bits.offset := io.req_offset rpq.io.enq.bits.cmd := io.req_cmd @@ -419,7 +418,7 @@ class WritebackUnit extends Component { val mem_req_data = Bits(MEM_DATA_BITS, OUTPUT) } - val wbq = (new queueSimplePF(REFILL_CYCLES)) { Bits(width = MEM_DATA_BITS) } + val wbq = (new queue(REFILL_CYCLES)) { Bits(width = MEM_DATA_BITS) } val valid = Reg(resetVal = Bool(false)) val cnt = Reg() { UFix(width = log2up(REFILL_CYCLES+1)) } val addr = Reg() { new WritebackReq() } @@ -430,7 +429,6 @@ class WritebackUnit extends Component { val block_refill = valid && ((io.refill_req.bits.addr(IDX_BITS-1,0) === addr.idx) || (cnt === UFix(REFILL_CYCLES))) val refill_val = io.refill_req.valid && !block_refill - wbq.io.q_reset := Bool(false) wbq.io.enq.valid := valid && Reg(io.data_req.valid && io.data_req.ready) wbq.io.enq.bits := io.data_resp wbq.io.deq.ready := io.mem_req.ready && !refill_val && (cnt === UFix(REFILL_CYCLES)) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 90732f2e..8912a15a 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -1,226 +1,45 @@ package Top -{ import Chisel._ import Node._; -import scala.math._; -class ioQueueCtrl(addr_sz: Int) extends Bundle() +class ioQueue[T <: Data](flushable: Boolean)(data: => T) extends Bundle { - val q_reset = Bool(INPUT); - val enq_val = Bool(INPUT); - val enq_rdy = Bool(OUTPUT); - val deq_val = 
Bool(OUTPUT); - val deq_rdy = Bool(INPUT); - val wen = Bool(OUTPUT); - val waddr = UFix(addr_sz, OUTPUT); - val raddr = UFix(addr_sz, OUTPUT); + val flush = if (flushable) Bool(INPUT) else null + val enq = new ioDecoupled()(data) + val deq = new ioDecoupled()(data).flip } -class queueCtrl(entries: Int) extends Component +class queue[T <: Data](entries: Int, flushable: Boolean = false)(data: => T) extends Component { - val addr_sz = log2up(entries) - override val io = new ioQueueCtrl(addr_sz); + val io = new ioQueue(flushable)(data) - // Enqueue and dequeue pointers + val enq_ptr = Reg(resetVal = UFix(0, log2up(entries))) + val deq_ptr = Reg(resetVal = UFix(0, log2up(entries))) + val maybe_full = Reg(resetVal = Bool(false)) - val enq_ptr = Reg(width = addr_sz, resetVal = UFix(0, addr_sz)); - val deq_ptr = Reg(width = addr_sz, resetVal = UFix(0, addr_sz)); - val full = Reg(width = 1, resetVal = Bool(false)); + io.deq.valid := maybe_full || enq_ptr != deq_ptr + io.enq.ready := !maybe_full || enq_ptr != deq_ptr - io.waddr := enq_ptr; - io.raddr := deq_ptr; + val do_enq = io.enq.ready && io.enq.valid + val do_deq = io.deq.ready && io.deq.valid - // We enq/deq only when they are both ready and valid - - val do_enq = io.enq_rdy && io.enq_val; - val do_deq = io.deq_rdy && io.deq_val; - - // Determine if we have pipeline or flowthrough behaviour and - // set the write enable accordingly. - - val empty = ~full && (enq_ptr === deq_ptr); - - io.wen := do_enq; - - // Ready signals are calculated from full register. If pipeline - // behavior is enabled, then the enq_rdy signal is also calculated - // combinationally from the deq_rdy signal. If flowthrough behavior - // is enabled then the deq_val signal is also calculated combinationally - // from the enq_val signal. 
- - io.enq_rdy := ~full; - io.deq_val := ~empty; - - // Control logic for the enq/deq pointers and full register - - val deq_ptr_inc = deq_ptr + UFix(1, 1); - val enq_ptr_inc = enq_ptr + UFix(1, 1); - - val deq_ptr_next = - Mux(do_deq, deq_ptr_inc, - deq_ptr); - - val enq_ptr_next = - Mux(do_enq, enq_ptr_inc, - enq_ptr); - - val full_next = - Mux(do_enq && ~do_deq && ( enq_ptr_inc === deq_ptr ), Bool(true), - Mux(do_deq && full, Bool(false), - full)); - - when (io.q_reset) { - enq_ptr <== UFix(0, addr_sz); - deq_ptr <== UFix(0, addr_sz); - full <== Bool(false); + if (flushable) { + when (io.flush) { + deq_ptr <== UFix(0) + enq_ptr <== UFix(0) + maybe_full <== Bool(false) + } } - otherwise { - enq_ptr <== enq_ptr_next; - deq_ptr <== deq_ptr_next; - full <== full_next; + when (do_deq) { + deq_ptr <== deq_ptr + UFix(1) } -} - -class ioQueueSimplePF[T <: Data]()(data: => T) extends Bundle -{ - val q_reset = Bool(INPUT); - val enq = new ioDecoupled()(data) - val deq = new ioDecoupled()(data).flip -} - -class queueSimplePF[T <: Data](entries: Int)(data: => T) extends Component -{ - override val io = new ioQueueSimplePF()(data); - val ctrl = new queueCtrl(entries); - ctrl.io.q_reset <> io.q_reset; - ctrl.io.deq_val <> io.deq.valid; - ctrl.io.enq_rdy <> io.enq.ready; - ctrl.io.enq_val <> io.enq.valid; - ctrl.io.deq_rdy <> io.deq.ready; - val ram = Mem(entries, ctrl.io.wen, ctrl.io.waddr, io.enq.bits); - ram.read(ctrl.io.raddr) <> io.deq.bits; -} - -// TODO: SHOULD USE INHERITANCE BUT BREAKS INTROSPECTION CODE -// class IOqueueCtrlFlow extends IOqueueCtrl -class ioQueueCtrlFlow(addr_sz: Int) extends Bundle() /* IOqueueCtrl */ -{ - val enq_val = Bool(INPUT); - val enq_rdy = Bool(OUTPUT); - val deq_val = Bool(OUTPUT); - val deq_rdy = Bool(INPUT); - val wen = Bool(OUTPUT); - val waddr = UFix(addr_sz, OUTPUT); - val raddr = UFix(addr_sz, OUTPUT); - val flowthru = Bool(OUTPUT); -} - -class queueCtrlFlow(entries: Int) extends Component -{ - val addr_sz = log2up(entries) - 
override val io = new ioQueueCtrlFlow(addr_sz); - // Enqueue and dequeue pointers - - val enq_ptr = Reg(width = addr_sz, resetVal = UFix(0, addr_sz)); - val deq_ptr = Reg(width = addr_sz, resetVal = UFix(0, addr_sz)); - val full = Reg(width = 1, resetVal = Bool(false)); - - io.waddr := enq_ptr; - io.raddr := deq_ptr; - - // We enq/deq only when they are both ready and valid - - val do_enq = io.enq_rdy && io.enq_val; - val do_deq = io.deq_rdy && io.deq_val; - - // Determine if we have pipeline or flowthrough behaviour and - // set the write enable accordingly. - - val empty = ~full && (enq_ptr === deq_ptr); - val do_flowthru = empty && do_enq && do_deq; - io.flowthru := do_flowthru; - - io.wen := do_enq && ~do_flowthru; - - // Ready signals are calculated from full register. If pipeline - // behavior is enabled, then the enq_rdy signal is also calculated - // combinationally from the deq_rdy signal. If flowthrough behavior - // is enabled then the deq_val signal is also calculated combinationally - // from the enq_val signal. 
- - io.enq_rdy := ~full; - io.deq_val := ~empty || ( empty && io.enq_val ); - - // Control logic for the enq/deq pointers and full register - - val deq_ptr_inc = deq_ptr + UFix(1, 1); - val enq_ptr_inc = enq_ptr + UFix(1, 1); - - val deq_ptr_next = - Mux(do_deq && ~do_flowthru, deq_ptr_inc, - deq_ptr); - - val enq_ptr_next = - Mux(do_enq && ~do_flowthru, enq_ptr_inc, - enq_ptr); - - val full_next = - Mux(do_enq && ~do_deq && ( enq_ptr_inc === deq_ptr ), Bool(true), - Mux(do_deq && full, Bool(false), - full)); - - enq_ptr <== enq_ptr_next; - deq_ptr <== deq_ptr_next; - full <== full_next; -} - -class ioQueueDpathFlow[T <: Data](addr_sz: Int)(data: => T) extends Bundle() -{ - val wen = Bool(INPUT); - val flowthru = Bool(INPUT); - val deq_bits = data.asOutput; - val enq_bits = data.asInput; - val waddr = UFix(addr_sz, INPUT); - val raddr = UFix(addr_sz, INPUT); -} - -class queueDpathFlow[T <: Data](entries: Int)(data: => T) extends Component -{ - val addr_sz = log2up(entries) - override val io = new ioQueueDpathFlow(addr_sz)(data); - val ram = Mem(entries, io.wen, io.waddr, io.enq_bits); - val rout = ram(io.raddr); - Mux(io.flowthru, io.enq_bits, rout) <> io.deq_bits; -} - -class ioQueueFlowPF[T <: Data](data: => T) extends Bundle() -{ - val enq_val = Bool(INPUT); - val enq_rdy = Bool(OUTPUT); - val enq_bits = data.asInput; - val deq_val = Bool(OUTPUT); - val deq_rdy = Bool(INPUT); - val deq_bits = data.asOutput; -} - -class queueFlowPF[T <: Data](entries: Int)(data: => T) extends Component -{ - override val io = new ioQueueFlowPF(data); - val ctrl = new queueCtrlFlow(entries); - val dpath = new queueDpathFlow(entries)(data); - - ctrl.io.deq_rdy <> io.deq_rdy; - ctrl.io.wen <> dpath.io.wen; - ctrl.io.raddr <> dpath.io.raddr; - ctrl.io.waddr <> dpath.io.waddr; - ctrl.io.flowthru <> dpath.io.flowthru; - ctrl.io.enq_val <> io.enq_val; - dpath.io.enq_bits <> io.enq_bits; - - ctrl.io.deq_val <> io.deq_val; - ctrl.io.enq_rdy <> io.enq_rdy; - dpath.io.deq_bits <> 
io.deq_bits; -} - + when (do_enq) { + enq_ptr <== enq_ptr + UFix(1) + } + when (do_enq != do_deq) { + maybe_full <== do_enq + } + + Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr) <> io.deq.bits } From 10b5a0006c0f9bc168eca70fe13b37162f10e45d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Feb 2012 20:11:57 -0800 Subject: [PATCH 0138/1087] fix mul/div to rd=0 --- rocket/src/main/scala/ctrl.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index d4b323b3..f3f236bb 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -334,8 +334,8 @@ class rocketCtrl extends Component otherwise { ex_reg_br_type <== id_br_type; ex_reg_btb_hit <== id_reg_btb_hit; - ex_reg_div_val <== id_div_val.toBool; - ex_reg_mul_val <== id_mul_val.toBool; + ex_reg_div_val <== id_div_val.toBool && id_waddr != UFix(0); + ex_reg_mul_val <== id_mul_val.toBool && id_waddr != UFix(0); ex_reg_mem_val <== id_mem_val.toBool; ex_reg_wen <== id_wen.toBool && id_waddr != UFix(0); ex_reg_fp_wen <== fpdec.io.wen; From 9285a52f2519ec747abe0bb9b683b147aee38f9f Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 8 Feb 2012 21:43:45 -0800 Subject: [PATCH 0139/1087] initial vu integration --- rocket/src/main/scala/consts.scala | 20 ++ rocket/src/main/scala/cpu.scala | 15 +- rocket/src/main/scala/ctrl.scala | 286 +++++++++++++++++------------ rocket/src/main/scala/dpath.scala | 5 + 4 files changed, 209 insertions(+), 117 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 76f50896..6f52d4d7 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -206,6 +206,26 @@ object Constants val FPU_N = UFix(0, 1); val FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N; + + val VEC_N = UFix(0, 1); + val VEC_Y = if (HAVE_VEC) UFix(1, 1) else VEC_N; + + val VEC_X = UFix(0, 1) + val VEC_VL = UFix(0, 1) + 
val VEC_CFG = UFix(1, 1) + + val VCMD_I = UFix(0, 3) + val VCMD_F = UFix(1, 3) + val VCMD_TX = UFix(2, 3) + val VCMD_TF = UFix(3, 3) + val VCMD_MX = UFix(4, 3) + val VCMD_MF = UFix(5, 3) + val VCMD_X = UFix(0, 3) + + val VIMM_VLEN = UFix(0, 2) + val VIMM_ALU = UFix(1, 2) + val VIMM_RS1 = UFix(2, 2) + val VIMM_X = UFix(0, 2) } } diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 2dd0b062..bffb539c 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -1,8 +1,9 @@ -package Top { +package Top import Chisel._; import Node._; import Constants._; +import hwacha._ class ioDebug(view: List[String] = null) extends Bundle(view) { @@ -120,6 +121,16 @@ class rocketProc extends Component fpu.io.dmem.resp_data := arb.io.cpu.resp_data; dpath.io.fpu <> fpu.io.dpath } -} + if (HAVE_VEC) + { + val vu = new vu() + + vu.io.vec_cmdq <> ctrl.io.vcmdq + vu.io.vec_cmdq <> dpath.io.vcmdq + vu.io.vec_ximm1q <> ctrl.io.vximm1q + vu.io.vec_ximm1q <> dpath.io.vximm1q + vu.io.vec_ximm2q <> ctrl.io.vximm2q + vu.io.vec_ximm2q <> dpath.io.vximm2q + } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f3f236bb..3bad41c9 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -5,6 +5,7 @@ import Node._; import Constants._ import Instructions._ +import hwacha._ class ioCtrlDpath extends Bundle() { @@ -77,6 +78,9 @@ class ioCtrlAll extends Bundle() val console = new ioConsole(List("rdy")); val imem = new ioImem(List("req_val", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip(); + val vcmdq = new io_vec_cmdq(List("ready", "valid")) + val vximm1q = new io_vec_ximm1q(List("ready", "valid")) + val vximm2q = new io_vec_ximm2q(List("ready", "valid")) val dtlb_val = Bool(OUTPUT); val dtlb_kill = Bool(OUTPUT); val dtlb_rdy = Bool(INPUT); @@ -99,132 +103,184 @@ class rocketCtrl extends 
Component val xpr64 = Y; val cs = ListLookup(io.dpath.inst, - List( N, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - Array( - BNE-> List(Y, BR_NE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BEQ-> List(Y, BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BLT-> List(Y, BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BGE-> List(Y, BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + // eret + // | syscall + // mem_val mul_val div_val renpcr | | privileged + // val brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),Array( + BNE-> List(Y, BR_NE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BEQ-> List(Y, BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BLT-> List(Y, BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, 
REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BGE-> List(Y, BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - J-> List(Y, BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JAL-> List(Y, BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + J-> List(Y, BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JAL-> List(Y, BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, 
N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LB-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LH-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LW-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LD-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LBU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LHU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LWU-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SB-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SH-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SW-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SD-> List(xpr64, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LB-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LH-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LW-> List(Y, BR_N, 
REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LD-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LBU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LHU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LWU-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SB-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SH-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SW-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SD-> List(xpr64,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, 
N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOADD_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOSWAP_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOAND_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOOR_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMIN_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMINU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAX_D-> List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAXU_D->List(xpr64, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, 
DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOADD_D-> List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOSWAP_D->List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOAND_D-> List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOOR_D-> List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMIN_D-> List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMINU_D->List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAX_D-> List(xpr64,BR_N, 
REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAXU_D->List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LUI-> List(Y, BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ORI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - XORI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvAND-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRA-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LUI-> List(Y, BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ORI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, 
M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + XORI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvAND-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRA-> List(Y, BR_N, 
REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAIW-> List(xpr64, BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SUBW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADDIW-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLIW-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLIW-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAIW-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADDW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SUBW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, 
Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMUW-> List(xpr64, BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, 
WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVUW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMUW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,N), - EI-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y,Y), - DI-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y,Y), - ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y,N), - FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N), - FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N), - CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y), - MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y,N), - MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y,Y), - RDTIME-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDCYCLE-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDINSTRET->List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - - // Instructions that have not yet been implemented - // Faking these for now so akaros will boot - //MFFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - //MTFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLW-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLD-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSW-> List(Y, BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSD-> List(Y, BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N) -/* - // floating point - FLW-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_FRD, MT_WU,N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - FLD-> List(FPU_Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_FRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - FSW-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_FWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), - FSD-> List(FPU_Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_FWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N), -*/ - )); + SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,N), + EI-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, 
N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y,Y), + DI-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y,Y), + ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y,N), + FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N), + FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N), + CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y), + MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y,N), + MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y,Y), + RDTIME-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDCYCLE-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDINSTRET->List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + + // Instructions that have not yet been implemented + // Faking these for now so akaros will boot + //MFFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + //MTFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLW-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X 
,SYNC_N,N,N,N,N), + FLD-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSW-> List(Y, BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSD-> List(Y, BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N) + )) + + val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs + val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 + val id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 + + val veccs = + ListLookup(io.dpath.inst, + // appvlmask + // | vcmdq + // | | vximm1q + // | | | vximm2q + // val ren2 ren1 vcmd vimm fn | | | | vackq + // | | | | | | | | | | | + List(N,REN_N,REN_N,VCMD_X, VIMM_X, VEC_X ,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,REN_N,REN_Y,VCMD_I, VIMM_VLEN,VEC_CFG,N,Y,Y,N,N), + VSETVL-> List(Y,REN_N,REN_Y,VCMD_I, VIMM_VLEN,VEC_VL ,N,Y,Y,N,N), + VF-> List(Y,REN_Y,REN_Y,VCMD_I, VIMM_ALU, VEC_X ,Y,Y,Y,N,N), + VMVV-> List(Y,REN_N,REN_N,VCMD_TX,VIMM_X, VEC_X ,Y,Y,N,N,N), + VMSV-> List(Y,REN_N,REN_Y,VCMD_TX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VFMVV-> List(Y,REN_N,REN_N,VCMD_TF,VIMM_X, VEC_X ,Y,Y,N,N,N), + FENCE_L_V-> List(Y,REN_N,REN_N,VCMD_F, VIMM_X, VEC_X ,N,Y,N,N,N), + FENCE_G_V-> List(Y,REN_N,REN_N,VCMD_F, VIMM_X, VEC_X ,N,Y,N,N,N), + FENCE_L_CV->List(Y,REN_N,REN_N,VCMD_F, VIMM_X, VEC_X ,N,Y,N,N,Y), + FENCE_G_CV->List(Y,REN_N,REN_N,VCMD_F, VIMM_X, VEC_X ,N,Y,N,N,Y), + VLD-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VLW-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VLWU-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VLH-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X 
,Y,Y,Y,N,N), + VLHU-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VLB-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VLBU-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VSD-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VSW-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VSH-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VSB-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VFLD-> List(Y,REN_N,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VFLW-> List(Y,REN_N,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VFSD-> List(Y,REN_N,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VFSW-> List(Y,REN_N,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), + VLSTD-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VLSTW-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VLSTWU-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VLSTH-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VLSTHU-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VLSTB-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VLSTBU-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VSSTD-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VSSTW-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VSSTH-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VSSTB-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VFLSTD-> List(Y,REN_Y,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VFLSTW-> List(Y,REN_Y,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VFSSTD-> List(Y,REN_Y,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), + VFSSTW-> List(Y,REN_Y,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N) + )) + + val id_vec_val :: id_renv2 :: id_renv1 :: id_sel_vcmd :: id_sel_vimm :: id_fn_vec :: id_vec_appvlmask :: veccs0 = veccs + val id_vec_cmdq_val :: id_vec_ximm1q_val :: id_vec_ximm2q_val :: id_vec_ackq_wait :: Nil = veccs0 val if_reg_xcpt_ma_inst = 
Reg(io.dpath.xcpt_ma_inst, resetVal = Bool(false)); - val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: csremainder = cs; - val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = csremainder; - val id_raddr3 = io.dpath.inst(16,12); val id_raddr2 = io.dpath.inst(21,17); val id_raddr1 = io.dpath.inst(26,22); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index ac308504..5c2d2bc7 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -2,8 +2,10 @@ package Top { import Chisel._ import Node._; + import Constants._ import Instructions._ +import hwacha._ class ioDpathDmem extends Bundle() { @@ -32,6 +34,9 @@ class ioDpathAll extends Bundle() val debug = new ioDebug(); val dmem = new ioDpathDmem(); val imem = new ioDpathImem(); + val vcmdq = new io_vec_cmdq(List("bits")) + val vximm1q = new io_vec_ximm1q(List("bits")) + val vximm2q = new io_vec_ximm2q(List("bits")) val ptbr_wen = Bool(OUTPUT); val ptbr = UFix(PADDR_BITS, OUTPUT); val fpu = new ioDpathFPU(); From 8b6b0f5367781cd06d2ffae5d79c88626c680f92 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Feb 2012 22:30:45 -0800 Subject: [PATCH 0140/1087] add external memory request interface for vec unit --- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/cpu.scala | 3 +++ rocket/src/main/scala/ctrl.scala | 31 +++++++++++++++++++----------- rocket/src/main/scala/dpath.scala | 30 ++++++++++++++++------------- 4 files changed, 41 insertions(+), 25 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 6f52d4d7..6902c157 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -171,7 +171,7 @@ object Constants // 
rocketNBDCacheDM parameters val CPU_DATA_BITS = 64; - val CPU_TAG_BITS = 6; + val CPU_TAG_BITS = 9; val DCACHE_TAG_BITS = 1 + CPU_TAG_BITS; val OFFSET_BITS = 6; // log2(cache line size in bytes) val NMSHR = 2; // number of primary misses diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index bffb539c..edb82633 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -122,6 +122,9 @@ class rocketProc extends Component dpath.io.fpu <> fpu.io.dpath } + ctrl.io.ext_mem.req_val := Bool(false) + dpath.io.ext_mem.req_val := Bool(false) + if (HAVE_VEC) { val vu = new vu() diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 3bad41c9..fe0ee43f 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -35,6 +35,7 @@ class ioCtrlDpath extends Bundle() val id_eret = Bool(OUTPUT); val wb_eret = Bool(OUTPUT); val mem_load = Bool(OUTPUT); + val ex_ext_mem_val = Bool(OUTPUT); val ex_fp_val= Bool(OUTPUT); val ex_wen = Bool(OUTPUT); val mem_wen = Bool(OUTPUT); @@ -78,6 +79,7 @@ class ioCtrlAll extends Bundle() val console = new ioConsole(List("rdy")); val imem = new ioImem(List("req_val", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip(); + val ext_mem = new ioDmem(List("req_val", "req_cmd", "req_type", "resp_nack")) val vcmdq = new io_vec_cmdq(List("ready", "valid")) val vximm1q = new io_vec_ximm1q(List("ready", "valid")) val vximm2q = new io_vec_ximm2q(List("ready", "valid")) @@ -319,7 +321,8 @@ class rocketCtrl extends Component val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val ex_reg_fp_val = Reg(resetVal = Bool(false)); val ex_reg_replay = Reg(resetVal = Bool(false)); - val ex_reg_load_use = Reg(resetVal = Bool(false)); + val ex_reg_load_use = Reg(resetVal = Bool(false)); + val ex_reg_ext_mem_val = Reg(resetVal = Bool(false)) val mem_reg_wen = Reg(resetVal = 
Bool(false)); val mem_reg_fp_wen = Reg(resetVal = Bool(false)); @@ -334,6 +337,7 @@ class rocketCtrl extends Component val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val mem_reg_replay = Reg(resetVal = Bool(false)); val mem_reg_kill = Reg(resetVal = Bool(false)); + val mem_reg_ext_mem_val = Reg(resetVal = Bool(false)) val wb_reg_wen = Reg(resetVal = Bool(false)); val wb_reg_fp_wen = Reg(resetVal = Bool(false)); @@ -409,8 +413,9 @@ class rocketCtrl extends Component ex_reg_replay <== id_reg_replay || ex_reg_replay_next; ex_reg_load_use <== id_load_use; } - ex_reg_mem_cmd <== id_mem_cmd; - ex_reg_mem_type <== id_mem_type; + ex_reg_ext_mem_val <== io.ext_mem.req_val + ex_reg_mem_cmd <== Mux(io.ext_mem.req_val, io.ext_mem.req_cmd, id_mem_cmd).toUFix + ex_reg_mem_type <== Mux(io.ext_mem.req_val, io.ext_mem.req_type, id_mem_type).toUFix val beq = io.dpath.br_eq; val bne = ~io.dpath.br_eq; @@ -467,6 +472,7 @@ class rocketCtrl extends Component mem_reg_xcpt_fpu <== ex_reg_fp_val && !io.dpath.status(SR_EF).toBool; mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; } + mem_reg_ext_mem_val <== ex_reg_ext_mem_val; mem_reg_mem_cmd <== ex_reg_mem_cmd; mem_reg_mem_type <== ex_reg_mem_type; @@ -575,13 +581,14 @@ class rocketCtrl extends Component val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match val take_pc_ex = !ex_btb_match && br_taken || ex_reg_btb_hit && !br_taken val take_pc_wb = wb_reg_replay || wb_reg_exception || wb_reg_eret; - take_pc <== take_pc_ex || take_pc_wb; + take_pc := take_pc_ex || take_pc_wb; // replay mem stage PC on a DTLB miss or a long-latency writeback val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val - val replay_mem = io.dtlb_miss || mem_reg_wen && mem_ll_wb || io.dmem.resp_nack || mem_reg_replay - val kill_mem = io.dtlb_miss || mem_reg_wen && mem_ll_wb || io.dmem.resp_nack || take_pc_wb || mem_exception || mem_reg_kill - val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception 
|| mem_reg_kill + val dmem_kill_mem = io.dpath.mem_valid && (io.dtlb_miss || io.dmem.resp_nack) + val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay + val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill + val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions @@ -666,7 +673,7 @@ class rocketCtrl extends Component ( id_ex_hazard || id_mem_hazard || id_wb_hazard || id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || - id_stall_fpu || + id_stall_fpu || io.ext_mem.req_val || id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || id_console_out_val && !io.console.rdy @@ -678,7 +685,6 @@ class rocketCtrl extends Component io.flush_inst := wb_reg_flush_inst; - io.dpath.stallf := ctrl_stallf; io.dpath.stalld := ctrl_stalld; io.dpath.killf := ctrl_killf; @@ -689,13 +695,14 @@ class rocketCtrl extends Component io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen io.dpath.ren2 := id_renx2.toBool; io.dpath.ren1 := id_renx1.toBool; - io.dpath.sel_alu2 := id_sel_alu2; + io.dpath.sel_alu2 := Mux(io.ext_mem.req_val, A2_ZERO, id_sel_alu2) io.dpath.fn_dw := id_fn_dw.toBool; io.dpath.fn_alu := id_fn_alu; io.dpath.div_fn := id_div_fn; io.dpath.div_val := id_div_val.toBool; io.dpath.mul_fn := id_mul_fn; io.dpath.mul_val := id_mul_val.toBool; + io.dpath.ex_ext_mem_val := ex_reg_ext_mem_val; io.dpath.ex_fp_val:= ex_reg_fp_val; io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; @@ -711,10 +718,12 @@ class rocketCtrl extends Component io.dtlb_val := ex_reg_mem_val; io.dtlb_kill := mem_reg_kill; - io.dmem.req_val := ex_reg_mem_val; + io.dmem.req_val := ex_reg_mem_val || ex_reg_ext_mem_val; io.dmem.req_kill := kill_dcache; io.dmem.req_cmd := 
ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; + + io.ext_mem.resp_nack:= mem_reg_ext_mem_val && (io.dmem.resp_nack || Reg(!io.dmem.req_rdy)) } } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 5c2d2bc7..68886260 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -33,6 +33,7 @@ class ioDpathAll extends Bundle() val console = new ioConsole(List("valid","bits")); val debug = new ioDebug(); val dmem = new ioDpathDmem(); + val ext_mem = new ioDmem(List("req_val", "req_idx", "req_ppn", "resp_val", "resp_data", "resp_tag")) val imem = new ioDpathImem(); val vcmdq = new io_vec_cmdq(List("bits")) val vximm1q = new io_vec_ximm1q(List("bits")) @@ -194,16 +195,18 @@ class rocketDpath extends Component // bypass muxes val id_rs1 = + Mux(io.ext_mem.req_val, Cat(io.ext_mem.req_ppn, io.ext_mem.req_idx), Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, ex_wdata, Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, mem_wdata, Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, - id_rdata1))); + id_rdata1)))) val id_rs2 = + Mux(io.ext_mem.req_val, io.ext_mem.req_data, Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, ex_wdata, Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, mem_wdata, Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, - id_rdata2))); + id_rdata2)))) // immediate generation val id_imm_bj = io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE @@ -290,14 +293,8 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module io.dmem.req_addr := ex_effective_address.toUFix; - if (HAVE_FPU) { - io.dmem.req_data := Mux(io.ctrl.ex_fp_val, io.fpu.store_data, ex_reg_rs2) - io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val).toUFix - } - else { - io.dmem.req_data := ex_reg_rs2 - io.dmem.req_tag := Cat(ex_reg_waddr, 
Bool(false)).toUFix - } + io.dmem.req_data := (if (HAVE_FPU) Mux(io.ctrl.ex_fp_val, io.fpu.store_data, ex_reg_rs2) else ex_reg_rs2) + io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val, io.ctrl.ex_ext_mem_val).toUFix // processor control regfile read pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_eret; @@ -364,9 +361,12 @@ class rocketDpath extends Component // 32/64 bit load handling (moved to earlier in file) // writeback stage - val dmem_resp_fpu = if (HAVE_FPU) io.dmem.resp_tag(0).toBool else Bool(false) - val dmem_resp_waddr = io.dmem.resp_tag.toUFix >> UFix(1) - dmem_resp_replay := io.dmem.resp_replay && !dmem_resp_fpu; + val dmem_resp_ext = io.dmem.resp_tag(0).toBool + val dmem_resp_xpu = !io.dmem.resp_tag(0).toBool && !io.dmem.resp_tag(1).toBool + val dmem_resp_fpu = !io.dmem.resp_tag(0).toBool && io.dmem.resp_tag(1).toBool + val dmem_resp_waddr = io.dmem.resp_tag.toUFix >> UFix(2) + val dmem_resp_ext_tag = io.dmem.resp_tag.toUFix >> UFix(1) + dmem_resp_replay := io.dmem.resp_replay && dmem_resp_xpu; r_dmem_resp_replay <== dmem_resp_replay r_dmem_resp_waddr <== dmem_resp_waddr r_dmem_fp_replay <== io.dmem.resp_replay && dmem_resp_fpu; @@ -402,6 +402,10 @@ class rocketDpath extends Component rfile.io.w0.addr := wb_reg_waddr rfile.io.w0.en := io.ctrl.wb_wen || wb_reg_ll_wb rfile.io.w0.data := wb_wdata + + io.ext_mem.resp_val := Reg(io.dmem.resp_val && dmem_resp_ext, resetVal = Bool(false)) + io.ext_mem.resp_tag := Reg(dmem_resp_ext_tag) + io.ext_mem.resp_data := io.dmem.resp_data_subword io.ctrl.wb_waddr := wb_reg_waddr; io.ctrl.mem_wb := dmem_resp_replay; From fcc8081c4dc8daba877a9545386fae53884867f7 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 9 Feb 2012 01:28:16 -0800 Subject: [PATCH 0141/1087] hook up the vector command queue --- rocket/src/main/scala/consts.scala | 12 +- rocket/src/main/scala/cpu.scala | 3 - rocket/src/main/scala/ctrl.scala | 325 ++++++++++++------------- rocket/src/main/scala/dpath.scala | 46 +++- 
rocket/src/main/scala/dpath_util.scala | 11 + rocket/src/main/scala/dpath_vec.scala | 179 ++++++++++++++ 6 files changed, 396 insertions(+), 180 deletions(-) create mode 100644 rocket/src/main/scala/dpath_vec.scala diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 6902c157..1e25faf8 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -65,6 +65,7 @@ object Constants val WB_ALU = UFix(2, 3); val WB_TSC = UFix(4, 3); val WB_IRT = UFix(5, 3); + val WB_VEC = UFix(6, 3); val N = UFix(0, 1); val Y = UFix(1, 1); @@ -146,8 +147,8 @@ object Constants val PCR_K1 = UFix(13, 5); val PCR_TOHOST = UFix(16, 5); val PCR_FROMHOST = UFix(17, 5); - val PCR_CONSOLE = UFix(18, 5); - + val PCR_VECBANK = UFix(18, 5); + val PCR_CONSOLE = UFix(19, 5); // definition of bits in PCR status reg val SR_ET = 0; // enable traps @@ -222,10 +223,9 @@ object Constants val VCMD_MF = UFix(5, 3) val VCMD_X = UFix(0, 3) - val VIMM_VLEN = UFix(0, 2) - val VIMM_ALU = UFix(1, 2) - val VIMM_RS1 = UFix(2, 2) - val VIMM_X = UFix(0, 2) + val VIMM_VLEN = UFix(0, 1) + val VIMM_ALU = UFix(1, 1) + val VIMM_X = UFix(0, 1) } } diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index edb82633..beed47df 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -129,11 +129,8 @@ class rocketProc extends Component { val vu = new vu() - vu.io.vec_cmdq <> ctrl.io.vcmdq vu.io.vec_cmdq <> dpath.io.vcmdq - vu.io.vec_ximm1q <> ctrl.io.vximm1q vu.io.vec_ximm1q <> dpath.io.vximm1q - vu.io.vec_ximm2q <> ctrl.io.vximm2q vu.io.vec_ximm2q <> dpath.io.vximm2q } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index fe0ee43f..2880ae5b 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -80,9 +80,6 @@ class ioCtrlAll extends Bundle() val imem = new ioImem(List("req_val", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", 
"req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip(); val ext_mem = new ioDmem(List("req_val", "req_cmd", "req_type", "resp_nack")) - val vcmdq = new io_vec_cmdq(List("ready", "valid")) - val vximm1q = new io_vec_ximm1q(List("ready", "valid")) - val vximm2q = new io_vec_ximm2q(List("ready", "valid")) val dtlb_val = Bool(OUTPUT); val dtlb_kill = Bool(OUTPUT); val dtlb_rdy = Bool(INPUT); @@ -105,182 +102,170 @@ class rocketCtrl extends Component val xpr64 = Y; val cs = ListLookup(io.dpath.inst, - // eret - // | syscall - // mem_val mul_val div_val renpcr | | privileged - // val brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),Array( - BNE-> List(Y, BR_NE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BEQ-> List(Y, BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BLT-> List(Y, BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BLTU-> List(Y, BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BGE-> List(Y, BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BGEU-> List(Y, BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + // eret + // | syscall + // vec_val mem_val mul_val div_val renpcr | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | replay_next + // 
| | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),Array( + BNE-> List(Y, N,BR_NE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BEQ-> List(Y, N,BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BLT-> List(Y, N,BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BLTU-> List(Y, N,BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BGE-> List(Y, N,BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + BGEU-> List(Y, N,BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - J-> List(Y, BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JAL-> List(Y, BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_C-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_J-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_R-> List(Y, BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDNPC-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + J-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, 
M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JAL-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_C-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_J-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + JALR_R-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDNPC-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LB-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LH-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LW-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LD-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LBU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LHU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LWU-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SB-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - 
SH-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SW-> List(Y, BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SD-> List(xpr64,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LB-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LH-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LW-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LD-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LBU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LHU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + LWU-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SW-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SD-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, 
N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOADD_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOSWAP_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOAND_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOOR_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMIN_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMINU_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAX_W-> List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAXU_W->List(Y, BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOADD_D-> List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOSWAP_D->List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOAND_D-> List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOOR_D-> List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMIN_D-> List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, 
M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMINU_D->List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAX_D-> List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAXU_D->List(xpr64,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOADD_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOSWAP_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOAND_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOOR_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMIN_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMINU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAX_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAXU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOADD_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOSWAP_D-> 
List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOAND_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOOR_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAX_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LUI-> List(Y, BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTI -> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTIU-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ANDI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ORI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - XORI-> List(Y, BR_N, 
REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLI-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAI-> List(Y_SH, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADD-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SUB-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLT-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTU-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvAND-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvXOR-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLL-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRL-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRA-> List(Y, BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X 
,SYNC_N,N,N,N,N), + LUI-> List(Y, N,BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTI -> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTIU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ANDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + XORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLI-> List(Y_SH, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAI-> List(Y_SH, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADD-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SUB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLT-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLTU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, 
MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvAND-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + riscvXOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRA-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDIW-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLIW-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLIW-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAIW-> List(xpr64,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SUBW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - 
SRLW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADDIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + ADDW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SUBW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SLLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MUL-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULH-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULHU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, 
WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULHSU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULHSU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIV-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REM-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMU-> List(Y, BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVUW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, 
REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMUW-> List(xpr64,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIV-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REM-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + DIVUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + REMUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SYSCALL-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,N), - EI-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y,Y), - DI-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y,Y), - ERET-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y,N), - FENCE-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X 
,SYNC_D,N,N,N,N), - FENCE_I-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N), - CFLUSH-> List(Y, BR_N, REN_Y,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y), - MFPCR-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y,N), - MTPCR-> List(Y, BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y,Y), - RDTIME-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDCYCLE-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDINSTRET->List(Y, BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SYSCALL-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,N), + EI-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y,Y), + DI-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y,Y), + ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y,N), + FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N), + FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N), + CFLUSH-> List(Y, N,BR_N, REN_Y,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X 
,SYNC_N,N,N,Y,Y), + MFPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y,N), + MTPCR-> List(Y, N,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y,Y), + RDTIME-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDCYCLE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + RDINSTRET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), // Instructions that have not yet been implemented // Faking these for now so akaros will boot - //MFFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - //MTFSR-> List(Y, BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLW-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLD-> List(Y, BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSW-> List(Y, BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSD-> List(Y, BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N) + //MFFSR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + //MTFSR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLW-> 
List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLD-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSW-> List(Y, N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSD-> List(Y, N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + + // Vector Stuff + VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_VEC,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), + VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_VEC,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), + VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + FENCE_L_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + FENCE_G_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + FENCE_L_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + FENCE_G_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, 
MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLBU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, 
SYNC_N,N,N,N,N), + VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLSTD-> List(VEC_Y,Y,BR_N, 
REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N) )) - val id_int_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs + val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 val id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 - val veccs = - ListLookup(io.dpath.inst, - // appvlmask - // | vcmdq - // | | vximm1q - // | | | vximm2q - // val ren2 ren1 vcmd vimm fn | | | | vackq - // | | | | | | | | | | | - List(N,REN_N,REN_N,VCMD_X, VIMM_X, VEC_X ,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,REN_N,REN_Y,VCMD_I, VIMM_VLEN,VEC_CFG,N,Y,Y,N,N), - VSETVL-> List(Y,REN_N,REN_Y,VCMD_I, VIMM_VLEN,VEC_VL ,N,Y,Y,N,N), - VF-> List(Y,REN_Y,REN_Y,VCMD_I, VIMM_ALU, VEC_X ,Y,Y,Y,N,N), - VMVV-> List(Y,REN_N,REN_N,VCMD_TX,VIMM_X, VEC_X ,Y,Y,N,N,N), - VMSV-> List(Y,REN_N,REN_Y,VCMD_TX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VFMVV-> List(Y,REN_N,REN_N,VCMD_TF,VIMM_X, VEC_X ,Y,Y,N,N,N), - FENCE_L_V-> List(Y,REN_N,REN_N,VCMD_F, VIMM_X, VEC_X ,N,Y,N,N,N), - FENCE_G_V-> List(Y,REN_N,REN_N,VCMD_F, VIMM_X, VEC_X ,N,Y,N,N,N), - FENCE_L_CV->List(Y,REN_N,REN_N,VCMD_F, VIMM_X, VEC_X ,N,Y,N,N,Y), - FENCE_G_CV->List(Y,REN_N,REN_N,VCMD_F, VIMM_X, VEC_X ,N,Y,N,N,Y), - VLD-> 
List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VLW-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VLWU-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VLH-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VLHU-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VLB-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VLBU-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VSD-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VSW-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VSH-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VSB-> List(Y,REN_N,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VFLD-> List(Y,REN_N,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VFLW-> List(Y,REN_N,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VFSD-> List(Y,REN_N,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VFSW-> List(Y,REN_N,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,N,N), - VLSTD-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VLSTW-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VLSTWU-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VLSTH-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VLSTHU-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VLSTB-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VLSTBU-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VSSTD-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VSSTW-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VSSTH-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VSSTB-> List(Y,REN_Y,REN_Y,VCMD_MX,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VFLSTD-> List(Y,REN_Y,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VFLSTW-> List(Y,REN_Y,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VFSSTD-> List(Y,REN_Y,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N), - VFSSTW-> List(Y,REN_Y,REN_Y,VCMD_MF,VIMM_RS1, VEC_X ,Y,Y,Y,Y,N) - )) - - val id_vec_val :: id_renv2 :: 
id_renv1 :: id_sel_vcmd :: id_sel_vimm :: id_fn_vec :: id_vec_appvlmask :: veccs0 = veccs - val id_vec_cmdq_val :: id_vec_ximm1q_val :: id_vec_ximm2q_val :: id_vec_ackq_wait :: Nil = veccs0 - val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst, resetVal = Bool(false)); val id_raddr3 = io.dpath.inst(16,12); @@ -320,6 +305,7 @@ class rocketCtrl extends Component val ex_reg_xcpt_privileged = Reg(resetVal = Bool(false)); val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val ex_reg_fp_val = Reg(resetVal = Bool(false)); + val ex_reg_vec_val = Reg(resetVal = Bool(false)); val ex_reg_replay = Reg(resetVal = Bool(false)); val ex_reg_load_use = Reg(resetVal = Bool(false)); val ex_reg_ext_mem_val = Reg(resetVal = Bool(false)) @@ -334,6 +320,7 @@ class rocketCtrl extends Component val mem_reg_xcpt_illegal = Reg(resetVal = Bool(false)); val mem_reg_xcpt_privileged = Reg(resetVal = Bool(false)); val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); + val mem_reg_xcpt_vec = Reg(resetVal = Bool(false)); val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val mem_reg_replay = Reg(resetVal = Bool(false)); val mem_reg_kill = Reg(resetVal = Bool(false)); @@ -367,7 +354,9 @@ class rocketCtrl extends Component } // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim) - val illegal_inst = !(id_int_val.toBool || fpdec.io.valid) || (id_eret.toBool && io.dpath.status(SR_ET).toBool); + val illegal_inst = + !(id_int_val.toBool || fpdec.io.valid || id_vec_val.toBool) || + (id_eret.toBool && io.dpath.status(SR_ET).toBool); when (reset.toBool || io.dpath.killd) { ex_reg_br_type <== BR_N; @@ -388,6 +377,7 @@ class rocketCtrl extends Component ex_reg_xcpt_privileged <== Bool(false); ex_reg_xcpt_syscall <== Bool(false); ex_reg_fp_val <== Bool(false); + ex_reg_vec_val <== Bool(false); ex_reg_replay <== Bool(false); ex_reg_load_use <== Bool(false); } @@ -410,6 +400,7 @@ class rocketCtrl extends Component ex_reg_xcpt_privileged <== (id_privileged & 
~io.dpath.status(SR_S)).toBool; ex_reg_xcpt_syscall <== id_syscall.toBool; ex_reg_fp_val <== fpdec.io.valid; + ex_reg_vec_val <== id_vec_val.toBool ex_reg_replay <== id_reg_replay || ex_reg_replay_next; ex_reg_load_use <== id_load_use; } @@ -454,6 +445,7 @@ class rocketCtrl extends Component mem_reg_xcpt_illegal <== Bool(false); mem_reg_xcpt_privileged <== Bool(false); mem_reg_xcpt_fpu <== Bool(false); + mem_reg_xcpt_vec <== Bool(false); mem_reg_xcpt_syscall <== Bool(false); } otherwise { @@ -470,6 +462,7 @@ class rocketCtrl extends Component mem_reg_xcpt_illegal <== ex_reg_xcpt_illegal; mem_reg_xcpt_privileged <== ex_reg_xcpt_privileged; mem_reg_xcpt_fpu <== ex_reg_fp_val && !io.dpath.status(SR_EF).toBool; + mem_reg_xcpt_vec <== ex_reg_vec_val && !io.dpath.status(SR_EV).toBool; mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; } mem_reg_ext_mem_val <== ex_reg_ext_mem_val; @@ -559,6 +552,7 @@ class rocketCtrl extends Component mem_reg_xcpt_illegal || mem_reg_xcpt_privileged || mem_reg_xcpt_fpu || + mem_reg_xcpt_vec || mem_reg_xcpt_syscall || mem_reg_xcpt_itlb || mem_reg_xcpt_ma_inst; @@ -575,7 +569,8 @@ class rocketCtrl extends Component Mux(mem_xcpt_ma_st, UFix(9,5), // misaligned store Mux(mem_xcpt_dtlb_ld, UFix(10,5), // load fault Mux(mem_xcpt_dtlb_st, UFix(11,5), // store fault - UFix(0,5))))))))))); // instruction address misaligned + Mux(mem_reg_xcpt_vec, UFix(12,5), // vector disabled + UFix(0,5)))))))))))); // instruction address misaligned // control transfer from ex/mem val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 68886260..d5981227 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -33,11 +33,11 @@ class ioDpathAll extends Bundle() val console = new ioConsole(List("valid","bits")); val debug = new ioDebug(); val dmem = new ioDpathDmem(); - val ext_mem = new ioDmem(List("req_val", "req_idx", "req_ppn", "resp_val", 
"resp_data", "resp_tag")) + val ext_mem = new ioDmem(List("req_val", "req_idx", "req_ppn", "req_data", "resp_val", "resp_data", "resp_tag")) val imem = new ioDpathImem(); - val vcmdq = new io_vec_cmdq(List("bits")) - val vximm1q = new io_vec_ximm1q(List("bits")) - val vximm2q = new io_vec_ximm2q(List("bits")) + val vcmdq = new io_vec_cmdq() + val vximm1q = new io_vec_ximm1q() + val vximm2q = new io_vec_ximm2q() val ptbr_wen = Bool(OUTPUT); val ptbr = UFix(PADDR_BITS, OUTPUT); val fpu = new ioDpathFPU(); @@ -54,6 +54,8 @@ class rocketDpath extends Component val pcr = new rocketDpathPCR(); val ex_pcr = pcr.io.r.data; + val vec = new rocketDpathVec() + val alu = new rocketDpathALU(); val ex_alu_out = alu.io.out; val ex_alu_adder_out = alu.io.adder_out; @@ -81,6 +83,8 @@ class rocketDpath extends Component // execute definitions val ex_reg_valid = Reg(resetVal = Bool(false)); val ex_reg_pc = Reg() { UFix() }; + val ex_reg_inst = Reg() { Bits() }; + val ex_reg_raddr1 = Reg() { UFix() }; val ex_reg_raddr2 = Reg() { UFix() }; val ex_reg_op2 = Reg() { Bits() }; val ex_reg_rs2 = Reg() { Bits() }; @@ -101,8 +105,11 @@ class rocketDpath extends Component // memory definitions val mem_reg_valid = Reg(resetVal = Bool(false)); val mem_reg_pc = Reg() { UFix() }; + val mem_reg_inst = Reg() { Bits() }; + val mem_reg_rs2 = Reg() { Bits() }; val mem_reg_waddr = Reg() { UFix() }; val mem_reg_wdata = Reg() { Bits() }; + val mem_reg_raddr1 = Reg() { UFix() }; val mem_reg_raddr2 = Reg() { UFix() }; val mem_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); val mem_reg_ctrl_div_val = Reg(resetVal = Bool(false)); @@ -112,8 +119,11 @@ class rocketDpath extends Component // writeback definitions val wb_reg_valid = Reg(resetVal = Bool(false)); val wb_reg_pc = Reg() { UFix() }; + val wb_reg_inst = Reg() { Bits() }; + val wb_reg_rs2 = Reg() { Bits() }; val wb_reg_waddr = Reg() { UFix() }; val wb_reg_wdata = Reg() { Bits() }; + val wb_reg_raddr1 = Reg() { UFix() }; val wb_reg_raddr2 = Reg() { 
UFix() }; val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val wb_reg_ll_wb = Reg(resetVal = Bool(false)); @@ -230,6 +240,8 @@ class rocketDpath extends Component // execute stage ex_reg_pc <== id_reg_pc; + ex_reg_inst <== id_reg_inst + ex_reg_raddr1 <== id_raddr1 ex_reg_raddr2 <== id_raddr2; ex_reg_op2 <== id_op2; ex_reg_rs2 <== id_rs2; @@ -333,16 +345,20 @@ class rocketDpath extends Component Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, - ex_alu_out)))).toBits; // WB_ALU + Mux(ex_reg_ctrl_sel_wb === WB_VEC, vec.io.appvl, + ex_alu_out))))).toBits; // WB_ALU // memory stage mem_reg_pc <== ex_reg_pc; + mem_reg_inst <== ex_reg_inst + mem_reg_rs2 <== ex_reg_rs2 mem_reg_waddr <== ex_reg_waddr; mem_reg_wdata <== ex_wdata; + mem_reg_raddr1 <== ex_reg_raddr1 mem_reg_raddr2 <== ex_reg_raddr2; mem_reg_ctrl_mul_val <== ex_reg_ctrl_mul_val; mem_reg_ctrl_div_val <== ex_reg_ctrl_div_val; - + when (io.ctrl.killx) { mem_reg_valid <== Bool(false); mem_reg_ctrl_wen_pcr <== Bool(false); @@ -382,9 +398,12 @@ class rocketDpath extends Component (dmem_resp_replay || div_result_val || mul_result_val) wb_reg_pc <== mem_reg_pc; + wb_reg_inst <== mem_reg_inst wb_reg_ll_wb <== mem_ll_wb + wb_reg_rs2 <== mem_reg_rs2 wb_reg_waddr <== mem_ll_waddr wb_reg_wdata <== mem_ll_wdata + wb_reg_raddr1 <== mem_reg_raddr1 wb_reg_raddr2 <== mem_reg_raddr2; when (io.ctrl.killm) { @@ -409,6 +428,21 @@ class rocketDpath extends Component io.ctrl.wb_waddr := wb_reg_waddr; io.ctrl.mem_wb := dmem_resp_replay; + + // vector datapath + vec.io.valid := wb_reg_valid + vec.io.sr_ev := pcr.io.status(SR_EV) + vec.io.inst := wb_reg_inst + vec.io.waddr := wb_reg_waddr + vec.io.raddr1 := wb_reg_raddr1 + vec.io.vecbank := pcr.io.vecbank + vec.io.vecbankcnt := pcr.io.vecbankcnt + vec.io.wdata := wb_reg_wdata + vec.io.rs2 := wb_reg_rs2 + + vec.io.vcmdq <> io.vcmdq + vec.io.vximm1q <> io.vximm1q + vec.io.vximm2q <> io.vximm2q // 
scoreboard clear (for div/mul and D$ load miss writebacks) io.ctrl.sboard_clr := mem_ll_wb diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index e821889b..32197f7b 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -62,6 +62,8 @@ class ioDpathPCR extends Bundle() val irq_ipi = Bool(OUTPUT); val console_data = Bits(8, OUTPUT); val console_val = Bool(OUTPUT); + val vecbank = Bits(8, OUTPUT) + val vecbankcnt = UFix(4, OUTPUT) } class rocketDpathPCR extends Component @@ -79,6 +81,7 @@ class rocketDpathPCR extends Component val reg_k0 = Reg() { Bits() }; val reg_k1 = Reg() { Bits() }; val reg_ptbr = Reg() { UFix() }; + val reg_vecbank = Reg(resetVal = Bits("b1111_1111", 8)) val reg_error_mode = Reg(resetVal = Bool(false)); val reg_status_vm = Reg(resetVal = Bool(false)); @@ -106,6 +109,12 @@ class rocketDpathPCR extends Component io.debug.error_mode := reg_error_mode; io.r.data := rdata; + io.vecbank := reg_vecbank + var cnt = UFix(0) + for (i <- 0 until 8) + cnt = cnt + reg_vecbank(i) + io.vecbankcnt := cnt(3,0) + val console_wen = !io.exception && io.w.en && (io.w.addr === PCR_CONSOLE); io.console_data := Mux(console_wen, io.w.data(7,0), Bits(0,8)); io.console_val := console_wen; @@ -176,6 +185,7 @@ class rocketDpathPCR extends Component when (io.w.addr === PCR_K0) { reg_k0 <== io.w.data; } when (io.w.addr === PCR_K1) { reg_k1 <== io.w.data; } when (io.w.addr === PCR_PTBR) { reg_ptbr <== Cat(io.w.data(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } + when (io.w.addr === PCR_VECBANK) { reg_vecbank <== io.w.data(7,0) } } otherwise { @@ -202,6 +212,7 @@ class rocketDpathPCR extends Component is (PCR_K0) { rdata <== reg_k0; } is (PCR_K1) { rdata <== reg_k1; } is (PCR_PTBR) { rdata <== Cat(Bits(0,64-PADDR_BITS), reg_ptbr); } + is (PCR_VECBANK) { rdata <== Cat(Bits(0, 56), reg_vecbank) } otherwise { rdata <== Bits(0,64); } } } diff --git 
a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala new file mode 100644 index 00000000..ae35484c --- /dev/null +++ b/rocket/src/main/scala/dpath_vec.scala @@ -0,0 +1,179 @@ +package Top + +import Chisel._ +import Node._ +import Constants._ +import Instructions._ +import hwacha._ + +class ioDpathVec extends Bundle +{ + val valid = Bool(INPUT) + val sr_ev = Bool(INPUT) + val inst = Bits(32, INPUT) + val waddr = UFix(5, INPUT) + val raddr1 = UFix(5, INPUT) + val vecbank = Bits(8, INPUT) + val vecbankcnt = UFix(4, INPUT) + val wdata = Bits(64, INPUT) + val rs2 = Bits(64, INPUT) + val appvl = UFix(12, OUTPUT) + val vcmdq = new io_vec_cmdq() + val vximm1q = new io_vec_ximm1q() + val vximm2q = new io_vec_ximm2q() +} + +class rocketDpathVec extends Component +{ + val io = new ioDpathVec() + + val veccs = + ListLookup(io.inst, + // appvlmask + // | vcmdq + // wen | | vximm1q + // val vcmd vimm | fn | | | vximm2q + // | | | | | | | | | + List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N), + VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, N,VEC_X ,Y,Y,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), + VLD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VSW-> 
List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y) + )) + + val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs + val wb_vec_cmdq_val :: wb_vec_ximm1q_val :: wb_vec_ximm2q_val :: Nil = veccs0 + + val nxregs = io.inst(15,10) + val nfregs = io.inst(21,16) + val nregs = (nxregs + nfregs)(6,0) + + val uts_per_bank = MuxLookup( + nregs, UFix(4,9), Array( + UFix(0,7) -> UFix(256,9), + UFix(1,7) -> UFix(256,9), + UFix(2,7) -> UFix(256,9), + UFix(3,7) -> UFix(128,9), + UFix(4,7) -> UFix(85,9), + UFix(5,7) -> UFix(64,9), + UFix(6,7) -> UFix(51,9), + UFix(7,7) -> UFix(42,9), + UFix(8,7) -> UFix(36,9), + UFix(9,7) -> UFix(32,9), + UFix(10,7) -> UFix(28,9), + UFix(11,7) -> UFix(25,9), + UFix(12,7) -> UFix(23,9), + UFix(13,7) -> UFix(21,9), + UFix(14,7) -> UFix(19,9), + UFix(15,7) -> UFix(18,9), + UFix(16,7) -> 
UFix(17,9), + UFix(17,7) -> UFix(16,9), + UFix(18,7) -> UFix(15,9), + UFix(19,7) -> UFix(14,9), + UFix(20,7) -> UFix(13,9), + UFix(21,7) -> UFix(12,9), + UFix(22,7) -> UFix(12,9), + UFix(23,7) -> UFix(11,9), + UFix(24,7) -> UFix(11,9), + UFix(25,7) -> UFix(10,9), + UFix(26,7) -> UFix(10,9), + UFix(27,7) -> UFix(9,9), + UFix(28,7) -> UFix(9,9), + UFix(29,7) -> UFix(9,9), + UFix(30,7) -> UFix(8,9), + UFix(31,7) -> UFix(8,9), + UFix(32,7) -> UFix(8,9), + UFix(33,7) -> UFix(8,9), + UFix(34,7) -> UFix(7,9), + UFix(35,7) -> UFix(7,9), + UFix(36,7) -> UFix(7,9), + UFix(37,7) -> UFix(7,9), + UFix(38,7) -> UFix(6,9), + UFix(39,7) -> UFix(6,9), + UFix(40,7) -> UFix(6,9), + UFix(41,7) -> UFix(6,9), + UFix(42,7) -> UFix(6,9), + UFix(43,7) -> UFix(6,9), + UFix(44,7) -> UFix(5,9), + UFix(45,7) -> UFix(5,9), + UFix(46,7) -> UFix(5,9), + UFix(47,7) -> UFix(5,9), + UFix(48,7) -> UFix(5,9), + UFix(49,7) -> UFix(5,9), + UFix(50,7) -> UFix(5,9), + UFix(51,7) -> UFix(5,9), + UFix(52,7) -> UFix(5,9) + )) + + val reg_hwvl = Reg(resetVal = UFix(32, 12)) + val reg_appvl0 = Reg(resetVal = Bool(true)) + val hwvl_vcfg = (uts_per_bank * io.vecbankcnt)(11,0) + val hwvl = Mux(wb_vec_fn.toBool, hwvl_vcfg, reg_hwvl) + val appvl = Mux(io.wdata(11,0) < hwvl, io.wdata(11,0), hwvl).toUFix + + when (io.valid && wb_vec_wen.toBool && wb_vec_fn.toBool) + { + reg_hwvl <== hwvl_vcfg + reg_appvl0 <== !(appvl.orR()) + } + + io.appvl := appvl + val vlenm1 = appvl - Bits(1,1) + + val valid_common = io.valid && io.sr_ev && wb_vec_val.toBool && !(wb_vec_appvlmask.toBool && reg_appvl0) + + io.vcmdq.valid := valid_common && wb_vec_cmdq_val + io.vximm1q.valid := valid_common && wb_vec_ximm1q_val + io.vximm2q.valid := valid_common && wb_vec_ximm2q_val + + io.vcmdq.bits := + Mux(wb_sel_vcmd === VCMD_I, Cat(Bits(0,2), Bits(0,4), io.inst(9,8), Bits(0,6), Bits(0,6)), + Mux(wb_sel_vcmd === VCMD_F, Cat(Bits(0,2), Bits(1,3), io.inst(9,7), Bits(0,6), Bits(0,6)), + Mux(wb_sel_vcmd === VCMD_TX, Cat(Bits(1,2), io.inst(13,8), 
Bits(0,1), io.waddr, Bits(0,1), io.raddr1), + Mux(wb_sel_vcmd === VCMD_TF, Cat(Bits(1,2), io.inst(13,8), Bits(1,1), io.waddr, Bits(1,1), io.raddr1), + Mux(wb_sel_vcmd === VCMD_MX, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(0,1), io.waddr, Bits(0,1), io.waddr), + Mux(wb_sel_vcmd === VCMD_MF, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(1,1), io.waddr, Bits(1,1), io.waddr), + Bits(0,20))))))) + + io.vximm1q.bits := + Mux(wb_sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, io.inst(21,10), vlenm1), + io.wdata) // VIMM_ALU + + io.vximm2q.bits := io.rs2 +} From 128ec567edfcf295abeea878220fe07caa7f5e28 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Feb 2012 01:32:52 -0800 Subject: [PATCH 0142/1087] make BTB fully associative; don't use it for JALR JALR created a long path from the ALU in execute stage to an address comparator to the next-PC mux. the benefit was close to nil, anyway. --- rocket/src/main/scala/consts.scala | 1 + rocket/src/main/scala/cpu.scala | 2 +- rocket/src/main/scala/ctrl.scala | 20 ++++----- rocket/src/main/scala/dpath.scala | 14 +++---- rocket/src/main/scala/dpath_util.scala | 57 +++++++++++++++++--------- rocket/src/main/scala/top.scala | 4 +- 6 files changed, 57 insertions(+), 41 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 1e25faf8..6ce9df0e 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -22,6 +22,7 @@ object Constants val PC_PCR = UFix(4, 3); val PC_WB = UFix(5, 3); val PC_EVEC = UFix(6, 3); + val PC_JR = UFix(7, 3); val KF_Y = UFix(1, 1); val KF_N = UFix(0, 1); diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index beed47df..978dc409 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -60,7 +60,7 @@ class rocketProc extends Component io.imem.req_idx := dpath.io.imem.req_addr(PGIDX_BITS-1,0); io.imem.req_ppn := 
itlb.io.cpu.resp_ppn; io.imem.req_val := ctrl.io.imem.req_val; - io.imem.invalidate := ctrl.io.flush_inst; + io.imem.invalidate := ctrl.io.dpath.flush_inst; ctrl.io.imem.resp_val := io.imem.resp_val; dpath.io.imem.resp_data := io.imem.resp_data; ctrl.io.xcpt_itlb := itlb.io.cpu.exception; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2880ae5b..26fce4fa 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -40,6 +40,7 @@ class ioCtrlDpath extends Bundle() val ex_wen = Bool(OUTPUT); val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); + val flush_inst = Bool(OUTPUT); // enable/disable interrupts val irq_enable = Bool(OUTPUT); val irq_disable = Bool(OUTPUT); @@ -50,7 +51,6 @@ class ioCtrlDpath extends Bundle() // inputs from datapath val xcpt_ma_inst = Bool(INPUT); // high on a misaligned/illegal virtual PC val btb_hit = Bool(INPUT); - val btb_match = Bool(INPUT); val inst = Bits(32, INPUT); val br_eq = Bool(INPUT); val br_lt = Bool(INPUT); @@ -84,7 +84,6 @@ class ioCtrlAll extends Bundle() val dtlb_kill = Bool(OUTPUT); val dtlb_rdy = Bool(INPUT); val dtlb_miss = Bool(INPUT); - val flush_inst = Bool(OUTPUT); val xcpt_dtlb_ld = Bool(INPUT); val xcpt_dtlb_st = Bool(INPUT); val xcpt_itlb = Bool(INPUT); @@ -422,8 +421,8 @@ class rocketCtrl extends Component (ex_reg_br_type === BR_LTU) & bltu | (ex_reg_br_type === BR_GE) & bge | (ex_reg_br_type === BR_GEU) & bgeu | - (ex_reg_br_type === BR_J) | - (ex_reg_br_type === BR_JR); // treat J/JAL/JALR like a taken branch + (ex_reg_br_type === BR_J); // treat J/JAL like taken branches + val jr_taken = ex_reg_br_type === BR_JR val mem_reg_div_mul_val = Reg(){Bool()}; val mem_reg_eret = Reg(){Bool()}; @@ -573,8 +572,7 @@ class rocketCtrl extends Component UFix(0,5)))))))))))); // instruction address misaligned // control transfer from ex/mem - val ex_btb_match = ex_reg_btb_hit && io.dpath.btb_match - val take_pc_ex = !ex_btb_match && br_taken || 
ex_reg_btb_hit && !br_taken + val take_pc_ex = ex_reg_btb_hit != br_taken || jr_taken val take_pc_wb = wb_reg_replay || wb_reg_exception || wb_reg_eret; take_pc := take_pc_ex || take_pc_wb; @@ -612,11 +610,12 @@ class rocketCtrl extends Component Mux(wb_reg_replay, PC_WB, // replay Mux(wb_reg_eret, PC_PCR, // eret instruction Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch - Mux(!ex_btb_match && br_taken, PC_BR, // mispredicted taken branch + Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch + Mux(jr_taken, PC_JR, // taken JALR Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB - PC_4)))))); // PC+4 + PC_4))))))); // PC+4 - io.dpath.wen_btb := !ex_btb_match && br_taken; + io.dpath.wen_btb := !ex_reg_btb_hit && br_taken io.dpath.clr_btb := ex_reg_btb_hit && !br_taken || id_reg_icmiss; io.imem.req_val := take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay) @@ -678,8 +677,7 @@ class rocketCtrl extends Component val ctrl_killd = take_pc || ctrl_stalld; val ctrl_killf = take_pc || !io.imem.resp_val; - io.flush_inst := wb_reg_flush_inst; - + io.dpath.flush_inst := wb_reg_flush_inst; io.dpath.stallf := ctrl_stallf; io.dpath.stalld := ctrl_stalld; io.dpath.killf := ctrl_killf; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index d5981227..6dfa0a16 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -47,7 +47,7 @@ class rocketDpath extends Component { val io = new ioDpathAll(); - val btb = new rocketDpathBTB(8); // # of entries in BTB + val btb = new rocketDpathBTB(4); // # of entries in BTB val if_btb_target = btb.io.target; @@ -142,19 +142,16 @@ class rocketDpath extends Component val ex_ea_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0)) val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix - - val 
ex_br_target_sel = Reg(io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE) - val ex_br_target = Mux(ex_br_target_sel, ex_branch_target, ex_effective_address) - btb.io.correct_target := ex_br_target val if_next_pc = Mux(io.ctrl.sel_pc === PC_BTB, Cat(if_btb_target(VADDR_BITS-1), if_btb_target), Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, - Mux(io.ctrl.sel_pc === PC_BR, ex_br_target, + Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, + Mux(io.ctrl.sel_pc === PC_JR, ex_effective_address, Mux(io.ctrl.sel_pc === PC_PCR, wb_reg_wdata(VADDR_BITS,0), // only used for ERET Mux(io.ctrl.sel_pc === PC_EVEC, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec), Mux(io.ctrl.sel_pc === PC_WB, wb_reg_pc, - if_pc_plus4)))))); // PC_4 + if_pc_plus4))))))); // PC_4 when (!io.ctrl.stallf) { if_reg_pc <== if_next_pc.toUFix; @@ -171,7 +168,8 @@ class rocketDpath extends Component btb.io.wen <> io.ctrl.wen_btb; btb.io.clr <> io.ctrl.clr_btb; btb.io.correct_pc := ex_reg_pc; - io.ctrl.btb_match := id_reg_pc === ex_br_target; + btb.io.correct_target := ex_branch_target + btb.io.invalidate := io.ctrl.flush_inst // instruction decode stage when (!io.ctrl.stalld) { diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 32197f7b..4472c919 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -13,31 +13,50 @@ class ioDpathBTB extends Bundle() val target = UFix(VADDR_BITS, OUTPUT); val wen = Bool(INPUT); val clr = Bool(INPUT); + val invalidate = Bool(INPUT); val correct_pc = UFix(VADDR_BITS, INPUT); val correct_target = UFix(VADDR_BITS, INPUT); } -// basic direct-mapped branch target buffer +// fully-associative branch target buffer class rocketDpathBTB(entries: Int) extends Component { - val io = new ioDpathBTB(); - - val addr_bits = ceil(log10(entries)/log10(2)).toInt; - val idxlsb = 2; - val idxmsb = idxlsb+addr_bits-1; - val tagmsb = (VADDR_BITS-idxmsb-1)+(VADDR_BITS-idxlsb)-1; - val taglsb = 
(VADDR_BITS-idxlsb); - - val vb_array = Mem(entries, io.wen || io.clr, io.correct_pc(idxmsb,idxlsb), !io.clr, resetVal = Bool(false)); - val tag_target_array = Mem4(entries, io.wen, io.correct_pc(idxmsb,idxlsb), - Cat(io.correct_pc(VADDR_BITS-1,idxmsb+1), io.correct_target(VADDR_BITS-1,idxlsb))) - tag_target_array.setReadLatency(0); - tag_target_array.setTarget('inst); - val is_val = vb_array(io.current_pc(idxmsb,idxlsb)); - val tag_target = tag_target_array(io.current_pc(idxmsb, idxlsb)); - - io.hit := is_val && (tag_target(tagmsb,taglsb) === io.current_pc(VADDR_BITS-1, idxmsb+1)); - io.target := Cat(tag_target(taglsb-1, 0), Bits(0,idxlsb)).toUFix; + val io = new ioDpathBTB(); + + val do_update = io.wen || io.clr + val expected_tag = Mux(do_update, io.correct_pc, io.current_pc) + + val repl_way = LFSR16(io.wen)(log2up(entries)-1,0) // TODO: pseudo-LRU + + var hit_reduction = Bool(false) + val hit = Wire() { Bool() } + val mux = (new Mux1H(entries)) { Bits(width = VADDR_BITS) } + + for (i <- 0 until entries) { + val tag = Reg() { UFix() } + val target = Reg() { UFix() } + val valid = Reg(resetVal = Bool(false)) + val my_hit = valid && tag === expected_tag + val my_clr = io.clr && my_hit || io.invalidate + val my_wen = io.wen && (my_hit || !hit && UFix(i) === repl_way) + + when (my_clr) { + valid <== Bool(false) + } + when (my_wen) { + valid <== Bool(true) + tag <== io.correct_pc + target <== io.correct_target + } + + hit_reduction = hit_reduction || my_hit + mux.io.sel(i) := my_hit + mux.io.in(i) := target + } + hit := hit_reduction + + io.hit := hit + io.target := mux.io.out.toUFix } class ioDpathPCR extends Bundle() diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index a85b7245..2e5e0311 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -37,9 +37,9 @@ class Top() extends Component { object top_main { def main(args: Array[String]) = { // Can turn off --debug and --vcd when done with debugging to 
improve emulator performance - val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); +// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); // val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug"); -// val cpu_args = args ++ Array("--target-dir", "generated-src"); + val cpu_args = args ++ Array("--target-dir", "generated-src"); // Set variables based off of command flags // for(a <- args) { // a match { From 92493ad153a3513588ddd0bd5e0c509e335ec064 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Feb 2012 02:26:03 -0800 Subject: [PATCH 0143/1087] fix mul/div kill bug occasionally, an in-progress multiply or divide could be erroneously killed, tying up the register forever. --- rocket/src/main/scala/divider.scala | 2 +- rocket/src/main/scala/dpath.scala | 11 +++-------- rocket/src/main/scala/multiplier.scala | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 1d1c8d57..b661ee62 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -56,7 +56,7 @@ class rocketDivider(width : Int) extends Component { val tc = (io.div_fn === DIV_D) || (io.div_fn === DIV_R); - when (io.div_kill) { + when (io.div_kill && Reg(state === s_ready)) { // can only kill on first cycle state <== s_ready; } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 6dfa0a16..8112d968 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -111,8 +111,6 @@ class rocketDpath extends Component val mem_reg_wdata = Reg() { Bits() }; val mem_reg_raddr1 = Reg() { UFix() }; val mem_reg_raddr2 = Reg() { UFix() }; - val mem_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); - val mem_reg_ctrl_div_val = Reg(resetVal = Bool(false)); val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val mem_wdata = Wire() { Bits() }; @@ 
-276,7 +274,7 @@ class rocketDpath extends Component div.io.dw := ex_reg_ctrl_fn_dw; div.io.div_fn := ex_reg_ctrl_div_fn; div.io.div_val := ex_reg_ctrl_div_val; - div.io.div_kill := mem_reg_ctrl_div_val && io.ctrl.killm; + div.io.div_kill := io.ctrl.killm; div.io.div_waddr := ex_reg_waddr; div.io.dpath_rs1 := ex_reg_rs1; div.io.dpath_rs2 := ex_reg_rs2; @@ -287,7 +285,7 @@ class rocketDpath extends Component // multiplier mul.io.mul_val := ex_reg_ctrl_mul_val; - mul.io.mul_kill:= mem_reg_ctrl_mul_val && io.ctrl.killm; + mul.io.mul_kill:= io.ctrl.killm; mul.io.dw := ex_reg_ctrl_fn_dw; mul.io.mul_fn := ex_reg_ctrl_mul_fn; mul.io.mul_tag := ex_reg_waddr; @@ -354,8 +352,6 @@ class rocketDpath extends Component mem_reg_wdata <== ex_wdata; mem_reg_raddr1 <== ex_reg_raddr1 mem_reg_raddr2 <== ex_reg_raddr2; - mem_reg_ctrl_mul_val <== ex_reg_ctrl_mul_val; - mem_reg_ctrl_div_val <== ex_reg_ctrl_div_val; when (io.ctrl.killx) { mem_reg_valid <== Bool(false); @@ -392,8 +388,7 @@ class rocketDpath extends Component val mem_ll_wdata = Mux(div_result_val, div_result, Mux(mul_result_val, mul_result, mem_reg_wdata)) - val mem_ll_wb = mem_ll_waddr != UFix(0) && - (dmem_resp_replay || div_result_val || mul_result_val) + val mem_ll_wb = dmem_resp_replay || div_result_val || mul_result_val wb_reg_pc <== mem_reg_pc; wb_reg_inst <== mem_reg_inst diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index a3ce7bd8..52ab0ef0 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -62,7 +62,7 @@ class rocketMultiplier extends Component { r_prod<== rhs_in r_lsb <== Bool(false) } - when (io.result_val && io.result_rdy || io.mul_kill) { + when (io.result_val && io.result_rdy || io.mul_kill && r_cnt === UFix(0)) { // can only kill on first cycle r_val <== Bool(false) } From f47d888feb90f9d55b0d69c0b80f4e5efd1b84bb Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 9 Feb 2012 02:35:09 -0800 Subject: [PATCH 
0144/1087] vvcfgivl and vsetvl works --- rocket/src/main/scala/consts.scala | 1 - rocket/src/main/scala/ctrl.scala | 68 +++++++++++++------------- rocket/src/main/scala/dpath.scala | 35 +++++++------ rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/dpath_vec.scala | 8 +-- 5 files changed, 59 insertions(+), 55 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 6ce9df0e..12a5dfac 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -66,7 +66,6 @@ object Constants val WB_ALU = UFix(2, 3); val WB_TSC = UFix(4, 3); val WB_IRT = UFix(5, 3); - val WB_VEC = UFix(6, 3); val N = UFix(0, 1); val Y = UFix(1, 1); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 26fce4fa..2c309d9a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -219,46 +219,46 @@ class rocketCtrl extends Component FSD-> List(Y, N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), // Vector Stuff - VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_VEC,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), - VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_VEC,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), - VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), + VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), + VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, 
WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), FENCE_L_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), FENCE_G_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), FENCE_L_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), FENCE_G_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, 
DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLBU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, 
WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N) + VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + 
VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLBU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, 
DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, 
N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N) )) val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 8112d968..ecb62e52 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -341,8 +341,7 @@ class rocketDpath extends Component Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, - Mux(ex_reg_ctrl_sel_wb === WB_VEC, vec.io.appvl, - ex_alu_out))))).toBits; // WB_ALU + ex_alu_out)))).toBits; // WB_ALU // memory stage mem_reg_pc <== ex_reg_pc; @@ -408,20 +407,6 @@ class rocketDpath extends Component wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; } - // regfile write - val wb_src_dmem = Reg(io.ctrl.mem_load) && wb_reg_valid || r_dmem_resp_replay - wb_wdata := Mux(wb_src_dmem, io.dmem.resp_data_subword, wb_reg_wdata) - rfile.io.w0.addr := wb_reg_waddr - rfile.io.w0.en := io.ctrl.wb_wen || wb_reg_ll_wb - rfile.io.w0.data := wb_wdata - - io.ext_mem.resp_val := Reg(io.dmem.resp_val && dmem_resp_ext, resetVal = Bool(false)) - io.ext_mem.resp_tag := Reg(dmem_resp_ext_tag) - io.ext_mem.resp_data := io.dmem.resp_data_subword - - io.ctrl.wb_waddr := wb_reg_waddr; - io.ctrl.mem_wb := dmem_resp_replay; - // vector datapath vec.io.valid := wb_reg_valid vec.io.sr_ev := pcr.io.status(SR_EV) @@ -433,6 +418,24 @@ class rocketDpath extends Component vec.io.wdata := wb_reg_wdata vec.io.rs2 := wb_reg_rs2 + // regfile write + val wb_src_dmem = Reg(io.ctrl.mem_load) && wb_reg_valid || r_dmem_resp_replay + wb_wdata := + Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), + Mux(wb_src_dmem, io.dmem.resp_data_subword, + wb_reg_wdata)) + + rfile.io.w0.addr := 
wb_reg_waddr + rfile.io.w0.en := io.ctrl.wb_wen || wb_reg_ll_wb + rfile.io.w0.data := wb_wdata + + io.ext_mem.resp_val := Reg(io.dmem.resp_val && dmem_resp_ext, resetVal = Bool(false)) + io.ext_mem.resp_tag := Reg(dmem_resp_ext_tag) + io.ext_mem.resp_data := io.dmem.resp_data_subword + + io.ctrl.wb_waddr := wb_reg_waddr; + io.ctrl.mem_wb := dmem_resp_replay; + vec.io.vcmdq <> io.vcmdq vec.io.vximm1q <> io.vximm1q vec.io.vximm2q <> io.vximm2q diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 4472c919..b060b722 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -129,7 +129,7 @@ class rocketDpathPCR extends Component io.r.data := rdata; io.vecbank := reg_vecbank - var cnt = UFix(0) + var cnt = UFix(0,4) for (i <- 0 until 8) cnt = cnt + reg_vecbank(i) io.vecbankcnt := cnt(3,0) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index ae35484c..8c6c7d8d 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -17,6 +17,7 @@ class ioDpathVec extends Bundle val vecbankcnt = UFix(4, INPUT) val wdata = Bits(64, INPUT) val rs2 = Bits(64, INPUT) + val wen = Bool(OUTPUT) val appvl = UFix(12, OUTPUT) val vcmdq = new io_vec_cmdq() val vximm1q = new io_vec_ximm1q() @@ -80,9 +81,9 @@ class rocketDpathVec extends Component val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs val wb_vec_cmdq_val :: wb_vec_ximm1q_val :: wb_vec_ximm2q_val :: Nil = veccs0 - val nxregs = io.inst(15,10) - val nfregs = io.inst(21,16) - val nregs = (nxregs + nfregs)(6,0) + val nxregs = Cat(UFix(0,1),io.inst(15,10).toUFix) // FIXME: to make the nregs width 7 bits + val nfregs = io.inst(21,16).toUFix + val nregs = nxregs + nfregs val uts_per_bank = MuxLookup( nregs, UFix(4,9), Array( @@ -153,6 +154,7 @@ class rocketDpathVec extends Component reg_appvl0 <== !(appvl.orR()) } + 
io.wen := io.valid && wb_vec_wen.toBool io.appvl := appvl val vlenm1 = appvl - Bits(1,1) From 03ee49f424424df22a6a4c1d20a73b1687a11074 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Feb 2012 03:30:55 -0800 Subject: [PATCH 0145/1087] fix 32-bit AMOs to upper halves of 64-bit words thanks, torture! --- rocket/src/main/scala/nbdcache.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index dec60832..60a7c5f3 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -635,11 +635,13 @@ class AMOALU extends Component { val less = Mux(cmp_lhs === cmp_rhs, cmp_diff, Mux(sgned, cmp_lhs, cmp_rhs)) val cmp_out = Mux(min === less, io.lhs, io.rhs) - io.out := Mux(io.cmd === M_XA_ADD, adder_out, + val out = Mux(io.cmd === M_XA_ADD, adder_out, Mux(io.cmd === M_XA_SWAP, io.rhs, Mux(io.cmd === M_XA_AND, io.lhs & io.rhs, Mux(io.cmd === M_XA_OR, io.lhs | io.rhs, /* MIN[U]/MAX[U] */ cmp_out)))); + + io.out := Mux(word, Cat(out(31,0), out(31,0)).toUFix, out) } class HellaCacheDM extends Component { From f8b937d59093e6e56ded600fa6db636778e1bd91 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Feb 2012 03:47:59 -0800 Subject: [PATCH 0146/1087] fix 32-bit divider bug thanks, torture! also, tidied up the code a bit. 
--- rocket/src/main/scala/divider.scala | 48 ++++++++++++++--------------- rocket/src/main/scala/dpath.scala | 18 +++++------ 2 files changed, 32 insertions(+), 34 deletions(-) diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index b661ee62..8fb98c1d 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -11,14 +11,14 @@ class ioDivider(width: Int) extends Bundle { val div_rdy = Bool(OUTPUT); val dw = UFix(1, INPUT); val div_fn = UFix(2, INPUT); - val div_waddr = UFix(5, INPUT); - val dpath_rs1 = Bits(width, INPUT); - val dpath_rs2 = Bits(width, INPUT); + val div_tag = UFix(5, INPUT); + val in0 = Bits(width, INPUT); + val in1 = Bits(width, INPUT); // responses - val div_result_bits = Bits(width, OUTPUT); - val div_result_tag = UFix(5, OUTPUT); - val div_result_val = Bool(OUTPUT); - val div_result_rdy = Bool(INPUT); + val result = Bits(width, OUTPUT); + val result_tag = UFix(5, OUTPUT); + val result_val = Bool(OUTPUT); + val result_rdy = Bool(INPUT); } // class ioDivider extends Bundle { @@ -41,12 +41,11 @@ class rocketDivider(width : Int) extends Component { val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_ready); - val count_bits = java.math.BigInteger.valueOf(width).bitLength(); val count = Reg() { UFix() }; val divby0 = Reg() { Bool() }; val neg_quo = Reg() { Bool() }; val neg_rem = Reg() { Bool() }; - val reg_waddr = Reg() { UFix() }; + val reg_tag = Reg() { UFix() }; val rem = Reg() { Bool() }; val half = Reg() { Bool() }; @@ -75,30 +74,28 @@ class rocketDivider(width : Int) extends Component { } is (s_neg_outputs) { state <== s_done; } is (s_done) { - when (io.div_result_rdy) { state <== s_ready; } + when (io.result_rdy) { state <== s_ready; } } } - // if we're doing 32-bit unsigned division, then we don't want the 32-bit - // inputs to be sign-extended. 
- val in_lhs = Mux(((io.dw === DW_32) && !tc), - Cat(Fill(width/2, UFix(0,1)), io.dpath_rs1(width/2-1, 0)), - io.dpath_rs1).toUFix; + val lhs_sign = tc && Mux(io.dw === DW_64, io.in0(width-1), io.in0(width/2-1)).toBool + val lhs_hi = Mux(io.dw === DW_64, io.in0(width-1,width/2), Fill(width/2, lhs_sign)) + val lhs_in = Cat(lhs_hi, io.in0(width/2-1,0)) - val in_rhs = Mux(((io.dw === DW_32) && !tc), - Cat(Fill(width/2, UFix(0,1)), io.dpath_rs2(width/2-1, 0)), - io.dpath_rs2).toUFix; + val rhs_sign = tc && Mux(io.dw === DW_64, io.in1(width-1), io.in1(width/2-1)).toBool + val rhs_hi = Mux(io.dw === DW_64, io.in1(width-1,width/2), Fill(width/2, rhs_sign)) + val rhs_in = Cat(rhs_hi, io.in1(width/2-1,0)) when ((state === s_ready) && io.div_val) { - count <== UFix(0, count_bits); + count <== UFix(0, log2up(width+1)); half <== (io.dw === DW_32); neg_quo <== Bool(false); neg_rem <== Bool(false); rem <== (io.div_fn === DIV_R) || (io.div_fn === DIV_RU); - reg_waddr <== io.div_waddr; + reg_tag <== io.div_tag; divby0 <== Bool(true); - divisor <== in_rhs; - remainder <== Cat(Fill(width+1, UFix(0,1)), in_lhs).toUFix; + divisor <== rhs_in.toUFix; + remainder <== Cat(UFix(0,width+1), lhs_in).toUFix; } when (state === s_neg_inputs) { @@ -135,10 +132,11 @@ class rocketDivider(width : Int) extends Component { val result = Mux(rem, remainder(2*width, width+1), remainder(width-1,0)); - io.div_result_bits := Mux(half, Cat(Fill(width/2, result(width/2-1)), result(width/2-1,0)), result); + io.result := Mux(half, Cat(Fill(width/2, result(width/2-1)), result(width/2-1,0)), result); + io.result_tag := reg_tag; + io.result_val := (state === s_done); + io.div_rdy := (state === s_ready); - io.div_result_tag := reg_waddr; - io.div_result_val := (state === s_done); } } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index ecb62e52..96be17b5 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -61,9 +61,9 @@ class rocketDpath 
extends Component val ex_alu_adder_out = alu.io.adder_out; val div = new rocketDivider(64); - val div_result = div.io.div_result_bits; - val div_result_tag = div.io.div_result_tag; - val div_result_val = div.io.div_result_val; + val div_result = div.io.result; + val div_result_tag = div.io.result_tag; + val div_result_val = div.io.result_val; val mul = new rocketMultiplier(); val mul_result = mul.io.result; @@ -275,13 +275,13 @@ class rocketDpath extends Component div.io.div_fn := ex_reg_ctrl_div_fn; div.io.div_val := ex_reg_ctrl_div_val; div.io.div_kill := io.ctrl.killm; - div.io.div_waddr := ex_reg_waddr; - div.io.dpath_rs1 := ex_reg_rs1; - div.io.dpath_rs2 := ex_reg_rs2; - div.io.div_result_rdy := !dmem_resp_replay + div.io.div_tag := ex_reg_waddr; + div.io.in0 := ex_reg_rs1; + div.io.in1 := ex_reg_rs2; + div.io.result_rdy:= !dmem_resp_replay io.ctrl.div_rdy := div.io.div_rdy; - io.ctrl.div_result_val := div.io.div_result_val; + io.ctrl.div_result_val := div.io.result_val; // multiplier mul.io.mul_val := ex_reg_ctrl_mul_val; @@ -294,7 +294,7 @@ class rocketDpath extends Component io.ctrl.mul_rdy := mul.io.mul_rdy io.ctrl.mul_result_val := mul.io.result_val; - mul.io.result_rdy := !dmem_resp_replay && !div.io.div_result_val + mul.io.result_rdy := !dmem_resp_replay && !div.io.result_val io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection & bypass control From 725190d0eeb73b60dc44ba6af337f85234dfaff5 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 11 Feb 2012 17:20:33 -0800 Subject: [PATCH 0147/1087] update to new chisel --- rocket/src/main/scala/ctrl.scala | 218 ++++++++++---------- rocket/src/main/scala/ctrl_util.scala | 30 +-- rocket/src/main/scala/dcache.scala | 104 +++++----- rocket/src/main/scala/divider.scala | 106 +++++----- rocket/src/main/scala/dpath.scala | 124 +++++------ rocket/src/main/scala/dpath_util.scala | 188 +++++++++-------- rocket/src/main/scala/dpath_vec.scala | 4 +- rocket/src/main/scala/dtlb.scala | 40 ++-- 
rocket/src/main/scala/fpu.scala | 8 +- rocket/src/main/scala/icache.scala | 36 ++-- rocket/src/main/scala/icache_prefetch.scala | 26 +-- rocket/src/main/scala/itlb.scala | 58 +++--- rocket/src/main/scala/multiplier.scala | 28 +-- rocket/src/main/scala/nbdcache.scala | 206 +++++++++--------- rocket/src/main/scala/ptw.scala | 70 +++---- rocket/src/main/scala/queues.scala | 22 +- rocket/src/main/scala/util.scala | 14 +- 17 files changed, 626 insertions(+), 656 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2c309d9a..29c374b2 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -339,17 +339,17 @@ class rocketCtrl extends Component when (!io.dpath.stalld) { when (io.dpath.killf) { - id_reg_btb_hit <== Bool(false); - id_reg_xcpt_ma_inst <== Bool(false); - id_reg_xcpt_itlb <== Bool(false); + id_reg_btb_hit := Bool(false); + id_reg_xcpt_ma_inst := Bool(false); + id_reg_xcpt_itlb := Bool(false); } - otherwise{ - id_reg_btb_hit <== io.dpath.btb_hit; - id_reg_xcpt_ma_inst <== if_reg_xcpt_ma_inst; - id_reg_xcpt_itlb <== io.xcpt_itlb; + .otherwise{ + id_reg_btb_hit := io.dpath.btb_hit; + id_reg_xcpt_ma_inst := if_reg_xcpt_ma_inst; + id_reg_xcpt_itlb := io.xcpt_itlb; } - id_reg_icmiss <== !io.imem.resp_val; - id_reg_replay <== !take_pc && !io.imem.resp_val; + id_reg_icmiss := !io.imem.resp_val; + id_reg_replay := !take_pc && !io.imem.resp_val; } // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim) @@ -358,54 +358,54 @@ class rocketCtrl extends Component (id_eret.toBool && io.dpath.status(SR_ET).toBool); when (reset.toBool || io.dpath.killd) { - ex_reg_br_type <== BR_N; - ex_reg_btb_hit <== Bool(false); - ex_reg_div_val <== Bool(false); - ex_reg_mul_val <== Bool(false); - ex_reg_mem_val <== Bool(false); - ex_reg_wen <== Bool(false); - ex_reg_fp_wen <== Bool(false); - ex_reg_eret <== Bool(false); - ex_reg_replay_next <== Bool(false); - ex_reg_inst_di 
<== Bool(false); - ex_reg_inst_ei <== Bool(false); - ex_reg_flush_inst <== Bool(false); - ex_reg_xcpt_ma_inst <== Bool(false); - ex_reg_xcpt_itlb <== Bool(false); - ex_reg_xcpt_illegal <== Bool(false); - ex_reg_xcpt_privileged <== Bool(false); - ex_reg_xcpt_syscall <== Bool(false); - ex_reg_fp_val <== Bool(false); - ex_reg_vec_val <== Bool(false); - ex_reg_replay <== Bool(false); - ex_reg_load_use <== Bool(false); + ex_reg_br_type := BR_N; + ex_reg_btb_hit := Bool(false); + ex_reg_div_val := Bool(false); + ex_reg_mul_val := Bool(false); + ex_reg_mem_val := Bool(false); + ex_reg_wen := Bool(false); + ex_reg_fp_wen := Bool(false); + ex_reg_eret := Bool(false); + ex_reg_replay_next := Bool(false); + ex_reg_inst_di := Bool(false); + ex_reg_inst_ei := Bool(false); + ex_reg_flush_inst := Bool(false); + ex_reg_xcpt_ma_inst := Bool(false); + ex_reg_xcpt_itlb := Bool(false); + ex_reg_xcpt_illegal := Bool(false); + ex_reg_xcpt_privileged := Bool(false); + ex_reg_xcpt_syscall := Bool(false); + ex_reg_fp_val := Bool(false); + ex_reg_vec_val := Bool(false); + ex_reg_replay := Bool(false); + ex_reg_load_use := Bool(false); } - otherwise { - ex_reg_br_type <== id_br_type; - ex_reg_btb_hit <== id_reg_btb_hit; - ex_reg_div_val <== id_div_val.toBool && id_waddr != UFix(0); - ex_reg_mul_val <== id_mul_val.toBool && id_waddr != UFix(0); - ex_reg_mem_val <== id_mem_val.toBool; - ex_reg_wen <== id_wen.toBool && id_waddr != UFix(0); - ex_reg_fp_wen <== fpdec.io.wen; - ex_reg_eret <== id_eret.toBool; - ex_reg_replay_next <== id_replay_next.toBool; - ex_reg_inst_di <== (id_irq === I_DI); - ex_reg_inst_ei <== (id_irq === I_EI); - ex_reg_flush_inst <== (id_sync === SYNC_I); - ex_reg_xcpt_ma_inst <== id_reg_xcpt_ma_inst; - ex_reg_xcpt_itlb <== id_reg_xcpt_itlb; - ex_reg_xcpt_illegal <== illegal_inst; - ex_reg_xcpt_privileged <== (id_privileged & ~io.dpath.status(SR_S)).toBool; - ex_reg_xcpt_syscall <== id_syscall.toBool; - ex_reg_fp_val <== fpdec.io.valid; - ex_reg_vec_val <== 
id_vec_val.toBool - ex_reg_replay <== id_reg_replay || ex_reg_replay_next; - ex_reg_load_use <== id_load_use; + .otherwise { + ex_reg_br_type := id_br_type; + ex_reg_btb_hit := id_reg_btb_hit; + ex_reg_div_val := id_div_val.toBool && id_waddr != UFix(0); + ex_reg_mul_val := id_mul_val.toBool && id_waddr != UFix(0); + ex_reg_mem_val := id_mem_val.toBool; + ex_reg_wen := id_wen.toBool && id_waddr != UFix(0); + ex_reg_fp_wen := fpdec.io.wen; + ex_reg_eret := id_eret.toBool; + ex_reg_replay_next := id_replay_next.toBool; + ex_reg_inst_di := (id_irq === I_DI); + ex_reg_inst_ei := (id_irq === I_EI); + ex_reg_flush_inst := (id_sync === SYNC_I); + ex_reg_xcpt_ma_inst := id_reg_xcpt_ma_inst; + ex_reg_xcpt_itlb := id_reg_xcpt_itlb; + ex_reg_xcpt_illegal := illegal_inst; + ex_reg_xcpt_privileged := (id_privileged & ~io.dpath.status(SR_S)).toBool; + ex_reg_xcpt_syscall := id_syscall.toBool; + ex_reg_fp_val := fpdec.io.valid; + ex_reg_vec_val := id_vec_val.toBool + ex_reg_replay := id_reg_replay || ex_reg_replay_next; + ex_reg_load_use := id_load_use; } - ex_reg_ext_mem_val <== io.ext_mem.req_val - ex_reg_mem_cmd <== Mux(io.ext_mem.req_val, io.ext_mem.req_cmd, id_mem_cmd).toUFix - ex_reg_mem_type <== Mux(io.ext_mem.req_val, io.ext_mem.req_type, id_mem_type).toUFix + ex_reg_ext_mem_val := io.ext_mem.req_val + ex_reg_mem_cmd := Mux(io.ext_mem.req_val, io.ext_mem.req_cmd, id_mem_cmd).toUFix + ex_reg_mem_type := Mux(io.ext_mem.req_val, io.ext_mem.req_type, id_mem_type).toUFix val beq = io.dpath.br_eq; val bne = ~io.dpath.br_eq; @@ -431,60 +431,60 @@ class rocketCtrl extends Component val mem_reg_mem_type = Reg(){UFix(width = 3)}; when (reset.toBool || io.dpath.killx) { - mem_reg_div_mul_val <== Bool(false); - mem_reg_wen <== Bool(false); - mem_reg_fp_wen <== Bool(false); - mem_reg_eret <== Bool(false); - mem_reg_mem_val <== Bool(false); - mem_reg_inst_di <== Bool(false); - mem_reg_inst_ei <== Bool(false); - mem_reg_flush_inst <== Bool(false); - mem_reg_xcpt_ma_inst <== Bool(false); 
- mem_reg_xcpt_itlb <== Bool(false); - mem_reg_xcpt_illegal <== Bool(false); - mem_reg_xcpt_privileged <== Bool(false); - mem_reg_xcpt_fpu <== Bool(false); - mem_reg_xcpt_vec <== Bool(false); - mem_reg_xcpt_syscall <== Bool(false); + mem_reg_div_mul_val := Bool(false); + mem_reg_wen := Bool(false); + mem_reg_fp_wen := Bool(false); + mem_reg_eret := Bool(false); + mem_reg_mem_val := Bool(false); + mem_reg_inst_di := Bool(false); + mem_reg_inst_ei := Bool(false); + mem_reg_flush_inst := Bool(false); + mem_reg_xcpt_ma_inst := Bool(false); + mem_reg_xcpt_itlb := Bool(false); + mem_reg_xcpt_illegal := Bool(false); + mem_reg_xcpt_privileged := Bool(false); + mem_reg_xcpt_fpu := Bool(false); + mem_reg_xcpt_vec := Bool(false); + mem_reg_xcpt_syscall := Bool(false); } - otherwise { - mem_reg_div_mul_val <== ex_reg_div_val || ex_reg_mul_val; - mem_reg_wen <== ex_reg_wen; - mem_reg_fp_wen <== ex_reg_fp_wen; - mem_reg_eret <== ex_reg_eret; - mem_reg_mem_val <== ex_reg_mem_val; - mem_reg_inst_di <== ex_reg_inst_di; - mem_reg_inst_ei <== ex_reg_inst_ei; - mem_reg_flush_inst <== ex_reg_flush_inst; - mem_reg_xcpt_ma_inst <== ex_reg_xcpt_ma_inst; - mem_reg_xcpt_itlb <== ex_reg_xcpt_itlb; - mem_reg_xcpt_illegal <== ex_reg_xcpt_illegal; - mem_reg_xcpt_privileged <== ex_reg_xcpt_privileged; - mem_reg_xcpt_fpu <== ex_reg_fp_val && !io.dpath.status(SR_EF).toBool; - mem_reg_xcpt_vec <== ex_reg_vec_val && !io.dpath.status(SR_EV).toBool; - mem_reg_xcpt_syscall <== ex_reg_xcpt_syscall; + .otherwise { + mem_reg_div_mul_val := ex_reg_div_val || ex_reg_mul_val; + mem_reg_wen := ex_reg_wen; + mem_reg_fp_wen := ex_reg_fp_wen; + mem_reg_eret := ex_reg_eret; + mem_reg_mem_val := ex_reg_mem_val; + mem_reg_inst_di := ex_reg_inst_di; + mem_reg_inst_ei := ex_reg_inst_ei; + mem_reg_flush_inst := ex_reg_flush_inst; + mem_reg_xcpt_ma_inst := ex_reg_xcpt_ma_inst; + mem_reg_xcpt_itlb := ex_reg_xcpt_itlb; + mem_reg_xcpt_illegal := ex_reg_xcpt_illegal; + mem_reg_xcpt_privileged := ex_reg_xcpt_privileged; + 
mem_reg_xcpt_fpu := ex_reg_fp_val && !io.dpath.status(SR_EF).toBool; + mem_reg_xcpt_vec := ex_reg_vec_val && !io.dpath.status(SR_EV).toBool; + mem_reg_xcpt_syscall := ex_reg_xcpt_syscall; } - mem_reg_ext_mem_val <== ex_reg_ext_mem_val; - mem_reg_mem_cmd <== ex_reg_mem_cmd; - mem_reg_mem_type <== ex_reg_mem_type; + mem_reg_ext_mem_val := ex_reg_ext_mem_val; + mem_reg_mem_cmd := ex_reg_mem_cmd; + mem_reg_mem_type := ex_reg_mem_type; when (io.dpath.killm) { - wb_reg_wen <== Bool(false); - wb_reg_fp_wen <== Bool(false); - wb_reg_eret <== Bool(false); - wb_reg_inst_di <== Bool(false); - wb_reg_inst_ei <== Bool(false); - wb_reg_flush_inst <== Bool(false); - wb_reg_div_mul_val <== Bool(false); + wb_reg_wen := Bool(false); + wb_reg_fp_wen := Bool(false); + wb_reg_eret := Bool(false); + wb_reg_inst_di := Bool(false); + wb_reg_inst_ei := Bool(false); + wb_reg_flush_inst := Bool(false); + wb_reg_div_mul_val := Bool(false); } - otherwise { - wb_reg_wen <== mem_reg_wen; - wb_reg_fp_wen <== mem_reg_fp_wen; - wb_reg_eret <== mem_reg_eret; - wb_reg_inst_di <== mem_reg_inst_di; - wb_reg_inst_ei <== mem_reg_inst_ei; - wb_reg_flush_inst <== mem_reg_flush_inst; - wb_reg_div_mul_val <== mem_reg_div_mul_val; + .otherwise { + wb_reg_wen := mem_reg_wen; + wb_reg_fp_wen := mem_reg_fp_wen; + wb_reg_eret := mem_reg_eret; + wb_reg_inst_di := mem_reg_inst_di; + wb_reg_inst_ei := mem_reg_inst_ei; + wb_reg_flush_inst := mem_reg_flush_inst; + wb_reg_div_mul_val := mem_reg_div_mul_val; } val sboard = new rocketCtrlSboard(); @@ -591,12 +591,12 @@ class rocketCtrl extends Component ex_reg_mul_val && !io.dpath.mul_rdy val kill_ex = take_pc_wb || replay_ex - mem_reg_replay <== replay_ex && !take_pc_wb; - mem_reg_kill <== kill_ex; + mem_reg_replay := replay_ex && !take_pc_wb; + mem_reg_kill := kill_ex; - wb_reg_replay <== replay_mem && !take_pc_wb; - wb_reg_exception <== mem_exception && !take_pc_wb; - wb_reg_cause <== mem_cause; + wb_reg_replay := replay_mem && !take_pc_wb; + wb_reg_exception := 
mem_exception && !take_pc_wb; + wb_reg_cause := mem_cause; val wb_badvaddr_wen = wb_reg_exception && ((wb_reg_cause === UFix(10)) || (wb_reg_cause === UFix(11))) diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index 09f0ba88..8d99b83c 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -1,5 +1,5 @@ package Top -{ + import Chisel._ import Node._; import Constants._; @@ -23,38 +23,14 @@ class ioCtrlSboard extends Bundle() class rocketCtrlSboard extends Component { override val io = new ioCtrlSboard(); - val reg_busy = Reg(width = 32, resetVal = Bits(0, 32)); + val reg_busy = Reg(resetVal = Bits(0, 32)); val set_mask = io.set.toUFix << io.seta; val clr_mask = ~(io.clr.toUFix << io.clra); - reg_busy <== (reg_busy | set_mask) & clr_mask + reg_busy := (reg_busy | set_mask) & clr_mask io.stalla := reg_busy(io.raddra).toBool; io.stallb := reg_busy(io.raddrb).toBool; io.stallc := reg_busy(io.raddrc).toBool; io.stalld := reg_busy(io.raddrd).toBool; } - -class ioCtrlCnt extends Bundle() -{ - val enq = Bool(INPUT); - val deq = Bool(INPUT); - val empty = Bool(OUTPUT); - val full = Bool(OUTPUT); -} - -class rocketCtrlCnt(n_bits: Int, limit: Int) extends Component -{ - override val io = new ioCtrlCnt(); - val counter = Reg(width = n_bits, resetVal = UFix(0, n_bits)); - when (io.enq && !io.deq) { - counter <== counter + UFix(1, n_bits); - } - when (!io.enq && io.deq) { - counter <== counter - UFix(1, n_bits); - } - io.empty := counter === UFix(0, n_bits); - io.full := counter === UFix(limit, n_bits); -} - -} diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 7355d6d4..88aa3f3f 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -137,23 +137,23 @@ class rocketDCacheDM_flush(lines: Int) extends Component { when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_cmd === M_FLA)) { - r_cpu_req_tag <== io.cpu.req_tag; - 
flushing <== Bool(true); - flush_waiting <== Bool(true); + r_cpu_req_tag := io.cpu.req_tag; + flushing := Bool(true); + flush_waiting := Bool(true); } when (dcache.io.cpu.req_rdy && (flush_count === ~Bits(0, indexbits))) { - flushing <== Bool(false); + flushing := Bool(false); } when (dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag) && (flush_resp_count === ~Bits(0, indexbits))) { - flush_waiting <== Bool(false); + flush_waiting := Bool(false); } when (flushing && dcache.io.cpu.req_rdy) { - flush_count <== flush_count + UFix(1,1); + flush_count := flush_count + UFix(1,1); } when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag)) { - flush_resp_count <== flush_resp_count + UFix(1,1); + flush_resp_count := flush_resp_count + UFix(1,1); } dcache.io.cpu.req_val := (io.cpu.req_val && (io.cpu.req_cmd != M_FLA) && !flush_waiting) || flushing; @@ -220,33 +220,33 @@ class rocketDCacheDM(lines: Int) extends Component { val r_req_amo = r_cpu_req_cmd(3).toBool; when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_idx <== io.cpu.req_idx; - r_cpu_req_cmd <== io.cpu.req_cmd; - r_cpu_req_type <== io.cpu.req_type; - r_cpu_req_tag <== io.cpu.req_tag; + r_cpu_req_idx := io.cpu.req_idx; + r_cpu_req_cmd := io.cpu.req_cmd; + r_cpu_req_type := io.cpu.req_type; + r_cpu_req_tag := io.cpu.req_tag; } when ((state === s_ready) && r_cpu_req_val && !io.cpu.req_kill) { - r_cpu_req_ppn <== io.cpu.req_ppn; + r_cpu_req_ppn := io.cpu.req_ppn; } when (io.cpu.req_rdy) { - r_cpu_req_val <== io.cpu.req_val; + r_cpu_req_val := io.cpu.req_val; } otherwise { - r_cpu_req_val <== Bool(false); + r_cpu_req_val := Bool(false); } when (((state === s_resolve_miss) && (r_req_load || r_req_amo)) || (state === s_replay_load)) { - r_cpu_resp_val <== Bool(true); + r_cpu_resp_val := Bool(true); } otherwise { - r_cpu_resp_val <== Bool(false); + r_cpu_resp_val := Bool(false); } // refill counter val rr_count = Reg(resetVal = UFix(0,2)); val rr_count_next = 
rr_count + UFix(1); when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy)) { - rr_count <== rr_count_next; + rr_count := rr_count_next; } // tag array @@ -257,7 +257,7 @@ class rocketDCacheDM(lines: Int) extends Component { ((state === s_refill) && io.mem.resp_val && (rr_count === UFix(3,2))) || ((state === s_resolve_miss) && r_req_flush); - val tag_array = Mem4(lines, r_cpu_req_ppn); + val tag_array = Mem(lines, r_cpu_req_ppn); tag_array.setReadLatency(1); tag_array.setTarget('inst); val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); @@ -265,10 +265,10 @@ class rocketDCacheDM(lines: Int) extends Component { // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); when (tag_we && !r_req_flush) { - vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); + vb_array := vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } when (tag_we && r_req_flush) { - vb_array <== vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); + vb_array := vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); } val vb_rdata = vb_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; val tag_valid = r_cpu_req_val && vb_rdata; @@ -294,33 +294,33 @@ class rocketDCacheDM(lines: Int) extends Component { // pending store data when (io.cpu.req_val && io.cpu.req_rdy && req_store) { - p_store_idx <== io.cpu.req_idx; - p_store_data <== io.cpu.req_data; - p_store_type <== io.cpu.req_type; + p_store_idx := io.cpu.req_idx; + p_store_data := io.cpu.req_data; + p_store_type := io.cpu.req_type; } when (store_hit && !drain_store) { - p_store_valid <== Bool(true); + p_store_valid := Bool(true); } when (drain_store) { - p_store_valid <== Bool(false); + p_store_valid := Bool(false); } // AMO operand when (io.cpu.req_val && io.cpu.req_rdy && req_amo) { - r_amo_data <== io.cpu.req_data; + r_amo_data := io.cpu.req_data; } // dirty bit array val 
db_array = Reg(resetVal = Bits(0, lines)); val tag_dirty = db_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; when ((r_cpu_req_val && !io.cpu.req_kill && tag_hit && r_req_store) || resolve_store) { - db_array <== db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); + db_array := db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } when (state === s_write_amo) { - db_array <== db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); + db_array := db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); } when (tag_we) { - db_array <== db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); + db_array := db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); } // generate write mask and data signals for stores and amos @@ -374,7 +374,7 @@ class rocketDCacheDM(lines: Int) extends Component { Mux((state === s_write_amo), amo_store_wmask, store_wmask)); - val data_array = Mem4(lines*4, data_wdata); + val data_array = Mem(lines*4, data_wdata); data_array.setReadLatency(1); data_array.setTarget('inst); val data_array_rdata = data_array.rw(data_addr, data_wdata, data_we, data_wmask); @@ -424,62 +424,62 @@ class rocketDCacheDM(lines: Int) extends Component { // control state machine switch (state) { is (s_reset) { - state <== s_ready; + state := s_ready; } is (s_ready) { when (io.cpu.req_kill) { - state <== s_ready; + state := s_ready; } when (ldst_conflict) { - state <== s_replay_load; + state := s_replay_load; } when (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))) { - state <== s_ready; + state := s_ready; } when (tag_hit && r_req_amo) { - state <== s_write_amo; + state := s_write_amo; } when (tag_valid & tag_dirty) { - state <== s_start_writeback; + state := s_start_writeback; } when (r_req_flush) { - state <== s_resolve_miss; + state := s_resolve_miss; } otherwise { - state <== s_req_refill; + state := s_req_refill; } } is 
(s_replay_load) { - state <== s_ready; + state := s_ready; } is (s_write_amo) { - state <== s_ready; + state := s_ready; } is (s_start_writeback) { - state <== s_writeback; + state := s_writeback; } is (s_writeback) { when (io.mem.req_rdy && (rr_count === UFix(3,2))) { when (r_req_flush) { - state <== s_resolve_miss; + state := s_resolve_miss; } otherwise { - state <== s_req_refill; + state := s_req_refill; } } } is (s_req_refill) { - when (io.mem.req_rdy) { state <== s_refill; } + when (io.mem.req_rdy) { state := s_refill; } } is (s_refill) { - when (io.mem.resp_val && (rr_count === UFix(3,2))) { state <== s_resolve_miss; } + when (io.mem.resp_val && (rr_count === UFix(3,2))) { state := s_resolve_miss; } } is (s_resolve_miss) { when (r_req_amo) { - state <== s_write_amo; + state := s_write_amo; } - state <== s_ready; + state := s_ready; } } } @@ -505,12 +505,12 @@ class rocketDCacheAmoALU extends Component { val adder_out = adder_lhs + adder_rhs; val alu_out = Wire() { UFix() }; switch (io.cmd) { -// is (M_XA_ADD) { alu_out <== adder_out; } - is (M_XA_SWAP) { alu_out <== io.rhs; } - is (M_XA_AND) { alu_out <== io.lhs & io.rhs; } - is (M_XA_OR) { alu_out <== io.lhs | io.rhs; } +// is (M_XA_ADD) { alu_out := adder_out; } + is (M_XA_SWAP) { alu_out := io.rhs; } + is (M_XA_AND) { alu_out := io.lhs & io.rhs; } + is (M_XA_OR) { alu_out := io.lhs | io.rhs; } } - alu_out <== adder_out; + alu_out := adder_out; io.result := alu_out; } diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 8fb98c1d..95b128ce 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -21,20 +21,6 @@ class ioDivider(width: Int) extends Bundle { val result_rdy = Bool(INPUT); } -// class ioDivider extends Bundle { -// // requests -// val req_val = Bool(INPUT); -// val req_rdy = Bool(OUTPUT); -// val req_fn = UFix(3, INPUT); -// val req_tag = UFix(5, INPUT); -// val req_rs1 = Bits(64, INPUT); -// val req_rs2 = Bits(64, 
INPUT); -// // responses -// val resp_val = Bool(OUTPUT); -// val resp_data = Bits(64, OUTPUT); -// val resp_tag = UFix(5, OUTPUT); -// } - class rocketDivider(width : Int) extends Component { val io = new ioDivider(width); @@ -55,28 +41,36 @@ class rocketDivider(width : Int) extends Component { val tc = (io.div_fn === DIV_D) || (io.div_fn === DIV_R); - when (io.div_kill && Reg(state === s_ready)) { // can only kill on first cycle - state <== s_ready; + val do_kill = io.div_kill && Reg(io.div_rdy) // kill on 1st cycle only + + switch (state) { + is (s_ready) { + when (io.div_val) { + state := Mux(tc, s_neg_inputs, s_busy) + } + } + is (s_neg_inputs) { + state := Mux(do_kill, s_ready, s_busy) + } + is (s_busy) { + when (do_kill) { + state := s_ready + } + .elsewhen (count === UFix(width)) { + state := Mux(neg_quo || neg_rem, s_neg_outputs, s_done) + } + } + is (s_neg_outputs) { + state := s_done + } + is (s_done) { + when (io.result_rdy) { + state := s_ready + } + } } // state machine - switch (state) { - is (s_ready) { - when (!io.div_val) { state <== s_ready; } - when (tc) { state <== s_neg_inputs }; - otherwise { state <== s_busy; } - } - is (s_neg_inputs) { state <== s_busy; } - is (s_busy) { - when (count != UFix(width)) { state <== s_busy; } - when (!(neg_quo || neg_rem)) { state <== s_done; } - otherwise { state <== s_neg_outputs; } - } - is (s_neg_outputs) { state <== s_done; } - is (s_done) { - when (io.result_rdy) { state <== s_ready; } - } - } val lhs_sign = tc && Mux(io.dw === DW_64, io.in0(width-1), io.in0(width/2-1)).toBool val lhs_hi = Mux(io.dw === DW_64, io.in0(width-1,width/2), Fill(width/2, lhs_sign)) @@ -87,45 +81,45 @@ class rocketDivider(width : Int) extends Component { val rhs_in = Cat(rhs_hi, io.in1(width/2-1,0)) when ((state === s_ready) && io.div_val) { - count <== UFix(0, log2up(width+1)); - half <== (io.dw === DW_32); - neg_quo <== Bool(false); - neg_rem <== Bool(false); - rem <== (io.div_fn === DIV_R) || (io.div_fn === DIV_RU); - reg_tag 
<== io.div_tag; - divby0 <== Bool(true); - divisor <== rhs_in.toUFix; - remainder <== Cat(UFix(0,width+1), lhs_in).toUFix; + count := UFix(0, log2up(width+1)); + half := (io.dw === DW_32); + neg_quo := Bool(false); + neg_rem := Bool(false); + rem := (io.div_fn === DIV_R) || (io.div_fn === DIV_RU); + reg_tag := io.div_tag; + divby0 := Bool(true); + divisor := rhs_in.toUFix; + remainder := Cat(UFix(0,width+1), lhs_in).toUFix; } - when (state === s_neg_inputs) { - neg_rem <== remainder(width-1).toBool; - neg_quo <== (remainder(width-1) != divisor(width-1)); + neg_rem := remainder(width-1).toBool; + neg_quo := (remainder(width-1) != divisor(width-1)); when (remainder(width-1).toBool) { - remainder <== Cat(remainder(2*width, width), -remainder(width-1,0)).toUFix; + remainder := Cat(remainder(2*width, width), -remainder(width-1,0)).toUFix; } when (divisor(width-1).toBool) { - divisor <== subtractor(width-1,0); + divisor := subtractor(width-1,0); } } when (state === s_neg_outputs) { when (neg_rem && neg_quo && !divby0) { - remainder <== Cat(-remainder(2*width, width+1), remainder(width), -remainder(width-1,0)).toUFix; + remainder := Cat(-remainder(2*width, width+1), remainder(width), -remainder(width-1,0)).toUFix; } - when (neg_quo && !divby0) { - remainder <== Cat(remainder(2*width, width), -remainder(width-1,0)).toUFix; + .elsewhen (neg_quo && !divby0) { + remainder := Cat(remainder(2*width, width), -remainder(width-1,0)).toUFix; } - when (neg_rem) { - remainder <== Cat(-remainder(2*width, width+1), remainder(width,0)).toUFix; + .elsewhen (neg_rem) { + remainder := Cat(-remainder(2*width, width+1), remainder(width,0)).toUFix; } + when (divisor(width-1).toBool) { - divisor <== subtractor(width-1,0); + divisor := subtractor(width-1,0); } } when (state === s_busy) { - count <== count + UFix(1); - divby0 <== divby0 && !subtractor(width).toBool; - remainder <== Mux(subtractor(width).toBool, + count := count + UFix(1); + divby0 := divby0 && !subtractor(width).toBool; + 
remainder := Mux(subtractor(width).toBool, Cat(remainder(2*width-1, width), remainder(width-1,0), ~subtractor(width)), Cat(subtractor(width-1, 0), remainder(width-1,0), ~subtractor(width))).toUFix; } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 96be17b5..2815fc48 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -152,7 +152,7 @@ class rocketDpath extends Component if_pc_plus4))))))); // PC_4 when (!io.ctrl.stallf) { - if_reg_pc <== if_next_pc.toUFix; + if_reg_pc := if_next_pc.toUFix; } io.ctrl.xcpt_ma_inst := if_next_pc(1,0) != Bits(0) @@ -171,14 +171,14 @@ class rocketDpath extends Component // instruction decode stage when (!io.ctrl.stalld) { - id_reg_pc <== if_reg_pc; + id_reg_pc := if_reg_pc; when(io.ctrl.killf) { - id_reg_inst <== NOP; - id_reg_valid <== Bool(false); + id_reg_inst := NOP; + id_reg_valid := Bool(false); } - otherwise { - id_reg_inst <== io.imem.resp_data; - id_reg_valid <== Bool(true); + .otherwise { + id_reg_inst := io.imem.resp_data; + id_reg_valid := Bool(true); } } @@ -235,34 +235,34 @@ class rocketDpath extends Component io.ctrl.inst := id_reg_inst; // execute stage - ex_reg_pc <== id_reg_pc; - ex_reg_inst <== id_reg_inst - ex_reg_raddr1 <== id_raddr1 - ex_reg_raddr2 <== id_raddr2; - ex_reg_op2 <== id_op2; - ex_reg_rs2 <== id_rs2; - ex_reg_rs1 <== id_rs1; - ex_reg_waddr <== id_waddr; - ex_reg_ctrl_fn_dw <== io.ctrl.fn_dw.toUFix; - ex_reg_ctrl_fn_alu <== io.ctrl.fn_alu; - ex_reg_ctrl_mul_fn <== io.ctrl.mul_fn; - ex_reg_ctrl_div_fn <== io.ctrl.div_fn; - ex_reg_ctrl_sel_wb <== io.ctrl.sel_wb; - ex_reg_ctrl_ren_pcr <== io.ctrl.ren_pcr; + ex_reg_pc := id_reg_pc; + ex_reg_inst := id_reg_inst + ex_reg_raddr1 := id_raddr1 + ex_reg_raddr2 := id_raddr2; + ex_reg_op2 := id_op2; + ex_reg_rs2 := id_rs2; + ex_reg_rs1 := id_rs1; + ex_reg_waddr := id_waddr; + ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUFix; + ex_reg_ctrl_fn_alu := io.ctrl.fn_alu; + ex_reg_ctrl_mul_fn := 
io.ctrl.mul_fn; + ex_reg_ctrl_div_fn := io.ctrl.div_fn; + ex_reg_ctrl_sel_wb := io.ctrl.sel_wb; + ex_reg_ctrl_ren_pcr := io.ctrl.ren_pcr; when(io.ctrl.killd) { - ex_reg_valid <== Bool(false); - ex_reg_ctrl_div_val <== Bool(false); - ex_reg_ctrl_mul_val <== Bool(false); - ex_reg_ctrl_wen_pcr <== Bool(false); - ex_reg_ctrl_eret <== Bool(false); + ex_reg_valid := Bool(false); + ex_reg_ctrl_div_val := Bool(false); + ex_reg_ctrl_mul_val := Bool(false); + ex_reg_ctrl_wen_pcr := Bool(false); + ex_reg_ctrl_eret := Bool(false); } - otherwise { - ex_reg_valid <== id_reg_valid; - ex_reg_ctrl_div_val <== io.ctrl.div_val; - ex_reg_ctrl_mul_val <== io.ctrl.mul_val; - ex_reg_ctrl_wen_pcr <== io.ctrl.wen_pcr; - ex_reg_ctrl_eret <== io.ctrl.id_eret; + .otherwise { + ex_reg_valid := id_reg_valid; + ex_reg_ctrl_div_val := io.ctrl.div_val; + ex_reg_ctrl_mul_val := io.ctrl.mul_val; + ex_reg_ctrl_wen_pcr := io.ctrl.wen_pcr; + ex_reg_ctrl_eret := io.ctrl.id_eret; } alu.io.dw := ex_reg_ctrl_fn_dw; @@ -330,10 +330,10 @@ class rocketDpath extends Component // time stamp counter val tsc_reg = Reg(resetVal = UFix(0,64)); - tsc_reg <== tsc_reg + UFix(1); + tsc_reg := tsc_reg + UFix(1); // instructions retired counter val irt_reg = Reg(resetVal = UFix(0,64)); - when (wb_reg_valid) { irt_reg <== irt_reg + UFix(1); } + when (wb_reg_valid) { irt_reg := irt_reg + UFix(1); } // writeback select mux ex_wdata := @@ -344,21 +344,21 @@ class rocketDpath extends Component ex_alu_out)))).toBits; // WB_ALU // memory stage - mem_reg_pc <== ex_reg_pc; - mem_reg_inst <== ex_reg_inst - mem_reg_rs2 <== ex_reg_rs2 - mem_reg_waddr <== ex_reg_waddr; - mem_reg_wdata <== ex_wdata; - mem_reg_raddr1 <== ex_reg_raddr1 - mem_reg_raddr2 <== ex_reg_raddr2; + mem_reg_pc := ex_reg_pc; + mem_reg_inst := ex_reg_inst + mem_reg_rs2 := ex_reg_rs2 + mem_reg_waddr := ex_reg_waddr; + mem_reg_wdata := ex_wdata; + mem_reg_raddr1 := ex_reg_raddr1 + mem_reg_raddr2 := ex_reg_raddr2; when (io.ctrl.killx) { - mem_reg_valid <== 
Bool(false); - mem_reg_ctrl_wen_pcr <== Bool(false); + mem_reg_valid := Bool(false); + mem_reg_ctrl_wen_pcr := Bool(false); } - otherwise { - mem_reg_valid <== ex_reg_valid; - mem_reg_ctrl_wen_pcr <== ex_reg_ctrl_wen_pcr; + .otherwise { + mem_reg_valid := ex_reg_valid; + mem_reg_ctrl_wen_pcr := ex_reg_ctrl_wen_pcr; } // for load/use hazard detection (load byte/halfword) @@ -376,9 +376,9 @@ class rocketDpath extends Component val dmem_resp_waddr = io.dmem.resp_tag.toUFix >> UFix(2) val dmem_resp_ext_tag = io.dmem.resp_tag.toUFix >> UFix(1) dmem_resp_replay := io.dmem.resp_replay && dmem_resp_xpu; - r_dmem_resp_replay <== dmem_resp_replay - r_dmem_resp_waddr <== dmem_resp_waddr - r_dmem_fp_replay <== io.dmem.resp_replay && dmem_resp_fpu; + r_dmem_resp_replay := dmem_resp_replay + r_dmem_resp_waddr := dmem_resp_waddr + r_dmem_fp_replay := io.dmem.resp_replay && dmem_resp_fpu; val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr, Mux(div_result_val, div_result_tag, @@ -389,22 +389,22 @@ class rocketDpath extends Component mem_reg_wdata)) val mem_ll_wb = dmem_resp_replay || div_result_val || mul_result_val - wb_reg_pc <== mem_reg_pc; - wb_reg_inst <== mem_reg_inst - wb_reg_ll_wb <== mem_ll_wb - wb_reg_rs2 <== mem_reg_rs2 - wb_reg_waddr <== mem_ll_waddr - wb_reg_wdata <== mem_ll_wdata - wb_reg_raddr1 <== mem_reg_raddr1 - wb_reg_raddr2 <== mem_reg_raddr2; + wb_reg_pc := mem_reg_pc; + wb_reg_inst := mem_reg_inst + wb_reg_ll_wb := mem_ll_wb + wb_reg_rs2 := mem_reg_rs2 + wb_reg_waddr := mem_ll_waddr + wb_reg_wdata := mem_ll_wdata + wb_reg_raddr1 := mem_reg_raddr1 + wb_reg_raddr2 := mem_reg_raddr2; when (io.ctrl.killm) { - wb_reg_valid <== Bool(false); - wb_reg_ctrl_wen_pcr <== Bool(false); + wb_reg_valid := Bool(false); + wb_reg_ctrl_wen_pcr := Bool(false); } - otherwise { - wb_reg_valid <== mem_reg_valid; - wb_reg_ctrl_wen_pcr <== mem_reg_ctrl_wen_pcr; + .otherwise { + wb_reg_valid := mem_reg_valid; + wb_reg_ctrl_wen_pcr := mem_reg_ctrl_wen_pcr; } // vector datapath 
diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index b060b722..48aae6cf 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -40,13 +40,10 @@ class rocketDpathBTB(entries: Int) extends Component val my_clr = io.clr && my_hit || io.invalidate val my_wen = io.wen && (my_hit || !hit && UFix(i) === repl_way) - when (my_clr) { - valid <== Bool(false) - } + valid := !my_clr && (valid || my_wen) when (my_wen) { - valid <== Bool(true) - tag <== io.correct_pc - target <== io.correct_target + tag := io.correct_pc + target := io.correct_target } hit_reduction = hit_reduction || my_hit @@ -66,14 +63,14 @@ class ioDpathPCR extends Bundle() val r = new ioReadPort(); val w = new ioWritePort(); - val status = Bits(17, OUTPUT); + val status = Bits(17, OUTPUT); val ptbr = UFix(PADDR_BITS, OUTPUT); val evec = UFix(VADDR_BITS, OUTPUT); val exception = Bool(INPUT); - val cause = UFix(5, INPUT); + val cause = UFix(5, INPUT); val badvaddr_wen = Bool(INPUT); - val pc = UFix(VADDR_BITS+1, INPUT); - val eret = Bool(INPUT); + val pc = UFix(VADDR_BITS+1, INPUT); + val eret = Bool(INPUT); val ei = Bool(INPUT); val di = Bool(INPUT); val ptbr_wen = Bool(OUTPUT); @@ -120,12 +117,12 @@ class rocketDpathPCR extends Component val reg_status = Cat(reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, reg_status_ec, reg_status_ev, reg_status_ef, reg_status_et); val rdata = Wire() { Bits() }; - io.ptbr_wen := reg_status_vm.toBool && !io.exception && io.w.en && (io.w.addr === PCR_PTBR); - io.status := Cat(reg_status_vm, reg_status_im, reg_status); + io.ptbr_wen := reg_status_vm.toBool && io.w.en && (io.w.addr === PCR_PTBR); + io.status := Cat(reg_status_vm, reg_status_im, reg_status); io.evec := reg_ebase; io.ptbr := reg_ptbr; - io.host.to := Mux(io.host.from_wen, Bits(0), reg_tohost); - io.debug.error_mode := reg_error_mode; + io.host.to := Mux(io.host.from_wen, Bits(0), reg_tohost); + io.debug.error_mode 
:= reg_error_mode; io.r.data := rdata; io.vecbank := reg_vecbank @@ -139,100 +136,99 @@ class rocketDpathPCR extends Component io.console_val := console_wen; when (io.host.from_wen) { - reg_tohost <== Bits(0); - reg_fromhost <== io.host.from; - } - otherwise { - when (!io.exception && io.w.en && (io.w.addr === PCR_TOHOST)) { - reg_tohost <== io.w.data; - reg_fromhost <== Bits(0); - } + reg_tohost := Bits(0); + reg_fromhost := io.host.from; + } + .elsewhen (io.w.en && (io.w.addr === PCR_TOHOST)) { + reg_tohost := io.w.data; + reg_fromhost := Bits(0); } val badvaddr_sign = Mux(io.w.data(VADDR_BITS-1), ~io.w.data(63,VADDR_BITS) === UFix(0), io.w.data(63,VADDR_BITS) != UFix(0)) when (io.badvaddr_wen) { - reg_badvaddr <== Cat(badvaddr_sign, io.w.data(VADDR_BITS-1,0)).toUFix; - } - - when (io.exception && !reg_status_et) { - reg_error_mode <== Bool(true); - } - - when (io.exception && reg_status_et) { - reg_status_s <== Bool(true); - reg_status_ps <== reg_status_s; - reg_status_et <== Bool(false); - reg_epc <== io.pc; - reg_cause <== io.cause; - } - - when (!io.exception && io.di) { - reg_status_et <== Bool(false); - } - - when (!io.exception && io.ei) { - reg_status_et <== Bool(true); - } - - when (!io.exception && io.eret) { - reg_status_s <== reg_status_ps; - reg_status_et <== Bool(true); + reg_badvaddr := Cat(badvaddr_sign, io.w.data(VADDR_BITS-1,0)).toUFix; } - when (!io.exception && !io.eret && io.w.en) { - when (io.w.addr === PCR_STATUS) { - reg_status_vm <== io.w.data(SR_VM).toBool; - reg_status_im <== io.w.data(15,8); - reg_status_sx <== io.w.data(SR_SX).toBool; - reg_status_ux <== io.w.data(SR_UX).toBool; - reg_status_s <== io.w.data(SR_S).toBool; - reg_status_ps <== io.w.data(SR_PS).toBool; - reg_status_ev <== Bool(HAVE_VEC) && io.w.data(SR_EV).toBool; - reg_status_ef <== Bool(HAVE_FPU) && io.w.data(SR_EF).toBool; - reg_status_ec <== Bool(HAVE_RVC) && io.w.data(SR_EC).toBool; - reg_status_et <== io.w.data(SR_ET).toBool; - } - when (io.w.addr === PCR_EPC) { 
reg_epc <== io.w.data(VADDR_BITS,0).toUFix; } - when (io.w.addr === PCR_BADVADDR) { reg_badvaddr <== io.w.data(VADDR_BITS,0).toUFix; } - when (io.w.addr === PCR_EVEC) { reg_ebase <== io.w.data(VADDR_BITS-1,0).toUFix; } - when (io.w.addr === PCR_COUNT) { reg_count <== io.w.data(31,0).toUFix; } - when (io.w.addr === PCR_COMPARE) { reg_compare <== io.w.data(31,0).toUFix; r_irq_timer <== Bool(false); } - when (io.w.addr === PCR_CAUSE) { reg_cause <== io.w.data(4,0); } - when (io.w.addr === PCR_FROMHOST) { reg_fromhost <== io.w.data; } - when (io.w.addr === PCR_SEND_IPI) { r_irq_ipi <== Bool(true); } - when (io.w.addr === PCR_CLR_IPI) { r_irq_ipi <== Bool(false); } - when (io.w.addr === PCR_K0) { reg_k0 <== io.w.data; } - when (io.w.addr === PCR_K1) { reg_k1 <== io.w.data; } - when (io.w.addr === PCR_PTBR) { reg_ptbr <== Cat(io.w.data(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } - when (io.w.addr === PCR_VECBANK) { reg_vecbank <== io.w.data(7,0) } + when (io.exception) { + when (!reg_status_et) { + reg_error_mode := Bool(true) + } + .otherwise { + reg_status_s := Bool(true); + reg_status_ps := reg_status_s; + reg_status_et := Bool(false); + reg_epc := io.pc; + reg_cause := io.cause; + } } - - otherwise { - reg_count <== reg_count + UFix(1); + + when (io.di) { + reg_status_et := Bool(false); + } + + when (io.ei) { + reg_status_et := Bool(true); + } + + when (io.eret) { + reg_status_s := reg_status_ps; + reg_status_et := Bool(true); } + when (reg_count === reg_compare) { - r_irq_timer <== Bool(true); + r_irq_timer := Bool(true); } + reg_count := reg_count + UFix(1); + io.irq_timer := r_irq_timer; io.irq_ipi := r_irq_ipi; - when (!io.r.en) { rdata <== Bits(0,64); } - switch (io.r.addr) { - is (PCR_STATUS) { rdata <== Cat(Bits(0,47), reg_status_vm, reg_status_im, reg_status); } - is (PCR_EPC) { rdata <== Cat(Fill(64-VADDR_BITS-1, reg_epc(VADDR_BITS)), reg_epc); } - is (PCR_BADVADDR) { rdata <== Cat(Fill(64-VADDR_BITS-1, reg_badvaddr(VADDR_BITS)), reg_badvaddr); 
} - is (PCR_EVEC) { rdata <== Cat(Fill(64-VADDR_BITS, reg_ebase(VADDR_BITS-1)), reg_ebase); } - is (PCR_COUNT) { rdata <== Cat(Fill(32, reg_count(31)), reg_count); } - is (PCR_COMPARE) { rdata <== Cat(Fill(32, reg_compare(31)), reg_compare); } - is (PCR_CAUSE) { rdata <== Cat(Bits(0,59), reg_cause); } - is (PCR_COREID) { rdata <== Bits(COREID,64); } - is (PCR_FROMHOST) { rdata <== reg_fromhost; } - is (PCR_TOHOST) { rdata <== reg_tohost; } - is (PCR_K0) { rdata <== reg_k0; } - is (PCR_K1) { rdata <== reg_k1; } - is (PCR_PTBR) { rdata <== Cat(Bits(0,64-PADDR_BITS), reg_ptbr); } - is (PCR_VECBANK) { rdata <== Cat(Bits(0, 56), reg_vecbank) } - otherwise { rdata <== Bits(0,64); } + when (io.w.en) { + when (io.w.addr === PCR_STATUS) { + reg_status_vm := io.w.data(SR_VM).toBool; + reg_status_im := io.w.data(15,8); + reg_status_sx := io.w.data(SR_SX).toBool; + reg_status_ux := io.w.data(SR_UX).toBool; + reg_status_s := io.w.data(SR_S).toBool; + reg_status_ps := io.w.data(SR_PS).toBool; + reg_status_ev := Bool(HAVE_VEC) && io.w.data(SR_EV).toBool; + reg_status_ef := Bool(HAVE_FPU) && io.w.data(SR_EF).toBool; + reg_status_ec := Bool(HAVE_RVC) && io.w.data(SR_EC).toBool; + reg_status_et := io.w.data(SR_ET).toBool; + } + when (io.w.addr === PCR_EPC) { reg_epc := io.w.data(VADDR_BITS,0).toUFix; } + when (io.w.addr === PCR_BADVADDR) { reg_badvaddr := io.w.data(VADDR_BITS,0).toUFix; } + when (io.w.addr === PCR_EVEC) { reg_ebase := io.w.data(VADDR_BITS-1,0).toUFix; } + when (io.w.addr === PCR_COUNT) { reg_count := io.w.data(31,0).toUFix; } + when (io.w.addr === PCR_COMPARE) { reg_compare := io.w.data(31,0).toUFix; r_irq_timer := Bool(false); } + when (io.w.addr === PCR_CAUSE) { reg_cause := io.w.data(4,0); } + when (io.w.addr === PCR_FROMHOST) { reg_fromhost := io.w.data; } + when (io.w.addr === PCR_SEND_IPI) { r_irq_ipi := Bool(true); } + when (io.w.addr === PCR_CLR_IPI) { r_irq_ipi := Bool(false); } + when (io.w.addr === PCR_K0) { reg_k0 := io.w.data; } + when (io.w.addr === 
PCR_K1) { reg_k1 := io.w.data; } + when (io.w.addr === PCR_PTBR) { reg_ptbr := Cat(io.w.data(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } + when (io.w.addr === PCR_VECBANK) { reg_vecbank := io.w.data(7,0) } + } + + rdata := Bits(0, 64) + when (io.r.en) { + switch (io.r.addr) { + is (PCR_STATUS) { rdata := Cat(Bits(0,47), reg_status_vm, reg_status_im, reg_status); } + is (PCR_EPC) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_epc(VADDR_BITS)), reg_epc); } + is (PCR_BADVADDR) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_badvaddr(VADDR_BITS)), reg_badvaddr); } + is (PCR_EVEC) { rdata := Cat(Fill(64-VADDR_BITS, reg_ebase(VADDR_BITS-1)), reg_ebase); } + is (PCR_COUNT) { rdata := Cat(Fill(32, reg_count(31)), reg_count); } + is (PCR_COMPARE) { rdata := Cat(Fill(32, reg_compare(31)), reg_compare); } + is (PCR_CAUSE) { rdata := Cat(Bits(0,59), reg_cause); } + is (PCR_COREID) { rdata := Bits(COREID,64); } + is (PCR_FROMHOST) { rdata := reg_fromhost; } + is (PCR_TOHOST) { rdata := reg_tohost; } + is (PCR_K0) { rdata := reg_k0; } + is (PCR_K1) { rdata := reg_k1; } + is (PCR_PTBR) { rdata := Cat(Bits(0,64-PADDR_BITS), reg_ptbr); } + is (PCR_VECBANK) { rdata := Cat(Bits(0, 56), reg_vecbank) } + } } } @@ -261,7 +257,7 @@ class rocketDpathRegfile extends Component { override val io = new ioRegfile(); - val regfile = Mem4(32, io.w0.data); + val regfile = Mem(32, io.w0.data); regfile.setReadLatency(0); regfile.setTarget('inst); regfile.write(io.w0.addr, io.w0.data, io.w0.en); diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 8c6c7d8d..52c148fd 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -150,8 +150,8 @@ class rocketDpathVec extends Component when (io.valid && wb_vec_wen.toBool && wb_vec_fn.toBool) { - reg_hwvl <== hwvl_vcfg - reg_appvl0 <== !(appvl.orR()) + reg_hwvl := hwvl_vcfg + reg_appvl0 := !(appvl.orR()) } io.wen := io.valid && wb_vec_wen.toBool diff --git 
a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index de1098eb..ee64e753 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -52,13 +52,13 @@ class rocketDTLB(entries: Int) extends Component val repl_count = Reg(resetVal = UFix(0,addr_bits)); when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_vpn <== io.cpu.req_vpn; - r_cpu_req_cmd <== io.cpu.req_cmd; - r_cpu_req_asid <== io.cpu.req_asid; - r_cpu_req_val <== Bool(true); + r_cpu_req_vpn := io.cpu.req_vpn; + r_cpu_req_cmd := io.cpu.req_cmd; + r_cpu_req_asid := io.cpu.req_asid; + r_cpu_req_val := Bool(true); } - otherwise { - r_cpu_req_val <== Bool(false); + .otherwise { + r_cpu_req_val := Bool(false); } val req_load = (r_cpu_req_cmd === M_XRD); @@ -96,19 +96,19 @@ class rocketDTLB(entries: Int) extends Component val sr_array = Reg(resetVal = Bits(0, entries)); // supervisor read permission val sw_array = Reg(resetVal = Bits(0, entries)); // supervisor write permission when (io.ptw.resp_val) { - ur_array <== ur_array.bitSet(r_refill_waddr, ptw_perm_ur); - uw_array <== uw_array.bitSet(r_refill_waddr, ptw_perm_uw); - sr_array <== sr_array.bitSet(r_refill_waddr, ptw_perm_sr); - sw_array <== sw_array.bitSet(r_refill_waddr, ptw_perm_sw); + ur_array := ur_array.bitSet(r_refill_waddr, ptw_perm_ur); + uw_array := uw_array.bitSet(r_refill_waddr, ptw_perm_uw); + sr_array := sr_array.bitSet(r_refill_waddr, ptw_perm_sr); + sw_array := sw_array.bitSet(r_refill_waddr, ptw_perm_sw); } // when the page table lookup reports an error, set all permission // bits to 0 so the next access will cause an exception when (io.ptw.resp_err) { - ur_array <== ur_array.bitSet(r_refill_waddr, Bool(false)); - uw_array <== uw_array.bitSet(r_refill_waddr, Bool(false)); - sr_array <== sr_array.bitSet(r_refill_waddr, Bool(false)); - sw_array <== sw_array.bitSet(r_refill_waddr, Bool(false)); + ur_array := ur_array.bitSet(r_refill_waddr, Bool(false)); + uw_array := uw_array.bitSet(r_refill_waddr, 
Bool(false)); + sr_array := sr_array.bitSet(r_refill_waddr, Bool(false)); + sw_array := sw_array.bitSet(r_refill_waddr, Bool(false)); } // high if there are any unused (invalid) entries in the TLB @@ -128,10 +128,10 @@ class rocketDTLB(entries: Int) extends Component // currently replace TLB entries in LIFO order // TODO: implement LRU replacement policy when (tlb_miss) { - r_refill_tag <== lookup_tag; - r_refill_waddr <== repl_waddr; + r_refill_tag := lookup_tag; + r_refill_waddr := repl_waddr; when (!invalid_entry) { - repl_count <== repl_count + UFix(1); + repl_count := repl_count + UFix(1); } } @@ -166,17 +166,17 @@ class rocketDTLB(entries: Int) extends Component switch (state) { is (s_ready) { when (tlb_miss) { - state <== s_request; + state := s_request; } } is (s_request) { when (io.ptw.req_rdy) { - state <== s_wait; + state := s_wait; } } is (s_wait) { when (io.ptw.resp_val || io.ptw.resp_err) { - state <== s_ready; + state := s_ready; } } } diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index f46d9d25..b3f78003 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -126,7 +126,7 @@ class rocketFPU extends Component val ex_reg_inst = Reg() { Bits() } when (io.req_valid) { - ex_reg_inst <== io.req_inst + ex_reg_inst := io.req_inst } // load response @@ -135,12 +135,12 @@ class rocketFPU extends Component val load_wb_data = Reg() { Bits() } val load_wb_tag = Reg() { UFix() } when (dmem_resp_val_fpu) { - load_wb_data <== io.dmem.resp_data - load_wb_tag <== io.dmem.resp_tag.toUFix >> UFix(1) + load_wb_data := io.dmem.resp_data + load_wb_tag := io.dmem.resp_tag.toUFix >> UFix(1) } // regfile - val regfile = Mem4(32, load_wb_data); + val regfile = Mem(32, load_wb_data); regfile.setReadLatency(0); regfile.setTarget('inst); regfile.write(load_wb_tag, load_wb_data, load_wb); diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 5a1b8aaa..b13dbdd8 100644 --- 
a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -68,14 +68,14 @@ class rocketICache(sets: Int, assoc: Int) extends Component { val tag_hit = Wire() { Bool() } when (io.cpu.req_val && rdy) { - r_cpu_req_val <== Bool(true) - r_cpu_req_idx <== io.cpu.req_idx + r_cpu_req_val := Bool(true) + r_cpu_req_idx := io.cpu.req_idx } - otherwise { - r_cpu_req_val <== Bool(false) + .otherwise { + r_cpu_req_val := Bool(false) } when (state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss) { - r_cpu_req_ppn <== io.cpu.req_ppn + r_cpu_req_ppn := io.cpu.req_ppn } val r_cpu_hit_addr = Cat(io.cpu.req_ppn, r_cpu_req_idx) @@ -86,7 +86,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { // refill counter val refill_count = Reg(resetVal = UFix(0, rf_cnt_bits)); when (io.mem.resp_val) { - refill_count <== refill_count + UFix(1); + refill_count := refill_count + UFix(1); } val repl_way = LFSR16(state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss && !tag_hit)(log2up(assoc)-1,0) @@ -104,7 +104,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { for (i <- 0 until assoc) { val repl_me = (repl_way === UFix(i)) - val tag_array = Mem4(lines, r_cpu_miss_tag); + val tag_array = Mem(lines, r_cpu_miss_tag); tag_array.setReadLatency(1); tag_array.setTarget('inst); val tag_rdata = tag_array.rw(tag_addr, r_cpu_miss_tag, tag_we && repl_me); @@ -112,17 +112,17 @@ class rocketICache(sets: Int, assoc: Int) extends Component { // valid bit array val vb_array = Reg(resetVal = Bits(0, lines)); when (io.cpu.invalidate) { - vb_array <== Bits(0,lines); + vb_array := Bits(0,lines); } - when (tag_we && repl_me) { - vb_array <== vb_array.bitSet(r_cpu_req_idx(indexmsb,indexlsb).toUFix, UFix(1,1)); + .elsewhen (tag_we && repl_me) { + vb_array := vb_array.bitSet(r_cpu_req_idx(indexmsb,indexlsb).toUFix, UFix(1,1)); } val valid = vb_array(r_cpu_req_idx(indexmsb,indexlsb)).toBool; val hit = valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) // data 
array - val data_array = Mem4(lines*REFILL_CYCLES, io.mem.resp_data); + val data_array = Mem(lines*REFILL_CYCLES, io.mem.resp_data); data_array.setReadLatency(1); data_array.setTarget('inst); val data_out = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val && repl_me) @@ -144,30 +144,30 @@ class rocketICache(sets: Int, assoc: Int) extends Component { // control state machine switch (state) { is (s_reset) { - state <== s_ready; + state := s_ready; } is (s_ready) { when (io.cpu.itlb_miss) { - state <== s_ready; + state := s_ready; } - when (r_cpu_req_val && !tag_hit) { - state <== s_request; + .elsewhen (r_cpu_req_val && !tag_hit) { + state := s_request; } } is (s_request) { when (io.mem.req_rdy) { - state <== s_refill_wait; + state := s_refill_wait; } } is (s_refill_wait) { when (io.mem.resp_val) { - state <== s_refill; + state := s_refill; } } is (s_refill) { when (io.mem.resp_val && (~refill_count === UFix(0))) { - state <== s_ready; + state := s_ready; } } } diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 54007fc9..b69f03a0 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -30,7 +30,7 @@ class rocketIPrefetcher extends Component() { val demand_miss = io.icache.req_val & io.icache.req_rdy; val prefetch_addr = Reg() { UFix(width = io.icache.req_addr.width) }; - when (demand_miss) { prefetch_addr <== io.icache.req_addr + UFix(1); } + when (demand_miss) { prefetch_addr := io.icache.req_addr + UFix(1); } val addr_match = (prefetch_addr === io.icache.req_addr); val hit = (state != s_invalid) & (state != s_req_wait) & addr_match; @@ -44,14 +44,14 @@ class rocketIPrefetcher extends Component() { io.mem.req_addr := Mux(io.mem.req_tag(0).toBool, prefetch_addr, io.icache.req_addr); val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); - when (ip_mem_resp_val.toBool) { fill_cnt <== fill_cnt + UFix(1); } + when 
(ip_mem_resp_val.toBool) { fill_cnt := fill_cnt + UFix(1); } val fill_done = (~fill_cnt === UFix(0)) & ip_mem_resp_val; val forward = Reg(resetVal = Bool(false)); val forward_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); - when (forward & pdq.io.deq.valid) { forward_cnt <== forward_cnt + UFix(1); } + when (forward & pdq.io.deq.valid) { forward_cnt := forward_cnt + UFix(1); } val forward_done = (~forward_cnt === UFix(0)) & pdq.io.deq.valid; - forward <== (demand_miss & hit | forward & ~forward_done); + forward := (demand_miss & hit | forward & ~forward_done); io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag(0).toBool) || (forward && pdq.io.deq.valid); io.icache.resp_data := Mux(forward, pdq.io.deq.bits, io.mem.resp_data); @@ -63,25 +63,25 @@ class rocketIPrefetcher extends Component() { switch (state) { is (s_invalid) { - when (demand_miss) { state <== s_req_wait; } + when (demand_miss) { state := s_req_wait; } } is (s_valid) { - when (demand_miss | (forward & forward_done)) { state <== s_req_wait; } + when (demand_miss | (forward & forward_done)) { state := s_req_wait; } } is (s_refilling) { - when (demand_miss & ~addr_match & fill_done.toBool) { state <== s_req_wait; } - when (demand_miss & ~addr_match) { state <== s_bad_resp_wait; } - when (fill_done.toBool) { state <== s_valid; } + when (demand_miss & ~addr_match & fill_done.toBool) { state := s_req_wait; } + .elsewhen (demand_miss & ~addr_match) { state := s_bad_resp_wait; } + .elsewhen (fill_done.toBool) { state := s_valid; } } is (s_req_wait) { - when (ip_mem_req_rdy) { state <== s_resp_wait; } + when (ip_mem_req_rdy) { state := s_resp_wait; } } is (s_resp_wait) { - when (demand_miss & ~addr_match) { state <== s_bad_resp_wait; } - when (ip_mem_resp_val.toBool) { state <== s_refilling; } + when (demand_miss & ~addr_match) { state := s_bad_resp_wait; } + .elsewhen (ip_mem_resp_val.toBool) { state := s_refilling; } } is (s_bad_resp_wait) { - when (fill_done.toBool & 
ip_mem_resp_val.toBool) { state <== s_req_wait; } + when (fill_done.toBool & ip_mem_resp_val.toBool) { state := s_req_wait; } } } } diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 1b6aa7ed..6e0e6e4d 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -23,30 +23,26 @@ class rocketCAM(entries: Int, tag_bits: Int) extends Component { val io = new ioCAM(entries, addr_bits, tag_bits); val cam_tags = Mem(entries, io.write, io.write_addr, io.write_tag); - val l_hit = Wire() { Bool() }; - val l_hit_addr = Wire() { UFix() }; - val vb_array = Reg(resetVal = Bits(0, entries)); when (io.clear) { - vb_array <== Bits(0, entries); + vb_array := Bits(0, entries); } - when (io.write) { - vb_array <== vb_array.bitSet(io.write_addr, Bool(true)); - } - - for (i <- 0 to entries-1) { - when (vb_array(UFix(i)).toBool && (cam_tags(UFix(i)) === io.tag)) { - l_hit <== Bool(true); - l_hit_addr <== UFix(i,addr_bits); - } + .elsewhen (io.write) { + vb_array := vb_array.bitSet(io.write_addr, Bool(true)); } - l_hit <== Bool(false); - l_hit_addr <== UFix(0, addr_bits); + var l_hit = Bool(false) + val mux = (new Mux1H(entries)) { Bits(width = addr_bits) } + for (i <- 0 to entries-1) { + val my_hit = vb_array(UFix(i)).toBool && (cam_tags(UFix(i)) === io.tag) + l_hit = l_hit || my_hit + mux.io.in(i) := Bits(i) + mux.io.sel(i) := my_hit + } io.valid_bits := vb_array; io.hit := l_hit; - io.hit_addr := l_hit_addr; + io.hit_addr := mux.io.out.toUFix; } // interface between TLB and PTW @@ -104,12 +100,12 @@ class rocketITLB(entries: Int) extends Component val repl_count = Reg(resetVal = UFix(0, addr_bits)); when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_vpn <== io.cpu.req_vpn; - r_cpu_req_asid <== io.cpu.req_asid; - r_cpu_req_val <== Bool(true); + r_cpu_req_vpn := io.cpu.req_vpn; + r_cpu_req_asid := io.cpu.req_asid; + r_cpu_req_val := Bool(true); } - otherwise { - r_cpu_req_val <== Bool(false); + .otherwise { + 
r_cpu_req_val := Bool(false); } val bad_va = r_cpu_req_vpn(VPN_BITS) != r_cpu_req_vpn(VPN_BITS-1); @@ -139,15 +135,15 @@ class rocketITLB(entries: Int) extends Component val ux_array = Reg(resetVal = Bits(0, entries)); // user execute permission val sx_array = Reg(resetVal = Bits(0, entries)); // supervisor execute permission when (io.ptw.resp_val) { - ux_array <== ux_array.bitSet(r_refill_waddr, ptw_perm_ux); - sx_array <== sx_array.bitSet(r_refill_waddr, ptw_perm_sx); + ux_array := ux_array.bitSet(r_refill_waddr, ptw_perm_ux); + sx_array := sx_array.bitSet(r_refill_waddr, ptw_perm_sx); } // when the page table lookup reports an error, set both execute permission // bits to 0 so the next access will cause an exceptions when (io.ptw.resp_err) { - ux_array <== ux_array.bitSet(r_refill_waddr, Bool(false)); - sx_array <== sx_array.bitSet(r_refill_waddr, Bool(false)); + ux_array := ux_array.bitSet(r_refill_waddr, Bool(false)); + sx_array := sx_array.bitSet(r_refill_waddr, Bool(false)); } // high if there are any unused entries in the ITLB @@ -165,10 +161,10 @@ class rocketITLB(entries: Int) extends Component val tlb_miss = status_vm && lookup_miss; when (tlb_miss) { - r_refill_tag <== lookup_tag; - r_refill_waddr <== repl_waddr; + r_refill_tag := lookup_tag; + r_refill_waddr := repl_waddr; when (!invalid_entry) { - repl_count <== repl_count + UFix(1); + repl_count := repl_count + UFix(1); } } @@ -190,17 +186,17 @@ class rocketITLB(entries: Int) extends Component switch (state) { is (s_ready) { when (tlb_miss) { - state <== s_request; + state := s_request; } } is (s_request) { when (io.ptw.req_rdy) { - state <== s_wait; + state := s_wait; } } is (s_wait) { when (io.ptw.resp_val || io.ptw.resp_err) { - state <== s_ready; + state := s_ready; } } } diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 52ab0ef0..38ba04c3 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -51,19 
+51,21 @@ class rocketMultiplier extends Component { val rhs_sign = (io.mul_fn === MUL_HS) && rhs_msb val rhs_hi = Mux(io.dw === DW_64, io.in1(63,32), Fill(32, rhs_sign)) val rhs_in = Cat(rhs_sign, rhs_sign, rhs_hi, io.in1(31,0)) + + val do_kill = io.mul_kill && r_cnt === UFix(0) // can only kill on 1st cycle when (io.mul_val && io.mul_rdy) { - r_val <== Bool(true) - r_cnt <== UFix(0, log2up(cycles+1)) - r_dw <== io.dw - r_fn <== io.mul_fn - r_tag <== io.mul_tag - r_lhs <== lhs_in - r_prod<== rhs_in - r_lsb <== Bool(false) + r_val := Bool(true) + r_cnt := UFix(0, log2up(cycles+1)) + r_dw := io.dw + r_fn := io.mul_fn + r_tag := io.mul_tag + r_lhs := lhs_in + r_prod:= rhs_in + r_lsb := Bool(false) } - when (io.result_val && io.result_rdy || io.mul_kill && r_cnt === UFix(0)) { // can only kill on first cycle - r_val <== Bool(false) + .elsewhen (io.result_val && io.result_rdy || do_kill) { // can only kill on first cycle + r_val := Bool(false) } val lhs_sext = Cat(r_lhs(width-2), r_lhs(width-2), r_lhs).toUFix @@ -86,9 +88,9 @@ class rocketMultiplier extends Component { } when (r_val && (r_cnt != UFix(cycles))) { - r_lsb <== lsb - r_prod <== prod - r_cnt <== r_cnt + UFix(1) + r_lsb := lsb + r_prod := prod + r_cnt := r_cnt + UFix(1) } val mul_output64 = Mux(r_fn === MUL_LO, r_prod(63,0), r_prod(127,64)) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 60a7c5f3..d72fbdfb 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -197,35 +197,46 @@ class MSHR(id: Int) extends Component { val next_dirty = dirty || io.req_sec_val && io.req_sec_rdy && !req_load val sec_rdy = io.idx_match && !refilled && (dirty || !requested || req_load) - val rpq = (new queue(NRPQ)) { new RPQEntry() } + // XXX why doesn't this work? 
+ // val rpq = (new queue(NRPQ)) { new RPQEntry() } + val rpq_enq_bits = Cat(io.req_offset, io.req_cmd, io.req_type, io.req_sdq_id, io.req_tag) + val rpq = (new queue(NRPQ)) { Bits(width = rpq_enq_bits.getWidth) } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq - rpq.io.enq.bits.offset := io.req_offset - rpq.io.enq.bits.cmd := io.req_cmd - rpq.io.enq.bits.typ := io.req_type - rpq.io.enq.bits.sdq_id := io.req_sdq_id - rpq.io.enq.bits.tag := io.req_tag + rpq.io.enq.bits := rpq_enq_bits rpq.io.deq.ready := io.replay.ready && refilled + var rpq_deq_bits = rpq.io.deq.bits + io.replay.bits.tag := rpq_deq_bits + rpq_deq_bits = rpq_deq_bits >> UFix(io.req_tag.width) + io.replay.bits.sdq_id := rpq_deq_bits.toUFix + rpq_deq_bits = rpq_deq_bits >> UFix(io.req_sdq_id.width) + io.replay.bits.typ := rpq_deq_bits + rpq_deq_bits = rpq_deq_bits >> UFix(io.req_type.width) + io.replay.bits.cmd := rpq_deq_bits + rpq_deq_bits = rpq_deq_bits >> UFix(io.req_cmd.width) + io.replay.bits.offset := rpq_deq_bits + rpq_deq_bits = rpq_deq_bits >> UFix(io.req_offset.width) + when (io.req_pri_val && io.req_pri_rdy) { - valid <== Bool(true) - dirty <== !req_load - requested <== Bool(false) - refilled <== Bool(false) - ppn <== io.req_ppn - idx_ <== io.req_idx - way_oh_ <== io.req_way_oh + valid := Bool(true) + dirty := !req_load + requested := Bool(false) + refilled := Bool(false) + ppn := io.req_ppn + idx_ := io.req_idx + way_oh_ := io.req_way_oh } - when (io.mem_req.valid && io.mem_req.ready) { - requested <== Bool(true) - } - when (io.mem_resp_val) { - refilled <== Bool(true) - } - when (io.meta_req.valid && io.meta_req.ready) { - valid <== Bool(false) - } - otherwise { - dirty <== next_dirty + .otherwise { + when (io.mem_req.valid && io.mem_req.ready) { + requested := Bool(true) + } + when (io.mem_resp_val) { + refilled := Bool(true) + } + when (io.meta_req.valid && io.meta_req.ready) { + valid := Bool(false) + } + dirty := next_dirty } 
io.idx_match := valid && (idx_ === io.req_idx) @@ -251,11 +262,6 @@ class MSHR(id: Int) extends Component { io.replay.valid := rpq.io.deq.valid && refilled io.replay.bits.idx := idx_ - io.replay.bits.tag := rpq.io.deq.bits.tag - io.replay.bits.offset := rpq.io.deq.bits.offset - io.replay.bits.cmd := rpq.io.deq.bits.cmd - io.replay.bits.typ := rpq.io.deq.bits.typ - io.replay.bits.sdq_id := rpq.io.deq.bits.sdq_id io.replay.bits.way_oh := way_oh_ } @@ -366,10 +372,10 @@ class ReplayUnit extends Component { val replay_val = Reg(resetVal = Bool(false)) val replay_retry = replay_val && !io.data_req.ready - replay_val <== io.replay.valid || replay_retry + replay_val := io.replay.valid || replay_retry val rp = Reg { new Replay() } - when (io.replay.valid && io.replay.ready) { rp <== io.replay.bits } + when (io.replay.valid && io.replay.ready) { rp := io.replay.bits } val rp_amo = rp.cmd(3).toBool val rp_store = (rp.cmd === M_XWR) @@ -383,13 +389,13 @@ class ReplayUnit extends Component { val sdq_wen = io.sdq_enq.valid && io.sdq_enq.ready val sdq_addr = Mux(sdq_ren_retry, rp.sdq_id, Mux(sdq_ren_new, io.replay.bits.sdq_id, sdq_alloc_id)) - val sdq = Mem4(NSDQ, io.sdq_enq.bits) + val sdq = Mem(NSDQ, io.sdq_enq.bits) sdq.setReadLatency(1); sdq.setTarget('inst) val sdq_dout = sdq.rw(sdq_addr, io.sdq_enq.bits, sdq_wen, cs = sdq_ren || sdq_wen) val sdq_free = replay_val && !replay_retry && rp_write - sdq_val <== sdq_val & ~(sdq_free.toUFix << rp.sdq_id) | (sdq_wen.toUFix << sdq_alloc_id) + sdq_val := sdq_val & ~(sdq_free.toUFix << rp.sdq_id) | (sdq_wen.toUFix << sdq_alloc_id) io.sdq_enq.ready := (~sdq_val != UFix(0)) && !sdq_ren io.sdq_id := sdq_alloc_id @@ -433,9 +439,9 @@ class WritebackUnit extends Component { wbq.io.enq.bits := io.data_resp wbq.io.deq.ready := io.mem_req.ready && !refill_val && (cnt === UFix(REFILL_CYCLES)) - when (io.req.valid && io.req.ready) { valid <== Bool(true); cnt <== UFix(0); addr <== io.req.bits } - when (io.data_req.valid && io.data_req.ready) { 
cnt <== cnt + UFix(1) } - when ((cnt === UFix(REFILL_CYCLES)) && !wbq.io.deq.valid) { valid <== Bool(false) } + when (io.data_req.valid && io.data_req.ready) { cnt := cnt + UFix(1) } + when ((cnt === UFix(REFILL_CYCLES)) && !wbq.io.deq.valid) { valid := Bool(false) } + when (io.req.valid && io.req.ready) { valid := Bool(true); cnt := UFix(0); addr := io.req.bits } io.req.ready := !valid io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) @@ -474,22 +480,22 @@ class FlushUnit(lines: Int) extends Component { switch (state) { is(s_reset) { when (io.meta_req.ready) { - state <== Mux(~way_cnt === UFix(0) && ~idx_cnt === UFix(0), s_ready, s_reset); - when (~way_cnt === UFix(0)) { idx_cnt <== next_idx_cnt }; - way_cnt <== next_way_cnt; + state := Mux(~way_cnt === UFix(0) && ~idx_cnt === UFix(0), s_ready, s_reset); + when (~way_cnt === UFix(0)) { idx_cnt := next_idx_cnt }; + way_cnt := next_way_cnt; } } - is(s_ready) { when (io.req.valid) { state <== s_meta_read; tag <== io.req.bits } } - is(s_meta_read) { when (io.meta_req.ready) { state <== s_meta_wait } } - is(s_meta_wait) { state <== Mux(io.meta_resp.valid && io.meta_resp.dirty && !io.wb_req.ready, s_meta_read, s_meta_write) } + is(s_ready) { when (io.req.valid) { state := s_meta_read; tag := io.req.bits } } + is(s_meta_read) { when (io.meta_req.ready) { state := s_meta_wait } } + is(s_meta_wait) { state := Mux(io.meta_resp.valid && io.meta_resp.dirty && !io.wb_req.ready, s_meta_read, s_meta_write) } is(s_meta_write) { when (io.meta_req.ready) { - state <== Mux(~way_cnt === UFix(0) && ~idx_cnt === UFix(0), s_done, s_meta_read); - when (~way_cnt === UFix(0)) { idx_cnt <== next_idx_cnt }; - way_cnt <== next_way_cnt; + state := Mux(~way_cnt === UFix(0) && ~idx_cnt === UFix(0), s_done, s_meta_read); + when (~way_cnt === UFix(0)) { idx_cnt := next_idx_cnt }; + way_cnt := next_way_cnt; } } - is(s_done) { when (io.resp.ready) { state <== s_ready } } + is(s_done) { when (io.resp.ready) { state := s_ready } } } 
io.req.ready := state === s_ready @@ -515,7 +521,7 @@ class MetaDataArray(lines: Int) extends Component { val state_req = (new ioDecoupled) { new MetaArrayReq() } } - val vd_array = Mem4(lines, Bits(width = 2)) + val vd_array = Mem(lines, Bits(width = 2)) vd_array.setReadLatency(1); val vd_wdata2 = Cat(io.state_req.bits.data.valid, io.state_req.bits.data.dirty) vd_array.write(io.state_req.bits.idx, vd_wdata2, io.state_req.valid && io.state_req.bits.rw) @@ -526,7 +532,7 @@ class MetaDataArray(lines: Int) extends Component { // this could be eliminated if the read port were combinational. val vd_conflict = io.state_req.valid && (io.req.bits.idx === io.state_req.bits.idx) - val tag_array = Mem4(lines, io.resp.tag) + val tag_array = Mem(lines, io.resp.tag) tag_array.setReadLatency(1); tag_array.setTarget('inst) val tag_rdata = tag_array.rw(io.req.bits.idx, io.req.bits.data.tag, io.req.valid && io.req.bits.rw, cs = io.req.valid) @@ -547,7 +553,7 @@ class MetaDataArrayArray(lines: Int) extends Component { val way_en_ = Reg { Bits(width=NWAYS) } when (io.req.valid && io.req.ready) { - way_en_ <== io.req.bits.way_en + way_en_ := io.req.bits.way_en } var tag_ready = Bool(true) @@ -576,7 +582,7 @@ class DataArray(lines: Int) extends Component { val wmask = FillInterleaved(8, io.req.bits.wmask) - val array = Mem4(lines*REFILL_CYCLES, io.resp) + val array = Mem(lines*REFILL_CYCLES, io.resp) array.setReadLatency(1); array.setTarget('inst) val addr = Cat(io.req.bits.idx, io.req.bits.offset) @@ -594,7 +600,7 @@ class DataArrayArray(lines: Int) extends Component { val way_en_ = Reg { Bits(width=NWAYS) } when (io.req.valid && io.req.ready) { - way_en_ <== io.req.bits.way_en + way_en_ := io.req.bits.way_en } //val data_ready_arr = Vec(NWAYS){ Bool() } @@ -694,26 +700,26 @@ class HellaCacheDM extends Component { val replayer = new ReplayUnit() val replay_amo_val = replayer.io.data_req.valid && replayer.io.data_req.bits.cmd(3).toBool - when (replay_amo_val) { - r_cpu_req_idx <== 
Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) - r_cpu_req_cmd <== replayer.io.data_req.bits.cmd - r_cpu_req_type <== replayer.io.data_req.bits.typ - r_cpu_req_data <== replayer.io.data_req.bits.data - } when (io.cpu.req_val) { - r_cpu_req_idx <== io.cpu.req_idx - r_cpu_req_cmd <== io.cpu.req_cmd - r_cpu_req_type <== io.cpu.req_type - r_cpu_req_tag <== io.cpu.req_tag + r_cpu_req_idx := io.cpu.req_idx + r_cpu_req_cmd := io.cpu.req_cmd + r_cpu_req_type := io.cpu.req_type + r_cpu_req_tag := io.cpu.req_tag when (req_write) { - r_cpu_req_data <== io.cpu.req_data + r_cpu_req_data := io.cpu.req_data } } + when (replay_amo_val) { + r_cpu_req_idx := Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) + r_cpu_req_cmd := replayer.io.data_req.bits.cmd + r_cpu_req_type := replayer.io.data_req.bits.typ + r_cpu_req_data := replayer.io.data_req.bits.data + } // refill counter val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val rr_count_next = rr_count + UFix(1) - when (io.mem.resp_val) { rr_count <== rr_count_next } + when (io.mem.resp_val) { rr_count := rr_count_next } val misaligned = (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || @@ -788,7 +794,7 @@ class HellaCacheDM extends Component { val drain_store = drain_store_val && data_arb.io.in(2).ready val p_store_rdy = !p_store_valid || drain_store val p_amo = Reg(tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo, resetVal = Bool(false)) - p_store_valid <== !p_store_rdy || (tag_hit && r_req_store) || p_amo + p_store_valid := !p_store_rdy || (tag_hit && r_req_store) || p_amo // writeback val wb_rdy = wb_arb.io.in(1).ready && !p_store_idx_match @@ -811,14 +817,14 @@ class HellaCacheDM extends Component { val amoalu = new AMOALU storegen.io.typ := r_cpu_req_type storegen.io.din := r_cpu_req_data - when (p_amo) { - p_store_data <== amoalu.io.out - } when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) { - 
p_store_idx <== r_cpu_req_idx - p_store_type <== r_cpu_req_type - p_store_cmd <== r_cpu_req_cmd - p_store_data <== storegen.io.dout + p_store_idx := r_cpu_req_idx + p_store_type := r_cpu_req_type + p_store_cmd := r_cpu_req_cmd + p_store_data := storegen.io.dout + } + when (p_amo) { + p_store_data := amoalu.io.out } // miss handling @@ -852,7 +858,7 @@ class HellaCacheDM extends Component { data_arb.io.in(1).bits.rw := replay.cmd === M_XWR data_arb.io.in(1).valid := replay_val replayer.io.data_req.ready := replay_rdy && !stall_replay - r_replay_amo <== replay_amo_val && replay_rdy && !stall_replay + r_replay_amo := replay_amo_val && replay_rdy && !stall_replay // store write mask generation. // assumes store replays are higher-priority than pending stores. @@ -881,13 +887,13 @@ class HellaCacheDM extends Component { amoalu.io.lhs := loadgen.io.r_dout.toUFix amoalu.io.rhs := p_store_data.toUFix - early_nack <== early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo + early_nack := early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo // reset and flush unit val flusher = new FlushUnit(lines) val flushed = Reg(resetVal = Bool(true)) val flush_rdy = mshr.io.fence_rdy && wb_rdy && !p_store_valid - flushed <== flushed && !r_cpu_req_val || r_cpu_req_val && r_req_flush && flush_rdy && flusher.io.req.ready + flushed := flushed && !r_cpu_req_val || r_cpu_req_val && r_req_flush && flush_rdy && flusher.io.req.ready flusher.io.req.valid := r_cpu_req_val && r_req_flush && flush_rdy && !flushed flusher.io.wb_req <> wb_arb.io.in(0) flusher.io.meta_req.bits.inner_req <> meta_arb.io.in(0).bits @@ -899,7 +905,7 @@ class HellaCacheDM extends Component { // we usually nack rather than reporting that the cache is not ready. // fences and flushes are the exceptions. 
val pending_fence = Reg(resetVal = Bool(false)) - pending_fence <== (r_cpu_req_val && r_req_fence || pending_fence) && !flush_rdy + pending_fence := (r_cpu_req_val && r_req_fence || pending_fence) && !flush_rdy val nack_hit = p_store_match || r_req_write && !p_store_rdy val nack_miss = dirty && !wb_rdy || !mshr.io.req_rdy || r_req_write && !replayer.io.sdq_enq.ready val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || @@ -974,26 +980,26 @@ class HellaCacheAssoc extends Component { val replayer = new ReplayUnit() val replay_amo_val = replayer.io.data_req.valid && replayer.io.data_req.bits.cmd(3).toBool - when (replay_amo_val) { - r_cpu_req_idx <== Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) - r_cpu_req_cmd <== replayer.io.data_req.bits.cmd - r_cpu_req_type <== replayer.io.data_req.bits.typ - r_cpu_req_data <== replayer.io.data_req.bits.data - } when (io.cpu.req_val) { - r_cpu_req_idx <== io.cpu.req_idx - r_cpu_req_cmd <== io.cpu.req_cmd - r_cpu_req_type <== io.cpu.req_type - r_cpu_req_tag <== io.cpu.req_tag + r_cpu_req_idx := io.cpu.req_idx + r_cpu_req_cmd := io.cpu.req_cmd + r_cpu_req_type := io.cpu.req_type + r_cpu_req_tag := io.cpu.req_tag when (req_write) { - r_cpu_req_data <== io.cpu.req_data + r_cpu_req_data := io.cpu.req_data } } + when (replay_amo_val) { + r_cpu_req_idx := Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) + r_cpu_req_cmd := replayer.io.data_req.bits.cmd + r_cpu_req_type := replayer.io.data_req.bits.typ + r_cpu_req_data := replayer.io.data_req.bits.data + } // refill counter val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val rr_count_next = rr_count + UFix(1) - when (io.mem.resp_val) { rr_count <== rr_count_next } + when (io.mem.resp_val) { rr_count := rr_count_next } val misaligned = (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || @@ -1081,7 +1087,7 @@ class HellaCacheAssoc extends Component { val drain_store = drain_store_val 
&& data_arb.io.in(2).ready val p_store_rdy = !p_store_valid || drain_store val p_amo = Reg(tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo, resetVal = Bool(false)) - p_store_valid <== !p_store_rdy || (tag_hit && r_req_store) || p_amo + p_store_valid := !p_store_rdy || (tag_hit && r_req_store) || p_amo // writeback val wb_rdy = wb_arb.io.in(1).ready && !p_store_idx_match @@ -1105,15 +1111,15 @@ class HellaCacheAssoc extends Component { val amoalu = new AMOALU storegen.io.typ := r_cpu_req_type storegen.io.din := r_cpu_req_data - when (p_amo) { - p_store_data <== amoalu.io.out - } when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) { - p_store_idx <== r_cpu_req_idx - p_store_type <== r_cpu_req_type - p_store_cmd <== r_cpu_req_cmd - p_store_way_oh <== Mux(r_replay_amo, replayer.io.way_oh, hit_way_oh) - p_store_data <== storegen.io.dout + p_store_idx := r_cpu_req_idx + p_store_type := r_cpu_req_type + p_store_cmd := r_cpu_req_cmd + p_store_way_oh := Mux(r_replay_amo, replayer.io.way_oh, hit_way_oh) + p_store_data := storegen.io.dout + } + when (p_amo) { + p_store_data := amoalu.io.out } // miss handling @@ -1149,7 +1155,7 @@ class HellaCacheAssoc extends Component { data_arb.io.in(1).valid := replay_val data_arb.io.in(1).bits.way_en := replayer.io.way_oh replayer.io.data_req.ready := replay_rdy && !stall_replay - r_replay_amo <== replay_amo_val && replay_rdy && !stall_replay + r_replay_amo := replay_amo_val && replay_rdy && !stall_replay // store write mask generation. // assumes store replays are higher-priority than pending stores. 
@@ -1178,13 +1184,13 @@ class HellaCacheAssoc extends Component { amoalu.io.lhs := loadgen.io.r_dout.toUFix amoalu.io.rhs := p_store_data.toUFix - early_nack <== early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo + early_nack := early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo // reset and flush unit val flusher = new FlushUnit(lines) val flushed = Reg(resetVal = Bool(true)) val flush_rdy = mshr.io.fence_rdy && wb_rdy && !p_store_valid - flushed <== flushed && !r_cpu_req_val || r_cpu_req_val && r_req_flush && flush_rdy && flusher.io.req.ready + flushed := flushed && !r_cpu_req_val || r_cpu_req_val && r_req_flush && flush_rdy && flusher.io.req.ready flusher.io.req.valid := r_cpu_req_val && r_req_flush && flush_rdy && !flushed flusher.io.wb_req <> wb_arb.io.in(0) flusher.io.meta_req <> meta_arb.io.in(0) @@ -1194,7 +1200,7 @@ class HellaCacheAssoc extends Component { // we usually nack rather than reporting that the cache is not ready. // fences and flushes are the exceptions. 
val pending_fence = Reg(resetVal = Bool(false)) - pending_fence <== (r_cpu_req_val && r_req_fence || pending_fence) && !flush_rdy + pending_fence := (r_cpu_req_val && r_req_fence || pending_fence) && !flush_rdy val nack_hit = p_store_match || r_req_write && !p_store_rdy val nack_miss = dirty && !wb_rdy || !mshr.io.req_rdy || r_req_write && !replayer.io.sdq_enq.ready val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index ff62acdd..83fb1e6c 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -77,22 +77,22 @@ class rocketPTW extends Component val req_itlb_val = io.itlb.req_val; val req_dtlb_val = io.dtlb.req_val && !io.itlb.req_val; - when ((state === s_ready) && req_itlb_val) { - r_req_vpn <== io.itlb.req_vpn; - r_req_dest <== Bool(false); - req_addr <== Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; + when ((state === s_ready) && req_dtlb_val) { + r_req_vpn := io.dtlb.req_vpn; + r_req_dest := Bool(true); + req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; } - when ((state === s_ready) && req_dtlb_val) { - r_req_vpn <== io.dtlb.req_vpn; - r_req_dest <== Bool(true); - req_addr <== Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; + when ((state === s_ready) && req_itlb_val) { + r_req_vpn := io.itlb.req_vpn; + r_req_dest := Bool(false); + req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; } when (io.dmem.resp_val) { - req_addr <== Cat(io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)).toUFix; - r_resp_perm <== io.dmem.resp_data(9,4); - r_resp_ppn <== io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS); + req_addr := Cat(io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)).toUFix; + r_resp_perm := 
io.dmem.resp_data(9,4); + r_resp_ppn := io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS); } io.dmem.req_val := @@ -133,83 +133,83 @@ class rocketPTW extends Component switch (state) { is (s_ready) { when (req_val) { - state <== s_l1_req; + state := s_l1_req; } } // level 1 is (s_l1_req) { when (io.dmem.req_rdy) { - state <== s_l1_wait; + state := s_l1_wait; } } is (s_l1_wait) { when (io.dmem.resp_nack) { - state <== s_l1_req + state := s_l1_req } when (io.dmem.resp_val) { when (resp_ptd) { // page table descriptor - state <== s_l2_req; + state := s_l2_req; } - when (resp_pte) { // page table entry - state <== s_l1_fake; + .elsewhen (resp_pte) { // page table entry + state := s_l1_fake; } - otherwise { - state <== s_error; + .otherwise { + state := s_error; } } } is (s_l1_fake) { - state <== s_ready; + state := s_ready; } // level 2 is (s_l2_req) { when (io.dmem.req_rdy) { - state <== s_l2_wait; + state := s_l2_wait; } } is (s_l2_wait) { when (io.dmem.resp_nack) { - state <== s_l2_req + state := s_l2_req } when (io.dmem.resp_val) { when (resp_ptd) { // page table descriptor - state <== s_l3_req; + state := s_l3_req; } - when (resp_pte) { // page table entry - state <== s_l2_fake; + .elsewhen (resp_pte) { // page table entry + state := s_l2_fake; } - otherwise { - state <== s_error; + .otherwise { + state := s_error; } } } is (s_l2_fake) { - state <== s_ready; + state := s_ready; } // level 3 is (s_l3_req) { when (io.dmem.req_rdy) { - state <== s_l3_wait; + state := s_l3_wait; } } is (s_l3_wait) { when (io.dmem.resp_nack) { - state <== s_l3_req + state := s_l3_req } when (io.dmem.resp_val) { when (resp_pte) { // page table entry - state <== s_done; + state := s_done; } - otherwise { - state <== s_error; + .otherwise { + state := s_error; } } } is (s_done) { - state <== s_ready; + state := s_ready; } is (s_error) { - state <== s_ready; + state := s_ready; } } } diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 8912a15a..986e32aa 
100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -24,22 +24,22 @@ class queue[T <: Data](entries: Int, flushable: Boolean = false)(data: => T) ext val do_enq = io.enq.ready && io.enq.valid val do_deq = io.deq.ready && io.deq.valid - if (flushable) { - when (io.flush) { - deq_ptr <== UFix(0) - enq_ptr <== UFix(0) - maybe_full <== Bool(false) - } - } when (do_deq) { - deq_ptr <== deq_ptr + UFix(1) + deq_ptr := deq_ptr + UFix(1) } when (do_enq) { - enq_ptr <== enq_ptr + UFix(1) + enq_ptr := enq_ptr + UFix(1) } when (do_enq != do_deq) { - maybe_full <== do_enq + maybe_full := do_enq + } + if (flushable) { + when (io.flush) { + deq_ptr := UFix(0) + enq_ptr := UFix(0) + maybe_full := Bool(false) + } } - Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr) <> io.deq.bits + io.deq.bits <> Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr) } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index f2c19ee4..c1473a89 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -66,7 +66,7 @@ object LFSR16 { val width = 16 val lfsr = Reg(resetVal = UFix(1, width)) - when (increment) { lfsr <== Cat(lfsr(0)^lfsr(2)^lfsr(3)^lfsr(5), lfsr(width-1,1)).toUFix } + when (increment) { lfsr := Cat(lfsr(0)^lfsr(2)^lfsr(3)^lfsr(5), lfsr(width-1,1)).toUFix } lfsr } } @@ -176,13 +176,13 @@ class priorityDecoder(width: Int) extends Component val io = new ioPriorityEncoder(in_width, width); val l_out = Wire() { Bits() }; - for (i <- 0 to width-1) { + l_out := Bits(0, width); + for (i <- width-1 to 0 by -1) { when (io.in === UFix(i, in_width)) { - l_out <== Bits(1,1) << UFix(i); + l_out := Bits(1,1) << UFix(i); } } - l_out <== Bits(0, width); io.out := l_out; } @@ -198,13 +198,13 @@ class priorityEncoder(width: Int) extends Component val io = new ioPriorityDecoder(width, out_width); val l_out = Wire() { UFix() }; - for (i <- 0 to width-1) { + l_out := UFix(0, out_width); + for (i <- 
width-1 to 1 by -1) { when (io.in(i).toBool) { - l_out <== UFix(i, out_width); + l_out := UFix(i, out_width); } } - l_out <== UFix(0, out_width); io.out := l_out; } From 50a283d311d3ec3162315d1b1f43ed10f51f54b8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 12 Feb 2012 01:35:55 -0800 Subject: [PATCH 0148/1087] move store data generation into EX stage doing so removes it from the critical path of FP store unrecoding. --- rocket/src/main/scala/ctrl.scala | 34 +++++++++++----------- rocket/src/main/scala/dpath.scala | 9 ++++-- rocket/src/main/scala/fpu.scala | 42 ++++++++++++++++++++++------ rocket/src/main/scala/nbdcache.scala | 30 +++++++------------- 4 files changed, 68 insertions(+), 47 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 29c374b2..508c82b2 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -41,6 +41,7 @@ class ioCtrlDpath extends Bundle() val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); val flush_inst = Bool(OUTPUT); + val ex_mem_type = UFix(3,OUTPUT) // enable/disable interrupts val irq_enable = Bool(OUTPUT); val irq_disable = Bool(OUTPUT); @@ -202,7 +203,7 @@ class rocketCtrl extends Component ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y,N), FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N), FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N), - CFLUSH-> List(Y, N,BR_N, REN_Y,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y), + CFLUSH-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y), MFPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, 
M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y,N), MTPCR-> List(Y, N,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y,Y), RDTIME-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), @@ -244,21 +245,21 @@ class rocketCtrl extends Component VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, 
M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N) + VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, 
N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N) )) val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs @@ -708,6 +709,7 @@ class rocketCtrl extends Component io.dpath.wb_eret := wb_reg_eret; io.dpath.irq_disable := wb_reg_inst_di; io.dpath.irq_enable := wb_reg_inst_ei; + io.dpath.ex_mem_type := ex_reg_mem_type io.dtlb_val := ex_reg_mem_val; io.dtlb_kill := mem_reg_kill; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 2815fc48..acd8de2b 100644 --- a/rocket/src/main/scala/dpath.scala +++ 
b/rocket/src/main/scala/dpath.scala @@ -301,7 +301,7 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module io.dmem.req_addr := ex_effective_address.toUFix; - io.dmem.req_data := (if (HAVE_FPU) Mux(io.ctrl.ex_fp_val, io.fpu.store_data, ex_reg_rs2) else ex_reg_rs2) + io.dmem.req_data := (if (HAVE_FPU) Mux(io.ctrl.ex_fp_val, io.fpu.store_data, mem_reg_rs2) else mem_reg_rs2) io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val, io.ctrl.ex_ext_mem_val).toUFix // processor control regfile read @@ -342,11 +342,16 @@ class rocketDpath extends Component Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, ex_alu_out)))).toBits; // WB_ALU + + // subword store data generation + val storegen = new StoreDataGen + storegen.io.typ := io.ctrl.ex_mem_type + storegen.io.din := ex_reg_rs2 // memory stage mem_reg_pc := ex_reg_pc; mem_reg_inst := ex_reg_inst - mem_reg_rs2 := ex_reg_rs2 + mem_reg_rs2 := storegen.io.dout mem_reg_waddr := ex_reg_waddr; mem_reg_wdata := ex_wdata; mem_reg_raddr1 := ex_reg_raddr1 diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index b3f78003..878c0480 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -14,6 +14,9 @@ class rocketFPUDecoder extends Component val ren1 = Bool(OUTPUT) val ren2 = Bool(OUTPUT) val ren3 = Bool(OUTPUT) + val fromint = Bool(OUTPUT) + val toint = Bool(OUTPUT) + val store = Bool(OUTPUT) } // val fp = // ListLookup(io.dpath.inst, @@ -87,22 +90,27 @@ class rocketFPUDecoder extends Component val N = Bool(false) val Y = Bool(true) + val X = Bool(false) val decoder = ListLookup(io.inst, - List (N, N, N, N, N), - Array(FLW -> List(Y, Y, N, N, N), - FLD -> List(Y, Y, N, N, N), - FSW -> List(Y, N, N, Y, N), - FSD -> List(Y, N, N, Y, N), - MTFSR -> List(Y, N, N, N, N), - MFFSR -> List(Y, N, N, N, N) + List (N,X,X,X,X,X,X,X,X), + Array(FLW -> 
List(Y,Y,N,N,N,Y,N,N,N), + FLD -> List(Y,Y,N,N,N,N,N,N,N), + FSW -> List(Y,N,N,Y,N,Y,N,N,Y), + FSD -> List(Y,N,N,Y,N,N,N,N,Y), + MTFSR -> List(Y,N,N,N,N,X,N,Y,N), + MFFSR -> List(Y,N,N,N,N,X,N,Y,N) )) - val valid :: wen :: ren1 :: ren2 :: ren3 :: Nil = decoder + val valid :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: store :: Nil = decoder io.valid := valid.toBool io.wen := wen.toBool io.ren1 := ren1.toBool io.ren2 := ren2.toBool io.ren3 := ren3.toBool + io.single := single.toBool + io.fromint := fromint.toBool + io.toint := toint.toBool + io.store := store.toBool } class ioDpathFPU extends Bundle { @@ -129,6 +137,9 @@ class rocketFPU extends Component ex_reg_inst := io.req_inst } + val fpdec = new rocketFPUDecoder + fpdec.io.inst := ex_reg_inst + // load response val dmem_resp_val_fpu = io.dmem.resp_val && io.dmem.resp_tag(0).toBool val load_wb = Reg(dmem_resp_val_fpu, resetVal = Bool(false)) @@ -147,5 +158,18 @@ class rocketFPU extends Component io.req_ready := Bool(true) - io.dpath.store_data := regfile(ex_reg_inst(21,17)) + val ex_rs1 = regfile(ex_reg_inst(16,12)) + val ex_rs2 = regfile(ex_reg_inst(21,17)) + val ex_rs3 = regfile(ex_reg_inst(26,22)) + + val fp_toint_data = Reg() { Bits() } + + when (fpdec.io.toint) { + fp_toint_data := ex_rs1 + } + when (fpdec.io.store) { + fp_toint_data := ex_rs2 + } + + io.dpath.store_data := fp_toint_data } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index d72fbdfb..a7cd6748 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -673,7 +673,7 @@ class HellaCacheDM extends Component { val r_cpu_req_cmd = Reg() { Bits() } val r_cpu_req_type = Reg() { Bits() } val r_cpu_req_tag = Reg() { Bits() } - val r_cpu_req_data = Reg() { Bits() } + val r_amo_replay_data = Reg() { Bits() } val p_store_valid = Reg(resetVal = Bool(false)) val p_store_data = Reg() { Bits() } @@ -705,16 +705,14 @@ class HellaCacheDM extends Component { 
r_cpu_req_cmd := io.cpu.req_cmd r_cpu_req_type := io.cpu.req_type r_cpu_req_tag := io.cpu.req_tag - when (req_write) { - r_cpu_req_data := io.cpu.req_data - } } when (replay_amo_val) { r_cpu_req_idx := Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) r_cpu_req_cmd := replayer.io.data_req.bits.cmd r_cpu_req_type := replayer.io.data_req.bits.typ - r_cpu_req_data := replayer.io.data_req.bits.data + r_amo_replay_data := replayer.io.data_req.bits.data } + val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) // refill counter val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) @@ -813,15 +811,12 @@ class HellaCacheDM extends Component { meta.io.state_req.bits.data.dirty := tag_match // pending store data, also used for AMO RHS - val storegen = new StoreDataGen val amoalu = new AMOALU - storegen.io.typ := r_cpu_req_type - storegen.io.din := r_cpu_req_data when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) { p_store_idx := r_cpu_req_idx p_store_type := r_cpu_req_type p_store_cmd := r_cpu_req_cmd - p_store_data := storegen.io.dout + p_store_data := cpu_req_data } when (p_amo) { p_store_data := amoalu.io.out @@ -845,7 +840,7 @@ class HellaCacheDM extends Component { meta_arb.io.in(1).valid := mshr.io.meta_req.valid mshr.io.replay <> replayer.io.replay replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy - replayer.io.sdq_enq.bits := storegen.io.dout + replayer.io.sdq_enq.bits := cpu_req_data data_arb.io.in(0).bits.idx := mshr.io.mem_resp_idx // replays @@ -952,7 +947,7 @@ class HellaCacheAssoc extends Component { val r_cpu_req_cmd = Reg() { Bits() } val r_cpu_req_type = Reg() { Bits() } val r_cpu_req_tag = Reg() { Bits() } - val r_cpu_req_data = Reg() { Bits() } + val r_amo_replay_data = Reg() { Bits() } val p_store_valid = Reg(resetVal = Bool(false)) val p_store_data = Reg() { Bits() } @@ -985,16 +980,14 @@ class HellaCacheAssoc extends Component { r_cpu_req_cmd := 
io.cpu.req_cmd r_cpu_req_type := io.cpu.req_type r_cpu_req_tag := io.cpu.req_tag - when (req_write) { - r_cpu_req_data := io.cpu.req_data - } } when (replay_amo_val) { r_cpu_req_idx := Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) r_cpu_req_cmd := replayer.io.data_req.bits.cmd r_cpu_req_type := replayer.io.data_req.bits.typ - r_cpu_req_data := replayer.io.data_req.bits.data + r_amo_replay_data := replayer.io.data_req.bits.data } + val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) // refill counter val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) @@ -1107,16 +1100,13 @@ class HellaCacheAssoc extends Component { meta.io.state_req.bits.way_en := Mux(clear_valid, replaced_way_oh, hit_way_oh) // pending store data, also used for AMO RHS - val storegen = new StoreDataGen val amoalu = new AMOALU - storegen.io.typ := r_cpu_req_type - storegen.io.din := r_cpu_req_data when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) { p_store_idx := r_cpu_req_idx p_store_type := r_cpu_req_type p_store_cmd := r_cpu_req_cmd p_store_way_oh := Mux(r_replay_amo, replayer.io.way_oh, hit_way_oh) - p_store_data := storegen.io.dout + p_store_data := cpu_req_data } when (p_amo) { p_store_data := amoalu.io.out @@ -1139,7 +1129,7 @@ class HellaCacheAssoc extends Component { mshr.io.meta_req <> meta_arb.io.in(1) mshr.io.replay <> replayer.io.replay replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy - replayer.io.sdq_enq.bits := storegen.io.dout + replayer.io.sdq_enq.bits := cpu_req_data data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh replacer.io.pick_new_way := !io.cpu.req_kill && mshr.io.req_val && mshr.io.req_rdy From 9bb1558a3499f33fb9789e59169205eec5108b8f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 12 Feb 2012 04:36:01 -0800 Subject: [PATCH 0149/1087] WIP on FPU --- rocket/src/main/scala/cpu.scala | 4 +- 
rocket/src/main/scala/ctrl.scala | 66 +++++---- rocket/src/main/scala/dpath.scala | 12 +- rocket/src/main/scala/fpu.scala | 215 +++++++++++++++++++++--------- 4 files changed, 202 insertions(+), 95 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 978dc409..0d0aee76 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -116,10 +116,8 @@ class rocketProc extends Component if (HAVE_FPU) { val fpu = new rocketFPU - fpu.io.dmem.resp_val := arb.io.cpu.resp_val; - fpu.io.dmem.resp_tag := arb.io.cpu.resp_tag; - fpu.io.dmem.resp_data := arb.io.cpu.resp_data; dpath.io.fpu <> fpu.io.dpath + ctrl.io.fpu <> fpu.io.ctrl } ctrl.io.ext_mem.req_val := Bool(false) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 508c82b2..6bf607f1 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -37,6 +37,7 @@ class ioCtrlDpath extends Bundle() val mem_load = Bool(OUTPUT); val ex_ext_mem_val = Bool(OUTPUT); val ex_fp_val= Bool(OUTPUT); + val mem_fp_val= Bool(OUTPUT); val ex_wen = Bool(OUTPUT); val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); @@ -90,15 +91,13 @@ class ioCtrlAll extends Bundle() val xcpt_itlb = Bool(INPUT); val xcpt_ma_ld = Bool(INPUT); val xcpt_ma_st = Bool(INPUT); + val fpu = new ioCtrlFPU(); } class rocketCtrl extends Component { val io = new ioCtrlAll(); - val fpdec = new rocketFPUDecoder - fpdec.io.inst := io.dpath.inst - val xpr64 = Y; val cs = ListLookup(io.dpath.inst, @@ -212,12 +211,12 @@ class rocketCtrl extends Component // Instructions that have not yet been implemented // Faking these for now so akaros will boot - //MFFSR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - //MTFSR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLW-> List(Y, 
N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLD-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSW-> List(Y, N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSD-> List(Y, N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,Y), + FLW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FSD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), // Vector Stuff VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), @@ -322,6 +321,7 @@ class rocketCtrl extends Component val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); val mem_reg_xcpt_vec = Reg(resetVal = Bool(false)); val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); + val mem_reg_fp_val = Reg(resetVal = Bool(false)); val mem_reg_replay = Reg(resetVal = Bool(false)); val mem_reg_kill = Reg(resetVal = 
Bool(false)); val mem_reg_ext_mem_val = Reg(resetVal = Bool(false)) @@ -355,7 +355,7 @@ class rocketCtrl extends Component // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim) val illegal_inst = - !(id_int_val.toBool || fpdec.io.valid || id_vec_val.toBool) || + !(id_int_val.toBool || io.fpu.dec.valid || id_vec_val.toBool) || (id_eret.toBool && io.dpath.status(SR_ET).toBool); when (reset.toBool || io.dpath.killd) { @@ -388,7 +388,7 @@ class rocketCtrl extends Component ex_reg_mul_val := id_mul_val.toBool && id_waddr != UFix(0); ex_reg_mem_val := id_mem_val.toBool; ex_reg_wen := id_wen.toBool && id_waddr != UFix(0); - ex_reg_fp_wen := fpdec.io.wen; + ex_reg_fp_wen := io.fpu.dec.wen; ex_reg_eret := id_eret.toBool; ex_reg_replay_next := id_replay_next.toBool; ex_reg_inst_di := (id_irq === I_DI); @@ -399,7 +399,7 @@ class rocketCtrl extends Component ex_reg_xcpt_illegal := illegal_inst; ex_reg_xcpt_privileged := (id_privileged & ~io.dpath.status(SR_S)).toBool; ex_reg_xcpt_syscall := id_syscall.toBool; - ex_reg_fp_val := fpdec.io.valid; + ex_reg_fp_val := io.fpu.dec.valid; ex_reg_vec_val := id_vec_val.toBool ex_reg_replay := id_reg_replay || ex_reg_replay_next; ex_reg_load_use := id_load_use; @@ -447,6 +447,7 @@ class rocketCtrl extends Component mem_reg_xcpt_fpu := Bool(false); mem_reg_xcpt_vec := Bool(false); mem_reg_xcpt_syscall := Bool(false); + mem_reg_fp_val := Bool(false); } .otherwise { mem_reg_div_mul_val := ex_reg_div_val || ex_reg_mul_val; @@ -464,6 +465,7 @@ class rocketCtrl extends Component mem_reg_xcpt_fpu := ex_reg_fp_val && !io.dpath.status(SR_EF).toBool; mem_reg_xcpt_vec := ex_reg_vec_val && !io.dpath.status(SR_EV).toBool; mem_reg_xcpt_syscall := ex_reg_xcpt_syscall; + mem_reg_fp_val := ex_reg_fp_val } mem_reg_ext_mem_val := ex_reg_ext_mem_val; mem_reg_mem_cmd := ex_reg_mem_cmd; @@ -518,10 +520,10 @@ class rocketCtrl extends Component fp_sboard.io.clr := io.dpath.fp_sboard_clr; fp_sboard.io.clra := 
io.dpath.fp_sboard_clra; - id_stall_fpu = fpdec.io.ren1 && fp_sboard.io.stalla || - fpdec.io.ren2 && fp_sboard.io.stallb || - fpdec.io.ren3 && fp_sboard.io.stallc || - fpdec.io.wen && fp_sboard.io.stalld + id_stall_fpu = io.fpu.dec.ren1 && fp_sboard.io.stalla || + io.fpu.dec.ren2 && fp_sboard.io.stallb || + io.fpu.dec.ren3 && fp_sboard.io.stallc || + io.fpu.dec.wen && fp_sboard.io.stalld } // exception handling @@ -589,7 +591,8 @@ class rocketCtrl extends Component val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) || ex_reg_div_val && !io.dpath.div_rdy || - ex_reg_mul_val && !io.dpath.mul_rdy + ex_reg_mul_val && !io.dpath.mul_rdy || + io.fpu.nack val kill_ex = take_pc_wb || replay_ex mem_reg_replay := replay_ex && !take_pc_wb; @@ -627,10 +630,10 @@ class rocketCtrl extends Component id_renx2.toBool && id_raddr2 === io.dpath.ex_waddr || id_wen.toBool && id_waddr === io.dpath.ex_waddr) val fp_data_hazard_ex = ex_reg_fp_wen && - (fpdec.io.ren1 && id_raddr1 === io.dpath.ex_waddr || - fpdec.io.ren2 && id_raddr2 === io.dpath.ex_waddr || - fpdec.io.ren3 && id_raddr3 === io.dpath.ex_waddr || - fpdec.io.wen && id_waddr === io.dpath.ex_waddr) + (io.fpu.dec.ren1 && id_raddr1 === io.dpath.ex_waddr || + io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || + io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr || + io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_val || ex_reg_mul_val) || fp_data_hazard_ex && ex_reg_mem_val @@ -643,10 +646,10 @@ class rocketCtrl extends Component id_renx2.toBool && id_raddr2 === io.dpath.mem_waddr || id_wen.toBool && id_waddr === io.dpath.mem_waddr) val fp_data_hazard_mem = mem_reg_fp_wen && - (fpdec.io.ren1 && id_raddr1 === io.dpath.mem_waddr || - fpdec.io.ren2 && id_raddr2 === io.dpath.mem_waddr || - fpdec.io.ren3 && id_raddr3 === io.dpath.mem_waddr || - fpdec.io.wen && id_waddr 
=== io.dpath.mem_waddr) + (io.fpu.dec.ren1 && id_raddr1 === io.dpath.mem_waddr || + io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || + io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || + io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val) id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) @@ -656,10 +659,10 @@ class rocketCtrl extends Component id_renx2.toBool && id_raddr2 === io.dpath.wb_waddr || id_wen.toBool && id_waddr === io.dpath.wb_waddr) val fp_data_hazard_wb = wb_reg_fp_wen && - (fpdec.io.ren1 && id_raddr1 === io.dpath.wb_waddr || - fpdec.io.ren2 && id_raddr2 === io.dpath.wb_waddr || - fpdec.io.ren3 && id_raddr3 === io.dpath.wb_waddr || - fpdec.io.wen && id_waddr === io.dpath.wb_waddr) + (io.fpu.dec.ren1 && id_raddr1 === io.dpath.wb_waddr || + io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr || + io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr || + io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr) val id_wb_hazard = data_hazard_wb && (wb_reg_dcache_miss || wb_reg_div_mul_val) || fp_data_hazard_wb && wb_reg_dcache_miss @@ -698,6 +701,7 @@ class rocketCtrl extends Component io.dpath.mul_val := id_mul_val.toBool; io.dpath.ex_ext_mem_val := ex_reg_ext_mem_val; io.dpath.ex_fp_val:= ex_reg_fp_val; + io.dpath.mem_fp_val:= mem_reg_fp_val; io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; io.dpath.wb_wen := wb_reg_wen; @@ -711,6 +715,10 @@ class rocketCtrl extends Component io.dpath.irq_enable := wb_reg_inst_ei; io.dpath.ex_mem_type := ex_reg_mem_type + io.fpu.valid := !io.dpath.killd && io.fpu.dec.valid + io.fpu.killx := kill_ex + io.fpu.killm := kill_mem + io.dtlb_val := ex_reg_mem_val; io.dtlb_kill := mem_reg_kill; io.dmem.req_val := ex_reg_mem_val || ex_reg_ext_mem_val; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index acd8de2b..37cb3d8a 100644 --- 
a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -232,7 +232,8 @@ class rocketDpath extends Component val id_op2 = Mux(io.ctrl.sel_alu2 === A2_RTYPE, id_rs2, id_imm) - io.ctrl.inst := id_reg_inst; + io.ctrl.inst := id_reg_inst + io.fpu.inst := id_reg_inst // execute stage ex_reg_pc := id_reg_pc; @@ -301,7 +302,7 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module io.dmem.req_addr := ex_effective_address.toUFix; - io.dmem.req_data := (if (HAVE_FPU) Mux(io.ctrl.ex_fp_val, io.fpu.store_data, mem_reg_rs2) else mem_reg_rs2) + io.dmem.req_data := (if (HAVE_FPU) Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) else mem_reg_rs2) io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val, io.ctrl.ex_ext_mem_val).toUFix // processor control regfile read @@ -374,7 +375,7 @@ class rocketDpath extends Component // 32/64 bit load handling (moved to earlier in file) - // writeback stage + // writeback arbitration val dmem_resp_ext = io.dmem.resp_tag(0).toBool val dmem_resp_xpu = !io.dmem.resp_tag(0).toBool && !io.dmem.resp_tag(1).toBool val dmem_resp_fpu = !io.dmem.resp_tag(0).toBool && io.dmem.resp_tag(1).toBool @@ -394,6 +395,11 @@ class rocketDpath extends Component mem_reg_wdata)) val mem_ll_wb = dmem_resp_replay || div_result_val || mul_result_val + io.fpu.dmem_resp_val := io.dmem.resp_val && dmem_resp_fpu + io.fpu.dmem_resp_data := io.dmem.resp_data + io.fpu.dmem_resp_tag := dmem_resp_waddr + + // writeback stage wb_reg_pc := mem_reg_pc; wb_reg_inst := mem_reg_inst wb_reg_ll_wb := mem_ll_wb diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 878c0480..8b98e01e 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -1,22 +1,69 @@ package Top import Chisel._ -import Node._; +import Node._ import Constants._ import Instructions._ +object rocketFPConstants +{ + val FCMD_ADD = 
Bits("b000000") + val FCMD_SUB = Bits("b000001") + val FCMD_MUL = Bits("b000010") + val FCMD_DIV = Bits("b000011") + val FCMD_SQRT = Bits("b000100") + val FCMD_SGNINJ = Bits("b000101") + val FCMD_SGNINJN = Bits("b000110") + val FCMD_SGNMUL = Bits("b000111") + val FCMD_TRUNC_L = Bits("b001000") + val FCMD_TRUNCU_L = Bits("b001001") + val FCMD_TRUNC_W = Bits("b001010") + val FCMD_TRUNCU_W = Bits("b001011") + val FCMD_CVT_L = Bits("b001100") + val FCMD_CVTU_L = Bits("b001101") + val FCMD_CVT_W = Bits("b001110") + val FCMD_CVTU_W = Bits("b001111") + val FCMD_CVT_S = Bits("b010000") + val FCMD_CVT_D = Bits("b010001") + val FCMD_C_EQ = Bits("b010101") + val FCMD_C_LT = Bits("b010110") + val FCMD_C_LE = Bits("b010111") + val FCMD_MIN = Bits("b011000") + val FCMD_MAX = Bits("b011001") + val FCMD_MF = Bits("b011100") + val FCMD_MFFSR = Bits("b011101") + val FCMD_MT = Bits("b011110") + val FCMD_MTFSR = Bits("b011111") + val FCMD_MADD = Bits("b100100") + val FCMD_MSUB = Bits("b100101") + val FCMD_NMSUB = Bits("b100110") + val FCMD_NMADD = Bits("b100111") + val FCMD_LOAD = Bits("b111000") + val FCMD_STORE = Bits("b111001") + val FCMD_WIDTH = 6 +} +import rocketFPConstants._ + +class rocketFPUCtrlSigs extends Bundle +{ + val cmd = Bits(width = FCMD_WIDTH) + val valid = Bool() + val wen = Bool() + val ren1 = Bool() + val ren2 = Bool() + val ren3 = Bool() + val single = Bool() + val fromint = Bool() + val toint = Bool() + val store = Bool() + val fsr = Bool() +} + class rocketFPUDecoder extends Component { val io = new Bundle { val inst = Bits(32, INPUT) - val valid = Bool(OUTPUT) - val wen = Bool(OUTPUT) - val ren1 = Bool(OUTPUT) - val ren2 = Bool(OUTPUT) - val ren3 = Bool(OUTPUT) - val fromint = Bool(OUTPUT) - val toint = Bool(OUTPUT) - val store = Bool(OUTPUT) + val sigs = new rocketFPUCtrlSigs().asOutput } // val fp = // ListLookup(io.dpath.inst, @@ -91,85 +138,133 @@ class rocketFPUDecoder extends Component val N = Bool(false) val Y = Bool(true) val X = Bool(false) + val 
FCMD_X = FCMD_ADD val decoder = ListLookup(io.inst, - List (N,X,X,X,X,X,X,X,X), - Array(FLW -> List(Y,Y,N,N,N,Y,N,N,N), - FLD -> List(Y,Y,N,N,N,N,N,N,N), - FSW -> List(Y,N,N,Y,N,Y,N,N,Y), - FSD -> List(Y,N,N,Y,N,N,N,N,Y), - MTFSR -> List(Y,N,N,N,N,X,N,Y,N), - MFFSR -> List(Y,N,N,N,N,X,N,Y,N) + List (N,FCMD_X, X,X,X,X,X,X,X,X,X), + Array(FLW -> List(Y,FCMD_LOAD, Y,N,N,N,Y,N,N,N,N), + FLD -> List(Y,FCMD_LOAD, Y,N,N,N,N,N,N,N,N), + FSW -> List(Y,FCMD_STORE, N,N,Y,N,Y,N,N,Y,N), + FSD -> List(Y,FCMD_STORE, N,N,Y,N,N,N,N,Y,N), + MTFSR -> List(Y,FCMD_MTFSR, N,N,N,N,X,N,Y,N,Y), + MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,X,N,Y,N,Y) )) - val valid :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: store :: Nil = decoder + val valid :: cmd :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: store :: fsr :: Nil = decoder - io.valid := valid.toBool - io.wen := wen.toBool - io.ren1 := ren1.toBool - io.ren2 := ren2.toBool - io.ren3 := ren3.toBool - io.single := single.toBool - io.fromint := fromint.toBool - io.toint := toint.toBool - io.store := store.toBool + io.sigs.valid := valid.toBool + io.sigs.cmd := cmd + io.sigs.wen := wen.toBool + io.sigs.ren1 := ren1.toBool + io.sigs.ren2 := ren2.toBool + io.sigs.ren3 := ren3.toBool + io.sigs.single := single.toBool + io.sigs.fromint := fromint.toBool + io.sigs.toint := toint.toBool + io.sigs.store := store.toBool + io.sigs.fsr := fsr.toBool } class ioDpathFPU extends Bundle { + val inst = Bits(32, OUTPUT) + val store_data = Bits(64, INPUT) + + val dmem_resp_val = Bool(OUTPUT) + val dmem_resp_tag = UFix(5, OUTPUT) + val dmem_resp_data = Bits(64, OUTPUT) +} + +class ioCtrlFPU extends Bundle { + val valid = Bool(OUTPUT) + val nack = Bool(INPUT) + val killx = Bool(OUTPUT) + val killm = Bool(OUTPUT) + val dec = new rocketFPUCtrlSigs().asInput +} + +class rocketFPIntUnit extends Component +{ + val io = new Bundle { + val single = Bool(INPUT) + val cmd = Bits(FCMD_WIDTH, INPUT) + val in = Bits(65, INPUT) + val out = 
Bits(64, OUTPUT) + } + + val unrecoded_s = io.in(31,0) + val unrecoded_d = io.in + + val out_s = unrecoded_s + val out_d = unrecoded_d + + io.out := Mux(io.single, Cat(out_s, out_s), out_d) } class rocketFPU extends Component { val io = new Bundle { - val req_valid = Bool(INPUT) - val req_ready = Bool(OUTPUT) - val req_cmd = Bits(6, INPUT) - val req_inst = Bits(32, INPUT) - - val killx = Bool(INPUT) - val killm = Bool(INPUT) - - val dmem = new ioDmem(List("resp_val", "resp_tag", "resp_data")).flip() + val ctrl = new ioCtrlFPU().flip() val dpath = new ioDpathFPU().flip() } - val ex_reg_inst = Reg() { Bits() } - when (io.req_valid) { - ex_reg_inst := io.req_inst + val reg_inst = Reg() { Bits() } + when (io.ctrl.valid) { + reg_inst := io.dpath.inst + } + val reg_valid = Reg(io.ctrl.valid, Bool(false)) + + val fp_decoder = new rocketFPUDecoder + fp_decoder.io.inst := io.dpath.inst + + val ctrl = Reg() { new rocketFPUCtrlSigs } + when (io.ctrl.valid) { + ctrl := fp_decoder.io.sigs } - val fpdec = new rocketFPUDecoder - fpdec.io.inst := ex_reg_inst - // load response - val dmem_resp_val_fpu = io.dmem.resp_val && io.dmem.resp_tag(0).toBool - val load_wb = Reg(dmem_resp_val_fpu, resetVal = Bool(false)) - val load_wb_data = Reg() { Bits() } + val load_wb = Reg(io.dpath.dmem_resp_val, resetVal = Bool(false)) + val load_wb_data = Reg() { Bits(width = 64) } // XXX WTF why doesn't bit width inference work for the regfile?! 
val load_wb_tag = Reg() { UFix() } - when (dmem_resp_val_fpu) { - load_wb_data := io.dmem.resp_data - load_wb_tag := io.dmem.resp_tag.toUFix >> UFix(1) + when (io.dpath.dmem_resp_val) { + load_wb_data := io.dpath.dmem_resp_data + load_wb_tag := io.dpath.dmem_resp_tag } // regfile - val regfile = Mem(32, load_wb_data); + val regfile = Mem(32, load_wb, load_wb_tag, load_wb_data); regfile.setReadLatency(0); regfile.setTarget('inst); - regfile.write(load_wb_tag, load_wb_data, load_wb); - io.req_ready := Bool(true) - - val ex_rs1 = regfile(ex_reg_inst(16,12)) - val ex_rs2 = regfile(ex_reg_inst(21,17)) - val ex_rs3 = regfile(ex_reg_inst(26,22)) + val ex_rs1 = regfile.read(reg_inst(16,12)) + val ex_rs2 = regfile.read(reg_inst(21,17)) + val ex_rs3 = regfile.read(reg_inst(26,22)) val fp_toint_data = Reg() { Bits() } + val fp_toint_single = Reg() { Bool() } + val fp_toint_cmd = Reg() { Bits() } - when (fpdec.io.toint) { - fp_toint_data := ex_rs1 - } - when (fpdec.io.store) { - fp_toint_data := ex_rs2 + when (reg_valid) { + when (ctrl.toint) { + fp_toint_data := ex_rs1 + } + when (ctrl.store) { + fp_toint_data := ex_rs2 + } + when (ctrl.toint || ctrl.store) { + fp_toint_single := ctrl.single + fp_toint_cmd := ctrl.cmd + } } - io.dpath.store_data := fp_toint_data + // currently we assume FP stores and FP->int ops take 1 cycle (MEM) + val fpiu = new rocketFPIntUnit + fpiu.io.single := ctrl.single + fpiu.io.cmd := ctrl.cmd + fpiu.io.in := fp_toint_data + + io.dpath.store_data := fpiu.io.out + + val fsr_busy = ctrl.fsr && Bool(false) + val units_busy = Bool(false) + io.ctrl.nack := reg_valid && (fsr_busy || units_busy) + io.ctrl.dec <> fp_decoder.io.sigs } From 08b6517a2386f5da1b85546c8e33c201be0e617a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 12 Feb 2012 20:12:53 -0800 Subject: [PATCH 0150/1087] add FP ops mftx, mxtf, mtfsr, mffsr --- rocket/src/main/scala/cpu.scala | 2 + rocket/src/main/scala/ctrl.scala | 24 ++-- rocket/src/main/scala/dpath.scala | 7 +- 
rocket/src/main/scala/fpu.scala | 180 ++++++++++++++++++++++-------- 4 files changed, 156 insertions(+), 57 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 0d0aee76..8e106e0c 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -119,6 +119,8 @@ class rocketProc extends Component dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl } + else + ctrl.io.fpu.dec.valid := Bool(false) ctrl.io.ext_mem.req_val := Bool(false) dpath.io.ext_mem.req_val := Bool(false) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6bf607f1..01937237 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -209,10 +209,12 @@ class rocketCtrl extends Component RDCYCLE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), RDINSTRET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - // Instructions that have not yet been implemented - // Faking these for now so akaros will boot - MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,Y), + MFTX_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MFTX_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MXTF_S-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MXTF_D-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), FLW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), FLD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), FSW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), @@ -335,6 +337,7 @@ class rocketCtrl extends Component val wb_reg_exception = Reg(resetVal = Bool(false)); val wb_reg_replay = Reg(resetVal = Bool(false)); val wb_reg_cause = Reg(){UFix()}; + val wb_reg_fp_val = Reg(resetVal = Bool(false)); val take_pc = Wire() { Bool() }; @@ -479,6 +482,7 @@ class rocketCtrl extends Component wb_reg_inst_ei := Bool(false); wb_reg_flush_inst := Bool(false); wb_reg_div_mul_val := Bool(false); + wb_reg_fp_val := Bool(false) } .otherwise { wb_reg_wen := mem_reg_wen; @@ -488,6 +492,7 @@ class rocketCtrl extends Component wb_reg_inst_ei := mem_reg_inst_ei; wb_reg_flush_inst := mem_reg_flush_inst; wb_reg_div_mul_val := mem_reg_div_mul_val; + wb_reg_fp_val := mem_reg_fp_val } val sboard = new rocketCtrlSboard(); @@ -592,7 +597,7 @@ class rocketCtrl extends Component ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) || ex_reg_div_val && !io.dpath.div_rdy || ex_reg_mul_val && !io.dpath.mul_rdy || - io.fpu.nack + ex_reg_fp_val && io.fpu.nack val kill_ex = take_pc_wb || replay_ex mem_reg_replay := replay_ex && !take_pc_wb; @@ -634,8 +639,8 @@ class rocketCtrl extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || io.fpu.dec.ren3 && 
id_raddr3 === io.dpath.ex_waddr || io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) - val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_val || ex_reg_mul_val) || - fp_data_hazard_ex && ex_reg_mem_val + val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_val || ex_reg_mul_val || ex_reg_fp_val) || + fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) // stall for RAW/WAW hazards on LB/LH and mul/div in memory stage. val mem_mem_cmd_bh = @@ -650,7 +655,8 @@ class rocketCtrl extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val) + val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) || + fp_data_hazard_mem && mem_reg_fp_val id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
@@ -664,7 +670,7 @@ class rocketCtrl extends Component io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr || io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr) val id_wb_hazard = data_hazard_wb && (wb_reg_dcache_miss || wb_reg_div_mul_val) || - fp_data_hazard_wb && wb_reg_dcache_miss + fp_data_hazard_wb && (wb_reg_dcache_miss || wb_reg_fp_val) val ctrl_stalld = !take_pc && diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 37cb3d8a..ec87dfe2 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -292,6 +292,8 @@ class rocketDpath extends Component mul.io.mul_tag := ex_reg_waddr; mul.io.in0 := ex_reg_rs1; mul.io.in1 := ex_reg_rs2; + + io.fpu.fromint_data := ex_reg_rs1 io.ctrl.mul_rdy := mul.io.mul_rdy io.ctrl.mul_result_val := mul.io.result_val; @@ -302,7 +304,7 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module io.dmem.req_addr := ex_effective_address.toUFix; - io.dmem.req_data := (if (HAVE_FPU) Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) else mem_reg_rs2) + io.dmem.req_data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val, io.ctrl.ex_ext_mem_val).toUFix // processor control regfile read @@ -392,7 +394,8 @@ class rocketDpath extends Component mem_reg_waddr))) val mem_ll_wdata = Mux(div_result_val, div_result, Mux(mul_result_val, mul_result, - mem_reg_wdata)) + Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, + mem_reg_wdata))) val mem_ll_wb = dmem_resp_replay || div_result_val || mul_result_val io.fpu.dmem_resp_val := io.dmem.resp_val && dmem_resp_fpu diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 8b98e01e..1bbb5e13 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -7,40 +7,41 @@ import Instructions._ object rocketFPConstants { - 
val FCMD_ADD = Bits("b000000") - val FCMD_SUB = Bits("b000001") - val FCMD_MUL = Bits("b000010") - val FCMD_DIV = Bits("b000011") - val FCMD_SQRT = Bits("b000100") - val FCMD_SGNINJ = Bits("b000101") - val FCMD_SGNINJN = Bits("b000110") - val FCMD_SGNMUL = Bits("b000111") - val FCMD_TRUNC_L = Bits("b001000") - val FCMD_TRUNCU_L = Bits("b001001") - val FCMD_TRUNC_W = Bits("b001010") - val FCMD_TRUNCU_W = Bits("b001011") - val FCMD_CVT_L = Bits("b001100") - val FCMD_CVTU_L = Bits("b001101") - val FCMD_CVT_W = Bits("b001110") - val FCMD_CVTU_W = Bits("b001111") - val FCMD_CVT_S = Bits("b010000") - val FCMD_CVT_D = Bits("b010001") - val FCMD_C_EQ = Bits("b010101") - val FCMD_C_LT = Bits("b010110") - val FCMD_C_LE = Bits("b010111") - val FCMD_MIN = Bits("b011000") - val FCMD_MAX = Bits("b011001") - val FCMD_MF = Bits("b011100") - val FCMD_MFFSR = Bits("b011101") - val FCMD_MT = Bits("b011110") - val FCMD_MTFSR = Bits("b011111") - val FCMD_MADD = Bits("b100100") - val FCMD_MSUB = Bits("b100101") - val FCMD_NMSUB = Bits("b100110") - val FCMD_NMADD = Bits("b100111") - val FCMD_LOAD = Bits("b111000") - val FCMD_STORE = Bits("b111001") - val FCMD_WIDTH = 6 + val FCMD_ADD = Bits("b000000") + val FCMD_SUB = Bits("b000001") + val FCMD_MUL = Bits("b000010") + val FCMD_DIV = Bits("b000011") + val FCMD_SQRT = Bits("b000100") + val FCMD_SGNINJ = Bits("b000101") + val FCMD_SGNINJN = Bits("b000110") + val FCMD_SGNMUL = Bits("b000111") + val FCMD_CVT_L_FMT = Bits("b001000") + val FCMD_CVT_LU_FMT = Bits("b001001") + val FCMD_CVT_W_FMT = Bits("b001010") + val FCMD_CVT_WU_FMT = Bits("b001011") + val FCMD_CVT_FMT_L = Bits("b001100") + val FCMD_CVT_FMT_LU = Bits("b001101") + val FCMD_CVT_FMT_W = Bits("b001110") + val FCMD_CVT_FMT_WU = Bits("b001111") + val FCMD_CVT_FMT_S = Bits("b010000") + val FCMD_CVT_FMT_D = Bits("b010001") + val FCMD_EQ = Bits("b010101") + val FCMD_LT = Bits("b010110") + val FCMD_LE = Bits("b010111") + val FCMD_MIN = Bits("b011000") + val FCMD_MAX = Bits("b011001") + 
val FCMD_MFTX = Bits("b011100") + val FCMD_MFFSR = Bits("b011101") + val FCMD_MXTF = Bits("b011110") + val FCMD_MTFSR = Bits("b011111") + val FCMD_MADD = Bits("b100100") + val FCMD_MSUB = Bits("b100101") + val FCMD_NMSUB = Bits("b100110") + val FCMD_NMADD = Bits("b100111") + val FCMD_LOAD = Bits("b111000") + val FCMD_STORE = Bits("b111001") + val FCMD_WIDTH = 6 + val FSR_WIDTH = 8 } import rocketFPConstants._ @@ -145,8 +146,12 @@ class rocketFPUDecoder extends Component FLD -> List(Y,FCMD_LOAD, Y,N,N,N,N,N,N,N,N), FSW -> List(Y,FCMD_STORE, N,N,Y,N,Y,N,N,Y,N), FSD -> List(Y,FCMD_STORE, N,N,Y,N,N,N,N,Y,N), - MTFSR -> List(Y,FCMD_MTFSR, N,N,N,N,X,N,Y,N,Y), - MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,X,N,Y,N,Y) + MXTF_S -> List(Y,FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N), + MXTF_D -> List(Y,FCMD_MXTF, Y,N,N,N,N,Y,N,N,N), + MFTX_S -> List(Y,FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N), + MFTX_D -> List(Y,FCMD_MFTX, N,Y,N,N,N,N,Y,N,N), + MTFSR -> List(Y,FCMD_MTFSR, N,N,N,N,Y,Y,Y,N,Y), + MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,Y,N,Y,N,Y) )) val valid :: cmd :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: store :: fsr :: Nil = decoder @@ -165,8 +170,10 @@ class rocketFPUDecoder extends Component class ioDpathFPU extends Bundle { val inst = Bits(32, OUTPUT) + val fromint_data = Bits(64, OUTPUT) val store_data = Bits(64, INPUT) + val toint_data = Bits(64, INPUT) val dmem_resp_val = Bool(OUTPUT) val dmem_resp_tag = UFix(5, OUTPUT) @@ -186,17 +193,62 @@ class rocketFPIntUnit extends Component val io = new Bundle { val single = Bool(INPUT) val cmd = Bits(FCMD_WIDTH, INPUT) + val fsr = Bits(FSR_WIDTH, INPUT) val in = Bits(65, INPUT) - val out = Bits(64, OUTPUT) + val store_data = Bits(64, OUTPUT) + val toint_data = Bits(64, OUTPUT) + val exc = Bits(5, OUTPUT) } val unrecoded_s = io.in(31,0) val unrecoded_d = io.in - val out_s = unrecoded_s - val out_d = unrecoded_d + io.store_data := Mux(io.single, Cat(unrecoded_s, unrecoded_s), unrecoded_d) - io.out := Mux(io.single, Cat(out_s, out_s), out_d) + val 
scmp = Bool(false) + val scmp_exc = Bits(0) + + val s2i = UFix(0) + val s2i_exc = Bits(0) + + val dcmp = Bool(false) + val dcmp_exc = Bits(0) + + val d2i = UFix(0) + val d2i_exc = Bits(0) + + // output muxing + val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) + out_s := Cat(Fill(32, unrecoded_s(31)), unrecoded_s) + exc_s := Bits(0) + val (out_d, exc_d) = (Wire() { Bits() }, Wire() { Bits() }) + out_d := unrecoded_d + exc_d := Bits(0) + + when (io.cmd === FCMD_MTFSR || io.cmd === FCMD_MFFSR) { + out_s := io.fsr + } + when (io.cmd === FCMD_CVT_W_FMT || io.cmd === FCMD_CVT_WU_FMT) { + out_s := Cat(Fill(32, s2i(31)), s2i(31,0)) + exc_s := s2i_exc + out_d := Cat(Fill(32, d2i(31)), d2i(31,0)) + exc_d := d2i_exc + } + when (io.cmd === FCMD_CVT_L_FMT || io.cmd === FCMD_CVT_LU_FMT) { + out_s := s2i + exc_s := s2i_exc + out_d := d2i + exc_d := d2i_exc + } + when (io.cmd === FCMD_EQ || io.cmd === FCMD_LT || io.cmd === FCMD_LE) { + out_s := scmp + exc_s := scmp_exc + out_d := dcmp + exc_d := dcmp_exc + } + + io.toint_data := Mux(io.single, out_s, out_d) + io.exc := Mux(io.single, exc_s, exc_d) } class rocketFPU extends Component @@ -229,21 +281,36 @@ class rocketFPU extends Component load_wb_tag := io.dpath.dmem_resp_tag } + val fsr_rm = Reg() { Bits(width = 3) } + val fsr_exc = Reg() { Bits(width = 5) } + // regfile val regfile = Mem(32, load_wb, load_wb_tag, load_wb_data); regfile.setReadLatency(0); regfile.setTarget('inst); - val ex_rs1 = regfile.read(reg_inst(16,12)) + val ex_rs1 = regfile.read(reg_inst(26,22)) val ex_rs2 = regfile.read(reg_inst(21,17)) - val ex_rs3 = regfile.read(reg_inst(26,22)) + val ex_rs3 = regfile.read(reg_inst(16,12)) + val fp_fromint_val = Reg(resetVal = Bool(false)) + val fp_fromint_data = Reg() { Bits() } + val fp_toint_val = Reg(resetVal = Bool(false)) val fp_toint_data = Reg() { Bits() } val fp_toint_single = Reg() { Bool() } val fp_toint_cmd = Reg() { Bits() } + val fp_waddr = Reg() { Bits() } - when (reg_valid) { + fp_fromint_val 
:= Bool(false) + fp_toint_val := Bool(false) + when (reg_valid && !io.ctrl.killx) { + fp_waddr := reg_inst(31,27) + when (ctrl.fromint) { + fp_fromint_val := Bool(true) + fp_fromint_data := io.dpath.fromint_data + } when (ctrl.toint) { + fp_toint_val := Bool(true) fp_toint_data := ex_rs1 } when (ctrl.store) { @@ -259,12 +326,33 @@ class rocketFPU extends Component val fpiu = new rocketFPIntUnit fpiu.io.single := ctrl.single fpiu.io.cmd := ctrl.cmd + fpiu.io.fsr := Cat(fsr_rm, fsr_exc) fpiu.io.in := fp_toint_data - io.dpath.store_data := fpiu.io.out + io.dpath.store_data := fpiu.io.store_data + io.dpath.toint_data := fpiu.io.toint_data - val fsr_busy = ctrl.fsr && Bool(false) + val retire_toint = Reg(!io.ctrl.killm && fp_toint_val, resetVal = Bool(false)) + val retire_toint_exc = Reg(fpiu.io.exc) + val retire_fromint = Reg(!io.ctrl.killm && fp_fromint_val, resetVal = Bool(false)) + val retire_fromint_wdata = Reg(fp_fromint_data) + val retire_fromint_waddr = Reg(fp_waddr) + + when (retire_toint) { + fsr_exc := fsr_exc | retire_toint_exc + } + when (retire_toint && retire_fromint) { // MTFSR + fsr_exc := retire_fromint_wdata(4,0) + fsr_rm := retire_fromint_wdata(7,5) + } + + regfile.write(retire_fromint_waddr, retire_fromint_wdata, retire_fromint && !retire_toint) + + val fp_inflight = fp_toint_val || retire_toint || fp_fromint_val || retire_fromint + val mtfsr_inflight = fp_toint_val && fp_fromint_val || retire_toint && retire_fromint + val fsr_busy = ctrl.fsr && fp_inflight || mtfsr_inflight val units_busy = Bool(false) - io.ctrl.nack := reg_valid && (fsr_busy || units_busy) + val write_port_busy = Bool(false) + io.ctrl.nack := fsr_busy || units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs } From 25ecfb9bbcca937df6499e4de1a1e5575a5b1623 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 12 Feb 2012 20:32:06 -0800 Subject: [PATCH 0151/1087] clean up caches - remove incompatible blocking D$ - remove direct-mapped nonblocking cache --- 
rocket/src/main/scala/dcache.scala | 517 --------------------------- rocket/src/main/scala/nbdcache.scala | 310 ++-------------- rocket/src/main/scala/top.scala | 6 +- 3 files changed, 40 insertions(+), 793 deletions(-) delete mode 100644 rocket/src/main/scala/dcache.scala diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala deleted file mode 100644 index 88aa3f3f..00000000 --- a/rocket/src/main/scala/dcache.scala +++ /dev/null @@ -1,517 +0,0 @@ -package Top { - -import Chisel._ -import Node._; -import Constants._; -import scala.math._; - -// interface between D$ and processor/DTLB -class ioDmem(view: List[String] = null) extends Bundle(view) { - val req_kill = Bool(INPUT); - val req_val = Bool(INPUT); - val req_rdy = Bool(OUTPUT); - val req_cmd = Bits(4, INPUT); - val req_type = Bits(3, INPUT); - val req_idx = Bits(PGIDX_BITS, INPUT); - val req_ppn = Bits(PPN_BITS, INPUT); - val req_data = Bits(64, INPUT); - val req_tag = Bits(DCACHE_TAG_BITS, INPUT); - val xcpt_ma_ld = Bool(OUTPUT); // misaligned load - val xcpt_ma_st = Bool(OUTPUT); // misaligned store - val resp_miss = Bool(OUTPUT); - val resp_nack = Bool(OUTPUT); - val resp_val = Bool(OUTPUT); - val resp_replay = Bool(OUTPUT); - val resp_data = Bits(64, OUTPUT); - val resp_data_subword = Bits(64, OUTPUT); - val resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT); -} - -// interface between D$ and next level in memory hierarchy -class ioDCache(view: List[String] = null) extends Bundle(view) { - val req_addr = UFix(PADDR_BITS - OFFSET_BITS, INPUT); - val req_tag = UFix(DMEM_TAG_BITS, INPUT); - val req_val = Bool(INPUT); - val req_rdy = Bool(OUTPUT); - val req_wdata = Bits(MEM_DATA_BITS, INPUT); - val req_rw = Bool(INPUT); - val resp_data = Bits(MEM_DATA_BITS, OUTPUT); - val resp_tag = Bits(DMEM_TAG_BITS, OUTPUT); - val resp_val = Bool(OUTPUT); -} - -class ioDCacheDM extends Bundle() { - val cpu = new ioDmem(); - val mem = new ioDCache().flip(); -} - -class ioDCacheHella extends Bundle() { - 
val cpu = new ioDmem(); - val mem = new ioDCache().flip(); -} - -class rocketDCacheStoreGen extends Component { - val io = new Bundle { - val req_type = Bits(3, INPUT); - val req_addr_lsb = Bits(3, INPUT); - val req_data = Bits(64, INPUT); - val store_wmask = Bits(64, OUTPUT); - val store_data = Bits(64, OUTPUT); - } - - // generate write mask and store data signals based on store type and address LSBs - val wmask_b = - Mux(io.req_addr_lsb === UFix(0, 3), Bits("b0000_0001", 8), - Mux(io.req_addr_lsb === UFix(1, 3), Bits("b0000_0010", 8), - Mux(io.req_addr_lsb === UFix(2, 3), Bits("b0000_0100", 8), - Mux(io.req_addr_lsb === UFix(3, 3), Bits("b0000_1000", 8), - Mux(io.req_addr_lsb === UFix(4, 3), Bits("b0001_0000", 8), - Mux(io.req_addr_lsb === UFix(5, 3), Bits("b0010_0000", 8), - Mux(io.req_addr_lsb === UFix(6, 3), Bits("b0100_0000", 8), - Mux(io.req_addr_lsb === UFix(7, 3), Bits("b1000_0000", 8), - UFix(0, 8))))))))); - - val wmask_h = - Mux(io.req_addr_lsb(2,1) === UFix(0, 2), Bits("b0000_0011", 8), - Mux(io.req_addr_lsb(2,1) === UFix(1, 2), Bits("b0000_1100", 8), - Mux(io.req_addr_lsb(2,1) === UFix(2, 2), Bits("b0011_0000", 8), - Mux(io.req_addr_lsb(2,1) === UFix(3, 2), Bits("b1100_0000", 8), - UFix(0, 8))))); - - val wmask_w = - Mux(io.req_addr_lsb(2) === UFix(0, 1), Bits("b0000_1111", 8), - Mux(io.req_addr_lsb(2) === UFix(1, 1), Bits("b1111_0000", 8), - UFix(0, 8))); - - val wmask_d = - Bits("b1111_1111", 8); - - val store_wmask_byte = - Mux(io.req_type === MT_B, wmask_b, - Mux(io.req_type === MT_H, wmask_h, - Mux(io.req_type === MT_W, wmask_w, - Mux(io.req_type === MT_D, wmask_d, - UFix(0, 8))))); - - val store_wmask_d = Cat(Fill(8, store_wmask_byte(7)), - Fill(8, store_wmask_byte(6)), - Fill(8, store_wmask_byte(5)), - Fill(8, store_wmask_byte(4)), - Fill(8, store_wmask_byte(3)), - Fill(8, store_wmask_byte(2)), - Fill(8, store_wmask_byte(1)), - Fill(8, store_wmask_byte(0))); - - io.store_wmask := store_wmask_d; - - io.store_data := - Mux(io.req_type === MT_B, 
Fill(8, io.req_data( 7,0)), - Mux(io.req_type === MT_H, Fill(4, io.req_data(15,0)), - Mux(io.req_type === MT_W, Fill(2, io.req_data(31,0)), - Mux(io.req_type === MT_D, io.req_data, - UFix(0, 64))))); - -} - -// state machine to flush (write back dirty lines, invalidate clean ones) the D$ -class rocketDCacheDM_flush(lines: Int) extends Component { - val io = new ioDCacheDM(); - val dcache = new rocketDCacheDM(lines); - - val addrbits = PADDR_BITS; - val indexbits = ceil(log10(lines)/log10(2)).toInt; - val offsetbits = 6; - val tagmsb = addrbits - 1; - val taglsb = indexbits+offsetbits; - val tagbits = tagmsb-taglsb+1; - val indexmsb = taglsb-1; - val indexlsb = offsetbits; - val offsetmsb = indexlsb-1; - val offsetlsb = 3; - - val flush_count = Reg(resetVal = UFix(0, indexbits)); - val flush_resp_count = Reg(resetVal = UFix(0, indexbits)); - val flushing = Reg(resetVal = Bool(false)); - val flush_waiting = Reg(resetVal = Bool(false)); - val r_cpu_req_tag = Reg() { Bits() } - - when (io.cpu.req_val && io.cpu.req_rdy && (io.cpu.req_cmd === M_FLA)) - { - r_cpu_req_tag := io.cpu.req_tag; - flushing := Bool(true); - flush_waiting := Bool(true); - } - - when (dcache.io.cpu.req_rdy && (flush_count === ~Bits(0, indexbits))) { - flushing := Bool(false); - } - when (dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag) && (flush_resp_count === ~Bits(0, indexbits))) { - flush_waiting := Bool(false); - } - - when (flushing && dcache.io.cpu.req_rdy) { - flush_count := flush_count + UFix(1,1); - } - when (flush_waiting && dcache.io.cpu.resp_val && (dcache.io.cpu.resp_tag === r_cpu_req_tag)) { - flush_resp_count := flush_resp_count + UFix(1,1); - } - - dcache.io.cpu.req_val := (io.cpu.req_val && (io.cpu.req_cmd != M_FLA) && !flush_waiting) || flushing; - dcache.io.cpu.req_cmd := Mux(flushing, M_FLA, io.cpu.req_cmd); - dcache.io.cpu.req_idx := Mux(flushing, Cat(flush_count, Bits(0,offsetbits)), io.cpu.req_idx); - dcache.io.cpu.req_ppn := Mux(flushing, 
UFix(0,PPN_BITS), io.cpu.req_ppn); - dcache.io.cpu.req_tag := Mux(flushing, r_cpu_req_tag, io.cpu.req_tag); - dcache.io.cpu.req_type := io.cpu.req_type; - dcache.io.cpu.req_data <> io.cpu.req_data; - dcache.io.cpu.req_kill := io.cpu.req_kill && !flush_waiting; - dcache.io.mem <> io.mem; - - io.cpu.xcpt_ma_ld := dcache.io.cpu.xcpt_ma_ld; - io.cpu.xcpt_ma_st := dcache.io.cpu.xcpt_ma_st; - io.cpu.req_rdy := dcache.io.cpu.req_rdy && !flush_waiting; - io.cpu.resp_miss := dcache.io.cpu.resp_miss; - io.cpu.resp_nack := dcache.io.cpu.resp_nack; - io.cpu.resp_data := dcache.io.cpu.resp_data; - io.cpu.resp_tag := dcache.io.cpu.resp_tag; - io.cpu.resp_val := dcache.io.cpu.resp_val & - !(flush_waiting && (io.cpu.resp_tag === r_cpu_req_tag) && (flush_count != ~Bits(0, addrbits))); - -} - -class rocketDCacheDM(lines: Int) extends Component { - val io = new ioDCacheDM(); - - val addrbits = PADDR_BITS; - val indexbits = ceil(log10(lines)/log10(2)).toInt; - val offsetbits = 6; // 64 byte cache lines = 2^6 bytes - val tagmsb = PADDR_BITS-1; - val taglsb = indexbits+offsetbits; - val tagbits = tagmsb-taglsb+1; - val indexmsb = taglsb-1; - val indexlsb = offsetbits; - val offsetmsb = indexlsb-1; - val offsetlsb = 3; - - val s_reset :: s_ready :: s_replay_load :: s_write_amo :: s_start_writeback :: s_writeback :: s_req_refill :: s_refill :: s_resolve_miss :: Nil = Enum(9) { UFix() }; - val state = Reg(resetVal = s_reset); - - // idx arrives one clock cycle prior to ppn b/c of DTLB - val r_cpu_req_idx = Reg(resetVal = Bits(0, PGIDX_BITS)); - val r_cpu_req_ppn = Reg(resetVal = Bits(0, PPN_BITS)); - val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_cmd = Reg(resetVal = Bits(0,4)); - val r_cpu_req_type = Reg(resetVal = Bits(0,3)); - val r_cpu_req_tag = Reg() { Bits() } - val r_cpu_resp_val = Reg(resetVal = Bool(false)); - val r_amo_data = Reg(resetVal = Bits(0,64)); - - val p_store_data = Reg(resetVal = Bits(0,64)); - val p_store_idx = Reg(resetVal = Bits(0,PGIDX_BITS)); - 
val p_store_type = Reg(resetVal = Bits(0,3)); - val p_store_valid = Reg(resetVal = Bool(false)); - - val req_store = (io.cpu.req_cmd === M_XWR); - val req_load = (io.cpu.req_cmd === M_XRD); - val req_flush = (io.cpu.req_cmd === M_FLA); - val req_amo = io.cpu.req_cmd(3).toBool; - val r_req_load = (r_cpu_req_cmd === M_XRD); - val r_req_store = (r_cpu_req_cmd === M_XWR); - val r_req_flush = (r_cpu_req_cmd === M_FLA); - val r_req_amo = r_cpu_req_cmd(3).toBool; - - when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_idx := io.cpu.req_idx; - r_cpu_req_cmd := io.cpu.req_cmd; - r_cpu_req_type := io.cpu.req_type; - r_cpu_req_tag := io.cpu.req_tag; - } - - when ((state === s_ready) && r_cpu_req_val && !io.cpu.req_kill) { - r_cpu_req_ppn := io.cpu.req_ppn; - } - when (io.cpu.req_rdy) { - r_cpu_req_val := io.cpu.req_val; - } - otherwise { - r_cpu_req_val := Bool(false); - } - when (((state === s_resolve_miss) && (r_req_load || r_req_amo)) || (state === s_replay_load)) { - r_cpu_resp_val := Bool(true); - } - otherwise { - r_cpu_resp_val := Bool(false); - } - - // refill counter - val rr_count = Reg(resetVal = UFix(0,2)); - val rr_count_next = rr_count + UFix(1); - when (((state === s_refill) && io.mem.resp_val) || ((state === s_writeback) && io.mem.req_rdy)) { - rr_count := rr_count_next; - } - - // tag array - val tag_addr = - Mux((state === s_ready), io.cpu.req_idx(PGIDX_BITS-1,offsetbits), - r_cpu_req_idx(PGIDX_BITS-1,offsetbits)).toUFix; - val tag_we = - ((state === s_refill) && io.mem.resp_val && (rr_count === UFix(3,2))) || - ((state === s_resolve_miss) && r_req_flush); - - val tag_array = Mem(lines, r_cpu_req_ppn); - tag_array.setReadLatency(1); - tag_array.setTarget('inst); - val tag_rdata = tag_array.rw(tag_addr, r_cpu_req_ppn, tag_we); - - // valid bit array - val vb_array = Reg(resetVal = Bits(0, lines)); - when (tag_we && !r_req_flush) { - vb_array := vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); - } - when (tag_we && r_req_flush) { - 
vb_array := vb_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); - } - val vb_rdata = vb_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; - val tag_valid = r_cpu_req_val && vb_rdata; - val tag_match = (tag_rdata === io.cpu.req_ppn); - val tag_hit = tag_valid && tag_match; - val miss = r_cpu_req_val && (!vb_rdata || !tag_match); - - // load/store addresses conflict if they are to any part of the same 64 bit word - val addr_match = (r_cpu_req_idx(PGIDX_BITS-1,offsetlsb) === p_store_idx(PGIDX_BITS-1,offsetlsb)); - val ldst_conflict = tag_valid && tag_match && (r_req_load || r_req_amo) && p_store_valid && addr_match; - val store_hit = r_cpu_req_val && !io.cpu.req_kill && tag_hit && r_req_store ; - - // write the pending store data when the cache is idle, when the next command isn't a load - // or when there's a load to the same address (in which case there's a 2 cycle delay: - // once cycle to write the store data and another to read the data back) - val drain_store = - ((store_hit || p_store_valid) && (!io.cpu.req_val || req_store || req_flush)) || - (p_store_valid && (miss || ldst_conflict)); - - // write pending store data from a store which missed - // after the cache line refill has completed - val resolve_store = (state === s_resolve_miss) && r_req_store; - - // pending store data - when (io.cpu.req_val && io.cpu.req_rdy && req_store) { - p_store_idx := io.cpu.req_idx; - p_store_data := io.cpu.req_data; - p_store_type := io.cpu.req_type; - } - when (store_hit && !drain_store) { - p_store_valid := Bool(true); - } - when (drain_store) { - p_store_valid := Bool(false); - } - - // AMO operand - when (io.cpu.req_val && io.cpu.req_rdy && req_amo) { - r_amo_data := io.cpu.req_data; - } - - // dirty bit array - val db_array = Reg(resetVal = Bits(0, lines)); - val tag_dirty = db_array(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix).toBool; - when ((r_cpu_req_val && !io.cpu.req_kill && tag_hit && r_req_store) || resolve_store) { - db_array 
:= db_array.bitSet(p_store_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); - } - when (state === s_write_amo) { - db_array := db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(1,1)); - } - when (tag_we) { - db_array := db_array.bitSet(r_cpu_req_idx(PGIDX_BITS-1,offsetbits).toUFix, UFix(0,1)); - } - - // generate write mask and data signals for stores and amos - val storegen = new rocketDCacheStoreGen(); - storegen.io.req_addr_lsb := p_store_idx(2,0); - storegen.io.req_data := p_store_data; - storegen.io.req_type := p_store_type; - val store_data = Fill(2, storegen.io.store_data); - val store_wmask_d = storegen.io.store_wmask; - val store_wmask = Mux(p_store_idx(offsetlsb).toBool, Cat(store_wmask_d, Bits(0,64)), Cat(Bits(0,64), store_wmask_d)); - - // ALU for AMOs - val amo_alu = new rocketDCacheAmoALU(); - val amo_alu_out = Cat(amo_alu.io.result,amo_alu.io.result); - val amo_wmask = - Mux(r_cpu_req_type === MT_D, ~Bits(0,8), - Mux(r_cpu_req_idx(2).toBool, Cat(~Bits(0,4), Bits(0,4)), - Cat(Bits(0,4), ~Bits(0,4)))); - - val amo_store_wmask_d = Cat(Fill(8, amo_wmask(7)), - Fill(8, amo_wmask(6)), - Fill(8, amo_wmask(5)), - Fill(8, amo_wmask(4)), - Fill(8, amo_wmask(3)), - Fill(8, amo_wmask(2)), - Fill(8, amo_wmask(1)), - Fill(8, amo_wmask(0))); - - val amo_store_wmask = Mux(r_cpu_req_idx(offsetlsb).toBool, Cat(amo_store_wmask_d, Bits(0,64)), Cat(Bits(0,64), amo_store_wmask_d)); - - // data array - val data_addr = - Mux(drain_store || resolve_store, p_store_idx(PGIDX_BITS-1, offsetmsb-1), - Mux((state === s_writeback) && io.mem.req_rdy, Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count_next), - Mux((state === s_start_writeback) || (state === s_writeback) || (state === s_refill), Cat(r_cpu_req_idx(PGIDX_BITS-1, offsetbits), rr_count), - Mux((state === s_resolve_miss) || (state === s_replay_load) || (state === s_write_amo), r_cpu_req_idx(PGIDX_BITS-1, offsetmsb-1), - io.cpu.req_idx(PGIDX_BITS-1, offsetmsb-1))))).toUFix; - - val data_wdata = - 
Mux((state === s_refill), io.mem.resp_data, - Mux((state === s_write_amo), amo_alu_out, - store_data)); - - val data_we = - ((state === s_refill) && io.mem.resp_val) || - (state === s_write_amo) || - drain_store || resolve_store; - - val data_wmask = - Mux((state === s_refill), ~Bits(0,128), - Mux((state === s_write_amo), amo_store_wmask, - store_wmask)); - - val data_array = Mem(lines*4, data_wdata); - data_array.setReadLatency(1); - data_array.setTarget('inst); - val data_array_rdata = data_array.rw(data_addr, data_wdata, data_we, data_wmask); - val resp_data = Mux(r_cpu_req_idx(offsetlsb).toBool, data_array_rdata(127, 64), data_array_rdata(63,0)); - val r_resp_data = Reg(resp_data); - - amo_alu.io.cmd := r_cpu_req_cmd; - amo_alu.io.wmask := amo_wmask; - amo_alu.io.lhs := Mux(r_cpu_resp_val, resp_data, r_resp_data).toUFix; - amo_alu.io.rhs := r_amo_data.toUFix; - - // signal a load miss when the data isn't present in the cache and when it's in the pending store data register - // (causes the cache to block for 2 cycles and the load or amo instruction is replayed) - val load_miss = - !io.cpu.req_kill && - (state === s_ready) && r_cpu_req_val && (r_req_load || r_req_amo) && (!tag_hit || (p_store_valid && addr_match)); - - // output signals - // busy when there's a load to the same address as a pending store, or on a cache miss, or when executing a flush - io.cpu.req_rdy := (state === s_ready) && !io.cpu.req_kill && !ldst_conflict && (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))); - io.cpu.resp_val := !io.cpu.req_kill && - ((state === s_ready) && tag_hit && (r_req_load || r_req_amo) && !(p_store_valid && addr_match)) || - ((state === s_resolve_miss) && r_req_flush) || - r_cpu_resp_val; - - val misaligned = - (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && r_cpu_req_idx(0).toBool) || - (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0,2))) || - ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != 
Bits(0,3))); - - io.cpu.xcpt_ma_ld := r_cpu_req_val && (r_req_load || r_req_amo) && misaligned; - io.cpu.xcpt_ma_st := r_cpu_req_val && (r_req_store || r_req_amo) && misaligned; - - io.cpu.resp_miss := load_miss; - io.cpu.resp_nack := Bool(false) - io.cpu.resp_tag := r_cpu_req_tag - io.cpu.resp_data := resp_data; - - io.mem.req_val := (state === s_req_refill) || (state === s_writeback); - io.mem.req_rw := (state === s_writeback); - io.mem.req_wdata := data_array_rdata; - io.mem.req_tag := UFix(0); - io.mem.req_addr := - Mux(state === s_writeback, Cat(tag_rdata, r_cpu_req_idx(PGIDX_BITS-1, offsetbits)), - Cat(r_cpu_req_ppn, r_cpu_req_idx(PGIDX_BITS-1, offsetbits))).toUFix; - - // control state machine - switch (state) { - is (s_reset) { - state := s_ready; - } - is (s_ready) { - when (io.cpu.req_kill) { - state := s_ready; - } - when (ldst_conflict) { - state := s_replay_load; - } - when (!r_cpu_req_val || (tag_hit && !(r_req_flush || r_req_amo))) { - state := s_ready; - } - when (tag_hit && r_req_amo) { - state := s_write_amo; - } - when (tag_valid & tag_dirty) { - state := s_start_writeback; - } - when (r_req_flush) { - state := s_resolve_miss; - } - otherwise { - state := s_req_refill; - } - } - is (s_replay_load) { - state := s_ready; - } - is (s_write_amo) { - state := s_ready; - } - is (s_start_writeback) { - state := s_writeback; - } - is (s_writeback) { - when (io.mem.req_rdy && (rr_count === UFix(3,2))) { - when (r_req_flush) { - state := s_resolve_miss; - } - otherwise { - state := s_req_refill; - } - } - } - is (s_req_refill) - { - when (io.mem.req_rdy) { state := s_refill; } - } - is (s_refill) { - when (io.mem.resp_val && (rr_count === UFix(3,2))) { state := s_resolve_miss; } - } - is (s_resolve_miss) { - when (r_req_amo) { - state := s_write_amo; - } - state := s_ready; - } - } -} - -class rocketDCacheAmoALU extends Component { - val io = new Bundle { - val cmd = Bits(4, INPUT); - val wmask = Bits(8, INPUT); - val lhs = UFix(64, INPUT); - val rhs = 
UFix(64, INPUT); - val result = UFix(64, OUTPUT); - } - -// val signed_cmp = (op === M_XA_MIN) || (op === M_XA_MAX); -// val sub = (op === M_XA_MIN) || (op === M_XA_MINU) || -// (op === M_XA_MAX) || (op === M_XA_MAXU); - - val adder_lhs = Cat(io.lhs(63,32),io.wmask(3) & io.lhs(31), io.lhs(30,0)).toUFix; - val adder_rhs = Cat(io.rhs(63,32),io.wmask(3) & io.rhs(31), io.rhs(30,0)).toUFix; -// val adder_rhs = Cat(Mux(sub, ~io.rhs, io.rhs), sub).toUFix; -// val sum = adder_lhs + adder_rhs; -// val adder_out = sum(64,1); - val adder_out = adder_lhs + adder_rhs; - val alu_out = Wire() { UFix() }; - switch (io.cmd) { -// is (M_XA_ADD) { alu_out := adder_out; } - is (M_XA_SWAP) { alu_out := io.rhs; } - is (M_XA_AND) { alu_out := io.lhs & io.rhs; } - is (M_XA_OR) { alu_out := io.lhs | io.rhs; } - } - alu_out := adder_out; - io.result := alu_out; -} - -} diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index a7cd6748..2582ce0e 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -650,282 +650,46 @@ class AMOALU extends Component { io.out := Mux(word, Cat(out(31,0), out(31,0)).toUFix, out) } -class HellaCacheDM extends Component { - val io = new ioDCacheHella() +// interface between D$ and processor/DTLB +class ioDmem(view: List[String] = null) extends Bundle(view) { + val req_kill = Bool(INPUT); + val req_val = Bool(INPUT); + val req_rdy = Bool(OUTPUT); + val req_cmd = Bits(4, INPUT); + val req_type = Bits(3, INPUT); + val req_idx = Bits(PGIDX_BITS, INPUT); + val req_ppn = Bits(PPN_BITS, INPUT); + val req_data = Bits(64, INPUT); + val req_tag = Bits(DCACHE_TAG_BITS, INPUT); + val xcpt_ma_ld = Bool(OUTPUT); // misaligned load + val xcpt_ma_st = Bool(OUTPUT); // misaligned store + val resp_miss = Bool(OUTPUT); + val resp_nack = Bool(OUTPUT); + val resp_val = Bool(OUTPUT); + val resp_replay = Bool(OUTPUT); + val resp_data = Bits(64, OUTPUT); + val resp_data_subword = Bits(64, OUTPUT); + val 
resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT); +} - val lines = 1 << IDX_BITS - val addrbits = PADDR_BITS - val indexbits = log2up(lines) - val offsetbits = OFFSET_BITS - val tagmsb = PADDR_BITS-1 - val taglsb = indexbits+offsetbits - val tagbits = tagmsb-taglsb+1 - val indexmsb = taglsb-1 - val indexlsb = offsetbits - val offsetmsb = indexlsb-1 - val offsetlsb = log2up(CPU_DATA_BITS/8) - val ramindexlsb = log2up(MEM_DATA_BITS/8) - - val early_nack = Reg { Bool() } - val r_cpu_req_val_ = Reg(io.cpu.req_val && io.cpu.req_rdy, resetVal = Bool(false)) - val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req_kill && !early_nack - val r_cpu_req_idx = Reg() { Bits() } - val r_cpu_req_cmd = Reg() { Bits() } - val r_cpu_req_type = Reg() { Bits() } - val r_cpu_req_tag = Reg() { Bits() } - val r_amo_replay_data = Reg() { Bits() } - - val p_store_valid = Reg(resetVal = Bool(false)) - val p_store_data = Reg() { Bits() } - val p_store_idx = Reg() { Bits() } - val p_store_cmd = Reg() { Bits() } - val p_store_type = Reg() { Bits() } - val r_replay_amo = Reg(resetVal = Bool(false)) - - val req_store = (io.cpu.req_cmd === M_XWR) - val req_load = (io.cpu.req_cmd === M_XRD) - val req_amo = io.cpu.req_cmd(3).toBool - val req_read = req_load || req_amo - val req_write = req_store || req_amo - val r_req_load = (r_cpu_req_cmd === M_XRD) - val r_req_store = (r_cpu_req_cmd === M_XWR) - val r_req_flush = (r_cpu_req_cmd === M_FLA) - val r_req_fence = (r_cpu_req_cmd === M_FENCE) - val r_req_amo = r_cpu_req_cmd(3).toBool - val r_req_read = r_req_load || r_req_amo - val r_req_write = r_req_store || r_req_amo - val r_req_readwrite = r_req_read || r_req_write - - // replay unit - val replayer = new ReplayUnit() - val replay_amo_val = replayer.io.data_req.valid && replayer.io.data_req.bits.cmd(3).toBool - - when (io.cpu.req_val) { - r_cpu_req_idx := io.cpu.req_idx - r_cpu_req_cmd := io.cpu.req_cmd - r_cpu_req_type := io.cpu.req_type - r_cpu_req_tag := io.cpu.req_tag - } - when (replay_amo_val) { - 
r_cpu_req_idx := Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) - r_cpu_req_cmd := replayer.io.data_req.bits.cmd - r_cpu_req_type := replayer.io.data_req.bits.typ - r_amo_replay_data := replayer.io.data_req.bits.data - } - val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) - - // refill counter - val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) - val rr_count_next = rr_count + UFix(1) - when (io.mem.resp_val) { rr_count := rr_count_next } - - val misaligned = - (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || - (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || - ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); - - io.cpu.xcpt_ma_ld := r_cpu_req_val_ && r_req_read && misaligned - io.cpu.xcpt_ma_st := r_cpu_req_val_ && r_req_write && misaligned - - // tags - val meta = new MetaDataArray(lines) - val meta_arb = (new Arbiter(3)) { new MetaArrayReq() } - meta_arb.io.out <> meta.io.req - - // data - val data = new DataArray(lines) - val data_arb = (new Arbiter(5)) { new DataArrayReq() } - data_arb.io.out <> data.io.req - - // writeback unit - val wb = new WritebackUnit - val wb_arb = (new Arbiter(2)) { new WritebackReq() } - wb_arb.io.out <> wb.io.req - wb.io.data_req.bits.inner_req <> data_arb.io.in(3).bits - wb.io.data_req.ready := data_arb.io.in(3).ready - data_arb.io.in(3).valid := wb.io.data_req.valid - wb.io.data_resp <> data.io.resp - - // cpu tag check - meta_arb.io.in(2).valid := io.cpu.req_val - meta_arb.io.in(2).bits.idx := io.cpu.req_idx(indexmsb,indexlsb) - meta_arb.io.in(2).bits.rw := Bool(false) - meta_arb.io.in(2).bits.data.valid := Bool(false) // don't care - meta_arb.io.in(2).bits.data.dirty := Bool(false) // don't care - meta_arb.io.in(2).bits.data.tag := UFix(0) // don't care - val early_tag_nack = !meta_arb.io.in(2).ready - val cpu_req_tag = Cat(io.cpu.req_ppn, 
r_cpu_req_idx)(tagmsb,taglsb) - val tag_match = meta.io.resp.valid && (meta.io.resp.tag === cpu_req_tag) - val tag_hit = r_cpu_req_val && tag_match - val tag_miss = r_cpu_req_val && !tag_match - val dirty = meta.io.resp.valid && meta.io.resp.dirty - - // refill response - val block_during_refill = !io.mem.resp_val && (rr_count != UFix(0)) - data_arb.io.in(0).valid := io.mem.resp_val || block_during_refill - data_arb.io.in(0).bits.offset := rr_count - data_arb.io.in(0).bits.rw := !block_during_refill - data_arb.io.in(0).bits.wmask := ~UFix(0, MEM_DATA_BITS/8) - data_arb.io.in(0).bits.data := io.mem.resp_data - - // load hits - data_arb.io.in(4).bits.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) - data_arb.io.in(4).bits.idx := io.cpu.req_idx(indexmsb,indexlsb) - data_arb.io.in(4).bits.rw := Bool(false) - data_arb.io.in(4).bits.wmask := UFix(0) // don't care - data_arb.io.in(4).bits.data := io.mem.resp_data // don't care - data_arb.io.in(4).valid := io.cpu.req_val && req_read - val early_load_nack = req_read && !data_arb.io.in(4).ready - - // store hits and AMO hits and misses use a pending store register. - // we nack new stores if a pending store can't retire for some reason. - // we drain a pending store if the CPU performs a store or a - // conflictig load, or if the cache is idle, or after a miss. 
- val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb)) - val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) - val p_store_match = r_cpu_req_val && r_req_read && p_store_idx_match && p_store_offset_match - val drain_store_val = (p_store_valid && (!io.cpu.req_val || !req_read || Reg(tag_miss))) || p_store_match - data_arb.io.in(2).bits.offset := p_store_idx(offsetmsb,ramindexlsb) - data_arb.io.in(2).bits.idx := p_store_idx(indexmsb,indexlsb) - data_arb.io.in(2).bits.rw := Bool(true) - data_arb.io.in(2).valid := drain_store_val - val drain_store = drain_store_val && data_arb.io.in(2).ready - val p_store_rdy = !p_store_valid || drain_store - val p_amo = Reg(tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo, resetVal = Bool(false)) - p_store_valid := !p_store_rdy || (tag_hit && r_req_store) || p_amo - - // writeback - val wb_rdy = wb_arb.io.in(1).ready && !p_store_idx_match - wb_arb.io.in(1).valid := tag_miss && r_req_readwrite && dirty && !p_store_idx_match - wb_arb.io.in(1).bits.ppn := meta.io.resp.tag - wb_arb.io.in(1).bits.idx := r_cpu_req_idx(indexmsb,indexlsb) - - // tag update after a miss or a store to an exclusive clean line. 
- val clear_valid = tag_miss && r_req_readwrite && meta.io.resp.valid && (!dirty || wb_rdy) - val set_dirty = tag_hit && !meta.io.resp.dirty && r_req_write - meta.io.state_req.valid := clear_valid || set_dirty - meta.io.state_req.bits.rw := Bool(true) - meta.io.state_req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) - meta.io.state_req.bits.data.tag := UFix(0) // don't care - meta.io.state_req.bits.data.valid := tag_match - meta.io.state_req.bits.data.dirty := tag_match - - // pending store data, also used for AMO RHS - val amoalu = new AMOALU - when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) { - p_store_idx := r_cpu_req_idx - p_store_type := r_cpu_req_type - p_store_cmd := r_cpu_req_cmd - p_store_data := cpu_req_data - } - when (p_amo) { - p_store_data := amoalu.io.out - } - - // miss handling - val mshr = new MSHRFile() - mshr.io.req_val := tag_miss && r_req_readwrite && (!dirty || wb_rdy) && (!r_req_write || replayer.io.sdq_enq.ready) - mshr.io.req_ppn := cpu_req_tag - mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) - mshr.io.req_tag := r_cpu_req_tag - mshr.io.req_offset := r_cpu_req_idx(offsetmsb,0) - mshr.io.req_cmd := r_cpu_req_cmd - mshr.io.req_type := r_cpu_req_type - mshr.io.req_sdq_id := replayer.io.sdq_id - mshr.io.mem_resp_val := io.mem.resp_val && (~rr_count === UFix(0)) - mshr.io.mem_resp_tag := io.mem.resp_tag - mshr.io.mem_req <> wb.io.refill_req - mshr.io.meta_req.bits.inner_req <> meta_arb.io.in(1).bits - mshr.io.meta_req.ready := meta_arb.io.in(1).ready - meta_arb.io.in(1).valid := mshr.io.meta_req.valid - mshr.io.replay <> replayer.io.replay - replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy - replayer.io.sdq_enq.bits := cpu_req_data - data_arb.io.in(0).bits.idx := mshr.io.mem_resp_idx - - // replays - val replay = replayer.io.data_req.bits - val stall_replay = r_replay_amo || p_amo || p_store_valid - val replay_val = replayer.io.data_req.valid && !stall_replay - val replay_rdy = 
data_arb.io.in(1).ready - data_arb.io.in(1).bits.offset := replay.offset(offsetmsb,ramindexlsb) - data_arb.io.in(1).bits.idx := replay.idx - data_arb.io.in(1).bits.rw := replay.cmd === M_XWR - data_arb.io.in(1).valid := replay_val - replayer.io.data_req.ready := replay_rdy && !stall_replay - r_replay_amo := replay_amo_val && replay_rdy && !stall_replay - - // store write mask generation. - // assumes store replays are higher-priority than pending stores. - val maskgen = new StoreMaskGen - val store_offset = Mux(!replay_val, p_store_idx(offsetmsb,0), replay.offset) - maskgen.io.typ := Mux(!replay_val, p_store_type, replay.typ) - maskgen.io.addr := store_offset(offsetlsb-1,0) - val store_wmask_wide = maskgen.io.wmask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2up(CPU_DATA_BITS/8))).toUFix - val store_data = Mux(!replay_val, p_store_data, replay.data) - val store_data_wide = Fill(MEM_DATA_BITS/CPU_DATA_BITS, store_data) - data_arb.io.in(1).bits.data := store_data_wide - data_arb.io.in(1).bits.wmask := store_wmask_wide - data_arb.io.in(2).bits.data := store_data_wide - data_arb.io.in(2).bits.wmask := store_wmask_wide - - // load data subword mux/sign extension. - // subword loads are delayed by one cycle. 
- val loadgen = new LoadDataGen - val loadgen_use_replay = Reg(replay_val && replay_rdy) - loadgen.io.typ := Mux(loadgen_use_replay, Reg(replay.typ), r_cpu_req_type) - loadgen.io.addr := Mux(loadgen_use_replay, Reg(replay.offset), r_cpu_req_idx)(ramindexlsb-1,0) - loadgen.io.din := data.io.resp - - amoalu.io.cmd := p_store_cmd - amoalu.io.typ := p_store_type - amoalu.io.lhs := loadgen.io.r_dout.toUFix - amoalu.io.rhs := p_store_data.toUFix - - early_nack := early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo - - // reset and flush unit - val flusher = new FlushUnit(lines) - val flushed = Reg(resetVal = Bool(true)) - val flush_rdy = mshr.io.fence_rdy && wb_rdy && !p_store_valid - flushed := flushed && !r_cpu_req_val || r_cpu_req_val && r_req_flush && flush_rdy && flusher.io.req.ready - flusher.io.req.valid := r_cpu_req_val && r_req_flush && flush_rdy && !flushed - flusher.io.wb_req <> wb_arb.io.in(0) - flusher.io.meta_req.bits.inner_req <> meta_arb.io.in(0).bits - flusher.io.meta_req.ready := meta_arb.io.in(0).ready - meta_arb.io.in(0).valid := flusher.io.meta_req.valid - flusher.io.meta_resp <> meta.io.resp - flusher.io.resp.ready := Bool(true) // we don't respond to flush requests - - // we usually nack rather than reporting that the cache is not ready. - // fences and flushes are the exceptions. 
- val pending_fence = Reg(resetVal = Bool(false)) - pending_fence := (r_cpu_req_val && r_req_fence || pending_fence) && !flush_rdy - val nack_hit = p_store_match || r_req_write && !p_store_rdy - val nack_miss = dirty && !wb_rdy || !mshr.io.req_rdy || r_req_write && !replayer.io.sdq_enq.ready - val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || - !flushed && r_req_flush - val nack = early_nack || r_req_readwrite && Mux(tag_match, nack_hit, nack_miss) || nack_flush - - io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence - io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack - io.cpu.resp_val := (tag_hit && !nack_hit && r_req_read) || replayer.io.cpu_resp_val - io.cpu.resp_replay := replayer.io.cpu_resp_val - io.cpu.resp_miss := tag_miss && !nack_miss && r_req_read - io.cpu.resp_tag := Mux(replayer.io.cpu_resp_val, replayer.io.cpu_resp_tag, r_cpu_req_tag) - io.cpu.resp_data := loadgen.io.dout - io.cpu.resp_data_subword := loadgen.io.r_dout_subword - - wb.io.mem_req.ready := io.mem.req_rdy - io.mem.req_val := wb.io.mem_req.valid - io.mem.req_rw := wb.io.mem_req.bits.rw - io.mem.req_wdata := wb.io.mem_req_data - io.mem.req_tag := wb.io.mem_req.bits.tag.toUFix - io.mem.req_addr := wb.io.mem_req.bits.addr +// interface between D$ and next level in memory hierarchy +class ioDCache(view: List[String] = null) extends Bundle(view) { + val req_addr = UFix(PADDR_BITS - OFFSET_BITS, INPUT); + val req_tag = UFix(DMEM_TAG_BITS, INPUT); + val req_val = Bool(INPUT); + val req_rdy = Bool(OUTPUT); + val req_wdata = Bits(MEM_DATA_BITS, INPUT); + val req_rw = Bool(INPUT); + val resp_data = Bits(MEM_DATA_BITS, OUTPUT); + val resp_tag = Bits(DMEM_TAG_BITS, OUTPUT); + val resp_val = Bool(OUTPUT); } -class HellaCacheAssoc extends Component { - val io = new ioDCacheHella() +class HellaCache extends Component { + val io = new Bundle { + val cpu = new ioDmem() + val mem = new ioDCache().flip + } val lines = 1 << IDX_BITS val 
addrbits = PADDR_BITS diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 2e5e0311..605f882c 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -17,7 +17,7 @@ class Top() extends Component { val cpu = new rocketProc(); val icache = new rocketICache(128, 2); // 128 sets x 2 ways val icache_pf = new rocketIPrefetcher(); - val dcache = new HellaCacheAssoc(); + val dcache = new HellaCache(); val arbiter = new rocketMemArbiter(); arbiter.io.mem <> io.mem; @@ -37,9 +37,9 @@ class Top() extends Component { object top_main { def main(args: Array[String]) = { // Can turn off --debug and --vcd when done with debugging to improve emulator performance -// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); + val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); // val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug"); - val cpu_args = args ++ Array("--target-dir", "generated-src"); +// val cpu_args = args ++ Array("--target-dir", "generated-src"); // Set variables based off of command flags // for(a <- args) { // a match { From 069037ff3a63146a4b54649ce337bf03586c1098 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 12 Feb 2012 23:31:50 -0800 Subject: [PATCH 0152/1087] add FP recoding --- rocket/src/main/scala/cpu.scala | 1 + rocket/src/main/scala/dpath.scala | 2 + rocket/src/main/scala/fpu.scala | 74 +++++++++++++++++++++++++--- rocket/src/main/scala/nbdcache.scala | 2 + 4 files changed, 72 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 8e106e0c..5462e07e 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -105,6 +105,7 @@ class rocketProc extends Component dpath.io.dmem.resp_val := arb.io.cpu.resp_val; dpath.io.dmem.resp_miss := arb.io.cpu.resp_miss; dpath.io.dmem.resp_replay := arb.io.cpu.resp_replay; + 
dpath.io.dmem.resp_type := io.dmem.resp_type; dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; dpath.io.dmem.resp_data := arb.io.cpu.resp_data; dpath.io.dmem.resp_data_subword := io.dmem.resp_data_subword; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index ec87dfe2..0cb22794 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -15,6 +15,7 @@ class ioDpathDmem extends Bundle() val resp_val = Bool(INPUT); val resp_miss = Bool(INPUT); val resp_replay = Bool(INPUT); + val resp_type = Bits(3, INPUT); val resp_tag = Bits(CPU_TAG_BITS, INPUT); val resp_data = Bits(64, INPUT); val resp_data_subword = Bits(64, INPUT); @@ -400,6 +401,7 @@ class rocketDpath extends Component io.fpu.dmem_resp_val := io.dmem.resp_val && dmem_resp_fpu io.fpu.dmem_resp_data := io.dmem.resp_data + io.fpu.dmem_resp_type := io.dmem.resp_type io.fpu.dmem_resp_tag := dmem_resp_waddr // writeback stage diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 1bbb5e13..375126a0 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -176,6 +176,7 @@ class ioDpathFPU extends Bundle { val toint_data = Bits(64, INPUT) val dmem_resp_val = Bool(OUTPUT) + val dmem_resp_type = Bits(3, OUTPUT) val dmem_resp_tag = UFix(5, OUTPUT) val dmem_resp_data = Bits(64, OUTPUT) } @@ -200,10 +201,12 @@ class rocketFPIntUnit extends Component val exc = Bits(5, OUTPUT) } - val unrecoded_s = io.in(31,0) - val unrecoded_d = io.in + val unrec_s = new hardfloat.recodedFloat32ToFloat32 + val unrec_d = new hardfloat.recodedFloat64ToFloat64 + unrec_s.io.in := io.in + unrec_d.io.in := io.in - io.store_data := Mux(io.single, Cat(unrecoded_s, unrecoded_s), unrecoded_d) + io.store_data := Mux(io.single, Cat(unrec_s.io.out, unrec_s.io.out), unrec_d.io.out) val scmp = Bool(false) val scmp_exc = Bits(0) @@ -219,10 +222,10 @@ class rocketFPIntUnit extends Component // output muxing val (out_s, exc_s) = (Wire() { 
Bits() }, Wire() { Bits() }) - out_s := Cat(Fill(32, unrecoded_s(31)), unrecoded_s) + out_s := Cat(Fill(32, unrec_s.io.out(31)), unrec_s.io.out) exc_s := Bits(0) val (out_d, exc_d) = (Wire() { Bits() }, Wire() { Bits() }) - out_d := unrecoded_d + out_d := unrec_d.io.out exc_d := Bits(0) when (io.cmd === FCMD_MTFSR || io.cmd === FCMD_MFFSR) { @@ -251,6 +254,51 @@ class rocketFPIntUnit extends Component io.exc := Mux(io.single, exc_s, exc_d) } +class rocketIntFPUnit extends Component +{ + val io = new Bundle { + val single = Bool(INPUT) + val cmd = Bits(FCMD_WIDTH, INPUT) + val fsr = Bits(FSR_WIDTH, INPUT) + val in = Bits(64, INPUT) + val out = Bits(65, OUTPUT) + val exc = Bits(5, OUTPUT) + } + + val rec_s = new hardfloat.float32ToRecodedFloat32 + val rec_d = new hardfloat.float64ToRecodedFloat64 + rec_s.io.in := io.in + rec_d.io.in := io.in + + val i2s = Bits(0) + val i2s_exc = Bits(0) + + val i2d = Bits(0) + val i2d_exc = Bits(0) + + // output muxing + val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) + out_s := rec_s.io.out + exc_s := Bits(0) + val (out_d, exc_d) = (Wire() { Bits() }, Wire() { Bits() }) + out_d := rec_d.io.out + exc_d := Bits(0) + + when (io.cmd === FCMD_CVT_FMT_W || io.cmd === FCMD_CVT_FMT_WU || + io.cmd === FCMD_CVT_FMT_L || io.cmd === FCMD_CVT_FMT_LU) { + out_s := i2s + exc_s := i2s_exc + out_d := i2d + exc_d := i2d_exc + } + when (io.cmd === FCMD_MTFSR || io.cmd === FCMD_MFFSR) { + out_s := Cat(out_s(32,FSR_WIDTH), io.in(FSR_WIDTH-1,0)) + } + + io.out := Mux(io.single, Cat(Fill(32,UFix(1)), out_s), out_d) + io.exc := Mux(io.single, exc_s, exc_d) +} + class rocketFPU extends Component { val io = new Bundle { @@ -274,18 +322,25 @@ class rocketFPU extends Component // load response val load_wb = Reg(io.dpath.dmem_resp_val, resetVal = Bool(false)) + val load_wb_single = Reg() { Bool() } val load_wb_data = Reg() { Bits(width = 64) } // XXX WTF why doesn't bit width inference work for the regfile?! 
val load_wb_tag = Reg() { UFix() } when (io.dpath.dmem_resp_val) { + load_wb_single := io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU load_wb_data := io.dpath.dmem_resp_data load_wb_tag := io.dpath.dmem_resp_tag } + val rec_s = new hardfloat.float32ToRecodedFloat32 + val rec_d = new hardfloat.float64ToRecodedFloat64 + rec_s.io.in := load_wb_data + rec_d.io.in := load_wb_data + val load_wb_data_recoded = Mux(load_wb_single, Cat(Fill(32,UFix(1)), rec_s.io.out), rec_d.io.out) val fsr_rm = Reg() { Bits(width = 3) } val fsr_exc = Reg() { Bits(width = 5) } // regfile - val regfile = Mem(32, load_wb, load_wb_tag, load_wb_data); + val regfile = Mem(32, load_wb, load_wb_tag, load_wb_data_recoded); regfile.setReadLatency(0); regfile.setTarget('inst); @@ -332,10 +387,15 @@ class rocketFPU extends Component io.dpath.store_data := fpiu.io.store_data io.dpath.toint_data := fpiu.io.toint_data + val ifpu = new rocketIntFPUnit + ifpu.io.single := ctrl.single + ifpu.io.cmd := ctrl.cmd + ifpu.io.in := fp_fromint_data + val retire_toint = Reg(!io.ctrl.killm && fp_toint_val, resetVal = Bool(false)) val retire_toint_exc = Reg(fpiu.io.exc) val retire_fromint = Reg(!io.ctrl.killm && fp_fromint_val, resetVal = Bool(false)) - val retire_fromint_wdata = Reg(fp_fromint_data) + val retire_fromint_wdata = Reg(ifpu.io.out) val retire_fromint_waddr = Reg(fp_waddr) when (retire_toint) { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2582ce0e..34b0e925 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -667,6 +667,7 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { val resp_nack = Bool(OUTPUT); val resp_val = Bool(OUTPUT); val resp_replay = Bool(OUTPUT); + val resp_type = Bits(3, OUTPUT); val resp_data = Bits(64, OUTPUT); val resp_data_subword = Bits(64, OUTPUT); val resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT); @@ -967,6 +968,7 @@ class HellaCache extends Component { 
io.cpu.resp_replay := replayer.io.cpu_resp_val io.cpu.resp_miss := tag_miss && !nack_miss && r_req_read io.cpu.resp_tag := Mux(replayer.io.cpu_resp_val, replayer.io.cpu_resp_tag, r_cpu_req_tag) + io.cpu.resp_type := loadgen.io.typ io.cpu.resp_data := loadgen.io.dout io.cpu.resp_data_subword := loadgen.io.r_dout_subword From a4a9d2312ce010ffda84a91f07ad2ed77c92b06f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 13 Feb 2012 01:30:01 -0800 Subject: [PATCH 0153/1087] add fcvt.[w|l][u].[s|d], f[eq|lt|le].[s|d] --- rocket/src/main/scala/ctrl.scala | 24 ++++++- rocket/src/main/scala/fpu.scala | 106 ++++++++++++++++++++----------- 2 files changed, 93 insertions(+), 37 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 01937237..eb4b25a2 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -211,8 +211,30 @@ class rocketCtrl extends Component MFTX_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), MFTX_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_W_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_W_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_WU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_WU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_L_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_L_D-> List(FPU_Y,N,BR_N, 
REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_LU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_LU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FEQ_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FEQ_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLT_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLT_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLE_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FLE_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), MXTF_S-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), MXTF_D-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_S_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_D_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_S_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X 
,SYNC_N,N,N,N,N), + FCVT_D_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_S_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_D_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_S_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + FCVT_D_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), FLW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), @@ -463,7 +485,7 @@ class rocketCtrl extends Component mem_reg_flush_inst := ex_reg_flush_inst; mem_reg_xcpt_ma_inst := ex_reg_xcpt_ma_inst; mem_reg_xcpt_itlb := ex_reg_xcpt_itlb; - mem_reg_xcpt_illegal := ex_reg_xcpt_illegal; + mem_reg_xcpt_illegal := ex_reg_xcpt_illegal || ex_reg_fp_val && io.fpu.illegal_rm; mem_reg_xcpt_privileged := ex_reg_xcpt_privileged; mem_reg_xcpt_fpu := ex_reg_fp_val && !io.dpath.status(SR_EF).toBool; mem_reg_xcpt_vec := ex_reg_vec_val && !io.dpath.status(SR_EV).toBool; diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 375126a0..25d5a70d 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -141,17 +141,31 @@ class rocketFPUDecoder extends Component val X = Bool(false) val FCMD_X = FCMD_ADD val decoder = 
ListLookup(io.inst, - List (N,FCMD_X, X,X,X,X,X,X,X,X,X), - Array(FLW -> List(Y,FCMD_LOAD, Y,N,N,N,Y,N,N,N,N), - FLD -> List(Y,FCMD_LOAD, Y,N,N,N,N,N,N,N,N), - FSW -> List(Y,FCMD_STORE, N,N,Y,N,Y,N,N,Y,N), - FSD -> List(Y,FCMD_STORE, N,N,Y,N,N,N,N,Y,N), - MXTF_S -> List(Y,FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N), - MXTF_D -> List(Y,FCMD_MXTF, Y,N,N,N,N,Y,N,N,N), - MFTX_S -> List(Y,FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N), - MFTX_D -> List(Y,FCMD_MFTX, N,Y,N,N,N,N,Y,N,N), - MTFSR -> List(Y,FCMD_MTFSR, N,N,N,N,Y,Y,Y,N,Y), - MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,Y,N,Y,N,Y) + List (N,FCMD_X, X,X,X,X,X,X,X,X,X), + Array(FLW -> List(Y,FCMD_LOAD, Y,N,N,N,Y,N,N,N,N), + FLD -> List(Y,FCMD_LOAD, Y,N,N,N,N,N,N,N,N), + FSW -> List(Y,FCMD_STORE, N,N,Y,N,Y,N,N,Y,N), + FSD -> List(Y,FCMD_STORE, N,N,Y,N,N,N,N,Y,N), + MXTF_S -> List(Y,FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N), + MXTF_D -> List(Y,FCMD_MXTF, Y,N,N,N,N,Y,N,N,N), + MFTX_S -> List(Y,FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N), + MFTX_D -> List(Y,FCMD_MFTX, N,Y,N,N,N,N,Y,N,N), + FCVT_W_S -> List(Y,FCMD_CVT_W_FMT, N,Y,N,N,Y,N,Y,N,N), + FCVT_W_D -> List(Y,FCMD_CVT_W_FMT, N,Y,N,N,N,N,Y,N,N), + FCVT_WU_S-> List(Y,FCMD_CVT_WU_FMT,N,Y,N,N,Y,N,Y,N,N), + FCVT_WU_D-> List(Y,FCMD_CVT_WU_FMT,N,Y,N,N,N,N,Y,N,N), + FCVT_L_S -> List(Y,FCMD_CVT_L_FMT, N,Y,N,N,Y,N,Y,N,N), + FCVT_L_D -> List(Y,FCMD_CVT_L_FMT, N,Y,N,N,N,N,Y,N,N), + FCVT_LU_S-> List(Y,FCMD_CVT_LU_FMT,N,Y,N,N,Y,N,Y,N,N), + FCVT_LU_D-> List(Y,FCMD_CVT_LU_FMT,N,Y,N,N,N,N,Y,N,N), + FEQ_S -> List(Y,FCMD_EQ, N,Y,Y,N,Y,N,Y,N,N), + FEQ_D -> List(Y,FCMD_EQ, N,Y,Y,N,N,N,Y,N,N), + FLT_S -> List(Y,FCMD_LT, N,Y,Y,N,Y,N,Y,N,N), + FLT_D -> List(Y,FCMD_LT, N,Y,Y,N,N,N,Y,N,N), + FLE_S -> List(Y,FCMD_LE, N,Y,Y,N,Y,N,Y,N,N), + FLE_D -> List(Y,FCMD_LE, N,Y,Y,N,N,N,Y,N,N), + MTFSR -> List(Y,FCMD_MTFSR, N,N,N,N,Y,Y,Y,N,Y), + MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,Y,N,Y,N,Y) )) val valid :: cmd :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: store :: fsr :: Nil = decoder @@ -184,6 +198,7 @@ class ioDpathFPU extends Bundle { class 
ioCtrlFPU extends Bundle { val valid = Bool(OUTPUT) val nack = Bool(INPUT) + val illegal_rm = Bool(INPUT) val killx = Bool(OUTPUT) val killm = Bool(OUTPUT) val dec = new rocketFPUCtrlSigs().asInput @@ -195,7 +210,8 @@ class rocketFPIntUnit extends Component val single = Bool(INPUT) val cmd = Bits(FCMD_WIDTH, INPUT) val fsr = Bits(FSR_WIDTH, INPUT) - val in = Bits(65, INPUT) + val in1 = Bits(65, INPUT) + val in2 = Bits(65, INPUT) val store_data = Bits(64, OUTPUT) val toint_data = Bits(64, OUTPUT) val exc = Bits(5, OUTPUT) @@ -203,22 +219,32 @@ class rocketFPIntUnit extends Component val unrec_s = new hardfloat.recodedFloat32ToFloat32 val unrec_d = new hardfloat.recodedFloat64ToFloat64 - unrec_s.io.in := io.in - unrec_d.io.in := io.in + unrec_s.io.in := io.in1 + unrec_d.io.in := io.in1 io.store_data := Mux(io.single, Cat(unrec_s.io.out, unrec_s.io.out), unrec_d.io.out) - val scmp = Bool(false) - val scmp_exc = Bits(0) + val scmp = new hardfloat.recodedFloat32Compare + scmp.io.a := io.in1 + scmp.io.b := io.in2 + val scmp_out = (io.cmd(1,0) & Cat(scmp.io.a_lt_b, scmp.io.a_eq_b)).orR + val scmp_exc = (io.cmd(1,0) & Cat(scmp.io.a_lt_b_invalid, scmp.io.a_eq_b_invalid)).orR << UFix(4) - val s2i = UFix(0) - val s2i_exc = Bits(0) + val s2i = new hardfloat.recodedFloat32ToAny + s2i.io.in := io.in1 + s2i.io.roundingMode := io.fsr >> UFix(5) + s2i.io.typeOp := ~io.cmd(1,0) - val dcmp = Bool(false) - val dcmp_exc = Bits(0) + val dcmp = new hardfloat.recodedFloat64Compare + dcmp.io.a := io.in1 + dcmp.io.b := io.in2 + val dcmp_out = (io.cmd(1,0) & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR + val dcmp_exc = (io.cmd(1,0) & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UFix(4) - val d2i = UFix(0) - val d2i_exc = Bits(0) + val d2i = new hardfloat.recodedFloat64ToAny + d2i.io.in := io.in1 + d2i.io.roundingMode := io.fsr >> UFix(5) + d2i.io.typeOp := ~io.cmd(1,0) // output muxing val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) @@ -232,21 +258,21 @@ class 
rocketFPIntUnit extends Component out_s := io.fsr } when (io.cmd === FCMD_CVT_W_FMT || io.cmd === FCMD_CVT_WU_FMT) { - out_s := Cat(Fill(32, s2i(31)), s2i(31,0)) - exc_s := s2i_exc - out_d := Cat(Fill(32, d2i(31)), d2i(31,0)) - exc_d := d2i_exc + out_s := Cat(Fill(32, s2i.io.out(31)), s2i.io.out(31,0)) + exc_s := s2i.io.exceptionFlags + out_d := Cat(Fill(32, d2i.io.out(31)), d2i.io.out(31,0)) + exc_d := d2i.io.exceptionFlags } when (io.cmd === FCMD_CVT_L_FMT || io.cmd === FCMD_CVT_LU_FMT) { - out_s := s2i - exc_s := s2i_exc - out_d := d2i - exc_d := d2i_exc + out_s := s2i.io.out + exc_s := s2i.io.exceptionFlags + out_d := d2i.io.out + exc_d := d2i.io.exceptionFlags } when (io.cmd === FCMD_EQ || io.cmd === FCMD_LT || io.cmd === FCMD_LE) { - out_s := scmp + out_s := scmp_out exc_s := scmp_exc - out_d := dcmp + out_d := dcmp_out exc_d := dcmp_exc } @@ -352,21 +378,25 @@ class rocketFPU extends Component val fp_fromint_data = Reg() { Bits() } val fp_toint_val = Reg(resetVal = Bool(false)) val fp_toint_data = Reg() { Bits() } + val fp_cmp_data = Reg() { Bits() } val fp_toint_single = Reg() { Bool() } val fp_toint_cmd = Reg() { Bits() } val fp_waddr = Reg() { Bits() } fp_fromint_val := Bool(false) fp_toint_val := Bool(false) - when (reg_valid && !io.ctrl.killx) { + when (reg_valid) { fp_waddr := reg_inst(31,27) when (ctrl.fromint) { - fp_fromint_val := Bool(true) + fp_fromint_val := !io.ctrl.killx fp_fromint_data := io.dpath.fromint_data } when (ctrl.toint) { - fp_toint_val := Bool(true) + fp_toint_val := !io.ctrl.killx fp_toint_data := ex_rs1 + when (ctrl.ren2) { + fp_cmp_data := ex_rs2 + } } when (ctrl.store) { fp_toint_data := ex_rs2 @@ -382,7 +412,8 @@ class rocketFPU extends Component fpiu.io.single := ctrl.single fpiu.io.cmd := ctrl.cmd fpiu.io.fsr := Cat(fsr_rm, fsr_exc) - fpiu.io.in := fp_toint_data + fpiu.io.in1 := fp_toint_data + fpiu.io.in2 := fp_cmp_data io.dpath.store_data := fpiu.io.store_data io.dpath.toint_data := fpiu.io.toint_data @@ -390,6 +421,7 @@ 
class rocketFPU extends Component val ifpu = new rocketIntFPUnit ifpu.io.single := ctrl.single ifpu.io.cmd := ctrl.cmd + ifpu.io.fsr := Cat(fsr_rm, fsr_exc) ifpu.io.in := fp_fromint_data val retire_toint = Reg(!io.ctrl.killm && fp_toint_val, resetVal = Bool(false)) @@ -415,4 +447,6 @@ class rocketFPU extends Component val write_port_busy = Bool(false) io.ctrl.nack := fsr_busy || units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs + // we don't currently support round-max-magnitude (rm=4) + io.ctrl.illegal_rm := Mux(reg_inst(11,9) === Bits(7), fsr_rm(2), reg_inst(11)) } From b5a19a54a31e7beb7b2f5c9339d01e237143c4ed Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 13 Feb 2012 02:01:26 -0800 Subject: [PATCH 0154/1087] add fcvt.[s|d].[w|l][u] --- rocket/src/main/scala/fpu.scala | 35 +++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 25d5a70d..44defdf7 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -148,6 +148,14 @@ class rocketFPUDecoder extends Component FSD -> List(Y,FCMD_STORE, N,N,Y,N,N,N,N,Y,N), MXTF_S -> List(Y,FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N), MXTF_D -> List(Y,FCMD_MXTF, Y,N,N,N,N,Y,N,N,N), + FCVT_S_W -> List(Y,FCMD_CVT_FMT_W, Y,N,N,N,Y,Y,N,N,N), + FCVT_D_W -> List(Y,FCMD_CVT_FMT_W, Y,N,N,N,N,Y,N,N,N), + FCVT_S_WU-> List(Y,FCMD_CVT_FMT_WU,Y,N,N,N,Y,Y,N,N,N), + FCVT_D_WU-> List(Y,FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,N,N,N), + FCVT_S_L -> List(Y,FCMD_CVT_FMT_L, Y,N,N,N,Y,Y,N,N,N), + FCVT_D_L -> List(Y,FCMD_CVT_FMT_L, Y,N,N,N,N,Y,N,N,N), + FCVT_S_LU-> List(Y,FCMD_CVT_FMT_LU,Y,N,N,N,Y,Y,N,N,N), + FCVT_D_LU-> List(Y,FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,N,N,N), MFTX_S -> List(Y,FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N), MFTX_D -> List(Y,FCMD_MFTX, N,Y,N,N,N,N,Y,N,N), FCVT_W_S -> List(Y,FCMD_CVT_W_FMT, N,Y,N,N,Y,N,Y,N,N), @@ -296,11 +304,15 @@ class rocketIntFPUnit extends Component rec_s.io.in := io.in rec_d.io.in := io.in - 
val i2s = Bits(0) - val i2s_exc = Bits(0) + val i2s = new hardfloat.anyToRecodedFloat32 + i2s.io.in := io.in + i2s.io.roundingMode := io.fsr >> UFix(5) + i2s.io.typeOp := ~io.cmd(1,0) - val i2d = Bits(0) - val i2d_exc = Bits(0) + val i2d = new hardfloat.anyToRecodedFloat64 + i2d.io.in := io.in + i2d.io.roundingMode := io.fsr >> UFix(5) + i2d.io.typeOp := ~io.cmd(1,0) // output muxing val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) @@ -312,10 +324,10 @@ class rocketIntFPUnit extends Component when (io.cmd === FCMD_CVT_FMT_W || io.cmd === FCMD_CVT_FMT_WU || io.cmd === FCMD_CVT_FMT_L || io.cmd === FCMD_CVT_FMT_LU) { - out_s := i2s - exc_s := i2s_exc - out_d := i2d - exc_d := i2d_exc + out_s := i2s.io.out + exc_s := i2s.io.exceptionFlags + out_d := i2d.io.out + exc_d := i2d.io.exceptionFlags } when (io.cmd === FCMD_MTFSR || io.cmd === FCMD_MFFSR) { out_s := Cat(out_s(32,FSR_WIDTH), io.in(FSR_WIDTH-1,0)) @@ -427,11 +439,14 @@ class rocketFPU extends Component val retire_toint = Reg(!io.ctrl.killm && fp_toint_val, resetVal = Bool(false)) val retire_toint_exc = Reg(fpiu.io.exc) val retire_fromint = Reg(!io.ctrl.killm && fp_fromint_val, resetVal = Bool(false)) + val retire_fromint_exc = Reg(ifpu.io.exc) val retire_fromint_wdata = Reg(ifpu.io.out) val retire_fromint_waddr = Reg(fp_waddr) - when (retire_toint) { - fsr_exc := fsr_exc | retire_toint_exc + when (retire_toint || retire_fromint) { + fsr_exc := fsr_exc | + Fill(fsr_exc.getWidth, retire_toint) & retire_toint_exc | + Fill(fsr_exc.getWidth, retire_fromint) & retire_fromint_exc } when (retire_toint && retire_fromint) { // MTFSR fsr_exc := retire_fromint_wdata(4,0) From c78c738f60b979c1b1fcec2865489ba4914e76a4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 13 Feb 2012 03:13:49 -0800 Subject: [PATCH 0155/1087] minor cleanups --- rocket/src/main/scala/consts.scala | 10 ---------- rocket/src/main/scala/ctrl.scala | 4 ++-- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git 
a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 12a5dfac..34026971 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -24,9 +24,6 @@ object Constants val PC_EVEC = UFix(6, 3); val PC_JR = UFix(7, 3); - val KF_Y = UFix(1, 1); - val KF_N = UFix(0, 1); - val REN_Y = UFix(1, 1); val REN_N = UFix(0, 1); @@ -69,13 +66,6 @@ object Constants val N = UFix(0, 1); val Y = UFix(1, 1); - val Y_SH = UFix(1, 1); - - val FWBQ_N = UFix(0, 1); - val FWBQ_Y = UFix(1, 1); - - val FSDQ_N = UFix(0, 1); - val FSDQ_Y = UFix(1, 1); val FN_X = UFix(0, 4); val FN_ADD = UFix(0, 4); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index eb4b25a2..49ca4dbc 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -158,8 +158,8 @@ class rocketCtrl extends Component ORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), XORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), SLLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLI-> List(Y_SH, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAI-> List(Y_SH, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + SRAI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), ADD-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), SUB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), SLT-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), From 6c2d8a37ae8fa033fd8d94cdec8087070783cf86 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 13 Feb 2012 16:45:29 -0800 Subject: [PATCH 0156/1087] remove a partial update that makes chisel barf chisel regards it as a combinational loop, even though it isn't. --- rocket/src/main/scala/fpu.scala | 6 +++--- rocket/src/main/scala/top.scala | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 44defdf7..3ea7238d 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -322,6 +322,9 @@ class rocketIntFPUnit extends Component out_d := rec_d.io.out exc_d := Bits(0) + when (io.cmd === FCMD_MTFSR || io.cmd === FCMD_MFFSR) { + out_s := io.in(FSR_WIDTH-1,0) + } when (io.cmd === FCMD_CVT_FMT_W || io.cmd === FCMD_CVT_FMT_WU || io.cmd === FCMD_CVT_FMT_L || io.cmd === FCMD_CVT_FMT_LU) { out_s := i2s.io.out @@ -329,9 +332,6 @@ class rocketIntFPUnit extends Component out_d := i2d.io.out exc_d := i2d.io.exceptionFlags } - when (io.cmd === FCMD_MTFSR || io.cmd === FCMD_MFFSR) { - out_s := Cat(out_s(32,FSR_WIDTH), io.in(FSR_WIDTH-1,0)) - } io.out := Mux(io.single, Cat(Fill(32,UFix(1)), out_s), out_d) io.exc := Mux(io.single, exc_s, exc_d) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 605f882c..58aba59b 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -37,9 +37,9 @@ class Top() extends Component { object top_main { def main(args: Array[String]) = { // Can turn off --debug and --vcd when done with debugging to improve emulator performance - val 
cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); +// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); // val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug"); -// val cpu_args = args ++ Array("--target-dir", "generated-src"); + val cpu_args = args ++ Array("--target-dir", "generated-src"); // Set variables based off of command flags // for(a <- args) { // a match { From 0366465cb100910fea87ed545ccc716135860395 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 13 Feb 2012 18:12:23 -0800 Subject: [PATCH 0157/1087] parameterize the scoreboards --- rocket/src/main/scala/ctrl.scala | 52 ++++++++++++++------------- rocket/src/main/scala/ctrl_util.scala | 52 +++++++++++++-------------- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 49ca4dbc..2897b41e 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -517,40 +517,44 @@ class rocketCtrl extends Component wb_reg_fp_val := mem_reg_fp_val } - val sboard = new rocketCtrlSboard(); - sboard.io.raddra := id_raddr2.toUFix; - sboard.io.raddrb := id_raddr1.toUFix; - sboard.io.raddrc := id_waddr.toUFix; + val sboard = new rocketCtrlSboard(32, 3, 2); + sboard.io.r(0).addr := id_raddr2.toUFix; + sboard.io.r(1).addr := id_raddr1.toUFix; + sboard.io.r(2).addr := id_waddr.toUFix; // scoreboard set (for D$ misses, div, mul) - sboard.io.set := wb_reg_div_mul_val || wb_reg_dcache_miss && wb_reg_wen; - sboard.io.seta := io.dpath.wb_waddr; + sboard.io.w(0).en := wb_reg_div_mul_val || wb_reg_dcache_miss && wb_reg_wen + sboard.io.w(0).data := Bool(true) + sboard.io.w(0).addr := io.dpath.wb_waddr - sboard.io.clr := io.dpath.sboard_clr; - sboard.io.clra := io.dpath.sboard_clra; + sboard.io.w(1).en := io.dpath.sboard_clr + sboard.io.w(1).data := Bool(false) + sboard.io.w(1).addr := io.dpath.sboard_clra - val id_stall_raddr2 = 
id_renx2.toBool && sboard.io.stalla; - val id_stall_raddr1 = id_renx1.toBool && sboard.io.stallb; - val id_stall_waddr = id_wen.toBool && sboard.io.stallc; + val id_stall_raddr2 = id_renx2.toBool && sboard.io.r(0).data + val id_stall_raddr1 = id_renx1.toBool && sboard.io.r(1).data + val id_stall_waddr = id_wen.toBool && sboard.io.r(2).data var id_stall_fpu = Bool(false) if (HAVE_FPU) { - val fp_sboard = new rocketCtrlSboard(); - fp_sboard.io.raddra := id_raddr1.toUFix; - fp_sboard.io.raddrb := id_raddr2.toUFix; - fp_sboard.io.raddrc := id_raddr3.toUFix; - fp_sboard.io.raddrd := id_waddr.toUFix; + val fp_sboard = new rocketCtrlSboard(32, 4, 2); + fp_sboard.io.r(0).addr := id_raddr1.toUFix + fp_sboard.io.r(1).addr := id_raddr2.toUFix + fp_sboard.io.r(2).addr := id_raddr3.toUFix + fp_sboard.io.r(3).addr := id_waddr.toUFix - fp_sboard.io.set := wb_reg_dcache_miss && wb_reg_fp_wen; - fp_sboard.io.seta := io.dpath.wb_waddr; + fp_sboard.io.w(0).en := wb_reg_dcache_miss && wb_reg_fp_wen + fp_sboard.io.w(0).data := Bool(true) + fp_sboard.io.w(0).addr := io.dpath.wb_waddr - fp_sboard.io.clr := io.dpath.fp_sboard_clr; - fp_sboard.io.clra := io.dpath.fp_sboard_clra; + fp_sboard.io.w(1).en := io.dpath.fp_sboard_clr + fp_sboard.io.w(1).data := Bool(false) + fp_sboard.io.w(1).addr := io.dpath.fp_sboard_clra - id_stall_fpu = io.fpu.dec.ren1 && fp_sboard.io.stalla || - io.fpu.dec.ren2 && fp_sboard.io.stallb || - io.fpu.dec.ren3 && fp_sboard.io.stallc || - io.fpu.dec.wen && fp_sboard.io.stalld + id_stall_fpu = io.fpu.dec.ren1 && fp_sboard.io.r(0).data || + io.fpu.dec.ren2 && fp_sboard.io.r(1).data || + io.fpu.dec.ren3 && fp_sboard.io.r(2).data || + io.fpu.dec.wen && fp_sboard.io.r(3).data } // exception handling diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index 8d99b83c..86ffcd78 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -2,35 +2,31 @@ package Top import Chisel._ import Node._; 
-import Constants._; -class ioCtrlSboard extends Bundle() +class rocketCtrlSboard(entries: Int, nread: Int, nwrite: Int) extends Component { - val clr = Bool(INPUT); - val clra = UFix(5, INPUT); - val set = Bool(INPUT); - val seta = UFix(5, INPUT); - val raddra = UFix(5, INPUT); - val raddrb = UFix(5, INPUT); - val raddrc = UFix(5, INPUT); - val raddrd = UFix(5, INPUT); - val stalla = Bool(OUTPUT); - val stallb = Bool(OUTPUT); - val stallc = Bool(OUTPUT); - val stalld = Bool(OUTPUT); -} + class read_port extends Bundle { + val addr = UFix(log2up(entries), INPUT) + val data = Bool(OUTPUT) + } + class write_port extends Bundle { + val en = Bool(INPUT) + val addr = UFix(log2up(entries), INPUT) + val data = Bool(INPUT) + } -class rocketCtrlSboard extends Component -{ - override val io = new ioCtrlSboard(); - val reg_busy = Reg(resetVal = Bits(0, 32)); - - val set_mask = io.set.toUFix << io.seta; - val clr_mask = ~(io.clr.toUFix << io.clra); - reg_busy := (reg_busy | set_mask) & clr_mask - - io.stalla := reg_busy(io.raddra).toBool; - io.stallb := reg_busy(io.raddrb).toBool; - io.stallc := reg_busy(io.raddrc).toBool; - io.stalld := reg_busy(io.raddrd).toBool; + val io = new Bundle { + val r = Vec(nread) { new read_port() } + val w = Vec(nwrite) { new write_port() } + } + + val busybits = Reg(resetVal = Bits(0, entries)); + + for (i <- 0 until nread) + io.r(i).data := busybits(io.r(i).addr) + + var wdata = busybits + for (i <- 0 until nwrite) + wdata = wdata.bitSet(io.w(i).addr, Mux(io.w(i).en, io.w(i).data, wdata(io.w(i).addr))) + busybits := wdata } From 6d36168183619416bfb324be231b5d339e5455e5 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 13 Feb 2012 14:37:51 -0800 Subject: [PATCH 0158/1087] Fixed two associative nbcache bugs, one in amo replays and one in the flush unit --- rocket/src/main/scala/nbdcache.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 
34b0e925..246cdeb1 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -508,7 +508,7 @@ class FlushUnit(lines: Int) extends Component { io.meta_req.bits.inner_req.data.valid := Bool(false) io.meta_req.bits.inner_req.data.dirty := Bool(false) io.meta_req.bits.inner_req.data.tag := UFix(0) - io.wb_req.valid := state === s_meta_wait + io.wb_req.valid := state === s_meta_wait && io.meta_resp.valid && io.meta_resp.dirty io.wb_req.bits.ppn := io.meta_resp.tag io.wb_req.bits.idx := idx_cnt io.wb_req.bits.way_oh := UFixToOH(way_cnt, NWAYS) @@ -713,6 +713,7 @@ class HellaCache extends Component { val r_cpu_req_type = Reg() { Bits() } val r_cpu_req_tag = Reg() { Bits() } val r_amo_replay_data = Reg() { Bits() } + val r_way_oh = Reg() { Bits() } val p_store_valid = Reg(resetVal = Bool(false)) val p_store_data = Reg() { Bits() } @@ -751,6 +752,7 @@ class HellaCache extends Component { r_cpu_req_cmd := replayer.io.data_req.bits.cmd r_cpu_req_type := replayer.io.data_req.bits.typ r_amo_replay_data := replayer.io.data_req.bits.data + r_way_oh := replayer.io.way_oh } val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) @@ -870,7 +872,7 @@ class HellaCache extends Component { p_store_idx := r_cpu_req_idx p_store_type := r_cpu_req_type p_store_cmd := r_cpu_req_cmd - p_store_way_oh := Mux(r_replay_amo, replayer.io.way_oh, hit_way_oh) + p_store_way_oh := Mux(r_replay_amo, r_way_oh, hit_way_oh) p_store_data := cpu_req_data } when (p_amo) { From 0671a99712d37ee108b65a6f65a285db40806993 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 13 Feb 2012 21:43:49 -0800 Subject: [PATCH 0159/1087] NBcache works with associativities other than powers of 2 --- rocket/src/main/scala/nbdcache.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 246cdeb1..b014ac74 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ 
b/rocket/src/main/scala/nbdcache.scala @@ -14,7 +14,13 @@ class ioReplacementWayGen extends Bundle { class RandomReplacementWayGen extends Component { val io = new ioReplacementWayGen() //TODO: Actually limit selection based on which ways are allowed (io.ways_en) - if(NWAYS > 1) io.way_id := LFSR16(io.pick_new_way)(log2up(NWAYS)-1,0) + if(NWAYS > 1) + { + val rand_way_id = UFix(width = log2up(NWAYS)) + rand_way_id := LFSR16(io.pick_new_way)(log2up(NWAYS)-1,0) + when (rand_way_id >= UFix(NWAYS, width = log2up(NWAYS))) { io.way_id := UFix(0, width = log2up(NWAYS)) } + .otherwise { io.way_id := rand_way_id } + } else io.way_id := UFix(0) } From 15dc2d8c40d968b28c125aa68af812b148fea9d8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 14 Feb 2012 00:32:25 -0800 Subject: [PATCH 0160/1087] add fp writeback arbitration logic --- rocket/src/main/scala/cpu.scala | 2 +- rocket/src/main/scala/ctrl.scala | 13 +- rocket/src/main/scala/fpu.scala | 217 +++++++++++++++---------------- 3 files changed, 116 insertions(+), 116 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 5462e07e..ad2b5698 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -116,7 +116,7 @@ class rocketProc extends Component if (HAVE_FPU) { - val fpu = new rocketFPU + val fpu = new rocketFPU(4,6) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2897b41e..48fa1318 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -328,6 +328,7 @@ class rocketCtrl extends Component val ex_reg_xcpt_privileged = Reg(resetVal = Bool(false)); val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val ex_reg_fp_val = Reg(resetVal = Bool(false)); + val ex_reg_fp_sboard_set = Reg(resetVal = Bool(false)); val ex_reg_vec_val = Reg(resetVal = Bool(false)); val ex_reg_replay = Reg(resetVal = Bool(false)); val 
ex_reg_load_use = Reg(resetVal = Bool(false)); @@ -349,6 +350,7 @@ class rocketCtrl extends Component val mem_reg_replay = Reg(resetVal = Bool(false)); val mem_reg_kill = Reg(resetVal = Bool(false)); val mem_reg_ext_mem_val = Reg(resetVal = Bool(false)) + val mem_reg_fp_sboard_set = Reg(resetVal = Bool(false)); val wb_reg_wen = Reg(resetVal = Bool(false)); val wb_reg_fp_wen = Reg(resetVal = Bool(false)); @@ -360,6 +362,7 @@ class rocketCtrl extends Component val wb_reg_replay = Reg(resetVal = Bool(false)); val wb_reg_cause = Reg(){UFix()}; val wb_reg_fp_val = Reg(resetVal = Bool(false)); + val wb_reg_fp_sboard_set = Reg(resetVal = Bool(false)); val take_pc = Wire() { Bool() }; @@ -402,6 +405,7 @@ class rocketCtrl extends Component ex_reg_xcpt_privileged := Bool(false); ex_reg_xcpt_syscall := Bool(false); ex_reg_fp_val := Bool(false); + ex_reg_fp_sboard_set := Bool(false); ex_reg_vec_val := Bool(false); ex_reg_replay := Bool(false); ex_reg_load_use := Bool(false); @@ -424,7 +428,8 @@ class rocketCtrl extends Component ex_reg_xcpt_illegal := illegal_inst; ex_reg_xcpt_privileged := (id_privileged & ~io.dpath.status(SR_S)).toBool; ex_reg_xcpt_syscall := id_syscall.toBool; - ex_reg_fp_val := io.fpu.dec.valid; + ex_reg_fp_val := io.fpu.dec.valid + ex_reg_fp_sboard_set := io.fpu.dec.sboard ex_reg_vec_val := id_vec_val.toBool ex_reg_replay := id_reg_replay || ex_reg_replay_next; ex_reg_load_use := id_load_use; @@ -473,6 +478,7 @@ class rocketCtrl extends Component mem_reg_xcpt_vec := Bool(false); mem_reg_xcpt_syscall := Bool(false); mem_reg_fp_val := Bool(false); + mem_reg_fp_sboard_set := Bool(false) } .otherwise { mem_reg_div_mul_val := ex_reg_div_val || ex_reg_mul_val; @@ -491,6 +497,7 @@ class rocketCtrl extends Component mem_reg_xcpt_vec := ex_reg_vec_val && !io.dpath.status(SR_EV).toBool; mem_reg_xcpt_syscall := ex_reg_xcpt_syscall; mem_reg_fp_val := ex_reg_fp_val + mem_reg_fp_sboard_set := ex_reg_fp_sboard_set } mem_reg_ext_mem_val := ex_reg_ext_mem_val; 
mem_reg_mem_cmd := ex_reg_mem_cmd; @@ -505,6 +512,7 @@ class rocketCtrl extends Component wb_reg_flush_inst := Bool(false); wb_reg_div_mul_val := Bool(false); wb_reg_fp_val := Bool(false) + wb_reg_fp_sboard_set := Bool(false) } .otherwise { wb_reg_wen := mem_reg_wen; @@ -515,6 +523,7 @@ class rocketCtrl extends Component wb_reg_flush_inst := mem_reg_flush_inst; wb_reg_div_mul_val := mem_reg_div_mul_val; wb_reg_fp_val := mem_reg_fp_val + wb_reg_fp_sboard_set := mem_reg_fp_sboard_set } val sboard = new rocketCtrlSboard(32, 3, 2); @@ -543,7 +552,7 @@ class rocketCtrl extends Component fp_sboard.io.r(2).addr := id_raddr3.toUFix fp_sboard.io.r(3).addr := id_waddr.toUFix - fp_sboard.io.w(0).en := wb_reg_dcache_miss && wb_reg_fp_wen + fp_sboard.io.w(0).en := wb_reg_dcache_miss && wb_reg_fp_wen || wb_reg_fp_sboard_set fp_sboard.io.w(0).data := Bool(true) fp_sboard.io.w(0).addr := io.dpath.wb_waddr diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 3ea7238d..be242bbf 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -50,14 +50,18 @@ class rocketFPUCtrlSigs extends Bundle val cmd = Bits(width = FCMD_WIDTH) val valid = Bool() val wen = Bool() + val sboard = Bool() val ren1 = Bool() val ren2 = Bool() val ren3 = Bool() val single = Bool() val fromint = Bool() val toint = Bool() + val fastpipe = Bool() + val fma = Bool() val store = Bool() - val fsr = Bool() + val rdfsr = Bool() + val wrfsr = Bool() } class rocketFPUDecoder extends Component @@ -66,128 +70,68 @@ class rocketFPUDecoder extends Component val inst = Bits(32, INPUT) val sigs = new rocketFPUCtrlSigs().asOutput } -// val fp = -// ListLookup(io.dpath.inst, -// List(FPU_N, FPU_N, FPU_N, FPU_N, FPU_N), -// Array( -// FMOVZ -> List(Bool(true)), -// FMOVN -> List(Bool(true)), -// FADD_S -> List(Bool(true)), -// FSUB_S -> List(Bool(true)), -// FMUL_S -> List(Bool(true)), -// FDIV_S -> List(Bool(true)), -// FSQRT_S -> List(Bool(true)), -// FSGNJ_S -> 
List(Bool(true)), -// FSGNJN_S -> List(Bool(true)), -// FSGNJX_S -> List(Bool(true)), -// FADD_D -> List(Bool(true)), -// FSUB_D -> List(Bool(true)), -// FMUL_D -> List(Bool(true)), -// FDIV_D -> List(Bool(true)), -// FSQRT_D -> List(Bool(true)), -// FSGNJ_D -> List(Bool(true)), -// FSGNJN_D -> List(Bool(true)), -// FSGNJX_D -> List(Bool(true)), -// FCVT_L_S -> List(Bool(true)), -// FCVT_LU_S -> List(Bool(true)), -// FCVT_W_S -> List(Bool(true)), -// FCVT_WU_S -> List(Bool(true)), -// FCVT_L_D -> List(Bool(true)), -// FCVT_LU_D -> List(Bool(true)), -// FCVT_W_D -> List(Bool(true)), -// FCVT_WU_D -> List(Bool(true)), -// FCVT_S_L -> List(Bool(true)), -// FCVT_S_LU -> List(Bool(true)), -// FCVT_S_W -> List(Bool(true)), -// FCVT_S_WU -> List(Bool(true)), -// FCVT_D_L -> List(Bool(true)), -// FCVT_D_LU -> List(Bool(true)), -// FCVT_D_W -> List(Bool(true)), -// FCVT_D_WU -> List(Bool(true)), -// FCVT_S_D -> List(Bool(true)), -// FCVT_D_S -> List(Bool(true)), -// FEQ_S -> List(Bool(true)), -// FLT_S -> List(Bool(true)), -// FLE_S -> List(Bool(true)), -// FEQ_D -> List(Bool(true)), -// FLT_D -> List(Bool(true)), -// FLE_D -> List(Bool(true)), -// FMIN_S -> List(Bool(true)), -// FMAX_S -> List(Bool(true)), -// FMIN_D -> List(Bool(true)), -// FMAX_D -> List(Bool(true)), -// MFTX_S -> List(Bool(true)), -// MFTX_D -> List(Bool(true)), -// MFFSR -> List(Bool(true)), -// MXTF_S -> List(Bool(true)), -// MXTF_D -> List(Bool(true)), -// MTFSR -> List(Bool(true)), -// FLW -> List(FPU_Y, FPU_Y, FPU_N, FPU_N, FPU_N), -// FLD -> List(FPU_Y, FPU_Y, FPU_N, FPU_N, FPU_N), -// FSW -> List(FPU_Y, FPU_N, FPU_N, FPU_Y, FPU_N), -// FSD -> List(FPU_Y, FPU_N, FPU_N, FPU_Y, FPU_N) -// FMADD_S -> List(Bool(true)), -// FMSUB_S -> List(Bool(true)), -// FNMSUB_S -> List(Bool(true)), -// FNMADD_S -> List(Bool(true)), -// FMADD_D -> List(Bool(true)), -// FMSUB_D -> List(Bool(true)), -// FNMSUB_D -> List(Bool(true)), -// FNMADD_D -> List(Bool(true)) -// )); val N = Bool(false) val Y = Bool(true) val X 
= Bool(false) val FCMD_X = FCMD_ADD val decoder = ListLookup(io.inst, - List (N,FCMD_X, X,X,X,X,X,X,X,X,X), - Array(FLW -> List(Y,FCMD_LOAD, Y,N,N,N,Y,N,N,N,N), - FLD -> List(Y,FCMD_LOAD, Y,N,N,N,N,N,N,N,N), - FSW -> List(Y,FCMD_STORE, N,N,Y,N,Y,N,N,Y,N), - FSD -> List(Y,FCMD_STORE, N,N,Y,N,N,N,N,Y,N), - MXTF_S -> List(Y,FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N), - MXTF_D -> List(Y,FCMD_MXTF, Y,N,N,N,N,Y,N,N,N), - FCVT_S_W -> List(Y,FCMD_CVT_FMT_W, Y,N,N,N,Y,Y,N,N,N), - FCVT_D_W -> List(Y,FCMD_CVT_FMT_W, Y,N,N,N,N,Y,N,N,N), - FCVT_S_WU-> List(Y,FCMD_CVT_FMT_WU,Y,N,N,N,Y,Y,N,N,N), - FCVT_D_WU-> List(Y,FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,N,N,N), - FCVT_S_L -> List(Y,FCMD_CVT_FMT_L, Y,N,N,N,Y,Y,N,N,N), - FCVT_D_L -> List(Y,FCMD_CVT_FMT_L, Y,N,N,N,N,Y,N,N,N), - FCVT_S_LU-> List(Y,FCMD_CVT_FMT_LU,Y,N,N,N,Y,Y,N,N,N), - FCVT_D_LU-> List(Y,FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,N,N,N), - MFTX_S -> List(Y,FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N), - MFTX_D -> List(Y,FCMD_MFTX, N,Y,N,N,N,N,Y,N,N), - FCVT_W_S -> List(Y,FCMD_CVT_W_FMT, N,Y,N,N,Y,N,Y,N,N), - FCVT_W_D -> List(Y,FCMD_CVT_W_FMT, N,Y,N,N,N,N,Y,N,N), - FCVT_WU_S-> List(Y,FCMD_CVT_WU_FMT,N,Y,N,N,Y,N,Y,N,N), - FCVT_WU_D-> List(Y,FCMD_CVT_WU_FMT,N,Y,N,N,N,N,Y,N,N), - FCVT_L_S -> List(Y,FCMD_CVT_L_FMT, N,Y,N,N,Y,N,Y,N,N), - FCVT_L_D -> List(Y,FCMD_CVT_L_FMT, N,Y,N,N,N,N,Y,N,N), - FCVT_LU_S-> List(Y,FCMD_CVT_LU_FMT,N,Y,N,N,Y,N,Y,N,N), - FCVT_LU_D-> List(Y,FCMD_CVT_LU_FMT,N,Y,N,N,N,N,Y,N,N), - FEQ_S -> List(Y,FCMD_EQ, N,Y,Y,N,Y,N,Y,N,N), - FEQ_D -> List(Y,FCMD_EQ, N,Y,Y,N,N,N,Y,N,N), - FLT_S -> List(Y,FCMD_LT, N,Y,Y,N,Y,N,Y,N,N), - FLT_D -> List(Y,FCMD_LT, N,Y,Y,N,N,N,Y,N,N), - FLE_S -> List(Y,FCMD_LE, N,Y,Y,N,Y,N,Y,N,N), - FLE_D -> List(Y,FCMD_LE, N,Y,Y,N,N,N,Y,N,N), - MTFSR -> List(Y,FCMD_MTFSR, N,N,N,N,Y,Y,Y,N,Y), - MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,Y,N,Y,N,Y) + List (N,FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X), + Array(FLW -> List(Y,FCMD_LOAD, Y,N,N,N,N,Y,N,N,N,N,N,N,N), + FLD -> List(Y,FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N,N,N,N), + FSW -> List(Y,FCMD_STORE, 
N,N,N,Y,N,Y,N,N,N,N,Y,N,N), + FSD -> List(Y,FCMD_STORE, N,N,N,Y,N,N,N,N,N,N,Y,N,N), + MXTF_S -> List(Y,FCMD_MXTF, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), + MXTF_D -> List(Y,FCMD_MXTF, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), + FCVT_S_W -> List(Y,FCMD_CVT_FMT_W, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), + FCVT_S_WU-> List(Y,FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), + FCVT_S_L -> List(Y,FCMD_CVT_FMT_L, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), + FCVT_S_LU-> List(Y,FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), + FCVT_D_W -> List(Y,FCMD_CVT_FMT_W, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), + FCVT_D_WU-> List(Y,FCMD_CVT_FMT_WU,Y,N,N,N,N,N,Y,N,Y,N,N,N,N), + FCVT_D_L -> List(Y,FCMD_CVT_FMT_L, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), + FCVT_D_LU-> List(Y,FCMD_CVT_FMT_LU,Y,N,N,N,N,N,Y,N,Y,N,N,N,N), + MFTX_S -> List(Y,FCMD_MFTX, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), + MFTX_D -> List(Y,FCMD_MFTX, N,N,Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_W_S -> List(Y,FCMD_CVT_W_FMT, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), + FCVT_WU_S-> List(Y,FCMD_CVT_WU_FMT,N,N,Y,N,N,Y,N,Y,N,N,N,N,N), + FCVT_L_S -> List(Y,FCMD_CVT_L_FMT, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), + FCVT_LU_S-> List(Y,FCMD_CVT_LU_FMT,N,N,Y,N,N,Y,N,Y,N,N,N,N,N), + FCVT_W_D -> List(Y,FCMD_CVT_W_FMT, N,N,Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_WU_D-> List(Y,FCMD_CVT_WU_FMT,N,N,Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_L_D -> List(Y,FCMD_CVT_L_FMT, N,N,Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_LU_D-> List(Y,FCMD_CVT_LU_FMT,N,N,Y,N,N,N,N,Y,N,N,N,N,N), + FEQ_S -> List(Y,FCMD_EQ, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), + FLT_S -> List(Y,FCMD_LT, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), + FLE_S -> List(Y,FCMD_LE, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), + FEQ_D -> List(Y,FCMD_EQ, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), + FLT_D -> List(Y,FCMD_LT, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), + FLE_D -> List(Y,FCMD_LE, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), + MTFSR -> List(Y,FCMD_MTFSR, N,N,N,N,N,Y,N,Y,N,N,N,Y,Y), + MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,N,Y,N,Y,N,N,N,Y,N) +// FADD_S -> List(Y,FCMD_ADD, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), +// FSUB_S -> List(Y,FCMD_SUB, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), +// FMUL_S -> List(Y,FCMD_MUL, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), +// FMIN_S -> 
List(Y,FCMD_MIN, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), +// FMAX_S -> List(Y,FCMD_MAX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N) )) - val valid :: cmd :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: store :: fsr :: Nil = decoder + val valid :: cmd :: wen :: sboard :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: fma :: store :: rdfsr :: wrfsr :: Nil = decoder io.sigs.valid := valid.toBool io.sigs.cmd := cmd io.sigs.wen := wen.toBool + io.sigs.sboard := sboard.toBool io.sigs.ren1 := ren1.toBool io.sigs.ren2 := ren2.toBool io.sigs.ren3 := ren3.toBool io.sigs.single := single.toBool io.sigs.fromint := fromint.toBool io.sigs.toint := toint.toBool + io.sigs.fastpipe := fastpipe.toBool + io.sigs.fma := fma.toBool io.sigs.store := store.toBool - io.sigs.fsr := fsr.toBool + io.sigs.rdfsr := rdfsr.toBool + io.sigs.wrfsr := wrfsr.toBool } class ioDpathFPU extends Bundle { @@ -337,7 +281,7 @@ class rocketIntFPUnit extends Component io.exc := Mux(io.single, exc_s, exc_d) } -class rocketFPU extends Component +class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component { val io = new Bundle { val ctrl = new ioCtrlFPU().flip() @@ -390,6 +334,7 @@ class rocketFPU extends Component val fp_fromint_data = Reg() { Bits() } val fp_toint_val = Reg(resetVal = Bool(false)) val fp_toint_data = Reg() { Bits() } + val fp_wrfsr_val = Reg(resetVal = Bool(false)) val fp_cmp_data = Reg() { Bits() } val fp_toint_single = Reg() { Bool() } val fp_toint_cmd = Reg() { Bits() } @@ -397,14 +342,20 @@ class rocketFPU extends Component fp_fromint_val := Bool(false) fp_toint_val := Bool(false) + fp_wrfsr_val := Bool(false) when (reg_valid) { fp_waddr := reg_inst(31,27) - when (ctrl.fromint) { + when (ctrl.fromint || ctrl.wrfsr) { fp_fromint_val := !io.ctrl.killx fp_fromint_data := io.dpath.fromint_data } + when (ctrl.wrfsr) { + fp_wrfsr_val := !io.ctrl.killx + } when (ctrl.toint) { fp_toint_val := !io.ctrl.killx + } + when (ctrl.toint || ctrl.fastpipe) { fp_toint_data := ex_rs1 when 
(ctrl.ren2) { fp_cmp_data := ex_rs2 @@ -436,6 +387,7 @@ class rocketFPU extends Component ifpu.io.fsr := Cat(fsr_rm, fsr_exc) ifpu.io.in := fp_fromint_data + val retire_wrfsr = Reg(!io.ctrl.killm && fp_wrfsr_val, resetVal = Bool(false)) val retire_toint = Reg(!io.ctrl.killm && fp_toint_val, resetVal = Bool(false)) val retire_toint_exc = Reg(fpiu.io.exc) val retire_fromint = Reg(!io.ctrl.killm && fp_fromint_val, resetVal = Bool(false)) @@ -448,18 +400,57 @@ class rocketFPU extends Component Fill(fsr_exc.getWidth, retire_toint) & retire_toint_exc | Fill(fsr_exc.getWidth, retire_fromint) & retire_fromint_exc } - when (retire_toint && retire_fromint) { // MTFSR + when (retire_wrfsr) { // MTFSR fsr_exc := retire_fromint_wdata(4,0) fsr_rm := retire_fromint_wdata(7,5) } - regfile.write(retire_fromint_waddr, retire_fromint_wdata, retire_fromint && !retire_toint) + // writeback arbitration + val wen = Reg(resetVal = Bits(0, dfma_latency-1)) + val winfo = Vec(dfma_latency-1) { Reg() { Bits() } } + val mem_stage_wen = Reg(resetVal = Bool(false)) - val fp_inflight = fp_toint_val || retire_toint || fp_fromint_val || retire_fromint - val mtfsr_inflight = fp_toint_val && fp_fromint_val || retire_toint && retire_fromint - val fsr_busy = ctrl.fsr && fp_inflight || mtfsr_inflight + val fastpipe_latency = 2 + require(fastpipe_latency < sfma_latency && sfma_latency <= dfma_latency) + val ex_stage_fu_latency = Mux(ctrl.fastpipe, UFix(fastpipe_latency-1), + Mux(ctrl.single, UFix(sfma_latency-1), + UFix(dfma_latency-1))) + val mem_stage_fu_latency = Reg(ex_stage_fu_latency - UFix(1)) + val write_port_busy = ctrl.fastpipe && wen(fastpipe_latency-1) || + Bool(sfma_latency < dfma_latency) && ctrl.fma && ctrl.single && wen(sfma_latency-1) || + mem_stage_wen && mem_stage_fu_latency === ex_stage_fu_latency + mem_stage_wen := reg_valid && !io.ctrl.killx && (ctrl.fma || ctrl.fastpipe) + val ex_stage_wsrc = Cat(ctrl.fastpipe, ctrl.single) + val mem_stage_winfo = Reg(Cat(reg_inst(31,27), 
ex_stage_wsrc)) + + for (i <- 0 until dfma_latency-2) { + winfo(i) := winfo(i+1) + } + when (mem_stage_wen) { + when (!io.ctrl.killm) { + wen := (wen >> UFix(1)) | (UFix(1) << mem_stage_fu_latency) + } + for (i <- 0 until dfma_latency-1) { + when (UFix(i) === mem_stage_fu_latency) { + winfo(i) := mem_stage_winfo + } + } + } + .otherwise { + wen := wen >> UFix(1) + } + + val wsrc = winfo(0)(1,0) + val wdata = Mux(wsrc === UFix(0), UFix(0), // DFMA + Mux(wsrc === UFix(1), UFix(0), // SFMA + Mux(wsrc === UFix(2), retire_fromint_wdata, + retire_fromint_wdata))) + val waddr = winfo(0) >> UFix(2) + regfile.write(waddr, wdata, wen(0)) + + val fp_inflight = fp_toint_val || retire_toint || mem_stage_wen || wen.orR + val fsr_busy = ctrl.rdfsr && fp_inflight || fp_wrfsr_val || retire_wrfsr val units_busy = Bool(false) - val write_port_busy = Bool(false) io.ctrl.nack := fsr_busy || units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs // we don't currently support round-max-magnitude (rm=4) From 1d604bcd49ce3e59b3ede977f7dbf533967d85c3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 14 Feb 2012 02:53:43 -0800 Subject: [PATCH 0161/1087] remove top-level Makefile new, simpler build instructions are in the README. note that for "make run-asm-tests-debug" you need to update your fesvr. 
--- rocket/src/main/scala/top.scala | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 58aba59b..ac3678c0 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -1,4 +1,4 @@ -package Top { +package Top import Chisel._ import Node._; @@ -36,20 +36,6 @@ class Top() extends Component { object top_main { def main(args: Array[String]) = { - // Can turn off --debug and --vcd when done with debugging to improve emulator performance -// val cpu_args = args ++ Array("--target-dir", "generated-src","--debug","--vcd"); -// val cpu_args = args ++ Array("--target-dir", "generated-src", "--debug"); - val cpu_args = args ++ Array("--target-dir", "generated-src"); - // Set variables based off of command flags -// for(a <- args) { -// a match { -// case "-bp" => isBranchPrediction = true; -// case any => -// } -// } - - chiselMain(cpu_args, () => new Top()); + chiselMain(args, () => new Top()); } } - -} From ce202c73d14a4a59b3ee1c0c25a91ea14a3e7a46 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 14 Feb 2012 04:24:35 -0800 Subject: [PATCH 0162/1087] add fsgnj[n|x].[s|d] --- rocket/src/main/scala/fpu.scala | 209 +++++++++++++++++++------------- 1 file changed, 122 insertions(+), 87 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index be242bbf..6557056b 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -12,9 +12,9 @@ object rocketFPConstants val FCMD_MUL = Bits("b000010") val FCMD_DIV = Bits("b000011") val FCMD_SQRT = Bits("b000100") - val FCMD_SGNINJ = Bits("b000101") - val FCMD_SGNINJN = Bits("b000110") - val FCMD_SGNMUL = Bits("b000111") + val FCMD_SGNJ = Bits("b000101") + val FCMD_SGNJN = Bits("b000110") + val FCMD_SGNJX = Bits("b000111") val FCMD_CVT_L_FMT = Bits("b001000") val FCMD_CVT_LU_FMT = Bits("b001001") val FCMD_CVT_W_FMT = Bits("b001010") @@ 
-108,7 +108,13 @@ class rocketFPUDecoder extends Component FLT_D -> List(Y,FCMD_LT, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), FLE_D -> List(Y,FCMD_LE, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), MTFSR -> List(Y,FCMD_MTFSR, N,N,N,N,N,Y,N,Y,N,N,N,Y,Y), - MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,N,Y,N,Y,N,N,N,Y,N) + MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,N,Y,N,Y,N,N,N,Y,N), + FSGNJ_S -> List(Y,FCMD_SGNJ, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), + FSGNJN_S -> List(Y,FCMD_SGNJN, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), + FSGNJX_S -> List(Y,FCMD_SGNJX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), + FSGNJ_D -> List(Y,FCMD_SGNJ, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), + FSGNJN_D -> List(Y,FCMD_SGNJN, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), + FSGNJX_D -> List(Y,FCMD_SGNJX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N) // FADD_S -> List(Y,FCMD_ADD, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), // FSUB_S -> List(Y,FCMD_SUB, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), // FMUL_S -> List(Y,FCMD_MUL, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), @@ -232,53 +238,77 @@ class rocketFPIntUnit extends Component io.exc := Mux(io.single, exc_s, exc_d) } -class rocketIntFPUnit extends Component +class rocketFPUFastPipe extends Component { val io = new Bundle { val single = Bool(INPUT) val cmd = Bits(FCMD_WIDTH, INPUT) val fsr = Bits(FSR_WIDTH, INPUT) - val in = Bits(64, INPUT) - val out = Bits(65, OUTPUT) - val exc = Bits(5, OUTPUT) + val fromint = Bits(64, INPUT) + val in1 = Bits(65, INPUT) + val in2 = Bits(65, INPUT) + val out_s = Bits(65, OUTPUT) + val exc_s = Bits(5, OUTPUT) + val out_d = Bits(65, OUTPUT) + val exc_d = Bits(5, OUTPUT) } + // int->fp units val rec_s = new hardfloat.float32ToRecodedFloat32 val rec_d = new hardfloat.float64ToRecodedFloat64 - rec_s.io.in := io.in - rec_d.io.in := io.in + rec_s.io.in := io.fromint + rec_d.io.in := io.fromint val i2s = new hardfloat.anyToRecodedFloat32 - i2s.io.in := io.in + i2s.io.in := io.fromint i2s.io.roundingMode := io.fsr >> UFix(5) i2s.io.typeOp := ~io.cmd(1,0) val i2d = new hardfloat.anyToRecodedFloat64 - i2d.io.in := io.in + i2d.io.in := io.fromint i2d.io.roundingMode := io.fsr >> UFix(5) i2d.io.typeOp 
:= ~io.cmd(1,0) + // fp->fp units + val sign_s = Mux(io.cmd === FCMD_SGNJ, io.in2(32), + Mux(io.cmd === FCMD_SGNJN, ~io.in2(32), + io.in1(32) ^ io.in2(32))) // FCMD_SGNJX + val sign_d = Mux(io.cmd === FCMD_SGNJ, io.in2(64), + Mux(io.cmd === FCMD_SGNJN, ~io.in2(64), + io.in1(64) ^ io.in2(64))) // FCMD_SGNJX + val fsgnj = Cat(Mux(io.single, io.in1(64), sign_d), io.in1(63,33), + Mux(io.single, sign_s, io.in1(32)), io.in1(31,0)) + // output muxing val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) - out_s := rec_s.io.out + out_s := Reg(rec_s.io.out) exc_s := Bits(0) val (out_d, exc_d) = (Wire() { Bits() }, Wire() { Bits() }) - out_d := rec_d.io.out + out_d := Reg(rec_d.io.out) exc_d := Bits(0) - when (io.cmd === FCMD_MTFSR || io.cmd === FCMD_MFFSR) { - out_s := io.in(FSR_WIDTH-1,0) + val r_cmd = Reg(io.cmd) + + when (r_cmd === FCMD_MTFSR || r_cmd === FCMD_MFFSR) { + out_s := Reg(io.fromint(FSR_WIDTH-1,0)) } - when (io.cmd === FCMD_CVT_FMT_W || io.cmd === FCMD_CVT_FMT_WU || - io.cmd === FCMD_CVT_FMT_L || io.cmd === FCMD_CVT_FMT_LU) { - out_s := i2s.io.out - exc_s := i2s.io.exceptionFlags - out_d := i2d.io.out - exc_d := i2d.io.exceptionFlags + when (r_cmd === FCMD_SGNJ || r_cmd === FCMD_SGNJN || r_cmd === FCMD_SGNJX) { + val r_fsgnj = Reg(fsgnj) + out_s := r_fsgnj(32,0) + out_d := r_fsgnj + } + when (r_cmd === FCMD_CVT_FMT_W || r_cmd === FCMD_CVT_FMT_WU || + r_cmd === FCMD_CVT_FMT_L || r_cmd === FCMD_CVT_FMT_LU) { + out_s := Reg(i2s.io.out) + exc_s := Reg(i2s.io.exceptionFlags) + out_d := Reg(i2d.io.out) + exc_d := Reg(i2d.io.exceptionFlags) } - io.out := Mux(io.single, Cat(Fill(32,UFix(1)), out_s), out_d) - io.exc := Mux(io.single, exc_s, exc_d) + io.out_s := Cat(Fill(32,UFix(1)), out_s) + io.exc_s := exc_s + io.out_d := out_d + io.exc_d := exc_d } class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component @@ -301,6 +331,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component when (io.ctrl.valid) { ctrl := fp_decoder.io.sigs } 
+ val mem_ctrl = Reg(ctrl) // load response val load_wb = Reg(io.dpath.dmem_resp_val, resetVal = Bool(false)) @@ -330,109 +361,99 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val ex_rs2 = regfile.read(reg_inst(21,17)) val ex_rs3 = regfile.read(reg_inst(16,12)) - val fp_fromint_val = Reg(resetVal = Bool(false)) - val fp_fromint_data = Reg() { Bits() } - val fp_toint_val = Reg(resetVal = Bool(false)) - val fp_toint_data = Reg() { Bits() } - val fp_wrfsr_val = Reg(resetVal = Bool(false)) - val fp_cmp_data = Reg() { Bits() } - val fp_toint_single = Reg() { Bool() } - val fp_toint_cmd = Reg() { Bits() } - val fp_waddr = Reg() { Bits() } + val mem_fromint_val = Reg(resetVal = Bool(false)) + val mem_fromint_data = Reg() { Bits() } + val mem_toint_val = Reg(resetVal = Bool(false)) + val mem_rs1 = Reg() { Bits() } + val mem_rs2 = Reg() { Bits() } + val mem_rs3 = Reg() { Bits() } + val mem_wrfsr_val = Reg(resetVal = Bool(false)) - fp_fromint_val := Bool(false) - fp_toint_val := Bool(false) - fp_wrfsr_val := Bool(false) + mem_fromint_val := Bool(false) + mem_toint_val := Bool(false) + mem_wrfsr_val := Bool(false) when (reg_valid) { - fp_waddr := reg_inst(31,27) when (ctrl.fromint || ctrl.wrfsr) { - fp_fromint_val := !io.ctrl.killx - fp_fromint_data := io.dpath.fromint_data + mem_fromint_val := !io.ctrl.killx + mem_fromint_data := io.dpath.fromint_data } when (ctrl.wrfsr) { - fp_wrfsr_val := !io.ctrl.killx + mem_wrfsr_val := !io.ctrl.killx } when (ctrl.toint) { - fp_toint_val := !io.ctrl.killx + mem_toint_val := !io.ctrl.killx } when (ctrl.toint || ctrl.fastpipe) { - fp_toint_data := ex_rs1 + mem_rs1 := ex_rs1 when (ctrl.ren2) { - fp_cmp_data := ex_rs2 + mem_rs2 := ex_rs2 } } - when (ctrl.store) { - fp_toint_data := ex_rs2 + when (ctrl.ren1) { + mem_rs1 := ex_rs1 } - when (ctrl.toint || ctrl.store) { - fp_toint_single := ctrl.single - fp_toint_cmd := ctrl.cmd + when (ctrl.ren2) { + mem_rs2 := ex_rs2 + } + when (ctrl.ren3) { + mem_rs3 := ex_rs3 + } 
+ when (ctrl.store) { + mem_rs1 := ex_rs2 } } // currently we assume FP stores and FP->int ops take 1 cycle (MEM) val fpiu = new rocketFPIntUnit - fpiu.io.single := ctrl.single - fpiu.io.cmd := ctrl.cmd + fpiu.io.single := mem_ctrl.single + fpiu.io.cmd := mem_ctrl.cmd fpiu.io.fsr := Cat(fsr_rm, fsr_exc) - fpiu.io.in1 := fp_toint_data - fpiu.io.in2 := fp_cmp_data + fpiu.io.in1 := mem_rs1 + fpiu.io.in2 := mem_rs2 io.dpath.store_data := fpiu.io.store_data io.dpath.toint_data := fpiu.io.toint_data - val ifpu = new rocketIntFPUnit - ifpu.io.single := ctrl.single - ifpu.io.cmd := ctrl.cmd - ifpu.io.fsr := Cat(fsr_rm, fsr_exc) - ifpu.io.in := fp_fromint_data + val fastpipe = new rocketFPUFastPipe + fastpipe.io.single := mem_ctrl.single + fastpipe.io.cmd := mem_ctrl.cmd + fastpipe.io.fsr := Cat(fsr_rm, fsr_exc) + fastpipe.io.fromint := mem_fromint_data + fastpipe.io.in1 := mem_rs1 + fastpipe.io.in2 := mem_rs2 - val retire_wrfsr = Reg(!io.ctrl.killm && fp_wrfsr_val, resetVal = Bool(false)) - val retire_toint = Reg(!io.ctrl.killm && fp_toint_val, resetVal = Bool(false)) - val retire_toint_exc = Reg(fpiu.io.exc) - val retire_fromint = Reg(!io.ctrl.killm && fp_fromint_val, resetVal = Bool(false)) - val retire_fromint_exc = Reg(ifpu.io.exc) - val retire_fromint_wdata = Reg(ifpu.io.out) - val retire_fromint_waddr = Reg(fp_waddr) - - when (retire_toint || retire_fromint) { - fsr_exc := fsr_exc | - Fill(fsr_exc.getWidth, retire_toint) & retire_toint_exc | - Fill(fsr_exc.getWidth, retire_fromint) & retire_fromint_exc - } - when (retire_wrfsr) { // MTFSR - fsr_exc := retire_fromint_wdata(4,0) - fsr_rm := retire_fromint_wdata(7,5) - } + val wb_wrfsr_val = Reg(!io.ctrl.killm && mem_wrfsr_val, resetVal = Bool(false)) + val wb_toint_val = Reg(!io.ctrl.killm && mem_toint_val, resetVal = Bool(false)) + val wb_toint_exc = Reg(fpiu.io.exc) // writeback arbitration val wen = Reg(resetVal = Bits(0, dfma_latency-1)) val winfo = Vec(dfma_latency-1) { Reg() { Bits() } } - val mem_stage_wen = 
Reg(resetVal = Bool(false)) + val mem_wen = Reg(resetVal = Bool(false)) val fastpipe_latency = 2 require(fastpipe_latency < sfma_latency && sfma_latency <= dfma_latency) val ex_stage_fu_latency = Mux(ctrl.fastpipe, UFix(fastpipe_latency-1), Mux(ctrl.single, UFix(sfma_latency-1), UFix(dfma_latency-1))) - val mem_stage_fu_latency = Reg(ex_stage_fu_latency - UFix(1)) + val mem_fu_latency = Reg(ex_stage_fu_latency - UFix(1)) val write_port_busy = ctrl.fastpipe && wen(fastpipe_latency-1) || Bool(sfma_latency < dfma_latency) && ctrl.fma && ctrl.single && wen(sfma_latency-1) || - mem_stage_wen && mem_stage_fu_latency === ex_stage_fu_latency - mem_stage_wen := reg_valid && !io.ctrl.killx && (ctrl.fma || ctrl.fastpipe) + mem_wen && mem_fu_latency === ex_stage_fu_latency + mem_wen := reg_valid && !io.ctrl.killx && (ctrl.fma || ctrl.fastpipe) val ex_stage_wsrc = Cat(ctrl.fastpipe, ctrl.single) - val mem_stage_winfo = Reg(Cat(reg_inst(31,27), ex_stage_wsrc)) + val mem_winfo = Reg(Cat(reg_inst(31,27), ex_stage_wsrc)) for (i <- 0 until dfma_latency-2) { winfo(i) := winfo(i+1) } - when (mem_stage_wen) { + when (mem_wen) { when (!io.ctrl.killm) { - wen := (wen >> UFix(1)) | (UFix(1) << mem_stage_fu_latency) + wen := (wen >> UFix(1)) | (UFix(1) << mem_fu_latency) } for (i <- 0 until dfma_latency-1) { - when (UFix(i) === mem_stage_fu_latency) { - winfo(i) := mem_stage_winfo + when (UFix(i) === mem_fu_latency) { + winfo(i) := mem_winfo } } } @@ -443,13 +464,27 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val wsrc = winfo(0)(1,0) val wdata = Mux(wsrc === UFix(0), UFix(0), // DFMA Mux(wsrc === UFix(1), UFix(0), // SFMA - Mux(wsrc === UFix(2), retire_fromint_wdata, - retire_fromint_wdata))) + Mux(wsrc === UFix(2), fastpipe.io.out_d, + fastpipe.io.out_s))) + val wexc = Mux(wsrc === UFix(0), Bits(0), // DFMA + Mux(wsrc === UFix(1), Bits(0), // SFMA + Mux(wsrc === UFix(2), fastpipe.io.exc_d, + fastpipe.io.exc_s))) val waddr = winfo(0) >> UFix(2) 
regfile.write(waddr, wdata, wen(0)) - val fp_inflight = fp_toint_val || retire_toint || mem_stage_wen || wen.orR - val fsr_busy = ctrl.rdfsr && fp_inflight || fp_wrfsr_val || retire_wrfsr + when (wb_toint_val || wen(0)) { + fsr_exc := fsr_exc | + Fill(fsr_exc.getWidth, wb_toint_val) & wb_toint_exc | + Fill(fsr_exc.getWidth, wen(0)) & wexc + } + when (wb_wrfsr_val) { + fsr_exc := fastpipe.io.out_s(4,0) + fsr_rm := fastpipe.io.out_s(7,5) + } + + val fp_inflight = mem_toint_val || wb_toint_val || mem_wen || wen.orR + val fsr_busy = ctrl.rdfsr && fp_inflight || mem_wrfsr_val || wb_wrfsr_val val units_busy = Bool(false) io.ctrl.nack := fsr_busy || units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs From ee9fc10668a307e00d08838ab0a73ddb8c04f1a0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 14 Feb 2012 06:03:43 -0800 Subject: [PATCH 0163/1087] add fcvt.s.d, fcvt.d.s --- rocket/src/main/scala/fpu.scala | 46 +++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 6557056b..2e26a369 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -101,6 +101,8 @@ class rocketFPUDecoder extends Component FCVT_WU_D-> List(Y,FCMD_CVT_WU_FMT,N,N,Y,N,N,N,N,Y,N,N,N,N,N), FCVT_L_D -> List(Y,FCMD_CVT_L_FMT, N,N,Y,N,N,N,N,Y,N,N,N,N,N), FCVT_LU_D-> List(Y,FCMD_CVT_LU_FMT,N,N,Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_S_D -> List(Y,FCMD_CVT_FMT_D, Y,N,Y,N,N,Y,N,N,Y,N,N,N,N), + FCVT_D_S -> List(Y,FCMD_CVT_FMT_S, Y,N,Y,N,N,N,N,N,Y,N,N,N,N), FEQ_S -> List(Y,FCMD_EQ, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), FLT_S -> List(Y,FCMD_LT, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), FLE_S -> List(Y,FCMD_LE, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), @@ -167,6 +169,7 @@ class rocketFPIntUnit extends Component val io = new Bundle { val single = Bool(INPUT) val cmd = Bits(FCMD_WIDTH, INPUT) + val rm = Bits(3, INPUT) val fsr = Bits(FSR_WIDTH, INPUT) val in1 = Bits(65, INPUT) val in2 = Bits(65, INPUT) 
@@ -190,7 +193,7 @@ class rocketFPIntUnit extends Component val s2i = new hardfloat.recodedFloat32ToAny s2i.io.in := io.in1 - s2i.io.roundingMode := io.fsr >> UFix(5) + s2i.io.roundingMode := io.rm s2i.io.typeOp := ~io.cmd(1,0) val dcmp = new hardfloat.recodedFloat64Compare @@ -201,7 +204,7 @@ class rocketFPIntUnit extends Component val d2i = new hardfloat.recodedFloat64ToAny d2i.io.in := io.in1 - d2i.io.roundingMode := io.fsr >> UFix(5) + d2i.io.roundingMode := io.rm d2i.io.typeOp := ~io.cmd(1,0) // output muxing @@ -243,7 +246,7 @@ class rocketFPUFastPipe extends Component val io = new Bundle { val single = Bool(INPUT) val cmd = Bits(FCMD_WIDTH, INPUT) - val fsr = Bits(FSR_WIDTH, INPUT) + val rm = Bits(3, INPUT) val fromint = Bits(64, INPUT) val in1 = Bits(65, INPUT) val in2 = Bits(65, INPUT) @@ -261,12 +264,12 @@ class rocketFPUFastPipe extends Component val i2s = new hardfloat.anyToRecodedFloat32 i2s.io.in := io.fromint - i2s.io.roundingMode := io.fsr >> UFix(5) + i2s.io.roundingMode := io.rm i2s.io.typeOp := ~io.cmd(1,0) val i2d = new hardfloat.anyToRecodedFloat64 i2d.io.in := io.fromint - i2d.io.roundingMode := io.fsr >> UFix(5) + i2d.io.roundingMode := io.rm i2d.io.typeOp := ~io.cmd(1,0) // fp->fp units @@ -279,6 +282,13 @@ class rocketFPUFastPipe extends Component val fsgnj = Cat(Mux(io.single, io.in1(64), sign_d), io.in1(63,33), Mux(io.single, sign_s, io.in1(32)), io.in1(31,0)) + val s2d = new hardfloat.recodedFloat32ToRecodedFloat64 + s2d.io.in := io.in1 + + val d2s = new hardfloat.recodedFloat64ToRecodedFloat32 + d2s.io.in := io.in1 + d2s.io.roundingMode := io.rm + // output muxing val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) out_s := Reg(rec_s.io.out) @@ -297,6 +307,12 @@ class rocketFPUFastPipe extends Component out_s := r_fsgnj(32,0) out_d := r_fsgnj } + when (r_cmd === FCMD_CVT_FMT_S || r_cmd === FCMD_CVT_FMT_D) { + out_s := Reg(d2s.io.out) + exc_s := Reg(d2s.io.exceptionFlags) + out_d := Reg(s2d.io.out) + exc_d := 
Reg(s2d.io.exceptionFlags) + } when (r_cmd === FCMD_CVT_FMT_W || r_cmd === FCMD_CVT_FMT_WU || r_cmd === FCMD_CVT_FMT_L || r_cmd === FCMD_CVT_FMT_LU) { out_s := Reg(i2s.io.out) @@ -360,6 +376,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val ex_rs1 = regfile.read(reg_inst(26,22)) val ex_rs2 = regfile.read(reg_inst(21,17)) val ex_rs3 = regfile.read(reg_inst(16,12)) + val ex_rm = Mux(reg_inst(11,9) === Bits(7), fsr_rm, reg_inst(11,9)) val mem_fromint_val = Reg(resetVal = Bool(false)) val mem_fromint_data = Reg() { Bits() } @@ -367,12 +384,14 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val mem_rs1 = Reg() { Bits() } val mem_rs2 = Reg() { Bits() } val mem_rs3 = Reg() { Bits() } + val mem_rm = Reg() { Bits() } val mem_wrfsr_val = Reg(resetVal = Bool(false)) mem_fromint_val := Bool(false) mem_toint_val := Bool(false) mem_wrfsr_val := Bool(false) when (reg_valid) { + mem_rm := ex_rm when (ctrl.fromint || ctrl.wrfsr) { mem_fromint_val := !io.ctrl.killx mem_fromint_data := io.dpath.fromint_data @@ -383,30 +402,25 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component when (ctrl.toint) { mem_toint_val := !io.ctrl.killx } - when (ctrl.toint || ctrl.fastpipe) { - mem_rs1 := ex_rs1 - when (ctrl.ren2) { - mem_rs2 := ex_rs2 - } - } when (ctrl.ren1) { mem_rs1 := ex_rs1 } + when (ctrl.store) { + mem_rs1 := ex_rs2 + } when (ctrl.ren2) { mem_rs2 := ex_rs2 } when (ctrl.ren3) { mem_rs3 := ex_rs3 } - when (ctrl.store) { - mem_rs1 := ex_rs2 - } } // currently we assume FP stores and FP->int ops take 1 cycle (MEM) val fpiu = new rocketFPIntUnit fpiu.io.single := mem_ctrl.single fpiu.io.cmd := mem_ctrl.cmd + fpiu.io.rm := mem_rm fpiu.io.fsr := Cat(fsr_rm, fsr_exc) fpiu.io.in1 := mem_rs1 fpiu.io.in2 := mem_rs2 @@ -417,7 +431,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val fastpipe = new rocketFPUFastPipe fastpipe.io.single := mem_ctrl.single fastpipe.io.cmd := mem_ctrl.cmd - 
fastpipe.io.fsr := Cat(fsr_rm, fsr_exc) + fastpipe.io.rm := mem_rm fastpipe.io.fromint := mem_fromint_data fastpipe.io.in1 := mem_rs1 fastpipe.io.in2 := mem_rs2 @@ -489,5 +503,5 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component io.ctrl.nack := fsr_busy || units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs // we don't currently support round-max-magnitude (rm=4) - io.ctrl.illegal_rm := Mux(reg_inst(11,9) === Bits(7), fsr_rm(2), reg_inst(11)) + io.ctrl.illegal_rm := ex_rm(2) } From 38c67e5a9e525afa5315490520b811939862a4a6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 14 Feb 2012 06:37:18 -0800 Subject: [PATCH 0164/1087] add fmin.[s|d] and fmax.[s|d] --- rocket/src/main/scala/fpu.scala | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 2e26a369..be6284f1 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -116,12 +116,14 @@ class rocketFPUDecoder extends Component FSGNJX_S -> List(Y,FCMD_SGNJX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), FSGNJ_D -> List(Y,FCMD_SGNJ, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), FSGNJN_D -> List(Y,FCMD_SGNJN, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FSGNJX_D -> List(Y,FCMD_SGNJX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N) + FSGNJX_D -> List(Y,FCMD_SGNJX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), + FMIN_S -> List(Y,FCMD_MIN, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), + FMAX_S -> List(Y,FCMD_MAX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), + FMIN_D -> List(Y,FCMD_MIN, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), + FMAX_D -> List(Y,FCMD_MAX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N) // FADD_S -> List(Y,FCMD_ADD, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), // FSUB_S -> List(Y,FCMD_SUB, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), // FMUL_S -> List(Y,FCMD_MUL, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), -// FMIN_S -> List(Y,FCMD_MIN, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), -// FMAX_S -> List(Y,FCMD_MAX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N) )) val valid :: cmd :: wen :: sboard :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: 
fma :: store :: rdfsr :: wrfsr :: Nil = decoder @@ -173,6 +175,8 @@ class rocketFPIntUnit extends Component val fsr = Bits(FSR_WIDTH, INPUT) val in1 = Bits(65, INPUT) val in2 = Bits(65, INPUT) + val lt_s = Bool(OUTPUT) + val lt_d = Bool(OUTPUT) val store_data = Bits(64, OUTPUT) val toint_data = Bits(64, OUTPUT) val exc = Bits(5, OUTPUT) @@ -239,6 +243,8 @@ class rocketFPIntUnit extends Component io.toint_data := Mux(io.single, out_s, out_d) io.exc := Mux(io.single, exc_s, exc_d) + io.lt_s := scmp.io.a_lt_b + io.lt_d := dcmp.io.a_lt_b } class rocketFPUFastPipe extends Component @@ -250,6 +256,8 @@ class rocketFPUFastPipe extends Component val fromint = Bits(64, INPUT) val in1 = Bits(65, INPUT) val in2 = Bits(65, INPUT) + val lt_s = Bool(INPUT) + val lt_d = Bool(INPUT) val out_s = Bits(65, OUTPUT) val exc_s = Bits(5, OUTPUT) val out_d = Bits(65, OUTPUT) @@ -289,6 +297,15 @@ class rocketFPUFastPipe extends Component d2s.io.in := io.in1 d2s.io.roundingMode := io.rm + val isnan1 = Mux(io.single, io.in1(31,29) === Bits("b111"), io.in1(63,61) === Bits("b111")) + val isnan2 = Mux(io.single, io.in2(31,29) === Bits("b111"), io.in2(63,61) === Bits("b111")) + val issnan1 = isnan1 && ~Mux(io.single, io.in1(22), io.in1(51)) + val issnan2 = isnan2 && ~Mux(io.single, io.in2(22), io.in2(51)) + val minmax_exc = Cat(issnan1 || issnan2, Bits(0,4)) + val min = io.cmd === FCMD_MIN + val lt = Mux(io.single, io.lt_s, io.lt_d) + val minmax = Mux(isnan2 || !isnan1 && (min === lt), io.in1, io.in2) + // output muxing val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) out_s := Reg(rec_s.io.out) @@ -307,6 +324,14 @@ class rocketFPUFastPipe extends Component out_s := r_fsgnj(32,0) out_d := r_fsgnj } + when (r_cmd === FCMD_MIN || r_cmd === FCMD_MAX) { + val r_minmax = Reg(minmax) + val r_minmax_exc = Reg(minmax_exc) + out_s := r_minmax(32,0) + out_d := r_minmax + exc_s := r_minmax_exc + exc_d := r_minmax_exc + } when (r_cmd === FCMD_CVT_FMT_S || r_cmd === FCMD_CVT_FMT_D) { out_s := 
Reg(d2s.io.out) exc_s := Reg(d2s.io.exceptionFlags) @@ -435,6 +460,8 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component fastpipe.io.fromint := mem_fromint_data fastpipe.io.in1 := mem_rs1 fastpipe.io.in2 := mem_rs2 + fastpipe.io.lt_s := fpiu.io.lt_s + fastpipe.io.lt_d := fpiu.io.lt_d val wb_wrfsr_val = Reg(!io.ctrl.killm && mem_wrfsr_val, resetVal = Bool(false)) val wb_toint_val = Reg(!io.ctrl.killm && mem_toint_val, resetVal = Bool(false)) From 297223a13c4740af02ee07b2456a04b20b81add1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 14 Feb 2012 15:12:16 -0800 Subject: [PATCH 0165/1087] squash subsequent external mem request after nack --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 48fa1318..eded6f51 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -624,7 +624,7 @@ class rocketCtrl extends Component val dmem_kill_mem = io.dpath.mem_valid && (io.dtlb_miss || io.dmem.resp_nack) val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill - val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill + val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill || Reg(io.ext_mem.resp_nack) // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions From 0ec7767c13b21f776b42cefc59ec89a60aa752b5 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 14 Feb 2012 19:11:57 -0800 Subject: [PATCH 0166/1087] declaring success on FPU for now --- rocket/src/main/scala/ctrl.scala | 6 +- rocket/src/main/scala/fpu.scala | 163 ++++++++++++++++++++++++++----- rocket/src/main/scala/util.scala | 6 ++ 3 
files changed, 151 insertions(+), 24 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index eded6f51..eb954c70 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -546,7 +546,7 @@ class rocketCtrl extends Component var id_stall_fpu = Bool(false) if (HAVE_FPU) { - val fp_sboard = new rocketCtrlSboard(32, 4, 2); + val fp_sboard = new rocketCtrlSboard(32, 4, 3); fp_sboard.io.r(0).addr := id_raddr1.toUFix fp_sboard.io.r(1).addr := id_raddr2.toUFix fp_sboard.io.r(2).addr := id_raddr3.toUFix @@ -560,6 +560,10 @@ class rocketCtrl extends Component fp_sboard.io.w(1).data := Bool(false) fp_sboard.io.w(1).addr := io.dpath.fp_sboard_clra + fp_sboard.io.w(2).en := io.fpu.sboard_clr + fp_sboard.io.w(2).data := Bool(false) + fp_sboard.io.w(2).addr := io.fpu.sboard_clra + id_stall_fpu = io.fpu.dec.ren1 && fp_sboard.io.r(0).data || io.fpu.dec.ren2 && fp_sboard.io.r(1).data || io.fpu.dec.ren3 && fp_sboard.io.r(2).data || diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index be6284f1..f8c02269 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -120,10 +120,21 @@ class rocketFPUDecoder extends Component FMIN_S -> List(Y,FCMD_MIN, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), FMAX_S -> List(Y,FCMD_MAX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), FMIN_D -> List(Y,FCMD_MIN, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FMAX_D -> List(Y,FCMD_MAX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N) -// FADD_S -> List(Y,FCMD_ADD, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), -// FSUB_S -> List(Y,FCMD_SUB, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), -// FMUL_S -> List(Y,FCMD_MUL, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), + FMAX_D -> List(Y,FCMD_MAX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), + FADD_S -> List(Y,FCMD_ADD, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), + FSUB_S -> List(Y,FCMD_SUB, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), + FMUL_S -> List(Y,FCMD_MUL, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), + FADD_D -> List(Y,FCMD_ADD, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), + FSUB_D -> List(Y,FCMD_SUB, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), + FMUL_D 
-> List(Y,FCMD_MUL, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), + FMADD_S -> List(Y,FCMD_MADD, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), + FMSUB_S -> List(Y,FCMD_MSUB, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), + FNMADD_S -> List(Y,FCMD_NMADD, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), + FNMSUB_S -> List(Y,FCMD_NMSUB, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), + FMADD_D -> List(Y,FCMD_MADD, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), + FMSUB_D -> List(Y,FCMD_MSUB, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), + FNMADD_D -> List(Y,FCMD_NMADD, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), + FNMSUB_D -> List(Y,FCMD_NMSUB, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N) )) val valid :: cmd :: wen :: sboard :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: fma :: store :: rdfsr :: wrfsr :: Nil = decoder @@ -164,6 +175,8 @@ class ioCtrlFPU extends Bundle { val killx = Bool(OUTPUT) val killm = Bool(OUTPUT) val dec = new rocketFPUCtrlSigs().asInput + val sboard_clr = Bool(INPUT) + val sboard_clra = UFix(5, INPUT) } class rocketFPIntUnit extends Component @@ -192,8 +205,8 @@ class rocketFPIntUnit extends Component val scmp = new hardfloat.recodedFloat32Compare scmp.io.a := io.in1 scmp.io.b := io.in2 - val scmp_out = (io.cmd(1,0) & Cat(scmp.io.a_lt_b, scmp.io.a_eq_b)).orR - val scmp_exc = (io.cmd(1,0) & Cat(scmp.io.a_lt_b_invalid, scmp.io.a_eq_b_invalid)).orR << UFix(4) + val scmp_out = (io.cmd & Cat(scmp.io.a_lt_b, scmp.io.a_eq_b)).orR + val scmp_exc = (io.cmd & Cat(scmp.io.a_lt_b_invalid, scmp.io.a_eq_b_invalid)).orR << UFix(4) val s2i = new hardfloat.recodedFloat32ToAny s2i.io.in := io.in1 @@ -203,8 +216,8 @@ class rocketFPIntUnit extends Component val dcmp = new hardfloat.recodedFloat64Compare dcmp.io.a := io.in1 dcmp.io.b := io.in2 - val dcmp_out = (io.cmd(1,0) & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR - val dcmp_exc = (io.cmd(1,0) & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UFix(4) + val dcmp_out = (io.cmd & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR + val dcmp_exc = (io.cmd & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UFix(4) val d2i = new 
hardfloat.recodedFloat64ToAny d2i.io.in := io.in1 @@ -258,7 +271,7 @@ class rocketFPUFastPipe extends Component val in2 = Bits(65, INPUT) val lt_s = Bool(INPUT) val lt_d = Bool(INPUT) - val out_s = Bits(65, OUTPUT) + val out_s = Bits(33, OUTPUT) val exc_s = Bits(5, OUTPUT) val out_d = Bits(65, OUTPUT) val exc_d = Bits(5, OUTPUT) @@ -346,12 +359,96 @@ class rocketFPUFastPipe extends Component exc_d := Reg(i2d.io.exceptionFlags) } - io.out_s := Cat(Fill(32,UFix(1)), out_s) + io.out_s := out_s io.exc_s := exc_s io.out_d := out_d io.exc_d := exc_d } +class rocketFPUSFMAPipe(latency: Int) extends Component +{ + val io = new Bundle { + val valid = Bool(INPUT) + val cmd = Bits(FCMD_WIDTH, INPUT) + val rm = Bits(3, INPUT) + val in1 = Bits(33, INPUT) + val in2 = Bits(33, INPUT) + val in3 = Bits(33, INPUT) + val out = Bits(33, OUTPUT) + val exc = Bits(5, OUTPUT) + } + + val cmd = Reg() { Bits() } + val rm = Reg() { Bits() } + val in1 = Reg() { Bits() } + val in2 = Reg() { Bits() } + val in3 = Reg() { Bits() } + + val cmd_fma = io.cmd === FCMD_MADD || io.cmd === FCMD_MSUB || + io.cmd === FCMD_NMADD || io.cmd === FCMD_NMSUB + val cmd_addsub = io.cmd === FCMD_ADD || io.cmd === FCMD_SUB + + when (io.valid) { + cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) + rm := io.rm + in1 := io.in1 + in2 := Mux(cmd_addsub, Bits("h80000000"), io.in2) + in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, Bits(0))) + } + + val fma = new hardfloat.mulAddSubRecodedFloat32_1 + fma.io.op := cmd + fma.io.roundingMode := rm + fma.io.a := in1 + fma.io.b := in2 + fma.io.c := in3 + + io.out := ShiftRegister(latency-1, fma.io.out) + io.exc := ShiftRegister(latency-1, fma.io.exceptionFlags) +} + +class rocketFPUDFMAPipe(latency: Int) extends Component +{ + val io = new Bundle { + val valid = Bool(INPUT) + val cmd = Bits(FCMD_WIDTH, INPUT) + val rm = Bits(3, INPUT) + val in1 = Bits(65, INPUT) + val in2 = Bits(65, INPUT) + val in3 = Bits(65, INPUT) + val out = Bits(65, OUTPUT) + val exc = 
Bits(5, OUTPUT) + } + + val cmd = Reg() { Bits() } + val rm = Reg() { Bits() } + val in1 = Reg() { Bits() } + val in2 = Reg() { Bits() } + val in3 = Reg() { Bits() } + + val cmd_fma = io.cmd === FCMD_MADD || io.cmd === FCMD_MSUB || + io.cmd === FCMD_NMADD || io.cmd === FCMD_NMSUB + val cmd_addsub = io.cmd === FCMD_ADD || io.cmd === FCMD_SUB + + when (io.valid) { + cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) + rm := io.rm + in1 := io.in1 + in2 := Mux(cmd_addsub, Bits("h8000000000000000"), io.in2) + in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, Bits(0))) + } + + val fma = new hardfloat.mulAddSubRecodedFloat64_1 + fma.io.op := cmd + fma.io.roundingMode := rm + fma.io.a := in1 + fma.io.b := in2 + fma.io.c := in3 + + io.out := ShiftRegister(latency-1, fma.io.out) + io.exc := ShiftRegister(latency-1, fma.io.exceptionFlags) +} + class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component { val io = new Bundle { @@ -388,7 +485,8 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val rec_d = new hardfloat.float64ToRecodedFloat64 rec_s.io.in := load_wb_data rec_d.io.in := load_wb_data - val load_wb_data_recoded = Mux(load_wb_single, Cat(Fill(32,UFix(1)), rec_s.io.out), rec_d.io.out) + val sp_msbs = Fill(32, UFix(1,1)) + val load_wb_data_recoded = Mux(load_wb_single, Cat(sp_msbs, rec_s.io.out), rec_d.io.out) val fsr_rm = Reg() { Bits(width = 3) } val fsr_exc = Reg() { Bits(width = 5) } @@ -403,7 +501,6 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val ex_rs3 = regfile.read(reg_inst(16,12)) val ex_rm = Mux(reg_inst(11,9) === Bits(7), fsr_rm, reg_inst(11,9)) - val mem_fromint_val = Reg(resetVal = Bool(false)) val mem_fromint_data = Reg() { Bits() } val mem_toint_val = Reg(resetVal = Bool(false)) val mem_rs1 = Reg() { Bits() } @@ -412,13 +509,11 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val mem_rm = Reg() { Bits() } val mem_wrfsr_val = Reg(resetVal = 
Bool(false)) - mem_fromint_val := Bool(false) mem_toint_val := Bool(false) mem_wrfsr_val := Bool(false) when (reg_valid) { mem_rm := ex_rm when (ctrl.fromint || ctrl.wrfsr) { - mem_fromint_val := !io.ctrl.killx mem_fromint_data := io.dpath.fromint_data } when (ctrl.wrfsr) { @@ -453,6 +548,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component io.dpath.store_data := fpiu.io.store_data io.dpath.toint_data := fpiu.io.toint_data + // 2-cycle pipe for int->FP and non-FMA FP->FP ops val fastpipe = new rocketFPUFastPipe fastpipe.io.single := mem_ctrl.single fastpipe.io.cmd := mem_ctrl.cmd @@ -463,12 +559,31 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component fastpipe.io.lt_s := fpiu.io.lt_s fastpipe.io.lt_d := fpiu.io.lt_d + val cmd_fma = mem_ctrl.cmd === FCMD_MADD || mem_ctrl.cmd === FCMD_MSUB || + mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB + val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB + val sfma = new rocketFPUSFMAPipe(sfma_latency-1) + sfma.io.valid := Reg(reg_valid && ctrl.fma && ctrl.single) + sfma.io.in1 := mem_rs1 + sfma.io.in2 := mem_rs2 + sfma.io.in3 := mem_rs3 + sfma.io.cmd := mem_ctrl.cmd + sfma.io.rm := mem_rm + + val dfma = new rocketFPUDFMAPipe(dfma_latency-1) + dfma.io.valid := Reg(reg_valid && ctrl.fma && !ctrl.single) + dfma.io.in1 := mem_rs1 + dfma.io.in2 := mem_rs2 + dfma.io.in3 := mem_rs3 + dfma.io.cmd := mem_ctrl.cmd + dfma.io.rm := mem_rm + val wb_wrfsr_val = Reg(!io.ctrl.killm && mem_wrfsr_val, resetVal = Bool(false)) val wb_toint_val = Reg(!io.ctrl.killm && mem_toint_val, resetVal = Bool(false)) val wb_toint_exc = Reg(fpiu.io.exc) // writeback arbitration - val wen = Reg(resetVal = Bits(0, dfma_latency-1)) + val wen = Reg(resetVal = Bits(0, dfma_latency)) val winfo = Vec(dfma_latency-1) { Reg() { Bits() } } val mem_wen = Reg(resetVal = Bool(false)) @@ -478,8 +593,8 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component Mux(ctrl.single, 
UFix(sfma_latency-1), UFix(dfma_latency-1))) val mem_fu_latency = Reg(ex_stage_fu_latency - UFix(1)) - val write_port_busy = ctrl.fastpipe && wen(fastpipe_latency-1) || - Bool(sfma_latency < dfma_latency) && ctrl.fma && ctrl.single && wen(sfma_latency-1) || + val write_port_busy = ctrl.fastpipe && wen(fastpipe_latency) || + Bool(sfma_latency < dfma_latency) && ctrl.fma && ctrl.single && wen(sfma_latency) || mem_wen && mem_fu_latency === ex_stage_fu_latency mem_wen := reg_valid && !io.ctrl.killx && (ctrl.fma || ctrl.fastpipe) val ex_stage_wsrc = Cat(ctrl.fastpipe, ctrl.single) @@ -503,15 +618,15 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component } val wsrc = winfo(0)(1,0) - val wdata = Mux(wsrc === UFix(0), UFix(0), // DFMA - Mux(wsrc === UFix(1), UFix(0), // SFMA + val wdata = Mux(wsrc === UFix(0), dfma.io.out, // DFMA + Mux(wsrc === UFix(1), Cat(sp_msbs, sfma.io.out), // SFMA Mux(wsrc === UFix(2), fastpipe.io.out_d, - fastpipe.io.out_s))) - val wexc = Mux(wsrc === UFix(0), Bits(0), // DFMA - Mux(wsrc === UFix(1), Bits(0), // SFMA + Cat(sp_msbs, fastpipe.io.out_s)))) + val wexc = Mux(wsrc === UFix(0), dfma.io.exc, // DFMA + Mux(wsrc === UFix(1), sfma.io.exc, // SFMA Mux(wsrc === UFix(2), fastpipe.io.exc_d, fastpipe.io.exc_s))) - val waddr = winfo(0) >> UFix(2) + val waddr = winfo(0).toUFix >> UFix(2) regfile.write(waddr, wdata, wen(0)) when (wb_toint_val || wen(0)) { @@ -531,4 +646,6 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component io.ctrl.dec <> fp_decoder.io.sigs // we don't currently support round-max-magnitude (rm=4) io.ctrl.illegal_rm := ex_rm(2) + io.ctrl.sboard_clr := wen(0) && !wsrc(1).toBool // only for FMA pipes + io.ctrl.sboard_clra := waddr } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index c1473a89..da2343e3 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -71,6 +71,12 @@ object LFSR16 } } +object ShiftRegister +{ + def apply 
[T <: Data](n: Int, in: T): T = + if (n > 0) Reg(apply(n-1, in)) else in +} + object Mux1H { //TODO: cloning in(0) is unsafe if other elements have different widths, but From a51c7cc927257e28e6472eb0cc83280a44eb604c Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 14 Feb 2012 19:43:59 -0800 Subject: [PATCH 0167/1087] new build system with updated chisel, hwacha --- rocket/src/main/scala/dpath_alu.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 947723df..72d22cae 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -37,7 +37,7 @@ class rocketDpathALU extends Component val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) val shin_r = Cat(shin_hi, io.in1(31,0)) val shin = Mux(shright, shin_r, Reverse(shin_r)) - val shout_r = (Cat(sra & shin_r(63), shin).toFix >>> shamt)(63,0) + val shout_r = (Cat(sra & shin_r(63), shin).toFix >> shamt)(63,0) val logic = Mux(io.fn === FN_AND, io.in1 & io.in2, From 6bdf9dc513c7f4738dd60218e6733d936ca11ab6 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 14 Feb 2012 23:34:57 -0800 Subject: [PATCH 0168/1087] hwacha integration: now it compiles correctly! 
--- rocket/src/main/scala/arbiter.scala | 30 +++++++++++++++++++---------- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/cpu.scala | 30 +++++++++++++++++++++++++++++ rocket/src/main/scala/dpath.scala | 6 ++++-- rocket/src/main/scala/top.scala | 3 +++ 5 files changed, 58 insertions(+), 13 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index b0c51155..4a60eb10 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -23,6 +23,7 @@ class ioMemArbiter extends Bundle() { val dcache = new ioDCache(); // val icache = new ioICache(); val icache = new ioIPrefetcherMem().flip(); + val vicache = new ioICache(); } class rocketMemArbiter extends Component { @@ -33,16 +34,23 @@ class rocketMemArbiter extends Component { // ***************************** // Memory request is valid if either icache or dcache have a valid request - io.mem.req_val := (io.icache.req_val || io.dcache.req_val); + io.mem.req_val := (io.icache.req_val || io.vicache.req_val || io.dcache.req_val); // Set read/write bit. 
ICache always reads io.mem.req_rw := Mux(io.dcache.req_val, io.dcache.req_rw, Bool(false)); // Give priority to ICache - io.mem.req_addr := Mux(io.dcache.req_val, io.dcache.req_addr, io.icache.req_addr); + io.mem.req_addr := + Mux(io.dcache.req_val, io.dcache.req_addr, + Mux(io.icache.req_val, io.icache.req_addr, + io.vicache.req_addr)) - // low bit of tag=0 for I$, 1 for D$ - io.mem.req_tag := Cat(Mux(io.dcache.req_val, io.dcache.req_tag, io.icache.req_tag), io.dcache.req_val) + // low bit of tag to indicate D$, I$, and VI$ + val t_dcache :: t_icache :: t_vicache :: Nil = Enum(3){ UFix() } + io.mem.req_tag := + Mux(io.dcache.req_val, Cat(io.dcache.req_tag, t_dcache), + Mux(io.icache.req_val, Cat(io.icache.req_tag, t_icache), + Cat(Bits(0, MEM_TAG_BITS-2), t_vicache))) // Just pass through write data (only D$ will write) io.mem.req_wdata := io.dcache.req_wdata; @@ -55,18 +63,20 @@ class rocketMemArbiter extends Component { // This way, writebacks will never be interrupted by I$ refills. 
io.dcache.req_rdy := io.mem.req_rdy; io.icache.req_rdy := io.mem.req_rdy && !io.dcache.req_val; + io.vicache.req_rdy := io.mem.req_rdy && !io.dcache.req_val && !io.icache.req_val // Response will only be valid for D$ or I$ not both because of tag bits - io.icache.resp_val := io.mem.resp_val && !io.mem.resp_tag(0).toBool; - io.dcache.resp_val := io.mem.resp_val && io.mem.resp_tag(0).toBool; + io.dcache.resp_val := io.mem.resp_val && (io.mem.resp_tag(1,0) === t_dcache) + io.icache.resp_val := io.mem.resp_val && (io.mem.resp_tag(1,0) === t_icache) + io.vicache.resp_val := io.mem.resp_val && (io.mem.resp_tag(1,0) === t_vicache) // Feed through data to both - io.icache.resp_data := io.mem.resp_data; io.dcache.resp_data := io.mem.resp_data; + io.icache.resp_data := io.mem.resp_data; + io.vicache.resp_data := io.mem.resp_data - io.icache.resp_tag := io.mem.resp_tag >> UFix(1) - io.dcache.resp_tag := io.mem.resp_tag >> UFix(1) - + io.dcache.resp_tag := io.mem.resp_tag >> UFix(2) + io.icache.resp_tag := io.mem.resp_tag >> UFix(2) } } diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 34026971..2905770a 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -177,7 +177,7 @@ object Constants // external memory interface val IMEM_TAG_BITS = 1; val DMEM_TAG_BITS = ceil(log(NMSHR)/log(2)).toInt; - val MEM_TAG_BITS = 1 + max(IMEM_TAG_BITS, DMEM_TAG_BITS); + val MEM_TAG_BITS = 2 + max(IMEM_TAG_BITS, DMEM_TAG_BITS); val MEM_DATA_BITS = 128; val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS; diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index ad2b5698..cf9f2c80 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -30,6 +30,7 @@ class ioRocket extends Bundle() val console = new ioConsole(); val host = new ioHost(); val imem = new ioImem().flip(); + val vimem = new ioImem().flip(); val dmem = new ioDmem().flip(); } @@ -42,6 +43,7 @@ class 
rocketProc extends Component val dtlb = new rocketDTLB(DTLB_ENTRIES); val itlb = new rocketITLB(ITLB_ENTRIES); + val vitlb = new rocketITLB(ITLB_ENTRIES); val ptw = new rocketPTW(); val arb = new rocketDmemArbiter(); @@ -130,8 +132,36 @@ class rocketProc extends Component { val vu = new vu() + vitlb.io.cpu.invalidate := dpath.io.ptbr_wen + vitlb.io.cpu.status := dpath.io.ctrl.status + vitlb.io.cpu.req_val := vu.io.imem_req.valid + vitlb.io.cpu.req_asid := Bits(0,ASID_BITS) // FIXME: connect to PCR + vitlb.io.cpu.req_vpn := vu.io.imem_req.bits(VADDR_BITS,PGIDX_BITS).toUFix + io.vimem.req_idx := vu.io.imem_req.bits(PGIDX_BITS-1,0) + io.vimem.req_ppn := vitlb.io.cpu.resp_ppn + io.vimem.req_val := vu.io.imem_req.valid + io.vimem.invalidate := ctrl.io.dpath.flush_inst + vu.io.imem_resp.valid := io.vimem.resp_val + vu.io.imem_resp.bits := io.vimem.resp_data + // handle vitlb.io.cpu.exception + io.vimem.itlb_miss := vitlb.io.cpu.resp_miss + vu.io.vec_cmdq <> dpath.io.vcmdq vu.io.vec_ximm1q <> dpath.io.vximm1q vu.io.vec_ximm2q <> dpath.io.vximm2q + + ctrl.io.ext_mem.req_val := vu.io.dmem_req.valid + ctrl.io.ext_mem.req_cmd := vu.io.dmem_req.bits.cmd + ctrl.io.ext_mem.req_type := vu.io.dmem_req.bits.typ + + dpath.io.ext_mem.req_val := vu.io.dmem_req.valid + dpath.io.ext_mem.req_idx := vu.io.dmem_req.bits.idx + dpath.io.ext_mem.req_ppn := vu.io.dmem_req.bits.ppn + dpath.io.ext_mem.req_data := vu.io.dmem_req.bits.data + + vu.io.dmem_resp.valid := dpath.io.ext_mem.resp_val + vu.io.dmem_resp.bits.nack := ctrl.io.ext_mem.resp_nack + vu.io.dmem_resp.bits.data := dpath.io.ext_mem.resp_data + vu.io.dmem_resp.bits.tag := dpath.io.ext_mem.resp_tag } } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 0cb22794..593ef45e 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -34,7 +34,7 @@ class ioDpathAll extends Bundle() val console = new ioConsole(List("valid","bits")); val debug = new ioDebug(); val dmem = 
new ioDpathDmem(); - val ext_mem = new ioDmem(List("req_val", "req_idx", "req_ppn", "req_data", "resp_val", "resp_data", "resp_tag")) + val ext_mem = new ioDmem(List("req_val", "req_idx", "req_ppn", "req_data", "req_tag", "resp_val", "resp_data", "resp_tag")) val imem = new ioDpathImem(); val vcmdq = new io_vec_cmdq() val vximm1q = new io_vec_ximm1q() @@ -101,6 +101,7 @@ class rocketDpath extends Component val ex_reg_ctrl_sel_wb = Reg() { UFix() }; val ex_reg_ctrl_ren_pcr = Reg(resetVal = Bool(false)); val ex_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); + val ex_reg_ext_mem_tag = Reg() { Bits() }; val ex_wdata = Wire() { Bits() }; // memory definitions @@ -251,6 +252,7 @@ class rocketDpath extends Component ex_reg_ctrl_div_fn := io.ctrl.div_fn; ex_reg_ctrl_sel_wb := io.ctrl.sel_wb; ex_reg_ctrl_ren_pcr := io.ctrl.ren_pcr; + ex_reg_ext_mem_tag := io.ext_mem.req_tag when(io.ctrl.killd) { ex_reg_valid := Bool(false); @@ -306,7 +308,7 @@ class rocketDpath extends Component // other signals (req_val, req_rdy) connect to control module io.dmem.req_addr := ex_effective_address.toUFix; io.dmem.req_data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) - io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val, io.ctrl.ex_ext_mem_val).toUFix + io.dmem.req_tag := Cat(Mux(io.ctrl.ex_ext_mem_val, ex_reg_ext_mem_tag(CPU_TAG_BITS-2, 0), Cat(ex_reg_waddr, io.ctrl.ex_fp_val)), io.ctrl.ex_ext_mem_val).toUFix // processor control regfile read pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_eret; diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index ac3678c0..92c6b9dc 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -17,12 +17,14 @@ class Top() extends Component { val cpu = new rocketProc(); val icache = new rocketICache(128, 2); // 128 sets x 2 ways val icache_pf = new rocketIPrefetcher(); + val vicache = new rocketICache(128, 2); // 128 sets x 2 ways val dcache = new HellaCache(); val arbiter = new 
rocketMemArbiter(); arbiter.io.mem <> io.mem; arbiter.io.dcache <> dcache.io.mem; arbiter.io.icache <> icache_pf.io.mem; + arbiter.io.vicache <> vicache.io.mem cpu.io.host <> io.host; cpu.io.debug <> io.debug; @@ -30,6 +32,7 @@ class Top() extends Component { icache.io.mem <> icache_pf.io.icache; cpu.io.imem <> icache.io.cpu; + cpu.io.vimem <> vicache.io.cpu; cpu.io.dmem <> dcache.io.cpu; } From 7c11c1406c09e1af6169c7ad33534fb6f677e5b7 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 15 Feb 2012 02:28:07 -0800 Subject: [PATCH 0169/1087] vector-vector add working! --- rocket/src/main/scala/cpu.scala | 3 +++ rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/dpath_vec.scala | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index cf9f2c80..072ec90b 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -141,6 +141,7 @@ class rocketProc extends Component io.vimem.req_ppn := vitlb.io.cpu.resp_ppn io.vimem.req_val := vu.io.imem_req.valid io.vimem.invalidate := ctrl.io.dpath.flush_inst + vu.io.imem_req.ready := Bool(true) vu.io.imem_resp.valid := io.vimem.resp_val vu.io.imem_resp.bits := io.vimem.resp_data // handle vitlb.io.cpu.exception @@ -149,6 +150,7 @@ class rocketProc extends Component vu.io.vec_cmdq <> dpath.io.vcmdq vu.io.vec_ximm1q <> dpath.io.vximm1q vu.io.vec_ximm2q <> dpath.io.vximm2q + vu.io.vec_ackq.ready := Bool(true) ctrl.io.ext_mem.req_val := vu.io.dmem_req.valid ctrl.io.ext_mem.req_cmd := vu.io.dmem_req.bits.cmd @@ -158,6 +160,7 @@ class rocketProc extends Component dpath.io.ext_mem.req_idx := vu.io.dmem_req.bits.idx dpath.io.ext_mem.req_ppn := vu.io.dmem_req.bits.ppn dpath.io.ext_mem.req_data := vu.io.dmem_req.bits.data + dpath.io.ext_mem.req_tag := vu.io.dmem_req.bits.tag vu.io.dmem_resp.valid := dpath.io.ext_mem.resp_val vu.io.dmem_resp.bits.nack := ctrl.io.ext_mem.resp_nack diff --git a/rocket/src/main/scala/ctrl.scala 
b/rocket/src/main/scala/ctrl.scala index eb954c70..12c3ea87 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -764,7 +764,7 @@ class rocketCtrl extends Component io.fpu.killx := kill_ex io.fpu.killm := kill_mem - io.dtlb_val := ex_reg_mem_val; + io.dtlb_val := ex_reg_mem_val || ex_reg_ext_mem_val; io.dtlb_kill := mem_reg_kill; io.dmem.req_val := ex_reg_mem_val || ex_reg_ext_mem_val; io.dmem.req_kill := kill_dcache; diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 52c148fd..fc066504 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -174,7 +174,7 @@ class rocketDpathVec extends Component Bits(0,20))))))) io.vximm1q.bits := - Mux(wb_sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, io.inst(21,10), vlenm1), + Mux(wb_sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, io.inst(21,10), vlenm1(10,0)), io.wdata) // VIMM_ALU io.vximm2q.bits := io.rs2 From 32bdf5098afeed5fbaa62f1398d6170d03ea53ee Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 15 Feb 2012 13:30:22 -0800 Subject: [PATCH 0170/1087] refactor vector control logic & datapath in the rocket core --- rocket/src/main/scala/cpu.scala | 18 ++++- rocket/src/main/scala/ctrl.scala | 14 ++++ rocket/src/main/scala/ctrl_vec.scala | 103 +++++++++++++++++++++++++ rocket/src/main/scala/dpath.scala | 57 +++++++------- rocket/src/main/scala/dpath_vec.scala | 104 +++++++------------------- 5 files changed, 191 insertions(+), 105 deletions(-) create mode 100644 rocket/src/main/scala/ctrl_vec.scala diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 072ec90b..b87e7549 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -130,8 +130,11 @@ class rocketProc extends Component if (HAVE_VEC) { + dpath.io.vec_ctrl <> ctrl.io.vec_dpath + val vu = new vu() + // hooking up vector I$ vitlb.io.cpu.invalidate := 
dpath.io.ptbr_wen vitlb.io.cpu.status := dpath.io.ctrl.status vitlb.io.cpu.req_val := vu.io.imem_req.valid @@ -147,11 +150,20 @@ class rocketProc extends Component // handle vitlb.io.cpu.exception io.vimem.itlb_miss := vitlb.io.cpu.resp_miss - vu.io.vec_cmdq <> dpath.io.vcmdq - vu.io.vec_ximm1q <> dpath.io.vximm1q - vu.io.vec_ximm2q <> dpath.io.vximm2q + // hooking up vector command queues + vu.io.vec_cmdq.valid := ctrl.io.vec_iface.vcmdq_valid + vu.io.vec_cmdq.bits := dpath.io.vec_iface.vcmdq_bits + vu.io.vec_ximm1q.valid := ctrl.io.vec_iface.vximm1q_valid + vu.io.vec_ximm1q.bits := dpath.io.vec_iface.vximm1q_bits + vu.io.vec_ximm2q.valid := ctrl.io.vec_iface.vximm2q_valid + vu.io.vec_ximm2q.bits := dpath.io.vec_iface.vximm2q_bits + + ctrl.io.vec_iface.vcmdq_ready := vu.io.vec_cmdq.ready + ctrl.io.vec_iface.vximm1q_ready := vu.io.vec_ximm1q.ready + ctrl.io.vec_iface.vximm2q_ready := vu.io.vec_ximm2q.ready vu.io.vec_ackq.ready := Bool(true) + // hooking up vector memory interface ctrl.io.ext_mem.req_val := vu.io.dmem_req.valid ctrl.io.ext_mem.req_cmd := vu.io.dmem_req.bits.cmd ctrl.io.ext_mem.req_type := vu.io.dmem_req.bits.typ diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 12c3ea87..dcffc636 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -92,6 +92,8 @@ class ioCtrlAll extends Bundle() val xcpt_ma_ld = Bool(INPUT); val xcpt_ma_st = Bool(INPUT); val fpu = new ioCtrlFPU(); + val vec_dpath = new ioCtrlDpathVec() + val vec_iface = new ioCtrlVecInterface() } class rocketCtrl extends Component @@ -570,6 +572,17 @@ class rocketCtrl extends Component io.fpu.dec.wen && fp_sboard.io.r(3).data } + if (HAVE_VEC) + { + // vector control + val vec = new rocketCtrlVec() + + io.vec_dpath <> vec.io.dpath + io.vec_iface <> vec.io.iface + + vec.io.sr_ev := io.dpath.status(SR_EV) + } + // exception handling // FIXME: verify PC in MEM stage points to valid, restartable instruction val p_irq_timer = 
(io.dpath.status(15).toBool && io.dpath.irq_timer); @@ -718,6 +731,7 @@ class rocketCtrl extends Component id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || id_stall_fpu || io.ext_mem.req_val || id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || + id_vec_val.toBool && !(io.vec_iface.vcmdq_ready && io.vec_iface.vximm1q_ready && io.vec_iface.vximm2q_ready) || // being conservative ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || id_console_out_val && !io.console.rdy ); diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala new file mode 100644 index 00000000..4fe44eb5 --- /dev/null +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -0,0 +1,103 @@ +package Top + +import Chisel._ +import Node._ +import Constants._ +import Instructions._ + +class ioCtrlDpathVec extends Bundle +{ + val valid = Bool(INPUT) + val inst = Bits(32, INPUT) + val appvl0 = Bool(INPUT) + val wen = Bool(OUTPUT) + val fn = Bits(1, OUTPUT) + val sel_vcmd = Bits(3, OUTPUT) + val sel_vimm = Bits(1, OUTPUT) +} + +class ioCtrlVecInterface extends Bundle +{ + val vcmdq_valid = Bool(OUTPUT) + val vcmdq_ready = Bool(INPUT) + val vximm1q_valid = Bool(OUTPUT) + val vximm1q_ready = Bool(INPUT) + val vximm2q_valid = Bool(OUTPUT) + val vximm2q_ready = Bool(INPUT) +} + +class ioCtrlVec extends Bundle +{ + val dpath = new ioCtrlDpathVec() + val iface = new ioCtrlVecInterface() + val sr_ev = Bool(INPUT) +} + +class rocketCtrlVec extends Component +{ + val io = new ioCtrlVec() + + val veccs = + ListLookup(io.dpath.inst, + // appvlmask + // | vcmdq + // wen | | vximm1q + // val vcmd vimm | fn | | | vximm2q + // | | | | | | | | | + List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N), + VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VFMVV-> 
List(Y,VCMD_TF,VIMM_X, N,VEC_X ,Y,Y,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), + VLD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y) + )) + + val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs + val 
wb_vec_cmdq_val :: wb_vec_ximm1q_val :: wb_vec_ximm2q_val :: Nil = veccs0 + + val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val.toBool && !(wb_vec_appvlmask.toBool && io.dpath.appvl0) + + io.iface.vcmdq_valid := valid_common && wb_vec_cmdq_val + io.iface.vximm1q_valid := valid_common && wb_vec_ximm1q_val + io.iface.vximm2q_valid := valid_common && wb_vec_ximm2q_val + + io.dpath.wen := wb_vec_wen.toBool + io.dpath.fn := wb_vec_fn + io.dpath.sel_vcmd := wb_sel_vcmd + io.dpath.sel_vimm := wb_sel_vimm +} diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 593ef45e..2d5a7c85 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -36,12 +36,11 @@ class ioDpathAll extends Bundle() val dmem = new ioDpathDmem(); val ext_mem = new ioDmem(List("req_val", "req_idx", "req_ppn", "req_data", "req_tag", "resp_val", "resp_data", "resp_tag")) val imem = new ioDpathImem(); - val vcmdq = new io_vec_cmdq() - val vximm1q = new io_vec_ximm1q() - val vximm2q = new io_vec_ximm2q() val ptbr_wen = Bool(OUTPUT); val ptbr = UFix(PADDR_BITS, OUTPUT); val fpu = new ioDpathFPU(); + val vec_ctrl = new ioCtrlDpathVec().flip() + val vec_iface = new ioDpathVecInterface() } class rocketDpath extends Component @@ -55,8 +54,6 @@ class rocketDpath extends Component val pcr = new rocketDpathPCR(); val ex_pcr = pcr.io.r.data; - val vec = new rocketDpathVec() - val alu = new rocketDpathALU(); val ex_alu_out = alu.io.out; val ex_alu_adder_out = alu.io.adder_out; @@ -425,23 +422,37 @@ class rocketDpath extends Component wb_reg_ctrl_wen_pcr := mem_reg_ctrl_wen_pcr; } - // vector datapath - vec.io.valid := wb_reg_valid - vec.io.sr_ev := pcr.io.status(SR_EV) - vec.io.inst := wb_reg_inst - vec.io.waddr := wb_reg_waddr - vec.io.raddr1 := wb_reg_raddr1 - vec.io.vecbank := pcr.io.vecbank - vec.io.vecbankcnt := pcr.io.vecbankcnt - vec.io.wdata := wb_reg_wdata - vec.io.rs2 := wb_reg_rs2 - - // regfile write + // regfile write val 
wb_src_dmem = Reg(io.ctrl.mem_load) && wb_reg_valid || r_dmem_resp_replay - wb_wdata := - Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), - Mux(wb_src_dmem, io.dmem.resp_data_subword, - wb_reg_wdata)) + + if (HAVE_VEC) + { + // vector datapath + val vec = new rocketDpathVec() + + vec.io.ctrl <> io.vec_ctrl + io.vec_iface <> vec.io.iface + + vec.io.valid := wb_reg_valid + vec.io.inst := wb_reg_inst + vec.io.waddr := wb_reg_waddr + vec.io.raddr1 := wb_reg_raddr1 + vec.io.vecbank := pcr.io.vecbank + vec.io.vecbankcnt := pcr.io.vecbankcnt + vec.io.wdata := wb_reg_wdata + vec.io.rs2 := wb_reg_rs2 + + wb_wdata := + Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), + Mux(wb_src_dmem, io.dmem.resp_data_subword, + wb_reg_wdata)) + } + else + { + wb_wdata := + Mux(wb_src_dmem, io.dmem.resp_data_subword, + wb_reg_wdata) + } rfile.io.w0.addr := wb_reg_waddr rfile.io.w0.en := io.ctrl.wb_wen || wb_reg_ll_wb @@ -454,10 +465,6 @@ class rocketDpath extends Component io.ctrl.wb_waddr := wb_reg_waddr; io.ctrl.mem_wb := dmem_resp_replay; - vec.io.vcmdq <> io.vcmdq - vec.io.vximm1q <> io.vximm1q - vec.io.vximm2q <> io.vximm2q - // scoreboard clear (for div/mul and D$ load miss writebacks) io.ctrl.sboard_clr := mem_ll_wb io.ctrl.sboard_clra := mem_ll_waddr diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index fc066504..0fe55fdf 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -4,12 +4,20 @@ import Chisel._ import Node._ import Constants._ import Instructions._ -import hwacha._ +import hwacha.Interface._ + +class ioDpathVecInterface extends Bundle +{ + val vcmdq_bits = Bits(VCMD_SZ, OUTPUT) + val vximm1q_bits = Bits(VIMM_SZ, OUTPUT) + val vximm2q_bits = Bits(VSTRIDE_SZ, OUTPUT) +} class ioDpathVec extends Bundle { + val ctrl = new ioCtrlDpathVec().flip() + val iface = new ioDpathVecInterface() val valid = Bool(INPUT) - val sr_ev = Bool(INPUT) val inst = Bits(32, INPUT) val waddr = UFix(5, INPUT) val 
raddr1 = UFix(5, INPUT) @@ -19,68 +27,12 @@ class ioDpathVec extends Bundle val rs2 = Bits(64, INPUT) val wen = Bool(OUTPUT) val appvl = UFix(12, OUTPUT) - val vcmdq = new io_vec_cmdq() - val vximm1q = new io_vec_ximm1q() - val vximm2q = new io_vec_ximm2q() } class rocketDpathVec extends Component { val io = new ioDpathVec() - val veccs = - ListLookup(io.inst, - // appvlmask - // | vcmdq - // wen | | vximm1q - // val vcmd vimm | fn | | | vximm2q - // | | | | | | | | | - List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N), - VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, N,VEC_X ,Y,Y,N,N), - FENCE_L_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), - VLD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, 
N,VEC_X ,Y,Y,Y,Y), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y) - )) - - val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs - val wb_vec_cmdq_val :: wb_vec_ximm1q_val :: wb_vec_ximm2q_val :: Nil = veccs0 - val nxregs = Cat(UFix(0,1),io.inst(15,10).toUFix) // FIXME: to make the nregs width 7 bits val nfregs = io.inst(21,16).toUFix val nregs = nxregs + nfregs @@ -145,37 +97,35 @@ class rocketDpathVec extends Component val reg_hwvl = Reg(resetVal = UFix(32, 12)) val reg_appvl0 = Reg(resetVal = Bool(true)) val hwvl_vcfg = (uts_per_bank * io.vecbankcnt)(11,0) - val hwvl = Mux(wb_vec_fn.toBool, hwvl_vcfg, reg_hwvl) + val hwvl = Mux(io.ctrl.fn === VEC_CFG, hwvl_vcfg, reg_hwvl) val appvl = Mux(io.wdata(11,0) < hwvl, io.wdata(11,0), hwvl).toUFix - when (io.valid && wb_vec_wen.toBool && wb_vec_fn.toBool) + when (io.valid && io.ctrl.wen && (io.ctrl.fn === VEC_CFG)) { reg_hwvl := hwvl_vcfg reg_appvl0 := !(appvl.orR()) } - io.wen := io.valid && wb_vec_wen.toBool + io.wen := io.valid && io.ctrl.wen io.appvl := appvl val vlenm1 = appvl - Bits(1,1) - val valid_common = io.valid && io.sr_ev && wb_vec_val.toBool && !(wb_vec_appvlmask.toBool && reg_appvl0) - - io.vcmdq.valid := valid_common && wb_vec_cmdq_val - io.vximm1q.valid := valid_common && wb_vec_ximm1q_val - io.vximm2q.valid := valid_common && wb_vec_ximm2q_val - - 
io.vcmdq.bits := - Mux(wb_sel_vcmd === VCMD_I, Cat(Bits(0,2), Bits(0,4), io.inst(9,8), Bits(0,6), Bits(0,6)), - Mux(wb_sel_vcmd === VCMD_F, Cat(Bits(0,2), Bits(1,3), io.inst(9,7), Bits(0,6), Bits(0,6)), - Mux(wb_sel_vcmd === VCMD_TX, Cat(Bits(1,2), io.inst(13,8), Bits(0,1), io.waddr, Bits(0,1), io.raddr1), - Mux(wb_sel_vcmd === VCMD_TF, Cat(Bits(1,2), io.inst(13,8), Bits(1,1), io.waddr, Bits(1,1), io.raddr1), - Mux(wb_sel_vcmd === VCMD_MX, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(0,1), io.waddr, Bits(0,1), io.waddr), - Mux(wb_sel_vcmd === VCMD_MF, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(1,1), io.waddr, Bits(1,1), io.waddr), + io.iface.vcmdq_bits := + Mux(io.ctrl.sel_vcmd === VCMD_I, Cat(Bits(0,2), Bits(0,4), io.inst(9,8), Bits(0,6), Bits(0,6)), + Mux(io.ctrl.sel_vcmd === VCMD_F, Cat(Bits(0,2), Bits(1,3), io.inst(9,7), Bits(0,6), Bits(0,6)), + Mux(io.ctrl.sel_vcmd === VCMD_TX, Cat(Bits(1,2), io.inst(13,8), Bits(0,1), io.waddr, Bits(0,1), io.raddr1), + Mux(io.ctrl.sel_vcmd === VCMD_TF, Cat(Bits(1,2), io.inst(13,8), Bits(1,1), io.waddr, Bits(1,1), io.raddr1), + Mux(io.ctrl.sel_vcmd === VCMD_MX, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(0,1), io.waddr, Bits(0,1), io.waddr), + Mux(io.ctrl.sel_vcmd === VCMD_MF, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(1,1), io.waddr, Bits(1,1), io.waddr), Bits(0,20))))))) - io.vximm1q.bits := - Mux(wb_sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, io.inst(21,10), vlenm1(10,0)), + io.iface.vximm1q_bits := + Mux(io.ctrl.sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, io.inst(21,10), vlenm1(10,0)), io.wdata) // VIMM_ALU - io.vximm2q.bits := io.rs2 + io.iface.vximm2q_bits := io.rs2 + + io.ctrl.valid := io.valid + io.ctrl.inst := io.inst + io.ctrl.appvl0 := reg_appvl0 } From 258d050e1b9e3e9b1687392e97dadb2774088b26 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 15 Feb 2012 14:48:41 -0800 Subject: [PATCH 
0171/1087] add stall logic for vector command queues --- rocket/src/main/scala/ctrl.scala | 5 ++++- rocket/src/main/scala/ctrl_vec.scala | 19 +++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index dcffc636..f60278f8 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -572,6 +572,8 @@ class rocketCtrl extends Component io.fpu.dec.wen && fp_sboard.io.r(3).data } + var vec_replay = Bool(false) + if (HAVE_VEC) { // vector control @@ -579,6 +581,7 @@ class rocketCtrl extends Component io.vec_dpath <> vec.io.dpath io.vec_iface <> vec.io.iface + vec_replay = vec.io.replay vec.io.sr_ev := io.dpath.status(SR_EV) } @@ -655,7 +658,7 @@ class rocketCtrl extends Component mem_reg_replay := replay_ex && !take_pc_wb; mem_reg_kill := kill_ex; - wb_reg_replay := replay_mem && !take_pc_wb; + wb_reg_replay := replay_mem && !take_pc_wb || vec_replay; wb_reg_exception := mem_exception && !take_pc_wb; wb_reg_cause := mem_cause; diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 4fe44eb5..2c2897d8 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -31,6 +31,7 @@ class ioCtrlVec extends Bundle val dpath = new ioCtrlDpathVec() val iface = new ioCtrlVecInterface() val sr_ev = Bool(INPUT) + val replay = Bool(OUTPUT) } class rocketCtrlVec extends Component @@ -88,16 +89,26 @@ class rocketCtrlVec extends Component )) val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs - val wb_vec_cmdq_val :: wb_vec_ximm1q_val :: wb_vec_ximm2q_val :: Nil = veccs0 + val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: Nil = veccs0 val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val.toBool && !(wb_vec_appvlmask.toBool && io.dpath.appvl0) - io.iface.vcmdq_valid := valid_common && wb_vec_cmdq_val - 
io.iface.vximm1q_valid := valid_common && wb_vec_ximm1q_val - io.iface.vximm2q_valid := valid_common && wb_vec_ximm2q_val + val mask_wb_vec_cmdq_ready = !wb_vec_cmdq_enq || io.iface.vcmdq_ready + val mask_wb_vec_ximm1q_ready = !wb_vec_ximm1q_enq || io.iface.vximm1q_ready + val mask_wb_vec_ximm2q_ready = !wb_vec_ximm2q_enq || io.iface.vximm2q_ready io.dpath.wen := wb_vec_wen.toBool io.dpath.fn := wb_vec_fn io.dpath.sel_vcmd := wb_sel_vcmd io.dpath.sel_vimm := wb_sel_vimm + + io.iface.vcmdq_valid := valid_common && wb_vec_cmdq_enq && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready + io.iface.vximm1q_valid := valid_common && mask_wb_vec_cmdq_ready && wb_vec_ximm1q_enq && mask_wb_vec_ximm2q_ready + io.iface.vximm2q_valid := valid_common && mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && wb_vec_ximm2q_enq + + io.replay := valid_common && ( + wb_vec_cmdq_enq && !io.iface.vcmdq_ready || + wb_vec_ximm1q_enq && !io.iface.vximm1q_ready || + wb_vec_ximm2q_enq && !io.iface.vximm2q_ready + ) } From c13524ad3a4ddfd542518bf6fe2d45ee71f68fbe Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 15 Feb 2012 17:49:12 -0800 Subject: [PATCH 0172/1087] fix vcmdq full replay logic --- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/ctrl.scala | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 2905770a..d10093d0 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -193,7 +193,7 @@ object Constants val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = false + val HAVE_VEC = true val FPU_N = UFix(0, 1); val FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f60278f8..e593dac7 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -658,10 +658,12 @@ class rocketCtrl extends Component mem_reg_replay := 
replay_ex && !take_pc_wb; mem_reg_kill := kill_ex; - wb_reg_replay := replay_mem && !take_pc_wb || vec_replay; + wb_reg_replay := replay_mem && !take_pc_wb wb_reg_exception := mem_exception && !take_pc_wb; wb_reg_cause := mem_cause; + val replay_wb = wb_reg_replay || vec_replay + val wb_badvaddr_wen = wb_reg_exception && ((wb_reg_cause === UFix(10)) || (wb_reg_cause === UFix(11))) // write cause to PCR on an exception @@ -671,7 +673,7 @@ class rocketCtrl extends Component io.dpath.sel_pc := Mux(wb_reg_exception, PC_EVEC, // exception - Mux(wb_reg_replay, PC_WB, // replay + Mux(replay_wb, PC_WB, // replay Mux(wb_reg_eret, PC_PCR, // eret instruction Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch From 82cd3625c271ee7556bb1d0aaf01ab42a8a776d9 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 15 Feb 2012 17:53:24 -0800 Subject: [PATCH 0173/1087] add in vackq interface --- rocket/src/main/scala/cpu.scala | 3 ++- rocket/src/main/scala/ctrl.scala | 13 ++++++++++++- rocket/src/main/scala/ctrl_vec.scala | 2 ++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index b87e7549..96b33035 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -161,7 +161,8 @@ class rocketProc extends Component ctrl.io.vec_iface.vcmdq_ready := vu.io.vec_cmdq.ready ctrl.io.vec_iface.vximm1q_ready := vu.io.vec_ximm1q.ready ctrl.io.vec_iface.vximm2q_ready := vu.io.vec_ximm2q.ready - vu.io.vec_ackq.ready := Bool(true) + ctrl.io.vec_iface.vackq_valid := vu.io.vec_ackq.valid + vu.io.vec_ackq.ready := ctrl.io.vec_iface.vackq_ready // hooking up vector memory interface ctrl.io.ext_mem.req_val := vu.io.dmem_req.valid diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index e593dac7..3723ca36 100644 --- a/rocket/src/main/scala/ctrl.scala +++ 
b/rocket/src/main/scala/ctrl.scala @@ -580,7 +580,18 @@ class rocketCtrl extends Component val vec = new rocketCtrlVec() io.vec_dpath <> vec.io.dpath - io.vec_iface <> vec.io.iface + + io.vec_iface.vcmdq_valid := vec.io.iface.vcmdq_valid + io.vec_iface.vximm1q_valid := vec.io.iface.vximm1q_valid + io.vec_iface.vximm2q_valid := vec.io.iface.vximm2q_valid + vec.io.iface.vcmdq_ready := io.vec_iface.vcmdq_ready + vec.io.iface.vximm1q_ready := io.vec_iface.vximm1q_ready + vec.io.iface.vximm2q_ready := io.vec_iface.vximm2q_ready + + // FIXME + // use io.vec_iface.vackq_valid + io.vec_iface.vackq_ready := Bool(true) + vec_replay = vec.io.replay vec.io.sr_ev := io.dpath.status(SR_EV) diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 2c2897d8..1d63cec0 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -24,6 +24,8 @@ class ioCtrlVecInterface extends Bundle val vximm1q_ready = Bool(INPUT) val vximm2q_valid = Bool(OUTPUT) val vximm2q_ready = Bool(INPUT) + val vackq_valid = Bool(INPUT) + val vackq_ready = Bool(OUTPUT) } class ioCtrlVec extends Bundle From fe2c1d132181773c3ccf58ce355fafb1a4b8ce97 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 15 Feb 2012 18:30:58 -0800 Subject: [PATCH 0174/1087] add vec->ctrl fences --- rocket/src/main/scala/ctrl.scala | 19 +++--- rocket/src/main/scala/ctrl_vec.scala | 89 ++++++++++++++-------------- 2 files changed, 58 insertions(+), 50 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 3723ca36..a40c64b3 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -253,8 +253,8 @@ class rocketCtrl extends Component VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), FENCE_L_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, 
REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), FENCE_G_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - FENCE_L_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - FENCE_G_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + FENCE_L_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), + FENCE_G_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), @@ -573,7 +573,7 @@ class rocketCtrl extends Component } var vec_replay = Bool(false) - + var vec_cpfence = Bool(false) if (HAVE_VEC) { // vector control @@ -588,12 +588,16 @@ class rocketCtrl extends Component vec.io.iface.vximm1q_ready := io.vec_iface.vximm1q_ready vec.io.iface.vximm2q_ready := io.vec_iface.vximm2q_ready - // FIXME - // use io.vec_iface.vackq_valid - io.vec_iface.vackq_ready := Bool(true) - vec_replay = vec.io.replay + vec_cpfence = Reg(resetVal = Bool(false)) + when (vec.io.cpfence) { + vec_cpfence := Bool(true) + } + when (io.vec_iface.vackq_valid || wb_reg_exception) { + vec_cpfence := Bool(false) + } + io.vec_iface.vackq_ready := Bool(true) vec.io.sr_ev := io.dpath.status(SR_EV) } @@ -749,6 +753,7 @@ class rocketCtrl extends Component id_mem_val.toBool 
&& !(io.dmem.req_rdy && io.dtlb_rdy) || id_vec_val.toBool && !(io.vec_iface.vcmdq_ready && io.vec_iface.vximm1q_ready && io.vec_iface.vximm2q_ready) || // being conservative ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || + vec_cpfence || id_console_out_val && !io.console.rdy ); val ctrl_stallf = ctrl_stalld; diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 1d63cec0..a7526e14 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -34,6 +34,7 @@ class ioCtrlVec extends Bundle val iface = new ioCtrlVecInterface() val sr_ev = Bool(INPUT) val replay = Bool(OUTPUT) + val cpfence = Bool(OUTPUT) } class rocketCtrlVec extends Component @@ -46,52 +47,53 @@ class rocketCtrlVec extends Component // | vcmdq // wen | | vximm1q // val vcmd vimm | fn | | | vximm2q - // | | | | | | | | | - List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N), - VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, N,VEC_X ,Y,Y,N,N), - FENCE_L_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N), - VLD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X 
,Y,Y,Y,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y) + // | | | | | | | | | cpfence + // | | | | | | | | | | + List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N,N), + VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, N,VEC_X ,Y,Y,N,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,Y), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,Y), + VLD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VLHU-> 
List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N) )) val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs - val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: Nil = veccs0 + val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: wb_vec_cpfence :: Nil = veccs0 val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val.toBool && !(wb_vec_appvlmask.toBool && io.dpath.appvl0) @@ -113,4 +115,5 @@ class rocketCtrlVec extends Component wb_vec_ximm1q_enq && !io.iface.vximm1q_ready || wb_vec_ximm2q_enq && !io.iface.vximm2q_ready ) + 
io.cpfence := valid_common && wb_vec_cpfence && !io.replay } From 8b3b3abd3d9001c8fa2149c8579f5d1d27b5d889 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 15 Feb 2012 18:57:40 -0800 Subject: [PATCH 0175/1087] fix external memory request nack logic --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index a40c64b3..12228da9 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -806,7 +806,7 @@ class rocketCtrl extends Component io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; - io.ext_mem.resp_nack:= mem_reg_ext_mem_val && (io.dmem.resp_nack || Reg(!io.dmem.req_rdy)) + io.ext_mem.resp_nack:= mem_reg_ext_mem_val && (io.dmem.req_kill || io.dmem.resp_nack || Reg(!io.dmem.req_rdy)) } } From fc5ba769dafdbf31205182f962dcb20d89384288 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 15 Feb 2012 18:58:41 -0800 Subject: [PATCH 0176/1087] disable vector unit by default --- rocket/src/main/scala/consts.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index d10093d0..2905770a 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -193,7 +193,7 @@ object Constants val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = true + val HAVE_VEC = false val FPU_N = UFix(0, 1); val FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N; From 1b5e39e7fc488df93e3c91d701c6e5d230fb1e1b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 15 Feb 2012 21:36:08 -0800 Subject: [PATCH 0177/1087] fix bug in BTB a BTB update followed by a taken branch could cause incorrect control flow. 
--- rocket/src/main/scala/dpath_util.scala | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 48aae6cf..cc89def5 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -23,22 +23,22 @@ class rocketDpathBTB(entries: Int) extends Component { val io = new ioDpathBTB(); - val do_update = io.wen || io.clr - val expected_tag = Mux(do_update, io.correct_pc, io.current_pc) - val repl_way = LFSR16(io.wen)(log2up(entries)-1,0) // TODO: pseudo-LRU var hit_reduction = Bool(false) val hit = Wire() { Bool() } + val update = Wire() { Bool() } + var update_reduction = Bool(false) val mux = (new Mux1H(entries)) { Bits(width = VADDR_BITS) } for (i <- 0 until entries) { val tag = Reg() { UFix() } val target = Reg() { UFix() } val valid = Reg(resetVal = Bool(false)) - val my_hit = valid && tag === expected_tag - val my_clr = io.clr && my_hit || io.invalidate - val my_wen = io.wen && (my_hit || !hit && UFix(i) === repl_way) + val my_hit = valid && tag === io.current_pc + val my_update = valid && tag === io.correct_pc + val my_clr = io.clr && my_update || io.invalidate + val my_wen = io.wen && (my_update || !update && UFix(i) === repl_way) valid := !my_clr && (valid || my_wen) when (my_wen) { @@ -47,10 +47,12 @@ class rocketDpathBTB(entries: Int) extends Component } hit_reduction = hit_reduction || my_hit + update_reduction = update_reduction || my_update mux.io.sel(i) := my_hit mux.io.in(i) := target } hit := hit_reduction + update := update_reduction io.hit := hit io.target := mux.io.out.toUFix From 619929eba1cf80fb84347abfa44e9a053724e4a5 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 14 Feb 2012 15:51:32 -0800 Subject: [PATCH 0178/1087] Added coherence tile function defs, with traits and constants --- rocket/src/main/scala/coherence.scala | 128 ++++++++++++++++++++++++++ rocket/src/main/scala/consts.scala | 6 +- 2 
files changed, 133 insertions(+), 1 deletion(-) create mode 100644 rocket/src/main/scala/coherence.scala diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala new file mode 100644 index 00000000..460214e1 --- /dev/null +++ b/rocket/src/main/scala/coherence.scala @@ -0,0 +1,128 @@ +package Top { + +import Chisel._ +import Constants._ + +class TransactionInit extends Bundle { + val ttype = Bits(width = 2) + val tileTransactionID = Bits(width = TILE_XACT_ID_BITS) + val address = Bits(width = PADDR_BITS) + val data = Bits(width = MEM_DATA_BITS) +} + +class TransactionAbort extends Bundle { + val tileTransactionID = Bits(width = TILE_XACT_ID_BITS) +} + +class ProbeRequest extends Bundle { + val ptype = Bits(width = 2) + val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) + val address = Bits(width = PADDR_BITS) +} + +class ProbeReply extends Bundle { + val ptype = Bits(width = 2) + val hasData = Bool() + val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) + val data = Bits(width = MEM_DATA_BITS) +} + +class TransactionReply extends Bundle { + val ttype = Bits(width = 2) + val tileTransactionID = Bits(width = TILE_XACT_ID_BITS) + val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) + val data = Bits(width = MEM_DATA_BITS) +} + +class TransactionFinish extends Bundle { + val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) +} + +class ioTileLink extends Bundle { + val xact_init = new TransactionInit().asOutput + val xact_abort = new TransactionAbort().asInput + val probe_req = new ProbeRequest().asInput + val probe_rep = new ProbeReply().asOutput + val xact_rep = new TransactionReply().asInput + val xact_finish = new TransactionFinish().asOutput +} + +trait ThreeStateIncoherence { + val tileInvalid :: tileClean :: tileDirty :: Nil = Enum(3){ UFix() } + + def cpuCmdToRW( cmd: Bits): (Bool, Bool) = { + val store = (cmd === M_XWR) + val load = (cmd === M_XRD) + val amo = cmd(3).toBool + val read = load || amo 
|| (cmd === M_PFR) + val write = store || amo || (cmd === M_PFW) + (read, write) + } + + def isHit ( cmd: Bits, state: UFix): Bool = { + val (read, write) = cpuCmdToRW(cmd) + ( state === tileClean || state === tileDirty) + } + + def isValid (state: UFix): Bool = { + state != tileInvalid + } + + def needsWriteback (state: UFix): Bool = { + state === tileDirty + } + + def newStateOnWriteback() = tileInvalid + def newStateOnFlush() = tileInvalid + def newState(cmd: Bits, state: UFix): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, tileDirty, Mux(read, Mux(state === tileDirty, tileDirty, tileClean), tileInvalid)) + } + def newStateOnHit(cmd: Bits, state: UFix): UFix = newState(cmd, state) + def newStateOnPrimaryMiss(cmd: Bits): UFix = newState(cmd, tileInvalid) + def newStateOnSecondaryMiss(cmd: Bits, state: UFix): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, tileDirty, state) + } + +} + +trait FourStateCoherence { + + val tileInvalid :: tileShared :: tileExclusiveClean :: tileExclusiveDirty :: Nil = Enum(4){ UFix() } + val globalInvalid :: globalShared :: globalExclusiveClean :: Nil = Enum(3){ UFix() } + val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(3){ UFix() } + + def isHit ( cmd: Bits, state: UFix): Bool = { + val is_hit = Bool(false) + switch(cmd) { + is(M_XRD) { + is_hit := state === tileShared || + state === tileExclusiveClean || + state === tileExclusiveDirty + } + is(M_XWR) { + is_hit := state === tileExclusiveClean || + state === tileExclusiveDirty + } + } + is_hit + } + + def needsWriteback (state: UFix): Bool = { + state === tileExclusiveDirty + } + + def needsSecondaryXact (cmd: Bits, outstanding: TransactionInit): Bool + + def getMetaUpdateOnProbe (incoming: ProbeRequest): Bits = { + val state = UFix(0) + switch(incoming.ptype) { + is(probeInvalidate) { state := tileInvalid } + is(probeDowngrade) { state := tileShared } + } + state.toBits + } +} + +} diff --git a/rocket/src/main/scala/consts.scala 
b/rocket/src/main/scala/consts.scala index 2905770a..34d3a70c 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -160,7 +160,7 @@ object Constants val ASID_BITS = 7; val PERM_BITS = 6; - // rocketNBDCacheDM parameters + // rocketNBDCache parameters val CPU_DATA_BITS = 64; val CPU_TAG_BITS = 9; val DCACHE_TAG_BITS = 1 + CPU_TAG_BITS; @@ -174,6 +174,10 @@ object Constants val NWAYS = 1; require(IDX_BITS+OFFSET_BITS <= PGIDX_BITS); + // coherence parameters + val TILE_XACT_ID_BITS = 1; // log2(NMSHR) + val GLOBAL_XACT_ID_BITS = IDX_BITS; // if one active xact per set + // external memory interface val IMEM_TAG_BITS = 1; val DMEM_TAG_BITS = ceil(log(NMSHR)/log(2)).toInt; From 124efe5281a0cfec90a28a9bc8b0761b718d7c6e Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 15 Feb 2012 13:54:36 -0800 Subject: [PATCH 0179/1087] Replace nbcache manipulation of meta state bits with abstracted functions --- rocket/src/main/scala/coherence.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 85 ++++++++++++--------------- 2 files changed, 40 insertions(+), 47 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 460214e1..7419555e 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -76,7 +76,7 @@ trait ThreeStateIncoherence { def newStateOnFlush() = tileInvalid def newState(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) - Mux(write, tileDirty, Mux(read, Mux(state === tileDirty, tileDirty, tileClean), tileInvalid)) + Mux(write, tileDirty, Mux(read, Mux(state === tileDirty, tileDirty, tileClean), state)) } def newStateOnHit(cmd: Bits, state: UFix): UFix = newState(cmd, state) def newStateOnPrimaryMiss(cmd: Bits): UFix = newState(cmd, tileInvalid) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index b014ac74..41a22728 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ 
b/rocket/src/main/scala/nbdcache.scala @@ -1,9 +1,7 @@ package Top { import Chisel._ -import Node._; -import Constants._; -import scala.math._; +import Constants._ class ioReplacementWayGen extends Bundle { val pick_new_way = Bool(dir = INPUT) @@ -18,7 +16,7 @@ class RandomReplacementWayGen extends Component { { val rand_way_id = UFix(width = log2up(NWAYS)) rand_way_id := LFSR16(io.pick_new_way)(log2up(NWAYS)-1,0) - when (rand_way_id >= UFix(NWAYS, width = log2up(NWAYS))) { io.way_id := UFix(0, width = log2up(NWAYS)) } + when (rand_way_id >= UFix(NWAYS, width = log2up(NWAYS)+1)) { io.way_id := UFix(0, width = log2up(NWAYS)) } .otherwise { io.way_id := rand_way_id } } else io.way_id := UFix(0) @@ -148,8 +146,7 @@ class WritebackReq extends Bundle { } class MetaData extends Bundle { - val valid = Bool() - val dirty = Bool() + val state = UFix(width = 2) val tag = Bits(width = TAG_BITS) } @@ -164,7 +161,7 @@ class MetaArrayArrayReq extends Bundle { val way_en = Bits(width = NWAYS) } -class MSHR(id: Int) extends Component { +class MSHR(id: Int) extends Component with ThreeStateIncoherence { val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) @@ -191,7 +188,7 @@ class MSHR(id: Int) extends Component { } val valid = Reg(resetVal = Bool(false)) - val dirty = Reg { Bool() } + val state = Reg { UFix() } val requested = Reg { Bool() } val refilled = Reg { Bool() } val ppn = Reg { Bits() } @@ -200,8 +197,8 @@ class MSHR(id: Int) extends Component { val req_load = (io.req_cmd === M_XRD) || (io.req_cmd === M_PFR) val req_use_rpq = (io.req_cmd != M_PFR) && (io.req_cmd != M_PFW) - val next_dirty = dirty || io.req_sec_val && io.req_sec_rdy && !req_load - val sec_rdy = io.idx_match && !refilled && (dirty || !requested || req_load) + val next_state = Mux(io.req_sec_val && io.req_sec_rdy, newStateOnSecondaryMiss(io.req_cmd, state), state) + val sec_rdy = io.idx_match && !refilled && (needsWriteback(state) || !requested || req_load) // XXX why doesn't 
this work? // val rpq = (new queue(NRPQ)) { new RPQEntry() } @@ -225,7 +222,7 @@ class MSHR(id: Int) extends Component { when (io.req_pri_val && io.req_pri_rdy) { valid := Bool(true) - dirty := !req_load + state := newStateOnPrimaryMiss(io.req_cmd) requested := Bool(false) refilled := Bool(false) ppn := io.req_ppn @@ -242,7 +239,7 @@ class MSHR(id: Int) extends Component { when (io.meta_req.valid && io.meta_req.ready) { valid := Bool(false) } - dirty := next_dirty + state := next_state } io.idx_match := valid && (idx_ === io.req_idx) @@ -255,8 +252,7 @@ class MSHR(id: Int) extends Component { io.meta_req.valid := valid && refilled && !rpq.io.deq.valid io.meta_req.bits.inner_req.rw := Bool(true) io.meta_req.bits.inner_req.idx := idx_ - io.meta_req.bits.inner_req.data.valid := Bool(true) - io.meta_req.bits.inner_req.data.dirty := dirty + io.meta_req.bits.inner_req.data.state := state io.meta_req.bits.inner_req.data.tag := ppn io.meta_req.bits.way_en := way_oh_ @@ -466,7 +462,7 @@ class WritebackUnit extends Component { io.mem_req_data := wbq.io.deq.bits } -class FlushUnit(lines: Int) extends Component { +class FlushUnit(lines: Int) extends Component with ThreeStateIncoherence{ val io = new Bundle { val req = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) } val resp = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) }.flip() @@ -493,7 +489,7 @@ class FlushUnit(lines: Int) extends Component { } is(s_ready) { when (io.req.valid) { state := s_meta_read; tag := io.req.bits } } is(s_meta_read) { when (io.meta_req.ready) { state := s_meta_wait } } - is(s_meta_wait) { state := Mux(io.meta_resp.valid && io.meta_resp.dirty && !io.wb_req.ready, s_meta_read, s_meta_write) } + is(s_meta_wait) { state := Mux(needsWriteback(io.meta_resp.state) && !io.wb_req.ready, s_meta_read, s_meta_write) } is(s_meta_write) { when (io.meta_req.ready) { state := Mux(~way_cnt === UFix(0) && ~idx_cnt === UFix(0), s_done, s_meta_read); @@ -511,10 +507,9 @@ class FlushUnit(lines: Int) extends 
Component { io.meta_req.bits.way_en := UFixToOH(way_cnt, NWAYS) io.meta_req.bits.inner_req.idx := idx_cnt io.meta_req.bits.inner_req.rw := (state === s_meta_write) || (state === s_reset) - io.meta_req.bits.inner_req.data.valid := Bool(false) - io.meta_req.bits.inner_req.data.dirty := Bool(false) + io.meta_req.bits.inner_req.data.state := newStateOnFlush() io.meta_req.bits.inner_req.data.tag := UFix(0) - io.wb_req.valid := state === s_meta_wait && io.meta_resp.valid && io.meta_resp.dirty + io.wb_req.valid := state === s_meta_wait && needsWriteback(io.meta_resp.state) io.wb_req.bits.ppn := io.meta_resp.tag io.wb_req.bits.idx := idx_cnt io.wb_req.bits.way_oh := UFixToOH(way_cnt, NWAYS) @@ -527,26 +522,23 @@ class MetaDataArray(lines: Int) extends Component { val state_req = (new ioDecoupled) { new MetaArrayReq() } } - val vd_array = Mem(lines, Bits(width = 2)) - vd_array.setReadLatency(1); - val vd_wdata2 = Cat(io.state_req.bits.data.valid, io.state_req.bits.data.dirty) - vd_array.write(io.state_req.bits.idx, vd_wdata2, io.state_req.valid && io.state_req.bits.rw) - val vd_wdata1 = Cat(io.req.bits.data.valid, io.req.bits.data.dirty) - val vd_rdata1 = vd_array.rw(io.req.bits.idx, vd_wdata1, io.req.valid && io.req.bits.rw) + val permissions_array = Mem(lines, Bits(width = 2)) + permissions_array.setReadLatency(1); + permissions_array.write(io.state_req.bits.idx, io.state_req.bits.data.state, io.state_req.valid && io.state_req.bits.rw) + val permissions_rdata1 = permissions_array.rw(io.req.bits.idx, io.req.bits.data.state, io.req.valid && io.req.bits.rw) // don't allow reading and writing of vd_array in same cycle. // this could be eliminated if the read port were combinational. 
- val vd_conflict = io.state_req.valid && (io.req.bits.idx === io.state_req.bits.idx) + val permissions_conflict = io.state_req.valid && (io.req.bits.idx === io.state_req.bits.idx) val tag_array = Mem(lines, io.resp.tag) tag_array.setReadLatency(1); tag_array.setTarget('inst) val tag_rdata = tag_array.rw(io.req.bits.idx, io.req.bits.data.tag, io.req.valid && io.req.bits.rw, cs = io.req.valid) - io.resp.valid := vd_rdata1(1).toBool - io.resp.dirty := vd_rdata1(0).toBool + io.resp.state := permissions_rdata1.toUFix io.resp.tag := tag_rdata - io.req.ready := !vd_conflict + io.req.ready := !permissions_conflict } class MetaDataArrayArray(lines: Int) extends Component { @@ -692,7 +684,7 @@ class ioDCache(view: List[String] = null) extends Bundle(view) { val resp_val = Bool(OUTPUT); } -class HellaCache extends Component { +class HellaCache extends Component with ThreeStateIncoherence { val io = new Bundle { val cpu = new ioDmem() val mem = new ioDCache().flip @@ -774,6 +766,7 @@ class HellaCache extends Component { io.cpu.xcpt_ma_ld := r_cpu_req_val_ && r_req_read && misaligned io.cpu.xcpt_ma_st := r_cpu_req_val_ && r_req_write && misaligned + // tags val meta = new MetaDataArrayArray(lines) val meta_arb = (new Arbiter(3)) { new MetaArrayArrayReq() } @@ -788,17 +781,16 @@ class HellaCache extends Component { meta_arb.io.in(2).valid := io.cpu.req_val meta_arb.io.in(2).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) meta_arb.io.in(2).bits.inner_req.rw := Bool(false) - meta_arb.io.in(2).bits.inner_req.data.valid := Bool(false) // don't care - meta_arb.io.in(2).bits.inner_req.data.dirty := Bool(false) // don't care - meta_arb.io.in(2).bits.inner_req.data.tag := UFix(0) // don't care + meta_arb.io.in(2).bits.inner_req.data.state := UFix(0) // don't care + meta_arb.io.in(2).bits.inner_req.data.tag := UFix(0) // don't care meta_arb.io.in(2).bits.way_en := ~UFix(0, NWAYS) val early_tag_nack = !meta_arb.io.in(2).ready val cpu_req_tag = Cat(io.cpu.req_ppn, 
r_cpu_req_idx)(tagmsb,taglsb) - val tag_match_arr = (0 until NWAYS).map( w => meta.io.resp(w).valid && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_match_arr = (0 until NWAYS).map( w => isHit(io.cpu.req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR val tag_hit = r_cpu_req_val && tag_match val tag_miss = r_cpu_req_val && !tag_match - val hit_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*) //TODO: use GenArray + val hit_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use GenArray val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), hit_way_oh, meta.io.way_en) val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_oh, data.io.way_en) val meta_resp_mux = Mux1H(NWAYS, meta_resp_way_oh, meta.io.resp) @@ -817,7 +809,7 @@ class HellaCache extends Component { val replaced_way_id = replacer.io.way_id val replaced_way_oh = UFixToOH(replaced_way_id, NWAYS) val meta_wb_mux = meta.io.resp(replaced_way_id) - val dirty = meta_wb_mux.valid && meta_wb_mux.dirty + val needs_writeback = needsWriteback(meta_wb_mux.state) // refill response val block_during_refill = !io.mem.resp_val && (rr_count != UFix(0)) @@ -857,20 +849,21 @@ class HellaCache extends Component { // writeback val wb_rdy = wb_arb.io.in(1).ready && !p_store_idx_match - wb_arb.io.in(1).valid := tag_miss && r_req_readwrite && dirty && !p_store_idx_match + wb_arb.io.in(1).valid := tag_miss && r_req_readwrite && needs_writeback && !p_store_idx_match wb_arb.io.in(1).bits.ppn := meta_wb_mux.tag wb_arb.io.in(1).bits.idx := r_cpu_req_idx(indexmsb,indexlsb) wb_arb.io.in(1).bits.way_oh := replaced_way_oh // tag update after a miss or a store to an exclusive clean line. 
- val clear_valid = tag_miss && r_req_readwrite && meta_wb_mux.valid && (!dirty || wb_rdy) - val set_dirty = tag_hit && !meta_resp_mux.dirty && r_req_write + val set_wb_state = tag_miss && r_req_readwrite && isValid(meta_wb_mux.state) && (!needs_writeback || wb_rdy) + //val set_hit_state = tag_hit && meta_resp_mux.state != newStateOnHit(r_cpu_req_cmd) + val new_hit_state = newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) + val set_hit_state = tag_hit && meta_resp_mux.state != new_hit_state meta.io.state_req.bits.inner_req.rw := Bool(true) meta.io.state_req.bits.inner_req.idx := r_cpu_req_idx(indexmsb,indexlsb) - meta.io.state_req.bits.inner_req.data.valid := tag_match - meta.io.state_req.bits.inner_req.data.dirty := tag_match - meta.io.state_req.valid := clear_valid || set_dirty - meta.io.state_req.bits.way_en := Mux(clear_valid, replaced_way_oh, hit_way_oh) + meta.io.state_req.bits.inner_req.data.state := Mux(set_wb_state, newStateOnWriteback(), new_hit_state) + meta.io.state_req.bits.way_en := Mux(set_wb_state, replaced_way_oh, hit_way_oh) + meta.io.state_req.valid := set_wb_state || set_hit_state // pending store data, also used for AMO RHS val amoalu = new AMOALU @@ -887,7 +880,7 @@ class HellaCache extends Component { // miss handling val mshr = new MSHRFile() - mshr.io.req_val := tag_miss && r_req_readwrite && (!dirty || wb_rdy) && (!r_req_write || replayer.io.sdq_enq.ready) + mshr.io.req_val := tag_miss && r_req_readwrite && (!needs_writeback || wb_rdy) && (!r_req_write || replayer.io.sdq_enq.ready) mshr.io.req_ppn := cpu_req_tag mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) mshr.io.req_tag := r_cpu_req_tag @@ -901,7 +894,7 @@ class HellaCache extends Component { mshr.io.mem_req <> wb.io.refill_req mshr.io.meta_req <> meta_arb.io.in(1) mshr.io.replay <> replayer.io.replay - replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!dirty || wb_rdy) && mshr.io.req_rdy + replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!needs_writeback || wb_rdy) 
&& mshr.io.req_rdy replayer.io.sdq_enq.bits := cpu_req_data data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh @@ -965,7 +958,7 @@ class HellaCache extends Component { val pending_fence = Reg(resetVal = Bool(false)) pending_fence := (r_cpu_req_val && r_req_fence || pending_fence) && !flush_rdy val nack_hit = p_store_match || r_req_write && !p_store_rdy - val nack_miss = dirty && !wb_rdy || !mshr.io.req_rdy || r_req_write && !replayer.io.sdq_enq.ready + val nack_miss = needs_writeback && !wb_rdy || !mshr.io.req_rdy || r_req_write && !replayer.io.sdq_enq.ready val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || !flushed && r_req_flush val nack = early_nack || r_req_readwrite && Mux(tag_match, nack_hit, nack_miss) || nack_flush From d46e59a16de4e51bd08025b53fcf178a73bd9317 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 16 Feb 2012 12:34:51 -0800 Subject: [PATCH 0180/1087] Abstract base nbcache class --- rocket/src/main/scala/nbdcache.scala | 13 ++++++++++++- rocket/src/main/scala/top.scala | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 41a22728..42d402e3 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -684,7 +684,18 @@ class ioDCache(view: List[String] = null) extends Bundle(view) { val resp_val = Bool(OUTPUT); } -class HellaCache extends Component with ThreeStateIncoherence { +abstract class HellaCache extends Component { + def isHit ( cmd: Bits, state: UFix): Bool + def isValid (state: UFix): Bool + def needsWriteback (state: UFix): Bool + def newStateOnWriteback(): UFix + def newStateOnFlush(): UFix + def newStateOnHit(cmd: Bits, state: UFix): UFix + def newStateOnPrimaryMiss(cmd: Bits): UFix + def newStateOnSecondaryMiss(cmd: Bits, state: UFix): UFix +} + +class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val 
io = new Bundle { val cpu = new ioDmem() val mem = new ioDCache().flip diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 92c6b9dc..ec40a7c7 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -18,7 +18,7 @@ class Top() extends Component { val icache = new rocketICache(128, 2); // 128 sets x 2 ways val icache_pf = new rocketIPrefetcher(); val vicache = new rocketICache(128, 2); // 128 sets x 2 ways - val dcache = new HellaCache(); + val dcache = new HellaCacheUniproc(); val arbiter = new rocketMemArbiter(); arbiter.io.mem <> io.mem; From e555fd3fc4b3f89b9e62ce3e48db4df3b3a525cd Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 16 Feb 2012 12:59:38 -0800 Subject: [PATCH 0181/1087] Abstract class for coherence policies --- rocket/src/main/scala/coherence.scala | 39 +++++++++++++++------------ 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 7419555e..b55ac9a1 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -47,9 +47,7 @@ class ioTileLink extends Bundle { val xact_finish = new TransactionFinish().asOutput } -trait ThreeStateIncoherence { - val tileInvalid :: tileClean :: tileDirty :: Nil = Enum(3){ UFix() } - +trait CoherencePolicy { def cpuCmdToRW( cmd: Bits): (Bool, Bool) = { val store = (cmd === M_XWR) val load = (cmd === M_XRD) @@ -58,6 +56,10 @@ trait ThreeStateIncoherence { val write = store || amo || (cmd === M_PFW) (read, write) } +} + +trait ThreeStateIncoherence extends CoherencePolicy { + val tileInvalid :: tileClean :: tileDirty :: Nil = Enum(3){ UFix() } def isHit ( cmd: Bits, state: UFix): Bool = { val (read, write) = cpuCmdToRW(cmd) @@ -87,32 +89,35 @@ trait ThreeStateIncoherence { } -trait FourStateCoherence { +trait FourStateCoherence extends CoherencePolicy { val tileInvalid :: tileShared :: tileExclusiveClean :: tileExclusiveDirty :: 
Nil = Enum(4){ UFix() } val globalInvalid :: globalShared :: globalExclusiveClean :: Nil = Enum(3){ UFix() } val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(3){ UFix() } def isHit ( cmd: Bits, state: UFix): Bool = { - val is_hit = Bool(false) - switch(cmd) { - is(M_XRD) { - is_hit := state === tileShared || - state === tileExclusiveClean || - state === tileExclusiveDirty - } - is(M_XWR) { - is_hit := state === tileExclusiveClean || - state === tileExclusiveDirty - } - } - is_hit + val (read, write) = cpuCmdToRW(cmd) + ((read && ( state === tileShared || state === tileExclusiveClean || state === tileExclusiveDirty)) || + (write && (state === tileExclusiveClean || state === tileExclusiveDirty))) + } + + def isValid (state: UFix): Bool = { + state != tileInvalid } def needsWriteback (state: UFix): Bool = { state === tileExclusiveDirty } + def newStateOnWriteback() = tileInvalid + def newStateOnFlush() = tileInvalid + + // TODO: New funcs as compared to incoherent protocol: + def newState(cmd: Bits, state: UFix): UFix + def newStateOnHit(cmd: Bits, state: UFix): UFix + def newStateOnPrimaryMiss(cmd: Bits): UFix + def newStateOnSecondaryMiss(cmd: Bits, state: UFix): UFix + def needsSecondaryXact (cmd: Bits, outstanding: TransactionInit): Bool def getMetaUpdateOnProbe (incoming: ProbeRequest): Bits = { From 9af86633d762a79c95130371a84bd7b0dc60b928 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 17 Feb 2012 17:56:01 -0800 Subject: [PATCH 0182/1087] invalidate I$ prefetcher when invalidating I$ --- rocket/src/main/scala/icache_prefetch.scala | 5 +++++ rocket/src/main/scala/top.scala | 1 + 2 files changed, 6 insertions(+) diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index b69f03a0..1d0615aa 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -19,6 +19,7 @@ class ioIPrefetcherMem(view: List[String] = null) extends Bundle (view) class 
ioIPrefetcher extends Bundle() { val icache = new ioICache(); val mem = new ioIPrefetcherMem(); + val invalidate = Bool(INPUT) } class rocketIPrefetcher extends Component() { @@ -84,6 +85,10 @@ class rocketIPrefetcher extends Component() { when (fill_done.toBool & ip_mem_resp_val.toBool) { state := s_req_wait; } } } + + when (io.invalidate) { + state := s_invalid + } } } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index ec40a7c7..a3b1f5c5 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -30,6 +30,7 @@ class Top() extends Component { cpu.io.debug <> io.debug; cpu.io.console <> io.console; + icache_pf.io.invalidate := cpu.io.imem.invalidate icache.io.mem <> icache_pf.io.icache; cpu.io.imem <> icache.io.cpu; cpu.io.vimem <> vicache.io.cpu; From 7034c9be65771e79b196a61eec9fe81c5caf912d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 19 Feb 2012 23:15:45 -0800 Subject: [PATCH 0183/1087] new htif protocol and implementation You must update your fesvr and isasim! 
--- rocket/src/main/scala/arbiter.scala | 29 +++-- rocket/src/main/scala/consts.scala | 1 - rocket/src/main/scala/cpu.scala | 22 +--- rocket/src/main/scala/ctrl.scala | 9 +- rocket/src/main/scala/dpath.scala | 9 +- rocket/src/main/scala/dpath_util.scala | 84 ++++++------- rocket/src/main/scala/htif.scala | 168 +++++++++++++++++++++++++ rocket/src/main/scala/top.scala | 15 ++- 8 files changed, 242 insertions(+), 95 deletions(-) create mode 100644 rocket/src/main/scala/htif.scala diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 4a60eb10..da5495e6 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -24,6 +24,7 @@ class ioMemArbiter extends Bundle() { // val icache = new ioICache(); val icache = new ioIPrefetcherMem().flip(); val vicache = new ioICache(); + val htif = new ioDCache(); } class rocketMemArbiter extends Component { @@ -34,26 +35,31 @@ class rocketMemArbiter extends Component { // ***************************** // Memory request is valid if either icache or dcache have a valid request - io.mem.req_val := (io.icache.req_val || io.vicache.req_val || io.dcache.req_val); + io.mem.req_val := io.icache.req_val || io.vicache.req_val || io.dcache.req_val || io.htif.req_val - // Set read/write bit. ICache always reads - io.mem.req_rw := Mux(io.dcache.req_val, io.dcache.req_rw, Bool(false)); + // Set read/write bit. 
I$ always reads + io.mem.req_rw := + Mux(io.dcache.req_val, io.dcache.req_rw, + Mux(io.icache.req_val, Bool(false), + Mux(io.vicache.req_val, Bool(false), + io.htif.req_rw))) - // Give priority to ICache + // Give priority to D$ io.mem.req_addr := Mux(io.dcache.req_val, io.dcache.req_addr, Mux(io.icache.req_val, io.icache.req_addr, - io.vicache.req_addr)) + Mux(io.vicache.req_val, io.vicache.req_addr, + io.htif.req_addr))) + + io.mem.req_wdata := Mux(io.dcache.req_val, io.dcache.req_wdata, io.htif.req_wdata) // low bit of tag to indicate D$, I$, and VI$ - val t_dcache :: t_icache :: t_vicache :: Nil = Enum(3){ UFix() } + val t_dcache :: t_icache :: t_vicache :: t_htif :: Nil = Enum(4){ UFix() } io.mem.req_tag := Mux(io.dcache.req_val, Cat(io.dcache.req_tag, t_dcache), Mux(io.icache.req_val, Cat(io.icache.req_tag, t_icache), - Cat(Bits(0, MEM_TAG_BITS-2), t_vicache))) - - // Just pass through write data (only D$ will write) - io.mem.req_wdata := io.dcache.req_wdata; + Mux(io.vicache.req_val, t_vicache, + t_htif))) // ***************************** // Interface to caches @@ -64,16 +70,19 @@ class rocketMemArbiter extends Component { io.dcache.req_rdy := io.mem.req_rdy; io.icache.req_rdy := io.mem.req_rdy && !io.dcache.req_val; io.vicache.req_rdy := io.mem.req_rdy && !io.dcache.req_val && !io.icache.req_val + io.htif.req_rdy := io.mem.req_rdy && !io.dcache.req_val && !io.icache.req_val && !io.vicache.req_val // Response will only be valid for D$ or I$ not both because of tag bits io.dcache.resp_val := io.mem.resp_val && (io.mem.resp_tag(1,0) === t_dcache) io.icache.resp_val := io.mem.resp_val && (io.mem.resp_tag(1,0) === t_icache) io.vicache.resp_val := io.mem.resp_val && (io.mem.resp_tag(1,0) === t_vicache) + io.htif.resp_val := io.mem.resp_val && (io.mem.resp_tag(1,0) === t_htif) // Feed through data to both io.dcache.resp_data := io.mem.resp_data; io.icache.resp_data := io.mem.resp_data; io.vicache.resp_data := io.mem.resp_data + io.htif.resp_data := 
io.mem.resp_data io.dcache.resp_tag := io.mem.resp_tag >> UFix(2) io.icache.resp_tag := io.mem.resp_tag >> UFix(2) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 34d3a70c..e00e51cb 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -138,7 +138,6 @@ object Constants val PCR_TOHOST = UFix(16, 5); val PCR_FROMHOST = UFix(17, 5); val PCR_VECBANK = UFix(18, 5); - val PCR_CONSOLE = UFix(19, 5); // definition of bits in PCR status reg val SR_ET = 0; // enable traps diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 96b33035..c8ac9643 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -10,25 +10,10 @@ class ioDebug(view: List[String] = null) extends Bundle(view) val error_mode = Bool(OUTPUT); } -class ioHost(view: List[String] = null) extends Bundle(view) -{ - val from_wen = Bool(INPUT); - val from = Bits(64, INPUT); - val to = Bits(64, OUTPUT); -} - -class ioConsole(view: List[String] = null) extends Bundle(view) -{ - val rdy = Bool(INPUT); - val valid = Bool(OUTPUT); - val bits = Bits(8, OUTPUT); -} - class ioRocket extends Bundle() { val debug = new ioDebug(); - val console = new ioConsole(); - val host = new ioHost(); + val host = new ioHTIF(); val imem = new ioImem().flip(); val vimem = new ioImem().flip(); val dmem = new ioDmem().flip(); @@ -47,6 +32,7 @@ class rocketProc extends Component val ptw = new rocketPTW(); val arb = new rocketDmemArbiter(); + ctrl.io.htif_reset := io.host.reset ctrl.io.dpath <> dpath.io.ctrl; dpath.io.host <> io.host; dpath.io.debug <> io.debug; @@ -112,10 +98,6 @@ class rocketProc extends Component dpath.io.dmem.resp_data := arb.io.cpu.resp_data; dpath.io.dmem.resp_data_subword := io.dmem.resp_data_subword; - io.console.bits := dpath.io.console.bits; - io.console.valid := dpath.io.console.valid; - ctrl.io.console.rdy := io.console.rdy; - if (HAVE_FPU) { val fpu = new rocketFPU(4,6) diff 
--git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 12228da9..4bf8f4cc 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -77,8 +77,8 @@ class ioCtrlDpath extends Bundle() class ioCtrlAll extends Bundle() { + val htif_reset = Bool(INPUT) val dpath = new ioCtrlDpath(); - val console = new ioConsole(List("rdy")); val imem = new ioImem(List("req_val", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip(); val ext_mem = new ioDmem(List("req_val", "req_cmd", "req_type", "resp_nack")) @@ -298,8 +298,6 @@ class rocketCtrl extends Component val id_raddr1 = io.dpath.inst(26,22); val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27)); - val id_console_out_val = id_wen_pcr.toBool && (id_raddr2 === PCR_CONSOLE); - val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) val wb_reg_dcache_miss = Reg(io.dmem.resp_miss, resetVal = Bool(false)); @@ -699,7 +697,7 @@ class rocketCtrl extends Component io.dpath.wen_btb := !ex_reg_btb_hit && br_taken io.dpath.clr_btb := ex_reg_btb_hit && !br_taken || id_reg_icmiss; - io.imem.req_val := take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay) + io.imem.req_val := !io.htif_reset && (take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay)) // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. 
val data_hazard_ex = ex_reg_wen && @@ -753,8 +751,7 @@ class rocketCtrl extends Component id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || id_vec_val.toBool && !(io.vec_iface.vcmdq_ready && io.vec_iface.vximm1q_ready && io.vec_iface.vximm2q_ready) || // being conservative ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || - vec_cpfence || - id_console_out_val && !io.console.rdy + vec_cpfence ); val ctrl_stallf = ctrl_stalld; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 2d5a7c85..9b167c5e 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -29,9 +29,8 @@ class ioDpathImem extends Bundle() class ioDpathAll extends Bundle() { - val host = new ioHost(); + val host = new ioHTIF(); val ctrl = new ioCtrlDpath().flip(); - val console = new ioConsole(List("valid","bits")); val debug = new ioDebug(); val dmem = new ioDpathDmem(); val ext_mem = new ioDmem(List("req_val", "req_idx", "req_ppn", "req_data", "req_tag", "resp_val", "resp_data", "resp_tag")) @@ -313,9 +312,7 @@ class rocketDpath extends Component Mux(ex_reg_ctrl_eret, PCR_EPC, ex_reg_raddr2); - pcr.io.host.from_wen <> io.host.from_wen; - pcr.io.host.from <> io.host.from; - pcr.io.host.to <> io.host.to; + pcr.io.host <> io.host io.ctrl.irq_timer := pcr.io.irq_timer; io.ctrl.irq_ipi := pcr.io.irq_ipi; @@ -483,8 +480,6 @@ class rocketDpath extends Component pcr.io.cause := io.ctrl.cause; pcr.io.pc := wb_reg_pc; pcr.io.badvaddr_wen := io.ctrl.badvaddr_wen; - io.console.bits := pcr.io.console_data; - io.console.valid := pcr.io.console_val; } } diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index cc89def5..62aab094 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -60,7 +60,7 @@ class rocketDpathBTB(entries: Int) extends Component class ioDpathPCR extends Bundle() { - val host = new ioHost(List("from", "from_wen", 
"to")); + val host = new ioHTIF() val debug = new ioDebug(List("error_mode", "log_control")); val r = new ioReadPort(); val w = new ioWritePort(); @@ -78,8 +78,6 @@ class ioDpathPCR extends Bundle() val ptbr_wen = Bool(OUTPUT); val irq_timer = Bool(OUTPUT); val irq_ipi = Bool(OUTPUT); - val console_data = Bits(8, OUTPUT); - val console_val = Bool(OUTPUT); val vecbank = Bits(8, OUTPUT) val vecbankcnt = UFix(4, OUTPUT) } @@ -119,11 +117,19 @@ class rocketDpathPCR extends Component val reg_status = Cat(reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, reg_status_ec, reg_status_ev, reg_status_ef, reg_status_et); val rdata = Wire() { Bits() }; - io.ptbr_wen := reg_status_vm.toBool && io.w.en && (io.w.addr === PCR_PTBR); + val ren = io.r.en || io.host.pcr_ren + val raddr = Mux(io.r.en, io.r.addr, io.host.pcr_addr) + io.host.pcr_rdata := rdata + + val wen = io.w.en || io.host.pcr_wen + val waddr = Mux(io.w.en, io.w.addr, io.host.pcr_addr) + val wdata = Mux(io.w.en, io.w.data, io.host.pcr_wdata) + io.host.pcr_rdy := Mux(io.host.pcr_wen, !io.w.en, !io.r.en) + + io.ptbr_wen := reg_status_vm.toBool && wen && (waddr === PCR_PTBR); io.status := Cat(reg_status_vm, reg_status_im, reg_status); io.evec := reg_ebase; io.ptbr := reg_ptbr; - io.host.to := Mux(io.host.from_wen, Bits(0), reg_tohost); io.debug.error_mode := reg_error_mode; io.r.data := rdata; @@ -133,19 +139,6 @@ class rocketDpathPCR extends Component cnt = cnt + reg_vecbank(i) io.vecbankcnt := cnt(3,0) - val console_wen = !io.exception && io.w.en && (io.w.addr === PCR_CONSOLE); - io.console_data := Mux(console_wen, io.w.data(7,0), Bits(0,8)); - io.console_val := console_wen; - - when (io.host.from_wen) { - reg_tohost := Bits(0); - reg_fromhost := io.host.from; - } - .elsewhen (io.w.en && (io.w.addr === PCR_TOHOST)) { - reg_tohost := io.w.data; - reg_fromhost := Bits(0); - } - val badvaddr_sign = Mux(io.w.data(VADDR_BITS-1), ~io.w.data(63,VADDR_BITS) === UFix(0), io.w.data(63,VADDR_BITS) != UFix(0)) when 
(io.badvaddr_wen) { reg_badvaddr := Cat(badvaddr_sign, io.w.data(VADDR_BITS-1,0)).toUFix; @@ -185,37 +178,38 @@ class rocketDpathPCR extends Component io.irq_timer := r_irq_timer; io.irq_ipi := r_irq_ipi; - when (io.w.en) { - when (io.w.addr === PCR_STATUS) { - reg_status_vm := io.w.data(SR_VM).toBool; - reg_status_im := io.w.data(15,8); - reg_status_sx := io.w.data(SR_SX).toBool; - reg_status_ux := io.w.data(SR_UX).toBool; - reg_status_s := io.w.data(SR_S).toBool; - reg_status_ps := io.w.data(SR_PS).toBool; - reg_status_ev := Bool(HAVE_VEC) && io.w.data(SR_EV).toBool; - reg_status_ef := Bool(HAVE_FPU) && io.w.data(SR_EF).toBool; - reg_status_ec := Bool(HAVE_RVC) && io.w.data(SR_EC).toBool; - reg_status_et := io.w.data(SR_ET).toBool; + when (wen) { + when (waddr === PCR_STATUS) { + reg_status_vm := wdata(SR_VM).toBool; + reg_status_im := wdata(15,8); + reg_status_sx := wdata(SR_SX).toBool; + reg_status_ux := wdata(SR_UX).toBool; + reg_status_s := wdata(SR_S).toBool; + reg_status_ps := wdata(SR_PS).toBool; + reg_status_ev := Bool(HAVE_VEC) && wdata(SR_EV).toBool; + reg_status_ef := Bool(HAVE_FPU) && wdata(SR_EF).toBool; + reg_status_ec := Bool(HAVE_RVC) && wdata(SR_EC).toBool; + reg_status_et := wdata(SR_ET).toBool; } - when (io.w.addr === PCR_EPC) { reg_epc := io.w.data(VADDR_BITS,0).toUFix; } - when (io.w.addr === PCR_BADVADDR) { reg_badvaddr := io.w.data(VADDR_BITS,0).toUFix; } - when (io.w.addr === PCR_EVEC) { reg_ebase := io.w.data(VADDR_BITS-1,0).toUFix; } - when (io.w.addr === PCR_COUNT) { reg_count := io.w.data(31,0).toUFix; } - when (io.w.addr === PCR_COMPARE) { reg_compare := io.w.data(31,0).toUFix; r_irq_timer := Bool(false); } - when (io.w.addr === PCR_CAUSE) { reg_cause := io.w.data(4,0); } - when (io.w.addr === PCR_FROMHOST) { reg_fromhost := io.w.data; } - when (io.w.addr === PCR_SEND_IPI) { r_irq_ipi := Bool(true); } - when (io.w.addr === PCR_CLR_IPI) { r_irq_ipi := Bool(false); } - when (io.w.addr === PCR_K0) { reg_k0 := io.w.data; } - when 
(io.w.addr === PCR_K1) { reg_k1 := io.w.data; } - when (io.w.addr === PCR_PTBR) { reg_ptbr := Cat(io.w.data(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } - when (io.w.addr === PCR_VECBANK) { reg_vecbank := io.w.data(7,0) } + when (waddr === PCR_EPC) { reg_epc := wdata(VADDR_BITS,0).toUFix; } + when (waddr === PCR_BADVADDR) { reg_badvaddr := wdata(VADDR_BITS,0).toUFix; } + when (waddr === PCR_EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toUFix; } + when (waddr === PCR_COUNT) { reg_count := wdata(31,0).toUFix; } + when (waddr === PCR_COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } + when (waddr === PCR_CAUSE) { reg_cause := wdata(4,0); } + when (waddr === PCR_TOHOST) { reg_tohost := wdata; reg_fromhost := Bits(0) } + when (waddr === PCR_FROMHOST) { reg_fromhost := wdata; reg_tohost := Bits(0) } + when (waddr === PCR_SEND_IPI) { r_irq_ipi := Bool(true); } + when (waddr === PCR_CLR_IPI) { r_irq_ipi := Bool(false); } + when (waddr === PCR_K0) { reg_k0 := wdata; } + when (waddr === PCR_K1) { reg_k1 := wdata; } + when (waddr === PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } + when (waddr === PCR_VECBANK) { reg_vecbank := wdata(7,0) } } rdata := Bits(0, 64) - when (io.r.en) { - switch (io.r.addr) { + when (ren) { + switch (raddr) { is (PCR_STATUS) { rdata := Cat(Bits(0,47), reg_status_vm, reg_status_im, reg_status); } is (PCR_EPC) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_epc(VADDR_BITS)), reg_epc); } is (PCR_BADVADDR) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_badvaddr(VADDR_BITS)), reg_badvaddr); } diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala new file mode 100644 index 00000000..39cbdc90 --- /dev/null +++ b/rocket/src/main/scala/htif.scala @@ -0,0 +1,168 @@ +package Top + +import Chisel._ +import Node._; +import Constants._; + +class ioHost(w: Int, view: List[String] = null) extends Bundle(view) +{ + val in = new ioDecoupled()(Bits(width = w)) + val out = 
new ioDecoupled()(Bits(width = w)).flip() +} + +class ioHTIF extends Bundle +{ + val reset = Bool(INPUT) + val pcr_wen = Bool(INPUT) + val pcr_ren = Bool(INPUT) + val pcr_rdy = Bool(OUTPUT) + val pcr_addr = Bits(5, INPUT) + val pcr_wdata = Bits(64, INPUT) + val pcr_rdata = Bits(64, OUTPUT) +} + +class rocketHTIF(w: Int, ncores: Int) extends Component +{ + val io = new Bundle { + val host = new ioHost(w) + val cpu = Vec(ncores) { new ioHTIF().flip() } + val mem = new ioDCache().flip() + } + + val short_request_bits = 64 + val long_request_bits = 576 + require(short_request_bits % w == 0) + + val rx_count_w = 13 + log2up(8) - log2up(w) // data size field is 12 bits + val rx_count = Reg(resetVal = UFix(0,rx_count_w)) + val rx_shifter = Reg() { Bits(width = short_request_bits) } + val header = Reg() { Bits() } + val rx_shifter_in = Cat(io.host.in.bits, rx_shifter(short_request_bits-1,w)) + when (io.host.in.valid && io.host.in.ready) { + rx_shifter := rx_shifter_in + rx_count := rx_count + UFix(1) + when (rx_count === UFix(short_request_bits/w-1)) { + header := rx_shifter_in + } + } + + val rx_count_words = rx_count >> UFix(log2up(short_request_bits/w)) + val packet_ram_wen = rx_count(log2up(short_request_bits/w)-1,0).andR && + io.host.in.valid && io.host.in.ready + val packet_ram = Mem(long_request_bits/short_request_bits-1, + packet_ram_wen, rx_count_words - UFix(1), rx_shifter_in) + + val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } + val cmd = header(3,0) + val size = header(15,4) + val seqno = header(23,16) + val addr = header(63,24).toUFix + + val pcr_addr = addr(19,0) + val pcr_coreid = addr(39,20) + val pcr_wdata = packet_ram(UFix(0)) + + val nack = Mux(cmd === cmd_readmem || cmd === cmd_writemem, size != UFix((1 << OFFSET_BITS)/8), + Mux(cmd === cmd_readcr || cmd === cmd_writecr, size != UFix(1), + Bool(true))) + + val tx_count = Reg(resetVal = UFix(0, log2up(long_request_bits/w+1))) + val 
packet_ram_raddr = (tx_count >> UFix(log2up(short_request_bits/w))) + when (io.host.out.valid && io.host.out.ready) { + tx_count := tx_count + UFix(1) + } + + val rx_size = Mux(cmd === cmd_writemem || cmd === cmd_writecr, size, UFix(0)) + val rx_done = rx_count >= UFix(short_request_bits/w) && rx_count_words-UFix(1) === rx_size + val tx_size = Mux(!nack && cmd === cmd_readmem, UFix((1 << OFFSET_BITS)/8), + Mux(!nack && cmd === cmd_readcr, UFix(1), UFix(0))) + val tx_done = packet_ram_raddr - UFix(1) === tx_size + + val state_rx :: state_pcr :: state_mem_req :: state_mem_resp :: state_tx :: Nil = Enum(5) { UFix() } + val state = Reg(resetVal = state_rx) + + when (state === state_rx && rx_done) { + state := Mux(cmd === cmd_readmem || cmd === cmd_writemem, state_mem_req, + Mux(cmd === cmd_readcr || cmd === cmd_writecr, state_pcr, + state_tx)) + } + + val pcr_done = Reg() { Bool() } + when (state === state_pcr && pcr_done) { + state := state_tx + } + + val mem_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + when (state === state_mem_req && io.mem.req_rdy) { + when (cmd === cmd_writemem) { + when (mem_cnt.andR) { + state := state_tx + } + mem_cnt := mem_cnt + UFix(1) + } + .otherwise { + state := state_mem_resp + } + } + when (state === state_mem_resp && io.mem.resp_val) { + when (mem_cnt.andR) { + state := state_tx + } + mem_cnt := mem_cnt + UFix(1) + } + when (state === state_tx && tx_done) { + rx_count := UFix(0) + tx_count := UFix(0) + state := state_rx + } + + var mem_req_data: Bits = null + for (i <- 0 until MEM_DATA_BITS/short_request_bits) { + val idx = Cat(mem_cnt, UFix(i, log2up(MEM_DATA_BITS/short_request_bits))) + packet_ram.write(idx, io.mem.resp_data((i+1)*short_request_bits-1, i*short_request_bits), + state === state_mem_resp && io.mem.resp_val) + mem_req_data = Cat(packet_ram.read(idx), mem_req_data) + } + io.mem.req_val := state === state_mem_req + io.mem.req_rw := cmd === cmd_writemem + io.mem.req_addr := addr >> UFix(3) + io.mem.req_wdata := 
mem_req_data + + pcr_done := Bool(false) + val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } + for (i <- 0 until ncores) { + val me = pcr_coreid === UFix(i) + io.cpu(i).pcr_wen := Reg(state === state_pcr && cmd === cmd_writecr && me, resetVal = Bool(false)) + io.cpu(i).pcr_addr := Reg(pcr_addr) + io.cpu(i).pcr_wdata := Reg(pcr_wdata) + + val my_reset = Reg(resetVal = Bool(true)) + when (io.cpu(i).pcr_wen && io.cpu(i).pcr_rdy) { + when (io.cpu(i).pcr_addr === UFix(15)) { my_reset := io.cpu(i).pcr_wdata(0) } + pcr_done := Bool(true) + } + io.cpu(i).reset := my_reset + + io.cpu(i).pcr_ren := Reg(state === state_pcr && cmd === cmd_readcr && me, resetVal = Bool(false)) + val rdata = Reg() { Bits() } + when (io.cpu(i).pcr_ren && io.cpu(i).pcr_rdy) { + rdata := io.cpu(i).pcr_rdata + when (io.cpu(i).pcr_addr === UFix(15)) { rdata := my_reset } + pcr_done := Bool(true) + } + pcr_mux.io.sel(i) := Reg(me) + pcr_mux.io.in(i) := rdata + } + + val tx_cmd = Mux(nack, cmd_nack, cmd_ack) + val tx_cmd_ext = Cat(Bits(0, 4-tx_cmd.getWidth), tx_cmd) + val tx_size_ext = Cat(Bits(0, 12-tx_size.getWidth), tx_size) + val tx_header = Cat(addr, seqno, tx_size_ext, tx_cmd_ext) + val tx_data = Mux(packet_ram_raddr === UFix(0), tx_header, + Mux(packet_ram_raddr === UFix(1) && cmd === cmd_readcr, pcr_mux.io.out, + packet_ram(packet_ram_raddr - UFix(1)))) + + io.host.in.ready := state === state_rx && !rx_done + io.host.out.valid := state === state_tx && !tx_done + io.host.out.bits := tx_data >> Cat(tx_count(log2up(short_request_bits/w)-1,0), Bits(0, log2up(w))) +} diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index a3b1f5c5..293fc9f2 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -4,15 +4,17 @@ import Chisel._ import Node._; import Constants._; -class ioTop extends Bundle { +class ioTop(htif_width: Int) extends Bundle { val debug = new ioDebug(); - val console = new ioConsole(); - val host = new ioHost(); + val host = 
new ioHost(htif_width); val mem = new ioMem(); } class Top() extends Component { - val io = new ioTop(); + + val htif_width = 16 + val io = new ioTop(htif_width); + val htif = new rocketHTIF(htif_width, 1) val cpu = new rocketProc(); val icache = new rocketICache(128, 2); // 128 sets x 2 ways @@ -25,10 +27,11 @@ class Top() extends Component { arbiter.io.dcache <> dcache.io.mem; arbiter.io.icache <> icache_pf.io.mem; arbiter.io.vicache <> vicache.io.mem + arbiter.io.htif <> htif.io.mem - cpu.io.host <> io.host; + htif.io.host <> io.host + cpu.io.host <> htif.io.cpu(0); cpu.io.debug <> io.debug; - cpu.io.console <> io.console; icache_pf.io.invalidate := cpu.io.imem.invalidate icache.io.mem <> icache_pf.io.icache; From 6135615104dff0fa5674a7f8cb50034198b2bf6b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 20 Feb 2012 00:51:48 -0800 Subject: [PATCH 0184/1087] unify cache backend interfaces; generify arbiter --- rocket/src/main/scala/arbiter.scala | 96 ++++++++------------- rocket/src/main/scala/icache.scala | 13 +-- rocket/src/main/scala/icache_prefetch.scala | 18 +--- rocket/src/main/scala/top.scala | 12 +-- 4 files changed, 47 insertions(+), 92 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index da5495e6..a5d9d7da 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -18,74 +18,48 @@ class ioMem() extends Bundle val resp_data = Bits(MEM_DATA_BITS, INPUT); } -class ioMemArbiter extends Bundle() { +class ioMemArbiter(n: Int) extends Bundle() { val mem = new ioMem(); - val dcache = new ioDCache(); -// val icache = new ioICache(); - val icache = new ioIPrefetcherMem().flip(); - val vicache = new ioICache(); - val htif = new ioDCache(); + val requestor = Vec(n) { new ioDCache() } } -class rocketMemArbiter extends Component { - val io = new ioMemArbiter(); +class rocketMemArbiter(n: Int) extends Component { + val io = new ioMemArbiter(n); + 
require(io.mem.req_tag.getWidth >= log2up(n) + io.requestor(0).req_tag.getWidth) - // ***************************** - // Interface to memory - // ***************************** + var req_val = Bool(false) + var req_rdy = io.mem.req_rdy + for (i <- 0 until n) + { + io.requestor(i).req_rdy := req_rdy + req_val = req_val || io.requestor(i).req_val + req_rdy = req_rdy && !io.requestor(i).req_val + } - // Memory request is valid if either icache or dcache have a valid request - io.mem.req_val := io.icache.req_val || io.vicache.req_val || io.dcache.req_val || io.htif.req_val + var req_rw = io.requestor(n-1).req_rw + var req_addr = io.requestor(n-1).req_addr + var req_wdata = io.requestor(n-1).req_wdata + var req_tag = Cat(io.requestor(n-1).req_tag, UFix(n-1, log2up(n))) + for (i <- n-1 to 0 by -1) + { + req_rw = Mux(io.requestor(i).req_val, io.requestor(i).req_rw, req_rw) + req_addr = Mux(io.requestor(i).req_val, io.requestor(i).req_addr, req_addr) + req_wdata = Mux(io.requestor(i).req_val, io.requestor(i).req_wdata, req_wdata) + req_tag = Mux(io.requestor(i).req_val, Cat(io.requestor(i).req_tag, UFix(i, log2up(n))), req_tag) + } - // Set read/write bit. 
I$ always reads - io.mem.req_rw := - Mux(io.dcache.req_val, io.dcache.req_rw, - Mux(io.icache.req_val, Bool(false), - Mux(io.vicache.req_val, Bool(false), - io.htif.req_rw))) + io.mem.req_val := req_val + io.mem.req_rw := req_rw + io.mem.req_addr := req_addr + io.mem.req_wdata := req_wdata + io.mem.req_tag := req_tag - // Give priority to D$ - io.mem.req_addr := - Mux(io.dcache.req_val, io.dcache.req_addr, - Mux(io.icache.req_val, io.icache.req_addr, - Mux(io.vicache.req_val, io.vicache.req_addr, - io.htif.req_addr))) - - io.mem.req_wdata := Mux(io.dcache.req_val, io.dcache.req_wdata, io.htif.req_wdata) - - // low bit of tag to indicate D$, I$, and VI$ - val t_dcache :: t_icache :: t_vicache :: t_htif :: Nil = Enum(4){ UFix() } - io.mem.req_tag := - Mux(io.dcache.req_val, Cat(io.dcache.req_tag, t_dcache), - Mux(io.icache.req_val, Cat(io.icache.req_tag, t_icache), - Mux(io.vicache.req_val, t_vicache, - t_htif))) - - // ***************************** - // Interface to caches - // ***************************** - - // Read for request from cache if the memory is ready. Give priority to D$. - // This way, writebacks will never be interrupted by I$ refills. 
- io.dcache.req_rdy := io.mem.req_rdy; - io.icache.req_rdy := io.mem.req_rdy && !io.dcache.req_val; - io.vicache.req_rdy := io.mem.req_rdy && !io.dcache.req_val && !io.icache.req_val - io.htif.req_rdy := io.mem.req_rdy && !io.dcache.req_val && !io.icache.req_val && !io.vicache.req_val - - // Response will only be valid for D$ or I$ not both because of tag bits - io.dcache.resp_val := io.mem.resp_val && (io.mem.resp_tag(1,0) === t_dcache) - io.icache.resp_val := io.mem.resp_val && (io.mem.resp_tag(1,0) === t_icache) - io.vicache.resp_val := io.mem.resp_val && (io.mem.resp_tag(1,0) === t_vicache) - io.htif.resp_val := io.mem.resp_val && (io.mem.resp_tag(1,0) === t_htif) - - // Feed through data to both - io.dcache.resp_data := io.mem.resp_data; - io.icache.resp_data := io.mem.resp_data; - io.vicache.resp_data := io.mem.resp_data - io.htif.resp_data := io.mem.resp_data - - io.dcache.resp_tag := io.mem.resp_tag >> UFix(2) - io.icache.resp_tag := io.mem.resp_tag >> UFix(2) + for (i <- 0 until n) + { + io.requestor(i).resp_val := io.mem.resp_val && io.mem.resp_tag(log2up(n)-1,0) === UFix(i) + io.requestor(i).resp_data := io.mem.resp_data + io.requestor(i).resp_tag := io.mem.resp_tag >> UFix(log2up(n)) + } } } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index b13dbdd8..f52ad748 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -17,20 +17,10 @@ class ioImem(view: List[String] = null) extends Bundle (view) val resp_val = Bool(OUTPUT); } -// interface between I$ and memory (128 bits wide) -class ioICache(view: List[String] = null) extends Bundle (view) -{ - val req_addr = UFix(PADDR_BITS - OFFSET_BITS, INPUT); - val req_val = Bool(INPUT); - val req_rdy = Bool(OUTPUT); - val resp_data = Bits(MEM_DATA_BITS, OUTPUT); - val resp_val = Bool(OUTPUT); -} - class ioRocketICache extends Bundle() { val cpu = new ioImem(); - val mem = new ioICache().flip(); + val mem = new ioDCache().flip() } // basic 
direct mapped instruction cache @@ -139,6 +129,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); io.cpu.resp_data := data_mux.io.out io.mem.req_val := (state === s_request); + io.mem.req_rw := Bool(false) io.mem.req_addr := r_cpu_miss_addr(tagmsb,indexlsb).toUFix // control state machine diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 1d0615aa..db0c2cc0 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -5,20 +5,9 @@ import Node._; import Constants._; import scala.math._; -class ioIPrefetcherMem(view: List[String] = null) extends Bundle (view) -{ - val req_addr = UFix(PADDR_BITS - OFFSET_BITS, OUTPUT); - val req_val = Bool(OUTPUT); - val req_rdy = Bool(INPUT); - val req_tag = Bits(IMEM_TAG_BITS, OUTPUT); - val resp_data = Bits(MEM_DATA_BITS, INPUT); - val resp_val = Bool(INPUT); - val resp_tag = Bits(IMEM_TAG_BITS, INPUT); -} - class ioIPrefetcher extends Bundle() { - val icache = new ioICache(); - val mem = new ioIPrefetcherMem(); + val icache = new ioDCache(); + val mem = new ioDCache().flip() val invalidate = Bool(INPUT) } @@ -41,7 +30,8 @@ class rocketIPrefetcher extends Component() { val ip_mem_resp_val = io.mem.resp_val && io.mem.resp_tag(0).toBool; io.mem.req_val := io.icache.req_val & ~hit | (state === s_req_wait); - io.mem.req_tag := !(io.icache.req_val && !hit); + io.mem.req_rw := Bool(false) + io.mem.req_tag := Mux(io.icache.req_val && !hit, UFix(0), UFix(1)) io.mem.req_addr := Mux(io.mem.req_tag(0).toBool, prefetch_addr, io.icache.req_addr); val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 293fc9f2..86bff016 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -21,13 +21,13 @@ class Top() 
extends Component { val icache_pf = new rocketIPrefetcher(); val vicache = new rocketICache(128, 2); // 128 sets x 2 ways val dcache = new HellaCacheUniproc(); - val arbiter = new rocketMemArbiter(); - arbiter.io.mem <> io.mem; - arbiter.io.dcache <> dcache.io.mem; - arbiter.io.icache <> icache_pf.io.mem; - arbiter.io.vicache <> vicache.io.mem - arbiter.io.htif <> htif.io.mem + val arbiter = new rocketMemArbiter(4); + arbiter.io.requestor(0) <> dcache.io.mem + arbiter.io.requestor(1) <> icache_pf.io.mem + arbiter.io.requestor(2) <> vicache.io.mem + arbiter.io.requestor(3) <> htif.io.mem + arbiter.io.mem <> io.mem htif.io.host <> io.host cpu.io.host <> htif.io.cpu(0); From d5608b272850f7bc5eca79d0ff9f9cfcd2cad696 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 21 Feb 2012 01:02:16 -0800 Subject: [PATCH 0185/1087] fix AMO replay bug didn't check for structural hazard on AMO unit if a replay was initiated one cycle before a hit-under-miss AMO was issued --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 42d402e3..3530a5df 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -913,7 +913,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { // replays val replay = replayer.io.data_req.bits - val stall_replay = r_replay_amo || p_amo || p_store_valid + val stall_replay = r_cpu_req_val && r_req_amo || r_replay_amo || p_amo || p_store_valid val replay_val = replayer.io.data_req.valid && !stall_replay val replay_rdy = data_arb.io.in(1).ready data_arb.io.in(1).bits.inner_req.offset := replay.offset(offsetmsb,ramindexlsb) From c8f768c8b38a42ba0cabce11e07cff6f1072a4c4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 21 Feb 2012 14:39:54 -0800 Subject: [PATCH 0186/1087] fix AMO replay bug like the recent AMO bug fix, but affects stores too. oops.
--- rocket/src/main/scala/nbdcache.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3530a5df..4812a6ab 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -846,7 +846,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { // conflictig load, or if the cache is idle, or after a miss. val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb)) val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) - val p_store_match = r_cpu_req_val && r_req_read && p_store_idx_match && p_store_offset_match + val p_store_match = r_cpu_req_val_ && r_req_read && p_store_idx_match && p_store_offset_match val drain_store_val = (p_store_valid && (!io.cpu.req_val || !req_read || Reg(tag_miss))) || p_store_match data_arb.io.in(2).bits.inner_req.offset := p_store_idx(offsetmsb,ramindexlsb) data_arb.io.in(2).bits.inner_req.idx := p_store_idx(indexmsb,indexlsb) @@ -913,7 +913,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { // replays val replay = replayer.io.data_req.bits - val stall_replay = r_cpu_req_val && r_req_amo || r_replay_amo || p_amo || p_store_valid + val stall_replay = r_cpu_req_val_ && r_req_store || r_replay_amo || p_amo || p_store_valid val replay_val = replayer.io.data_req.valid && !stall_replay val replay_rdy = data_arb.io.in(1).ready data_arb.io.in(1).bits.inner_req.offset := replay.offset(offsetmsb,ramindexlsb) @@ -951,14 +951,14 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { amoalu.io.lhs := loadgen.io.r_dout.toUFix amoalu.io.rhs := p_store_data.toUFix - early_nack := early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo + early_nack := early_tag_nack || early_load_nack || r_cpu_req_val_ && 
r_req_amo || replay_amo_val || r_replay_amo // reset and flush unit val flusher = new FlushUnit(lines) val flushed = Reg(resetVal = Bool(true)) val flush_rdy = mshr.io.fence_rdy && wb_rdy && !p_store_valid - flushed := flushed && !r_cpu_req_val || r_cpu_req_val && r_req_flush && flush_rdy && flusher.io.req.ready - flusher.io.req.valid := r_cpu_req_val && r_req_flush && flush_rdy && !flushed + flushed := flushed && !r_cpu_req_val_ || r_cpu_req_val_ && r_req_flush && flush_rdy && flusher.io.req.ready + flusher.io.req.valid := r_cpu_req_val_ && r_req_flush && flush_rdy && !flushed flusher.io.wb_req <> wb_arb.io.in(0) flusher.io.meta_req <> meta_arb.io.in(0) flusher.io.meta_resp <> meta_resp_mux @@ -967,7 +967,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { // we usually nack rather than reporting that the cache is not ready. // fences and flushes are the exceptions. val pending_fence = Reg(resetVal = Bool(false)) - pending_fence := (r_cpu_req_val && r_req_fence || pending_fence) && !flush_rdy + pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !flush_rdy val nack_hit = p_store_match || r_req_write && !p_store_rdy val nack_miss = needs_writeback && !wb_rdy || !mshr.io.req_rdy || r_req_write && !replayer.io.sdq_enq.ready val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || From 9a80adef507f82830e31b533b5a5259302510ad9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 21 Feb 2012 15:53:19 -0800 Subject: [PATCH 0187/1087] only instantiate VI$ if HAVE_VEC --- rocket/src/main/scala/top.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 86bff016..d1ece4c8 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -19,16 +19,23 @@ class Top() extends Component { val cpu = new rocketProc(); val icache = new rocketICache(128, 2); // 128 sets x 2 ways val icache_pf = new 
rocketIPrefetcher(); - val vicache = new rocketICache(128, 2); // 128 sets x 2 ways val dcache = new HellaCacheUniproc(); val arbiter = new rocketMemArbiter(4); arbiter.io.requestor(0) <> dcache.io.mem arbiter.io.requestor(1) <> icache_pf.io.mem - arbiter.io.requestor(2) <> vicache.io.mem arbiter.io.requestor(3) <> htif.io.mem arbiter.io.mem <> io.mem + if (HAVE_VEC) + { + val vicache = new rocketICache(128, 2); // 128 sets x 2 ways + arbiter.io.requestor(2) <> vicache.io.mem + cpu.io.vimem <> vicache.io.cpu; + } + else + arbiter.io.requestor(2).req_val := Bool(false) + htif.io.host <> io.host cpu.io.host <> htif.io.cpu(0); cpu.io.debug <> io.debug; @@ -36,7 +43,6 @@ class Top() extends Component { icache_pf.io.invalidate := cpu.io.imem.invalidate icache.io.mem <> icache_pf.io.icache; cpu.io.imem <> icache.io.cpu; - cpu.io.vimem <> vicache.io.cpu; cpu.io.dmem <> dcache.io.cpu; } From cfd79c731b0828a63924176def168d905dbb562d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 21 Feb 2012 17:42:00 -0800 Subject: [PATCH 0188/1087] add resp_type to ext_mem interface --- rocket/src/main/scala/dpath.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 9b167c5e..c595db6d 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -33,7 +33,7 @@ class ioDpathAll extends Bundle() val ctrl = new ioCtrlDpath().flip(); val debug = new ioDebug(); val dmem = new ioDpathDmem(); - val ext_mem = new ioDmem(List("req_val", "req_idx", "req_ppn", "req_data", "req_tag", "resp_val", "resp_data", "resp_tag")) + val ext_mem = new ioDmem(List("req_val", "req_idx", "req_ppn", "req_data", "req_tag", "resp_val", "resp_data", "resp_type", "resp_tag")) val imem = new ioDpathImem(); val ptbr_wen = Bool(OUTPUT); val ptbr = UFix(PADDR_BITS, OUTPUT); @@ -457,6 +457,7 @@ class rocketDpath extends Component io.ext_mem.resp_val := Reg(io.dmem.resp_val && dmem_resp_ext, 
resetVal = Bool(false)) io.ext_mem.resp_tag := Reg(dmem_resp_ext_tag) + io.ext_mem.resp_type := Reg(io.dmem.resp_type) io.ext_mem.resp_data := io.dmem.resp_data_subword io.ctrl.wb_waddr := wb_reg_waddr; From 22f8dd0994c297d39194dd282791f75967b3838c Mon Sep 17 00:00:00 2001 From: Daiwei Li Date: Tue, 21 Feb 2012 18:20:32 -0800 Subject: [PATCH 0189/1087] Hook up resp_type to vector unit --- rocket/src/main/scala/cpu.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index c8ac9643..550e56f6 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -161,5 +161,6 @@ class rocketProc extends Component vu.io.dmem_resp.bits.nack := ctrl.io.ext_mem.resp_nack vu.io.dmem_resp.bits.data := dpath.io.ext_mem.resp_data vu.io.dmem_resp.bits.tag := dpath.io.ext_mem.resp_tag + vu.io.dmem_resp.bits.typ := dpath.io.ext_mem.resp_type } } From 18bd0c232b381fcbc70958ed35cd327e754da66a Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 22 Feb 2012 10:12:13 -0800 Subject: [PATCH 0190/1087] Added coherence message type enums --- rocket/src/main/scala/coherence.scala | 8 ++++---- rocket/src/main/scala/consts.scala | 11 +++++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index b55ac9a1..9fcf32eb 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -4,7 +4,7 @@ import Chisel._ import Constants._ class TransactionInit extends Bundle { - val ttype = Bits(width = 2) + val ttype = Bits(width = TTYPE_BITS) val tileTransactionID = Bits(width = TILE_XACT_ID_BITS) val address = Bits(width = PADDR_BITS) val data = Bits(width = MEM_DATA_BITS) @@ -15,20 +15,20 @@ class TransactionAbort extends Bundle { } class ProbeRequest extends Bundle { - val ptype = Bits(width = 2) + val ptype = Bits(width = PTYPE_BITS) val globalTransactionID = Bits(width = 
GLOBAL_XACT_ID_BITS) val address = Bits(width = PADDR_BITS) } class ProbeReply extends Bundle { - val ptype = Bits(width = 2) + val ptype = Bits(width = PTYPE_BITS) val hasData = Bool() val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) val data = Bits(width = MEM_DATA_BITS) } class TransactionReply extends Bundle { - val ttype = Bits(width = 2) + val ttype = Bits(width = TTYPE_BITS) val tileTransactionID = Bits(width = TILE_XACT_ID_BITS) val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) val data = Bits(width = MEM_DATA_BITS) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index e00e51cb..34071842 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -177,6 +177,17 @@ object Constants val TILE_XACT_ID_BITS = 1; // log2(NMSHR) val GLOBAL_XACT_ID_BITS = IDX_BITS; // if one active xact per set + val TTYPE_BITS = 2 + val X_READ_SHARED = UFix(0, TTYPE_BITS) + val X_READ_EXCLUSIVE = UFix(1, TTYPE_BITS) + val X_READ_UNCACHED = UFix(2, TTYPE_BITS) + val X_WRITE_UNCACHED = UFix(3, TTYPE_BITS) + + val PTYPE_BITS = 2 + val P_INVALIDATE = UFix(0, PTYPE_BITS) + val P_DOWNGRADE = UFix(1, PTYPE_BITS) + val P_COPY = UFix(2, PTYPE_BITS) + // external memory interface val IMEM_TAG_BITS = 1; val DMEM_TAG_BITS = ceil(log(NMSHR)/log(2)).toInt; From 24a32c28113bd46846dfc0a64d82a5805696c2ed Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 22 Feb 2012 12:14:57 -0800 Subject: [PATCH 0191/1087] Refining tilelink interface --- rocket/src/main/scala/coherence.scala | 14 ++++++++------ rocket/src/main/scala/consts.scala | 7 +++++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 9fcf32eb..cfde62ba 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -39,12 +39,12 @@ class TransactionFinish extends Bundle { } class ioTileLink extends Bundle { - val 
xact_init = new TransactionInit().asOutput - val xact_abort = new TransactionAbort().asInput - val probe_req = new ProbeRequest().asInput - val probe_rep = new ProbeReply().asOutput - val xact_rep = new TransactionReply().asInput - val xact_finish = new TransactionFinish().asOutput + val xact_init = (new ioDecoupled) { new TransactionInit() }.flip + val xact_abort = (new ioDecoupled) { new TransactionAbort() } + val probe_req = (new ioDecoupled) { new ProbeRequest() } + val probe_rep = (new ioDecoupled) { new ProbeReply() }.flip + val xact_rep = (new ioDecoupled) { new TransactionReply() } + val xact_finish = (new ioDecoupled) { new TransactionFinish() }.flip } trait CoherencePolicy { @@ -130,4 +130,6 @@ trait FourStateCoherence extends CoherencePolicy { } } + + } diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 34071842..e2ce4cce 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -174,8 +174,11 @@ object Constants require(IDX_BITS+OFFSET_BITS <= PGIDX_BITS); // coherence parameters - val TILE_XACT_ID_BITS = 1; // log2(NMSHR) - val GLOBAL_XACT_ID_BITS = IDX_BITS; // if one active xact per set + val NTILES = 1 + val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8 + val TILE_ID_BITS = 1 + val TILE_XACT_ID_BITS = 1 // log2(NMSHR) + val GLOBAL_XACT_ID_BITS = IDX_BITS // if one active xact per set val TTYPE_BITS = 2 val X_READ_SHARED = UFix(0, TTYPE_BITS) From 62837537f490a366652450d581cce4084e4c7da0 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 22 Feb 2012 18:24:52 -0800 Subject: [PATCH 0192/1087] Improved TileIO organization, beginnings of hub implementation --- rocket/src/main/scala/coherence.scala | 92 +++++++++++++++++++++++++-- rocket/src/main/scala/consts.scala | 1 + 2 files changed, 87 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index cfde62ba..776a64e0 100644 --- a/rocket/src/main/scala/coherence.scala 
+++ b/rocket/src/main/scala/coherence.scala @@ -24,6 +24,9 @@ class ProbeReply extends Bundle { val ptype = Bits(width = PTYPE_BITS) val hasData = Bool() val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) +} + +class ProbeReplyData extends Bundle { val data = Bits(width = MEM_DATA_BITS) } @@ -31,6 +34,9 @@ class TransactionReply extends Bundle { val ttype = Bits(width = TTYPE_BITS) val tileTransactionID = Bits(width = TILE_XACT_ID_BITS) val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) +} + +class TransactionReplyData extends Bundle { val data = Bits(width = MEM_DATA_BITS) } @@ -39,12 +45,14 @@ class TransactionFinish extends Bundle { } class ioTileLink extends Bundle { - val xact_init = (new ioDecoupled) { new TransactionInit() }.flip - val xact_abort = (new ioDecoupled) { new TransactionAbort() } - val probe_req = (new ioDecoupled) { new ProbeRequest() } - val probe_rep = (new ioDecoupled) { new ProbeReply() }.flip - val xact_rep = (new ioDecoupled) { new TransactionReply() } - val xact_finish = (new ioDecoupled) { new TransactionFinish() }.flip + val xact_init = (new ioDecoupled) { new TransactionInit() }.flip + val xact_abort = (new ioDecoupled) { new TransactionAbort() } + val probe_req = (new ioDecoupled) { new ProbeRequest() } + val probe_rep = (new ioDecoupled) { new ProbeReply() }.flip + val probe_rep_data = (new ioDecoupled) { new ProbeReplyData() }.flip + val xact_rep = (new ioDecoupled) { new TransactionReply() } + val xact_rep_data = (new ioDecoupled) { new TransactionReplyData() } + val xact_finish = (new ioDecoupled) { new TransactionFinish() }.flip } trait CoherencePolicy { @@ -130,6 +138,78 @@ trait FourStateCoherence extends CoherencePolicy { } } +class XactTracker(id: Int) extends Component { + val io = new Bundle { + val xact_init = (new ioDecoupled) { new TransactionInit() } + val probe_rep = (new ioDecoupled) { new ProbeReply() } + val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip + val xact_rep = (new 
ioDecoupled) { new TransactionReply() }.flip + val mem_req = (new ioDecoupled) { new MemReq() }.flip + val xact_finish = Bool(INPUT) + val tile_id_in = Bits(TILE_ID_BITS, INPUT) + val tile_id_out = Bits(TILE_ID_BITS, OUTPUT) + val ongoing_addr = Bits(PADDR_BITS, OUTPUT) + val busy = Bool(OUTPUT) + } + + val valid = Reg(resetVal = Bool(false)) + val addr = Reg{ Bits() } + val ttype = Reg{ Bits() } + val tile_id = Reg{ Bits() } + val tile_xact_id = Reg{ Bits() } + val probe_done = Reg{ Bits() } + +} + +abstract class CoherenceHub extends Component + +class CoherenceHubNoDir extends CoherenceHub { + val io = new Bundle { + val tiles = Vec(NTILES) { new ioTileLink() } + val mem = new ioDCache().flip + } + + val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) + + // In parallel, every cycle: nack conflicting transactions, free finished ones + for( j <- 0 until NTILES ) { + val init = io.tiles(j).xact_init + val abort = io.tiles(j).xact_abort + val conflicts = Bits(width = NGLOBAL_XACTS) + val busys = Bits(width = NGLOBAL_XACTS) + for( i <- 0 until NGLOBAL_XACTS) { + val t = trackerList(i).io + busys(i) := t.busy + conflicts(i) := t.busy && init.valid && (t.ongoing_addr === init.bits.address) + } + abort.valid := conflicts.orR || busys.andR + abort.bits.tileTransactionID := init.bits.tileTransactionID + //if abort.rdy, init.pop() + + } + for( i <- 0 until NGLOBAL_XACTS) { + val t = trackerList(i).io + val freed = Bits(width = NTILES) + for( j <- 0 until NTILES ) { + val finish = io.tiles(j).xact_finish + freed(j) := finish.valid && (UFix(i) === finish.bits.globalTransactionID) + } + t.xact_finish := freed.orR + //finish.pop() + } + + // Forward memory responses from mem to tile + //for( j <- until NTILES ) { + // tiles(j).xact_rep.ttype = + // tiles(j).xact_rep.tileTransactionID = + // tiles(j).xact_rep.globalTransactionID = + // val data = Bits + // + // Pick a single request of these types to process + //val xact_init_arb = (new Arbiter(NTILES)) { new 
TransactionInit() } + //val probe_reply_arb = (new Arbiter(NTILES)) { new ProbeReply() } } + +} diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index e2ce4cce..6cdb9b28 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -179,6 +179,7 @@ object Constants val TILE_ID_BITS = 1 val TILE_XACT_ID_BITS = 1 // log2(NMSHR) val GLOBAL_XACT_ID_BITS = IDX_BITS // if one active xact per set + val NGLOBAL_XACTS = 1 << IDX_BITS val TTYPE_BITS = 2 val X_READ_SHARED = UFix(0, TTYPE_BITS) From 3eebf4031050511850f73109351ee959b9d07c30 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 21 Feb 2012 18:52:18 -0800 Subject: [PATCH 0193/1087] nack CPU requests during any replay --- rocket/src/main/scala/nbdcache.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 4812a6ab..9a8e6d86 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -854,9 +854,10 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { data_arb.io.in(2).valid := drain_store_val data_arb.io.in(2).bits.way_en := p_store_way_oh val drain_store = drain_store_val && data_arb.io.in(2).ready - val p_store_rdy = !p_store_valid || drain_store - val p_amo = Reg(tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo, resetVal = Bool(false)) - p_store_valid := !p_store_rdy || (tag_hit && r_req_store) || p_amo + val p_amo = Reg(resetVal = Bool(false)) + val p_store_rdy = !(p_store_valid && !drain_store) && !(replayer.io.data_req.valid || r_replay_amo || p_amo) + p_amo := tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo + p_store_valid := p_store_valid && !drain_store || (tag_hit && r_req_store && p_store_rdy) || p_amo // writeback val wb_rdy = wb_arb.io.in(1).ready && !p_store_idx_match @@ -913,7 +914,7 @@ class HellaCacheUniproc extends 
HellaCache with ThreeStateIncoherence { // replays val replay = replayer.io.data_req.bits - val stall_replay = r_cpu_req_val_ && r_req_store || r_replay_amo || p_amo || p_store_valid + val stall_replay = r_replay_amo || p_amo || p_store_valid val replay_val = replayer.io.data_req.valid && !stall_replay val replay_rdy = data_arb.io.in(1).ready data_arb.io.in(1).bits.inner_req.offset := replay.offset(offsetmsb,ramindexlsb) From 7c929afe2b270060d456caa0bbd2252d38f03c7e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 22 Feb 2012 19:30:03 -0800 Subject: [PATCH 0194/1087] HTIF now controls CPU reset --- rocket/src/main/scala/cpu.scala | 3 +-- rocket/src/main/scala/ctrl.scala | 11 ++++++++--- rocket/src/main/scala/top.scala | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 550e56f6..051f7e3c 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -19,7 +19,7 @@ class ioRocket extends Bundle() val dmem = new ioDmem().flip(); } -class rocketProc extends Component +class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) { val io = new ioRocket(); @@ -32,7 +32,6 @@ class rocketProc extends Component val ptw = new rocketPTW(); val arb = new rocketDmemArbiter(); - ctrl.io.htif_reset := io.host.reset ctrl.io.dpath <> dpath.io.ctrl; dpath.io.host <> io.host; dpath.io.debug <> io.debug; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 4bf8f4cc..3d93ea34 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -77,7 +77,6 @@ class ioCtrlDpath extends Bundle() class ioCtrlAll extends Bundle() { - val htif_reset = Bool(INPUT) val dpath = new ioCtrlDpath(); val imem = new ioImem(List("req_val", "resp_val")).flip(); val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip(); @@ -336,6 +335,7 @@ class rocketCtrl 
extends Component val mem_reg_wen = Reg(resetVal = Bool(false)); val mem_reg_fp_wen = Reg(resetVal = Bool(false)); + val mem_reg_replay_next = Reg(resetVal = Bool(false)); val mem_reg_inst_di = Reg(resetVal = Bool(false)); val mem_reg_inst_ei = Reg(resetVal = Bool(false)); val mem_reg_flush_inst = Reg(resetVal = Bool(false)); @@ -354,6 +354,7 @@ class rocketCtrl extends Component val wb_reg_wen = Reg(resetVal = Bool(false)); val wb_reg_fp_wen = Reg(resetVal = Bool(false)); + val wb_reg_replay_next = Reg(resetVal = Bool(false)); val wb_reg_inst_di = Reg(resetVal = Bool(false)); val wb_reg_inst_ei = Reg(resetVal = Bool(false)); val wb_reg_flush_inst = Reg(resetVal = Bool(false)); @@ -431,7 +432,7 @@ class rocketCtrl extends Component ex_reg_fp_val := io.fpu.dec.valid ex_reg_fp_sboard_set := io.fpu.dec.sboard ex_reg_vec_val := id_vec_val.toBool - ex_reg_replay := id_reg_replay || ex_reg_replay_next; + ex_reg_replay := id_reg_replay || ex_reg_replay_next || mem_reg_replay_next || wb_reg_replay_next ex_reg_load_use := id_load_use; } ex_reg_ext_mem_val := io.ext_mem.req_val @@ -467,6 +468,7 @@ class rocketCtrl extends Component mem_reg_fp_wen := Bool(false); mem_reg_eret := Bool(false); mem_reg_mem_val := Bool(false); + mem_reg_replay_next := Bool(false); mem_reg_inst_di := Bool(false); mem_reg_inst_ei := Bool(false); mem_reg_flush_inst := Bool(false); @@ -486,6 +488,7 @@ class rocketCtrl extends Component mem_reg_fp_wen := ex_reg_fp_wen; mem_reg_eret := ex_reg_eret; mem_reg_mem_val := ex_reg_mem_val; + mem_reg_replay_next := ex_reg_replay_next mem_reg_inst_di := ex_reg_inst_di; mem_reg_inst_ei := ex_reg_inst_ei; mem_reg_flush_inst := ex_reg_flush_inst; @@ -507,6 +510,7 @@ class rocketCtrl extends Component wb_reg_wen := Bool(false); wb_reg_fp_wen := Bool(false); wb_reg_eret := Bool(false); + wb_reg_replay_next := Bool(false) wb_reg_inst_di := Bool(false); wb_reg_inst_ei := Bool(false); wb_reg_flush_inst := Bool(false); @@ -518,6 +522,7 @@ class rocketCtrl extends 
Component wb_reg_wen := mem_reg_wen; wb_reg_fp_wen := mem_reg_fp_wen; wb_reg_eret := mem_reg_eret; + wb_reg_replay_next := mem_reg_replay_next wb_reg_inst_di := mem_reg_inst_di; wb_reg_inst_ei := mem_reg_inst_ei; wb_reg_flush_inst := mem_reg_flush_inst; @@ -697,7 +702,7 @@ class rocketCtrl extends Component io.dpath.wen_btb := !ex_reg_btb_hit && br_taken io.dpath.clr_btb := ex_reg_btb_hit && !br_taken || id_reg_icmiss; - io.imem.req_val := !io.htif_reset && (take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay)) + io.imem.req_val := !reset.toBool && (take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay)) // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. val data_hazard_ex = ex_reg_wen && diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index d1ece4c8..481ca6be 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -16,7 +16,7 @@ class Top() extends Component { val io = new ioTop(htif_width); val htif = new rocketHTIF(htif_width, 1) - val cpu = new rocketProc(); + val cpu = new rocketProc(resetSignal = htif.io.cpu(0).reset); val icache = new rocketICache(128, 2); // 128 sets x 2 ways val icache_pf = new rocketIPrefetcher(); val dcache = new HellaCacheUniproc(); From e53792a1eb1984ec7a64baab2b2388bc5398c6dc Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 23 Feb 2012 10:14:14 -0800 Subject: [PATCH 0195/1087] fix bug in rocket's vector datapath related to wakeup --- rocket/src/main/scala/dpath_vec.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 0fe55fdf..7e778685 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -100,9 +100,9 @@ class rocketDpathVec extends Component val hwvl = Mux(io.ctrl.fn === VEC_CFG, hwvl_vcfg, reg_hwvl) val appvl = Mux(io.wdata(11,0) < hwvl, 
io.wdata(11,0), hwvl).toUFix - when (io.valid && io.ctrl.wen && (io.ctrl.fn === VEC_CFG)) + when (io.valid && io.ctrl.wen) { - reg_hwvl := hwvl_vcfg + when (io.ctrl.fn === VEC_CFG) { reg_hwvl := hwvl_vcfg } reg_appvl0 := !(appvl.orR()) } From f939088be1f66e71cee77b60890074220c415ae1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 23 Feb 2012 14:43:49 -0800 Subject: [PATCH 0196/1087] move datapath control signals into control unit because that's where control signals go --- rocket/src/main/scala/ctrl.scala | 30 +++++++++++++++++--- rocket/src/main/scala/dpath.scala | 47 ++++--------------------------- 2 files changed, 31 insertions(+), 46 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 3d93ea34..5b2b7ba5 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -41,6 +41,7 @@ class ioCtrlDpath extends Bundle() val ex_wen = Bool(OUTPUT); val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); + val wb_valid = Bool(OUTPUT) val flush_inst = Bool(OUTPUT); val ex_mem_type = UFix(3,OUTPUT) // enable/disable interrupts @@ -70,7 +71,6 @@ class ioCtrlDpath extends Bundle() val sboard_clra = UFix(5, INPUT); val fp_sboard_clr = Bool(INPUT); val fp_sboard_clra = UFix(5, INPUT); - val mem_valid = Bool(INPUT); // high if there's a valid (not flushed) instruction in mem stage val irq_timer = Bool(INPUT); val irq_ipi = Bool(INPUT); } @@ -300,6 +300,7 @@ class rocketCtrl extends Component val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) val wb_reg_dcache_miss = Reg(io.dmem.resp_miss, resetVal = Bool(false)); + val id_reg_valid = Reg(resetVal = Bool(false)); val id_reg_btb_hit = Reg(resetVal = Bool(false)); val id_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val id_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); @@ -314,6 +315,8 @@ class rocketCtrl extends Component val ex_reg_mem_val = Reg(){Bool()}; val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; val ex_reg_mem_type = Reg(){UFix(width = 3)}; 
+ val ex_reg_valid = Reg(resetVal = Bool(false)); + val ex_reg_wen_pcr = Reg(resetVal = Bool(false)); val ex_reg_wen = Reg(resetVal = Bool(false)); val ex_reg_fp_wen = Reg(resetVal = Bool(false)); val ex_reg_eret = Reg(resetVal = Bool(false)); @@ -333,6 +336,8 @@ class rocketCtrl extends Component val ex_reg_load_use = Reg(resetVal = Bool(false)); val ex_reg_ext_mem_val = Reg(resetVal = Bool(false)) + val mem_reg_valid = Reg(resetVal = Bool(false)); + val mem_reg_wen_pcr = Reg(resetVal = Bool(false)); val mem_reg_wen = Reg(resetVal = Bool(false)); val mem_reg_fp_wen = Reg(resetVal = Bool(false)); val mem_reg_replay_next = Reg(resetVal = Bool(false)); @@ -352,6 +357,8 @@ class rocketCtrl extends Component val mem_reg_ext_mem_val = Reg(resetVal = Bool(false)) val mem_reg_fp_sboard_set = Reg(resetVal = Bool(false)); + val wb_reg_valid = Reg(resetVal = Bool(false)); + val wb_reg_wen_pcr = Reg(resetVal = Bool(false)); val wb_reg_wen = Reg(resetVal = Bool(false)); val wb_reg_fp_wen = Reg(resetVal = Bool(false)); val wb_reg_replay_next = Reg(resetVal = Bool(false)); @@ -369,11 +376,13 @@ class rocketCtrl extends Component when (!io.dpath.stalld) { when (io.dpath.killf) { + id_reg_valid := Bool(false) id_reg_btb_hit := Bool(false); id_reg_xcpt_ma_inst := Bool(false); id_reg_xcpt_itlb := Bool(false); } .otherwise{ + id_reg_valid := Bool(true) id_reg_btb_hit := io.dpath.btb_hit; id_reg_xcpt_ma_inst := if_reg_xcpt_ma_inst; id_reg_xcpt_itlb := io.xcpt_itlb; @@ -393,6 +402,8 @@ class rocketCtrl extends Component ex_reg_div_val := Bool(false); ex_reg_mul_val := Bool(false); ex_reg_mem_val := Bool(false); + ex_reg_valid := Bool(false); + ex_reg_wen_pcr := Bool(false) ex_reg_wen := Bool(false); ex_reg_fp_wen := Bool(false); ex_reg_eret := Bool(false); @@ -417,6 +428,8 @@ class rocketCtrl extends Component ex_reg_div_val := id_div_val.toBool && id_waddr != UFix(0); ex_reg_mul_val := id_mul_val.toBool && id_waddr != UFix(0); ex_reg_mem_val := id_mem_val.toBool; + ex_reg_valid := 
id_reg_valid + ex_reg_wen_pcr := id_wen_pcr ex_reg_wen := id_wen.toBool && id_waddr != UFix(0); ex_reg_fp_wen := io.fpu.dec.wen; ex_reg_eret := id_eret.toBool; @@ -463,6 +476,8 @@ class rocketCtrl extends Component val mem_reg_mem_type = Reg(){UFix(width = 3)}; when (reset.toBool || io.dpath.killx) { + mem_reg_valid := Bool(false); + mem_reg_wen_pcr := Bool(false) mem_reg_div_mul_val := Bool(false); mem_reg_wen := Bool(false); mem_reg_fp_wen := Bool(false); @@ -483,6 +498,8 @@ class rocketCtrl extends Component mem_reg_fp_sboard_set := Bool(false) } .otherwise { + mem_reg_valid := ex_reg_valid + mem_reg_wen_pcr := ex_reg_wen_pcr mem_reg_div_mul_val := ex_reg_div_val || ex_reg_mul_val; mem_reg_wen := ex_reg_wen; mem_reg_fp_wen := ex_reg_fp_wen; @@ -507,6 +524,8 @@ class rocketCtrl extends Component mem_reg_mem_type := ex_reg_mem_type; when (io.dpath.killm) { + wb_reg_valid := Bool(false) + wb_reg_wen_pcr := Bool(false) wb_reg_wen := Bool(false); wb_reg_fp_wen := Bool(false); wb_reg_eret := Bool(false); @@ -519,6 +538,8 @@ class rocketCtrl extends Component wb_reg_fp_sboard_set := Bool(false) } .otherwise { + wb_reg_valid := mem_reg_valid + wb_reg_wen_pcr := mem_reg_wen_pcr wb_reg_wen := mem_reg_wen; wb_reg_fp_wen := mem_reg_fp_wen; wb_reg_eret := mem_reg_eret; @@ -609,7 +630,7 @@ class rocketCtrl extends Component val p_irq_timer = (io.dpath.status(15).toBool && io.dpath.irq_timer); val p_irq_ipi = (io.dpath.status(13).toBool && io.dpath.irq_ipi); val interrupt = - io.dpath.status(SR_ET).toBool && io.dpath.mem_valid && + io.dpath.status(SR_ET).toBool && mem_reg_valid && ((io.dpath.status(15).toBool && io.dpath.irq_timer) || (io.dpath.status(13).toBool && io.dpath.irq_ipi)); @@ -659,7 +680,7 @@ class rocketCtrl extends Component // replay mem stage PC on a DTLB miss or a long-latency writeback val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val - val dmem_kill_mem = io.dpath.mem_valid && (io.dtlb_miss || io.dmem.resp_nack) + val 
dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp_nack) val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill || Reg(io.ext_mem.resp_nack) @@ -787,10 +808,11 @@ class rocketCtrl extends Component io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; io.dpath.wb_wen := wb_reg_wen; + io.dpath.wb_valid := wb_reg_valid; io.dpath.sel_wa := id_sel_wa.toBool; io.dpath.sel_wb := id_sel_wb; io.dpath.ren_pcr := id_ren_pcr.toBool; - io.dpath.wen_pcr := id_wen_pcr.toBool; + io.dpath.wen_pcr := wb_reg_wen_pcr io.dpath.id_eret := id_eret.toBool; io.dpath.wb_eret := wb_reg_eret; io.dpath.irq_disable := wb_reg_inst_di; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index c595db6d..da4162da 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -73,12 +73,10 @@ class rocketDpath extends Component val if_reg_pc = Reg(resetVal = UFix(START_ADDR,VADDR_BITS+1)); // instruction decode definitions - val id_reg_valid = Reg(resetVal = Bool(false)); val id_reg_inst = Reg(resetVal = NOP); val id_reg_pc = Reg() { UFix(width = VADDR_BITS+1) }; // execute definitions - val ex_reg_valid = Reg(resetVal = Bool(false)); val ex_reg_pc = Reg() { UFix() }; val ex_reg_inst = Reg() { Bits() }; val ex_reg_raddr1 = Reg() { UFix() }; @@ -96,12 +94,10 @@ class rocketDpath extends Component val ex_reg_ctrl_div_fn = Reg() { UFix() }; val ex_reg_ctrl_sel_wb = Reg() { UFix() }; val ex_reg_ctrl_ren_pcr = Reg(resetVal = Bool(false)); - val ex_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val ex_reg_ext_mem_tag = Reg() { Bits() }; val ex_wdata = Wire() { Bits() }; // memory definitions - val mem_reg_valid = Reg(resetVal = Bool(false)); val mem_reg_pc = Reg() { UFix() }; val mem_reg_inst = 
Reg() { Bits() }; val mem_reg_rs2 = Reg() { Bits() }; @@ -109,11 +105,9 @@ class rocketDpath extends Component val mem_reg_wdata = Reg() { Bits() }; val mem_reg_raddr1 = Reg() { UFix() }; val mem_reg_raddr2 = Reg() { UFix() }; - val mem_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val mem_wdata = Wire() { Bits() }; // writeback definitions - val wb_reg_valid = Reg(resetVal = Bool(false)); val wb_reg_pc = Reg() { UFix() }; val wb_reg_inst = Reg() { Bits() }; val wb_reg_rs2 = Reg() { Bits() }; @@ -121,7 +115,6 @@ class rocketDpath extends Component val wb_reg_wdata = Reg() { Bits() }; val wb_reg_raddr1 = Reg() { UFix() }; val wb_reg_raddr2 = Reg() { UFix() }; - val wb_reg_ctrl_wen_pcr = Reg(resetVal = Bool(false)); val wb_reg_ll_wb = Reg(resetVal = Bool(false)); val wb_wdata = Wire() { Bits() }; @@ -170,14 +163,7 @@ class rocketDpath extends Component // instruction decode stage when (!io.ctrl.stalld) { id_reg_pc := if_reg_pc; - when(io.ctrl.killf) { - id_reg_inst := NOP; - id_reg_valid := Bool(false); - } - .otherwise { - id_reg_inst := io.imem.resp_data; - id_reg_valid := Bool(true); - } + id_reg_inst := Mux(io.ctrl.killf, NOP, io.imem.resp_data) } val id_raddr1 = id_reg_inst(26,22).toUFix; @@ -251,17 +237,13 @@ class rocketDpath extends Component ex_reg_ext_mem_tag := io.ext_mem.req_tag when(io.ctrl.killd) { - ex_reg_valid := Bool(false); ex_reg_ctrl_div_val := Bool(false); ex_reg_ctrl_mul_val := Bool(false); - ex_reg_ctrl_wen_pcr := Bool(false); ex_reg_ctrl_eret := Bool(false); } .otherwise { - ex_reg_valid := id_reg_valid; ex_reg_ctrl_div_val := io.ctrl.div_val; ex_reg_ctrl_mul_val := io.ctrl.mul_val; - ex_reg_ctrl_wen_pcr := io.ctrl.wen_pcr; ex_reg_ctrl_eret := io.ctrl.id_eret; } @@ -333,7 +315,7 @@ class rocketDpath extends Component tsc_reg := tsc_reg + UFix(1); // instructions retired counter val irt_reg = Reg(resetVal = UFix(0,64)); - when (wb_reg_valid) { irt_reg := irt_reg + UFix(1); } + when (io.ctrl.wb_valid) { irt_reg := irt_reg + UFix(1); } // 
writeback select mux ex_wdata := @@ -356,19 +338,9 @@ class rocketDpath extends Component mem_reg_wdata := ex_wdata; mem_reg_raddr1 := ex_reg_raddr1 mem_reg_raddr2 := ex_reg_raddr2; - - when (io.ctrl.killx) { - mem_reg_valid := Bool(false); - mem_reg_ctrl_wen_pcr := Bool(false); - } - .otherwise { - mem_reg_valid := ex_reg_valid; - mem_reg_ctrl_wen_pcr := ex_reg_ctrl_wen_pcr; - } // for load/use hazard detection (load byte/halfword) io.ctrl.mem_waddr := mem_reg_waddr; - io.ctrl.mem_valid := mem_reg_valid; mem_wdata := Mux(io.ctrl.mem_load, io.dmem.resp_data, mem_reg_wdata) @@ -410,17 +382,8 @@ class rocketDpath extends Component wb_reg_raddr1 := mem_reg_raddr1 wb_reg_raddr2 := mem_reg_raddr2; - when (io.ctrl.killm) { - wb_reg_valid := Bool(false); - wb_reg_ctrl_wen_pcr := Bool(false); - } - .otherwise { - wb_reg_valid := mem_reg_valid; - wb_reg_ctrl_wen_pcr := mem_reg_ctrl_wen_pcr; - } - // regfile write - val wb_src_dmem = Reg(io.ctrl.mem_load) && wb_reg_valid || r_dmem_resp_replay + val wb_src_dmem = Reg(io.ctrl.mem_load) && io.ctrl.wb_valid || r_dmem_resp_replay if (HAVE_VEC) { @@ -430,7 +393,7 @@ class rocketDpath extends Component vec.io.ctrl <> io.vec_ctrl io.vec_iface <> vec.io.iface - vec.io.valid := wb_reg_valid + vec.io.valid := io.ctrl.wb_valid vec.io.inst := wb_reg_inst vec.io.waddr := wb_reg_waddr vec.io.raddr1 := wb_reg_raddr1 @@ -471,7 +434,7 @@ class rocketDpath extends Component // processor control regfile write pcr.io.w.addr := wb_reg_raddr2; - pcr.io.w.en := wb_reg_ctrl_wen_pcr; + pcr.io.w.en := io.ctrl.wen_pcr pcr.io.w.data := wb_reg_wdata; pcr.io.di := io.ctrl.irq_disable; From 6ceaa0e80a207cda22943125731225275a4fb701 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 23 Feb 2012 16:49:46 -0800 Subject: [PATCH 0197/1087] correct and simplify replay_next logic --- rocket/src/main/scala/ctrl.scala | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala 
b/rocket/src/main/scala/ctrl.scala index 5b2b7ba5..19f902ed 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -320,7 +320,6 @@ class rocketCtrl extends Component val ex_reg_wen = Reg(resetVal = Bool(false)); val ex_reg_fp_wen = Reg(resetVal = Bool(false)); val ex_reg_eret = Reg(resetVal = Bool(false)); - val ex_reg_replay_next = Reg(resetVal = Bool(false)); val ex_reg_inst_di = Reg(resetVal = Bool(false)); val ex_reg_inst_ei = Reg(resetVal = Bool(false)); val ex_reg_flush_inst = Reg(resetVal = Bool(false)); @@ -340,7 +339,6 @@ class rocketCtrl extends Component val mem_reg_wen_pcr = Reg(resetVal = Bool(false)); val mem_reg_wen = Reg(resetVal = Bool(false)); val mem_reg_fp_wen = Reg(resetVal = Bool(false)); - val mem_reg_replay_next = Reg(resetVal = Bool(false)); val mem_reg_inst_di = Reg(resetVal = Bool(false)); val mem_reg_inst_ei = Reg(resetVal = Bool(false)); val mem_reg_flush_inst = Reg(resetVal = Bool(false)); @@ -361,7 +359,6 @@ class rocketCtrl extends Component val wb_reg_wen_pcr = Reg(resetVal = Bool(false)); val wb_reg_wen = Reg(resetVal = Bool(false)); val wb_reg_fp_wen = Reg(resetVal = Bool(false)); - val wb_reg_replay_next = Reg(resetVal = Bool(false)); val wb_reg_inst_di = Reg(resetVal = Bool(false)); val wb_reg_inst_ei = Reg(resetVal = Bool(false)); val wb_reg_flush_inst = Reg(resetVal = Bool(false)); @@ -380,15 +377,16 @@ class rocketCtrl extends Component id_reg_btb_hit := Bool(false); id_reg_xcpt_ma_inst := Bool(false); id_reg_xcpt_itlb := Bool(false); + id_reg_replay := !take_pc; // replay on I$ miss } .otherwise{ id_reg_valid := Bool(true) id_reg_btb_hit := io.dpath.btb_hit; id_reg_xcpt_ma_inst := if_reg_xcpt_ma_inst; id_reg_xcpt_itlb := io.xcpt_itlb; + id_reg_replay := id_replay_next } id_reg_icmiss := !io.imem.resp_val; - id_reg_replay := !take_pc && !io.imem.resp_val; } // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim) @@ -407,7 +405,6 @@ class rocketCtrl 
extends Component ex_reg_wen := Bool(false); ex_reg_fp_wen := Bool(false); ex_reg_eret := Bool(false); - ex_reg_replay_next := Bool(false); ex_reg_inst_di := Bool(false); ex_reg_inst_ei := Bool(false); ex_reg_flush_inst := Bool(false); @@ -433,7 +430,6 @@ class rocketCtrl extends Component ex_reg_wen := id_wen.toBool && id_waddr != UFix(0); ex_reg_fp_wen := io.fpu.dec.wen; ex_reg_eret := id_eret.toBool; - ex_reg_replay_next := id_replay_next.toBool; ex_reg_inst_di := (id_irq === I_DI); ex_reg_inst_ei := (id_irq === I_EI); ex_reg_flush_inst := (id_sync === SYNC_I); @@ -445,7 +441,7 @@ class rocketCtrl extends Component ex_reg_fp_val := io.fpu.dec.valid ex_reg_fp_sboard_set := io.fpu.dec.sboard ex_reg_vec_val := id_vec_val.toBool - ex_reg_replay := id_reg_replay || ex_reg_replay_next || mem_reg_replay_next || wb_reg_replay_next + ex_reg_replay := id_reg_replay ex_reg_load_use := id_load_use; } ex_reg_ext_mem_val := io.ext_mem.req_val @@ -483,7 +479,6 @@ class rocketCtrl extends Component mem_reg_fp_wen := Bool(false); mem_reg_eret := Bool(false); mem_reg_mem_val := Bool(false); - mem_reg_replay_next := Bool(false); mem_reg_inst_di := Bool(false); mem_reg_inst_ei := Bool(false); mem_reg_flush_inst := Bool(false); @@ -505,7 +500,6 @@ class rocketCtrl extends Component mem_reg_fp_wen := ex_reg_fp_wen; mem_reg_eret := ex_reg_eret; mem_reg_mem_val := ex_reg_mem_val; - mem_reg_replay_next := ex_reg_replay_next mem_reg_inst_di := ex_reg_inst_di; mem_reg_inst_ei := ex_reg_inst_ei; mem_reg_flush_inst := ex_reg_flush_inst; @@ -529,7 +523,6 @@ class rocketCtrl extends Component wb_reg_wen := Bool(false); wb_reg_fp_wen := Bool(false); wb_reg_eret := Bool(false); - wb_reg_replay_next := Bool(false) wb_reg_inst_di := Bool(false); wb_reg_inst_ei := Bool(false); wb_reg_flush_inst := Bool(false); @@ -543,7 +536,6 @@ class rocketCtrl extends Component wb_reg_wen := mem_reg_wen; wb_reg_fp_wen := mem_reg_fp_wen; wb_reg_eret := mem_reg_eret; - wb_reg_replay_next := mem_reg_replay_next 
wb_reg_inst_di := mem_reg_inst_di; wb_reg_inst_ei := mem_reg_inst_ei; wb_reg_flush_inst := mem_reg_flush_inst; From 5332bab6f1fa0b623b55d7fd0ffa12e85bf3f251 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 23 Feb 2012 17:39:34 -0800 Subject: [PATCH 0198/1087] expose FMA ports outside of FPU (for the VU) --- rocket/src/main/scala/cpu.scala | 20 ++++-- rocket/src/main/scala/fpu.scala | 114 ++++++++++++++------------------ 2 files changed, 67 insertions(+), 67 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 051f7e3c..a9b8c29c 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -97,18 +97,16 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) dpath.io.dmem.resp_data := arb.io.cpu.resp_data; dpath.io.dmem.resp_data_subword := io.dmem.resp_data_subword; + var fpu: rocketFPU = null if (HAVE_FPU) { - val fpu = new rocketFPU(4,6) + fpu = new rocketFPU(4,6) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl } else ctrl.io.fpu.dec.valid := Bool(false) - ctrl.io.ext_mem.req_val := Bool(false) - dpath.io.ext_mem.req_val := Bool(false) - if (HAVE_VEC) { dpath.io.vec_ctrl <> ctrl.io.vec_dpath @@ -161,5 +159,19 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.dmem_resp.bits.data := dpath.io.ext_mem.resp_data vu.io.dmem_resp.bits.tag := dpath.io.ext_mem.resp_tag vu.io.dmem_resp.bits.typ := dpath.io.ext_mem.resp_type + + fpu.io.sfma.valid := Bool(false) + fpu.io.dfma.valid := Bool(false) + } + else + { + ctrl.io.ext_mem.req_val := Bool(false) + dpath.io.ext_mem.req_val := Bool(false) + + if (HAVE_FPU) + { + fpu.io.sfma.valid := Bool(false) + fpu.io.dfma.valid := Bool(false) + } } } diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index f8c02269..20236b9c 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -365,18 +365,20 @@ class rocketFPUFastPipe extends Component 
io.exc_d := exc_d } +class ioFMA(width: Int) extends Bundle { + val valid = Bool(INPUT) + val cmd = Bits(FCMD_WIDTH, INPUT) + val rm = Bits(3, INPUT) + val in1 = Bits(width, INPUT) + val in2 = Bits(width, INPUT) + val in3 = Bits(width, INPUT) + val out = Bits(width, OUTPUT) + val exc = Bits(5, OUTPUT) +} + class rocketFPUSFMAPipe(latency: Int) extends Component { - val io = new Bundle { - val valid = Bool(INPUT) - val cmd = Bits(FCMD_WIDTH, INPUT) - val rm = Bits(3, INPUT) - val in1 = Bits(33, INPUT) - val in2 = Bits(33, INPUT) - val in3 = Bits(33, INPUT) - val out = Bits(33, OUTPUT) - val exc = Bits(5, OUTPUT) - } + val io = new ioFMA(33) val cmd = Reg() { Bits() } val rm = Reg() { Bits() } @@ -409,16 +411,7 @@ class rocketFPUSFMAPipe(latency: Int) extends Component class rocketFPUDFMAPipe(latency: Int) extends Component { - val io = new Bundle { - val valid = Bool(INPUT) - val cmd = Bits(FCMD_WIDTH, INPUT) - val rm = Bits(3, INPUT) - val in1 = Bits(65, INPUT) - val in2 = Bits(65, INPUT) - val in3 = Bits(65, INPUT) - val out = Bits(65, OUTPUT) - val exc = Bits(5, OUTPUT) - } + val io = new ioFMA(65) val cmd = Reg() { Bits() } val rm = Reg() { Bits() } @@ -454,13 +447,15 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val io = new Bundle { val ctrl = new ioCtrlFPU().flip() val dpath = new ioDpathFPU().flip() + val sfma = new ioFMA(33) + val dfma = new ioFMA(65) } - val reg_inst = Reg() { Bits() } + val ex_reg_inst = Reg() { Bits() } when (io.ctrl.valid) { - reg_inst := io.dpath.inst + ex_reg_inst := io.dpath.inst } - val reg_valid = Reg(io.ctrl.valid, Bool(false)) + val ex_reg_valid = Reg(io.ctrl.valid, Bool(false)) val fp_decoder = new rocketFPUDecoder fp_decoder.io.inst := io.dpath.inst @@ -470,6 +465,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component ctrl := fp_decoder.io.sigs } val mem_ctrl = Reg(ctrl) + val wb_ctrl = Reg(mem_ctrl) // load response val load_wb = Reg(io.dpath.dmem_resp_val, resetVal = 
Bool(false)) @@ -496,32 +492,23 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component regfile.setReadLatency(0); regfile.setTarget('inst); - val ex_rs1 = regfile.read(reg_inst(26,22)) - val ex_rs2 = regfile.read(reg_inst(21,17)) - val ex_rs3 = regfile.read(reg_inst(16,12)) - val ex_rm = Mux(reg_inst(11,9) === Bits(7), fsr_rm, reg_inst(11,9)) + val ex_rs1 = regfile.read(ex_reg_inst(26,22)) + val ex_rs2 = regfile.read(ex_reg_inst(21,17)) + val ex_rs3 = regfile.read(ex_reg_inst(16,12)) + val ex_rm = Mux(ex_reg_inst(11,9) === Bits(7), fsr_rm, ex_reg_inst(11,9)) + val mem_reg_valid = Reg(ex_reg_valid && !io.ctrl.killx, resetVal = Bool(false)) val mem_fromint_data = Reg() { Bits() } - val mem_toint_val = Reg(resetVal = Bool(false)) val mem_rs1 = Reg() { Bits() } val mem_rs2 = Reg() { Bits() } val mem_rs3 = Reg() { Bits() } val mem_rm = Reg() { Bits() } - val mem_wrfsr_val = Reg(resetVal = Bool(false)) - mem_toint_val := Bool(false) - mem_wrfsr_val := Bool(false) - when (reg_valid) { + when (ex_reg_valid) { mem_rm := ex_rm when (ctrl.fromint || ctrl.wrfsr) { mem_fromint_data := io.dpath.fromint_data } - when (ctrl.wrfsr) { - mem_wrfsr_val := !io.ctrl.killx - } - when (ctrl.toint) { - mem_toint_val := !io.ctrl.killx - } when (ctrl.ren1) { mem_rs1 := ex_rs1 } @@ -563,23 +550,26 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB val sfma = new rocketFPUSFMAPipe(sfma_latency-1) - sfma.io.valid := Reg(reg_valid && ctrl.fma && ctrl.single) - sfma.io.in1 := mem_rs1 - sfma.io.in2 := mem_rs2 - sfma.io.in3 := mem_rs3 - sfma.io.cmd := mem_ctrl.cmd - sfma.io.rm := mem_rm + sfma.io.valid := io.sfma.valid || mem_reg_valid && mem_ctrl.fma && mem_ctrl.single + sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, mem_rs1) + sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, mem_rs2) + sfma.io.in3 := Mux(io.sfma.valid, 
io.sfma.in3, mem_rs3) + sfma.io.cmd := Mux(io.sfma.valid, io.sfma.cmd, mem_ctrl.cmd) + sfma.io.rm := Mux(io.sfma.valid, io.sfma.rm, mem_rm) + io.sfma.out := sfma.io.out + io.sfma.exc := sfma.io.exc val dfma = new rocketFPUDFMAPipe(dfma_latency-1) - dfma.io.valid := Reg(reg_valid && ctrl.fma && !ctrl.single) - dfma.io.in1 := mem_rs1 - dfma.io.in2 := mem_rs2 - dfma.io.in3 := mem_rs3 - dfma.io.cmd := mem_ctrl.cmd - dfma.io.rm := mem_rm + dfma.io.valid := io.dfma.valid || mem_reg_valid && mem_ctrl.fma && !mem_ctrl.single + dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, mem_rs1) + dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, mem_rs2) + dfma.io.in3 := Mux(io.dfma.valid, io.dfma.in3, mem_rs3) + dfma.io.cmd := Mux(io.dfma.valid, io.dfma.cmd, mem_ctrl.cmd) + dfma.io.rm := Mux(io.dfma.valid, io.dfma.rm, mem_rm) + io.dfma.out := dfma.io.out + io.dfma.exc := dfma.io.exc - val wb_wrfsr_val = Reg(!io.ctrl.killm && mem_wrfsr_val, resetVal = Bool(false)) - val wb_toint_val = Reg(!io.ctrl.killm && mem_toint_val, resetVal = Bool(false)) + val wb_reg_valid = Reg(mem_reg_valid && !io.ctrl.killm, resetVal = Bool(false)) val wb_toint_exc = Reg(fpiu.io.exc) // writeback arbitration @@ -596,13 +586,14 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val write_port_busy = ctrl.fastpipe && wen(fastpipe_latency) || Bool(sfma_latency < dfma_latency) && ctrl.fma && ctrl.single && wen(sfma_latency) || mem_wen && mem_fu_latency === ex_stage_fu_latency - mem_wen := reg_valid && !io.ctrl.killx && (ctrl.fma || ctrl.fastpipe) + mem_wen := ex_reg_valid && !io.ctrl.killx && (ctrl.fma || ctrl.fastpipe) val ex_stage_wsrc = Cat(ctrl.fastpipe, ctrl.single) - val mem_winfo = Reg(Cat(reg_inst(31,27), ex_stage_wsrc)) + val mem_winfo = Reg(Cat(ex_reg_inst(31,27), ex_stage_wsrc)) for (i <- 0 until dfma_latency-2) { winfo(i) := winfo(i+1) } + wen := wen >> UFix(1) when (mem_wen) { when (!io.ctrl.killm) { wen := (wen >> UFix(1)) | (UFix(1) << mem_fu_latency) @@ -613,9 +604,6 @@ class 
rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component } } } - .otherwise { - wen := wen >> UFix(1) - } val wsrc = winfo(0)(1,0) val wdata = Mux(wsrc === UFix(0), dfma.io.out, // DFMA @@ -629,19 +617,19 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val waddr = winfo(0).toUFix >> UFix(2) regfile.write(waddr, wdata, wen(0)) - when (wb_toint_val || wen(0)) { + when (wb_reg_valid && wb_ctrl.toint || wen(0)) { fsr_exc := fsr_exc | - Fill(fsr_exc.getWidth, wb_toint_val) & wb_toint_exc | + Fill(fsr_exc.getWidth, wb_reg_valid && wb_ctrl.toint) & wb_toint_exc | Fill(fsr_exc.getWidth, wen(0)) & wexc } - when (wb_wrfsr_val) { + when (wb_reg_valid && wb_ctrl.wrfsr) { fsr_exc := fastpipe.io.out_s(4,0) fsr_rm := fastpipe.io.out_s(7,5) } - val fp_inflight = mem_toint_val || wb_toint_val || mem_wen || wen.orR - val fsr_busy = ctrl.rdfsr && fp_inflight || mem_wrfsr_val || wb_wrfsr_val - val units_busy = Bool(false) + val fp_inflight = mem_reg_valid && mem_ctrl.toint || wb_reg_valid && wb_ctrl.toint || mem_wen || wen.orR + val fsr_busy = ctrl.rdfsr && fp_inflight || mem_reg_valid && mem_ctrl.wrfsr || wb_reg_valid && wb_ctrl.wrfsr + val units_busy = mem_reg_valid && mem_ctrl.fma && (io.sfma.valid && mem_ctrl.single || io.dfma.valid && !mem_ctrl.single) io.ctrl.nack := fsr_busy || units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs // we don't currently support round-max-magnitude (rm=4) From 1c1ce7d60b563db46ae8991c817088344c1f8db7 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 23 Feb 2012 17:49:28 -0800 Subject: [PATCH 0199/1087] finished xact_rep transactor in coherence hub --- rocket/src/main/scala/coherence.scala | 96 ++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 776a64e0..3744fdb6 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -2,6 +2,7 @@ 
package Top { import Chisel._ import Constants._ +import hwacha.GenArray class TransactionInit extends Bundle { val ttype = Bits(width = TTYPE_BITS) @@ -146,10 +147,12 @@ class XactTracker(id: Int) extends Component { val xact_rep = (new ioDecoupled) { new TransactionReply() }.flip val mem_req = (new ioDecoupled) { new MemReq() }.flip val xact_finish = Bool(INPUT) - val tile_id_in = Bits(TILE_ID_BITS, INPUT) - val tile_id_out = Bits(TILE_ID_BITS, OUTPUT) - val ongoing_addr = Bits(PADDR_BITS, OUTPUT) val busy = Bool(OUTPUT) + val addr = Bits(PADDR_BITS, OUTPUT) + val tile_id = Bits(TILE_ID_BITS, OUTPUT) + val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) + val sharer_count = Bits(TILE_ID_BITS, OUTPUT) + val ttype = Bits(TTYPE_BITS, OUTPUT) } val valid = Reg(resetVal = Bool(false)) @@ -164,47 +167,104 @@ class XactTracker(id: Int) extends Component { abstract class CoherenceHub extends Component class CoherenceHubNoDir extends CoherenceHub { + + def coherenceConflict(addr1: Bits, addr2: Bits): Bool = { + addr1(PADDR_BITS-1, OFFSET_BITS) === addr2(PADDR_BITS-1, OFFSET_BITS) + } + def getTransactionReplyType(ttype: UFix, count: UFix): Bits = { + val ret = Wire() { Bits(width = TTYPE_BITS) } + switch (ttype) { + is(X_READ_SHARED) { ret := Mux(count > UFix(0), X_READ_SHARED, X_READ_EXCLUSIVE) } + is(X_READ_EXCLUSIVE) { ret := X_READ_EXCLUSIVE } + is(X_READ_UNCACHED) { ret := X_READ_UNCACHED } + is(X_WRITE_UNCACHED) { ret := X_WRITE_UNCACHED } + } + ret + } + val io = new Bundle { val tiles = Vec(NTILES) { new ioTileLink() } val mem = new ioDCache().flip } - val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) + val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) + val busy_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } + val addr_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS)} } + val tile_id_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val tile_xact_id_arr = GenArray(NGLOBAL_XACTS){ 
Wire(){Bits(width=TILE_XACT_ID_BITS)} } + val sh_count_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val ttype_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TTYPE_BITS)} } + val free_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } + for( i <- 0 until NGLOBAL_XACTS) { + busy_arr.write( UFix(i), trackerList(i).io.busy) + addr_arr.write( UFix(i), trackerList(i).io.addr) + tile_id_arr.write( UFix(i), trackerList(i).io.tile_id) + tile_xact_id_arr.write(UFix(i), trackerList(i).io.tile_xact_id) + ttype_arr.write( UFix(i), trackerList(i).io.ttype) + sh_count_arr.write( UFix(i), trackerList(i).io.sharer_count) + trackerList(i).io.xact_finish := free_arr.read(UFix(i)) + } // In parallel, every cycle: nack conflicting transactions, free finished ones for( j <- 0 until NTILES ) { val init = io.tiles(j).xact_init val abort = io.tiles(j).xact_abort val conflicts = Bits(width = NGLOBAL_XACTS) - val busys = Bits(width = NGLOBAL_XACTS) for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io - busys(i) := t.busy - conflicts(i) := t.busy && init.valid && (t.ongoing_addr === init.bits.address) + conflicts(i) := t.busy(i) && coherenceConflict(t.addr, init.bits.address) } - abort.valid := conflicts.orR || busys.andR + abort.valid := init.valid && (conflicts.orR || busy_arr.flatten().andR) abort.bits.tileTransactionID := init.bits.tileTransactionID - //if abort.rdy, init.pop() - + // TODO: + // Reg(aborted) := (abort.ready && abort.valid) + // Reg(allocated) : = had_priority(j) & !(abort.ready && abort.valid) + // init.rdy = aborted || allocated } + +/* +// Todo: which implementation is clearer? 
for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io val freed = Bits(width = NTILES) for( j <- 0 until NTILES ) { val finish = io.tiles(j).xact_finish - freed(j) := finish.valid && (UFix(i) === finish.bits.globalTransactionID) + free(j) := finish.valid && (UFix(i) === finish.bits.globalTransactionID) + finish.ready := Bool(true) // finsh.pop() } t.xact_finish := freed.orR - //finish.pop() + } +*/ + + free_arr := Bits(0, width=NGLOBAL_XACTS) + for( j <- 0 until NTILES ) { + val finish = io.tiles(j).xact_finish + when(finish.valid) { + free_arr.write(finish.bits.globalTransactionID, Bool(true)) + } + finish.ready := Bool(true) } // Forward memory responses from mem to tile - //for( j <- until NTILES ) { - // tiles(j).xact_rep.ttype = - // tiles(j).xact_rep.tileTransactionID = - // tiles(j).xact_rep.globalTransactionID = - // val data = Bits - // + val xrep_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val xrep_cnt_next = xrep_cnt + UFix(1) + when (io.mem.resp_val) { xrep_cnt := xrep_cnt_next } + val idx = io.mem.resp_tag + val readys = Bits(width = NTILES) + for( j <- 0 until NTILES ) { + io.tiles(j).xact_rep.bits.ttype := getTransactionReplyType(ttype_arr.read(idx), sh_count_arr.read(idx)) + io.tiles(j).xact_rep.bits.tileTransactionID := tile_xact_id_arr.read(idx) + io.tiles(j).xact_rep.bits.globalTransactionID := idx + io.tiles(j).xact_rep_data.bits.data := io.mem.resp_data + readys := Mux(xrep_cnt === UFix(0), io.tiles(j).xact_rep.ready && io.tiles(j).xact_rep_data.ready, io.tiles(j).xact_rep_data.ready) + val this_rep_valid = UFix(j) === tile_id_arr.read(idx) && io.mem.resp_val + io.tiles(j).xact_rep.valid := this_rep_valid && xrep_cnt === UFix(0) + io.tiles(j).xact_rep_data.valid := this_rep_valid + } + // If there were a ready signal due to e.g. 
intervening network: + //io.mem.resp_rdy := readys(tile_id_arr.read(idx)).xact_rep.ready + + // Pick a single request of these types to process //val xact_init_arb = (new Arbiter(NTILES)) { new TransactionInit() } //val probe_reply_arb = (new Arbiter(NTILES)) { new ProbeReply() } From 52da831aa30ed4b69173ac3f257aa4c095b1b850 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 23 Feb 2012 18:12:50 -0800 Subject: [PATCH 0200/1087] finished xact_finish and xact_abort transactors in coherence hub --- rocket/src/main/scala/coherence.scala | 32 +++++++-------------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 3744fdb6..534c4bb9 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -205,7 +205,9 @@ class CoherenceHubNoDir extends CoherenceHub { trackerList(i).io.xact_finish := free_arr.read(UFix(i)) } - // In parallel, every cycle: nack conflicting transactions, free finished ones + // Nack conflicting transaction init attempts + val aborting = Wire() { Bits(width = NTILES) } + val initiating = Wire() { Bits(width = NTILES) } for( j <- 0 until NTILES ) { val init = io.tiles(j).xact_init val abort = io.tiles(j).xact_abort @@ -214,34 +216,16 @@ class CoherenceHubNoDir extends CoherenceHub { val t = trackerList(i).io conflicts(i) := t.busy(i) && coherenceConflict(t.addr, init.bits.address) } - abort.valid := init.valid && (conflicts.orR || busy_arr.flatten().andR) + aborting(j) := (conflicts.orR || busy_arr.flatten().andR) + abort.valid := init.valid && aborting abort.bits.tileTransactionID := init.bits.tileTransactionID - // TODO: - // Reg(aborted) := (abort.ready && abort.valid) - // Reg(allocated) : = had_priority(j) & !(abort.ready && abort.valid) - // init.rdy = aborted || allocated + init.ready := aborting(j) || initiating(j) } - -/* -// Todo: which implementation is clearer? 
- for( i <- 0 until NGLOBAL_XACTS) { - val t = trackerList(i).io - val freed = Bits(width = NTILES) - for( j <- 0 until NTILES ) { - val finish = io.tiles(j).xact_finish - free(j) := finish.valid && (UFix(i) === finish.bits.globalTransactionID) - finish.ready := Bool(true) // finsh.pop() - } - t.xact_finish := freed.orR - } -*/ - free_arr := Bits(0, width=NGLOBAL_XACTS) + // Free finished transactions for( j <- 0 until NTILES ) { val finish = io.tiles(j).xact_finish - when(finish.valid) { - free_arr.write(finish.bits.globalTransactionID, Bool(true)) - } + free_arr.write(finish.bits.globalTransactionID, finish.valid) finish.ready := Bool(true) } From 012028efaa6d1f864155c80f924714d238e5790f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 23 Feb 2012 22:19:11 -0800 Subject: [PATCH 0201/1087] fix fpga build --- rocket/src/main/scala/htif.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 39cbdc90..8c46ee31 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -125,7 +125,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component } io.mem.req_val := state === state_mem_req io.mem.req_rw := cmd === cmd_writemem - io.mem.req_addr := addr >> UFix(3) + io.mem.req_addr := addr >> UFix(OFFSET_BITS-3) io.mem.req_wdata := mem_req_data pcr_done := Bool(false) From 91a0bb6f61a9d236bdf5e26f5be2acc20f430a6d Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 23 Feb 2012 22:30:38 -0800 Subject: [PATCH 0202/1087] add vector prefetch queues --- rocket/src/main/scala/ctrl_vec.scala | 144 +++++++++++++++++---------- 1 file changed, 94 insertions(+), 50 deletions(-) diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index a7526e14..e2d1f9d8 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -24,6 +24,14 @@ class ioCtrlVecInterface extends Bundle val vximm1q_ready = 
Bool(INPUT) val vximm2q_valid = Bool(OUTPUT) val vximm2q_ready = Bool(INPUT) + + val vpfcmdq_valid = Bool(OUTPUT) + val vpfcmdq_ready = Bool(INPUT) + val vpfximm1q_valid = Bool(OUTPUT) + val vpfximm1q_ready = Bool(INPUT) + val vpfximm2q_valid = Bool(OUTPUT) + val vpfximm2q_ready = Bool(INPUT) + val vackq_valid = Bool(INPUT) val vackq_ready = Bool(OUTPUT) } @@ -45,75 +53,111 @@ class rocketCtrlVec extends Component ListLookup(io.dpath.inst, // appvlmask // | vcmdq - // wen | | vximm1q - // val vcmd vimm | fn | | | vximm2q - // | | | | | | | | | cpfence - // | | | | | | | | | | - List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N,N), - VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, N,VEC_X ,Y,Y,N,N,N), - FENCE_L_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,Y), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,Y), - VLD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, 
N,VEC_X ,Y,Y,Y,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,N) + // | | vximm1q + // | | | vximm2q + // | | | | vpfcmdq + // wen | | | | | vpximm1q + // val vcmd vimm | fn | | | | | | vpximm2q + // | | | | | | | | | | | | cpfence + // | | | | | | | | | | | | | + List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N,N,N,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N,N,N,N,N), + VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N,N,N,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, N,VEC_X ,Y,Y,N,N,N,N,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,Y), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,Y), + VLD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VLHU-> 
List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N) )) val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs - val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: wb_vec_cpfence :: Nil = veccs0 + val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: veecs1 = veecs0 + val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_cpfence :: Nil = veccs1 val 
valid_common = io.dpath.valid && io.sr_ev && wb_vec_val.toBool && !(wb_vec_appvlmask.toBool && io.dpath.appvl0) val mask_wb_vec_cmdq_ready = !wb_vec_cmdq_enq || io.iface.vcmdq_ready val mask_wb_vec_ximm1q_ready = !wb_vec_ximm1q_enq || io.iface.vximm1q_ready val mask_wb_vec_ximm2q_ready = !wb_vec_ximm2q_enq || io.iface.vximm2q_ready + val mask_wb_vec_pfcmdq_ready = !wb_vec_pfcmdq_enq || io.iface.vpfcmdq_ready + val mask_wb_vec_pfximm1q_ready = !wb_vec_pfximm1q_enq || io.iface.vpfximm1q_ready + val mask_wb_vec_pfximm2q_ready = !wb_vec_pfximm2q_enq || io.iface.vpfximm2q_ready io.dpath.wen := wb_vec_wen.toBool io.dpath.fn := wb_vec_fn io.dpath.sel_vcmd := wb_sel_vcmd io.dpath.sel_vimm := wb_sel_vimm - io.iface.vcmdq_valid := valid_common && wb_vec_cmdq_enq && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready - io.iface.vximm1q_valid := valid_common && mask_wb_vec_cmdq_ready && wb_vec_ximm1q_enq && mask_wb_vec_ximm2q_ready - io.iface.vximm2q_valid := valid_common && mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && wb_vec_ximm2q_enq + io.iface.vcmdq_valid := + valid_common && + wb_vec_cmdq_enq && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && + mask_wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + + io.iface.vximm1q_valid := + valid_common && + mask_wb_vec_cmdq_ready && wb_vec_ximm1q_enq && mask_wb_vec_ximm2q_ready && + mask_wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + + io.iface.vximm2q_valid := + valid_common && + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && wb_vec_ximm2q_enq && + mask_wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + + io.iface.vpfcmdq_valid := + valid_common && + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && + wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + + io.iface.vpfximm1q_valid := + valid_common && + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && 
mask_wb_vec_ximm2q_ready && + mask_wb_vec_pfcmdq_enq && wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + + io.iface.vpfximm2q_valid := + valid_common && + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && + mask_wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && wb_vec_pfximm2q_ready io.replay := valid_common && ( wb_vec_cmdq_enq && !io.iface.vcmdq_ready || wb_vec_ximm1q_enq && !io.iface.vximm1q_ready || - wb_vec_ximm2q_enq && !io.iface.vximm2q_ready + wb_vec_ximm2q_enq && !io.iface.vximm2q_ready || + wb_vec_pfcmdq_enq && !io.iface.vpfcmdq_ready || + wb_vec_pfximm1q_enq && !io.iface.vpfximm1q_ready || + wb_vec_pfximm2q_enq && !io.iface.vpfximm2q_ready || ) io.cpfence := valid_common && wb_vec_cpfence && !io.replay } From 2ea309cf80a689d93827ebf48647ee8b24c8014f Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 23 Feb 2012 22:35:05 -0800 Subject: [PATCH 0203/1087] bug fixes to ctrl_vec --- rocket/src/main/scala/ctrl_vec.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index e2d1f9d8..b245e2e2 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -104,7 +104,7 @@ class rocketCtrlVec extends Component )) val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs - val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: veecs1 = veecs0 + val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: veccs1 = veccs0 val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_cpfence :: Nil = veccs1 val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val.toBool && !(wb_vec_appvlmask.toBool && io.dpath.appvl0) @@ -124,17 +124,17 @@ class rocketCtrlVec extends Component io.iface.vcmdq_valid := valid_common && wb_vec_cmdq_enq && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && - 
mask_wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready io.iface.vximm1q_valid := valid_common && mask_wb_vec_cmdq_ready && wb_vec_ximm1q_enq && mask_wb_vec_ximm2q_ready && - mask_wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready io.iface.vximm2q_valid := valid_common && mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && wb_vec_ximm2q_enq && - mask_wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready io.iface.vpfcmdq_valid := valid_common && @@ -144,12 +144,12 @@ class rocketCtrlVec extends Component io.iface.vpfximm1q_valid := valid_common && mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && - mask_wb_vec_pfcmdq_enq && wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + mask_wb_vec_pfcmdq_ready && wb_vec_pfximm1q_enq && mask_wb_vec_pfximm2q_ready io.iface.vpfximm2q_valid := valid_common && mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && - mask_wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && wb_vec_pfximm2q_ready + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && wb_vec_pfximm2q_enq io.replay := valid_common && ( wb_vec_cmdq_enq && !io.iface.vcmdq_ready || @@ -157,7 +157,7 @@ class rocketCtrlVec extends Component wb_vec_ximm2q_enq && !io.iface.vximm2q_ready || wb_vec_pfcmdq_enq && !io.iface.vpfcmdq_ready || wb_vec_pfximm1q_enq && !io.iface.vpfximm1q_ready || - wb_vec_pfximm2q_enq && !io.iface.vpfximm2q_ready || + wb_vec_pfximm2q_enq && !io.iface.vpfximm2q_ready ) io.cpfence := valid_common && wb_vec_cpfence && !io.replay } From 7b3cce79e387583c7f54901a8441a261635449fb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 23 Feb 2012 22:37:13 
-0800 Subject: [PATCH 0204/1087] allocate a primary miss on a prefetch --- rocket/src/main/scala/nbdcache.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9a8e6d86..96eacb66 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -741,10 +741,11 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val r_req_store = (r_cpu_req_cmd === M_XWR) val r_req_flush = (r_cpu_req_cmd === M_FLA) val r_req_fence = (r_cpu_req_cmd === M_FENCE) + val r_req_prefetch = (r_cpu_req_cmd === M_PFR) || (r_cpu_req_cmd === M_PFW) val r_req_amo = r_cpu_req_cmd(3).toBool val r_req_read = r_req_load || r_req_amo val r_req_write = r_req_store || r_req_amo - val r_req_readwrite = r_req_read || r_req_write + val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch // replay unit val replayer = new ReplayUnit() From bf1e643913673656f2060b2f58c1cef12fdd2937 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 23 Feb 2012 22:55:12 -0800 Subject: [PATCH 0205/1087] fix ctrl vec iface hookup --- rocket/src/main/scala/ctrl.scala | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 19f902ed..ab499257 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -597,12 +597,7 @@ class rocketCtrl extends Component io.vec_dpath <> vec.io.dpath - io.vec_iface.vcmdq_valid := vec.io.iface.vcmdq_valid - io.vec_iface.vximm1q_valid := vec.io.iface.vximm1q_valid - io.vec_iface.vximm2q_valid := vec.io.iface.vximm2q_valid - vec.io.iface.vcmdq_ready := io.vec_iface.vcmdq_ready - vec.io.iface.vximm1q_ready := io.vec_iface.vximm1q_ready - vec.io.iface.vximm2q_ready := io.vec_iface.vximm2q_ready + io.vec_iface <> vec.io.iface vec_replay = vec.io.replay vec_cpfence = Reg(resetVal = Bool(false)) From 
63939efd0c0316b236430e0130ee5f5095f6683b Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 23 Feb 2012 23:03:44 -0800 Subject: [PATCH 0206/1087] fix ctrl vec iface hookup - final --- rocket/src/main/scala/ctrl.scala | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ab499257..cfff5659 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -597,7 +597,19 @@ class rocketCtrl extends Component io.vec_dpath <> vec.io.dpath - io.vec_iface <> vec.io.iface + io.vec_iface.vcmdq_valid := vec.io.iface.vcmdq_valid + io.vec_iface.vximm1q_valid := vec.io.iface.vximm1q_valid + io.vec_iface.vximm2q_valid := vec.io.iface.vximm2q_valid + vec.io.iface.vcmdq_ready := io.vec_iface.vcmdq_ready + vec.io.iface.vximm1q_ready := io.vec_iface.vximm1q_ready + vec.io.iface.vximm2q_ready := io.vec_iface.vximm2q_ready + + io.vec_iface.vpfcmdq_valid := vec.io.iface.vpfcmdq_valid + io.vec_iface.vpfximm1q_valid := vec.io.iface.vpfximm1q_valid + io.vec_iface.vpfximm2q_valid := vec.io.iface.vpfximm2q_valid + vec.io.iface.vpfcmdq_ready := io.vec_iface.vpfcmdq_ready + vec.io.iface.vpfximm1q_ready := io.vec_iface.vpfximm1q_ready + vec.io.iface.vpfximm2q_ready := io.vec_iface.vpfximm2q_ready vec_replay = vec.io.replay vec_cpfence = Reg(resetVal = Bool(false)) From 477f3cde02fbbd39731dfc99b741b603f216f7cb Mon Sep 17 00:00:00 2001 From: Daiwei Li Date: Fri, 24 Feb 2012 00:44:13 -0800 Subject: [PATCH 0207/1087] added prefetch queues for vu --- rocket/src/main/scala/cpu.scala | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index a9b8c29c..8e003399 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -137,9 +137,22 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.vec_ximm2q.valid := ctrl.io.vec_iface.vximm2q_valid 
vu.io.vec_ximm2q.bits := dpath.io.vec_iface.vximm2q_bits + // prefetch queues + vu.io.vec_pfcmdq.valid := ctrl.io.vec_iface.vpfcmdq_valid + vu.io.vec_pfcmdq.bits := dpath.io.vec_iface.vcmdq_bits + vu.io.vec_pfximm1q.valid := ctrl.io.vec_iface.vpfximm1q_valid + vu.io.vec_pfximm1q.bits := dpath.io.vec_iface.vximm1q_bits + vu.io.vec_pfximm2q.valid := ctrl.io.vec_iface.vpfximm2q_valid + vu.io.vec_pfximm2q.bits := dpath.io.vec_iface.vximm2q_bits + + // don't have to use pf ready signals + // if cmdq is not a load or store ctrl.io.vec_iface.vcmdq_ready := vu.io.vec_cmdq.ready ctrl.io.vec_iface.vximm1q_ready := vu.io.vec_ximm1q.ready ctrl.io.vec_iface.vximm2q_ready := vu.io.vec_ximm2q.ready + ctrl.io.vec_iface.vpfcmdq_ready := vu.io.vec_pfcmdq.ready + ctrl.io.vec_iface.vpfximm1q_ready := vu.io.vec_pfximm1q.ready + ctrl.io.vec_iface.vpfximm2q_ready := vu.io.vec_pfximm2q.ready ctrl.io.vec_iface.vackq_valid := vu.io.vec_ackq.valid vu.io.vec_ackq.ready := ctrl.io.vec_iface.vackq_ready From b3a3289d3470fce04bda0bcf0509dea977263f7d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 24 Feb 2012 01:42:33 -0800 Subject: [PATCH 0208/1087] fix (?) 
external memory request nack interface --- rocket/src/main/scala/ctrl.scala | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index cfff5659..88d7fe47 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -368,6 +368,7 @@ class rocketCtrl extends Component val wb_reg_cause = Reg(){UFix()}; val wb_reg_fp_val = Reg(resetVal = Bool(false)); val wb_reg_fp_sboard_set = Reg(resetVal = Bool(false)); + val wb_reg_ext_mem_nack = Reg(resetVal = Bool(false)) val take_pc = Wire() { Bool() }; @@ -448,6 +449,8 @@ class rocketCtrl extends Component ex_reg_mem_cmd := Mux(io.ext_mem.req_val, io.ext_mem.req_cmd, id_mem_cmd).toUFix ex_reg_mem_type := Mux(io.ext_mem.req_val, io.ext_mem.req_type, id_mem_type).toUFix + val ex_ext_mem_val = ex_reg_ext_mem_val && !wb_reg_ext_mem_nack + val beq = io.dpath.br_eq; val bne = ~io.dpath.br_eq; val blt = io.dpath.br_lt; @@ -513,7 +516,7 @@ class rocketCtrl extends Component mem_reg_fp_val := ex_reg_fp_val mem_reg_fp_sboard_set := ex_reg_fp_sboard_set } - mem_reg_ext_mem_val := ex_reg_ext_mem_val; + mem_reg_ext_mem_val := ex_ext_mem_val mem_reg_mem_cmd := ex_reg_mem_cmd; mem_reg_mem_type := ex_reg_mem_type; @@ -543,6 +546,7 @@ class rocketCtrl extends Component wb_reg_fp_val := mem_reg_fp_val wb_reg_fp_sboard_set := mem_reg_fp_sboard_set } + wb_reg_ext_mem_nack := io.ext_mem.resp_nack val sboard = new rocketCtrlSboard(32, 3, 2); sboard.io.r(0).addr := id_raddr2.toUFix; @@ -682,7 +686,7 @@ class rocketCtrl extends Component val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp_nack) val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill - val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill || Reg(io.ext_mem.resp_nack) + val 
kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill || mem_reg_ext_mem_val && wb_reg_ext_mem_nack // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions @@ -801,7 +805,7 @@ class rocketCtrl extends Component io.dpath.div_val := id_div_val.toBool; io.dpath.mul_fn := id_mul_fn; io.dpath.mul_val := id_mul_val.toBool; - io.dpath.ex_ext_mem_val := ex_reg_ext_mem_val; + io.dpath.ex_ext_mem_val := ex_ext_mem_val; io.dpath.ex_fp_val:= ex_reg_fp_val; io.dpath.mem_fp_val:= mem_reg_fp_val; io.dpath.ex_wen := ex_reg_wen; @@ -822,14 +826,14 @@ class rocketCtrl extends Component io.fpu.killx := kill_ex io.fpu.killm := kill_mem - io.dtlb_val := ex_reg_mem_val || ex_reg_ext_mem_val; + io.dtlb_val := ex_reg_mem_val || ex_ext_mem_val io.dtlb_kill := mem_reg_kill; - io.dmem.req_val := ex_reg_mem_val || ex_reg_ext_mem_val; + io.dmem.req_val := ex_reg_mem_val || ex_ext_mem_val io.dmem.req_kill := kill_dcache; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; - io.ext_mem.resp_nack:= mem_reg_ext_mem_val && (io.dmem.req_kill || io.dmem.resp_nack || Reg(!io.dmem.req_rdy)) + io.ext_mem.resp_nack:= mem_reg_ext_mem_val && !wb_reg_ext_mem_nack && (io.dmem.req_kill || io.dmem.resp_nack || Reg(!io.dmem.req_rdy)) } } From 4121fb178cc096b01bab670e42c90f52ce9645f7 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 24 Feb 2012 19:22:35 -0800 Subject: [PATCH 0209/1087] clean up mul/div interface; use VU mul if HAVE_VEC --- rocket/src/main/scala/consts.scala | 6 +- rocket/src/main/scala/cpu.scala | 4 + rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/divider.scala | 71 +++++------- rocket/src/main/scala/dpath.scala | 79 +++++++------ rocket/src/main/scala/multiplier.scala | 149 ++++++++++++++++--------- rocket/src/main/scala/queues.scala | 38 ++++--- 7 files changed, 192 insertions(+), 157 deletions(-) diff --git 
a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 6cdb9b28..3758cbd0 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -37,9 +37,9 @@ object Constants val MUL_X = UFix(0, 2); val MUL_LO = UFix(0, 2); - val MUL_HU = UFix(1, 2); - val MUL_HS = UFix(2, 2); - val MUL_HSU = UFix(3, 2); + val MUL_H = UFix(1, 2); + val MUL_HSU = UFix(2, 2); + val MUL_HU = UFix(3, 2); val DIV_X = UFix(0, 2); val DIV_D = UFix(0, 2); diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 8e003399..2bd23f49 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -173,6 +173,10 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.dmem_resp.bits.tag := dpath.io.ext_mem.resp_tag vu.io.dmem_resp.bits.typ := dpath.io.ext_mem.resp_type + // share vector integer multiplier with rocket + dpath.io.vec_imul_req <> vu.io.cp_imul_req + dpath.io.vec_imul_resp <> vu.io.cp_imul_resp + fpu.io.sfma.valid := Bool(false) fpu.io.dfma.valid := Bool(false) } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 88d7fe47..696eb9a4 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -183,7 +183,7 @@ class rocketCtrl extends Component SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HS, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_H, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, 
WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), MULHSU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 95b128ce..d3057697 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -1,28 +1,11 @@ -package Top { +package Top import Chisel._ -import Node._; -import Constants._; +import Node._ +import Constants._ -class ioDivider(width: Int) extends Bundle { - // requests - val div_val = Bool(INPUT); - val div_kill = Bool(INPUT); - val div_rdy = Bool(OUTPUT); - val dw = UFix(1, INPUT); - val div_fn = UFix(2, INPUT); - val div_tag = UFix(5, INPUT); - val in0 = Bits(width, INPUT); - val in1 = Bits(width, INPUT); - // responses - val result = Bits(width, OUTPUT); - val result_tag = UFix(5, OUTPUT); - val result_val = Bool(OUTPUT); - val result_rdy = Bool(INPUT); -} - -class rocketDivider(width : Int) extends Component { - val io = new ioDivider(width); +class rocketDivider(width: Int) extends Component { + val io = new ioMultiplier val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_ready); @@ -31,21 +14,23 @@ class rocketDivider(width : Int) extends Component { val divby0 = Reg() { Bool() }; val neg_quo = Reg() { Bool() }; val neg_rem = Reg() { Bool() }; - val reg_tag = Reg() { UFix() }; + val reg_tag = Reg() { Bits() }; val rem = Reg() { Bool() }; val half = Reg() { Bool() }; val divisor = Reg() { UFix() }; val remainder = Reg() { UFix() }; val subtractor = remainder(2*width, width).toUFix - divisor; - - val tc = (io.div_fn === DIV_D) || (io.div_fn === DIV_R); + + val dw = io.req.bits.fn(io.req.bits.fn.width-1) + val fn = 
io.req.bits.fn(io.req.bits.fn.width-2,0) + val tc = (fn === DIV_D) || (fn === DIV_R); - val do_kill = io.div_kill && Reg(io.div_rdy) // kill on 1st cycle only + val do_kill = io.req_kill && Reg(io.req.ready) // kill on 1st cycle only switch (state) { is (s_ready) { - when (io.div_val) { + when (io.req.valid) { state := Mux(tc, s_neg_inputs, s_busy) } } @@ -64,7 +49,7 @@ class rocketDivider(width : Int) extends Component { state := s_done } is (s_done) { - when (io.result_rdy) { + when (io.resp_rdy) { state := s_ready } } @@ -72,21 +57,21 @@ class rocketDivider(width : Int) extends Component { // state machine - val lhs_sign = tc && Mux(io.dw === DW_64, io.in0(width-1), io.in0(width/2-1)).toBool - val lhs_hi = Mux(io.dw === DW_64, io.in0(width-1,width/2), Fill(width/2, lhs_sign)) - val lhs_in = Cat(lhs_hi, io.in0(width/2-1,0)) + val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(width-1), io.req.bits.in0(width/2-1)).toBool + val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(width-1,width/2), Fill(width/2, lhs_sign)) + val lhs_in = Cat(lhs_hi, io.req.bits.in0(width/2-1,0)) - val rhs_sign = tc && Mux(io.dw === DW_64, io.in1(width-1), io.in1(width/2-1)).toBool - val rhs_hi = Mux(io.dw === DW_64, io.in1(width-1,width/2), Fill(width/2, rhs_sign)) - val rhs_in = Cat(rhs_hi, io.in1(width/2-1,0)) + val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(width-1), io.req.bits.in1(width/2-1)).toBool + val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(width-1,width/2), Fill(width/2, rhs_sign)) + val rhs_in = Cat(rhs_hi, io.req.bits.in1(width/2-1,0)) - when ((state === s_ready) && io.div_val) { + when ((state === s_ready) && io.req.valid) { count := UFix(0, log2up(width+1)); - half := (io.dw === DW_32); + half := (dw === DW_32); neg_quo := Bool(false); neg_rem := Bool(false); - rem := (io.div_fn === DIV_R) || (io.div_fn === DIV_RU); - reg_tag := io.div_tag; + rem := (fn === DIV_R) || (fn === DIV_RU); + reg_tag := io.req_tag; divby0 := Bool(true); divisor := rhs_in.toUFix; 
remainder := Cat(UFix(0,width+1), lhs_in).toUFix; @@ -126,11 +111,9 @@ class rocketDivider(width : Int) extends Component { val result = Mux(rem, remainder(2*width, width+1), remainder(width-1,0)); - io.result := Mux(half, Cat(Fill(width/2, result(width/2-1)), result(width/2-1,0)), result); - io.result_tag := reg_tag; - io.result_val := (state === s_done); - - io.div_rdy := (state === s_ready); -} + io.resp_bits := Mux(half, Cat(Fill(width/2, result(width/2-1)), result(width/2-1,0)), result); + io.resp_tag := reg_tag; + io.resp_val := (state === s_done); + io.req.ready := (state === s_ready); } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index da4162da..68f58184 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -40,6 +40,8 @@ class ioDpathAll extends Bundle() val fpu = new ioDpathFPU(); val vec_ctrl = new ioCtrlDpathVec().flip() val vec_iface = new ioDpathVecInterface() + val vec_imul_req = new io_imul_req + val vec_imul_resp = Bits(hwacha.Config.DEF_XLEN, INPUT) } class rocketDpath extends Component @@ -56,16 +58,6 @@ class rocketDpath extends Component val alu = new rocketDpathALU(); val ex_alu_out = alu.io.out; val ex_alu_adder_out = alu.io.adder_out; - - val div = new rocketDivider(64); - val div_result = div.io.result; - val div_result_tag = div.io.result_tag; - val div_result_val = div.io.result_val; - - val mul = new rocketMultiplier(); - val mul_result = mul.io.result; - val mul_result_tag = mul.io.result_tag; - val mul_result_val = mul.io.result_val; val rfile = new rocketDpathRegfile(); @@ -251,34 +243,39 @@ class rocketDpath extends Component alu.io.fn := ex_reg_ctrl_fn_alu; alu.io.in2 := ex_reg_op2.toUFix; alu.io.in1 := ex_reg_rs1.toUFix; - - // divider - div.io.dw := ex_reg_ctrl_fn_dw; - div.io.div_fn := ex_reg_ctrl_div_fn; - div.io.div_val := ex_reg_ctrl_div_val; - div.io.div_kill := io.ctrl.killm; - div.io.div_tag := ex_reg_waddr; - div.io.in0 := ex_reg_rs1; - div.io.in1 
:= ex_reg_rs2; - div.io.result_rdy:= !dmem_resp_replay - - io.ctrl.div_rdy := div.io.div_rdy; - io.ctrl.div_result_val := div.io.result_val; - - // multiplier - mul.io.mul_val := ex_reg_ctrl_mul_val; - mul.io.mul_kill:= io.ctrl.killm; - mul.io.dw := ex_reg_ctrl_fn_dw; - mul.io.mul_fn := ex_reg_ctrl_mul_fn; - mul.io.mul_tag := ex_reg_waddr; - mul.io.in0 := ex_reg_rs1; - mul.io.in1 := ex_reg_rs2; io.fpu.fromint_data := ex_reg_rs1 - - io.ctrl.mul_rdy := mul.io.mul_rdy - io.ctrl.mul_result_val := mul.io.result_val; - mul.io.result_rdy := !dmem_resp_replay && !div.io.result_val + + // divider + val div = new rocketDivider(64) + div.io.req.valid := ex_reg_ctrl_div_val + div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_div_fn) + div.io.req.bits.in0 := ex_reg_rs1 + div.io.req.bits.in1 := ex_reg_rs2 + div.io.req_tag := ex_reg_waddr + div.io.req_kill := io.ctrl.killm + div.io.resp_rdy := !dmem_resp_replay + io.ctrl.div_rdy := div.io.req.ready + io.ctrl.div_result_val := div.io.resp_val + + // multiplier + var mul_io = new rocketMultiplier().io + if (HAVE_VEC) + { + val vu_mul = new rocketVUMultiplier(nwbq = 1) + vu_mul.io.vu.req <> io.vec_imul_req + vu_mul.io.vu.resp <> io.vec_imul_resp + mul_io = vu_mul.io.cpu + } + mul_io.req.valid := ex_reg_ctrl_mul_val; + mul_io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_mul_fn) + mul_io.req.bits.in0 := ex_reg_rs1 + mul_io.req.bits.in1 := ex_reg_rs2 + mul_io.req_tag := ex_reg_waddr + mul_io.req_kill := io.ctrl.killm + mul_io.resp_rdy := !dmem_resp_replay && !div.io.resp_val + io.ctrl.mul_rdy := mul_io.req.ready + io.ctrl.mul_result_val := mul_io.resp_val io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection & bypass control @@ -358,14 +355,14 @@ class rocketDpath extends Component r_dmem_fp_replay := io.dmem.resp_replay && dmem_resp_fpu; val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr, - Mux(div_result_val, div_result_tag, - Mux(mul_result_val, mul_result_tag, + Mux(div.io.resp_val, div.io.resp_tag, + 
Mux(mul_io.resp_val, mul_io.resp_tag, mem_reg_waddr))) - val mem_ll_wdata = Mux(div_result_val, div_result, - Mux(mul_result_val, mul_result, + val mem_ll_wdata = Mux(div.io.resp_val, div.io.resp_bits, + Mux(mul_io.resp_val, mul_io.resp_bits, Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata))) - val mem_ll_wb = dmem_resp_replay || div_result_val || mul_result_val + val mem_ll_wb = dmem_resp_replay || div.io.resp_val || mul_io.resp_val io.fpu.dmem_resp_val := io.dmem.resp_val && dmem_resp_fpu io.fpu.dmem_resp_data := io.dmem.resp_data diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 38ba04c3..cfe4658e 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -1,75 +1,118 @@ -package Top { +package Top import Chisel._ -import Node._; -import Constants._; +import Node._ +import Constants._ +import hwacha._ +import hwacha.Config._ -class ioMultiplier(width: Int) extends Bundle { - // requests - val mul_val = Bool(INPUT); - val mul_kill= Bool(INPUT); - val mul_rdy = Bool(OUTPUT); - val dw = UFix(1, INPUT); - val mul_fn = UFix(2, INPUT); - val mul_tag = UFix(CPU_TAG_BITS, INPUT); - val in0 = Bits(width, INPUT); - val in1 = Bits(width, INPUT); - - // responses - val result = Bits(width, OUTPUT); - val result_tag = UFix(CPU_TAG_BITS, OUTPUT); - val result_val = Bool(OUTPUT); - val result_rdy = Bool(INPUT); +class ioMultiplier extends Bundle { + val req = new io_imul_req().flip() + val req_tag = Bits(5, INPUT) + val req_kill = Bool(INPUT) + val resp_val = Bool(OUTPUT) + val resp_rdy = Bool(INPUT) + val resp_tag = Bits(5, OUTPUT) + val resp_bits = Bits(DEF_XLEN, OUTPUT) +} + +class rocketVUMultiplier(nwbq: Int) extends Component { + val io = new Bundle { + val cpu = new ioMultiplier + val vu = new Bundle { + val req = new io_imul_req + val resp = Bits(DEF_XLEN, INPUT) + } + } + + val valid = Reg(resetVal = Bits(0, IMUL_STAGES)) + val wbq_cnt = Reg(resetVal = 
Bits(0, log2up(nwbq+1))) + val tag = Vec(IMUL_STAGES) { Reg() { Bits() } } + + val fire = io.cpu.req.valid && io.cpu.req.ready + + valid := Cat(fire, valid(IMUL_STAGES-1) && !io.cpu.req_kill, valid(IMUL_STAGES-2,1)) + when (fire) { + tag(IMUL_STAGES-1) := io.cpu.req_tag + } + for (i <- 0 until IMUL_STAGES-1) { + tag(i) := tag(i+1) + } + when (valid(0) != (io.cpu.resp_val && io.cpu.resp_rdy)) { + wbq_cnt := Mux(valid(0), wbq_cnt + UFix(1), wbq_cnt - UFix(1)) + } + + var inflight_cnt = valid(0) + for (i <- 1 until IMUL_STAGES) + inflight_cnt = inflight_cnt + valid(i) + inflight_cnt = inflight_cnt + wbq_cnt + val wbq_rdy = inflight_cnt < UFix(nwbq) + + val wbq = (new queue(nwbq)) { Bits(width = io.cpu.resp_bits.width + io.cpu.resp_tag.width) } + wbq.io.enq.valid := valid(0) + wbq.io.enq.bits := Cat(io.vu.resp, tag(0)) + wbq.io.deq.ready := io.cpu.resp_rdy + + io.cpu.req.ready := io.vu.req.ready && wbq_rdy + io.cpu.resp_val := wbq.io.deq.valid + io.cpu.resp_bits := wbq.io.deq.bits >> UFix(io.cpu.resp_tag.width) + io.cpu.resp_tag := wbq.io.deq.bits(io.cpu.resp_tag.width-1,0) + + io.vu.req <> io.cpu.req } class rocketMultiplier extends Component { - val io = new ioMultiplier(64); - // width must be even (booth). + val io = new ioMultiplier + // w must be even (booth). // we need an extra bit to handle signed vs. unsigned, - // so we need to add a second to keep width even. - val width = 64 + 2 - // unroll must divide width/2 + // so we need to add a second to keep w even. 
+ val w = 64 + 2 val unroll = 3 - val cycles = width/unroll/2 + require(w % 2 == 0 && (w/2) % unroll == 0) + + val cycles = w/unroll/2 val r_val = Reg(resetVal = Bool(false)); - val r_dw = Reg { UFix() } - val r_fn = Reg { UFix() } - val r_tag = Reg { UFix() } + val r_dw = Reg { Bits() } + val r_fn = Reg { Bits() } + val r_tag = Reg { Bits() } val r_lhs = Reg { Bits() } - val r_prod= Reg { Bits(width = width*2) } + val r_prod= Reg { Bits(width = w*2) } val r_lsb = Reg { Bits() } val r_cnt = Reg { UFix(width = log2up(cycles+1)) } - val lhs_msb = Mux(io.dw === DW_64, io.in0(63), io.in0(31)).toBool - val lhs_sign = ((io.mul_fn === MUL_HS) || (io.mul_fn === MUL_HSU)) && lhs_msb - val lhs_hi = Mux(io.dw === DW_64, io.in0(63,32), Fill(32, lhs_sign)) - val lhs_in = Cat(lhs_sign, lhs_hi, io.in0(31,0)) + val dw = io.req.bits.fn(io.req.bits.fn.width-1) + val fn = io.req.bits.fn(io.req.bits.fn.width-2,0) - val rhs_msb = Mux(io.dw === DW_64, io.in1(63), io.in1(31)).toBool - val rhs_sign = (io.mul_fn === MUL_HS) && rhs_msb - val rhs_hi = Mux(io.dw === DW_64, io.in1(63,32), Fill(32, rhs_sign)) - val rhs_in = Cat(rhs_sign, rhs_sign, rhs_hi, io.in1(31,0)) + val lhs_msb = Mux(dw === DW_64, io.req.bits.in0(63), io.req.bits.in0(31)).toBool + val lhs_sign = ((fn === MUL_H) || (fn === MUL_HSU)) && lhs_msb + val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(63,32), Fill(32, lhs_sign)) + val lhs_in = Cat(lhs_sign, lhs_hi, io.req.bits.in0(31,0)) - val do_kill = io.mul_kill && r_cnt === UFix(0) // can only kill on 1st cycle + val rhs_msb = Mux(dw === DW_64, io.req.bits.in1(63), io.req.bits.in1(31)).toBool + val rhs_sign = (fn === MUL_H) && rhs_msb + val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(63,32), Fill(32, rhs_sign)) + val rhs_in = Cat(rhs_sign, rhs_sign, rhs_hi, io.req.bits.in1(31,0)) + + val do_kill = io.req_kill && r_cnt === UFix(0) // can only kill on 1st cycle - when (io.mul_val && io.mul_rdy) { + when (io.req.valid && io.req.ready) { r_val := Bool(true) r_cnt := UFix(0, 
log2up(cycles+1)) - r_dw := io.dw - r_fn := io.mul_fn - r_tag := io.mul_tag + r_dw := dw + r_fn := fn + r_tag := io.req_tag r_lhs := lhs_in r_prod:= rhs_in r_lsb := Bool(false) } - .elsewhen (io.result_val && io.result_rdy || do_kill) { // can only kill on first cycle + .elsewhen (io.resp_val && io.resp_rdy || do_kill) { // can only kill on first cycle r_val := Bool(false) } - val lhs_sext = Cat(r_lhs(width-2), r_lhs(width-2), r_lhs).toUFix - val lhs_twice = Cat(r_lhs(width-2), r_lhs, Bits(0,1)).toUFix + val lhs_sext = Cat(r_lhs(w-2), r_lhs(w-2), r_lhs).toUFix + val lhs_twice = Cat(r_lhs(w-2), r_lhs, Bits(0,1)).toUFix var prod = r_prod var lsb = r_lsb @@ -79,12 +122,12 @@ class rocketMultiplier extends Component { Mux(prod(0) != prod(1), lhs_twice, UFix(0))); val sub = prod(1) - val adder_lhs = Cat(prod(width*2-1), prod(width*2-1,width)).toUFix + val adder_lhs = Cat(prod(w*2-1), prod(w*2-1,w)).toUFix val adder_rhs = Mux(sub, ~addend, addend) - val adder_out = (adder_lhs + adder_rhs + sub.toUFix)(width,0) + val adder_out = (adder_lhs + adder_rhs + sub.toUFix)(w,0) lsb = prod(1) - prod = Cat(adder_out(width), adder_out, prod(width-1,2)) + prod = Cat(adder_out(w), adder_out, prod(w-1,2)) } when (r_val && (r_cnt != UFix(cycles))) { @@ -99,10 +142,8 @@ class rocketMultiplier extends Component { val mul_output = Mux(r_dw === DW_64, mul_output64, mul_output32_ext) - io.mul_rdy := !r_val - io.result := mul_output; - io.result_tag := r_tag; - io.result_val := r_val && (r_cnt === UFix(cycles)) -} - + io.req.ready := !r_val + io.resp_bits := mul_output; + io.resp_tag := r_tag; + io.resp_val := r_val && (r_cnt === UFix(cycles)) } diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 986e32aa..db99ce8b 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -14,32 +14,42 @@ class queue[T <: Data](entries: Int, flushable: Boolean = false)(data: => T) ext { val io = new ioQueue(flushable)(data) - val enq_ptr 
= Reg(resetVal = UFix(0, log2up(entries))) - val deq_ptr = Reg(resetVal = UFix(0, log2up(entries))) - val maybe_full = Reg(resetVal = Bool(false)) - - io.deq.valid := maybe_full || enq_ptr != deq_ptr - io.enq.ready := !maybe_full || enq_ptr != deq_ptr - val do_enq = io.enq.ready && io.enq.valid val do_deq = io.deq.ready && io.deq.valid - when (do_deq) { - deq_ptr := deq_ptr + UFix(1) - } - when (do_enq) { - enq_ptr := enq_ptr + UFix(1) + var enq_ptr = UFix(0) + var deq_ptr = UFix(0) + + if (entries > 1) + { + enq_ptr = Reg(resetVal = UFix(0, log2up(entries))) + deq_ptr = Reg(resetVal = UFix(0, log2up(entries))) + + when (do_deq) { + deq_ptr := deq_ptr + UFix(1) + } + when (do_enq) { + enq_ptr := enq_ptr + UFix(1) + } + if (flushable) { + when (io.flush) { + deq_ptr := UFix(0) + enq_ptr := UFix(0) + } + } } + + val maybe_full = Reg(resetVal = Bool(false)) when (do_enq != do_deq) { maybe_full := do_enq } if (flushable) { when (io.flush) { - deq_ptr := UFix(0) - enq_ptr := UFix(0) maybe_full := Bool(false) } } + io.deq.valid := maybe_full || enq_ptr != deq_ptr + io.enq.ready := !maybe_full || enq_ptr != deq_ptr io.deq.bits <> Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr) } From 137fd6200723a5c05314e96481d6063810e81adc Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 25 Feb 2012 12:20:36 -0800 Subject: [PATCH 0210/1087] refactor cpfences --- rocket/src/main/scala/ctrl.scala | 27 ++++----------------------- rocket/src/main/scala/ctrl_vec.scala | 12 +++++++++++- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 696eb9a4..a40b388d 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -600,32 +600,13 @@ class rocketCtrl extends Component val vec = new rocketCtrlVec() io.vec_dpath <> vec.io.dpath + io.vec_iface <> vec.io.iface - io.vec_iface.vcmdq_valid := vec.io.iface.vcmdq_valid - io.vec_iface.vximm1q_valid := vec.io.iface.vximm1q_valid 
- io.vec_iface.vximm2q_valid := vec.io.iface.vximm2q_valid - vec.io.iface.vcmdq_ready := io.vec_iface.vcmdq_ready - vec.io.iface.vximm1q_ready := io.vec_iface.vximm1q_ready - vec.io.iface.vximm2q_ready := io.vec_iface.vximm2q_ready - - io.vec_iface.vpfcmdq_valid := vec.io.iface.vpfcmdq_valid - io.vec_iface.vpfximm1q_valid := vec.io.iface.vpfximm1q_valid - io.vec_iface.vpfximm2q_valid := vec.io.iface.vpfximm2q_valid - vec.io.iface.vpfcmdq_ready := io.vec_iface.vpfcmdq_ready - vec.io.iface.vpfximm1q_ready := io.vec_iface.vpfximm1q_ready - vec.io.iface.vpfximm2q_ready := io.vec_iface.vpfximm2q_ready + vec.io.sr_ev := io.dpath.status(SR_EV) + vec.io.exception := wb_reg_exception vec_replay = vec.io.replay - vec_cpfence = Reg(resetVal = Bool(false)) - when (vec.io.cpfence) { - vec_cpfence := Bool(true) - } - when (io.vec_iface.vackq_valid || wb_reg_exception) { - vec_cpfence := Bool(false) - } - - io.vec_iface.vackq_ready := Bool(true) - vec.io.sr_ev := io.dpath.status(SR_EV) + vec_cpfence = vec.io.cpfence } // exception handling diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index b245e2e2..1d5560f5 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -41,6 +41,7 @@ class ioCtrlVec extends Bundle val dpath = new ioCtrlDpathVec() val iface = new ioCtrlVecInterface() val sr_ev = Bool(INPUT) + val exception = Bool(INPUT) val replay = Bool(OUTPUT) val cpfence = Bool(OUTPUT) } @@ -151,6 +152,8 @@ class rocketCtrlVec extends Component mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && wb_vec_pfximm2q_enq + io.iface.vackq_ready := Bool(true) + io.replay := valid_common && ( wb_vec_cmdq_enq && !io.iface.vcmdq_ready || wb_vec_ximm1q_enq && !io.iface.vximm1q_ready || @@ -159,5 +162,12 @@ class rocketCtrlVec extends Component wb_vec_pfximm1q_enq && !io.iface.vpfximm1q_ready || wb_vec_pfximm2q_enq && 
!io.iface.vpfximm2q_ready ) - io.cpfence := valid_common && wb_vec_cpfence && !io.replay + + val reg_cpfence = Reg(resetVal = Bool(false)) + val do_cpfence = valid_common && wb_vec_cpfence && !io.replay + + when (do_cpfence) { reg_cpfence := Bool(true) } + when (io.iface.vackq_valid || io.exception) { reg_cpfence := Bool(false) } + + io.cpfence := reg_cpfence } From a1600d95dbeb20aaca180aa8ea2b0dd3a02a9611 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 25 Feb 2012 12:21:10 -0800 Subject: [PATCH 0211/1087] fix bug related to waddr and wdata in wb stage for the instructions which don't use waddr/wdata for writeback, the contents were getting overwritten by the ll ops it manifested itself after cp imul were sharing the alu with the vu --- rocket/src/main/scala/ctrl.scala | 66 +++++++++++++++---------------- rocket/src/main/scala/dpath.scala | 20 ++++++---- 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index a40b388d..86a46dbd 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -247,43 +247,43 @@ class rocketCtrl extends Component VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, 
N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), FENCE_L_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), FENCE_G_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), FENCE_L_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), FENCE_G_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), - VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLBU-> 
List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, 
DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N) + VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, 
N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLBU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, 
SYNC_N,N,N,N,N), + VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + VFSSTW-> 
List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N) )) val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 68f58184..247bd8c7 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -103,8 +103,10 @@ class rocketDpath extends Component val wb_reg_pc = Reg() { UFix() }; val wb_reg_inst = Reg() { Bits() }; val wb_reg_rs2 = Reg() { Bits() }; - val wb_reg_waddr = Reg() { UFix() }; - val wb_reg_wdata = Reg() { Bits() }; + val wb_reg_waddr = Reg() { UFix() } + val wb_reg_wdata = Reg() { Bits() } + val wb_reg_vec_waddr = Reg() { UFix() } + val wb_reg_vec_wdata = Reg() { Bits() } val wb_reg_raddr1 = Reg() { UFix() }; val wb_reg_raddr2 = Reg() { UFix() }; val wb_reg_ll_wb = Reg(resetVal = Bool(false)); @@ -376,6 +378,8 @@ class rocketDpath extends Component wb_reg_rs2 := mem_reg_rs2 wb_reg_waddr := mem_ll_waddr wb_reg_wdata := mem_ll_wdata + wb_reg_vec_waddr := mem_reg_waddr + wb_reg_vec_wdata := mem_reg_wdata wb_reg_raddr1 := mem_reg_raddr1 wb_reg_raddr2 := mem_reg_raddr2; @@ -392,23 +396,23 @@ class rocketDpath extends Component vec.io.valid := io.ctrl.wb_valid vec.io.inst := wb_reg_inst - vec.io.waddr := wb_reg_waddr + vec.io.waddr := wb_reg_vec_waddr vec.io.raddr1 := wb_reg_raddr1 vec.io.vecbank := pcr.io.vecbank vec.io.vecbankcnt := pcr.io.vecbankcnt - vec.io.wdata := wb_reg_wdata + vec.io.wdata := wb_reg_vec_wdata vec.io.rs2 := wb_reg_rs2 wb_wdata := Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), Mux(wb_src_dmem, io.dmem.resp_data_subword, - wb_reg_wdata)) + wb_reg_wdata)) } else { wb_wdata := Mux(wb_src_dmem, io.dmem.resp_data_subword, - wb_reg_wdata) + wb_reg_wdata) } rfile.io.w0.addr := wb_reg_waddr @@ -420,7 +424,7 @@ class rocketDpath extends Component io.ext_mem.resp_type := 
Reg(io.dmem.resp_type) io.ext_mem.resp_data := io.dmem.resp_data_subword - io.ctrl.wb_waddr := wb_reg_waddr; + io.ctrl.wb_waddr := wb_reg_waddr io.ctrl.mem_wb := dmem_resp_replay; // scoreboard clear (for div/mul and D$ load miss writebacks) @@ -432,7 +436,7 @@ class rocketDpath extends Component // processor control regfile write pcr.io.w.addr := wb_reg_raddr2; pcr.io.w.en := io.ctrl.wen_pcr - pcr.io.w.data := wb_reg_wdata; + pcr.io.w.data := wb_reg_wdata pcr.io.di := io.ctrl.irq_disable; pcr.io.ei := io.ctrl.irq_enable; From 4fa31b300bb52f2cea8c8357ccd8a558cdf5a9a4 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 24 Feb 2012 16:25:36 -0800 Subject: [PATCH 0212/1087] Added popcount util --- rocket/src/main/scala/util.scala | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index da2343e3..34277543 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -32,6 +32,41 @@ object FillInterleaved } } +// http://aggregate.ee.engr.uky.edu/MAGIC/#Population%20Count%20%28Ones%20Count%29 +// http://bits.stephan-brumme.com/countBits.html +object PopCount +{ + def apply(in: Bits) = + { + require(in.width <= 32) + val w = log2up(in.width+1) + var x = in + if(in.width == 2) { + x = x - ((x >> UFix(1)) & Bits("h_5555_5555")) + } else if(in.width <= 4) { + x = x - ((x >> UFix(1)) & Bits("h_5555_5555")) + x = (((x >> UFix(2)) & Bits("h_3333_3333")) + (x & Bits("h_3333_3333"))) + } else if(in.width <= 8) { + x = x - ((x >> UFix(1)) & Bits("h_5555_5555")) + x = (((x >> UFix(2)) & Bits("h_3333_3333")) + (x & Bits("h_3333_3333"))) + x = ((x >> UFix(4)) + x) + } else { + // count bits of each 2-bit chunk + x = x - ((x >> UFix(1)) & Bits("h_5555_5555")) + // count bits of each 4-bit chunk + x = (((x >> UFix(2)) & Bits("h_3333_3333")) + (x & Bits("h_3333_3333"))) + // count bits of each 8-bit chunk + x = ((x >> UFix(4)) + x) + // mask junk in upper bits 
+ x = x & Bits("h_0f0f_0f0f") + // add all four 8-bit chunks + x = x + (x >> UFix(8)) + x = x + (x >> UFix(16)) + } + x(w-1,0) + } +} + object Reverse { def apply(in: Bits) = From df97de0fd39e83d913f4947689a868a1ea5f5381 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sat, 25 Feb 2012 12:56:09 -0800 Subject: [PATCH 0213/1087] Better abstraction of data bundles --- rocket/src/main/scala/coherence.scala | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 534c4bb9..579af518 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -4,13 +4,27 @@ import Chisel._ import Constants._ import hwacha.GenArray +class HubMemReq extends Bundle { + val rw = Bool() + val addr = UFix(width = PADDR_BITS-OFFSET_BITS) + val tag = Bits(width = GLOBAL_XACT_ID_BITS) + // Figure out which data-in port to pull from + val data_idx = Bits(width = TILE_ID_BITS) + val is_probe_rep = Bool() +} + +class MemData extends Bundle { + val data = Bits(width = MEM_DATA_BITS) +} + class TransactionInit extends Bundle { val ttype = Bits(width = TTYPE_BITS) val tileTransactionID = Bits(width = TILE_XACT_ID_BITS) val address = Bits(width = PADDR_BITS) - val data = Bits(width = MEM_DATA_BITS) } +class TransactionInitData extends MemData + class TransactionAbort extends Bundle { val tileTransactionID = Bits(width = TILE_XACT_ID_BITS) } @@ -27,9 +41,7 @@ class ProbeReply extends Bundle { val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) } -class ProbeReplyData extends Bundle { - val data = Bits(width = MEM_DATA_BITS) -} +class ProbeReplyData extends MemData class TransactionReply extends Bundle { val ttype = Bits(width = TTYPE_BITS) @@ -37,9 +49,7 @@ class TransactionReply extends Bundle { val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) } -class TransactionReplyData extends Bundle { - val data = Bits(width = MEM_DATA_BITS) 
-} +class TransactionReplyData extends MemData class TransactionFinish extends Bundle { val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) @@ -47,6 +57,7 @@ class TransactionFinish extends Bundle { class ioTileLink extends Bundle { val xact_init = (new ioDecoupled) { new TransactionInit() }.flip + val xact_init_data = (new ioDecoupled) { new TransactionInitData() }.flip val xact_abort = (new ioDecoupled) { new TransactionAbort() } val probe_req = (new ioDecoupled) { new ProbeRequest() } val probe_rep = (new ioDecoupled) { new ProbeReply() }.flip From db6d4807781b0965d2a03e56bdb93f2237b33a11 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sat, 25 Feb 2012 15:27:09 -0800 Subject: [PATCH 0214/1087] Better foldR --- rocket/src/main/scala/coherence.scala | 28 ++++++++++++++------------- rocket/src/main/scala/util.scala | 5 +++-- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 579af518..bb2f7d65 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -18,41 +18,43 @@ class MemData extends Bundle { } class TransactionInit extends Bundle { - val ttype = Bits(width = TTYPE_BITS) - val tileTransactionID = Bits(width = TILE_XACT_ID_BITS) + val t_type = Bits(width = TTYPE_BITS) + val has_data = Bool() + val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) val address = Bits(width = PADDR_BITS) } class TransactionInitData extends MemData class TransactionAbort extends Bundle { - val tileTransactionID = Bits(width = TILE_XACT_ID_BITS) + val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) } class ProbeRequest extends Bundle { - val ptype = Bits(width = PTYPE_BITS) - val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) + val p_type = Bits(width = PTYPE_BITS) + val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) val address = Bits(width = PADDR_BITS) } class ProbeReply extends Bundle { - val ptype = Bits(width = PTYPE_BITS) - 
val hasData = Bool() - val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) + val p_type = Bits(width = PTYPE_BITS) + val has_data = Bool() + val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) } class ProbeReplyData extends MemData class TransactionReply extends Bundle { - val ttype = Bits(width = TTYPE_BITS) - val tileTransactionID = Bits(width = TILE_XACT_ID_BITS) - val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) + val t_type = Bits(width = TTYPE_BITS) + val has_data = Bool() + val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) + val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) } class TransactionReplyData extends MemData class TransactionFinish extends Bundle { - val globalTransactionID = Bits(width = GLOBAL_XACT_ID_BITS) + val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) } class ioTileLink extends Bundle { @@ -142,7 +144,7 @@ trait FourStateCoherence extends CoherencePolicy { def getMetaUpdateOnProbe (incoming: ProbeRequest): Bits = { val state = UFix(0) - switch(incoming.ptype) { + switch(incoming.p_type) { is(probeInvalidate) { state := tileInvalid } is(probeDowngrade) { state := tileShared } } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 34277543..a24afc62 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -5,10 +5,11 @@ import Chisel._ import Node._ import scala.math._ + object foldR { - def apply[T <: Bits](x: Seq[T], f: (T, T) => T): T = - if (x.length == 1) x(0) else f(x(0), foldR(x.slice(1, x.length), f)) + def apply[T <: Bits](x: Seq[T])(f: (T, T) => T): T = + if (x.length == 1) x(0) else f(x(0), foldR(x.slice(1, x.length))(f)) } object log2up From 3980120279c6be41b437b7a3839591814d36e022 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sat, 25 Feb 2012 15:27:53 -0800 Subject: [PATCH 0215/1087] More stylish bundle param names, some hub progress --- rocket/src/main/scala/coherence.scala | 105 +++++++++++++++++++------- 1 file changed, 78 
insertions(+), 27 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index bb2f7d65..d7597cdf 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -154,23 +154,27 @@ trait FourStateCoherence extends CoherencePolicy { class XactTracker(id: Int) extends Component { val io = new Bundle { - val xact_init = (new ioDecoupled) { new TransactionInit() } - val probe_rep = (new ioDecoupled) { new ProbeReply() } - val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip - val xact_rep = (new ioDecoupled) { new TransactionReply() }.flip - val mem_req = (new ioDecoupled) { new MemReq() }.flip + val mem_req = (new ioDecoupled) { new HubMemReq() }.flip val xact_finish = Bool(INPUT) + val p_rep_has_data = Bool(INPUT) + val x_init_has_data = Bool(INPUT) + val p_rep_data_idx = Bits(log2up(NTILES), INPUT) + val x_init_data_idx = Bits(log2up(NTILES), INPUT) + val rep_cnt_dec = Bits(NTILES, INPUT) val busy = Bool(OUTPUT) val addr = Bits(PADDR_BITS, OUTPUT) val tile_id = Bits(TILE_ID_BITS, OUTPUT) val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) val sharer_count = Bits(TILE_ID_BITS, OUTPUT) - val ttype = Bits(TTYPE_BITS, OUTPUT) + val t_type = Bits(TTYPE_BITS, OUTPUT) + val pop_p_rep = Bool(OUTPUT) + val pop_p_rep_data = Bool(OUTPUT) + val send_x_rep_ack = Bool(OUTPUT) } val valid = Reg(resetVal = Bool(false)) val addr = Reg{ Bits() } - val ttype = Reg{ Bits() } + val t_type = Reg{ Bits() } val tile_id = Reg{ Bits() } val tile_xact_id = Reg{ Bits() } val probe_done = Reg{ Bits() } @@ -184,9 +188,9 @@ class CoherenceHubNoDir extends CoherenceHub { def coherenceConflict(addr1: Bits, addr2: Bits): Bool = { addr1(PADDR_BITS-1, OFFSET_BITS) === addr2(PADDR_BITS-1, OFFSET_BITS) } - def getTransactionReplyType(ttype: UFix, count: UFix): Bits = { + def getTransactionReplyType(t_type: UFix, count: UFix): Bits = { val ret = Wire() { Bits(width = TTYPE_BITS) } - switch (ttype) { + switch 
(t_type) { is(X_READ_SHARED) { ret := Mux(count > UFix(0), X_READ_SHARED, X_READ_EXCLUSIVE) } is(X_READ_EXCLUSIVE) { ret := X_READ_EXCLUSIVE } is(X_READ_UNCACHED) { ret := X_READ_UNCACHED } @@ -205,17 +209,27 @@ class CoherenceHubNoDir extends CoherenceHub { val addr_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS)} } val tile_id_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val tile_xact_id_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } + val t_type_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TTYPE_BITS)} } val sh_count_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val ttype_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TTYPE_BITS)} } - val free_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } + val send_x_rep_ack_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } + + val do_free_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } + val p_rep_has_data_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } + val p_rep_data_idx_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=log2up(NTILES))} } + val rep_cnt_dec_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } + for( i <- 0 until NGLOBAL_XACTS) { - busy_arr.write( UFix(i), trackerList(i).io.busy) - addr_arr.write( UFix(i), trackerList(i).io.addr) - tile_id_arr.write( UFix(i), trackerList(i).io.tile_id) - tile_xact_id_arr.write(UFix(i), trackerList(i).io.tile_xact_id) - ttype_arr.write( UFix(i), trackerList(i).io.ttype) - sh_count_arr.write( UFix(i), trackerList(i).io.sharer_count) - trackerList(i).io.xact_finish := free_arr.read(UFix(i)) + busy_arr.write( UFix(i), trackerList(i).io.busy) + addr_arr.write( UFix(i), trackerList(i).io.addr) + tile_id_arr.write( UFix(i), trackerList(i).io.tile_id) + tile_xact_id_arr.write( UFix(i), trackerList(i).io.tile_xact_id) + t_type_arr.write( UFix(i), trackerList(i).io.t_type) + sh_count_arr.write( UFix(i), trackerList(i).io.sharer_count) + send_x_rep_ack_arr.write( UFix(i), 
trackerList(i).io.send_x_rep_ack) + trackerList(i).io.xact_finish := do_free_arr.read(UFix(i)) + trackerList(i).io.p_rep_has_data := p_rep_has_data_arr.read(UFix(i)) + trackerList(i).io.p_rep_data_idx := p_rep_data_idx_arr.read(UFix(i)) + trackerList(i).io.rep_cnt_dec := rep_cnt_dec_arr.read(UFix(i)) } // Nack conflicting transaction init attempts @@ -231,14 +245,14 @@ class CoherenceHubNoDir extends CoherenceHub { } aborting(j) := (conflicts.orR || busy_arr.flatten().andR) abort.valid := init.valid && aborting - abort.bits.tileTransactionID := init.bits.tileTransactionID + abort.bits.tile_xact_id := init.bits.tile_xact_id init.ready := aborting(j) || initiating(j) } // Free finished transactions for( j <- 0 until NTILES ) { val finish = io.tiles(j).xact_finish - free_arr.write(finish.bits.globalTransactionID, finish.valid) + do_free_arr.write(finish.bits.global_xact_id, finish.valid) finish.ready := Bool(true) } @@ -249,18 +263,55 @@ class CoherenceHubNoDir extends CoherenceHub { val idx = io.mem.resp_tag val readys = Bits(width = NTILES) for( j <- 0 until NTILES ) { - io.tiles(j).xact_rep.bits.ttype := getTransactionReplyType(ttype_arr.read(idx), sh_count_arr.read(idx)) - io.tiles(j).xact_rep.bits.tileTransactionID := tile_xact_id_arr.read(idx) - io.tiles(j).xact_rep.bits.globalTransactionID := idx + io.tiles(j).xact_rep.bits.t_type := getTransactionReplyType(t_type_arr.read(idx), sh_count_arr.read(idx)) + io.tiles(j).xact_rep.bits.tile_xact_id := tile_xact_id_arr.read(idx) + io.tiles(j).xact_rep.bits.global_xact_id := idx io.tiles(j).xact_rep_data.bits.data := io.mem.resp_data readys := Mux(xrep_cnt === UFix(0), io.tiles(j).xact_rep.ready && io.tiles(j).xact_rep_data.ready, io.tiles(j).xact_rep_data.ready) - val this_rep_valid = UFix(j) === tile_id_arr.read(idx) && io.mem.resp_val - io.tiles(j).xact_rep.valid := this_rep_valid && xrep_cnt === UFix(0) - io.tiles(j).xact_rep_data.valid := this_rep_valid + io.tiles(j).xact_rep.valid := (UFix(j) === 
tile_id_arr.read(idx)) && ((io.mem.resp_val && xrep_cnt === UFix(0)) || send_x_rep_ack_arr.read(idx)) + io.tiles(j).xact_rep_data.valid := (UFix(j) === tile_id_arr.read(idx)) } - // If there were a ready signal due to e.g. intervening network: + // If there were a ready signal due to e.g. intervening network use: //io.mem.resp_rdy := readys(tile_id_arr.read(idx)).xact_rep.ready + // Create an arbiter for the one memory port + // We have to arbitrate between the different trackers' memory requests + // and once we have picked a request, get the right write data + + val mem_req_arb = (new Arbiter(NGLOBAL_XACTS)) { new HubMemReq() } + for( i <- 0 until NGLOBAL_XACTS ) { + mem_req_arb.io.in(i) <> trackerList(i).io.mem_req + } + mem_req_arb.io.out.ready := io.mem.req_rdy + io.mem.req_val := mem_req_arb.io.out.valid + io.mem.req_rw := mem_req_arb.io.out.bits.rw + io.mem.req_tag := mem_req_arb.io.out.bits.tag + io.mem.req_addr := mem_req_arb.io.out.bits.addr + io.mem.req_wdata := MuxLookup(mem_req_arb.io.out.bits.data_idx, + Bits(0, width = MEM_DATA_BITS), + (0 until NTILES).map( j => + UFix(j) -> Mux(mem_req_arb.io.out.bits.is_probe_rep, + io.tiles(j).probe_rep_data.bits.data, + io.tiles(j).xact_init_data.bits.data))) + + for( j <- 0 until NTILES ) { + val p_rep = io.tiles(j).probe_rep + val p_rep_data = io.tiles(j).probe_rep_data + val idx = p_rep.bits.global_xact_id + p_rep_has_data_arr.write(idx, p_rep.valid && p_rep.bits.has_data) + p_rep_data_idx_arr.write(idx, UFix(j)) + p_rep.ready := foldR(trackerList.map(_.io.pop_p_rep))(_ || _) + p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data))(_ || _) + } + for( i <- 0 until NGLOBAL_XACTS ) { + val flags = Bits(width = NTILES) + for( j <- 0 until NTILES) { + val p_rep = io.tiles(j).probe_rep + flags(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) + } + rep_cnt_dec_arr.write(UFix(i), flags) + } + // Pick a single request of these types to process //val xact_init_arb = (new Arbiter(NTILES)) { new 
TransactionInit() } From 3839e3a318fe45c5129519b9defe837e1222852a Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 25 Feb 2012 15:55:10 -0800 Subject: [PATCH 0216/1087] massive refactoring of vector constants --- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/dpath_vec.scala | 8 ++++---- rocket/src/main/scala/multiplier.scala | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 247bd8c7..36650cf4 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -41,7 +41,7 @@ class ioDpathAll extends Bundle() val vec_ctrl = new ioCtrlDpathVec().flip() val vec_iface = new ioDpathVecInterface() val vec_imul_req = new io_imul_req - val vec_imul_resp = Bits(hwacha.Config.DEF_XLEN, INPUT) + val vec_imul_resp = Bits(hwacha.Constants.SZ_XLEN, INPUT) } class rocketDpath extends Component diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 7e778685..6a27a9bb 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -4,13 +4,13 @@ import Chisel._ import Node._ import Constants._ import Instructions._ -import hwacha.Interface._ +import hwacha.Constants._ class ioDpathVecInterface extends Bundle { - val vcmdq_bits = Bits(VCMD_SZ, OUTPUT) - val vximm1q_bits = Bits(VIMM_SZ, OUTPUT) - val vximm2q_bits = Bits(VSTRIDE_SZ, OUTPUT) + val vcmdq_bits = Bits(SZ_VCMD, OUTPUT) + val vximm1q_bits = Bits(SZ_VIMM, OUTPUT) + val vximm2q_bits = Bits(SZ_VSTRIDE, OUTPUT) } class ioDpathVec extends Bundle diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index cfe4658e..9e35dcc9 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -4,7 +4,7 @@ import Chisel._ import Node._ import Constants._ import hwacha._ -import hwacha.Config._ +import hwacha.Constants._ class ioMultiplier extends 
Bundle { val req = new io_imul_req().flip() @@ -13,7 +13,7 @@ class ioMultiplier extends Bundle { val resp_val = Bool(OUTPUT) val resp_rdy = Bool(INPUT) val resp_tag = Bits(5, OUTPUT) - val resp_bits = Bits(DEF_XLEN, OUTPUT) + val resp_bits = Bits(SZ_XLEN, OUTPUT) } class rocketVUMultiplier(nwbq: Int) extends Component { @@ -21,7 +21,7 @@ class rocketVUMultiplier(nwbq: Int) extends Component { val cpu = new ioMultiplier val vu = new Bundle { val req = new io_imul_req - val resp = Bits(DEF_XLEN, INPUT) + val resp = Bits(SZ_XLEN, INPUT) } } From 946e0c6e4ea289945f6d9ac3640e6e43eb398447 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 25 Feb 2012 16:37:56 -0800 Subject: [PATCH 0217/1087] add vector exception infrastructure --- rocket/src/main/scala/consts.scala | 4 ++++ rocket/src/main/scala/cpu.scala | 4 ++++ rocket/src/main/scala/dpath.scala | 2 ++ rocket/src/main/scala/dpath_util.scala | 9 +++++++++ rocket/src/main/scala/dpath_vec.scala | 7 +++++++ 5 files changed, 26 insertions(+) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 3758cbd0..7c6b2a3c 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -139,6 +139,10 @@ object Constants val PCR_FROMHOST = UFix(17, 5); val PCR_VECBANK = UFix(18, 5); + // temporaries for vector, these will go away + val PCR_VEC_TMP1 = UFix(30, 5) + val PCR_VEC_TMP2 = UFix(31, 5) + // definition of bits in PCR status reg val SR_ET = 0; // enable traps val SR_EF = 1; // enable floating point diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 2bd23f49..56afb479 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -156,6 +156,10 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ctrl.io.vec_iface.vackq_valid := vu.io.vec_ackq.valid vu.io.vec_ackq.ready := ctrl.io.vec_iface.vackq_ready + // exceptions + // dpath.io.vec_iface.eaddr + // dpath.io.vec_iface.exception + 
// hooking up vector memory interface ctrl.io.ext_mem.req_val := vu.io.dmem_req.valid ctrl.io.ext_mem.req_cmd := vu.io.dmem_req.bits.cmd diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 36650cf4..94b73ead 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -402,6 +402,8 @@ class rocketDpath extends Component vec.io.vecbankcnt := pcr.io.vecbankcnt vec.io.wdata := wb_reg_vec_wdata vec.io.rs2 := wb_reg_rs2 + vec.io.vec_eaddr := pcr.io.vec_eaddr + vec.io.vec_exception := pcr.io.vec_exception wb_wdata := Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 62aab094..53c4ffc7 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -80,6 +80,8 @@ class ioDpathPCR extends Bundle() val irq_ipi = Bool(OUTPUT); val vecbank = Bits(8, OUTPUT) val vecbankcnt = UFix(4, OUTPUT) + val vec_eaddr = Bits(VADDR_BITS, OUTPUT) + val vec_exception = Bool(OUTPUT) } class rocketDpathPCR extends Component @@ -98,6 +100,8 @@ class rocketDpathPCR extends Component val reg_k1 = Reg() { Bits() }; val reg_ptbr = Reg() { UFix() }; val reg_vecbank = Reg(resetVal = Bits("b1111_1111", 8)) + val reg_vec_eaddr = Reg() { Bits() } + val reg_vec_exception = Reg() { Bool() } val reg_error_mode = Reg(resetVal = Bool(false)); val reg_status_vm = Reg(resetVal = Bool(false)); @@ -139,6 +143,9 @@ class rocketDpathPCR extends Component cnt = cnt + reg_vecbank(i) io.vecbankcnt := cnt(3,0) + io.vec_eaddr := reg_vec_eaddr + io.vec_exception := reg_vec_exception + val badvaddr_sign = Mux(io.w.data(VADDR_BITS-1), ~io.w.data(63,VADDR_BITS) === UFix(0), io.w.data(63,VADDR_BITS) != UFix(0)) when (io.badvaddr_wen) { reg_badvaddr := Cat(badvaddr_sign, io.w.data(VADDR_BITS-1,0)).toUFix; @@ -205,6 +212,8 @@ class rocketDpathPCR extends Component when (waddr === PCR_K1) { reg_k1 := wdata; } when (waddr === 
PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } when (waddr === PCR_VECBANK) { reg_vecbank := wdata(7,0) } + when (waddr === PCR_VEC_TMP1) { reg_vec_eaddr := wdata(VADDR_BITS,0) } + when (waddr === PCR_VEC_TMP2) { reg_vec_exception:= wdata(0) } } rdata := Bits(0, 64) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 6a27a9bb..6fc55c36 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -11,6 +11,8 @@ class ioDpathVecInterface extends Bundle val vcmdq_bits = Bits(SZ_VCMD, OUTPUT) val vximm1q_bits = Bits(SZ_VIMM, OUTPUT) val vximm2q_bits = Bits(SZ_VSTRIDE, OUTPUT) + val eaddr = Bits(64, OUTPUT) + val exception = Bool(OUTPUT) } class ioDpathVec extends Bundle @@ -25,6 +27,8 @@ class ioDpathVec extends Bundle val vecbankcnt = UFix(4, INPUT) val wdata = Bits(64, INPUT) val rs2 = Bits(64, INPUT) + val vec_eaddr = Bits(64, INPUT) + val vec_exception = Bool(INPUT) val wen = Bool(OUTPUT) val appvl = UFix(12, OUTPUT) } @@ -125,6 +129,9 @@ class rocketDpathVec extends Component io.iface.vximm2q_bits := io.rs2 + io.iface.eaddr := io.vec_eaddr + io.iface.exception := io.vec_exception + io.ctrl.valid := io.valid io.ctrl.inst := io.inst io.ctrl.appvl0 := reg_appvl0 From 94ba32bbd309a8c0f3b8a4997a704e941f1d7bbb Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 25 Feb 2012 17:09:26 -0800 Subject: [PATCH 0218/1087] change package name and sbt project name to rocket --- rocket/src/main/scala/arbiter.scala | 4 +--- rocket/src/main/scala/coherence.scala | 4 +--- rocket/src/main/scala/consts.scala | 4 +--- rocket/src/main/scala/cpu.scala | 2 +- rocket/src/main/scala/ctrl.scala | 4 +--- rocket/src/main/scala/ctrl_util.scala | 2 +- rocket/src/main/scala/ctrl_vec.scala | 2 +- rocket/src/main/scala/divider.scala | 2 +- rocket/src/main/scala/dpath.scala | 4 +--- rocket/src/main/scala/dpath_alu.scala | 5 +---- rocket/src/main/scala/dpath_util.scala | 5 
+---- rocket/src/main/scala/dpath_vec.scala | 2 +- rocket/src/main/scala/dtlb.scala | 4 +--- rocket/src/main/scala/fpu.scala | 2 +- rocket/src/main/scala/htif.scala | 2 +- rocket/src/main/scala/icache.scala | 4 +--- rocket/src/main/scala/icache_prefetch.scala | 4 +--- rocket/src/main/scala/instructions.scala | 5 ++--- rocket/src/main/scala/itlb.scala | 4 +--- rocket/src/main/scala/multiplier.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 4 +--- rocket/src/main/scala/ptw.scala | 4 +--- rocket/src/main/scala/queues.scala | 2 +- rocket/src/main/scala/top.scala | 2 +- rocket/src/main/scala/util.scala | 5 +---- 25 files changed, 26 insertions(+), 58 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index a5d9d7da..dcbada4c 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -1,4 +1,4 @@ -package Top { +package rocket import Chisel._; import Node._; @@ -61,5 +61,3 @@ class rocketMemArbiter(n: Int) extends Component { io.requestor(i).resp_tag := io.mem.resp_tag >> UFix(log2up(n)) } } - -} diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index d7597cdf..d419dd52 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -1,4 +1,4 @@ -package Top { +package rocket import Chisel._ import Constants._ @@ -319,5 +319,3 @@ class CoherenceHubNoDir extends CoherenceHub { } - -} diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 7c6b2a3c..071d3e1f 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -1,4 +1,4 @@ -package Top { +package rocket import Chisel._ import scala.math._ @@ -239,5 +239,3 @@ object Constants val VIMM_ALU = UFix(1, 1) val VIMM_X = UFix(0, 1) } - -} diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 56afb479..4cca083b 100644 --- a/rocket/src/main/scala/cpu.scala +++ 
b/rocket/src/main/scala/cpu.scala @@ -1,4 +1,4 @@ -package Top +package rocket import Chisel._; import Node._; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 86a46dbd..f067aff1 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -1,4 +1,4 @@ -package Top { +package rocket import Chisel._ import Node._; @@ -816,5 +816,3 @@ class rocketCtrl extends Component io.ext_mem.resp_nack:= mem_reg_ext_mem_val && !wb_reg_ext_mem_nack && (io.dmem.req_kill || io.dmem.resp_nack || Reg(!io.dmem.req_rdy)) } - -} diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index 86ffcd78..f0b577e4 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -1,4 +1,4 @@ -package Top +package rocket import Chisel._ import Node._; diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 1d5560f5..8c2351e7 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -1,4 +1,4 @@ -package Top +package rocket import Chisel._ import Node._ diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index d3057697..26fc42b8 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -1,4 +1,4 @@ -package Top +package rocket import Chisel._ import Node._ diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 94b73ead..f97ba5d8 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -1,4 +1,4 @@ -package Top { +package rocket import Chisel._ import Node._; @@ -448,5 +448,3 @@ class rocketDpath extends Component pcr.io.pc := wb_reg_pc; pcr.io.badvaddr_wen := io.ctrl.badvaddr_wen; } - -} diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 72d22cae..c15b08a8 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ 
b/rocket/src/main/scala/dpath_alu.scala @@ -1,5 +1,4 @@ -package Top { - +package rocket import Chisel._ import Node._; @@ -56,5 +55,3 @@ class rocketDpathALU extends Component io.out := Cat(out_hi, out64(31,0)).toUFix io.adder_out := sum } - -} diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 53c4ffc7..b36a5af8 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -1,5 +1,4 @@ -package Top -{ +package rocket import Chisel._; import Node._; @@ -269,5 +268,3 @@ class rocketDpathRegfile extends Component io.r0.data := Mux((io.r0.addr === UFix(0, 5)) || !io.r0.en, Bits(0, 64), regfile(io.r0.addr)); io.r1.data := Mux((io.r1.addr === UFix(0, 5)) || !io.r1.en, Bits(0, 64), regfile(io.r1.addr)); } - -} diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 6fc55c36..c68db9de 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -1,4 +1,4 @@ -package Top +package rocket import Chisel._ import Node._ diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index ee64e753..32ddb797 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -1,5 +1,4 @@ -package Top -{ +package rocket import Chisel._; import Node._; @@ -181,4 +180,3 @@ class rocketDTLB(entries: Int) extends Component } } } -} diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 20236b9c..a0647631 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -1,4 +1,4 @@ -package Top +package rocket import Chisel._ import Node._ diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 8c46ee31..d229667e 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -1,4 +1,4 @@ -package Top +package rocket import Chisel._ import Node._; diff --git 
a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index f52ad748..ca3c9219 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -1,4 +1,4 @@ -package Top { +package rocket import Chisel._; import Node._; @@ -163,5 +163,3 @@ class rocketICache(sets: Int, assoc: Int) extends Component { } } } - -} diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index db0c2cc0..b9144cd7 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -1,4 +1,4 @@ -package Top { +package rocket import Chisel._; import Node._; @@ -80,5 +80,3 @@ class rocketIPrefetcher extends Component() { state := s_invalid } } - -} diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 0b23c3cd..1511e1f9 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -1,4 +1,5 @@ -package Top { +package rocket + import Chisel._ import Node._; @@ -248,5 +249,3 @@ object Instructions val NOP = ADDI & Bits("b00000000000000000000001111111111", 32); } - -} diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 6e0e6e4d..231010b9 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -1,5 +1,4 @@ -package Top -{ +package rocket import Chisel._; import Node._; @@ -201,4 +200,3 @@ class rocketITLB(entries: Int) extends Component } } } -} diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 9e35dcc9..0138afd0 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -1,4 +1,4 @@ -package Top +package rocket import Chisel._ import Node._ diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 96eacb66..16b6fb63 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ 
b/rocket/src/main/scala/nbdcache.scala @@ -1,4 +1,4 @@ -package Top { +package rocket import Chisel._ import Constants._ @@ -993,5 +993,3 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { io.mem.req_tag := wb.io.mem_req.bits.tag.toUFix io.mem.req_addr := wb.io.mem_req.bits.addr } - -} diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 83fb1e6c..ed8b347a 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -1,4 +1,4 @@ -package Top { +package rocket import Chisel._; import Node._; @@ -213,5 +213,3 @@ class rocketPTW extends Component } } } - -} diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index db99ce8b..fb1dd542 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -1,4 +1,4 @@ -package Top +package rocket import Chisel._ import Node._; diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 481ca6be..f379c017 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -1,4 +1,4 @@ -package Top +package rocket import Chisel._ import Node._; diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index a24afc62..5ac3b41b 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -1,5 +1,4 @@ -package Top -{ +package rocket import Chisel._ import Node._ @@ -249,5 +248,3 @@ class priorityEncoder(width: Int) extends Component io.out := l_out; } - -} From 569698b8243753d913920e4f1af10665ab010a46 Mon Sep 17 00:00:00 2001 From: Daiwei Li Date: Sat, 25 Feb 2012 22:05:30 -0800 Subject: [PATCH 0219/1087] dtlb now arbitrates between cpu, vec, and vec pf --- rocket/src/main/scala/consts.scala | 4 ++ rocket/src/main/scala/cpu.scala | 101 +++++++++++++++++++++-------- rocket/src/main/scala/dtlb.scala | 73 +++++++++++---------- 3 files changed, 118 insertions(+), 60 deletions(-) diff --git 
a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 071d3e1f..0e10b21d 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -238,4 +238,8 @@ object Constants val VIMM_VLEN = UFix(0, 1) val VIMM_ALU = UFix(1, 1) val VIMM_X = UFix(0, 1) + + val DTLB_VEC = 0 + val DTLB_VPF = 1 + val DTLB_CPU = 2 } diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 4cca083b..088779dc 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -32,6 +32,80 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) val ptw = new rocketPTW(); val arb = new rocketDmemArbiter(); + var vu: vu = null + if (HAVE_VEC) + { + vu = new vu() + // cpu, vector prefetch, and vector use the DTLB + val dtlbarb = new cArbiter(3)({new ioDTLB_CPU_req()}) + val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2up(3))) + when( dtlb.io.cpu_req.ready && dtlbarb.io.out.valid ) { dtlbchosen := dtlbarb.io.chosen } + + val chosen_vec = dtlbchosen === Bits(DTLB_VEC) + val chosen_pf = dtlbchosen === Bits(DTLB_VPF) + val chosen_cpu = dtlbchosen === Bits(DTLB_CPU) + + // vector prefetch doesn't care about exceptions + // and shouldn't cause any anyways + vu.io.vec_tlb_resp.xcpt_ld := chosen_vec && dtlb.io.cpu_resp.xcpt_ld + vu.io.vec_tlb_resp.xcpt_st := chosen_vec && dtlb.io.cpu_resp.xcpt_st + vu.io.vec_tlb_resp.miss := chosen_vec && dtlb.io.cpu_resp.miss + vu.io.vec_tlb_resp.ppn := dtlb.io.cpu_resp.ppn + + vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) + vu.io.vec_pftlb_resp.xcpt_st := Bool(false) + vu.io.vec_pftlb_resp.miss := chosen_pf && dtlb.io.cpu_resp.miss + vu.io.vec_pftlb_resp.ppn := dtlb.io.cpu_resp.ppn + + // connect DTLB to ctrl+dpath + dtlbarb.io.in(DTLB_CPU).valid := ctrl.io.dtlb_val + dtlbarb.io.in(DTLB_CPU).bits.kill := ctrl.io.dtlb_kill + dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req_cmd + dtlbarb.io.in(DTLB_CPU).bits.asid := Bits(0,ASID_BITS); // FIXME: 
connect to PCR + dtlbarb.io.in(DTLB_CPU).bits.vpn := dpath.io.dmem.req_addr(VADDR_BITS,PGIDX_BITS) + ctrl.io.dtlb_rdy := dtlbarb.io.in(DTLB_CPU).ready + + ctrl.io.xcpt_dtlb_ld := chosen_cpu && dtlb.io.cpu_resp.xcpt_ld + ctrl.io.xcpt_dtlb_st := chosen_cpu && dtlb.io.cpu_resp.xcpt_st + ctrl.io.dtlb_miss := chosen_cpu && dtlb.io.cpu_resp.miss + + dtlbarb.io.in(DTLB_VEC) <> vu.io.vec_tlb_req + dtlbarb.io.in(DTLB_VPF) <> vu.io.vec_pftlb_req + + + dtlb.io.cpu_req <> dtlbarb.io.out + } + else + { + // connect DTLB to ctrl+dpath + dtlb.io.cpu_req.valid := ctrl.io.dtlb_val + dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill + dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req_cmd + dtlb.io.cpu_req.bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR + dtlb.io.cpu_req.bits.vpn := dpath.io.dmem.req_addr(VADDR_BITS,PGIDX_BITS) + ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld + ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st + ctrl.io.dtlb_rdy := dtlb.io.cpu_req.ready + ctrl.io.dtlb_miss := dtlb.io.cpu_resp.miss + } + + dtlb.io.invalidate := dpath.io.ptbr_wen + dtlb.io.status := dpath.io.ctrl.status + + arb.io.cpu.req_ppn := dtlb.io.cpu_resp.ppn; + ctrl.io.dmem.req_rdy := dtlb.io.cpu_req.ready && arb.io.cpu.req_rdy; + + // connect DTLB to D$ arbiter + ctrl.io.xcpt_ma_ld := io.dmem.xcpt_ma_ld + ctrl.io.xcpt_ma_st := io.dmem.xcpt_ma_st + // connect page table walker to TLBs, page table base register (from PCR) + // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) + ptw.io.dtlb <> dtlb.io.ptw; + ptw.io.itlb <> itlb.io.ptw; + ptw.io.ptbr := dpath.io.ptbr; + arb.io.ptw <> ptw.io.dmem; + arb.io.mem <> io.dmem + ctrl.io.dpath <> dpath.io.ctrl; dpath.io.host <> io.host; dpath.io.debug <> io.debug; @@ -53,39 +127,14 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ctrl.io.xcpt_itlb := itlb.io.cpu.exception; io.imem.itlb_miss := itlb.io.cpu.resp_miss; - // connect DTLB to D$ arbiter, ctrl+dpath - dtlb.io.cpu.invalidate := 
dpath.io.ptbr_wen; - dtlb.io.cpu.status := dpath.io.ctrl.status; - dtlb.io.cpu.req_val := ctrl.io.dtlb_val; - dtlb.io.cpu.req_kill := ctrl.io.dtlb_kill; - dtlb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; - dtlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR - dtlb.io.cpu.req_vpn := dpath.io.dmem.req_addr(VADDR_BITS,PGIDX_BITS); - ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu.xcpt_ld; - ctrl.io.xcpt_dtlb_st := dtlb.io.cpu.xcpt_st; - ctrl.io.dtlb_rdy := dtlb.io.cpu.req_rdy; - ctrl.io.dtlb_miss := dtlb.io.cpu.resp_miss; - ctrl.io.xcpt_ma_ld := io.dmem.xcpt_ma_ld; - ctrl.io.xcpt_ma_st := io.dmem.xcpt_ma_st; - - // connect page table walker to TLBs, page table base register (from PCR) - // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) - ptw.io.dtlb <> dtlb.io.ptw; - ptw.io.itlb <> itlb.io.ptw; - ptw.io.ptbr := dpath.io.ptbr; - arb.io.ptw <> ptw.io.dmem; - arb.io.mem <> io.dmem - // connect arbiter to ctrl+dpath+DTLB arb.io.cpu.req_val := ctrl.io.dmem.req_val; arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; arb.io.cpu.req_type := ctrl.io.dmem.req_type; arb.io.cpu.req_kill := ctrl.io.dmem.req_kill; arb.io.cpu.req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0); - arb.io.cpu.req_ppn := dtlb.io.cpu.resp_ppn; arb.io.cpu.req_data := dpath.io.dmem.req_data; arb.io.cpu.req_tag := dpath.io.dmem.req_tag; - ctrl.io.dmem.req_rdy := dtlb.io.cpu.req_rdy && arb.io.cpu.req_rdy; ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; ctrl.io.dmem.resp_replay:= arb.io.cpu.resp_replay; ctrl.io.dmem.resp_nack := arb.io.cpu.resp_nack; @@ -111,8 +160,6 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) { dpath.io.vec_ctrl <> ctrl.io.vec_dpath - val vu = new vu() - // hooking up vector I$ vitlb.io.cpu.invalidate := dpath.io.ptbr_wen vitlb.io.cpu.status := dpath.io.ctrl.status diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 32ddb797..176f1ab5 100644 --- a/rocket/src/main/scala/dtlb.scala +++ 
b/rocket/src/main/scala/dtlb.scala @@ -4,33 +4,39 @@ import Chisel._; import Node._; import Constants._; import scala.math._; +import hwacha._ -// interface between DTLB and pipeline -class ioDTLB_CPU(view: List[String] = null) extends Bundle(view) +// ioDTLB_CPU also located in hwacha/src/vuVXU-Interface.scala +// should keep them in sync + +class ioDTLB_CPU_req_bundle extends Bundle { - // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(17, INPUT); - // invalidate all TLB entries - val invalidate = Bool(INPUT); // lookup requests - val req_val = Bool(INPUT); - val req_kill = Bool(INPUT); - val req_cmd = Bits(4, INPUT); // load/store/amo - val req_rdy = Bool(OUTPUT); - val req_asid = Bits(ASID_BITS, INPUT); - val req_vpn = UFix(VPN_BITS+1, INPUT); + val kill = Bool() + val cmd = Bits(width=4) // load/store/amo + val asid = Bits(width=ASID_BITS) + val vpn = UFix(width=VPN_BITS+1) +} +class ioDTLB_CPU_req extends io_ready_valid()( { new ioDTLB_CPU_req_bundle() } ) + +class ioDTLB_CPU_resp extends Bundle +{ // lookup responses - val resp_miss = Bool(OUTPUT); -// val resp_val = Bool(OUTPUT); - val resp_ppn = UFix(PPN_BITS, OUTPUT); - val xcpt_ld = Bool(OUTPUT); - val xcpt_st = Bool(OUTPUT); + val miss = Bool(OUTPUT) + val ppn = UFix(PPN_BITS, OUTPUT) + val xcpt_ld = Bool(OUTPUT) + val xcpt_st = Bool(OUTPUT) } class ioDTLB extends Bundle { - val cpu = new ioDTLB_CPU(); - val ptw = new ioTLB_PTW(); + // status bits (from PCR), to check current permission and whether VM is enabled + val status = Bits(17,INPUT) + // invalidate all TLB entries + val invalidate = Bool(INPUT) + val cpu_req = new ioDTLB_CPU_req().flip() + val cpu_resp = new ioDTLB_CPU_resp() + val ptw = new ioTLB_PTW() } class rocketDTLB(entries: Int) extends Component @@ -50,10 +56,10 @@ class rocketDTLB(entries: Int) extends Component val r_refill_waddr = Reg() { UFix() } val repl_count = Reg(resetVal = UFix(0,addr_bits)); - when (io.cpu.req_val && 
io.cpu.req_rdy) { - r_cpu_req_vpn := io.cpu.req_vpn; - r_cpu_req_cmd := io.cpu.req_cmd; - r_cpu_req_asid := io.cpu.req_asid; + when (io.cpu_req.valid && io.cpu_req.ready) { + r_cpu_req_vpn := io.cpu_req.bits.vpn; + r_cpu_req_cmd := io.cpu_req.bits.cmd; + r_cpu_req_asid := io.cpu_req.bits.asid; r_cpu_req_val := Bool(true); } .otherwise { @@ -63,6 +69,7 @@ class rocketDTLB(entries: Int) extends Component val req_load = (r_cpu_req_cmd === M_XRD); val req_store = (r_cpu_req_cmd === M_XWR); val req_amo = r_cpu_req_cmd(3).toBool; + val req_pf = (r_cpu_req_cmd === M_PFR) || (r_cpu_req_cmd === M_PFW) val bad_va = r_cpu_req_vpn(VPN_BITS) != r_cpu_req_vpn(VPN_BITS-1); @@ -70,7 +77,7 @@ class rocketDTLB(entries: Int) extends Component val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); - tag_cam.io.clear := io.cpu.invalidate; + tag_cam.io.clear := io.invalidate; tag_cam.io.tag := lookup_tag; tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; tag_cam.io.write_tag := r_refill_tag; @@ -79,9 +86,9 @@ class rocketDTLB(entries: Int) extends Component val tag_hit_addr = tag_cam.io.hit_addr; // extract fields from status register - val status_s = io.cpu.status(SR_S).toBool; // user/supervisor mode + val status_s = io.status(SR_S).toBool; // user/supervisor mode val status_u = !status_s; - val status_vm = io.cpu.status(SR_VM).toBool // virtual memory enable + val status_vm = io.status(SR_VM).toBool // virtual memory enable // extract fields from PT permission bits val ptw_perm_ur = io.ptw.resp_perm(2); @@ -118,7 +125,7 @@ class rocketDTLB(entries: Int) extends Component val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; - val lookup = (state === s_ready) && r_cpu_req_val && !io.cpu.req_kill && (req_load || req_store || req_amo); + val lookup = (state === s_ready) && r_cpu_req_val && !io.cpu_req.bits.kill && (req_load || req_store || req_amo || req_pf); val lookup_hit = lookup && 
tag_hit; val lookup_miss = lookup && !tag_hit; val tlb_hit = status_vm && lookup_hit; @@ -135,7 +142,7 @@ class rocketDTLB(entries: Int) extends Component } // exception check - val outofrange = !tlb_miss && (io.cpu.resp_ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); + val outofrange = !tlb_miss && (io.cpu_resp.ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); val access_fault_ld = tlb_hit && (req_load || req_amo) && @@ -143,7 +150,7 @@ class rocketDTLB(entries: Int) extends Component (status_u && !ur_array(tag_hit_addr).toBool) || bad_va); - io.cpu.xcpt_ld := access_fault_ld; + io.cpu_resp.xcpt_ld := access_fault_ld; val access_fault_st = tlb_hit && (req_store || req_amo) && @@ -151,11 +158,11 @@ class rocketDTLB(entries: Int) extends Component (status_u && !uw_array(tag_hit_addr).toBool) || bad_va); - io.cpu.xcpt_st := access_fault_st; + io.cpu_resp.xcpt_st := access_fault_st; - io.cpu.req_rdy := (state === s_ready) && !tlb_miss; - io.cpu.resp_miss := tlb_miss; - io.cpu.resp_ppn := + io.cpu_req.ready := (state === s_ready) && !tlb_miss; + io.cpu_resp.miss := tlb_miss; + io.cpu_resp.ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; io.ptw.req_val := (state === s_request); From 47dbc2a417d6aac2d4708062e9b1a682edce1fd9 Mon Sep 17 00:00:00 2001 From: Daiwei Li Date: Sun, 26 Feb 2012 00:30:50 -0800 Subject: [PATCH 0220/1087] head should be working again --- rocket/src/main/scala/cpu.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 088779dc..7541cf52 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -37,7 +37,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) { vu = new vu() // cpu, vector prefetch, and vector use the DTLB - val dtlbarb = new cArbiter(3)({new ioDTLB_CPU_req()}) + val dtlbarb = new rArbiter(3)({new ioDTLB_CPU_req()}) val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2up(3))) when( 
dtlb.io.cpu_req.ready && dtlbarb.io.out.valid ) { dtlbchosen := dtlbarb.io.chosen } @@ -204,8 +204,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.vec_ackq.ready := ctrl.io.vec_iface.vackq_ready // exceptions - // dpath.io.vec_iface.eaddr - // dpath.io.vec_iface.exception + vu.io.cpu_exception.addr := dpath.io.vec_iface.eaddr.toUFix + vu.io.cpu_exception.exception := dpath.io.vec_iface.exception // hooking up vector memory interface ctrl.io.ext_mem.req_val := vu.io.dmem_req.valid From 49efe4b7449c536a91fc200a0094a65ef0b4c6f8 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 26 Feb 2012 01:54:42 -0800 Subject: [PATCH 0221/1087] now vu steals cycles from the fpu's fma alu --- rocket/src/main/scala/cpu.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 7541cf52..84c33fcf 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -228,8 +228,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) dpath.io.vec_imul_req <> vu.io.cp_imul_req dpath.io.vec_imul_resp <> vu.io.cp_imul_resp - fpu.io.sfma.valid := Bool(false) - fpu.io.dfma.valid := Bool(false) + fpu.io.sfma <> vu.io.cp_sfma + fpu.io.dfma <> vu.io.cp_dfma } else { From 69260756bdef2828f64a57840fbfcb739a461886 Mon Sep 17 00:00:00 2001 From: Daiwei Li Date: Sun, 26 Feb 2012 02:54:16 -0800 Subject: [PATCH 0222/1087] change ppn and vpn in dtlb from ufix to bits --- rocket/src/main/scala/dtlb.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 176f1ab5..f7c8f17b 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -15,7 +15,7 @@ class ioDTLB_CPU_req_bundle extends Bundle val kill = Bool() val cmd = Bits(width=4) // load/store/amo val asid = Bits(width=ASID_BITS) - val vpn = UFix(width=VPN_BITS+1) + 
val vpn = Bits(width=VPN_BITS+1) } class ioDTLB_CPU_req extends io_ready_valid()( { new ioDTLB_CPU_req_bundle() } ) @@ -23,7 +23,7 @@ class ioDTLB_CPU_resp extends Bundle { // lookup responses val miss = Bool(OUTPUT) - val ppn = UFix(PPN_BITS, OUTPUT) + val ppn = Bits(PPN_BITS, OUTPUT) val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) } @@ -163,7 +163,7 @@ class rocketDTLB(entries: Int) extends Component io.cpu_req.ready := (state === s_ready) && !tlb_miss; io.cpu_resp.miss := tlb_miss; io.cpu_resp.ppn := - Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; + Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)); io.ptw.req_val := (state === s_request); io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); From 766a039ffe516929d6d311e4aff41c79584f3cac Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 26 Feb 2012 16:19:50 -0800 Subject: [PATCH 0223/1087] small changes to the dtlb arbiter --- rocket/src/main/scala/cpu.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 84c33fcf..419bb315 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -41,17 +41,22 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2up(3))) when( dtlb.io.cpu_req.ready && dtlbarb.io.out.valid ) { dtlbchosen := dtlbarb.io.chosen } + // tlb respones come out a cycle later val chosen_vec = dtlbchosen === Bits(DTLB_VEC) val chosen_pf = dtlbchosen === Bits(DTLB_VPF) val chosen_cpu = dtlbchosen === Bits(DTLB_CPU) - // vector prefetch doesn't care about exceptions - // and shouldn't cause any anyways + dtlbarb.io.in(DTLB_VEC) <> vu.io.vec_tlb_req + vu.io.vec_tlb_resp.xcpt_ld := chosen_vec && dtlb.io.cpu_resp.xcpt_ld vu.io.vec_tlb_resp.xcpt_st := chosen_vec && dtlb.io.cpu_resp.xcpt_st vu.io.vec_tlb_resp.miss := chosen_vec && dtlb.io.cpu_resp.miss 
vu.io.vec_tlb_resp.ppn := dtlb.io.cpu_resp.ppn + // vector prefetch doesn't care about exceptions + // and shouldn't cause any anyways + dtlbarb.io.in(DTLB_VPF) <> vu.io.vec_pftlb_req + vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) vu.io.vec_pftlb_resp.xcpt_st := Bool(false) vu.io.vec_pftlb_resp.miss := chosen_pf && dtlb.io.cpu_resp.miss @@ -69,10 +74,6 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ctrl.io.xcpt_dtlb_st := chosen_cpu && dtlb.io.cpu_resp.xcpt_st ctrl.io.dtlb_miss := chosen_cpu && dtlb.io.cpu_resp.miss - dtlbarb.io.in(DTLB_VEC) <> vu.io.vec_tlb_req - dtlbarb.io.in(DTLB_VPF) <> vu.io.vec_pftlb_req - - dtlb.io.cpu_req <> dtlbarb.io.out } else From 5b0f7ccf6889ebc04e8316652adacf676599cdd6 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Sun, 26 Feb 2012 17:24:08 -0800 Subject: [PATCH 0224/1087] updating rocket code to lastest version of chisel, passes assembly tests in C++ and Verilog as long as you dont use the vector unit --- rocket/src/main/scala/coherence.scala | 25 ++++++++++++------------- rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/icache.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 10 +++++----- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index d419dd52..af0d27c8 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -2,7 +2,6 @@ package rocket import Chisel._ import Constants._ -import hwacha.GenArray class HubMemReq extends Bundle { val rw = Bool() @@ -205,18 +204,18 @@ class CoherenceHubNoDir extends CoherenceHub { } val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) - val busy_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } - val addr_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS)} } - val tile_id_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val tile_xact_id_arr = GenArray(NGLOBAL_XACTS){ 
Wire(){Bits(width=TILE_XACT_ID_BITS)} } - val t_type_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TTYPE_BITS)} } - val sh_count_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val send_x_rep_ack_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } + val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS)} } + val tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } + val t_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TTYPE_BITS)} } + val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val do_free_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_has_data_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_data_idx_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=log2up(NTILES))} } - val rep_cnt_dec_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } + val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val p_rep_has_data_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val p_rep_data_idx_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=log2up(NTILES))} } + val rep_cnt_dec_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } for( i <- 0 until NGLOBAL_XACTS) { busy_arr.write( UFix(i), trackerList(i).io.busy) @@ -243,7 +242,7 @@ class CoherenceHubNoDir extends CoherenceHub { val t = trackerList(i).io conflicts(i) := t.busy(i) && coherenceConflict(t.addr, init.bits.address) } - aborting(j) := (conflicts.orR || busy_arr.flatten().andR) + aborting(j) := (conflicts.orR || busy_arr.toBits().andR) abort.valid := init.valid && aborting abort.bits.tile_xact_id := init.bits.tile_xact_id init.ready := aborting(j) || initiating(j) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index b36a5af8..269029f4 100644 --- 
a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -261,7 +261,7 @@ class rocketDpathRegfile extends Component { override val io = new ioRegfile(); - val regfile = Mem(32, io.w0.data); + val regfile = Mem(32){ Bits(width=64) } regfile.setReadLatency(0); regfile.setTarget('inst); regfile.write(io.w0.addr, io.w0.data, io.w0.en); diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index ca3c9219..2686b658 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -94,7 +94,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { for (i <- 0 until assoc) { val repl_me = (repl_way === UFix(i)) - val tag_array = Mem(lines, r_cpu_miss_tag); + val tag_array = Mem(lines){ Bits(width=tagmsb-taglsb+1) } tag_array.setReadLatency(1); tag_array.setTarget('inst); val tag_rdata = tag_array.rw(tag_addr, r_cpu_miss_tag, tag_we && repl_me); @@ -112,7 +112,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { val hit = valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) // data array - val data_array = Mem(lines*REFILL_CYCLES, io.mem.resp_data); + val data_array = Mem(lines*REFILL_CYCLES){ Bits(width = MEM_DATA_BITS) } data_array.setReadLatency(1); data_array.setTarget('inst); val data_out = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val && repl_me) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 16b6fb63..08acdf01 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -391,7 +391,7 @@ class ReplayUnit extends Component { val sdq_wen = io.sdq_enq.valid && io.sdq_enq.ready val sdq_addr = Mux(sdq_ren_retry, rp.sdq_id, Mux(sdq_ren_new, io.replay.bits.sdq_id, sdq_alloc_id)) - val sdq = Mem(NSDQ, io.sdq_enq.bits) + val sdq = Mem(NSDQ){ Bits(width=CPU_DATA_BITS) } sdq.setReadLatency(1); sdq.setTarget('inst) val sdq_dout = sdq.rw(sdq_addr, io.sdq_enq.bits, sdq_wen, 
cs = sdq_ren || sdq_wen) @@ -522,7 +522,7 @@ class MetaDataArray(lines: Int) extends Component { val state_req = (new ioDecoupled) { new MetaArrayReq() } } - val permissions_array = Mem(lines, Bits(width = 2)) + val permissions_array = Mem(lines){ Bits(width = 2) } permissions_array.setReadLatency(1); permissions_array.write(io.state_req.bits.idx, io.state_req.bits.data.state, io.state_req.valid && io.state_req.bits.rw) val permissions_rdata1 = permissions_array.rw(io.req.bits.idx, io.req.bits.data.state, io.req.valid && io.req.bits.rw) @@ -531,7 +531,7 @@ class MetaDataArray(lines: Int) extends Component { // this could be eliminated if the read port were combinational. val permissions_conflict = io.state_req.valid && (io.req.bits.idx === io.state_req.bits.idx) - val tag_array = Mem(lines, io.resp.tag) + val tag_array = Mem(lines){ Bits(width=TAG_BITS) } tag_array.setReadLatency(1); tag_array.setTarget('inst) val tag_rdata = tag_array.rw(io.req.bits.idx, io.req.bits.data.tag, io.req.valid && io.req.bits.rw, cs = io.req.valid) @@ -580,7 +580,7 @@ class DataArray(lines: Int) extends Component { val wmask = FillInterleaved(8, io.req.bits.wmask) - val array = Mem(lines*REFILL_CYCLES, io.resp) + val array = Mem(lines*REFILL_CYCLES){ Bits(width=MEM_DATA_BITS) } array.setReadLatency(1); array.setTarget('inst) val addr = Cat(io.req.bits.idx, io.req.bits.offset) @@ -802,7 +802,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val tag_match = Cat(Bits(0),tag_match_arr:_*).orR val tag_hit = r_cpu_req_val && tag_match val tag_miss = r_cpu_req_val && !tag_match - val hit_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use GenArray + val hit_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), hit_way_oh, meta.io.way_en) val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_oh, data.io.way_en) val meta_resp_mux = Mux1H(NWAYS, 
meta_resp_way_oh, meta.io.resp) From f3bb02b2ea4596a7e29acaee0de522d92bb7531c Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 26 Feb 2012 17:37:56 -0800 Subject: [PATCH 0225/1087] refactored dmem arbiter --- rocket/src/main/scala/consts.scala | 3 +- rocket/src/main/scala/cpu.scala | 40 +++++++------- rocket/src/main/scala/ptw.scala | 86 ++++++++++++++++++------------ 3 files changed, 73 insertions(+), 56 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 0e10b21d..ceeb433d 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -164,9 +164,10 @@ object Constants val PERM_BITS = 6; // rocketNBDCache parameters + val DCACHE_PORTS = 2 val CPU_DATA_BITS = 64; val CPU_TAG_BITS = 9; - val DCACHE_TAG_BITS = 1 + CPU_TAG_BITS; + val DCACHE_TAG_BITS = log2up(DCACHE_PORTS) + CPU_TAG_BITS val OFFSET_BITS = 6; // log2(cache line size in bytes) val NMSHR = 2; // number of primary misses val NRPQ = 16; // number of secondary misses diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 419bb315..e124e179 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -30,7 +30,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) val itlb = new rocketITLB(ITLB_ENTRIES); val vitlb = new rocketITLB(ITLB_ENTRIES); val ptw = new rocketPTW(); - val arb = new rocketDmemArbiter(); + val arb = new rocketDmemArbiter(DCACHE_PORTS) var vu: vu = null if (HAVE_VEC) @@ -93,8 +93,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) dtlb.io.invalidate := dpath.io.ptbr_wen dtlb.io.status := dpath.io.ctrl.status - arb.io.cpu.req_ppn := dtlb.io.cpu_resp.ppn; - ctrl.io.dmem.req_rdy := dtlb.io.cpu_req.ready && arb.io.cpu.req_rdy; + arb.io.requestor(0).req_ppn := dtlb.io.cpu_resp.ppn; + ctrl.io.dmem.req_rdy := dtlb.io.cpu_req.ready && arb.io.requestor(0).req_rdy; // connect DTLB to D$ arbiter 
ctrl.io.xcpt_ma_ld := io.dmem.xcpt_ma_ld @@ -104,8 +104,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ptw.io.dtlb <> dtlb.io.ptw; ptw.io.itlb <> itlb.io.ptw; ptw.io.ptbr := dpath.io.ptbr; - arb.io.ptw <> ptw.io.dmem; - arb.io.mem <> io.dmem + arb.io.requestor(1) <> ptw.io.dmem + arb.io.dmem <> io.dmem ctrl.io.dpath <> dpath.io.ctrl; dpath.io.host <> io.host; @@ -129,22 +129,22 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) io.imem.itlb_miss := itlb.io.cpu.resp_miss; // connect arbiter to ctrl+dpath+DTLB - arb.io.cpu.req_val := ctrl.io.dmem.req_val; - arb.io.cpu.req_cmd := ctrl.io.dmem.req_cmd; - arb.io.cpu.req_type := ctrl.io.dmem.req_type; - arb.io.cpu.req_kill := ctrl.io.dmem.req_kill; - arb.io.cpu.req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0); - arb.io.cpu.req_data := dpath.io.dmem.req_data; - arb.io.cpu.req_tag := dpath.io.dmem.req_tag; - ctrl.io.dmem.resp_miss := arb.io.cpu.resp_miss; - ctrl.io.dmem.resp_replay:= arb.io.cpu.resp_replay; - ctrl.io.dmem.resp_nack := arb.io.cpu.resp_nack; - dpath.io.dmem.resp_val := arb.io.cpu.resp_val; - dpath.io.dmem.resp_miss := arb.io.cpu.resp_miss; - dpath.io.dmem.resp_replay := arb.io.cpu.resp_replay; + arb.io.requestor(0).req_val := ctrl.io.dmem.req_val; + arb.io.requestor(0).req_cmd := ctrl.io.dmem.req_cmd; + arb.io.requestor(0).req_type := ctrl.io.dmem.req_type; + arb.io.requestor(0).req_kill := ctrl.io.dmem.req_kill; + arb.io.requestor(0).req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0); + arb.io.requestor(0).req_data := dpath.io.dmem.req_data; + arb.io.requestor(0).req_tag := dpath.io.dmem.req_tag; + ctrl.io.dmem.resp_miss := arb.io.requestor(0).resp_miss; + ctrl.io.dmem.resp_replay:= arb.io.requestor(0).resp_replay; + ctrl.io.dmem.resp_nack := arb.io.requestor(0).resp_nack; + dpath.io.dmem.resp_val := arb.io.requestor(0).resp_val; + dpath.io.dmem.resp_miss := arb.io.requestor(0).resp_miss; + dpath.io.dmem.resp_replay := 
arb.io.requestor(0).resp_replay; dpath.io.dmem.resp_type := io.dmem.resp_type; - dpath.io.dmem.resp_tag := arb.io.cpu.resp_tag; - dpath.io.dmem.resp_data := arb.io.cpu.resp_data; + dpath.io.dmem.resp_tag := arb.io.requestor(0).resp_tag; + dpath.io.dmem.resp_data := arb.io.requestor(0).resp_data; dpath.io.dmem.resp_data_subword := io.dmem.resp_data_subword; var fpu: rocketFPU = null diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index ed8b347a..ab7ad5e7 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -5,47 +5,63 @@ import Node._; import Constants._; import scala.math._; -class ioDmemArbiter extends Bundle +class ioDmemArbiter(n: Int) extends Bundle { - val ptw = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_idx", "req_ppn", "resp_data", "resp_val", "resp_replay", "resp_nack")); - val cpu = new ioDmem(); - val mem = new ioDmem().flip(); + val dmem = new ioDmem().flip() + val requestor = Vec(n) { new ioDmem() } } -class rocketDmemArbiter extends Component +class rocketDmemArbiter(n: Int) extends Component { - val io = new ioDmemArbiter(); - - // must delay ppn part of address from PTW by 1 cycle (to match TLB behavior) - val r_ptw_req_val = Reg(io.ptw.req_val); - val r_ptw_req_ppn = Reg(io.ptw.req_ppn); - val r_cpu_req_val = Reg(io.cpu.req_val && io.cpu.req_rdy); - - io.mem.req_val := io.ptw.req_val || io.cpu.req_val; - io.mem.req_cmd := Mux(io.ptw.req_val, io.ptw.req_cmd, io.cpu.req_cmd); - io.mem.req_type := Mux(io.ptw.req_val, io.ptw.req_type, io.cpu.req_type); - io.mem.req_idx := Mux(io.ptw.req_val, io.ptw.req_idx, io.cpu.req_idx); - io.mem.req_ppn := Mux(r_ptw_req_val, r_ptw_req_ppn, io.cpu.req_ppn); - io.mem.req_data := io.cpu.req_data; - io.mem.req_tag := Cat(io.cpu.req_tag, io.ptw.req_val); - io.mem.req_kill := io.cpu.req_kill && r_cpu_req_val; - - io.ptw.req_rdy := io.mem.req_rdy; - io.cpu.req_rdy := io.mem.req_rdy && !io.ptw.req_val; - io.cpu.resp_miss := 
io.mem.resp_miss && !io.mem.resp_tag(0).toBool; + val io = new ioDmemArbiter(n) + require(DCACHE_TAG_BITS >= log2up(n) + CPU_TAG_BITS) - io.cpu.resp_nack := io.mem.resp_nack && !r_ptw_req_val - io.ptw.resp_nack := io.mem.resp_nack && r_ptw_req_val + var req_val = Bool(false) + var req_rdy = io.dmem.req_rdy + for (i <- 0 until n) + { + io.requestor(i).req_rdy := req_rdy + req_val = req_val || io.requestor(i).req_val + req_rdy = req_rdy && !io.requestor(i).req_val + } - io.cpu.resp_val := io.mem.resp_val && !io.mem.resp_tag(0).toBool; - io.ptw.resp_val := io.mem.resp_val && io.mem.resp_tag(0).toBool; + var req_cmd = io.requestor(n-1).req_cmd + var req_type = io.requestor(n-1).req_type + var req_idx = io.requestor(n-1).req_idx + var req_ppn = io.requestor(n-1).req_ppn + var req_data = io.requestor(n-1).req_data + var req_tag = io.requestor(n-1).req_tag + var req_kill = io.requestor(n-1).req_kill + for (i <- n-1 to 0 by -1) + { + req_cmd = Mux(io.requestor(i).req_val, io.requestor(i).req_cmd, req_cmd) + req_type = Mux(io.requestor(i).req_val, io.requestor(i).req_type, req_type) + req_idx = Mux(io.requestor(i).req_val, io.requestor(i).req_idx, req_idx) + req_ppn = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_ppn, req_ppn) + req_data = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_data, req_data) + req_tag = Mux(io.requestor(i).req_val, Cat(io.requestor(i).req_tag, UFix(i, log2up(n))), req_tag) + req_kill = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_kill, req_kill) + } - io.cpu.resp_replay := io.mem.resp_replay && !io.mem.resp_tag(0).toBool; - io.ptw.resp_replay := io.mem.resp_replay && io.mem.resp_tag(0).toBool; + io.dmem.req_val := req_val + io.dmem.req_cmd := req_cmd + io.dmem.req_type := req_type + io.dmem.req_idx := req_idx + io.dmem.req_ppn := req_ppn + io.dmem.req_data := req_data + io.dmem.req_tag := req_tag + io.dmem.req_kill := req_kill - io.ptw.resp_data := io.mem.resp_data; - io.cpu.resp_data := io.mem.resp_data; - io.cpu.resp_tag 
:= io.mem.resp_tag >> UFix(1); + for (i <- 0 until n) + { + val tag_hit = io.dmem.resp_tag(log2up(n)-1,0) === UFix(i) + io.requestor(i).resp_miss := io.dmem.resp_miss && tag_hit + io.requestor(i).resp_nack := io.dmem.resp_nack && Reg(io.requestor(i).req_val) + io.requestor(i).resp_val := io.dmem.resp_val && tag_hit + io.requestor(i).resp_replay := io.dmem.resp_replay && tag_hit + io.requestor(i).resp_data := io.dmem.resp_data + io.requestor(i).resp_tag := io.dmem.resp_tag >> UFix(log2up(n)) + } } class ioPTW extends Bundle @@ -102,9 +118,9 @@ class rocketPTW extends Component io.dmem.req_cmd := M_XRD; io.dmem.req_type := MT_D; -// io.dmem.req_addr := req_addr; io.dmem.req_idx := req_addr(PGIDX_BITS-1,0); - io.dmem.req_ppn := req_addr(PADDR_BITS-1,PGIDX_BITS); + io.dmem.req_ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) + io.dmem.req_kill := Bool(false) val resp_val = (state === s_done) || (state === s_l1_fake) || (state === s_l2_fake); val resp_err = (state === s_error); From ad713a5d83d61ea81c6a53eca40584f149cf9a43 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 26 Feb 2012 17:51:46 -0800 Subject: [PATCH 0226/1087] fix icache ram depth; new chisel --- rocket/src/main/scala/icache.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 2686b658..0c8d758b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -94,15 +94,15 @@ class rocketICache(sets: Int, assoc: Int) extends Component { for (i <- 0 until assoc) { val repl_me = (repl_way === UFix(i)) - val tag_array = Mem(lines){ Bits(width=tagmsb-taglsb+1) } + val tag_array = Mem(sets){ r_cpu_miss_tag } tag_array.setReadLatency(1); tag_array.setTarget('inst); val tag_rdata = tag_array.rw(tag_addr, r_cpu_miss_tag, tag_we && repl_me); // valid bit array - val vb_array = Reg(resetVal = Bits(0, lines)); + val vb_array = Reg(resetVal = Bits(0, sets)); when (io.cpu.invalidate) 
{ - vb_array := Bits(0,lines); + vb_array := Bits(0,sets); } .elsewhen (tag_we && repl_me) { vb_array := vb_array.bitSet(r_cpu_req_idx(indexmsb,indexlsb).toUFix, UFix(1,1)); @@ -112,7 +112,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { val hit = valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) // data array - val data_array = Mem(lines*REFILL_CYCLES){ Bits(width = MEM_DATA_BITS) } + val data_array = Mem(sets*REFILL_CYCLES){ io.mem.resp_data } data_array.setReadLatency(1); data_array.setTarget('inst); val data_out = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val && repl_me) From 2d04664a983b8a7e2ab39674411eb8e60711af25 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 26 Feb 2012 18:26:29 -0800 Subject: [PATCH 0227/1087] simplify cpu-cache interface --- rocket/src/main/scala/cpu.scala | 23 ++++------------------- rocket/src/main/scala/dpath.scala | 20 ++++---------------- rocket/src/main/scala/dtlb.scala | 2 +- rocket/src/main/scala/ptw.scala | 4 +++- 4 files changed, 12 insertions(+), 37 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index e124e179..eb381ed6 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -67,7 +67,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) dtlbarb.io.in(DTLB_CPU).bits.kill := ctrl.io.dtlb_kill dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req_cmd dtlbarb.io.in(DTLB_CPU).bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR - dtlbarb.io.in(DTLB_CPU).bits.vpn := dpath.io.dmem.req_addr(VADDR_BITS,PGIDX_BITS) + dtlbarb.io.in(DTLB_CPU).bits.vpn := dpath.io.dtlb.vpn ctrl.io.dtlb_rdy := dtlbarb.io.in(DTLB_CPU).ready ctrl.io.xcpt_dtlb_ld := chosen_cpu && dtlb.io.cpu_resp.xcpt_ld @@ -83,7 +83,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req_cmd 
dtlb.io.cpu_req.bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR - dtlb.io.cpu_req.bits.vpn := dpath.io.dmem.req_addr(VADDR_BITS,PGIDX_BITS) + dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st ctrl.io.dtlb_rdy := dtlb.io.cpu_req.ready @@ -129,23 +129,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) io.imem.itlb_miss := itlb.io.cpu.resp_miss; // connect arbiter to ctrl+dpath+DTLB - arb.io.requestor(0).req_val := ctrl.io.dmem.req_val; - arb.io.requestor(0).req_cmd := ctrl.io.dmem.req_cmd; - arb.io.requestor(0).req_type := ctrl.io.dmem.req_type; - arb.io.requestor(0).req_kill := ctrl.io.dmem.req_kill; - arb.io.requestor(0).req_idx := dpath.io.dmem.req_addr(PGIDX_BITS-1,0); - arb.io.requestor(0).req_data := dpath.io.dmem.req_data; - arb.io.requestor(0).req_tag := dpath.io.dmem.req_tag; - ctrl.io.dmem.resp_miss := arb.io.requestor(0).resp_miss; - ctrl.io.dmem.resp_replay:= arb.io.requestor(0).resp_replay; - ctrl.io.dmem.resp_nack := arb.io.requestor(0).resp_nack; - dpath.io.dmem.resp_val := arb.io.requestor(0).resp_val; - dpath.io.dmem.resp_miss := arb.io.requestor(0).resp_miss; - dpath.io.dmem.resp_replay := arb.io.requestor(0).resp_replay; - dpath.io.dmem.resp_type := io.dmem.resp_type; - dpath.io.dmem.resp_tag := arb.io.requestor(0).resp_tag; - dpath.io.dmem.resp_data := arb.io.requestor(0).resp_data; - dpath.io.dmem.resp_data_subword := io.dmem.resp_data_subword; + arb.io.requestor(0) <> ctrl.io.dmem + arb.io.requestor(0) <> dpath.io.dmem var fpu: rocketFPU = null if (HAVE_FPU) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f97ba5d8..be301c9c 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -7,20 +7,6 @@ import Constants._ import Instructions._ import hwacha._ -class ioDpathDmem extends Bundle() -{ - val req_addr = UFix(VADDR_BITS+1, OUTPUT); - val 
req_tag = UFix(CPU_TAG_BITS, OUTPUT); - val req_data = Bits(64, OUTPUT); - val resp_val = Bool(INPUT); - val resp_miss = Bool(INPUT); - val resp_replay = Bool(INPUT); - val resp_type = Bits(3, INPUT); - val resp_tag = Bits(CPU_TAG_BITS, INPUT); - val resp_data = Bits(64, INPUT); - val resp_data_subword = Bits(64, INPUT); -} - class ioDpathImem extends Bundle() { val req_addr = UFix(VADDR_BITS+1, OUTPUT); @@ -32,7 +18,8 @@ class ioDpathAll extends Bundle() val host = new ioHTIF(); val ctrl = new ioCtrlDpath().flip(); val debug = new ioDebug(); - val dmem = new ioDpathDmem(); + val dmem = new ioDmem(List("req_idx", "req_tag", "req_data", "resp_val", "resp_miss", "resp_replay", "resp_type", "resp_tag", "resp_data", "resp_data_subword")).flip(); + val dtlb = new ioDTLB_CPU_req_bundle(List("vpn")) val ext_mem = new ioDmem(List("req_val", "req_idx", "req_ppn", "req_data", "req_tag", "resp_val", "resp_data", "resp_type", "resp_tag")) val imem = new ioDpathImem(); val ptbr_wen = Bool(OUTPUT); @@ -283,9 +270,10 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req_addr := ex_effective_address.toUFix; + io.dmem.req_idx := ex_effective_address io.dmem.req_data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) io.dmem.req_tag := Cat(Mux(io.ctrl.ex_ext_mem_val, ex_reg_ext_mem_tag(CPU_TAG_BITS-2, 0), Cat(ex_reg_waddr, io.ctrl.ex_fp_val)), io.ctrl.ex_ext_mem_val).toUFix + io.dtlb.vpn := ex_effective_address >> UFix(PGIDX_BITS) // processor control regfile read pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_eret; diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index f7c8f17b..a7dffbf6 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -9,7 +9,7 @@ import hwacha._ // ioDTLB_CPU also located in hwacha/src/vuVXU-Interface.scala // should keep them in sync -class ioDTLB_CPU_req_bundle extends 
Bundle +class ioDTLB_CPU_req_bundle(view: List[String] = null) extends Bundle(view) { // lookup requests val kill = Bool() diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index ab7ad5e7..1753aa98 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -60,6 +60,8 @@ class rocketDmemArbiter(n: Int) extends Component io.requestor(i).resp_val := io.dmem.resp_val && tag_hit io.requestor(i).resp_replay := io.dmem.resp_replay && tag_hit io.requestor(i).resp_data := io.dmem.resp_data + io.requestor(i).resp_data_subword := io.dmem.resp_data_subword + io.requestor(i).resp_type := io.dmem.resp_type io.requestor(i).resp_tag := io.dmem.resp_tag >> UFix(log2up(n)) } } @@ -68,7 +70,7 @@ class ioPTW extends Bundle { val itlb = new ioTLB_PTW().flip(); val dtlb = new ioTLB_PTW().flip(); - val dmem = new ioDmem(List("req_val", "req_rdy", "req_cmd", "req_type", "req_ppn", "req_idx", "resp_data", "resp_val", "resp_nack")).flip(); + val dmem = new ioDmem().flip() val ptbr = UFix(PADDR_BITS, INPUT); } From e12b9eae9369dc793120f52a5accbdcff4eb70cd Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 26 Feb 2012 18:53:39 -0800 Subject: [PATCH 0228/1087] remove ext_mem interface hindsight is 20/20 --- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/cpu.scala | 12 ++++------ rocket/src/main/scala/ctrl.scala | 35 +++++++++--------------------- rocket/src/main/scala/dpath.scala | 24 +++++--------------- 4 files changed, 21 insertions(+), 52 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index ceeb433d..61f35e0c 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -164,7 +164,7 @@ object Constants val PERM_BITS = 6; // rocketNBDCache parameters - val DCACHE_PORTS = 2 + val DCACHE_PORTS = 3 val CPU_DATA_BITS = 64; val CPU_TAG_BITS = 9; val DCACHE_TAG_BITS = log2up(DCACHE_PORTS) + CPU_TAG_BITS diff --git 
a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index eb381ed6..910bf56c 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -96,9 +96,6 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) arb.io.requestor(0).req_ppn := dtlb.io.cpu_resp.ppn; ctrl.io.dmem.req_rdy := dtlb.io.cpu_req.ready && arb.io.requestor(0).req_rdy; - // connect DTLB to D$ arbiter - ctrl.io.xcpt_ma_ld := io.dmem.xcpt_ma_ld - ctrl.io.xcpt_ma_st := io.dmem.xcpt_ma_st // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) ptw.io.dtlb <> dtlb.io.ptw; @@ -194,7 +191,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.cpu_exception.exception := dpath.io.vec_iface.exception // hooking up vector memory interface - ctrl.io.ext_mem.req_val := vu.io.dmem_req.valid + //arb.io.requestor(2) <> vu.io.dmem_req + /*ctrl.io.ext_mem.req_val := vu.io.dmem_req.valid ctrl.io.ext_mem.req_cmd := vu.io.dmem_req.bits.cmd ctrl.io.ext_mem.req_type := vu.io.dmem_req.bits.typ @@ -208,7 +206,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.dmem_resp.bits.nack := ctrl.io.ext_mem.resp_nack vu.io.dmem_resp.bits.data := dpath.io.ext_mem.resp_data vu.io.dmem_resp.bits.tag := dpath.io.ext_mem.resp_tag - vu.io.dmem_resp.bits.typ := dpath.io.ext_mem.resp_type + vu.io.dmem_resp.bits.typ := dpath.io.ext_mem.resp_type*/ // share vector integer multiplier with rocket dpath.io.vec_imul_req <> vu.io.cp_imul_req @@ -219,9 +217,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) } else { - ctrl.io.ext_mem.req_val := Bool(false) - dpath.io.ext_mem.req_val := Bool(false) - + arb.io.requestor(2).req_val := Bool(false) if (HAVE_FPU) { fpu.io.sfma.valid := Bool(false) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f067aff1..dc244396 
100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -35,7 +35,6 @@ class ioCtrlDpath extends Bundle() val id_eret = Bool(OUTPUT); val wb_eret = Bool(OUTPUT); val mem_load = Bool(OUTPUT); - val ex_ext_mem_val = Bool(OUTPUT); val ex_fp_val= Bool(OUTPUT); val mem_fp_val= Bool(OUTPUT); val ex_wen = Bool(OUTPUT); @@ -79,8 +78,7 @@ class ioCtrlAll extends Bundle() { val dpath = new ioCtrlDpath(); val imem = new ioImem(List("req_val", "resp_val")).flip(); - val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack")).flip(); - val ext_mem = new ioDmem(List("req_val", "req_cmd", "req_type", "resp_nack")) + val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack", "xcpt_ma_ld", "xcpt_ma_st")).flip(); val dtlb_val = Bool(OUTPUT); val dtlb_kill = Bool(OUTPUT); val dtlb_rdy = Bool(INPUT); @@ -88,8 +86,6 @@ class ioCtrlAll extends Bundle() val xcpt_dtlb_ld = Bool(INPUT); val xcpt_dtlb_st = Bool(INPUT); val xcpt_itlb = Bool(INPUT); - val xcpt_ma_ld = Bool(INPUT); - val xcpt_ma_st = Bool(INPUT); val fpu = new ioCtrlFPU(); val vec_dpath = new ioCtrlDpathVec() val vec_iface = new ioCtrlVecInterface() @@ -333,7 +329,6 @@ class rocketCtrl extends Component val ex_reg_vec_val = Reg(resetVal = Bool(false)); val ex_reg_replay = Reg(resetVal = Bool(false)); val ex_reg_load_use = Reg(resetVal = Bool(false)); - val ex_reg_ext_mem_val = Reg(resetVal = Bool(false)) val mem_reg_valid = Reg(resetVal = Bool(false)); val mem_reg_wen_pcr = Reg(resetVal = Bool(false)); @@ -352,7 +347,6 @@ class rocketCtrl extends Component val mem_reg_fp_val = Reg(resetVal = Bool(false)); val mem_reg_replay = Reg(resetVal = Bool(false)); val mem_reg_kill = Reg(resetVal = Bool(false)); - val mem_reg_ext_mem_val = Reg(resetVal = Bool(false)) val mem_reg_fp_sboard_set = Reg(resetVal = Bool(false)); val wb_reg_valid = Reg(resetVal = Bool(false)); @@ -368,7 +362,6 @@ class rocketCtrl 
extends Component val wb_reg_cause = Reg(){UFix()}; val wb_reg_fp_val = Reg(resetVal = Bool(false)); val wb_reg_fp_sboard_set = Reg(resetVal = Bool(false)); - val wb_reg_ext_mem_nack = Reg(resetVal = Bool(false)) val take_pc = Wire() { Bool() }; @@ -445,11 +438,8 @@ class rocketCtrl extends Component ex_reg_replay := id_reg_replay ex_reg_load_use := id_load_use; } - ex_reg_ext_mem_val := io.ext_mem.req_val - ex_reg_mem_cmd := Mux(io.ext_mem.req_val, io.ext_mem.req_cmd, id_mem_cmd).toUFix - ex_reg_mem_type := Mux(io.ext_mem.req_val, io.ext_mem.req_type, id_mem_type).toUFix - - val ex_ext_mem_val = ex_reg_ext_mem_val && !wb_reg_ext_mem_nack + ex_reg_mem_cmd := id_mem_cmd + ex_reg_mem_type := id_mem_type.toUFix val beq = io.dpath.br_eq; val bne = ~io.dpath.br_eq; @@ -516,7 +506,6 @@ class rocketCtrl extends Component mem_reg_fp_val := ex_reg_fp_val mem_reg_fp_sboard_set := ex_reg_fp_sboard_set } - mem_reg_ext_mem_val := ex_ext_mem_val mem_reg_mem_cmd := ex_reg_mem_cmd; mem_reg_mem_type := ex_reg_mem_type; @@ -546,7 +535,6 @@ class rocketCtrl extends Component wb_reg_fp_val := mem_reg_fp_val wb_reg_fp_sboard_set := mem_reg_fp_sboard_set } - wb_reg_ext_mem_nack := io.ext_mem.resp_nack val sboard = new rocketCtrlSboard(32, 3, 2); sboard.io.r(0).addr := id_raddr2.toUFix; @@ -623,8 +611,8 @@ class rocketCtrl extends Component Mux(p_irq_timer, UFix(23,5), UFix(0,5))); - val mem_xcpt_ma_ld = io.xcpt_ma_ld && !mem_reg_kill - val mem_xcpt_ma_st = io.xcpt_ma_st && !mem_reg_kill + val mem_xcpt_ma_ld = io.dmem.xcpt_ma_ld && !mem_reg_kill + val mem_xcpt_ma_st = io.dmem.xcpt_ma_st && !mem_reg_kill val mem_xcpt_dtlb_ld = io.xcpt_dtlb_ld && !mem_reg_kill val mem_xcpt_dtlb_st = io.xcpt_dtlb_st && !mem_reg_kill @@ -667,7 +655,7 @@ class rocketCtrl extends Component val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp_nack) val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || 
mem_exception || mem_reg_kill - val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill || mem_reg_ext_mem_val && wb_reg_ext_mem_nack + val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions @@ -757,7 +745,7 @@ class rocketCtrl extends Component ( id_ex_hazard || id_mem_hazard || id_wb_hazard || id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || - id_stall_fpu || io.ext_mem.req_val || + id_stall_fpu || id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || id_vec_val.toBool && !(io.vec_iface.vcmdq_ready && io.vec_iface.vximm1q_ready && io.vec_iface.vximm2q_ready) || // being conservative ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || @@ -779,14 +767,13 @@ class rocketCtrl extends Component io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen io.dpath.ren2 := id_renx2.toBool; io.dpath.ren1 := id_renx1.toBool; - io.dpath.sel_alu2 := Mux(io.ext_mem.req_val, A2_ZERO, id_sel_alu2) + io.dpath.sel_alu2 := id_sel_alu2 io.dpath.fn_dw := id_fn_dw.toBool; io.dpath.fn_alu := id_fn_alu; io.dpath.div_fn := id_div_fn; io.dpath.div_val := id_div_val.toBool; io.dpath.mul_fn := id_mul_fn; io.dpath.mul_val := id_mul_val.toBool; - io.dpath.ex_ext_mem_val := ex_ext_mem_val; io.dpath.ex_fp_val:= ex_reg_fp_val; io.dpath.mem_fp_val:= mem_reg_fp_val; io.dpath.ex_wen := ex_reg_wen; @@ -807,12 +794,10 @@ class rocketCtrl extends Component io.fpu.killx := kill_ex io.fpu.killm := kill_mem - io.dtlb_val := ex_reg_mem_val || ex_ext_mem_val + io.dtlb_val := ex_reg_mem_val io.dtlb_kill := mem_reg_kill; - io.dmem.req_val := ex_reg_mem_val || ex_ext_mem_val + io.dmem.req_val := ex_reg_mem_val io.dmem.req_kill := kill_dcache; io.dmem.req_cmd := ex_reg_mem_cmd; io.dmem.req_type := ex_reg_mem_type; - - io.ext_mem.resp_nack:= mem_reg_ext_mem_val 
&& !wb_reg_ext_mem_nack && (io.dmem.req_kill || io.dmem.resp_nack || Reg(!io.dmem.req_rdy)) } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index be301c9c..9149d44d 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -20,7 +20,6 @@ class ioDpathAll extends Bundle() val debug = new ioDebug(); val dmem = new ioDmem(List("req_idx", "req_tag", "req_data", "resp_val", "resp_miss", "resp_replay", "resp_type", "resp_tag", "resp_data", "resp_data_subword")).flip(); val dtlb = new ioDTLB_CPU_req_bundle(List("vpn")) - val ext_mem = new ioDmem(List("req_val", "req_idx", "req_ppn", "req_data", "req_tag", "resp_val", "resp_data", "resp_type", "resp_tag")) val imem = new ioDpathImem(); val ptbr_wen = Bool(OUTPUT); val ptbr = UFix(PADDR_BITS, OUTPUT); @@ -73,7 +72,6 @@ class rocketDpath extends Component val ex_reg_ctrl_div_fn = Reg() { UFix() }; val ex_reg_ctrl_sel_wb = Reg() { UFix() }; val ex_reg_ctrl_ren_pcr = Reg(resetVal = Bool(false)); - val ex_reg_ext_mem_tag = Reg() { Bits() }; val ex_wdata = Wire() { Bits() }; // memory definitions @@ -166,18 +164,16 @@ class rocketDpath extends Component // bypass muxes val id_rs1 = - Mux(io.ext_mem.req_val, Cat(io.ext_mem.req_ppn, io.ext_mem.req_idx), Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, ex_wdata, Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, mem_wdata, Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, - id_rdata1)))) + id_rdata1))) val id_rs2 = - Mux(io.ext_mem.req_val, io.ext_mem.req_data, Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, ex_wdata, Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, mem_wdata, Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, - id_rdata2)))) + id_rdata2))) // immediate generation val id_imm_bj = io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE @@ -215,7 +211,6 @@ class rocketDpath extends Component ex_reg_ctrl_div_fn := io.ctrl.div_fn; 
ex_reg_ctrl_sel_wb := io.ctrl.sel_wb; ex_reg_ctrl_ren_pcr := io.ctrl.ren_pcr; - ex_reg_ext_mem_tag := io.ext_mem.req_tag when(io.ctrl.killd) { ex_reg_ctrl_div_val := Bool(false); @@ -272,7 +267,7 @@ class rocketDpath extends Component // other signals (req_val, req_rdy) connect to control module io.dmem.req_idx := ex_effective_address io.dmem.req_data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) - io.dmem.req_tag := Cat(Mux(io.ctrl.ex_ext_mem_val, ex_reg_ext_mem_tag(CPU_TAG_BITS-2, 0), Cat(ex_reg_waddr, io.ctrl.ex_fp_val)), io.ctrl.ex_ext_mem_val).toUFix + io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) io.dtlb.vpn := ex_effective_address >> UFix(PGIDX_BITS) // processor control regfile read @@ -334,11 +329,9 @@ class rocketDpath extends Component // 32/64 bit load handling (moved to earlier in file) // writeback arbitration - val dmem_resp_ext = io.dmem.resp_tag(0).toBool - val dmem_resp_xpu = !io.dmem.resp_tag(0).toBool && !io.dmem.resp_tag(1).toBool - val dmem_resp_fpu = !io.dmem.resp_tag(0).toBool && io.dmem.resp_tag(1).toBool - val dmem_resp_waddr = io.dmem.resp_tag.toUFix >> UFix(2) - val dmem_resp_ext_tag = io.dmem.resp_tag.toUFix >> UFix(1) + val dmem_resp_xpu = !io.dmem.resp_tag(0).toBool + val dmem_resp_fpu = io.dmem.resp_tag(0).toBool + val dmem_resp_waddr = io.dmem.resp_tag.toUFix >> UFix(1) dmem_resp_replay := io.dmem.resp_replay && dmem_resp_xpu; r_dmem_resp_replay := dmem_resp_replay r_dmem_resp_waddr := dmem_resp_waddr @@ -409,11 +402,6 @@ class rocketDpath extends Component rfile.io.w0.en := io.ctrl.wb_wen || wb_reg_ll_wb rfile.io.w0.data := wb_wdata - io.ext_mem.resp_val := Reg(io.dmem.resp_val && dmem_resp_ext, resetVal = Bool(false)) - io.ext_mem.resp_tag := Reg(dmem_resp_ext_tag) - io.ext_mem.resp_type := Reg(io.dmem.resp_type) - io.ext_mem.resp_data := io.dmem.resp_data_subword - io.ctrl.wb_waddr := wb_reg_waddr io.ctrl.mem_wb := dmem_resp_replay; From 6e706c7c744c06bca1fc591cf78aea3c0fef58e9 Mon Sep 17 00:00:00 2001 
From: Andrew Waterman Date: Sun, 26 Feb 2012 20:20:45 -0800 Subject: [PATCH 0229/1087] fix yet another AMO-related replay bug --- rocket/src/main/scala/nbdcache.scala | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 08acdf01..e8ce3549 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -916,24 +916,25 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { // replays val replay = replayer.io.data_req.bits val stall_replay = r_replay_amo || p_amo || p_store_valid - val replay_val = replayer.io.data_req.valid && !stall_replay - val replay_rdy = data_arb.io.in(1).ready + val replay_val = replayer.io.data_req.valid + val replay_rdy = data_arb.io.in(1).ready && !stall_replay + val replay_fire = replay_val && replay_rdy data_arb.io.in(1).bits.inner_req.offset := replay.offset(offsetmsb,ramindexlsb) data_arb.io.in(1).bits.inner_req.idx := replay.idx data_arb.io.in(1).bits.inner_req.rw := replay.cmd === M_XWR - data_arb.io.in(1).valid := replay_val + data_arb.io.in(1).valid := replay_val && !stall_replay data_arb.io.in(1).bits.way_en := replayer.io.way_oh - replayer.io.data_req.ready := replay_rdy && !stall_replay - r_replay_amo := replay_amo_val && replay_rdy && !stall_replay + replayer.io.data_req.ready := replay_rdy + r_replay_amo := replay_amo_val && replay_rdy // store write mask generation. // assumes store replays are higher-priority than pending stores. 
val maskgen = new StoreMaskGen - val store_offset = Mux(!replay_val, p_store_idx(offsetmsb,0), replay.offset) - maskgen.io.typ := Mux(!replay_val, p_store_type, replay.typ) + val store_offset = Mux(!replay_fire, p_store_idx(offsetmsb,0), replay.offset) + maskgen.io.typ := Mux(!replay_fire, p_store_type, replay.typ) maskgen.io.addr := store_offset(offsetlsb-1,0) val store_wmask_wide = maskgen.io.wmask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2up(CPU_DATA_BITS/8))).toUFix - val store_data = Mux(!replay_val, p_store_data, replay.data) + val store_data = Mux(!replay_fire, p_store_data, replay.data) val store_data_wide = Fill(MEM_DATA_BITS/CPU_DATA_BITS, store_data) data_arb.io.in(1).bits.inner_req.data := store_data_wide data_arb.io.in(1).bits.inner_req.wmask := store_wmask_wide @@ -943,7 +944,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { // load data subword mux/sign extension. // subword loads are delayed by one cycle. val loadgen = new LoadDataGen - val loadgen_use_replay = Reg(replay_val && replay_rdy) + val loadgen_use_replay = Reg(replay_fire) loadgen.io.typ := Mux(loadgen_use_replay, Reg(replay.typ), r_cpu_req_type) loadgen.io.addr := Mux(loadgen_use_replay, Reg(replay.offset), r_cpu_req_idx)(ramindexlsb-1,0) loadgen.io.din := data_resp_mux @@ -970,7 +971,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { // fences and flushes are the exceptions. 
val pending_fence = Reg(resetVal = Bool(false)) pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !flush_rdy - val nack_hit = p_store_match || r_req_write && !p_store_rdy + val nack_hit = p_store_match || replay_val || r_req_write && !p_store_rdy val nack_miss = needs_writeback && !wb_rdy || !mshr.io.req_rdy || r_req_write && !replayer.io.sdq_enq.ready val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || !flushed && r_req_flush From bfd0ae125ea51697e4daa5ffa7a5559cbb32890f Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 26 Feb 2012 23:46:51 -0800 Subject: [PATCH 0230/1087] upgrade to new rocket/vu memory interface, fix amo nack bug in hellacache --- rocket/src/main/scala/cpu.scala | 33 ++++++++++++++++------------ rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/dtlb.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 2 +- rocket/src/main/scala/ptw.scala | 4 +++- 5 files changed, 25 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 910bf56c..73a37b03 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -191,27 +191,32 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.cpu_exception.exception := dpath.io.vec_iface.exception // hooking up vector memory interface - //arb.io.requestor(2) <> vu.io.dmem_req - /*ctrl.io.ext_mem.req_val := vu.io.dmem_req.valid - ctrl.io.ext_mem.req_cmd := vu.io.dmem_req.bits.cmd - ctrl.io.ext_mem.req_type := vu.io.dmem_req.bits.typ + val storegen = new StoreDataGen + storegen.io.typ := vu.io.dmem_req.bits.typ + storegen.io.din := vu.io.dmem_req.bits.data - dpath.io.ext_mem.req_val := vu.io.dmem_req.valid - dpath.io.ext_mem.req_idx := vu.io.dmem_req.bits.idx - dpath.io.ext_mem.req_ppn := vu.io.dmem_req.bits.ppn - dpath.io.ext_mem.req_data := vu.io.dmem_req.bits.data - dpath.io.ext_mem.req_tag := vu.io.dmem_req.bits.tag + arb.io.requestor(2).req_val := 
vu.io.dmem_req.valid + arb.io.requestor(2).req_kill := Reg(vu.io.dmem_req.bits.kill) + arb.io.requestor(2).req_cmd := vu.io.dmem_req.bits.cmd + arb.io.requestor(2).req_type := vu.io.dmem_req.bits.typ + arb.io.requestor(2).req_idx := vu.io.dmem_req.bits.idx + arb.io.requestor(2).req_ppn := Reg(vu.io.dmem_req.bits.ppn) + arb.io.requestor(2).req_data := Reg(storegen.io.dout) + arb.io.requestor(2).req_tag := vu.io.dmem_req.bits.tag - vu.io.dmem_resp.valid := dpath.io.ext_mem.resp_val - vu.io.dmem_resp.bits.nack := ctrl.io.ext_mem.resp_nack - vu.io.dmem_resp.bits.data := dpath.io.ext_mem.resp_data - vu.io.dmem_resp.bits.tag := dpath.io.ext_mem.resp_tag - vu.io.dmem_resp.bits.typ := dpath.io.ext_mem.resp_type*/ + vu.io.dmem_resp.valid := Reg(arb.io.requestor(2).resp_val) + // the vu doesn't look at the ready signal, it's simply a nack + // but should be delayed one cycle to match the nack semantics + vu.io.dmem_resp.bits.nack := arb.io.requestor(2).resp_nack || Reg(!arb.io.requestor(2).req_rdy) + vu.io.dmem_resp.bits.data := arb.io.requestor(2).resp_data_subword + vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(2).resp_tag) + vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(2).resp_type) // share vector integer multiplier with rocket dpath.io.vec_imul_req <> vu.io.cp_imul_req dpath.io.vec_imul_resp <> vu.io.cp_imul_resp + // share sfma and dfma pipelines with rocket fpu.io.sfma <> vu.io.cp_sfma fpu.io.dfma <> vu.io.cp_dfma } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 9149d44d..1a3d5cee 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -19,7 +19,7 @@ class ioDpathAll extends Bundle() val ctrl = new ioCtrlDpath().flip(); val debug = new ioDebug(); val dmem = new ioDmem(List("req_idx", "req_tag", "req_data", "resp_val", "resp_miss", "resp_replay", "resp_type", "resp_tag", "resp_data", "resp_data_subword")).flip(); - val dtlb = new ioDTLB_CPU_req_bundle(List("vpn")) + val dtlb = new 
ioDTLB_CPU_req_bundle().asOutput() val imem = new ioDpathImem(); val ptbr_wen = Bool(OUTPUT); val ptbr = UFix(PADDR_BITS, OUTPUT); diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index a7dffbf6..f7c8f17b 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -9,7 +9,7 @@ import hwacha._ // ioDTLB_CPU also located in hwacha/src/vuVXU-Interface.scala // should keep them in sync -class ioDTLB_CPU_req_bundle(view: List[String] = null) extends Bundle(view) +class ioDTLB_CPU_req_bundle extends Bundle { // lookup requests val kill = Bool() diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index e8ce3549..2147cd21 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -954,7 +954,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { amoalu.io.lhs := loadgen.io.r_dout.toUFix amoalu.io.rhs := p_store_data.toUFix - early_nack := early_tag_nack || early_load_nack || r_cpu_req_val_ && r_req_amo || replay_amo_val || r_replay_amo + early_nack := early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo // reset and flush unit val flusher = new FlushUnit(lines) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 1753aa98..30b37603 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -55,8 +55,10 @@ class rocketDmemArbiter(n: Int) extends Component for (i <- 0 until n) { val tag_hit = io.dmem.resp_tag(log2up(n)-1,0) === UFix(i) - io.requestor(i).resp_miss := io.dmem.resp_miss && tag_hit + io.requestor(i).xcpt_ma_ld := io.dmem.xcpt_ma_ld && Reg(io.requestor(i).req_val) + io.requestor(i).xcpt_ma_st := io.dmem.xcpt_ma_st && Reg(io.requestor(i).req_val) io.requestor(i).resp_nack := io.dmem.resp_nack && Reg(io.requestor(i).req_val) + io.requestor(i).resp_miss := io.dmem.resp_miss && tag_hit io.requestor(i).resp_val 
:= io.dmem.resp_val && tag_hit io.requestor(i).resp_replay := io.dmem.resp_replay && tag_hit io.requestor(i).resp_data := io.dmem.resp_data From 2275239f335f56af0a679e5b897a327e75c8cfaa Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 26 Feb 2012 00:34:40 -0800 Subject: [PATCH 0231/1087] xact init transactors in coherence hub --- rocket/src/main/scala/coherence.scala | 125 +++++++++++++++++--------- 1 file changed, 82 insertions(+), 43 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index af0d27c8..520f7c05 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -12,6 +12,12 @@ class HubMemReq extends Bundle { val is_probe_rep = Bool() } +class TrackerAllocReq extends Bundle { + val xact_init = new TransactionInit() + val init_tile_id = Bits(width = TILE_ID_BITS) + val data_valid = Bool() +} + class MemData extends Bundle { val data = Bits(width = MEM_DATA_BITS) } @@ -154,6 +160,8 @@ trait FourStateCoherence extends CoherencePolicy { class XactTracker(id: Int) extends Component { val io = new Bundle { val mem_req = (new ioDecoupled) { new HubMemReq() }.flip + val alloc_req = (new ioDecoupled) { new TrackerAllocReq() } + val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) val p_rep_has_data = Bool(INPUT) val x_init_has_data = Bool(INPUT) @@ -162,22 +170,27 @@ class XactTracker(id: Int) extends Component { val rep_cnt_dec = Bits(NTILES, INPUT) val busy = Bool(OUTPUT) val addr = Bits(PADDR_BITS, OUTPUT) - val tile_id = Bits(TILE_ID_BITS, OUTPUT) + val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) val sharer_count = Bits(TILE_ID_BITS, OUTPUT) val t_type = Bits(TTYPE_BITS, OUTPUT) - val pop_p_rep = Bool(OUTPUT) - val pop_p_rep_data = Bool(OUTPUT) + val pop_p_rep = Bits(NTILES, OUTPUT) + val pop_p_rep_data = Bits(NTILES, OUTPUT) + val pop_x_init = Bool(OUTPUT) + val pop_x_init_data = Bool(OUTPUT) val send_x_rep_ack = 
Bool(OUTPUT) } val valid = Reg(resetVal = Bool(false)) val addr = Reg{ Bits() } val t_type = Reg{ Bits() } - val tile_id = Reg{ Bits() } + val init_tile_id = Reg{ Bits() } val tile_xact_id = Reg{ Bits() } val probe_done = Reg{ Bits() } + //TODO: Decrement the probe count when final data piece is written + // Connent io.mem.ready sig to correct pop* outputs + // P_rep and x_init must be popped on same cycle of receipt } abstract class CoherenceHub extends Component @@ -204,23 +217,23 @@ class CoherenceHubNoDir extends CoherenceHub { } val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) - val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS)} } - val tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } - val t_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TTYPE_BITS)} } - val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val busy_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } + val addr_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS)} } + val init_tile_id_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val tile_xact_id_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } + val t_type_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TTYPE_BITS)} } + val sh_count_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val send_x_rep_ack_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } - val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_has_data_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_data_idx_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=log2up(NTILES))} } - val rep_cnt_dec_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } + val do_free_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } + val 
p_rep_has_data_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } + val p_rep_data_idx_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=log2up(NTILES))} } + val rep_cnt_dec_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } for( i <- 0 until NGLOBAL_XACTS) { busy_arr.write( UFix(i), trackerList(i).io.busy) addr_arr.write( UFix(i), trackerList(i).io.addr) - tile_id_arr.write( UFix(i), trackerList(i).io.tile_id) + init_tile_id_arr.write( UFix(i), trackerList(i).io.init_tile_id) tile_xact_id_arr.write( UFix(i), trackerList(i).io.tile_xact_id) t_type_arr.write( UFix(i), trackerList(i).io.t_type) sh_count_arr.write( UFix(i), trackerList(i).io.sharer_count) @@ -231,23 +244,6 @@ class CoherenceHubNoDir extends CoherenceHub { trackerList(i).io.rep_cnt_dec := rep_cnt_dec_arr.read(UFix(i)) } - // Nack conflicting transaction init attempts - val aborting = Wire() { Bits(width = NTILES) } - val initiating = Wire() { Bits(width = NTILES) } - for( j <- 0 until NTILES ) { - val init = io.tiles(j).xact_init - val abort = io.tiles(j).xact_abort - val conflicts = Bits(width = NGLOBAL_XACTS) - for( i <- 0 until NGLOBAL_XACTS) { - val t = trackerList(i).io - conflicts(i) := t.busy(i) && coherenceConflict(t.addr, init.bits.address) - } - aborting(j) := (conflicts.orR || busy_arr.toBits().andR) - abort.valid := init.valid && aborting - abort.bits.tile_xact_id := init.bits.tile_xact_id - init.ready := aborting(j) || initiating(j) - } - // Free finished transactions for( j <- 0 until NTILES ) { val finish = io.tiles(j).xact_finish @@ -255,6 +251,7 @@ class CoherenceHubNoDir extends CoherenceHub { finish.ready := Bool(true) } + // Reply to initial requestor // Forward memory responses from mem to tile val xrep_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val xrep_cnt_next = xrep_cnt + UFix(1) @@ -267,11 +264,11 @@ class CoherenceHubNoDir extends CoherenceHub { io.tiles(j).xact_rep.bits.global_xact_id := idx io.tiles(j).xact_rep_data.bits.data := io.mem.resp_data readys := 
Mux(xrep_cnt === UFix(0), io.tiles(j).xact_rep.ready && io.tiles(j).xact_rep_data.ready, io.tiles(j).xact_rep_data.ready) - io.tiles(j).xact_rep.valid := (UFix(j) === tile_id_arr.read(idx)) && ((io.mem.resp_val && xrep_cnt === UFix(0)) || send_x_rep_ack_arr.read(idx)) - io.tiles(j).xact_rep_data.valid := (UFix(j) === tile_id_arr.read(idx)) + io.tiles(j).xact_rep.valid := (UFix(j) === init_tile_id_arr.read(idx)) && ((io.mem.resp_val && xrep_cnt === UFix(0)) || send_x_rep_ack_arr.read(idx)) + io.tiles(j).xact_rep_data.valid := (UFix(j) === init_tile_id_arr.read(idx)) } // If there were a ready signal due to e.g. intervening network use: - //io.mem.resp_rdy := readys(tile_id_arr.read(idx)).xact_rep.ready + //io.mem.resp_rdy := readys(init_tile_id_arr.read(idx)).xact_rep.ready // Create an arbiter for the one memory port // We have to arbitrate between the different trackers' memory requests @@ -293,28 +290,70 @@ class CoherenceHubNoDir extends CoherenceHub { io.tiles(j).probe_rep_data.bits.data, io.tiles(j).xact_init_data.bits.data))) + // Handle probe replies, which may or may not have data for( j <- 0 until NTILES ) { val p_rep = io.tiles(j).probe_rep val p_rep_data = io.tiles(j).probe_rep_data val idx = p_rep.bits.global_xact_id - p_rep_has_data_arr.write(idx, p_rep.valid && p_rep.bits.has_data) + p_rep_has_data_arr.write(idx, p_rep.valid && p_rep.bits.has_data && p_rep_data.valid) p_rep_data_idx_arr.write(idx, UFix(j)) - p_rep.ready := foldR(trackerList.map(_.io.pop_p_rep))(_ || _) - p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data))(_ || _) + p_rep.ready := foldR(trackerList.map(_.io.pop_p_rep(j)))(_ || _) + p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) } for( i <- 0 until NGLOBAL_XACTS ) { val flags = Bits(width = NTILES) for( j <- 0 until NTILES) { val p_rep = io.tiles(j).probe_rep - flags(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) + flags(j) := p_rep.valid && !p_rep.bits.has_data && 
(p_rep.bits.global_xact_id === UFix(i)) } rep_cnt_dec_arr.write(UFix(i), flags) } + // Nack conflicting transaction init attempts + val aborting = Wire() { Bits(width = NTILES) } + for( j <- 0 until NTILES ) { + val x_init = io.tiles(j).xact_init + val x_abort = io.tiles(j).xact_abort + val conflicts = Bits(width = NGLOBAL_XACTS) + for( i <- 0 until NGLOBAL_XACTS) { + val t = trackerList(i).io + conflicts(i) := t.busy(i) && coherenceConflict(t.addr, x_init.bits.address) && + !(x_init.bits.has_data && (UFix(j) === t.init_tile_id)) + // Don't abort writebacks stalled on mem. + // TODO: This assumes overlapped writeback init reqs to + // the same addr will never be issued; is this ok? + } + x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id + val want_to_abort = conflicts.orR || busy_arr.flatten().andR + x_abort.valid := want_to_abort && x_init.valid + aborting(j) := want_to_abort && x_abort.ready + } + + // Handle transaction initiation requests + // Only one allocation per cycle + // Init requests may or may not have data + val alloc_arb = (new Arbiter(NGLOBAL_XACTS)) { Bool() } + val init_arb = (new Arbiter(NTILES)) { new TrackerAllocReq() } + for( i <- 0 until NGLOBAL_XACTS ) { + alloc_arb.io.in(i).valid := !trackerList(i).io.busy + trackerList(i).io.can_alloc := alloc_arb.io.in(i).ready + trackerList(i).io.alloc_req.bits := init_arb.io.out.bits + trackerList(i).io.alloc_req.valid := init_arb.io.out.valid + } - // Pick a single request of these types to process - //val xact_init_arb = (new Arbiter(NTILES)) { new TransactionInit() } - //val probe_reply_arb = (new Arbiter(NTILES)) { new ProbeReply() } + for( j <- 0 until NTILES ) { + val x_init = io.tiles(j).xact_init + val x_init_data = io.tiles(j).xact_init_data + init_arb.io.in(j).valid := x_init.valid + init_arb.io.in(j).bits.xact_init := x_init.bits + init_arb.io.in(j).bits.init_tile_id := UFix(j) + init_arb.io.in(j).bits.data_valid := x_init_data.valid + x_init.ready := aborting(j) || 
foldR(trackerList.map(_.io.pop_x_init && init_arb.io.out.bits.init_tile_id === UFix(j)))(_||_) + x_init_data.ready := aborting(j) || foldR(trackerList.map(_.io.pop_x_init_data && init_arb.io.out.bits.init_tile_id === UFix(j)))(_||_) + } + + alloc_arb.io.out.ready := init_arb.io.out.valid && !busy_arr.flatten().andR && + !foldR(trackerList.map(t => t.io.busy && coherenceConflict(t.io.addr, init_arb.io.out.bits.xact_init.address)))(_||_) } From 7a8f53a1177359860b919f31c053c40c0851df58 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 26 Feb 2012 00:53:07 -0800 Subject: [PATCH 0232/1087] probe req transactors in coherence hub --- rocket/src/main/scala/coherence.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 520f7c05..8cc616de 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -161,6 +161,7 @@ class XactTracker(id: Int) extends Component { val io = new Bundle { val mem_req = (new ioDecoupled) { new HubMemReq() }.flip val alloc_req = (new ioDecoupled) { new TrackerAllocReq() } + val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) val p_rep_has_data = Bool(INPUT) @@ -174,6 +175,7 @@ class XactTracker(id: Int) extends Component { val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) val sharer_count = Bits(TILE_ID_BITS, OUTPUT) val t_type = Bits(TTYPE_BITS, OUTPUT) + val push_p_req = Bits(NTILES, OUTPUT) val pop_p_rep = Bits(NTILES, OUTPUT) val pop_p_rep_data = Bits(NTILES, OUTPUT) val pop_x_init = Bool(OUTPUT) @@ -356,4 +358,17 @@ class CoherenceHubNoDir extends CoherenceHub { !foldR(trackerList.map(t => t.io.busy && coherenceConflict(t.io.addr, init_arb.io.out.bits.xact_init.address)))(_||_) + // Handle probe request generation + // Must arbitrate for each request port + for( j <- 0 until NTILES ) { + val p_req_arb = (new Arbiter(NGLOBAL_XACTS)) { 
new ProbeRequest() } + for( i <- 0 until NGLOBAL_XACTS ) { + val t = trackerList(i).io + p_req_arb.io.in(i).bits := t.probe_req.bits + p_req_arb.io.in(i).ready := t.probe_req.ready + p_req_arb.io.in(i).valid := t.probe_req.valid && t.push_p_req(j) + } + p_req_arb.io.out <> io.tiles(j).probe_req + } + } From f0588a005278712e15fd29b836af83869f0c40c2 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 27 Feb 2012 11:26:18 -0800 Subject: [PATCH 0233/1087] Added probe_req ready sigs, GenArray to Vec --- rocket/src/main/scala/coherence.scala | 68 +++++++++++++++------------ 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 8cc616de..8aff1592 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -168,7 +168,8 @@ class XactTracker(id: Int) extends Component { val x_init_has_data = Bool(INPUT) val p_rep_data_idx = Bits(log2up(NTILES), INPUT) val x_init_data_idx = Bits(log2up(NTILES), INPUT) - val rep_cnt_dec = Bits(NTILES, INPUT) + val p_rep_cnt_dec = Bits(NTILES, INPUT) + val p_req_cnt_inc = Bits(NTILES, INPUT) val busy = Bool(OUTPUT) val addr = Bits(PADDR_BITS, OUTPUT) val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) @@ -218,32 +219,35 @@ class CoherenceHubNoDir extends CoherenceHub { val mem = new ioDCache().flip } - val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) - val busy_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } - val addr_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS)} } - val init_tile_id_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val tile_xact_id_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } - val t_type_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TTYPE_BITS)} } - val sh_count_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val send_x_rep_ack_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } + val trackerList = (0 
until NGLOBAL_XACTS).map(new XactTracker(_)) - val do_free_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_has_data_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_data_idx_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=log2up(NTILES))} } - val rep_cnt_dec_arr = GenArray(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } + val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS)} } + val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } + val t_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TTYPE_BITS)} } + val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + + val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val p_rep_has_data_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val p_rep_data_idx_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=log2up(NTILES))} } + val p_rep_cnt_dec_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } + val p_req_cnt_inc_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } for( i <- 0 until NGLOBAL_XACTS) { - busy_arr.write( UFix(i), trackerList(i).io.busy) - addr_arr.write( UFix(i), trackerList(i).io.addr) - init_tile_id_arr.write( UFix(i), trackerList(i).io.init_tile_id) - tile_xact_id_arr.write( UFix(i), trackerList(i).io.tile_xact_id) + busy_arr.write( UFix(i), trackerList(i).io.busy) + addr_arr.write( UFix(i), trackerList(i).io.addr) + init_tile_id_arr.write( UFix(i), trackerList(i).io.init_tile_id) + tile_xact_id_arr.write( UFix(i), trackerList(i).io.tile_xact_id) t_type_arr.write( UFix(i), trackerList(i).io.t_type) - sh_count_arr.write( UFix(i), trackerList(i).io.sharer_count) - send_x_rep_ack_arr.write( UFix(i), trackerList(i).io.send_x_rep_ack) - trackerList(i).io.xact_finish := do_free_arr.read(UFix(i)) + sh_count_arr.write( UFix(i), 
trackerList(i).io.sharer_count) + send_x_rep_ack_arr.write(UFix(i), trackerList(i).io.send_x_rep_ack) + trackerList(i).io.xact_finish := do_free_arr.read(UFix(i)) trackerList(i).io.p_rep_has_data := p_rep_has_data_arr.read(UFix(i)) trackerList(i).io.p_rep_data_idx := p_rep_data_idx_arr.read(UFix(i)) - trackerList(i).io.rep_cnt_dec := rep_cnt_dec_arr.read(UFix(i)) + trackerList(i).io.p_rep_cnt_dec := p_rep_cnt_dec_arr.read(UFix(i)) + trackerList(i).io.p_req_cnt_inc := p_req_cnt_inc_arr.read(UFix(i)) } // Free finished transactions @@ -308,7 +312,7 @@ class CoherenceHubNoDir extends CoherenceHub { val p_rep = io.tiles(j).probe_rep flags(j) := p_rep.valid && !p_rep.bits.has_data && (p_rep.bits.global_xact_id === UFix(i)) } - rep_cnt_dec_arr.write(UFix(i), flags) + p_rep_cnt_dec_arr.write(UFix(i), flags) } // Nack conflicting transaction init attempts @@ -326,7 +330,7 @@ class CoherenceHubNoDir extends CoherenceHub { // the same addr will never be issued; is this ok? } x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id - val want_to_abort = conflicts.orR || busy_arr.flatten().andR + val want_to_abort = conflicts.orR || busy_arr.toBits.andR x_abort.valid := want_to_abort && x_init.valid aborting(j) := want_to_abort && x_abort.ready } @@ -354,21 +358,27 @@ class CoherenceHubNoDir extends CoherenceHub { x_init_data.ready := aborting(j) || foldR(trackerList.map(_.io.pop_x_init_data && init_arb.io.out.bits.init_tile_id === UFix(j)))(_||_) } - alloc_arb.io.out.ready := init_arb.io.out.valid && !busy_arr.flatten().andR && + alloc_arb.io.out.ready := init_arb.io.out.valid && !busy_arr.toBits.andR && !foldR(trackerList.map(t => t.io.busy && coherenceConflict(t.io.addr, init_arb.io.out.bits.xact_init.address)))(_||_) // Handle probe request generation // Must arbitrate for each request port + val p_req_arb_arr = List.fill(NTILES)((new Arbiter(NGLOBAL_XACTS)) { new ProbeRequest() }) for( j <- 0 until NTILES ) { - val p_req_arb = (new Arbiter(NGLOBAL_XACTS)) { new 
ProbeRequest() } for( i <- 0 until NGLOBAL_XACTS ) { val t = trackerList(i).io - p_req_arb.io.in(i).bits := t.probe_req.bits - p_req_arb.io.in(i).ready := t.probe_req.ready - p_req_arb.io.in(i).valid := t.probe_req.valid && t.push_p_req(j) + p_req_arb_arr(j).io.in(i).bits := t.probe_req.bits + p_req_arb_arr(j).io.in(i).valid := t.probe_req.valid && t.push_p_req(j) } - p_req_arb.io.out <> io.tiles(j).probe_req + p_req_arb_arr(j).io.out <> io.tiles(j).probe_req + } + for( i <- 0 until NGLOBAL_XACTS ) { + val flags = Bits(width = NTILES) + for( j <- 0 until NTILES ) { + flags(j) := p_req_arb_arr(j).io.in(i).ready + } + p_rep_cnt_dec_arr.write(UFix(i), flags) } } From 3d96a2d4f07629574d3586b251841d09a86b0940 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 27 Feb 2012 14:00:58 -0800 Subject: [PATCH 0234/1087] add fpu.dec.wen := false when HAVE_FPU is turned off --- rocket/src/main/scala/cpu.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 73a37b03..7ac5e943 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -137,7 +137,10 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ctrl.io.fpu <> fpu.io.ctrl } else + { ctrl.io.fpu.dec.valid := Bool(false) + ctrl.io.fpu.dec.wen := Bool(false) + } if (HAVE_VEC) { From 1d41a41afae5059c49ce1eaf232a63066eaa31d3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 27 Feb 2012 17:49:48 -0800 Subject: [PATCH 0235/1087] remove extraneous constants --- rocket/src/main/scala/consts.scala | 5 ----- rocket/src/main/scala/dtlb.scala | 3 --- 2 files changed, 8 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 61f35e0c..8b55c39f 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -207,11 +207,6 @@ object Constants val DTLB_ENTRIES = 8; val ITLB_ENTRIES = 8; - // physical memory size (# 8K pages) - // if 
you change this value, make sure to also change MEMORY_SIZE variable in memif.h - val MEMSIZE_PAGES = 0x8000; // 256 megs - val MEMSIZE_BYTES = MEMSIZE_PAGES*8192; - val START_ADDR = 0x2000; val HAVE_RVC = false diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index f7c8f17b..6c0af2a0 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -141,9 +141,6 @@ class rocketDTLB(entries: Int) extends Component } } - // exception check - val outofrange = !tlb_miss && (io.cpu_resp.ppn > UFix(MEMSIZE_PAGES, PPN_BITS)); - val access_fault_ld = tlb_hit && (req_load || req_amo) && ((status_s && !sr_array(tag_hit_addr).toBool) || From 2b1c07c723ed5a4d879c3c60fa910a9bdcb886ee Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 27 Feb 2012 18:36:09 -0800 Subject: [PATCH 0236/1087] replace ioDCache with ioMem --- rocket/src/main/scala/arbiter.scala | 3 +-- rocket/src/main/scala/coherence.scala | 2 +- rocket/src/main/scala/consts.scala | 13 ++++++------- rocket/src/main/scala/htif.scala | 2 +- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/icache_prefetch.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 19 +++---------------- 7 files changed, 15 insertions(+), 30 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index dcbada4c..50f66a58 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -20,12 +20,11 @@ class ioMem() extends Bundle class ioMemArbiter(n: Int) extends Bundle() { val mem = new ioMem(); - val requestor = Vec(n) { new ioDCache() } + val requestor = Vec(n) { new ioMem().flip() } } class rocketMemArbiter(n: Int) extends Component { val io = new ioMemArbiter(n); - require(io.mem.req_tag.getWidth >= log2up(n) + io.requestor(0).req_tag.getWidth) var req_val = Bool(false) var req_rdy = io.mem.req_rdy diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala 
index 8aff1592..d5a5e414 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -216,7 +216,7 @@ class CoherenceHubNoDir extends CoherenceHub { val io = new Bundle { val tiles = Vec(NTILES) { new ioTileLink() } - val mem = new ioDCache().flip + val mem = new ioMem } val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 8b55c39f..17294bae 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -183,8 +183,8 @@ object Constants val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8 val TILE_ID_BITS = 1 val TILE_XACT_ID_BITS = 1 // log2(NMSHR) - val GLOBAL_XACT_ID_BITS = IDX_BITS // if one active xact per set - val NGLOBAL_XACTS = 1 << IDX_BITS + val GLOBAL_XACT_ID_BITS = 4 + val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS val TTYPE_BITS = 2 val X_READ_SHARED = UFix(0, TTYPE_BITS) @@ -198,11 +198,10 @@ object Constants val P_COPY = UFix(2, PTYPE_BITS) // external memory interface - val IMEM_TAG_BITS = 1; - val DMEM_TAG_BITS = ceil(log(NMSHR)/log(2)).toInt; - val MEM_TAG_BITS = 2 + max(IMEM_TAG_BITS, DMEM_TAG_BITS); - val MEM_DATA_BITS = 128; - val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS; + val MEM_TAG_BITS = 4 + val MEM_DATA_BITS = 128 + val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS + require(MEM_TAG_BITS >= max(log2up(NMSHR)+1, GLOBAL_XACT_ID_BITS)) val DTLB_ENTRIES = 8; val ITLB_ENTRIES = 8; diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index d229667e..77d189eb 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -26,7 +26,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component val io = new Bundle { val host = new ioHost(w) val cpu = Vec(ncores) { new ioHTIF().flip() } - val mem = new ioDCache().flip() + val mem = new ioMem } val short_request_bits = 64 diff --git a/rocket/src/main/scala/icache.scala 
b/rocket/src/main/scala/icache.scala index 0c8d758b..09cbb156 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -20,7 +20,7 @@ class ioImem(view: List[String] = null) extends Bundle (view) class ioRocketICache extends Bundle() { val cpu = new ioImem(); - val mem = new ioDCache().flip() + val mem = new ioMem } // basic direct mapped instruction cache diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index b9144cd7..104257e0 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -6,8 +6,8 @@ import Constants._; import scala.math._; class ioIPrefetcher extends Bundle() { - val icache = new ioDCache(); - val mem = new ioDCache().flip() + val icache = new ioMem().flip + val mem = new ioMem val invalidate = Bool(INPUT) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2147cd21..89a4e5c6 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -136,7 +136,7 @@ class DataArrayArrayReq extends Bundle { class MemReq extends Bundle { val rw = Bool() val addr = UFix(width = PADDR_BITS-OFFSET_BITS) - val tag = Bits(width = DMEM_TAG_BITS) + val tag = Bits(width = MEM_TAG_BITS) } class WritebackReq extends Bundle { @@ -281,7 +281,7 @@ class MSHRFile extends Component { val req_way_oh = Bits(NWAYS, INPUT) val mem_resp_val = Bool(INPUT) - val mem_resp_tag = Bits(DMEM_TAG_BITS, INPUT) + val mem_resp_tag = Bits(MEM_TAG_BITS, INPUT) val mem_resp_idx = Bits(IDX_BITS, OUTPUT) val mem_resp_way_oh = Bits(NWAYS, OUTPUT) @@ -670,19 +670,6 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { val resp_data_subword = Bits(64, OUTPUT); val resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT); } - -// interface between D$ and next level in memory hierarchy -class ioDCache(view: List[String] = null) extends Bundle(view) { - val req_addr = UFix(PADDR_BITS - 
OFFSET_BITS, INPUT); - val req_tag = UFix(DMEM_TAG_BITS, INPUT); - val req_val = Bool(INPUT); - val req_rdy = Bool(OUTPUT); - val req_wdata = Bits(MEM_DATA_BITS, INPUT); - val req_rw = Bool(INPUT); - val resp_data = Bits(MEM_DATA_BITS, OUTPUT); - val resp_tag = Bits(DMEM_TAG_BITS, OUTPUT); - val resp_val = Bool(OUTPUT); -} abstract class HellaCache extends Component { def isHit ( cmd: Bits, state: UFix): Bool @@ -698,7 +685,7 @@ abstract class HellaCache extends Component { class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val io = new Bundle { val cpu = new ioDmem() - val mem = new ioDCache().flip + val mem = new ioMem } val lines = 1 << IDX_BITS From 5cc10337b49df706450eb4fb4334c6be5d4a66e4 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 27 Feb 2012 19:10:15 -0800 Subject: [PATCH 0237/1087] Null coherence hub. Begin work on internal tracker logic --- rocket/src/main/scala/coherence.scala | 163 +++++++++++++++++++++----- 1 file changed, 133 insertions(+), 30 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index d5a5e414..20c95e78 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -157,46 +157,149 @@ trait FourStateCoherence extends CoherencePolicy { } } -class XactTracker(id: Int) extends Component { +class XactTracker(id: Int) extends Component with CoherencePolicy { val io = new Bundle { - val mem_req = (new ioDecoupled) { new HubMemReq() }.flip - val alloc_req = (new ioDecoupled) { new TrackerAllocReq() } - val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip - val can_alloc = Bool(INPUT) - val xact_finish = Bool(INPUT) - val p_rep_has_data = Bool(INPUT) - val x_init_has_data = Bool(INPUT) - val p_rep_data_idx = Bits(log2up(NTILES), INPUT) - val x_init_data_idx = Bits(log2up(NTILES), INPUT) - val p_rep_cnt_dec = Bits(NTILES, INPUT) - val p_req_cnt_inc = Bits(NTILES, INPUT) - val busy = Bool(OUTPUT) - val addr = 
Bits(PADDR_BITS, OUTPUT) - val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) - val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) - val sharer_count = Bits(TILE_ID_BITS, OUTPUT) - val t_type = Bits(TTYPE_BITS, OUTPUT) - val push_p_req = Bits(NTILES, OUTPUT) - val pop_p_rep = Bits(NTILES, OUTPUT) - val pop_p_rep_data = Bits(NTILES, OUTPUT) - val pop_x_init = Bool(OUTPUT) + val alloc_req = (new ioDecoupled) { new TrackerAllocReq() } + val can_alloc = Bool(INPUT) + val xact_finish = Bool(INPUT) + val p_rep_has_data = Bool(INPUT) + val p_rep_data_idx = Bits(log2up(NTILES), INPUT) + val p_rep_cnt_dec = Bits(NTILES, INPUT) + val p_req_cnt_inc = Bits(NTILES, INPUT) + + val mem_req = (new ioDecoupled) { new HubMemReq() }.flip + val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip + val busy = Bool(OUTPUT) + val addr = Bits(PADDR_BITS, OUTPUT) + val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) + val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) + val sharer_count = Bits(TILE_ID_BITS, OUTPUT) + val t_type = Bits(TTYPE_BITS, OUTPUT) + val push_p_req = Bits(NTILES, OUTPUT) + val pop_p_rep = Bits(NTILES, OUTPUT) + val pop_p_rep_data = Bits(NTILES, OUTPUT) + val pop_x_init = Bool(OUTPUT) val pop_x_init_data = Bool(OUTPUT) - val send_x_rep_ack = Bool(OUTPUT) + val send_x_rep_ack = Bool(OUTPUT) } - val valid = Reg(resetVal = Bool(false)) - val addr = Reg{ Bits() } - val t_type = Reg{ Bits() } - val init_tile_id = Reg{ Bits() } - val tile_xact_id = Reg{ Bits() } - val probe_done = Reg{ Bits() } + val s_idle :: s_mem_r :: s_mem_w :: mem_wr :: s_probe :: Nil = Enum(5){ UFix() } + val state = Reg(resetVal = s_idle) + val addr_ = Reg{ Bits() } + val t_type_ = Reg{ Bits() } + val init_tile_id_ = Reg{ Bits() } + val tile_xact_id_ = Reg{ Bits() } + val probe_done = Reg{ Bits() } + val mem_count = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + + io.addr := addr_ + io.init_tile_id := init_tile_id_ + io.tile_xact_id := tile_xact_id_ + io.sharer_count := UFix(NTILES) // TODO: 
Broadcast only + io.t_type := t_type_ + +/* +class HubMemReq extends Bundle { + val rw = Bool() + val addr = UFix(width = PADDR_BITS-OFFSET_BITS) + val tag = Bits(width = GLOBAL_XACT_ID_BITS) + // Figure out which data-in port to pull from + val data_idx = Bits(width = TILE_ID_BITS) + val is_probe_rep = Bool() +} + +class TrackerAllocReq extends Bundle { + val xact_init = new TransactionInit() + val t_type = Bits(width = TTYPE_BITS) + val has_data = Bool() + val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) + val address = Bits(width = PADDR_BITS) + val init_tile_id = Bits(width = TILE_ID_BITS) + val data_valid = Bool() +*/ +/* + when( alloc_req.valid && can_alloc ) { + valid := Bool(true) + addr := alloc_req.bits.xact_init.address + t_type := alloc_req.bits.xact_init.t_type + init_tile_id := alloc_req.bits.init_tile_id + tile_xact_id := alloc_req.bits.xact_init.tile_xact_id + [counter] := REFILL_CYCLES-1 if alloc_req.bits.xact_init.has_data else 0 + } + when ( alloc_req.bits.data_valid ) { + io.mem_req.valid := + io.mem_req.bits.rw := + io.mem_req.bits.addr := + io.mem_req.bits.tag := + io.mem_req.bits.data_idx := + io.mem_req.bits.is_probe_rep := + := io.mem.ready + } + when( p_rep_has_data ) { + io.mem_req.valid := + io.mem_req.bits.rw := + io.mem_req.bits.addr := + io.mem_req.bits.tag := + io.mem_req.bits.data_idx := + io.mem_req.bits.is_probe_rep := + := io.mem.ready + + } + + val mem_req = (new ioDecoupled) { new HubMemReq() }.flip + val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip + push_p_req = Bits(0, width = NTILES) + pop_p_rep = Bits(0, width = NTILES) + pop_p_rep_data = Bits(0, width = NTILES) + pop_x_init = Bool(false) + pop_x_init_data = Bool(false) + send_x_rep_ack = Bool(false) + + + } +*/ //TODO: Decrement the probe count when final data piece is written // Connent io.mem.ready sig to correct pop* outputs // P_rep and x_init must be popped on same cycle of receipt } -abstract class CoherenceHub extends Component +abstract class 
CoherenceHub extends Component with CoherencePolicy + +class CoherenceHubNull extends Component { + val io = new Bundle { + val tile = new ioTileLink() + val mem = new ioMem() + } + + val x_init = io.tile.xact_init + val is_write = x_init.bits.t_type === X_WRITE_UNCACHED + x_init.ready := io.mem.req_rdy + io.mem.req_val := x_init.valid + io.mem.req_rw := is_write + io.mem.req_tag := x_init.bits.tile_xact_id + io.mem.req_addr := x_init.bits.address + + val x_rep = io.tile.xact_rep + x_rep.bits.t_type := Bits(width = TTYPE_BITS) + x_rep.bits.has_data := !is_write + x_rep.bits.tile_xact_id := Mux(is_write, x_init.bits.tile_xact_id, io.mem.resp_tag) + x_rep.bits.global_xact_id := UFix(0) // don't care + x_rep.valid := io.mem.resp_val + + //TODO: + val x_init_data = io.tile.xact_init_data + val x_rep_data = io.tile.xact_rep_data + x_init_data.ready := io.mem.req_rdy + io.mem.req_wdata := x_init_data.bits.data + x_rep_data.bits.data := io.mem.resp_data + x_rep_data.valid := io.mem.resp_val + // Should be: + //io.mem.req_data <> x_init_data + //x_rep_data <> io.mem.resp_data + +} + class CoherenceHubNoDir extends CoherenceHub { From 3f998b13530cb7cd90dfe3def45eac878f7fc178 Mon Sep 17 00:00:00 2001 From: Daiwei Li Date: Tue, 28 Feb 2012 14:54:48 -0800 Subject: [PATCH 0238/1087] send vcfg and setvl to vu prefetch queues --- rocket/src/main/scala/ctrl_vec.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 8c2351e7..8ca0ce68 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -62,8 +62,8 @@ class rocketCtrlVec extends Component // | | | | | | | | | | | | cpfence // | | | | | | | | | | | | | List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N,N,N,N,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N,N,N,N,N), + VVCFGIVL-> List(Y,VCMD_I, 
VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N,Y,Y,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N,Y,Y,N,N), VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N), VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N,N,N,N,N), VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N), From 040aa9fe02b5af4edf0d97679efe3841e17bdc52 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 28 Feb 2012 17:33:06 -0800 Subject: [PATCH 0239/1087] Added temporary ioMemHub and made coherence hub implementations depend on it rather than ioMem --- rocket/src/main/scala/coherence.scala | 281 ++++++++++++++------------ 1 file changed, 157 insertions(+), 124 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 20c95e78..11c764ec 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -3,13 +3,39 @@ package rocket import Chisel._ import Constants._ +class MemData extends Bundle { + val data = Bits(width = MEM_DATA_BITS) +} + +class MemReqCmd() extends Bundle +{ + val rw = Bool() + val addr = UFix(PADDR_BITS - OFFSET_BITS) + val tag = Bits(MEM_TAG_BITS) +} + +class MemResp () extends Bundle +{ + val tag = Bits(MEM_TAG_BITS) + val data = Bits(width = MEM_DATA_BITS) + val valid = Bool() +} + +class ioMemHub() extends Bundle +{ + val req_cmd = (new ioDecoupled) { new MemReqCmd() }.flip + val req_data = (new ioDecoupled) { new MemData() }.flip + val resp = new MemResp() +} + class HubMemReq extends Bundle { - val rw = Bool() - val addr = UFix(width = PADDR_BITS-OFFSET_BITS) - val tag = Bits(width = GLOBAL_XACT_ID_BITS) - // Figure out which data-in port to pull from - val data_idx = Bits(width = TILE_ID_BITS) - val is_probe_rep = Bool() + val req_cmd = (new ioDecoupled) { new MemReqCmd() } + val req_data = (new ioDecoupled) { new MemData() } +} + +class HubProbeRep extends Bundle { + val reply = (new ioDecoupled) { new ProbeReply } + val data_idx = Bits(width = log2up(NTILES)) } class TrackerAllocReq 
extends Bundle { @@ -18,9 +44,6 @@ class TrackerAllocReq extends Bundle { val data_valid = Bool() } -class MemData extends Bundle { - val data = Bits(width = MEM_DATA_BITS) -} class TransactionInit extends Bundle { val t_type = Bits(width = TTYPE_BITS) @@ -49,15 +72,13 @@ class ProbeReply extends Bundle { class ProbeReplyData extends MemData -class TransactionReply extends Bundle { +class TransactionReply extends MemData { val t_type = Bits(width = TTYPE_BITS) val has_data = Bool() val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) } -class TransactionReplyData extends MemData - class TransactionFinish extends Bundle { val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) } @@ -70,7 +91,6 @@ class ioTileLink extends Bundle { val probe_rep = (new ioDecoupled) { new ProbeReply() }.flip val probe_rep_data = (new ioDecoupled) { new ProbeReplyData() }.flip val xact_rep = (new ioDecoupled) { new TransactionReply() } - val xact_rep_data = (new ioDecoupled) { new TransactionReplyData() } val xact_finish = (new ioDecoupled) { new TransactionFinish() }.flip } @@ -160,18 +180,21 @@ trait FourStateCoherence extends CoherencePolicy { class XactTracker(id: Int) extends Component with CoherencePolicy { val io = new Bundle { val alloc_req = (new ioDecoupled) { new TrackerAllocReq() } + val probe_rep = (new ioDecoupled) { new HubProbeRep() } val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) val p_rep_has_data = Bool(INPUT) - val p_rep_data_idx = Bits(log2up(NTILES), INPUT) val p_rep_cnt_dec = Bits(NTILES, INPUT) val p_req_cnt_inc = Bits(NTILES, INPUT) + val p_rep_data = (new ioDecoupled) { new ProbeReplyData() } + val x_init_data = (new ioDecoupled) { new TransactionInitData() } val mem_req = (new ioDecoupled) { new HubMemReq() }.flip val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip val busy = Bool(OUTPUT) val addr = Bits(PADDR_BITS, OUTPUT) val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) + val p_rep_tile_id 
= Bits(log2up(NTILES), INPUT) val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) val sharer_count = Bits(TILE_ID_BITS, OUTPUT) val t_type = Bits(TTYPE_BITS, OUTPUT) @@ -183,8 +206,14 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { val send_x_rep_ack = Bool(OUTPUT) } - - val s_idle :: s_mem_r :: s_mem_w :: mem_wr :: s_probe :: Nil = Enum(5){ UFix() } + def sendProbeReqType(t_type: UFix, global_state: UFix): UFix = { + MuxCase(P_COPY, Array((t_type === X_READ_SHARED) -> P_DOWNGRADE, + (t_type === X_READ_EXCLUSIVE) -> P_INVALIDATE, + (t_type === X_READ_UNCACHED) -> P_COPY, + (t_type === X_WRITE_UNCACHED) -> P_INVALIDATE)) + } + + val s_idle :: s_mem_r :: s_mem_w :: s_mem_wr :: s_probe :: s_busy :: Nil = Enum(6){ UFix() } val state = Reg(resetVal = s_idle) val addr_ = Reg{ Bits() } val t_type_ = Reg{ Bits() } @@ -192,73 +221,99 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { val tile_xact_id_ = Reg{ Bits() } val probe_done = Reg{ Bits() } val mem_count = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + val p_rep_count = Reg(resetVal = UFix(0, width = log2up(NTILES))) + val p_req_flags = Reg(resetVal = UFix(0, width = NTILES)) + val p_rep_data_idx_ = Reg{ Bits() } + val x_init_data_needs_wb = Reg{ Bool() } + val p_rep_data_needs_wb = Reg{ Bool() } + io.busy := state != s_idle io.addr := addr_ io.init_tile_id := init_tile_id_ io.tile_xact_id := tile_xact_id_ io.sharer_count := UFix(NTILES) // TODO: Broadcast only io.t_type := t_type_ -/* -class HubMemReq extends Bundle { - val rw = Bool() - val addr = UFix(width = PADDR_BITS-OFFSET_BITS) - val tag = Bits(width = GLOBAL_XACT_ID_BITS) - // Figure out which data-in port to pull from - val data_idx = Bits(width = TILE_ID_BITS) - val is_probe_rep = Bool() -} - -class TrackerAllocReq extends Bundle { - val xact_init = new TransactionInit() - val t_type = Bits(width = TTYPE_BITS) - val has_data = Bool() - val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) - val address = 
Bits(width = PADDR_BITS) - val init_tile_id = Bits(width = TILE_ID_BITS) - val data_valid = Bool() -*/ -/* - when( alloc_req.valid && can_alloc ) { - valid := Bool(true) - addr := alloc_req.bits.xact_init.address - t_type := alloc_req.bits.xact_init.t_type - init_tile_id := alloc_req.bits.init_tile_id - tile_xact_id := alloc_req.bits.xact_init.tile_xact_id - [counter] := REFILL_CYCLES-1 if alloc_req.bits.xact_init.has_data else 0 - } - when ( alloc_req.bits.data_valid ) { - io.mem_req.valid := - io.mem_req.bits.rw := - io.mem_req.bits.addr := - io.mem_req.bits.tag := - io.mem_req.bits.data_idx := - io.mem_req.bits.is_probe_rep := - := io.mem.ready - } - when( p_rep_has_data ) { - io.mem_req.valid := - io.mem_req.bits.rw := - io.mem_req.bits.addr := - io.mem_req.bits.tag := - io.mem_req.bits.data_idx := - io.mem_req.bits.is_probe_rep := - := io.mem.ready + io.mem_req.valid := Bool(false) + io.mem_req.bits.req_cmd.bits.rw := state === s_mem_w || state === s_mem_wr + io.mem_req.bits.req_cmd.bits.addr := addr_ + io.mem_req.bits.req_cmd.bits.tag := UFix(id) + // := io.mem.ready //sent mem req + io.probe_req.valid := Bool(false) + io.probe_req.bits.p_type := sendProbeReqType(t_type_, UFix(0)) + io.probe_req.bits.global_xact_id := UFix(id) + io.probe_req.bits.address := addr_ + // := io.probe_req.ready //got through arbiter ---- p_rep_dec_arr + io.push_p_req := Bits(0, width = NTILES) + io.pop_p_rep := Bits(0, width = NTILES) + io.pop_p_rep_data := Bits(0, width = NTILES) + io.pop_x_init := Bool(false) + io.pop_x_init_data := Bool(false) + io.send_x_rep_ack := Bool(false) + switch (state) { + is(s_idle) { + when( io.alloc_req.valid && io.can_alloc ) { + addr_ := io.alloc_req.bits.xact_init.address + t_type_ := io.alloc_req.bits.xact_init.t_type + init_tile_id_ := io.alloc_req.bits.init_tile_id + tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id + x_init_data_needs_wb := io.alloc_req.bits.xact_init.has_data + p_rep_count := UFix(NTILES) + p_req_flags := ~Bits(0, 
width = NTILES) + state := s_probe + io.pop_x_init := Bool(true) + } + } + is(s_mem_r) { + io.mem_req.valid := Bool(true) + when(io.mem_req.ready) { state := s_busy } + } + is(s_mem_w) { + io.mem_req.valid := Bool(true) + when(io.mem_req.ready) { state := s_busy } + } + is(s_mem_wr) { + when(io.probe_rep.bits.reply.bits.has_data) { + //io.pop_p_rep(p_rep_data_idx) := io.mem_req_rdy + //io.pop_p_rep_data(p_rep_data_idx) := io.mem_req_rdy //TODO + } . otherwise { + //io.pop_x_init := io.mem_req_rdy + //io.pop_x_init_data := io.mem_req_rdy + } + io.mem_req.valid := Bool(true) + when(io.mem_req.ready) { state := s_mem_r } + } + is(s_probe) { + when(p_req_flags.orR) { + io.push_p_req := p_req_flags + io.probe_req.valid := Bool(true) + } + when(io.p_req_cnt_inc.orR) { + p_req_flags := p_req_flags & ~io.p_req_cnt_inc // unflag sent reqs + } + val p_rep_has_data = Bool(INPUT) + val p_rep_data_idx = Bits(log2up(NTILES), INPUT) + val p_rep_cnt_dec = Bits(NTILES, INPUT) + when(io.p_rep_cnt_dec.orR) { + val p_rep_count_next = p_rep_count - PopCount(io.p_rep_cnt_dec) + p_rep_count := p_rep_count_next + when(p_rep_count_next === UFix(0)) { + state := s_busy //TODO: XXXXXXXXXX + } + } + when(p_rep_has_data) { + p_rep_data_needs_wb := Bool(true) + p_rep_data_idx_ := p_rep_data_idx + } + } + is(s_busy) { + when (io.xact_finish) { + state := s_idle + } + } } - val mem_req = (new ioDecoupled) { new HubMemReq() }.flip - val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip - push_p_req = Bits(0, width = NTILES) - pop_p_rep = Bits(0, width = NTILES) - pop_p_rep_data = Bits(0, width = NTILES) - pop_x_init = Bool(false) - pop_x_init_data = Bool(false) - send_x_rep_ack = Bool(false) - - - } -*/ //TODO: Decrement the probe count when final data piece is written // Connent io.mem.ready sig to correct pop* outputs // P_rep and x_init must be popped on same cycle of receipt @@ -269,35 +324,24 @@ abstract class CoherenceHub extends Component with CoherencePolicy class CoherenceHubNull 
extends Component { val io = new Bundle { val tile = new ioTileLink() - val mem = new ioMem() + val mem = new ioMemHub() } val x_init = io.tile.xact_init val is_write = x_init.bits.t_type === X_WRITE_UNCACHED - x_init.ready := io.mem.req_rdy - io.mem.req_val := x_init.valid - io.mem.req_rw := is_write - io.mem.req_tag := x_init.bits.tile_xact_id - io.mem.req_addr := x_init.bits.address + x_init.ready := io.mem.req_cmd.ready + io.mem.req_cmd.valid := x_init.valid + io.mem.req_cmd.bits.rw := is_write + io.mem.req_cmd.bits.tag := x_init.bits.tile_xact_id + io.mem.req_cmd.bits.addr := x_init.bits.address + io.mem.req_data <> io.tile.xact_init_data val x_rep = io.tile.xact_rep - x_rep.bits.t_type := Bits(width = TTYPE_BITS) - x_rep.bits.has_data := !is_write - x_rep.bits.tile_xact_id := Mux(is_write, x_init.bits.tile_xact_id, io.mem.resp_tag) + x_rep.bits.t_type := X_READ_EXCLUSIVE + x_rep.bits.tile_xact_id := Mux(is_write, x_init.bits.tile_xact_id, io.mem.resp.tag) x_rep.bits.global_xact_id := UFix(0) // don't care - x_rep.valid := io.mem.resp_val - - //TODO: - val x_init_data = io.tile.xact_init_data - val x_rep_data = io.tile.xact_rep_data - x_init_data.ready := io.mem.req_rdy - io.mem.req_wdata := x_init_data.bits.data - x_rep_data.bits.data := io.mem.resp_data - x_rep_data.valid := io.mem.resp_val - // Should be: - //io.mem.req_data <> x_init_data - //x_rep_data <> io.mem.resp_data - + x_rep.bits.data := io.mem.resp.data + x_rep.valid := io.mem.resp.valid } @@ -319,7 +363,7 @@ class CoherenceHubNoDir extends CoherenceHub { val io = new Bundle { val tiles = Vec(NTILES) { new ioTileLink() } - val mem = new ioMem + val mem = new ioMemHub } val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) @@ -333,8 +377,6 @@ class CoherenceHubNoDir extends CoherenceHub { val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_has_data_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val 
p_rep_data_idx_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=log2up(NTILES))} } val p_rep_cnt_dec_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } val p_req_cnt_inc_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } @@ -347,8 +389,6 @@ class CoherenceHubNoDir extends CoherenceHub { sh_count_arr.write( UFix(i), trackerList(i).io.sharer_count) send_x_rep_ack_arr.write(UFix(i), trackerList(i).io.send_x_rep_ack) trackerList(i).io.xact_finish := do_free_arr.read(UFix(i)) - trackerList(i).io.p_rep_has_data := p_rep_has_data_arr.read(UFix(i)) - trackerList(i).io.p_rep_data_idx := p_rep_data_idx_arr.read(UFix(i)) trackerList(i).io.p_rep_cnt_dec := p_rep_cnt_dec_arr.read(UFix(i)) trackerList(i).io.p_req_cnt_inc := p_req_cnt_inc_arr.read(UFix(i)) } @@ -362,22 +402,16 @@ class CoherenceHubNoDir extends CoherenceHub { // Reply to initial requestor // Forward memory responses from mem to tile - val xrep_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) - val xrep_cnt_next = xrep_cnt + UFix(1) - when (io.mem.resp_val) { xrep_cnt := xrep_cnt_next } - val idx = io.mem.resp_tag - val readys = Bits(width = NTILES) + val idx = io.mem.resp.tag for( j <- 0 until NTILES ) { io.tiles(j).xact_rep.bits.t_type := getTransactionReplyType(t_type_arr.read(idx), sh_count_arr.read(idx)) io.tiles(j).xact_rep.bits.tile_xact_id := tile_xact_id_arr.read(idx) io.tiles(j).xact_rep.bits.global_xact_id := idx - io.tiles(j).xact_rep_data.bits.data := io.mem.resp_data - readys := Mux(xrep_cnt === UFix(0), io.tiles(j).xact_rep.ready && io.tiles(j).xact_rep_data.ready, io.tiles(j).xact_rep_data.ready) - io.tiles(j).xact_rep.valid := (UFix(j) === init_tile_id_arr.read(idx)) && ((io.mem.resp_val && xrep_cnt === UFix(0)) || send_x_rep_ack_arr.read(idx)) - io.tiles(j).xact_rep_data.valid := (UFix(j) === init_tile_id_arr.read(idx)) + io.tiles(j).xact_rep.bits.data := io.mem.resp.data + io.tiles(j).xact_rep.valid := (UFix(j) === init_tile_id_arr.read(idx)) && (io.mem.resp.valid || 
send_x_rep_ack_arr.read(idx)) } // If there were a ready signal due to e.g. intervening network use: - //io.mem.resp_rdy := readys(init_tile_id_arr.read(idx)).xact_rep.ready + //io.mem.resp.ready := io.tiles(init_tile_id_arr.read(idx)).xact_rep.ready // Create an arbiter for the one memory port // We have to arbitrate between the different trackers' memory requests @@ -387,33 +421,30 @@ class CoherenceHubNoDir extends CoherenceHub { for( i <- 0 until NGLOBAL_XACTS ) { mem_req_arb.io.in(i) <> trackerList(i).io.mem_req } - mem_req_arb.io.out.ready := io.mem.req_rdy - io.mem.req_val := mem_req_arb.io.out.valid - io.mem.req_rw := mem_req_arb.io.out.bits.rw - io.mem.req_tag := mem_req_arb.io.out.bits.tag - io.mem.req_addr := mem_req_arb.io.out.bits.addr - io.mem.req_wdata := MuxLookup(mem_req_arb.io.out.bits.data_idx, - Bits(0, width = MEM_DATA_BITS), - (0 until NTILES).map( j => - UFix(j) -> Mux(mem_req_arb.io.out.bits.is_probe_rep, - io.tiles(j).probe_rep_data.bits.data, - io.tiles(j).xact_init_data.bits.data))) + //mem_req_arb.io.out.ready := io.mem.req_cmd.ready || io.mem.req_data.ready + io.mem.req_cmd <> mem_req_arb.io.out.bits.req_cmd + io.mem.req_data <> mem_req_arb.io.out.bits.req_data + //io.mem.req_wdata := MuxLookup(mem_req_arb.io.out.bits.data_idx, + // Bits(0, width = MEM_DATA_BITS), + // (0 until NTILES).map( j => + // UFix(j) -> Mux(mem_req_arb.io.out.bits.is_probe_rep, + // io.tiles(j).probe_rep_data.bits.data, + // io.tiles(j).xact_init_data.bits.data))) // Handle probe replies, which may or may not have data for( j <- 0 until NTILES ) { val p_rep = io.tiles(j).probe_rep val p_rep_data = io.tiles(j).probe_rep_data val idx = p_rep.bits.global_xact_id - p_rep_has_data_arr.write(idx, p_rep.valid && p_rep.bits.has_data && p_rep_data.valid) - p_rep_data_idx_arr.write(idx, UFix(j)) p_rep.ready := foldR(trackerList.map(_.io.pop_p_rep(j)))(_ || _) p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) } for( i <- 0 until NGLOBAL_XACTS ) { + 
trackerList(i).io.p_rep_data := MuxLookup(trackerList(i).io.p_rep_tile_id, Bits(0), (0 until NTILES).map { j => UFix(j) -> io.tiles(j).probe_rep_data }) val flags = Bits(width = NTILES) for( j <- 0 until NTILES) { val p_rep = io.tiles(j).probe_rep - flags(j) := p_rep.valid && !p_rep.bits.has_data && (p_rep.bits.global_xact_id === UFix(i)) + flags(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) } p_rep_cnt_dec_arr.write(UFix(i), flags) } @@ -448,6 +479,8 @@ class CoherenceHubNoDir extends CoherenceHub { trackerList(i).io.can_alloc := alloc_arb.io.in(i).ready trackerList(i).io.alloc_req.bits := init_arb.io.out.bits trackerList(i).io.alloc_req.valid := init_arb.io.out.valid + + trackerList(i).io.x_init_data := MuxLookup(trackerList(i).io.init_tile_id, Bits(0), (0 until NTILES).map { j => UFix(j) -> io.tiles(j).xact_init_data }) } for( j <- 0 until NTILES ) { From c99f6bbeb7e727cad460b6d114624f801071fa0d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 28 Feb 2012 18:59:15 -0800 Subject: [PATCH 0240/1087] separate memory request command and data also, merge some VLSI/C++ test harness functionality --- rocket/src/main/scala/arbiter.scala | 24 ++++++++++--- rocket/src/main/scala/coherence.scala | 6 ---- rocket/src/main/scala/htif.scala | 23 +++++------- rocket/src/main/scala/icache.scala | 1 + rocket/src/main/scala/icache_prefetch.scala | 1 + rocket/src/main/scala/nbdcache.scala | 40 ++++++++++----------- rocket/src/main/scala/top.scala | 3 ++ 7 files changed, 53 insertions(+), 45 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 50f66a58..c9b442f1 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -10,8 +10,11 @@ class ioMem() extends Bundle val req_rdy = Bool(INPUT); val req_rw = Bool(OUTPUT); val req_addr = UFix(PADDR_BITS - OFFSET_BITS, OUTPUT); - val req_wdata = Bits(MEM_DATA_BITS, OUTPUT); val req_tag = Bits(MEM_TAG_BITS, OUTPUT); + + val 
req_data_val = Bool(OUTPUT); + val req_data_rdy = Bool(INPUT); + val req_data_bits = Bits(MEM_DATA_BITS, OUTPUT); val resp_val = Bool(INPUT); val resp_tag = Bits(MEM_TAG_BITS, INPUT); @@ -35,24 +38,37 @@ class rocketMemArbiter(n: Int) extends Component { req_rdy = req_rdy && !io.requestor(i).req_val } + var req_data_val = Bool(false) + var req_data_rdy = io.mem.req_data_rdy + for (i <- 0 until n) + { + io.requestor(i).req_data_rdy := req_data_rdy + req_data_val = req_data_val || io.requestor(i).req_data_val + req_data_rdy = req_data_rdy && !io.requestor(i).req_data_val + } + var req_rw = io.requestor(n-1).req_rw var req_addr = io.requestor(n-1).req_addr - var req_wdata = io.requestor(n-1).req_wdata var req_tag = Cat(io.requestor(n-1).req_tag, UFix(n-1, log2up(n))) for (i <- n-1 to 0 by -1) { req_rw = Mux(io.requestor(i).req_val, io.requestor(i).req_rw, req_rw) req_addr = Mux(io.requestor(i).req_val, io.requestor(i).req_addr, req_addr) - req_wdata = Mux(io.requestor(i).req_val, io.requestor(i).req_wdata, req_wdata) req_tag = Mux(io.requestor(i).req_val, Cat(io.requestor(i).req_tag, UFix(i, log2up(n))), req_tag) } + var req_data_bits = io.requestor(n-1).req_data_bits + for (i <- n-1 to 0 by -1) + req_data_bits = Mux(io.requestor(i).req_data_val, io.requestor(i).req_data_bits, req_data_bits) + io.mem.req_val := req_val io.mem.req_rw := req_rw io.mem.req_addr := req_addr - io.mem.req_wdata := req_wdata io.mem.req_tag := req_tag + io.mem.req_data_val := req_data_val + io.mem.req_data_bits := req_data_bits + for (i <- 0 until n) { io.requestor(i).resp_val := io.mem.resp_val && io.mem.resp_tag(log2up(n)-1,0) === UFix(i) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 11c764ec..75c430a3 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -424,12 +424,6 @@ class CoherenceHubNoDir extends CoherenceHub { //mem_req_arb.io.out.ready := io.mem.req_cmd.ready || io.mem.req_data.ready 
io.mem.req_cmd <> mem_req_arb.io.out.bits.req_cmd io.mem.req_data <> mem_req_arb.io.out.bits.req_data - //io.mem.req_wdata := MuxLookup(mem_req_arb.io.out.bits.data_idx, - // Bits(0, width = MEM_DATA_BITS), - // (0 until NTILES).map( j => - // UFix(j) -> Mux(mem_req_arb.io.out.bits.is_probe_rep, - // io.tiles(j).probe_rep_data.bits.data, - // io.tiles(j).xact_init_data.bits.data))) // Handle probe replies, which may or may not have data for( j <- 0 until NTILES ) { diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 77d189eb..04e2bf5a 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -78,7 +78,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component Mux(!nack && cmd === cmd_readcr, UFix(1), UFix(0))) val tx_done = packet_ram_raddr - UFix(1) === tx_size - val state_rx :: state_pcr :: state_mem_req :: state_mem_resp :: state_tx :: Nil = Enum(5) { UFix() } + val state_rx :: state_pcr :: state_mem_req :: state_mem_wdata :: state_mem_rdata :: state_tx :: Nil = Enum(6) { UFix() } val state = Reg(resetVal = state_rx) when (state === state_rx && rx_done) { @@ -94,18 +94,11 @@ class rocketHTIF(w: Int, ncores: Int) extends Component val mem_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) when (state === state_mem_req && io.mem.req_rdy) { - when (cmd === cmd_writemem) { - when (mem_cnt.andR) { - state := state_tx - } - mem_cnt := mem_cnt + UFix(1) - } - .otherwise { - state := state_mem_resp - } + state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) } - when (state === state_mem_resp && io.mem.resp_val) { - when (mem_cnt.andR) { + when (state === state_mem_wdata && io.mem.req_data_rdy || + state === state_mem_rdata && io.mem.resp_val) { + when (mem_cnt.andR) { state := state_tx } mem_cnt := mem_cnt + UFix(1) @@ -120,13 +113,15 @@ class rocketHTIF(w: Int, ncores: Int) extends Component for (i <- 0 until MEM_DATA_BITS/short_request_bits) { val idx = Cat(mem_cnt, UFix(i, 
log2up(MEM_DATA_BITS/short_request_bits))) packet_ram.write(idx, io.mem.resp_data((i+1)*short_request_bits-1, i*short_request_bits), - state === state_mem_resp && io.mem.resp_val) + state === state_mem_rdata && io.mem.resp_val) mem_req_data = Cat(packet_ram.read(idx), mem_req_data) } io.mem.req_val := state === state_mem_req io.mem.req_rw := cmd === cmd_writemem io.mem.req_addr := addr >> UFix(OFFSET_BITS-3) - io.mem.req_wdata := mem_req_data + + io.mem.req_data_val := state === state_mem_wdata + io.mem.req_data_bits := mem_req_data pcr_done := Bool(false) val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 09cbb156..92bfa0f6 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -131,6 +131,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { io.mem.req_val := (state === s_request); io.mem.req_rw := Bool(false) io.mem.req_addr := r_cpu_miss_addr(tagmsb,indexlsb).toUFix + io.mem.req_data_val := Bool(false) // control state machine switch (state) { diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 104257e0..d47914ec 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -33,6 +33,7 @@ class rocketIPrefetcher extends Component() { io.mem.req_rw := Bool(false) io.mem.req_tag := Mux(io.icache.req_val && !hit, UFix(0), UFix(1)) io.mem.req_addr := Mux(io.mem.req_tag(0).toBool, prefetch_addr, io.icache.req_addr); + io.mem.req_data_val := Bool(false) val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); when (ip_mem_resp_val.toBool) { fill_cnt := fill_cnt + UFix(1); } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 89a4e5c6..6317a764 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -423,29 +423,21 @@ 
class WritebackUnit extends Component { val data_resp = Bits(MEM_DATA_BITS, INPUT) val refill_req = (new ioDecoupled) { new MemReq() } val mem_req = (new ioDecoupled) { new MemReq() }.flip() - val mem_req_data = Bits(MEM_DATA_BITS, OUTPUT) + val mem_req_data = (new ioDecoupled) { Bits(width = MEM_DATA_BITS) }.flip() } - val wbq = (new queue(REFILL_CYCLES)) { Bits(width = MEM_DATA_BITS) } val valid = Reg(resetVal = Bool(false)) + val data_req_fired = Reg(resetVal = Bool(false)) val cnt = Reg() { UFix(width = log2up(REFILL_CYCLES+1)) } val addr = Reg() { new WritebackReq() } - // don't allow memory requests to bypass conflicting writebacks. - // also don't allow a refill request once a writeback has started. - // TODO: turn this into a victim buffer. - val block_refill = valid && ((io.refill_req.bits.addr(IDX_BITS-1,0) === addr.idx) || (cnt === UFix(REFILL_CYCLES))) - val refill_val = io.refill_req.valid && !block_refill - - wbq.io.enq.valid := valid && Reg(io.data_req.valid && io.data_req.ready) - wbq.io.enq.bits := io.data_resp - wbq.io.deq.ready := io.mem_req.ready && !refill_val && (cnt === UFix(REFILL_CYCLES)) - - when (io.data_req.valid && io.data_req.ready) { cnt := cnt + UFix(1) } - when ((cnt === UFix(REFILL_CYCLES)) && !wbq.io.deq.valid) { valid := Bool(false) } + data_req_fired := Bool(false) + when (io.data_req.valid && io.data_req.ready) { data_req_fired := Bool(true); cnt := cnt + UFix(1) } + when (data_req_fired && !io.mem_req_data.ready) { data_req_fired := Bool(false); cnt := cnt - UFix(1) } + when ((cnt === UFix(REFILL_CYCLES)) && io.mem_req_data.ready) { valid := Bool(false) } when (io.req.valid && io.req.ready) { valid := Bool(true); cnt := UFix(0); addr := io.req.bits } - io.req.ready := !valid + io.req.ready := !valid && io.mem_req.ready io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) io.data_req.bits.way_en := addr.way_oh io.data_req.bits.inner_req.idx := addr.idx @@ -454,12 +446,15 @@ class WritebackUnit extends Component { 
io.data_req.bits.inner_req.wmask := Bits(0) io.data_req.bits.inner_req.data := Bits(0) - io.refill_req.ready := io.mem_req.ready && !block_refill - io.mem_req.valid := refill_val || wbq.io.deq.valid && (cnt === UFix(REFILL_CYCLES)) - io.mem_req.bits.rw := !refill_val - io.mem_req.bits.addr := Mux(refill_val, io.refill_req.bits.addr, Cat(addr.ppn, addr.idx).toUFix) + val wb_req_val = io.req.valid && !valid + io.refill_req.ready := io.mem_req.ready && !wb_req_val + io.mem_req.valid := io.refill_req.valid || wb_req_val + io.mem_req.bits.rw := wb_req_val + io.mem_req.bits.addr := Mux(wb_req_val, Cat(io.req.bits.ppn, io.req.bits.idx).toUFix, io.refill_req.bits.addr) io.mem_req.bits.tag := io.refill_req.bits.tag - io.mem_req_data := wbq.io.deq.bits + + io.mem_req_data.valid := data_req_fired + io.mem_req_data.bits := io.data_resp } class FlushUnit(lines: Int) extends Component with ThreeStateIncoherence{ @@ -977,7 +972,10 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { wb.io.mem_req.ready := io.mem.req_rdy io.mem.req_val := wb.io.mem_req.valid io.mem.req_rw := wb.io.mem_req.bits.rw - io.mem.req_wdata := wb.io.mem_req_data io.mem.req_tag := wb.io.mem_req.bits.tag.toUFix io.mem.req_addr := wb.io.mem_req.bits.addr + + io.mem.req_data_val := wb.io.mem_req_data.valid + wb.io.mem_req_data.ready := io.mem.req_data_rdy + io.mem.req_data_bits := wb.io.mem_req_data.bits } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index f379c017..3f3fd1d2 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -34,7 +34,10 @@ class Top() extends Component { cpu.io.vimem <> vicache.io.cpu; } else + { arbiter.io.requestor(2).req_val := Bool(false) + arbiter.io.requestor(2).req_data_val := Bool(false) + } htif.io.host <> io.host cpu.io.host <> htif.io.cpu(0); From 8ff6e21e3a7a7e1ee281d95ac87dac378032ced5 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 29 Feb 2012 00:44:03 -0800 Subject: [PATCH 0241/1087] 
Fixed race between read resps/reps and write req/reps in null hub --- rocket/src/main/scala/coherence.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 75c430a3..d7443d71 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -329,19 +329,19 @@ class CoherenceHubNull extends Component { val x_init = io.tile.xact_init val is_write = x_init.bits.t_type === X_WRITE_UNCACHED - x_init.ready := io.mem.req_cmd.ready - io.mem.req_cmd.valid := x_init.valid + x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp + io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) io.mem.req_cmd.bits.rw := is_write io.mem.req_cmd.bits.tag := x_init.bits.tile_xact_id io.mem.req_cmd.bits.addr := x_init.bits.address io.mem.req_data <> io.tile.xact_init_data val x_rep = io.tile.xact_rep - x_rep.bits.t_type := X_READ_EXCLUSIVE + x_rep.bits.t_type := Mux(is_write, X_WRITE_UNCACHED, X_READ_EXCLUSIVE) x_rep.bits.tile_xact_id := Mux(is_write, x_init.bits.tile_xact_id, io.mem.resp.tag) x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.data := io.mem.resp.data - x_rep.valid := io.mem.resp.valid + x_rep.valid := io.mem.resp.valid || is_write } From 082b38d315e823d2bc773a53de45d5b7cdb2e81e Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 29 Feb 2012 02:59:27 -0800 Subject: [PATCH 0242/1087] Broadcast hub nears completion. Still does not handle generation/arbitration for decoupled mem reqs. 
--- rocket/src/main/scala/coherence.scala | 107 ++++++++++++++++---------- 1 file changed, 66 insertions(+), 41 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index d7443d71..75796a9d 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -33,9 +33,9 @@ class HubMemReq extends Bundle { val req_data = (new ioDecoupled) { new MemData() } } -class HubProbeRep extends Bundle { - val reply = (new ioDecoupled) { new ProbeReply } - val data_idx = Bits(width = log2up(NTILES)) +class TrackerProbeData extends Bundle { + val valid = Bool() + val data_tile_id = Bits(width = log2up(NTILES)) } class TrackerAllocReq extends Bundle { @@ -180,10 +180,9 @@ trait FourStateCoherence extends CoherencePolicy { class XactTracker(id: Int) extends Component with CoherencePolicy { val io = new Bundle { val alloc_req = (new ioDecoupled) { new TrackerAllocReq() } - val probe_rep = (new ioDecoupled) { new HubProbeRep() } + val probe_data = (new TrackerProbeData).asInput val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) - val p_rep_has_data = Bool(INPUT) val p_rep_cnt_dec = Bits(NTILES, INPUT) val p_req_cnt_inc = Bits(NTILES, INPUT) val p_rep_data = (new ioDecoupled) { new ProbeReplyData() } @@ -207,13 +206,21 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { } def sendProbeReqType(t_type: UFix, global_state: UFix): UFix = { - MuxCase(P_COPY, Array((t_type === X_READ_SHARED) -> P_DOWNGRADE, + MuxCase(P_COPY, Array((t_type === X_READ_SHARED) -> P_DOWNGRADE, (t_type === X_READ_EXCLUSIVE) -> P_INVALIDATE, - (t_type === X_READ_UNCACHED) -> P_COPY, + (t_type === X_READ_UNCACHED) -> P_COPY, (t_type === X_WRITE_UNCACHED) -> P_INVALIDATE)) } - val s_idle :: s_mem_r :: s_mem_w :: s_mem_wr :: s_probe :: s_busy :: Nil = Enum(6){ UFix() } + def needsMemRead(t_type: UFix, global_state: UFix): Bool = { + (t_type != X_WRITE_UNCACHED) + } + + def needsAckRep(t_type: UFix, 
global_state: UFix): Bool = { + (t_type === X_WRITE_UNCACHED) + } + + val s_idle :: s_mem :: s_probe :: s_busy :: Nil = Enum(4){ UFix() } val state = Reg(resetVal = s_idle) val addr_ = Reg{ Bits() } val t_type_ = Reg{ Bits() } @@ -223,9 +230,10 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { val mem_count = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) val p_rep_count = Reg(resetVal = UFix(0, width = log2up(NTILES))) val p_req_flags = Reg(resetVal = UFix(0, width = NTILES)) - val p_rep_data_idx_ = Reg{ Bits() } - val x_init_data_needs_wb = Reg{ Bool() } - val p_rep_data_needs_wb = Reg{ Bool() } + val p_rep_tile_id_ = Reg{ Bits() } + val x_needs_read = Reg{ Bool() } + val x_init_data_needs_write = Reg{ Bool() } + val p_rep_data_needs_write = Reg{ Bool() } io.busy := state != s_idle io.addr := addr_ @@ -235,15 +243,17 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { io.t_type := t_type_ io.mem_req.valid := Bool(false) - io.mem_req.bits.req_cmd.bits.rw := state === s_mem_w || state === s_mem_wr + io.mem_req.bits.req_cmd.valid := Bool(false) + io.mem_req.bits.req_cmd.bits.rw := Bool(false) io.mem_req.bits.req_cmd.bits.addr := addr_ io.mem_req.bits.req_cmd.bits.tag := UFix(id) + io.mem_req.bits.req_data.valid := Bool(false) + io.mem_req.bits.req_data.bits.data := UFix(0) // := io.mem.ready //sent mem req io.probe_req.valid := Bool(false) io.probe_req.bits.p_type := sendProbeReqType(t_type_, UFix(0)) io.probe_req.bits.global_xact_id := UFix(id) io.probe_req.bits.address := addr_ - // := io.probe_req.ready //got through arbiter ---- p_rep_dec_arr io.push_p_req := Bits(0, width = NTILES) io.pop_p_rep := Bits(0, width = NTILES) io.pop_p_rep_data := Bits(0, width = NTILES) @@ -258,32 +268,14 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { t_type_ := io.alloc_req.bits.xact_init.t_type init_tile_id_ := io.alloc_req.bits.init_tile_id tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id - 
x_init_data_needs_wb := io.alloc_req.bits.xact_init.has_data + x_init_data_needs_write := io.alloc_req.bits.xact_init.has_data + x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) p_rep_count := UFix(NTILES) p_req_flags := ~Bits(0, width = NTILES) state := s_probe io.pop_x_init := Bool(true) } } - is(s_mem_r) { - io.mem_req.valid := Bool(true) - when(io.mem_req.ready) { state := s_busy } - } - is(s_mem_w) { - io.mem_req.valid := Bool(true) - when(io.mem_req.ready) { state := s_busy } - } - is(s_mem_wr) { - when(io.probe_rep.bits.reply.bits.has_data) { - //io.pop_p_rep(p_rep_data_idx) := io.mem_req_rdy - //io.pop_p_rep_data(p_rep_data_idx) := io.mem_req_rdy //TODO - } . otherwise { - //io.pop_x_init := io.mem_req_rdy - //io.pop_x_init_data := io.mem_req_rdy - } - io.mem_req.valid := Bool(true) - when(io.mem_req.ready) { state := s_mem_r } - } is(s_probe) { when(p_req_flags.orR) { io.push_p_req := p_req_flags @@ -292,22 +284,55 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { when(io.p_req_cnt_inc.orR) { p_req_flags := p_req_flags & ~io.p_req_cnt_inc // unflag sent reqs } - val p_rep_has_data = Bool(INPUT) - val p_rep_data_idx = Bits(log2up(NTILES), INPUT) - val p_rep_cnt_dec = Bits(NTILES, INPUT) when(io.p_rep_cnt_dec.orR) { val p_rep_count_next = p_rep_count - PopCount(io.p_rep_cnt_dec) p_rep_count := p_rep_count_next when(p_rep_count_next === UFix(0)) { - state := s_busy //TODO: XXXXXXXXXX + state := s_mem } } - when(p_rep_has_data) { - p_rep_data_needs_wb := Bool(true) - p_rep_data_idx_ := p_rep_data_idx + when(io.probe_data.valid) { + p_rep_data_needs_write := Bool(true) + p_rep_tile_id_ := io.p_rep_tile_id } } - is(s_busy) { + is(s_mem) { + when(x_init_data_needs_write) { + //io.mem_req.valid := //?TODO ??? || io.x_init_data.valid + //io.mem_req.bits.req_cmd.valid := // TODO ??? 
+ io.mem_req.bits.req_cmd.bits.rw := Bool(true) + io.mem_req.bits.req_data <> io.x_init_data + when(io.mem_req.ready && io.mem_req.bits.req_cmd.ready) { + //TODO + } + when(io.mem_req.ready && io.mem_req.bits.req_data.ready) { + io.pop_x_init_data := Bool(true) + //TODO: count with mem_count somehow + } + } . elsewhen (p_rep_data_needs_write) { + //io.mem_req.valid := //TODO ??? || io.p_rep_data.valid + //io.mem_req.bits.req_cmd.valid := //TODO ??? + io.mem_req.bits.req_cmd.bits.rw := Bool(true) + io.mem_req.bits.req_data <> io.p_rep_data + when(io.mem_req.ready && io.mem_req.bits.req_cmd.ready) { + //TODO + } + when(io.mem_req.ready && io.mem_req.bits.req_data.ready) { + io.pop_p_rep_data := Bool(true) + //TODO: count with mem_count somehow + } + } . elsewhen (x_needs_read) { + io.mem_req.valid := Bool(true) + io.mem_req.bits.req_cmd.valid := Bool(true) + when(io.mem_req.ready && io.mem_req.bits.req_cmd.ready) { + x_needs_read := Bool(false) + } + } . otherwise { + io.send_x_rep_ack := needsAckRep(t_type_, UFix(0)) + state := s_busy + } + } + is(s_busy) { // Nothing left to do but wait for transaction to complete when (io.xact_finish) { state := s_idle } From 012da6002e4b503e3540e9ae15bd578ad68c8bee Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 29 Feb 2012 03:08:04 -0800 Subject: [PATCH 0243/1087] replace tile memory interface with ioTileLink work in progress towards coherent HTIF. for now, requests are incoherently passed through a null coherence hub. 
--- rocket/src/main/scala/arbiter.scala | 89 +++++++++------------ rocket/src/main/scala/coherence.scala | 31 ++++--- rocket/src/main/scala/consts.scala | 4 +- rocket/src/main/scala/htif.scala | 23 +++--- rocket/src/main/scala/icache.scala | 27 ++++--- rocket/src/main/scala/icache_prefetch.scala | 37 ++++----- rocket/src/main/scala/nbdcache.scala | 66 +++++++-------- rocket/src/main/scala/queues.scala | 4 +- rocket/src/main/scala/top.scala | 36 ++++++--- rocket/src/main/scala/util.scala | 6 ++ 10 files changed, 163 insertions(+), 160 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index c9b442f1..e323cf7a 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -4,75 +4,64 @@ import Chisel._; import Node._; import Constants._; -class ioMem() extends Bundle -{ - val req_val = Bool(OUTPUT); - val req_rdy = Bool(INPUT); - val req_rw = Bool(OUTPUT); - val req_addr = UFix(PADDR_BITS - OFFSET_BITS, OUTPUT); - val req_tag = Bits(MEM_TAG_BITS, OUTPUT); - - val req_data_val = Bool(OUTPUT); - val req_data_rdy = Bool(INPUT); - val req_data_bits = Bits(MEM_DATA_BITS, OUTPUT); - - val resp_val = Bool(INPUT); - val resp_tag = Bits(MEM_TAG_BITS, INPUT); - val resp_data = Bits(MEM_DATA_BITS, INPUT); -} - -class ioMemArbiter(n: Int) extends Bundle() { - val mem = new ioMem(); - val requestor = Vec(n) { new ioMem().flip() } -} - class rocketMemArbiter(n: Int) extends Component { - val io = new ioMemArbiter(n); + val io = new Bundle { + val mem = new ioTileLink + val requestor = Vec(n) { new ioTileLink().flip } + } var req_val = Bool(false) - var req_rdy = io.mem.req_rdy + var req_rdy = io.mem.xact_init.ready for (i <- 0 until n) { - io.requestor(i).req_rdy := req_rdy - req_val = req_val || io.requestor(i).req_val - req_rdy = req_rdy && !io.requestor(i).req_val + io.requestor(i).xact_init.ready := req_rdy + req_val = req_val || io.requestor(i).xact_init.valid + req_rdy = req_rdy && 
!io.requestor(i).xact_init.valid } + // if more than one requestor at a time can write back, the data + // arbiter needs to be made stateful: one xact's write data must + // be sent to the memory system contiguously. var req_data_val = Bool(false) - var req_data_rdy = io.mem.req_data_rdy + var req_data_rdy = io.mem.xact_init_data.ready for (i <- 0 until n) { - io.requestor(i).req_data_rdy := req_data_rdy - req_data_val = req_data_val || io.requestor(i).req_data_val - req_data_rdy = req_data_rdy && !io.requestor(i).req_data_val + io.requestor(i).xact_init_data.ready := req_data_rdy + req_data_val = req_data_val || io.requestor(i).xact_init_data.valid + req_data_rdy = req_data_rdy && !io.requestor(i).xact_init_data.valid } - var req_rw = io.requestor(n-1).req_rw - var req_addr = io.requestor(n-1).req_addr - var req_tag = Cat(io.requestor(n-1).req_tag, UFix(n-1, log2up(n))) - for (i <- n-1 to 0 by -1) + var req_bits = Wire() { new TransactionInit } + req_bits := io.requestor(n-1).xact_init.bits + req_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2up(n))) + for (i <- n-2 to 0 by -1) { - req_rw = Mux(io.requestor(i).req_val, io.requestor(i).req_rw, req_rw) - req_addr = Mux(io.requestor(i).req_val, io.requestor(i).req_addr, req_addr) - req_tag = Mux(io.requestor(i).req_val, Cat(io.requestor(i).req_tag, UFix(i, log2up(n))), req_tag) + var my_req_bits = Wire() { new TransactionInit } + my_req_bits := io.requestor(i).xact_init.bits + my_req_bits.tile_xact_id := Cat(io.requestor(i).xact_init.bits.tile_xact_id, UFix(i, log2up(n))) + + req_bits = Mux(io.requestor(i).xact_init.valid, my_req_bits, req_bits) } - var req_data_bits = io.requestor(n-1).req_data_bits - for (i <- n-1 to 0 by -1) - req_data_bits = Mux(io.requestor(i).req_data_val, io.requestor(i).req_data_bits, req_data_bits) + var req_data_bits = io.requestor(n-1).xact_init_data.bits + for (i <- n-2 to 0 by -1) + req_data_bits = Mux(io.requestor(i).xact_init_data.valid, 
io.requestor(i).xact_init_data.bits, req_data_bits) - io.mem.req_val := req_val - io.mem.req_rw := req_rw - io.mem.req_addr := req_addr - io.mem.req_tag := req_tag + io.mem.xact_init.valid := req_val + io.mem.xact_init.bits := req_bits - io.mem.req_data_val := req_data_val - io.mem.req_data_bits := req_data_bits + io.mem.xact_init_data.valid := req_data_val + io.mem.xact_init_data.bits := req_data_bits for (i <- 0 until n) { - io.requestor(i).resp_val := io.mem.resp_val && io.mem.resp_tag(log2up(n)-1,0) === UFix(i) - io.requestor(i).resp_data := io.mem.resp_data - io.requestor(i).resp_tag := io.mem.resp_tag >> UFix(log2up(n)) + val tag = io.mem.xact_rep.bits.tile_xact_id + io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid && tag(log2up(n)-1,0) === UFix(i) + io.requestor(i).xact_rep.bits.data := io.mem.xact_rep.bits.data + io.requestor(i).xact_rep.bits.t_type := io.mem.xact_rep.bits.t_type + io.requestor(i).xact_rep.bits.has_data := io.mem.xact_rep.bits.has_data + io.requestor(i).xact_rep.bits.tile_xact_id := tag >> UFix(log2up(n)) + io.requestor(i).xact_rep.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id } + io.mem.xact_rep.ready := Bool(true) // XXX we shouldn't have xact_rep.ready } diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 75796a9d..9fbd69e9 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -10,22 +10,21 @@ class MemData extends Bundle { class MemReqCmd() extends Bundle { val rw = Bool() - val addr = UFix(PADDR_BITS - OFFSET_BITS) - val tag = Bits(MEM_TAG_BITS) + val addr = UFix(width = PADDR_BITS - OFFSET_BITS) + val tag = Bits(width = MEM_TAG_BITS) } class MemResp () extends Bundle { - val tag = Bits(MEM_TAG_BITS) + val tag = Bits(width = MEM_TAG_BITS) val data = Bits(width = MEM_DATA_BITS) - val valid = Bool() } -class ioMemHub() extends Bundle +class ioMem() extends Bundle { val req_cmd = (new ioDecoupled) { new MemReqCmd() }.flip val 
req_data = (new ioDecoupled) { new MemData() }.flip - val resp = new MemResp() + val resp = (new ioValid) { new MemResp() } } class HubMemReq extends Bundle { @@ -49,7 +48,7 @@ class TransactionInit extends Bundle { val t_type = Bits(width = TTYPE_BITS) val has_data = Bool() val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) - val address = Bits(width = PADDR_BITS) + val address = UFix(width = PADDR_BITS) } class TransactionInitData extends MemData @@ -348,8 +347,8 @@ abstract class CoherenceHub extends Component with CoherencePolicy class CoherenceHubNull extends Component { val io = new Bundle { - val tile = new ioTileLink() - val mem = new ioMemHub() + val tile = new ioTileLink().flip + val mem = new ioMem } val x_init = io.tile.xact_init @@ -362,11 +361,11 @@ class CoherenceHubNull extends Component { io.mem.req_data <> io.tile.xact_init_data val x_rep = io.tile.xact_rep - x_rep.bits.t_type := Mux(is_write, X_WRITE_UNCACHED, X_READ_EXCLUSIVE) - x_rep.bits.tile_xact_id := Mux(is_write, x_init.bits.tile_xact_id, io.mem.resp.tag) + x_rep.bits.t_type := Mux(io.mem.resp.valid, X_READ_EXCLUSIVE, X_WRITE_UNCACHED) + x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) x_rep.bits.global_xact_id := UFix(0) // don't care - x_rep.bits.data := io.mem.resp.data - x_rep.valid := io.mem.resp.valid || is_write + x_rep.bits.data := io.mem.resp.bits.data + x_rep.valid := io.mem.resp.valid || x_init.valid && is_write } @@ -388,7 +387,7 @@ class CoherenceHubNoDir extends CoherenceHub { val io = new Bundle { val tiles = Vec(NTILES) { new ioTileLink() } - val mem = new ioMemHub + val mem = new ioMem } val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) @@ -427,12 +426,12 @@ class CoherenceHubNoDir extends CoherenceHub { // Reply to initial requestor // Forward memory responses from mem to tile - val idx = io.mem.resp.tag + val idx = io.mem.resp.bits.tag for( j <- 0 until NTILES ) { io.tiles(j).xact_rep.bits.t_type := 
getTransactionReplyType(t_type_arr.read(idx), sh_count_arr.read(idx)) io.tiles(j).xact_rep.bits.tile_xact_id := tile_xact_id_arr.read(idx) io.tiles(j).xact_rep.bits.global_xact_id := idx - io.tiles(j).xact_rep.bits.data := io.mem.resp.data + io.tiles(j).xact_rep.bits.data := io.mem.resp.bits.data io.tiles(j).xact_rep.valid := (UFix(j) === init_tile_id_arr.read(idx)) && (io.mem.resp.valid || send_x_rep_ack_arr.read(idx)) } // If there were a ready signal due to e.g. intervening network use: diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 17294bae..6377d8c1 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -182,7 +182,7 @@ object Constants val NTILES = 1 val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8 val TILE_ID_BITS = 1 - val TILE_XACT_ID_BITS = 1 // log2(NMSHR) + val TILE_XACT_ID_BITS = log2up(NMSHR)+2 val GLOBAL_XACT_ID_BITS = 4 val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS @@ -201,7 +201,7 @@ object Constants val MEM_TAG_BITS = 4 val MEM_DATA_BITS = 128 val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS - require(MEM_TAG_BITS >= max(log2up(NMSHR)+1, GLOBAL_XACT_ID_BITS)) + require(MEM_TAG_BITS >= max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS)) val DTLB_ENTRIES = 8; val ITLB_ENTRIES = 8; diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 04e2bf5a..551f3d25 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -26,7 +26,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component val io = new Bundle { val host = new ioHost(w) val cpu = Vec(ncores) { new ioHTIF().flip() } - val mem = new ioMem + val mem = new ioTileLink } val short_request_bits = 64 @@ -93,11 +93,11 @@ class rocketHTIF(w: Int, ncores: Int) extends Component } val mem_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) - when (state === state_mem_req && io.mem.req_rdy) { + when (state === state_mem_req && io.mem.xact_init.ready) { state := Mux(cmd 
=== cmd_writemem, state_mem_wdata, state_mem_rdata) } - when (state === state_mem_wdata && io.mem.req_data_rdy || - state === state_mem_rdata && io.mem.resp_val) { + when (state === state_mem_wdata && io.mem.xact_init_data.ready || + state === state_mem_rdata && io.mem.xact_rep.valid) { when (mem_cnt.andR) { state := state_tx } @@ -112,16 +112,17 @@ class rocketHTIF(w: Int, ncores: Int) extends Component var mem_req_data: Bits = null for (i <- 0 until MEM_DATA_BITS/short_request_bits) { val idx = Cat(mem_cnt, UFix(i, log2up(MEM_DATA_BITS/short_request_bits))) - packet_ram.write(idx, io.mem.resp_data((i+1)*short_request_bits-1, i*short_request_bits), - state === state_mem_rdata && io.mem.resp_val) + packet_ram.write(idx, io.mem.xact_rep.bits.data((i+1)*short_request_bits-1, i*short_request_bits), + state === state_mem_rdata && io.mem.xact_rep.valid) mem_req_data = Cat(packet_ram.read(idx), mem_req_data) } - io.mem.req_val := state === state_mem_req - io.mem.req_rw := cmd === cmd_writemem - io.mem.req_addr := addr >> UFix(OFFSET_BITS-3) + io.mem.xact_init.valid := state === state_mem_req + io.mem.xact_init.bits.t_type := Mux(cmd === cmd_writemem, X_WRITE_UNCACHED, X_READ_UNCACHED) + io.mem.xact_init.bits.has_data := cmd === cmd_writemem + io.mem.xact_init.bits.address := addr >> UFix(OFFSET_BITS-3) - io.mem.req_data_val := state === state_mem_wdata - io.mem.req_data_bits := mem_req_data + io.mem.xact_init_data.valid:= state === state_mem_wdata + io.mem.xact_init_data.bits.data := mem_req_data pcr_done := Bool(false) val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 92bfa0f6..3d18f974 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -20,7 +20,7 @@ class ioImem(view: List[String] = null) extends Bundle (view) class ioRocketICache extends Bundle() { val cpu = new ioImem(); - val mem = new ioMem + val mem = new ioTileLink } // basic 
direct mapped instruction cache @@ -75,7 +75,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { // refill counter val refill_count = Reg(resetVal = UFix(0, rf_cnt_bits)); - when (io.mem.resp_val) { + when (io.mem.xact_rep.valid) { refill_count := refill_count + UFix(1); } @@ -84,7 +84,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { val tag_addr = Mux((state === s_refill_wait), r_cpu_req_idx(indexmsb,indexlsb), io.cpu.req_idx(indexmsb,indexlsb)).toUFix; - val tag_we = (state === s_refill_wait) && io.mem.resp_val; + val tag_we = (state === s_refill_wait) && io.mem.xact_rep.valid; val data_addr = Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; @@ -112,10 +112,10 @@ class rocketICache(sets: Int, assoc: Int) extends Component { val hit = valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) // data array - val data_array = Mem(sets*REFILL_CYCLES){ io.mem.resp_data } + val data_array = Mem(sets*REFILL_CYCLES){ io.mem.xact_rep.bits.data } data_array.setReadLatency(1); data_array.setTarget('inst); - val data_out = data_array.rw(data_addr, io.mem.resp_data, io.mem.resp_val && repl_me) + val data_out = data_array.rw(data_addr, io.mem.xact_rep.bits.data, io.mem.xact_rep.valid && repl_me) data_mux.io.sel(i) := hit data_mux.io.in(i) := (data_out >> word_shift)(databits-1,0); @@ -128,10 +128,11 @@ class rocketICache(sets: Int, assoc: Int) extends Component { io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_hit; rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); io.cpu.resp_data := data_mux.io.out - io.mem.req_val := (state === s_request); - io.mem.req_rw := Bool(false) - io.mem.req_addr := r_cpu_miss_addr(tagmsb,indexlsb).toUFix - io.mem.req_data_val := Bool(false) + io.mem.xact_init.valid := (state === s_request) + io.mem.xact_init.bits.t_type := X_READ_UNCACHED 
+ io.mem.xact_init.bits.has_data := Bool(false) + io.mem.xact_init.bits.address := r_cpu_miss_addr(tagmsb,indexlsb).toUFix + io.mem.xact_init_data.valid := Bool(false) // control state machine switch (state) { @@ -148,19 +149,19 @@ class rocketICache(sets: Int, assoc: Int) extends Component { } is (s_request) { - when (io.mem.req_rdy) { + when (io.mem.xact_init.ready) { state := s_refill_wait; } } is (s_refill_wait) { - when (io.mem.resp_val) { + when (io.mem.xact_rep.valid) { state := s_refill; } } is (s_refill) { - when (io.mem.resp_val && (~refill_count === UFix(0))) { + when (io.mem.xact_rep.valid && refill_count.andR) { state := s_ready; } } - } + } } diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index d47914ec..ba666cd9 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -6,8 +6,8 @@ import Constants._; import scala.math._; class ioIPrefetcher extends Bundle() { - val icache = new ioMem().flip - val mem = new ioMem + val icache = new ioTileLink().flip + val mem = new ioTileLink val invalidate = Bool(INPUT) } @@ -18,22 +18,23 @@ class rocketIPrefetcher extends Component() { val s_invalid :: s_valid :: s_refilling :: s_req_wait :: s_resp_wait :: s_bad_resp_wait :: Nil = Enum(6) { UFix() }; val state = Reg(resetVal = s_invalid); - val demand_miss = io.icache.req_val & io.icache.req_rdy; - val prefetch_addr = Reg() { UFix(width = io.icache.req_addr.width) }; - when (demand_miss) { prefetch_addr := io.icache.req_addr + UFix(1); } - - val addr_match = (prefetch_addr === io.icache.req_addr); + val demand_miss = io.icache.xact_init.valid && io.icache.xact_init.ready + val prefetch_addr = Reg() { UFix(width = io.icache.xact_init.bits.address.width) }; + val addr_match = (prefetch_addr === io.icache.xact_init.bits.address); val hit = (state != s_invalid) & (state != s_req_wait) & addr_match; + val prefetch_miss = io.icache.xact_init.valid && !hit + when 
(demand_miss) { prefetch_addr := io.icache.xact_init.bits.address + UFix(1); } - io.icache.req_rdy := io.mem.req_rdy; - val ip_mem_req_rdy = io.mem.req_rdy & ~(io.icache.req_val & ~hit); - val ip_mem_resp_val = io.mem.resp_val && io.mem.resp_tag(0).toBool; + io.icache.xact_init.ready := io.mem.xact_init.ready + val ip_mem_req_rdy = io.mem.xact_init.ready && !prefetch_miss + val ip_mem_resp_val = io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id(0) - io.mem.req_val := io.icache.req_val & ~hit | (state === s_req_wait); - io.mem.req_rw := Bool(false) - io.mem.req_tag := Mux(io.icache.req_val && !hit, UFix(0), UFix(1)) - io.mem.req_addr := Mux(io.mem.req_tag(0).toBool, prefetch_addr, io.icache.req_addr); - io.mem.req_data_val := Bool(false) + io.mem.xact_init.valid := prefetch_miss || (state === s_req_wait) + io.mem.xact_init.bits.t_type := X_READ_UNCACHED + io.mem.xact_init.bits.has_data := Bool(false) + io.mem.xact_init.bits.tile_xact_id := Mux(prefetch_miss, UFix(0), UFix(1)) + io.mem.xact_init.bits.address := Mux(prefetch_miss, io.icache.xact_init.bits.address, prefetch_addr); + io.mem.xact_init_data.valid := Bool(false) val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); when (ip_mem_resp_val.toBool) { fill_cnt := fill_cnt + UFix(1); } @@ -45,11 +46,11 @@ class rocketIPrefetcher extends Component() { val forward_done = (~forward_cnt === UFix(0)) & pdq.io.deq.valid; forward := (demand_miss & hit | forward & ~forward_done); - io.icache.resp_val := (io.mem.resp_val && !io.mem.resp_tag(0).toBool) || (forward && pdq.io.deq.valid); - io.icache.resp_data := Mux(forward, pdq.io.deq.bits, io.mem.resp_data); + io.icache.xact_rep.valid := io.mem.xact_rep.valid && !io.mem.xact_rep.bits.tile_xact_id(0) || (forward && pdq.io.deq.valid) + io.icache.xact_rep.bits.data := Mux(forward, pdq.io.deq.bits, io.mem.xact_rep.bits.data) pdq.io.flush := Reg(demand_miss && !hit || (state === s_bad_resp_wait), resetVal = Bool(false)) - pdq.io.enq.bits := 
io.mem.resp_data; + pdq.io.enq.bits := io.mem.xact_rep.bits.data pdq.io.enq.valid := ip_mem_resp_val.toBool; pdq.io.deq.ready := forward; diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 6317a764..50dd9fc3 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -133,12 +133,6 @@ class DataArrayArrayReq extends Bundle { val way_en = Bits(width = NWAYS) } -class MemReq extends Bundle { - val rw = Bool() - val addr = UFix(width = PADDR_BITS-OFFSET_BITS) - val tag = Bits(width = MEM_TAG_BITS) -} - class WritebackReq extends Bundle { val ppn = Bits(width = TAG_BITS) val idx = Bits(width = IDX_BITS) @@ -182,7 +176,7 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { val way_oh = Bits(NWAYS, OUTPUT) val mem_resp_val = Bool(INPUT) - val mem_req = (new ioDecoupled) { new MemReq() }.flip + val mem_req = (new ioDecoupled) { new TransactionInit }.flip val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip val replay = (new ioDecoupled) { new Replay() }.flip } @@ -257,10 +251,10 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { io.meta_req.bits.way_en := way_oh_ io.mem_req.valid := valid && !requested - //io.mem_req.bits.itm := next_dirty - io.mem_req.bits.rw := Bool(false) - io.mem_req.bits.addr := Cat(ppn, idx_).toUFix - io.mem_req.bits.tag := Bits(id) + io.mem_req.bits.t_type := Mux(needsWriteback(next_state), X_READ_EXCLUSIVE, X_READ_SHARED) + io.mem_req.bits.has_data := Bool(false) + io.mem_req.bits.address := Cat(ppn, idx_).toUFix + io.mem_req.bits.tile_xact_id := Bits(id) io.replay.valid := rpq.io.deq.valid && refilled io.replay.bits.idx := idx_ @@ -287,7 +281,7 @@ class MSHRFile extends Component { val fence_rdy = Bool(OUTPUT) - val mem_req = (new ioDecoupled) { new MemReq() }.flip() + val mem_req = (new ioDecoupled) { new TransactionInit }.flip() val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip() val replay = (new 
ioDecoupled) { new Replay() }.flip() } @@ -296,7 +290,7 @@ class MSHRFile extends Component { val mem_resp_idx_mux = (new Mux1H(NMSHR)){ Bits(width = IDX_BITS) } val mem_resp_way_oh_mux = (new Mux1H(NMSHR)){ Bits(width = NWAYS) } val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } - val mem_req_arb = (new Arbiter(NMSHR)) { new MemReq() } + val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } val replay_arb = (new Arbiter(NMSHR)) { new Replay() } val alloc_arb = (new Arbiter(NMSHR)) { Bool() } @@ -421,9 +415,9 @@ class WritebackUnit extends Component { val req = (new ioDecoupled) { new WritebackReq() } val data_req = (new ioDecoupled) { new DataArrayArrayReq() }.flip() val data_resp = Bits(MEM_DATA_BITS, INPUT) - val refill_req = (new ioDecoupled) { new MemReq() } - val mem_req = (new ioDecoupled) { new MemReq() }.flip() - val mem_req_data = (new ioDecoupled) { Bits(width = MEM_DATA_BITS) }.flip() + val refill_req = (new ioDecoupled) { new TransactionInit } + val mem_req = (new ioDecoupled) { new TransactionInit }.flip + val mem_req_data = (new ioDecoupled) { new TransactionInitData }.flip } val valid = Reg(resetVal = Bool(false)) @@ -449,12 +443,13 @@ class WritebackUnit extends Component { val wb_req_val = io.req.valid && !valid io.refill_req.ready := io.mem_req.ready && !wb_req_val io.mem_req.valid := io.refill_req.valid || wb_req_val - io.mem_req.bits.rw := wb_req_val - io.mem_req.bits.addr := Mux(wb_req_val, Cat(io.req.bits.ppn, io.req.bits.idx).toUFix, io.refill_req.bits.addr) - io.mem_req.bits.tag := io.refill_req.bits.tag + io.mem_req.bits.t_type := Mux(wb_req_val, X_WRITE_UNCACHED, io.refill_req.bits.t_type) + io.mem_req.bits.has_data := wb_req_val + io.mem_req.bits.address := Mux(wb_req_val, Cat(io.req.bits.ppn, io.req.bits.idx).toUFix, io.refill_req.bits.address) + io.mem_req.bits.tile_xact_id := Mux(wb_req_val, Bits(NMSHR), io.refill_req.bits.tile_xact_id) io.mem_req_data.valid := data_req_fired - io.mem_req_data.bits := 
io.data_resp + io.mem_req_data.bits.data := io.data_resp } class FlushUnit(lines: Int) extends Component with ThreeStateIncoherence{ @@ -680,7 +675,7 @@ abstract class HellaCache extends Component { class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val io = new Bundle { val cpu = new ioDmem() - val mem = new ioMem + val mem = new ioTileLink } val lines = 1 << IDX_BITS @@ -749,9 +744,11 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) // refill counter + val mem_resp_type = io.mem.xact_rep.bits.t_type + val refill_val = io.mem.xact_rep.valid && (mem_resp_type === X_READ_SHARED || mem_resp_type === X_READ_EXCLUSIVE) val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val rr_count_next = rr_count + UFix(1) - when (io.mem.resp_val) { rr_count := rr_count_next } + when (refill_val) { rr_count := rr_count_next } val misaligned = (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || @@ -806,19 +803,19 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val needs_writeback = needsWriteback(meta_wb_mux.state) // refill response - val block_during_refill = !io.mem.resp_val && (rr_count != UFix(0)) + val block_during_refill = !refill_val && (rr_count != UFix(0)) data_arb.io.in(0).bits.inner_req.offset := rr_count data_arb.io.in(0).bits.inner_req.rw := !block_during_refill data_arb.io.in(0).bits.inner_req.wmask := ~UFix(0, MEM_DATA_BITS/8) - data_arb.io.in(0).bits.inner_req.data := io.mem.resp_data - data_arb.io.in(0).valid := io.mem.resp_val || block_during_refill + data_arb.io.in(0).bits.inner_req.data := io.mem.xact_rep.bits.data + data_arb.io.in(0).valid := refill_val || block_during_refill // load hits data_arb.io.in(4).bits.inner_req.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) data_arb.io.in(4).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) 
data_arb.io.in(4).bits.inner_req.rw := Bool(false) data_arb.io.in(4).bits.inner_req.wmask := UFix(0) // don't care - data_arb.io.in(4).bits.inner_req.data := io.mem.resp_data // don't care + data_arb.io.in(4).bits.inner_req.data := io.mem.xact_rep.bits.data // don't care data_arb.io.in(4).valid := io.cpu.req_val && req_read data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check val early_load_nack = req_read && !data_arb.io.in(4).ready @@ -884,8 +881,8 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { mshr.io.req_type := r_cpu_req_type mshr.io.req_sdq_id := replayer.io.sdq_id mshr.io.req_way_oh := replaced_way_oh - mshr.io.mem_resp_val := io.mem.resp_val && (~rr_count === UFix(0)) - mshr.io.mem_resp_tag := io.mem.resp_tag + mshr.io.mem_resp_val := refill_val && (~rr_count === UFix(0)) + mshr.io.mem_resp_tag := io.mem.xact_rep.bits.tile_xact_id mshr.io.mem_req <> wb.io.refill_req mshr.io.meta_req <> meta_arb.io.in(1) mshr.io.replay <> replayer.io.replay @@ -968,14 +965,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { io.cpu.resp_type := loadgen.io.typ io.cpu.resp_data := loadgen.io.dout io.cpu.resp_data_subword := loadgen.io.r_dout_subword - - wb.io.mem_req.ready := io.mem.req_rdy - io.mem.req_val := wb.io.mem_req.valid - io.mem.req_rw := wb.io.mem_req.bits.rw - io.mem.req_tag := wb.io.mem_req.bits.tag.toUFix - io.mem.req_addr := wb.io.mem_req.bits.addr - - io.mem.req_data_val := wb.io.mem_req_data.valid - wb.io.mem_req_data.ready := io.mem.req_data_rdy - io.mem.req_data_bits := wb.io.mem_req_data.bits + + io.mem.xact_init <> wb.io.mem_req + io.mem.xact_init_data <> wb.io.mem_req_data } diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index fb1dd542..14f416ba 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -10,7 +10,7 @@ class ioQueue[T <: Data](flushable: Boolean)(data: => T) extends Bundle 
val deq = new ioDecoupled()(data).flip } -class queue[T <: Data](entries: Int, flushable: Boolean = false)(data: => T) extends Component +class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = false)(data: => T) extends Component { val io = new ioQueue(flushable)(data) @@ -50,6 +50,6 @@ class queue[T <: Data](entries: Int, flushable: Boolean = false)(data: => T) ext } io.deq.valid := maybe_full || enq_ptr != deq_ptr - io.enq.ready := !maybe_full || enq_ptr != deq_ptr + io.enq.ready := !maybe_full || enq_ptr != deq_ptr || (if (pipe) io.deq.ready else Bool(false)) io.deq.bits <> Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr) } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 3f3fd1d2..7a472a94 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -7,7 +7,7 @@ import Constants._; class ioTop(htif_width: Int) extends Bundle { val debug = new ioDebug(); val host = new ioHost(htif_width); - val mem = new ioMem(); + val mem = new ioMem } class Top() extends Component { @@ -21,23 +21,39 @@ class Top() extends Component { val icache_pf = new rocketIPrefetcher(); val dcache = new HellaCacheUniproc(); - val arbiter = new rocketMemArbiter(4); + val arbiter = new rocketMemArbiter(3 + (if (HAVE_VEC) 1 else 0)); arbiter.io.requestor(0) <> dcache.io.mem arbiter.io.requestor(1) <> icache_pf.io.mem - arbiter.io.requestor(3) <> htif.io.mem - arbiter.io.mem <> io.mem + arbiter.io.requestor(2) <> htif.io.mem + + val hub = new CoherenceHubNull + // connect tile to hub (figure out how to do this more compactly) + val xact_init_q = (new queue(2)) { new TransactionInit } + xact_init_q.io.enq <> arbiter.io.mem.xact_init + xact_init_q.io.deq <> hub.io.tile.xact_init + val xact_init_data_q = (new queue(2)) { new TransactionInitData } + xact_init_data_q.io.enq <> arbiter.io.mem.xact_init_data + xact_init_data_q.io.deq <> hub.io.tile.xact_init_data + val xact_rep_q = (new queue(1, pipe = 
true)) { new TransactionReply } + xact_rep_q.io.enq <> hub.io.tile.xact_rep + xact_rep_q.io.deq <> arbiter.io.mem.xact_rep + // connect hub to memory + val mem_req_q = (new queue(2)) { new MemReqCmd } + mem_req_q.io.enq <> hub.io.mem.req_cmd + mem_req_q.io.deq <> io.mem.req_cmd + val mem_req_data_q = (new queue(2)) { new MemData } + mem_req_data_q.io.enq <> hub.io.mem.req_data + mem_req_data_q.io.deq <> io.mem.req_data + hub.io.mem.resp.valid := Reg(io.mem.resp.valid, resetVal = Bool(false)) + hub.io.mem.resp.bits := Reg(io.mem.resp.bits) + if (HAVE_VEC) { val vicache = new rocketICache(128, 2); // 128 sets x 2 ways - arbiter.io.requestor(2) <> vicache.io.mem + arbiter.io.requestor(3) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu; } - else - { - arbiter.io.requestor(2).req_val := Bool(false) - arbiter.io.requestor(2).req_data_val := Bool(false) - } htif.io.host <> io.host cpu.io.host <> htif.io.cpu(0); diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 5ac3b41b..47361d2e 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -180,6 +180,12 @@ class ioDecoupled[T <: Data]()(data: => T) extends Bundle val bits = data.asInput } +class ioValid[T <: Data]()(data: => T) extends Bundle +{ + val valid = Bool(INPUT) + val bits = data.asInput +} + class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle { val in = Vec(n) { (new ioDecoupled()) { data } } val out = (new ioDecoupled()) { data }.flip() From b9ec69f8f51523adab16b0ca9d2f650a5f115105 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 29 Feb 2012 14:21:42 -0800 Subject: [PATCH 0244/1087] add new Queue singleton --- rocket/src/main/scala/queues.scala | 34 ++++++++++++++++++++++++++++++ rocket/src/main/scala/top.scala | 25 ++++++---------------- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 14f416ba..12f4aeae 100644 --- 
a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -53,3 +53,37 @@ class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = io.enq.ready := !maybe_full || enq_ptr != deq_ptr || (if (pipe) io.deq.ready else Bool(false)) io.deq.bits <> Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr) } + +object Queue +{ + def apply[T <: Data](enq: ioDecoupled[T], entries: Int = 2, pipe: Boolean = false) = { + val q = (new queue(entries, pipe)) { enq.bits.clone } + q.io.enq <> enq + q.io.deq + } +} + +class pipereg[T <: Data]()(data: => T) extends Component +{ + val io = new Bundle { + val enq = new ioValid()(data) + val deq = new ioValid()(data).flip + } + + //val bits = Reg() { io.enq.bits.clone } + //when (io.enq.valid) { + // bits := io.enq.bits + //} + + io.deq.valid := Reg(io.enq.valid, resetVal = Bool(false)) + io.deq.bits <> Mem(1, io.enq.valid, UFix(0), io.enq.bits).read(UFix(0)) +} + +object PipeReg +{ + def apply[T <: Data](enq: ioValid[T]) = { + val q = (new pipereg) { enq.bits.clone } + q.io.enq <> enq + q.io.deq + } +} diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 7a472a94..52925b3e 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -27,25 +27,14 @@ class Top() extends Component { arbiter.io.requestor(2) <> htif.io.mem val hub = new CoherenceHubNull - // connect tile to hub (figure out how to do this more compactly) - val xact_init_q = (new queue(2)) { new TransactionInit } - xact_init_q.io.enq <> arbiter.io.mem.xact_init - xact_init_q.io.deq <> hub.io.tile.xact_init - val xact_init_data_q = (new queue(2)) { new TransactionInitData } - xact_init_data_q.io.enq <> arbiter.io.mem.xact_init_data - xact_init_data_q.io.deq <> hub.io.tile.xact_init_data - val xact_rep_q = (new queue(1, pipe = true)) { new TransactionReply } - xact_rep_q.io.enq <> hub.io.tile.xact_rep - xact_rep_q.io.deq <> arbiter.io.mem.xact_rep + // connect tile to hub + 
hub.io.tile.xact_init <> Queue(arbiter.io.mem.xact_init) + hub.io.tile.xact_init_data <> Queue(arbiter.io.mem.xact_init_data) + arbiter.io.mem.xact_rep <> Queue(hub.io.tile.xact_rep, 1, pipe = true) // connect hub to memory - val mem_req_q = (new queue(2)) { new MemReqCmd } - mem_req_q.io.enq <> hub.io.mem.req_cmd - mem_req_q.io.deq <> io.mem.req_cmd - val mem_req_data_q = (new queue(2)) { new MemData } - mem_req_data_q.io.enq <> hub.io.mem.req_data - mem_req_data_q.io.deq <> io.mem.req_data - hub.io.mem.resp.valid := Reg(io.mem.resp.valid, resetVal = Bool(false)) - hub.io.mem.resp.bits := Reg(io.mem.resp.bits) + io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) + io.mem.req_data <> Queue(hub.io.mem.req_data) + hub.io.mem.resp <> PipeReg(io.mem.resp) if (HAVE_VEC) From c38065d0e879644c6b519c52a17239742e59a410 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 29 Feb 2012 16:13:14 -0800 Subject: [PATCH 0245/1087] clean up priority encoders --- rocket/src/main/scala/dtlb.scala | 11 +++---- rocket/src/main/scala/itlb.scala | 11 +++---- rocket/src/main/scala/nbdcache.scala | 6 ++-- rocket/src/main/scala/util.scala | 46 ++++------------------------ 4 files changed, 16 insertions(+), 58 deletions(-) diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 6c0af2a0..eb7a91cf 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -118,12 +118,9 @@ class rocketDTLB(entries: Int) extends Component } // high if there are any unused (invalid) entries in the TLB - val invalid_entry = (tag_cam.io.valid_bits != ~Bits(0,entries)); - val ie_enc = new priorityEncoder(entries); - ie_enc.io.in := ~tag_cam.io.valid_bits.toUFix; - val ie_addr = ie_enc.io.out; - - val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; + val has_invalid_entry = !tag_cam.io.valid_bits.andR + val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) + val repl_waddr = Mux(has_invalid_entry, invalid_entry, repl_count).toUFix; val 
lookup = (state === s_ready) && r_cpu_req_val && !io.cpu_req.bits.kill && (req_load || req_store || req_amo || req_pf); val lookup_hit = lookup && tag_hit; @@ -136,7 +133,7 @@ class rocketDTLB(entries: Int) extends Component when (tlb_miss) { r_refill_tag := lookup_tag; r_refill_waddr := repl_waddr; - when (!invalid_entry) { + when (!has_invalid_entry) { repl_count := repl_count + UFix(1); } } diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 231010b9..06cd6e2b 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -146,12 +146,9 @@ class rocketITLB(entries: Int) extends Component } // high if there are any unused entries in the ITLB - val invalid_entry = (tag_cam.io.valid_bits != ~Bits(0,entries)); - val ie_enc = new priorityEncoder(entries); - ie_enc.io.in := ~tag_cam.io.valid_bits.toUFix; - val ie_addr = ie_enc.io.out; - - val repl_waddr = Mux(invalid_entry, ie_addr, repl_count).toUFix; + val has_invalid_entry = !tag_cam.io.valid_bits.andR + val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) + val repl_waddr = Mux(has_invalid_entry, invalid_entry, repl_count).toUFix; val lookup = (state === s_ready) && r_cpu_req_val; val lookup_hit = lookup && tag_hit; @@ -162,7 +159,7 @@ class rocketITLB(entries: Int) extends Component when (tlb_miss) { r_refill_tag := lookup_tag; r_refill_waddr := repl_waddr; - when (!invalid_entry) { + when (!has_invalid_entry) { repl_count := repl_count + UFix(1); } } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 50dd9fc3..57d223b6 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -361,10 +361,8 @@ class ReplayUnit extends Component { val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) } - val sdq_val = Reg(resetVal = UFix(0, NSDQ)) - val sdq_allocator = new priorityEncoder(NSDQ) - sdq_allocator.io.in := ~sdq_val - val sdq_alloc_id = sdq_allocator.io.out.toUFix + val 
sdq_val = Reg(resetVal = UFix(0)) + val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0)) val replay_val = Reg(resetVal = Bool(false)) val replay_retry = replay_val && !io.data_req.ready diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 47361d2e..23209a4f 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -211,46 +211,12 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { dout <> io.out.bits } -class ioPriorityDecoder(in_width: Int, out_width: Int) extends Bundle +object PriorityEncoder { - val in = UFix(in_width, INPUT); - val out = Bits(out_width, OUTPUT); -} - -class priorityDecoder(width: Int) extends Component -{ - val in_width = ceil(log10(width)/log10(2)).toInt; - val io = new ioPriorityEncoder(in_width, width); - val l_out = Wire() { Bits() }; - - l_out := Bits(0, width); - for (i <- width-1 to 0 by -1) { - when (io.in === UFix(i, in_width)) { - l_out := Bits(1,1) << UFix(i); - } + def apply(in: Bits, n: Int = 0): UFix = { + if (n >= in.getWidth-1) + UFix(n) + else + Mux(in(n), UFix(n), PriorityEncoder(in, n+1)) } - - io.out := l_out; -} - -class ioPriorityEncoder(in_width: Int, out_width: Int) extends Bundle -{ - val in = Bits(in_width, INPUT); - val out = UFix(out_width, OUTPUT); -} - -class priorityEncoder(width: Int) extends Component -{ - val out_width = ceil(log10(width)/log10(2)).toInt; - val io = new ioPriorityDecoder(width, out_width); - val l_out = Wire() { UFix() }; - - l_out := UFix(0, out_width); - for (i <- width-1 to 1 by -1) { - when (io.in(i).toBool) { - l_out := UFix(i, out_width); - } - } - - io.out := l_out; } From c723ef4c50e2baa3c0f9876c2b140718ff9999e8 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 29 Feb 2012 12:29:33 -0800 Subject: [PATCH 0246/1087] ioDecoupled now allows inner bundle to be used in covariant positions, i.e. 
it accepts subtypes --- rocket/src/main/scala/util.scala | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 23209a4f..c6d6aa09 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -166,14 +166,7 @@ class Mux1H [T <: Data](n: Int)(gen: => T) extends Component } - - - - - - - -class ioDecoupled[T <: Data]()(data: => T) extends Bundle +class ioDecoupled[+T <: Data]()(data: => T) extends Bundle { val valid = Bool(INPUT) val ready = Bool(OUTPUT) From 008ad1f45bbe6bd8c7e6e3a1786a0046a737ec1f Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 29 Feb 2012 16:45:18 -0800 Subject: [PATCH 0247/1087] Added 'locking' arbiter that won't rearbitrate until the lock signal on the current winning input is low --- rocket/src/main/scala/util.scala | 39 ++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index c6d6aa09..1856a3ba 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -204,6 +204,45 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { dout <> io.out.bits } +class ioLockingArbiter[T <: Data](n: Int)(data: => T) extends Bundle { + val in = Vec(n) { (new ioDecoupled()) { data } } + val lock = Vec(n) { Bool() } + val out = (new ioDecoupled()) { data }.flip() +} + +class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { + val io = new ioLockingArbiter(n)(data) + val locked = Reg(){ Bits(n) } + var dout = Wire(){ data } + var vout = Wire(){ Bool() } + + when((locked && io.lock.toBits).orR) { + dout := io.in(0).bits + for (i <- 0 until n) { + io.in(i).ready := io.out.ready && locked(i) + vout := io.in(i).valid && locked(i) + dout := Mux(locked(i), io.in(i).bits, dout) + } + } .otherwise { + io.in(0).ready := io.out.ready + for (i <- 1 until n) { + io.in(i).ready := !io.in(i-1).valid && 
io.in(i-1).ready + locked(i) := !io.in(i-1).valid && io.in(i-1).ready && io.lock(i) + } + + dout := io.in(n-1).bits + for (i <- 1 until n) + dout = Mux(io.in(n-1-i).valid, io.in(n-1-i).bits, dout) + + vout := io.in(0).valid + for (i <- 1 until n) + vout = vout || io.in(i).valid + } + + vout <> io.out.valid + dout <> io.out.bits +} + object PriorityEncoder { def apply(in: Bits, n: Int = 0): UFix = { From 20d0088f6648531bdb6bab70f4b11e334003f26c Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 29 Feb 2012 17:09:31 -0800 Subject: [PATCH 0248/1087] temporary fix to match bit widths for Mem --- rocket/src/main/scala/fpu.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index a0647631..e45355fa 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -615,7 +615,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component Mux(wsrc === UFix(2), fastpipe.io.exc_d, fastpipe.io.exc_s))) val waddr = winfo(0).toUFix >> UFix(2) - regfile.write(waddr, wdata, wen(0)) + regfile.write(waddr(4,0), wdata, wen(0)) when (wb_reg_valid && wb_ctrl.toint || wen(0)) { fsr_exc := fsr_exc | From 813ffcbf3e3e604fe1bc8d7f467143d96ed50cc6 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 29 Feb 2012 17:58:15 -0800 Subject: [PATCH 0249/1087] Finished broadcast hub with split mem req types. Untested. 
--- rocket/src/main/scala/coherence.scala | 119 ++++++++++++++------------ 1 file changed, 64 insertions(+), 55 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 9fbd69e9..f63e633d 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -28,8 +28,7 @@ class ioMem() extends Bundle } class HubMemReq extends Bundle { - val req_cmd = (new ioDecoupled) { new MemReqCmd() } - val req_data = (new ioDecoupled) { new MemData() } + val lock = Bool() } class TrackerProbeData extends Bundle { @@ -187,7 +186,9 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { val p_rep_data = (new ioDecoupled) { new ProbeReplyData() } val x_init_data = (new ioDecoupled) { new TransactionInitData() } - val mem_req = (new ioDecoupled) { new HubMemReq() }.flip + val mem_req_cmd = (new ioDecoupled) { new MemReqCmd() } + val mem_req_data = (new ioDecoupled) { new MemData() } + val mem_req_lock = Bool(OUTPUT) val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip val busy = Bool(OUTPUT) val addr = Bits(PADDR_BITS, OUTPUT) @@ -225,14 +226,40 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { val t_type_ = Reg{ Bits() } val init_tile_id_ = Reg{ Bits() } val tile_xact_id_ = Reg{ Bits() } - val probe_done = Reg{ Bits() } - val mem_count = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) val p_rep_count = Reg(resetVal = UFix(0, width = log2up(NTILES))) val p_req_flags = Reg(resetVal = UFix(0, width = NTILES)) val p_rep_tile_id_ = Reg{ Bits() } - val x_needs_read = Reg{ Bool() } - val x_init_data_needs_write = Reg{ Bool() } - val p_rep_data_needs_write = Reg{ Bool() } + val x_needs_read = Reg(resetVal = Bool(false)) + val x_init_data_needs_write = Reg(resetVal = Bool(false)) + val p_rep_data_needs_write = Reg(resetVal = Bool(false)) + val mem_cmd_sent = Reg(resetVal = Bool(false)) + val mem_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + 
val mem_cnt_next = mem_cnt + UFix(1) + + def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioDecoupled[MemData], trigger: Bool, pop: Bool) { + req_cmd.valid := mem_cmd_sent + req_cmd.bits.rw := Bool(true) + req_data <> data + lock := Bool(true) + when(req_cmd.ready && req_cmd.valid) { + mem_cmd_sent := Bool(false) + } + when(req_data.ready && req_data.valid) { + pop := Bool(true) + mem_cnt := mem_cnt_next + } + when(mem_cnt === ~UFix(0)) { + trigger := Bool(false) + } + } + + def doMemReqRead(req_cmd: ioDecoupled[MemReqCmd], trigger: Bool) { + req_cmd.valid := Bool(true) + req_cmd.bits.rw := Bool(false) + when(req_cmd.ready ) { + trigger := Bool(false) + } + } io.busy := state != s_idle io.addr := addr_ @@ -241,14 +268,13 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { io.sharer_count := UFix(NTILES) // TODO: Broadcast only io.t_type := t_type_ - io.mem_req.valid := Bool(false) - io.mem_req.bits.req_cmd.valid := Bool(false) - io.mem_req.bits.req_cmd.bits.rw := Bool(false) - io.mem_req.bits.req_cmd.bits.addr := addr_ - io.mem_req.bits.req_cmd.bits.tag := UFix(id) - io.mem_req.bits.req_data.valid := Bool(false) - io.mem_req.bits.req_data.bits.data := UFix(0) - // := io.mem.ready //sent mem req + io.mem_req_cmd.valid := Bool(false) + io.mem_req_cmd.bits.rw := Bool(false) + io.mem_req_cmd.bits.addr := addr_ + io.mem_req_cmd.bits.tag := UFix(id) + io.mem_req_data.valid := Bool(false) + io.mem_req_data.bits.data := UFix(0) + io.mem_req_lock := Bool(false) io.probe_req.valid := Bool(false) io.probe_req.bits.p_type := sendProbeReqType(t_type_, UFix(0)) io.probe_req.bits.global_xact_id := UFix(id) @@ -269,9 +295,11 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := io.alloc_req.bits.xact_init.has_data x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) - p_rep_count := 
UFix(NTILES) - p_req_flags := ~Bits(0, width = NTILES) - state := s_probe + p_rep_count := UFix(NTILES-1) + p_req_flags := ~( UFix(1) << io.alloc_req.bits.init_tile_id ) + state := Mux(p_req_flags.orR, s_probe, s_mem) + mem_cnt := UFix(0) + mem_cmd_sent := Bool(false) io.pop_x_init := Bool(true) } } @@ -285,8 +313,11 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { } when(io.p_rep_cnt_dec.orR) { val p_rep_count_next = p_rep_count - PopCount(io.p_rep_cnt_dec) + io.pop_p_rep := io.p_rep_cnt_dec p_rep_count := p_rep_count_next when(p_rep_count_next === UFix(0)) { + mem_cnt := UFix(0) + mem_cmd_sent := Bool(false) state := s_mem } } @@ -296,36 +327,12 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { } } is(s_mem) { - when(x_init_data_needs_write) { - //io.mem_req.valid := //?TODO ??? || io.x_init_data.valid - //io.mem_req.bits.req_cmd.valid := // TODO ??? - io.mem_req.bits.req_cmd.bits.rw := Bool(true) - io.mem_req.bits.req_data <> io.x_init_data - when(io.mem_req.ready && io.mem_req.bits.req_cmd.ready) { - //TODO - } - when(io.mem_req.ready && io.mem_req.bits.req_data.ready) { - io.pop_x_init_data := Bool(true) - //TODO: count with mem_count somehow - } - } . elsewhen (p_rep_data_needs_write) { - //io.mem_req.valid := //TODO ??? || io.p_rep_data.valid - //io.mem_req.bits.req_cmd.valid := //TODO ??? - io.mem_req.bits.req_cmd.bits.rw := Bool(true) - io.mem_req.bits.req_data <> io.p_rep_data - when(io.mem_req.ready && io.mem_req.bits.req_cmd.ready) { - //TODO - } - when(io.mem_req.ready && io.mem_req.bits.req_data.ready) { - io.pop_p_rep_data := Bool(true) - //TODO: count with mem_count somehow - } + when (p_rep_data_needs_write) { + doMemReqWrite(io.mem_req_cmd, io.mem_req_data, io.mem_req_lock, io.p_rep_data, p_rep_data_needs_write, io.pop_p_rep_data) + } . elsewhen(x_init_data_needs_write) { + doMemReqWrite(io.mem_req_cmd, io.mem_req_data, io.mem_req_lock, io.x_init_data, x_init_data_needs_write, io.pop_x_init_data) } . 
elsewhen (x_needs_read) { - io.mem_req.valid := Bool(true) - io.mem_req.bits.req_cmd.valid := Bool(true) - when(io.mem_req.ready && io.mem_req.bits.req_cmd.ready) { - x_needs_read := Bool(false) - } + doMemReqRead(io.mem_req_cmd, x_needs_read) } . otherwise { io.send_x_rep_ack := needsAckRep(t_type_, UFix(0)) state := s_busy @@ -369,7 +376,7 @@ class CoherenceHubNull extends Component { } -class CoherenceHubNoDir extends CoherenceHub { +class CoherenceHubBroadcast extends CoherenceHub { def coherenceConflict(addr1: Bits, addr2: Bits): Bool = { addr1(PADDR_BITS-1, OFFSET_BITS) === addr2(PADDR_BITS-1, OFFSET_BITS) @@ -440,14 +447,16 @@ class CoherenceHubNoDir extends CoherenceHub { // Create an arbiter for the one memory port // We have to arbitrate between the different trackers' memory requests // and once we have picked a request, get the right write data - - val mem_req_arb = (new Arbiter(NGLOBAL_XACTS)) { new HubMemReq() } + val mem_req_cmd_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemReqCmd() } + val mem_req_data_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemData() } for( i <- 0 until NGLOBAL_XACTS ) { - mem_req_arb.io.in(i) <> trackerList(i).io.mem_req + mem_req_cmd_arb.io.in(i) <> trackerList(i).io.mem_req_cmd + mem_req_cmd_arb.io.lock(i) <> trackerList(i).io.mem_req_lock + mem_req_data_arb.io.in(i) <> trackerList(i).io.mem_req_data + mem_req_data_arb.io.lock(i) <> trackerList(i).io.mem_req_lock } - //mem_req_arb.io.out.ready := io.mem.req_cmd.ready || io.mem.req_data.ready - io.mem.req_cmd <> mem_req_arb.io.out.bits.req_cmd - io.mem.req_data <> mem_req_arb.io.out.bits.req_data + io.mem.req_cmd <> mem_req_cmd_arb.io.out + io.mem.req_data <> mem_req_data_arb.io.out // Handle probe replies, which may or may not have data for( j <- 0 until NTILES ) { From f641b44fb8e8c49f422af0c653c13bab92d46ee8 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 29 Feb 2012 22:00:36 -0800 Subject: [PATCH 0250/1087] changes after the module uniquify bug fix --- 
rocket/src/main/scala/cpu.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 7ac5e943..4ca6c28f 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -37,7 +37,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) { vu = new vu() // cpu, vector prefetch, and vector use the DTLB - val dtlbarb = new rArbiter(3)({new ioDTLB_CPU_req()}) + val dtlbarb = new hwacha.Arbiter(3)({new ioDTLB_CPU_req()}) val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2up(3))) when( dtlb.io.cpu_req.ready && dtlbarb.io.out.valid ) { dtlbchosen := dtlbarb.io.chosen } From 68471603437edb9296e7ecfc3b4d2a9860aac5a4 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 1 Mar 2012 00:22:34 -0800 Subject: [PATCH 0251/1087] refactor arbiter priorities --- rocket/src/main/scala/consts.scala | 10 +++++--- rocket/src/main/scala/cpu.scala | 38 +++++++++++++++--------------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 6377d8c1..57981b34 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -234,7 +234,11 @@ object Constants val VIMM_ALU = UFix(1, 1) val VIMM_X = UFix(0, 1) - val DTLB_VEC = 0 - val DTLB_VPF = 1 - val DTLB_CPU = 2 + val DTLB_CPU = 0 + val DTLB_VEC = 1 + val DTLB_VPF = 2 + + val DMEM_CPU = 0 + val DMEM_PTW = 1 + val DMEM_VU = 2 } diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 4ca6c28f..7cb9090e 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -93,15 +93,15 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) dtlb.io.invalidate := dpath.io.ptbr_wen dtlb.io.status := dpath.io.ctrl.status - arb.io.requestor(0).req_ppn := dtlb.io.cpu_resp.ppn; - ctrl.io.dmem.req_rdy := dtlb.io.cpu_req.ready && 
arb.io.requestor(0).req_rdy; + arb.io.requestor(DMEM_CPU).req_ppn := dtlb.io.cpu_resp.ppn + ctrl.io.dmem.req_rdy := dtlb.io.cpu_req.ready && arb.io.requestor(DMEM_CPU).req_rdy // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) ptw.io.dtlb <> dtlb.io.ptw; ptw.io.itlb <> itlb.io.ptw; ptw.io.ptbr := dpath.io.ptbr; - arb.io.requestor(1) <> ptw.io.dmem + arb.io.requestor(DMEM_PTW) <> ptw.io.dmem arb.io.dmem <> io.dmem ctrl.io.dpath <> dpath.io.ctrl; @@ -126,8 +126,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) io.imem.itlb_miss := itlb.io.cpu.resp_miss; // connect arbiter to ctrl+dpath+DTLB - arb.io.requestor(0) <> ctrl.io.dmem - arb.io.requestor(0) <> dpath.io.dmem + arb.io.requestor(DMEM_CPU) <> ctrl.io.dmem + arb.io.requestor(DMEM_CPU) <> dpath.io.dmem var fpu: rocketFPU = null if (HAVE_FPU) @@ -198,22 +198,22 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) storegen.io.typ := vu.io.dmem_req.bits.typ storegen.io.din := vu.io.dmem_req.bits.data - arb.io.requestor(2).req_val := vu.io.dmem_req.valid - arb.io.requestor(2).req_kill := Reg(vu.io.dmem_req.bits.kill) - arb.io.requestor(2).req_cmd := vu.io.dmem_req.bits.cmd - arb.io.requestor(2).req_type := vu.io.dmem_req.bits.typ - arb.io.requestor(2).req_idx := vu.io.dmem_req.bits.idx - arb.io.requestor(2).req_ppn := Reg(vu.io.dmem_req.bits.ppn) - arb.io.requestor(2).req_data := Reg(storegen.io.dout) - arb.io.requestor(2).req_tag := vu.io.dmem_req.bits.tag + arb.io.requestor(DMEM_VU).req_val := vu.io.dmem_req.valid + arb.io.requestor(DMEM_VU).req_kill := Reg(vu.io.dmem_req.bits.kill) + arb.io.requestor(DMEM_VU).req_cmd := vu.io.dmem_req.bits.cmd + arb.io.requestor(DMEM_VU).req_type := vu.io.dmem_req.bits.typ + arb.io.requestor(DMEM_VU).req_idx := vu.io.dmem_req.bits.idx + arb.io.requestor(DMEM_VU).req_ppn := Reg(vu.io.dmem_req.bits.ppn) + 
arb.io.requestor(DMEM_VU).req_data := Reg(storegen.io.dout) + arb.io.requestor(DMEM_VU).req_tag := vu.io.dmem_req.bits.tag - vu.io.dmem_resp.valid := Reg(arb.io.requestor(2).resp_val) + vu.io.dmem_resp.valid := Reg(arb.io.requestor(DMEM_VU).resp_val) // the vu doesn't look at the ready signal, it's simply a nack // but should be delayed one cycle to match the nack semantics - vu.io.dmem_resp.bits.nack := arb.io.requestor(2).resp_nack || Reg(!arb.io.requestor(2).req_rdy) - vu.io.dmem_resp.bits.data := arb.io.requestor(2).resp_data_subword - vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(2).resp_tag) - vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(2).resp_type) + vu.io.dmem_resp.bits.nack := arb.io.requestor(DMEM_VU).resp_nack || Reg(!arb.io.requestor(DMEM_VU).req_rdy) + vu.io.dmem_resp.bits.data := arb.io.requestor(DMEM_VU).resp_data_subword + vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DMEM_VU).resp_tag) + vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DMEM_VU).resp_type) // share vector integer multiplier with rocket dpath.io.vec_imul_req <> vu.io.cp_imul_req @@ -225,7 +225,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) } else { - arb.io.requestor(2).req_val := Bool(false) + arb.io.requestor(DMEM_VU).req_val := Bool(false) if (HAVE_FPU) { fpu.io.sfma.valid := Bool(false) From a8ef5e9e270770ec8540d56fac8788eda06a9209 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 1 Mar 2012 01:07:47 -0800 Subject: [PATCH 0252/1087] change NMSHR when HAVE_VEC is true --- rocket/src/main/scala/consts.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 57981b34..e173aca2 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -5,6 +5,10 @@ import scala.math._ object Constants { + val HAVE_RVC = false + val HAVE_FPU = true + val HAVE_VEC = false + val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); 
val BR_NE = UFix(2, 4); @@ -169,7 +173,7 @@ object Constants val CPU_TAG_BITS = 9; val DCACHE_TAG_BITS = log2up(DCACHE_PORTS) + CPU_TAG_BITS val OFFSET_BITS = 6; // log2(cache line size in bytes) - val NMSHR = 2; // number of primary misses + val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses val NRPQ = 16; // number of secondary misses val NSDQ = 17; // number of secondary stores/AMOs val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) @@ -207,10 +211,6 @@ object Constants val ITLB_ENTRIES = 8; val START_ADDR = 0x2000; - - val HAVE_RVC = false - val HAVE_FPU = true - val HAVE_VEC = false val FPU_N = UFix(0, 1); val FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N; From c6162ac7432aa085a253a0fb36b836b8b1c38e4e Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 1 Mar 2012 01:19:09 -0800 Subject: [PATCH 0253/1087] Unified hub ios. Fixed some hub elaboration errors. --- rocket/src/main/scala/coherence.scala | 52 +++++++++++---------------- rocket/src/main/scala/top.scala | 6 ++-- rocket/src/main/scala/util.scala | 22 ++++++------ 3 files changed, 36 insertions(+), 44 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index f63e633d..0ded6f22 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -222,12 +222,12 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { val s_idle :: s_mem :: s_probe :: s_busy :: Nil = Enum(4){ UFix() } val state = Reg(resetVal = s_idle) - val addr_ = Reg{ Bits() } + val addr_ = Reg{ UFix() } val t_type_ = Reg{ Bits() } val init_tile_id_ = Reg{ Bits() } val tile_xact_id_ = Reg{ Bits() } val p_rep_count = Reg(resetVal = UFix(0, width = log2up(NTILES))) - val p_req_flags = Reg(resetVal = UFix(0, width = NTILES)) + val p_req_flags = Reg(resetVal = Bits(0, width = NTILES)) val p_rep_tile_id_ = Reg{ Bits() } val x_needs_read = Reg(resetVal = Bool(false)) val x_init_data_needs_write = Reg(resetVal = Bool(false)) @@ 
-350,24 +350,25 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { // P_rep and x_init must be popped on same cycle of receipt } -abstract class CoherenceHub extends Component with CoherencePolicy - -class CoherenceHubNull extends Component { +abstract class CoherenceHub extends Component with CoherencePolicy { val io = new Bundle { - val tile = new ioTileLink().flip + val tiles = Vec(NTILES) { new ioTileLink() }.flip val mem = new ioMem } +} - val x_init = io.tile.xact_init +class CoherenceHubNull extends CoherenceHub { + + val x_init = io.tiles(0).xact_init val is_write = x_init.bits.t_type === X_WRITE_UNCACHED x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) io.mem.req_cmd.bits.rw := is_write io.mem.req_cmd.bits.tag := x_init.bits.tile_xact_id io.mem.req_cmd.bits.addr := x_init.bits.address - io.mem.req_data <> io.tile.xact_init_data + io.mem.req_data <> io.tiles(0).xact_init_data - val x_rep = io.tile.xact_rep + val x_rep = io.tiles(0).xact_rep x_rep.bits.t_type := Mux(io.mem.resp.valid, X_READ_EXCLUSIVE, X_WRITE_UNCACHED) x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) x_rep.bits.global_xact_id := UFix(0) // don't care @@ -392,13 +393,9 @@ class CoherenceHubBroadcast extends CoherenceHub { ret } - val io = new Bundle { - val tiles = Vec(NTILES) { new ioTileLink() } - val mem = new ioMem - } - val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) +/* val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS)} } val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } @@ -467,13 +464,12 @@ class CoherenceHubBroadcast extends CoherenceHub { p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) } for( i <- 0 until NGLOBAL_XACTS ) { - 
trackerList(i).io.p_rep_data := MuxLookup(trackerList(i).io.p_rep_tile_id, Bits(0), (0 until NTILES).map { j => UFix(j) -> io.tiles(j).probe_rep_data }) - val flags = Bits(width = NTILES) + trackerList(i).io.p_rep_data <> io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data for( j <- 0 until NTILES) { val p_rep = io.tiles(j).probe_rep - flags(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) + val dec = p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) + p_rep_cnt_dec_arr(UFix(i)) := p_rep_cnt_dec_arr(UFix(i)).bitSet(UFix(j), dec) } - p_rep_cnt_dec_arr.write(UFix(i), flags) } // Nack conflicting transaction init attempts @@ -484,8 +480,8 @@ class CoherenceHubBroadcast extends CoherenceHub { val conflicts = Bits(width = NGLOBAL_XACTS) for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io - conflicts(i) := t.busy(i) && coherenceConflict(t.addr, x_init.bits.address) && - !(x_init.bits.has_data && (UFix(j) === t.init_tile_id)) + conflicts(UFix(i), t.busy(i) && coherenceConflict(t.addr, x_init.bits.address) && + !(x_init.bits.has_data && (UFix(j) === t.init_tile_id))) // Don't abort writebacks stalled on mem. // TODO: This assumes overlapped writeback init reqs to // the same addr will never be issued; is this ok? 
@@ -493,7 +489,7 @@ class CoherenceHubBroadcast extends CoherenceHub { x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id val want_to_abort = conflicts.orR || busy_arr.toBits.andR x_abort.valid := want_to_abort && x_init.valid - aborting(j) := want_to_abort && x_abort.ready + aborting.bitSet(UFix(j), want_to_abort && x_abort.ready) } // Handle transaction initiation requests @@ -504,10 +500,10 @@ class CoherenceHubBroadcast extends CoherenceHub { for( i <- 0 until NGLOBAL_XACTS ) { alloc_arb.io.in(i).valid := !trackerList(i).io.busy trackerList(i).io.can_alloc := alloc_arb.io.in(i).ready - trackerList(i).io.alloc_req.bits := init_arb.io.out.bits + trackerList(i).io.alloc_req.bits <> init_arb.io.out.bits trackerList(i).io.alloc_req.valid := init_arb.io.out.valid - trackerList(i).io.x_init_data := MuxLookup(trackerList(i).io.init_tile_id, Bits(0), (0 until NTILES).map { j => UFix(j) -> io.tiles(j).xact_init_data }) + trackerList(i).io.x_init_data <> io.tiles(trackerList(i).io.init_tile_id).xact_init_data } for( j <- 0 until NTILES ) { @@ -533,15 +529,9 @@ class CoherenceHubBroadcast extends CoherenceHub { val t = trackerList(i).io p_req_arb_arr(j).io.in(i).bits := t.probe_req.bits p_req_arb_arr(j).io.in(i).valid := t.probe_req.valid && t.push_p_req(j) + p_rep_cnt_dec_arr(i) = p_rep_cnt_dec_arr(i).bitSet(UFix(j), p_req_arb_arr(j).io.in(i).ready) } p_req_arb_arr(j).io.out <> io.tiles(j).probe_req } - for( i <- 0 until NGLOBAL_XACTS ) { - val flags = Bits(width = NTILES) - for( j <- 0 until NTILES ) { - flags(j) := p_req_arb_arr(j).io.in(i).ready - } - p_rep_cnt_dec_arr.write(UFix(i), flags) - } - +*/ } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 52925b3e..1c0fe2af 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -28,9 +28,9 @@ class Top() extends Component { val hub = new CoherenceHubNull // connect tile to hub - hub.io.tile.xact_init <> Queue(arbiter.io.mem.xact_init) - 
hub.io.tile.xact_init_data <> Queue(arbiter.io.mem.xact_init_data) - arbiter.io.mem.xact_rep <> Queue(hub.io.tile.xact_rep, 1, pipe = true) + hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init) + hub.io.tiles(0).xact_init_data <> Queue(arbiter.io.mem.xact_init_data) + arbiter.io.mem.xact_rep <> Queue(hub.io.tiles(0).xact_rep, 1, pipe = true) // connect hub to memory io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) io.mem.req_data <> Queue(hub.io.mem.req_data) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 1856a3ba..219d7539 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -212,29 +212,31 @@ class ioLockingArbiter[T <: Data](n: Int)(data: => T) extends Bundle { class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { val io = new ioLockingArbiter(n)(data) - val locked = Reg(){ Bits(n) } - var dout = Wire(){ data } - var vout = Wire(){ Bool() } + val locked = Reg(resetVal = Bits(0, width = n)) + var dout = io.in(0).bits + var vout = Bool(false) - when((locked && io.lock.toBits).orR) { - dout := io.in(0).bits + val any_lock_held = (locked & io.lock.toBits).orR + when(any_lock_held) { + vout = io.in(0).valid && locked(0) for (i <- 0 until n) { io.in(i).ready := io.out.ready && locked(i) - vout := io.in(i).valid && locked(i) - dout := Mux(locked(i), io.in(i).bits, dout) + dout = Mux(locked(i), io.in(i).bits, dout) + vout = vout || io.in(i).valid && locked(i) } } .otherwise { io.in(0).ready := io.out.ready + locked.bitSet(UFix(0), io.out.ready && io.lock(0)) for (i <- 1 until n) { io.in(i).ready := !io.in(i-1).valid && io.in(i-1).ready - locked(i) := !io.in(i-1).valid && io.in(i-1).ready && io.lock(i) + locked.bitSet(UFix(i), !io.in(i-1).valid && io.in(i-1).ready && io.lock(i)) } - dout := io.in(n-1).bits + dout = io.in(n-1).bits for (i <- 1 until n) dout = Mux(io.in(n-1-i).valid, io.in(n-1-i).bits, dout) - vout := io.in(0).valid + vout = io.in(0).valid for (i <- 1 until n) 
vout = vout || io.in(i).valid } From c7b01230f4ed43d926a8ccf9103f0b4fb69402b5 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 1 Mar 2012 10:14:49 -0800 Subject: [PATCH 0254/1087] fix mul/div when waddr=0, can't believe torture didn't find this one --- rocket/src/main/scala/ctrl.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index dc244396..119d5b1e 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -771,9 +771,9 @@ class rocketCtrl extends Component io.dpath.fn_dw := id_fn_dw.toBool; io.dpath.fn_alu := id_fn_alu; io.dpath.div_fn := id_div_fn; - io.dpath.div_val := id_div_val.toBool; + io.dpath.div_val := id_div_val.toBool && id_waddr != UFix(0); io.dpath.mul_fn := id_mul_fn; - io.dpath.mul_val := id_mul_val.toBool; + io.dpath.mul_val := id_mul_val.toBool && id_waddr != UFix(0); io.dpath.ex_fp_val:= ex_reg_fp_val; io.dpath.mem_fp_val:= mem_reg_fp_val; io.dpath.ex_wen := ex_reg_wen; From 9d7707a0a2eca0cff06738db99cd02125b847abe Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 1 Mar 2012 17:03:56 -0800 Subject: [PATCH 0255/1087] Made xact_rep an ioValid, removed has_data member --- rocket/src/main/scala/arbiter.scala | 2 -- rocket/src/main/scala/coherence.scala | 10 ++++++---- rocket/src/main/scala/top.scala | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index e323cf7a..9cdeb58f 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -59,9 +59,7 @@ class rocketMemArbiter(n: Int) extends Component { io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid && tag(log2up(n)-1,0) === UFix(i) io.requestor(i).xact_rep.bits.data := io.mem.xact_rep.bits.data io.requestor(i).xact_rep.bits.t_type := io.mem.xact_rep.bits.t_type - io.requestor(i).xact_rep.bits.has_data := io.mem.xact_rep.bits.has_data 
io.requestor(i).xact_rep.bits.tile_xact_id := tag >> UFix(log2up(n)) io.requestor(i).xact_rep.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id } - io.mem.xact_rep.ready := Bool(true) // XXX we shouldn't have xact_rep.ready } diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 0ded6f22..12c1e4a9 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -14,10 +14,9 @@ class MemReqCmd() extends Bundle val tag = Bits(width = MEM_TAG_BITS) } -class MemResp () extends Bundle +class MemResp () extends MemData { val tag = Bits(width = MEM_TAG_BITS) - val data = Bits(width = MEM_DATA_BITS) } class ioMem() extends Bundle @@ -72,7 +71,6 @@ class ProbeReplyData extends MemData class TransactionReply extends MemData { val t_type = Bits(width = TTYPE_BITS) - val has_data = Bool() val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) } @@ -88,7 +86,7 @@ class ioTileLink extends Bundle { val probe_req = (new ioDecoupled) { new ProbeRequest() } val probe_rep = (new ioDecoupled) { new ProbeReply() }.flip val probe_rep_data = (new ioDecoupled) { new ProbeReplyData() }.flip - val xact_rep = (new ioDecoupled) { new TransactionReply() } + val xact_rep = (new ioValid) { new TransactionReply() } val xact_finish = (new ioDecoupled) { new TransactionFinish() }.flip } @@ -173,6 +171,10 @@ trait FourStateCoherence extends CoherencePolicy { } state.toBits } + + def replyTypeHasData (reply: TransactionReply): Bool = { + (reply.t_type != X_WRITE_UNCACHED) + } } class XactTracker(id: Int) extends Component with CoherencePolicy { diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 1c0fe2af..a78e75e2 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -30,7 +30,7 @@ class Top() extends Component { // connect tile to hub hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init) 
hub.io.tiles(0).xact_init_data <> Queue(arbiter.io.mem.xact_init_data) - arbiter.io.mem.xact_rep <> Queue(hub.io.tiles(0).xact_rep, 1, pipe = true) + arbiter.io.mem.xact_rep <> PipeReg(hub.io.tiles(0).xact_rep) // connect hub to memory io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) io.mem.req_data <> Queue(hub.io.mem.req_data) From da39810bb29e4266076651b3f52b96322b2cd9c2 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 1 Mar 2012 18:23:46 -0800 Subject: [PATCH 0256/1087] Fixed elaboration errors in LockingArbiter and BoradcastHub. Fixed ioDecoupled direction error in XactTracker --- rocket/src/main/scala/coherence.scala | 130 +++++++++++++++----------- rocket/src/main/scala/util.scala | 14 ++- 2 files changed, 85 insertions(+), 59 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 12c1e4a9..5f70d81f 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -163,13 +163,12 @@ trait FourStateCoherence extends CoherencePolicy { def needsSecondaryXact (cmd: Bits, outstanding: TransactionInit): Bool - def getMetaUpdateOnProbe (incoming: ProbeRequest): Bits = { - val state = UFix(0) - switch(incoming.p_type) { - is(probeInvalidate) { state := tileInvalid } - is(probeDowngrade) { state := tileShared } - } - state.toBits + def newStateOnProbe (incoming: ProbeRequest, state: UFix): Bits = { + MuxLookup(incoming.p_type, state, Array( + probeInvalidate -> tileInvalid, + probeDowngrade -> tileShared, + probeCopy -> state + )) } def replyTypeHasData (reply: TransactionReply): Bool = { @@ -187,9 +186,10 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { val p_req_cnt_inc = Bits(NTILES, INPUT) val p_rep_data = (new ioDecoupled) { new ProbeReplyData() } val x_init_data = (new ioDecoupled) { new TransactionInitData() } + val sent_x_rep_ack = Bool(INPUT) - val mem_req_cmd = (new ioDecoupled) { new MemReqCmd() } - val mem_req_data = (new ioDecoupled) { new 
MemData() } + val mem_req_cmd = (new ioDecoupled) { new MemReqCmd() }.flip + val mem_req_data = (new ioDecoupled) { new MemData() }.flip val mem_req_lock = Bool(OUTPUT) val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip val busy = Bool(OUTPUT) @@ -222,7 +222,7 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { (t_type === X_WRITE_UNCACHED) } - val s_idle :: s_mem :: s_probe :: s_busy :: Nil = Enum(4){ UFix() } + val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() } val state = Reg(resetVal = s_idle) val addr_ = Reg{ UFix() } val t_type_ = Reg{ Bits() } @@ -241,7 +241,10 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioDecoupled[MemData], trigger: Bool, pop: Bool) { req_cmd.valid := mem_cmd_sent req_cmd.bits.rw := Bool(true) - req_data <> data + //TODO: why does req_data <> data segfault? + req_data.valid := data.valid + req_data.bits.data := data.bits.data + data.ready := req_data.ready lock := Bool(true) when(req_cmd.ready && req_cmd.valid) { mem_cmd_sent := Bool(false) @@ -270,7 +273,7 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { io.sharer_count := UFix(NTILES) // TODO: Broadcast only io.t_type := t_type_ - io.mem_req_cmd.valid := Bool(false) + io.mem_req_cmd.valid := Bool(false) io.mem_req_cmd.bits.rw := Bool(false) io.mem_req_cmd.bits.addr := addr_ io.mem_req_cmd.bits.tag := UFix(id) @@ -279,7 +282,7 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { io.mem_req_lock := Bool(false) io.probe_req.valid := Bool(false) io.probe_req.bits.p_type := sendProbeReqType(t_type_, UFix(0)) - io.probe_req.bits.global_xact_id := UFix(id) + io.probe_req.bits.global_xact_id := UFix(id) io.probe_req.bits.address := addr_ io.push_p_req := Bits(0, width = NTILES) io.pop_p_rep := Bits(0, width = NTILES) @@ -287,6 +290,8 @@ class XactTracker(id: Int) extends 
Component with CoherencePolicy { io.pop_x_init := Bool(false) io.pop_x_init_data := Bool(false) io.send_x_rep_ack := Bool(false) + io.x_init_data.ready := Bool(false) // don't care + io.p_rep_data.ready := Bool(false) // don't care switch (state) { is(s_idle) { @@ -336,20 +341,19 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { } . elsewhen (x_needs_read) { doMemReqRead(io.mem_req_cmd, x_needs_read) } . otherwise { - io.send_x_rep_ack := needsAckRep(t_type_, UFix(0)) - state := s_busy + state := Mux(needsAckRep(t_type_, UFix(0)), s_ack, s_busy) } } + is(s_ack) { + io.send_x_rep_ack := Bool(true) + when(io.sent_x_rep_ack) { state := s_busy } + } is(s_busy) { // Nothing left to do but wait for transaction to complete when (io.xact_finish) { state := s_idle } } } - - //TODO: Decrement the probe count when final data piece is written - // Connent io.mem.ready sig to correct pop* outputs - // P_rep and x_init must be popped on same cycle of receipt } abstract class CoherenceHub extends Component with CoherencePolicy { @@ -385,19 +389,16 @@ class CoherenceHubBroadcast extends CoherenceHub { addr1(PADDR_BITS-1, OFFSET_BITS) === addr2(PADDR_BITS-1, OFFSET_BITS) } def getTransactionReplyType(t_type: UFix, count: UFix): Bits = { - val ret = Wire() { Bits(width = TTYPE_BITS) } - switch (t_type) { - is(X_READ_SHARED) { ret := Mux(count > UFix(0), X_READ_SHARED, X_READ_EXCLUSIVE) } - is(X_READ_EXCLUSIVE) { ret := X_READ_EXCLUSIVE } - is(X_READ_UNCACHED) { ret := X_READ_UNCACHED } - is(X_WRITE_UNCACHED) { ret := X_WRITE_UNCACHED } - } - ret + MuxLookup(t_type, X_READ_UNCACHED, Array( + X_READ_SHARED -> Mux(count > UFix(0), X_READ_SHARED, X_READ_EXCLUSIVE), + X_READ_EXCLUSIVE -> X_READ_EXCLUSIVE, + X_READ_UNCACHED -> X_READ_UNCACHED, + X_WRITE_UNCACHED -> X_WRITE_UNCACHED + )) } val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) -/* val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } val addr_arr = Vec(NGLOBAL_XACTS){ 
Wire(){Bits(width=PADDR_BITS)} } val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } @@ -409,40 +410,61 @@ class CoherenceHubBroadcast extends CoherenceHub { val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } val p_rep_cnt_dec_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } val p_req_cnt_inc_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } + val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } for( i <- 0 until NGLOBAL_XACTS) { - busy_arr.write( UFix(i), trackerList(i).io.busy) - addr_arr.write( UFix(i), trackerList(i).io.addr) - init_tile_id_arr.write( UFix(i), trackerList(i).io.init_tile_id) - tile_xact_id_arr.write( UFix(i), trackerList(i).io.tile_xact_id) - t_type_arr.write( UFix(i), trackerList(i).io.t_type) - sh_count_arr.write( UFix(i), trackerList(i).io.sharer_count) - send_x_rep_ack_arr.write(UFix(i), trackerList(i).io.send_x_rep_ack) - trackerList(i).io.xact_finish := do_free_arr.read(UFix(i)) - trackerList(i).io.p_rep_cnt_dec := p_rep_cnt_dec_arr.read(UFix(i)) - trackerList(i).io.p_req_cnt_inc := p_req_cnt_inc_arr.read(UFix(i)) + val t = trackerList(i).io + busy_arr(i) := t.busy + addr_arr(i) := t.addr + init_tile_id_arr(i) := t.init_tile_id + tile_xact_id_arr(i) := t.tile_xact_id + t_type_arr(i) := t.t_type + sh_count_arr(i) := t.sharer_count + send_x_rep_ack_arr(i) := t.send_x_rep_ack + do_free_arr(i) := Bool(false) + p_rep_cnt_dec_arr(i) := Bits(0) + p_req_cnt_inc_arr(i) := Bits(0) + sent_x_rep_ack_arr(i) := Bool(false) + t.xact_finish := do_free_arr(i) + t.p_rep_cnt_dec := p_rep_cnt_dec_arr(i) + t.p_req_cnt_inc := p_req_cnt_inc_arr(i) + t.sent_x_rep_ack := sent_x_rep_ack_arr(i) } // Free finished transactions for( j <- 0 until NTILES ) { val finish = io.tiles(j).xact_finish - do_free_arr.write(finish.bits.global_xact_id, finish.valid) + do_free_arr(finish.bits.global_xact_id) := finish.valid finish.ready := Bool(true) } // Reply to initial requestor - // Forward memory responses from mem to tile - 
val idx = io.mem.resp.bits.tag + // Forward memory responses from mem to tile or arbitrate to ack + val mem_idx = io.mem.resp.bits.tag + val ack_idx = PriorityEncoder(send_x_rep_ack_arr.toBits, NGLOBAL_XACTS) for( j <- 0 until NTILES ) { - io.tiles(j).xact_rep.bits.t_type := getTransactionReplyType(t_type_arr.read(idx), sh_count_arr.read(idx)) - io.tiles(j).xact_rep.bits.tile_xact_id := tile_xact_id_arr.read(idx) - io.tiles(j).xact_rep.bits.global_xact_id := idx + val rep = io.tiles(j).xact_rep + rep.bits.t_type := UFix(0) + rep.bits.tile_xact_id := UFix(0) + rep.bits.global_xact_id := UFix(0) + rep.valid := Bool(false) + when(io.mem.resp.valid) { + rep.bits.t_type := getTransactionReplyType(t_type_arr(mem_idx), sh_count_arr(mem_idx)) + rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx) + rep.bits.global_xact_id := mem_idx + rep.valid := (UFix(j) === init_tile_id_arr(mem_idx)) + } . otherwise { + rep.bits.t_type := getTransactionReplyType(t_type_arr(ack_idx), sh_count_arr(ack_idx)) + rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) + rep.bits.global_xact_id := ack_idx + rep.valid := (UFix(j) === init_tile_id_arr(ack_idx)) && send_x_rep_ack_arr(ack_idx) + } io.tiles(j).xact_rep.bits.data := io.mem.resp.bits.data - io.tiles(j).xact_rep.valid := (UFix(j) === init_tile_id_arr.read(idx)) && (io.mem.resp.valid || send_x_rep_ack_arr.read(idx)) } + sent_x_rep_ack_arr(ack_idx) := !io.mem.resp.valid && send_x_rep_ack_arr(ack_idx) // If there were a ready signal due to e.g. 
intervening network use: - //io.mem.resp.ready := io.tiles(init_tile_id_arr.read(idx)).xact_rep.ready - + //io.mem.resp.ready := io.tiles(init_tile_id_arr.read(mem_idx)).xact_rep.ready + // Create an arbiter for the one memory port // We have to arbitrate between the different trackers' memory requests // and once we have picked a request, get the right write data @@ -466,7 +488,8 @@ class CoherenceHubBroadcast extends CoherenceHub { p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) } for( i <- 0 until NGLOBAL_XACTS ) { - trackerList(i).io.p_rep_data <> io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data + trackerList(i).io.p_rep_data.valid := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.valid + trackerList(i).io.p_rep_data.bits := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.bits for( j <- 0 until NTILES) { val p_rep = io.tiles(j).probe_rep val dec = p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) @@ -475,14 +498,14 @@ class CoherenceHubBroadcast extends CoherenceHub { } // Nack conflicting transaction init attempts - val aborting = Wire() { Bits(width = NTILES) } + val aborting = Bits(0, width = NTILES) for( j <- 0 until NTILES ) { val x_init = io.tiles(j).xact_init val x_abort = io.tiles(j).xact_abort val conflicts = Bits(width = NGLOBAL_XACTS) for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io - conflicts(UFix(i), t.busy(i) && coherenceConflict(t.addr, x_init.bits.address) && + conflicts(UFix(i), t.busy && coherenceConflict(t.addr, x_init.bits.address) && !(x_init.bits.has_data && (UFix(j) === t.init_tile_id))) // Don't abort writebacks stalled on mem. 
// TODO: This assumes overlapped writeback init reqs to @@ -505,9 +528,9 @@ class CoherenceHubBroadcast extends CoherenceHub { trackerList(i).io.alloc_req.bits <> init_arb.io.out.bits trackerList(i).io.alloc_req.valid := init_arb.io.out.valid - trackerList(i).io.x_init_data <> io.tiles(trackerList(i).io.init_tile_id).xact_init_data + trackerList(i).io.x_init_data.bits := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.bits + trackerList(i).io.x_init_data.valid := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.valid } - for( j <- 0 until NTILES ) { val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data @@ -522,7 +545,6 @@ class CoherenceHubBroadcast extends CoherenceHub { alloc_arb.io.out.ready := init_arb.io.out.valid && !busy_arr.toBits.andR && !foldR(trackerList.map(t => t.io.busy && coherenceConflict(t.io.addr, init_arb.io.out.bits.xact_init.address)))(_||_) - // Handle probe request generation // Must arbitrate for each request port val p_req_arb_arr = List.fill(NTILES)((new Arbiter(NGLOBAL_XACTS)) { new ProbeRequest() }) @@ -535,5 +557,5 @@ class CoherenceHubBroadcast extends CoherenceHub { } p_req_arb_arr(j).io.out <> io.tiles(j).probe_req } -*/ + } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 219d7539..bbb53e83 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -206,17 +206,21 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { class ioLockingArbiter[T <: Data](n: Int)(data: => T) extends Bundle { val in = Vec(n) { (new ioDecoupled()) { data } } - val lock = Vec(n) { Bool() } + val lock = Vec(n) { Bool() }.asInput val out = (new ioDecoupled()) { data }.flip() } class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { val io = new ioLockingArbiter(n)(data) - val locked = Reg(resetVal = Bits(0, width = n)) + val locked = Vec(n) { Reg(resetVal = Bool(false)) } var dout = io.in(0).bits var vout = Bool(false) - 
val any_lock_held = (locked & io.lock.toBits).orR + for (i <- 0 until n) { + io.in(i).ready := io.out.ready + } + + val any_lock_held = (locked.toBits & io.lock.toBits).orR when(any_lock_held) { vout = io.in(0).valid && locked(0) for (i <- 0 until n) { @@ -226,10 +230,10 @@ class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { } } .otherwise { io.in(0).ready := io.out.ready - locked.bitSet(UFix(0), io.out.ready && io.lock(0)) + locked(0) := io.out.ready && io.lock(0) for (i <- 1 until n) { io.in(i).ready := !io.in(i-1).valid && io.in(i-1).ready - locked.bitSet(UFix(i), !io.in(i-1).valid && io.in(i-1).ready && io.lock(i)) + locked(i) := !io.in(i-1).valid && io.in(i-1).ready && io.lock(i) } dout = io.in(n-1).bits From 52101373e06f7f3d5ac58883355390c9f8de04e9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 1 Mar 2012 18:49:00 -0800 Subject: [PATCH 0257/1087] clean up D$ store data unit --- rocket/src/main/scala/coherence.scala | 7 ++-- rocket/src/main/scala/nbdcache.scala | 51 +++++++++------------------ rocket/src/main/scala/queues.scala | 9 +++-- rocket/src/main/scala/top.scala | 2 +- 4 files changed, 30 insertions(+), 39 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 5f70d81f..2c5c6edf 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -90,8 +90,8 @@ class ioTileLink extends Bundle { val xact_finish = (new ioDecoupled) { new TransactionFinish() }.flip } -trait CoherencePolicy { - def cpuCmdToRW( cmd: Bits): (Bool, Bool) = { +object cpuCmdToRW { + def apply(cmd: Bits): (Bool, Bool) = { val store = (cmd === M_XWR) val load = (cmd === M_XRD) val amo = cmd(3).toBool @@ -101,6 +101,9 @@ trait CoherencePolicy { } } +trait CoherencePolicy { +} + trait ThreeStateIncoherence extends CoherencePolicy { val tileInvalid :: tileClean :: tileDirty :: Nil = Enum(3){ UFix() } diff --git a/rocket/src/main/scala/nbdcache.scala 
b/rocket/src/main/scala/nbdcache.scala index 57d223b6..cece5676 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -361,51 +361,34 @@ class ReplayUnit extends Component { val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) } - val sdq_val = Reg(resetVal = UFix(0)) + val sdq_val = Reg(resetVal = Bits(0, NSDQ)) val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0)) - val replay_val = Reg(resetVal = Bool(false)) - val replay_retry = replay_val && !io.data_req.ready - replay_val := io.replay.valid || replay_retry + val rpq = Queue(io.replay, 1, pipe = true) + rpq.ready := io.data_req.ready + val (rp_read, rp_write) = cpuCmdToRW(rpq.bits.cmd) - val rp = Reg { new Replay() } - when (io.replay.valid && io.replay.ready) { rp := io.replay.bits } - - val rp_amo = rp.cmd(3).toBool - val rp_store = (rp.cmd === M_XWR) - val rp_load = (rp.cmd === M_XRD) - val rp_write = rp_store || rp_amo - val rp_read = rp_load || rp_amo - - val sdq_ren_new = io.replay.valid && (io.replay.bits.cmd != M_XRD) - val sdq_ren_retry = replay_retry && rp_write - val sdq_ren = sdq_ren_new || sdq_ren_retry val sdq_wen = io.sdq_enq.valid && io.sdq_enq.ready - val sdq_addr = Mux(sdq_ren_retry, rp.sdq_id, Mux(sdq_ren_new, io.replay.bits.sdq_id, sdq_alloc_id)) - - val sdq = Mem(NSDQ){ Bits(width=CPU_DATA_BITS) } + val sdq = Mem(NSDQ, sdq_wen, sdq_alloc_id, io.sdq_enq.bits) sdq.setReadLatency(1); sdq.setTarget('inst) - val sdq_dout = sdq.rw(sdq_addr, io.sdq_enq.bits, sdq_wen, cs = sdq_ren || sdq_wen) - val sdq_free = replay_val && !replay_retry && rp_write - sdq_val := sdq_val & ~(sdq_free.toUFix << rp.sdq_id) | (sdq_wen.toUFix << sdq_alloc_id) + val sdq_free = rpq.valid && rpq.ready && rp_write + sdq_val := sdq_val & ~(sdq_free.toUFix << rpq.bits.sdq_id) | (sdq_wen.toUFix << sdq_alloc_id) - io.sdq_enq.ready := (~sdq_val != UFix(0)) && !sdq_ren + io.sdq_enq.ready := !sdq_val.andR io.sdq_id := sdq_alloc_id - io.replay.ready := !replay_retry + io.data_req.valid := 
rpq.valid + io.way_oh := rpq.bits.way_oh + io.data_req.bits.idx := rpq.bits.idx + io.data_req.bits.offset := rpq.bits.offset + io.data_req.bits.cmd := rpq.bits.cmd + io.data_req.bits.typ := rpq.bits.typ + io.data_req.bits.data := sdq.read(Mux(rpq.valid && !rpq.ready, rpq.bits.sdq_id, io.replay.bits.sdq_id)) - io.data_req.valid := replay_val - io.way_oh := rp.way_oh - io.data_req.bits.idx := rp.idx - io.data_req.bits.offset := rp.offset - io.data_req.bits.cmd := rp.cmd - io.data_req.bits.typ := rp.typ - io.data_req.bits.data := sdq_dout - - io.cpu_resp_val := Reg(replay_val && !replay_retry && rp_read, resetVal = Bool(false)) - io.cpu_resp_tag := Reg(rp.tag) + io.cpu_resp_val := Reg(rpq.valid && rpq.ready && rp_read, resetVal = Bool(false)) + io.cpu_resp_tag := Reg(rpq.bits.tag) } class WritebackUnit extends Component { diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 12f4aeae..803b86ef 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -79,11 +79,16 @@ class pipereg[T <: Data]()(data: => T) extends Component io.deq.bits <> Mem(1, io.enq.valid, UFix(0), io.enq.bits).read(UFix(0)) } -object PipeReg +object Pipe { - def apply[T <: Data](enq: ioValid[T]) = { + def apply[T <: Data](enq: ioValid[T], latency: Int = 1): ioValid[T] = { val q = (new pipereg) { enq.bits.clone } q.io.enq <> enq q.io.deq + + if (latency > 1) + Pipe(q.io.deq, latency-1) + else + q.io.deq } } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index a78e75e2..4627e078 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -34,7 +34,7 @@ class Top() extends Component { // connect hub to memory io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) io.mem.req_data <> Queue(hub.io.mem.req_data) - hub.io.mem.resp <> PipeReg(io.mem.resp) + hub.io.mem.resp <> Pipe(io.mem.resp) if (HAVE_VEC) From 28cacd953f17224b7fb883228f9e501ad7eb5287 Mon Sep 17 00:00:00 2001 From: Andrew 
Waterman Date: Thu, 1 Mar 2012 19:30:56 -0800 Subject: [PATCH 0258/1087] D$ cleanup - merge ReplayUnit and MSHRFile --- rocket/src/main/scala/nbdcache.scala | 122 +++++++++++---------------- rocket/src/main/scala/top.scala | 2 +- 2 files changed, 48 insertions(+), 76 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index cece5676..ad9be9f0 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -102,13 +102,8 @@ class RPQEntry extends Bundle { val tag = Bits(width = DCACHE_TAG_BITS) } -class Replay extends Bundle { +class Replay extends RPQEntry { val idx = Bits(width = IDX_BITS) - val offset = Bits(width = OFFSET_BITS) - val cmd = Bits(width = 4) - val typ = Bits(width = 3) - val sdq_id = UFix(width = log2up(NSDQ)) - val tag = Bits(width = DCACHE_TAG_BITS) val way_oh = Bits(width = NWAYS) } @@ -118,6 +113,7 @@ class DataReq extends Bundle { val cmd = Bits(width = 4) val typ = Bits(width = 3) val data = Bits(width = CPU_DATA_BITS) + val way_oh = Bits(width = NWAYS) } class DataArrayReq extends Bundle { @@ -271,8 +267,8 @@ class MSHRFile extends Component { val req_cmd = Bits(4, INPUT) val req_type = Bits(3, INPUT) val req_tag = Bits(DCACHE_TAG_BITS, INPUT) - val req_sdq_id = UFix(log2up(NSDQ), INPUT) val req_way_oh = Bits(NWAYS, INPUT) + val req_data = Bits(CPU_DATA_BITS, INPUT) val mem_resp_val = Bool(INPUT) val mem_resp_tag = Bits(MEM_TAG_BITS, INPUT) @@ -283,16 +279,27 @@ class MSHRFile extends Component { val mem_req = (new ioDecoupled) { new TransactionInit }.flip() val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip() - val replay = (new ioDecoupled) { new Replay() }.flip() + val data_req = (new ioDecoupled) { new DataReq() }.flip() + + val cpu_resp_val = Bool(OUTPUT) + val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) } + val sdq_val = Reg(resetVal = Bits(0, NSDQ)) + val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0)) + val sdq_rdy = !sdq_val.andR + 
val (req_read, req_write) = cpuCmdToRW(io.req_cmd) + val sdq_enq = io.req_val && io.req_rdy && req_write + val sdq = Mem(NSDQ, sdq_enq, sdq_alloc_id, io.req_data) + sdq.setReadLatency(1); + sdq.setTarget('inst) + val tag_mux = (new Mux1H(NMSHR)){ Bits(width = TAG_BITS) } val mem_resp_idx_mux = (new Mux1H(NMSHR)){ Bits(width = IDX_BITS) } val mem_resp_way_oh_mux = (new Mux1H(NMSHR)){ Bits(width = NWAYS) } val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } val replay_arb = (new Arbiter(NMSHR)) { new Replay() } - val alloc_arb = (new Arbiter(NMSHR)) { Bool() } val tag_match = tag_mux.io.out === io.req_ppn @@ -311,14 +318,14 @@ class MSHRFile extends Component { alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy mshr.io.req_pri_val := alloc_arb.io.in(i).ready - mshr.io.req_sec_val := io.req_val && tag_match + mshr.io.req_sec_val := io.req_val && sdq_rdy && tag_match mshr.io.req_ppn := io.req_ppn mshr.io.req_tag := io.req_tag mshr.io.req_idx := io.req_idx mshr.io.req_offset := io.req_offset mshr.io.req_cmd := io.req_cmd mshr.io.req_type := io.req_type - mshr.io.req_sdq_id := io.req_sdq_id + mshr.io.req_sdq_id := sdq_alloc_id mshr.io.req_way_oh := io.req_way_oh mshr.io.meta_req <> meta_req_arb.io.in(i) @@ -338,57 +345,27 @@ class MSHRFile extends Component { idx_match = idx_match || mshr.io.idx_match } - alloc_arb.io.out.ready := io.req_val && !idx_match + alloc_arb.io.out.ready := io.req_val && sdq_rdy && !idx_match meta_req_arb.io.out <> io.meta_req mem_req_arb.io.out <> io.mem_req - replay_arb.io.out <> io.replay - io.req_rdy := Mux(idx_match, tag_match && sec_rdy, pri_rdy) + io.req_rdy := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.mem_resp_idx := mem_resp_idx_mux.io.out io.mem_resp_way_oh := mem_resp_way_oh_mux.io.out io.fence_rdy := !fence -} -class ReplayUnit extends Component { - val io = new Bundle { - val sdq_enq = (new ioDecoupled) { Bits(width = CPU_DATA_BITS) } - val 
sdq_id = UFix(log2up(NSDQ), OUTPUT) - val way_oh = Bits(NWAYS, OUTPUT) - val replay = (new ioDecoupled) { new Replay() } - val data_req = (new ioDecoupled) { new DataReq() }.flip() - val cpu_resp_val = Bool(OUTPUT) - val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) - } + val replay = Queue(replay_arb.io.out, 1, pipe = true) + replay.ready := io.data_req.ready + io.data_req <> replay - val sdq_val = Reg(resetVal = Bits(0, NSDQ)) - val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0)) + val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd) + val sdq_free = replay.valid && replay.ready && replay_write + sdq_val := sdq_val & ~(sdq_free.toUFix << replay.bits.sdq_id) | (sdq_enq.toUFix << sdq_alloc_id) + io.data_req.bits.data := sdq.read(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) - val rpq = Queue(io.replay, 1, pipe = true) - rpq.ready := io.data_req.ready - val (rp_read, rp_write) = cpuCmdToRW(rpq.bits.cmd) - - val sdq_wen = io.sdq_enq.valid && io.sdq_enq.ready - val sdq = Mem(NSDQ, sdq_wen, sdq_alloc_id, io.sdq_enq.bits) - sdq.setReadLatency(1); - sdq.setTarget('inst) - - val sdq_free = rpq.valid && rpq.ready && rp_write - sdq_val := sdq_val & ~(sdq_free.toUFix << rpq.bits.sdq_id) | (sdq_wen.toUFix << sdq_alloc_id) - - io.sdq_enq.ready := !sdq_val.andR - io.sdq_id := sdq_alloc_id - - io.data_req.valid := rpq.valid - io.way_oh := rpq.bits.way_oh - io.data_req.bits.idx := rpq.bits.idx - io.data_req.bits.offset := rpq.bits.offset - io.data_req.bits.cmd := rpq.bits.cmd - io.data_req.bits.typ := rpq.bits.typ - io.data_req.bits.data := sdq.read(Mux(rpq.valid && !rpq.ready, rpq.bits.sdq_id, io.replay.bits.sdq_id)) - - io.cpu_resp_val := Reg(rpq.valid && rpq.ready && rp_read, resetVal = Bool(false)) - io.cpu_resp_tag := Reg(rpq.bits.tag) + io.cpu_resp_val := Reg(replay.valid && replay.ready && replay_read, resetVal = Bool(false)) + io.cpu_resp_tag := Reg(replay.bits.tag) } class WritebackUnit extends Component { @@ -705,9 
+682,8 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val r_req_write = r_req_store || r_req_amo val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch - // replay unit - val replayer = new ReplayUnit() - val replay_amo_val = replayer.io.data_req.valid && replayer.io.data_req.bits.cmd(3).toBool + val mshr = new MSHRFile() + val replay_amo_val = mshr.io.data_req.valid && mshr.io.data_req.bits.cmd(3).toBool when (io.cpu.req_val) { r_cpu_req_idx := io.cpu.req_idx @@ -716,11 +692,11 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { r_cpu_req_tag := io.cpu.req_tag } when (replay_amo_val) { - r_cpu_req_idx := Cat(replayer.io.data_req.bits.idx, replayer.io.data_req.bits.offset) - r_cpu_req_cmd := replayer.io.data_req.bits.cmd - r_cpu_req_type := replayer.io.data_req.bits.typ - r_amo_replay_data := replayer.io.data_req.bits.data - r_way_oh := replayer.io.way_oh + r_cpu_req_idx := Cat(mshr.io.data_req.bits.idx, mshr.io.data_req.bits.offset) + r_cpu_req_cmd := mshr.io.data_req.bits.cmd + r_cpu_req_type := mshr.io.data_req.bits.typ + r_amo_replay_data := mshr.io.data_req.bits.data + r_way_oh := mshr.io.data_req.bits.way_oh } val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) @@ -816,7 +792,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { data_arb.io.in(2).bits.way_en := p_store_way_oh val drain_store = drain_store_val && data_arb.io.in(2).ready val p_amo = Reg(resetVal = Bool(false)) - val p_store_rdy = !(p_store_valid && !drain_store) && !(replayer.io.data_req.valid || r_replay_amo || p_amo) + val p_store_rdy = !(p_store_valid && !drain_store) && !(mshr.io.data_req.valid || r_replay_amo || p_amo) p_amo := tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo p_store_valid := p_store_valid && !drain_store || (tag_hit && r_req_store && p_store_rdy) || p_amo @@ -852,39 +828,35 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { 
} // miss handling - val mshr = new MSHRFile() - mshr.io.req_val := tag_miss && r_req_readwrite && (!needs_writeback || wb_rdy) && (!r_req_write || replayer.io.sdq_enq.ready) + mshr.io.req_val := tag_miss && r_req_readwrite && (!needs_writeback || wb_rdy) mshr.io.req_ppn := cpu_req_tag mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) mshr.io.req_tag := r_cpu_req_tag mshr.io.req_offset := r_cpu_req_idx(offsetmsb,0) mshr.io.req_cmd := r_cpu_req_cmd mshr.io.req_type := r_cpu_req_type - mshr.io.req_sdq_id := replayer.io.sdq_id mshr.io.req_way_oh := replaced_way_oh + mshr.io.req_data := cpu_req_data mshr.io.mem_resp_val := refill_val && (~rr_count === UFix(0)) mshr.io.mem_resp_tag := io.mem.xact_rep.bits.tile_xact_id mshr.io.mem_req <> wb.io.refill_req mshr.io.meta_req <> meta_arb.io.in(1) - mshr.io.replay <> replayer.io.replay - replayer.io.sdq_enq.valid := tag_miss && r_req_write && (!needs_writeback || wb_rdy) && mshr.io.req_rdy - replayer.io.sdq_enq.bits := cpu_req_data data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh replacer.io.pick_new_way := !io.cpu.req_kill && mshr.io.req_val && mshr.io.req_rdy // replays - val replay = replayer.io.data_req.bits + val replay = mshr.io.data_req.bits val stall_replay = r_replay_amo || p_amo || p_store_valid - val replay_val = replayer.io.data_req.valid + val replay_val = mshr.io.data_req.valid val replay_rdy = data_arb.io.in(1).ready && !stall_replay val replay_fire = replay_val && replay_rdy data_arb.io.in(1).bits.inner_req.offset := replay.offset(offsetmsb,ramindexlsb) data_arb.io.in(1).bits.inner_req.idx := replay.idx data_arb.io.in(1).bits.inner_req.rw := replay.cmd === M_XWR data_arb.io.in(1).valid := replay_val && !stall_replay - data_arb.io.in(1).bits.way_en := replayer.io.way_oh - replayer.io.data_req.ready := replay_rdy + data_arb.io.in(1).bits.way_en := mshr.io.data_req.bits.way_oh + mshr.io.data_req.ready := replay_rdy r_replay_amo := 
replay_amo_val && replay_rdy // store write mask generation. @@ -932,17 +904,17 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val pending_fence = Reg(resetVal = Bool(false)) pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !flush_rdy val nack_hit = p_store_match || replay_val || r_req_write && !p_store_rdy - val nack_miss = needs_writeback && !wb_rdy || !mshr.io.req_rdy || r_req_write && !replayer.io.sdq_enq.ready + val nack_miss = needs_writeback && !wb_rdy || !mshr.io.req_rdy val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || !flushed && r_req_flush val nack = early_nack || r_req_readwrite && Mux(tag_match, nack_hit, nack_miss) || nack_flush io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack - io.cpu.resp_val := (tag_hit && !nack_hit && r_req_read) || replayer.io.cpu_resp_val - io.cpu.resp_replay := replayer.io.cpu_resp_val + io.cpu.resp_val := (tag_hit && !nack_hit && r_req_read) || mshr.io.cpu_resp_val + io.cpu.resp_replay := mshr.io.cpu_resp_val io.cpu.resp_miss := tag_miss && !nack_miss && r_req_read - io.cpu.resp_tag := Mux(replayer.io.cpu_resp_val, replayer.io.cpu_resp_tag, r_cpu_req_tag) + io.cpu.resp_tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag) io.cpu.resp_type := loadgen.io.typ io.cpu.resp_data := loadgen.io.dout io.cpu.resp_data_subword := loadgen.io.r_dout_subword diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 4627e078..526dc4e8 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -30,7 +30,7 @@ class Top() extends Component { // connect tile to hub hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init) hub.io.tiles(0).xact_init_data <> Queue(arbiter.io.mem.xact_init_data) - arbiter.io.mem.xact_rep <> PipeReg(hub.io.tiles(0).xact_rep) + arbiter.io.mem.xact_rep <> 
Pipe(hub.io.tiles(0).xact_rep) // connect hub to memory io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) io.mem.req_data <> Queue(hub.io.mem.req_data) From 6d03d758351bbbd83bd17bf9b21b7027fb58d763 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 1 Mar 2012 20:20:15 -0800 Subject: [PATCH 0259/1087] improve D$ internal interfaces --- rocket/src/main/scala/nbdcache.scala | 114 +++++++++++---------------- 1 file changed, 47 insertions(+), 67 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index ad9be9f0..08067420 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -94,6 +94,18 @@ class LoadDataGen extends Component { io.r_dout_subword := extended_subword } +class MSHRReq extends Bundle { + val ppn = Bits(width = TAG_BITS) + val idx = Bits(width = IDX_BITS) + val way_oh = Bits(width = NWAYS) + + val offset = Bits(width = OFFSET_BITS) + val cmd = Bits(width = 4) + val typ = Bits(width = 3) + val tag = Bits(width = DCACHE_TAG_BITS) + val data = Bits(width = CPU_DATA_BITS) +} + class RPQEntry extends Bundle { val offset = Bits(width = OFFSET_BITS) val cmd = Bits(width = 4) @@ -157,14 +169,8 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { val req_pri_rdy = Bool(OUTPUT) val req_sec_val = Bool(INPUT) val req_sec_rdy = Bool(OUTPUT) - val req_ppn = Bits(TAG_BITS, INPUT) - val req_idx = Bits(IDX_BITS, INPUT) - val req_offset = Bits(OFFSET_BITS, INPUT) - val req_cmd = Bits(4, INPUT) - val req_type = Bits(3, INPUT) + val req_bits = new MSHRReq().asInput val req_sdq_id = UFix(log2up(NSDQ), INPUT) - val req_tag = Bits(DCACHE_TAG_BITS, INPUT) - val req_way_oh = Bits(NWAYS, INPUT) val idx_match = Bool(OUTPUT) val idx = Bits(IDX_BITS, OUTPUT) @@ -185,39 +191,26 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { val idx_ = Reg { Bits() } val way_oh_ = Reg { Bits() } - val req_load = (io.req_cmd === M_XRD) || (io.req_cmd === M_PFR) - val 
req_use_rpq = (io.req_cmd != M_PFR) && (io.req_cmd != M_PFW) - val next_state = Mux(io.req_sec_val && io.req_sec_rdy, newStateOnSecondaryMiss(io.req_cmd, state), state) + val req_cmd = io.req_bits.cmd + val req_load = (req_cmd === M_XRD) || (req_cmd === M_PFR) + val req_use_rpq = (req_cmd != M_PFR) && (req_cmd != M_PFW) + val next_state = Mux(io.req_sec_val && io.req_sec_rdy, newStateOnSecondaryMiss(req_cmd, state), state) val sec_rdy = io.idx_match && !refilled && (needsWriteback(state) || !requested || req_load) - // XXX why doesn't this work? - // val rpq = (new queue(NRPQ)) { new RPQEntry() } - val rpq_enq_bits = Cat(io.req_offset, io.req_cmd, io.req_type, io.req_sdq_id, io.req_tag) - val rpq = (new queue(NRPQ)) { Bits(width = rpq_enq_bits.getWidth) } + val rpq = (new queue(NRPQ)) { new RPQEntry } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq - rpq.io.enq.bits := rpq_enq_bits + rpq.io.enq.bits := io.req_bits + rpq.io.enq.bits.sdq_id := io.req_sdq_id rpq.io.deq.ready := io.replay.ready && refilled - var rpq_deq_bits = rpq.io.deq.bits - io.replay.bits.tag := rpq_deq_bits - rpq_deq_bits = rpq_deq_bits >> UFix(io.req_tag.width) - io.replay.bits.sdq_id := rpq_deq_bits.toUFix - rpq_deq_bits = rpq_deq_bits >> UFix(io.req_sdq_id.width) - io.replay.bits.typ := rpq_deq_bits - rpq_deq_bits = rpq_deq_bits >> UFix(io.req_type.width) - io.replay.bits.cmd := rpq_deq_bits - rpq_deq_bits = rpq_deq_bits >> UFix(io.req_cmd.width) - io.replay.bits.offset := rpq_deq_bits - rpq_deq_bits = rpq_deq_bits >> UFix(io.req_offset.width) - when (io.req_pri_val && io.req_pri_rdy) { valid := Bool(true) - state := newStateOnPrimaryMiss(io.req_cmd) + state := newStateOnPrimaryMiss(req_cmd) requested := Bool(false) refilled := Bool(false) - ppn := io.req_ppn - idx_ := io.req_idx - way_oh_ := io.req_way_oh + ppn := io.req_bits.ppn + idx_ := io.req_bits.idx + way_oh_ := io.req_bits.way_oh } .otherwise { when (io.mem_req.valid && 
io.mem_req.ready) { @@ -232,7 +225,7 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { state := next_state } - io.idx_match := valid && (idx_ === io.req_idx) + io.idx_match := valid && (idx_ === io.req_bits.idx) io.idx := idx_ io.tag := ppn io.way_oh := way_oh_ @@ -253,22 +246,14 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { io.mem_req.bits.tile_xact_id := Bits(id) io.replay.valid := rpq.io.deq.valid && refilled + io.replay.bits <> rpq.io.deq.bits io.replay.bits.idx := idx_ io.replay.bits.way_oh := way_oh_ } class MSHRFile extends Component { val io = new Bundle { - val req_val = Bool(INPUT) - val req_rdy = Bool(OUTPUT) - val req_ppn = Bits(TAG_BITS, INPUT) - val req_idx = Bits(IDX_BITS, INPUT) - val req_offset = Bits(OFFSET_BITS, INPUT) - val req_cmd = Bits(4, INPUT) - val req_type = Bits(3, INPUT) - val req_tag = Bits(DCACHE_TAG_BITS, INPUT) - val req_way_oh = Bits(NWAYS, INPUT) - val req_data = Bits(CPU_DATA_BITS, INPUT) + val req = (new ioDecoupled) { new MSHRReq } val mem_resp_val = Bool(INPUT) val mem_resp_tag = Bits(MEM_TAG_BITS, INPUT) @@ -288,9 +273,9 @@ class MSHRFile extends Component { val sdq_val = Reg(resetVal = Bits(0, NSDQ)) val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0)) val sdq_rdy = !sdq_val.andR - val (req_read, req_write) = cpuCmdToRW(io.req_cmd) - val sdq_enq = io.req_val && io.req_rdy && req_write - val sdq = Mem(NSDQ, sdq_enq, sdq_alloc_id, io.req_data) + val (req_read, req_write) = cpuCmdToRW(io.req.bits.cmd) + val sdq_enq = io.req.valid && io.req.ready && req_write + val sdq = Mem(NSDQ, sdq_enq, sdq_alloc_id, io.req.bits.data) sdq.setReadLatency(1); sdq.setTarget('inst) @@ -302,7 +287,7 @@ class MSHRFile extends Component { val replay_arb = (new Arbiter(NMSHR)) { new Replay() } val alloc_arb = (new Arbiter(NMSHR)) { Bool() } - val tag_match = tag_mux.io.out === io.req_ppn + val tag_match = tag_mux.io.out === io.req.bits.ppn var idx_match = Bool(false) var pri_rdy = Bool(false) @@ -318,15 
+303,9 @@ class MSHRFile extends Component { alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy mshr.io.req_pri_val := alloc_arb.io.in(i).ready - mshr.io.req_sec_val := io.req_val && sdq_rdy && tag_match - mshr.io.req_ppn := io.req_ppn - mshr.io.req_tag := io.req_tag - mshr.io.req_idx := io.req_idx - mshr.io.req_offset := io.req_offset - mshr.io.req_cmd := io.req_cmd - mshr.io.req_type := io.req_type + mshr.io.req_sec_val := io.req.valid && sdq_rdy && tag_match + mshr.io.req_bits := io.req.bits mshr.io.req_sdq_id := sdq_alloc_id - mshr.io.req_way_oh := io.req_way_oh mshr.io.meta_req <> meta_req_arb.io.in(i) mshr.io.mem_req <> mem_req_arb.io.in(i) @@ -345,12 +324,12 @@ class MSHRFile extends Component { idx_match = idx_match || mshr.io.idx_match } - alloc_arb.io.out.ready := io.req_val && sdq_rdy && !idx_match + alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match meta_req_arb.io.out <> io.meta_req mem_req_arb.io.out <> io.mem_req - io.req_rdy := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy + io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.mem_resp_idx := mem_resp_idx_mux.io.out io.mem_resp_way_oh := mem_resp_way_oh_mux.io.out io.fence_rdy := !fence @@ -828,22 +807,23 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { } // miss handling - mshr.io.req_val := tag_miss && r_req_readwrite && (!needs_writeback || wb_rdy) - mshr.io.req_ppn := cpu_req_tag - mshr.io.req_idx := r_cpu_req_idx(indexmsb,indexlsb) - mshr.io.req_tag := r_cpu_req_tag - mshr.io.req_offset := r_cpu_req_idx(offsetmsb,0) - mshr.io.req_cmd := r_cpu_req_cmd - mshr.io.req_type := r_cpu_req_type - mshr.io.req_way_oh := replaced_way_oh - mshr.io.req_data := cpu_req_data + mshr.io.req.valid := tag_miss && r_req_readwrite && (!needs_writeback || wb_rdy) + mshr.io.req.bits.ppn := cpu_req_tag + mshr.io.req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) + mshr.io.req.bits.tag := r_cpu_req_tag + mshr.io.req.bits.offset := 
r_cpu_req_idx(offsetmsb,0) + mshr.io.req.bits.cmd := r_cpu_req_cmd + mshr.io.req.bits.typ := r_cpu_req_type + mshr.io.req.bits.way_oh := replaced_way_oh + mshr.io.req.bits.data := cpu_req_data + mshr.io.mem_resp_val := refill_val && (~rr_count === UFix(0)) mshr.io.mem_resp_tag := io.mem.xact_rep.bits.tile_xact_id mshr.io.mem_req <> wb.io.refill_req mshr.io.meta_req <> meta_arb.io.in(1) data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh - replacer.io.pick_new_way := !io.cpu.req_kill && mshr.io.req_val && mshr.io.req_rdy + replacer.io.pick_new_way := mshr.io.req.valid && mshr.io.req.ready // replays val replay = mshr.io.data_req.bits @@ -904,7 +884,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val pending_fence = Reg(resetVal = Bool(false)) pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !flush_rdy val nack_hit = p_store_match || replay_val || r_req_write && !p_store_rdy - val nack_miss = needs_writeback && !wb_rdy || !mshr.io.req_rdy + val nack_miss = needs_writeback && !wb_rdy || !mshr.io.req.ready val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || !flushed && r_req_flush val nack = early_nack || r_req_readwrite && Mux(tag_match, nack_hit, nack_miss) || nack_flush From 8678b3d70c2ad4e3b5e1106b19af5262a7332c10 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 1 Mar 2012 20:48:46 -0800 Subject: [PATCH 0260/1087] clean up ioDecoupled/ioPipe interface --- rocket/src/main/scala/coherence.scala | 34 ++++++++++---------- rocket/src/main/scala/cpu.scala | 6 ++-- rocket/src/main/scala/ctrl.scala | 4 +-- rocket/src/main/scala/dpath.scala | 6 ++-- rocket/src/main/scala/dpath_vec.scala | 2 +- rocket/src/main/scala/dtlb.scala | 4 +-- rocket/src/main/scala/fpu.scala | 4 +-- rocket/src/main/scala/htif.scala | 6 ++-- rocket/src/main/scala/multiplier.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 44 +++++++++++++------------- 
rocket/src/main/scala/ptw.scala | 8 ++--- rocket/src/main/scala/queues.scala | 10 +++--- rocket/src/main/scala/util.scala | 16 +++++----- 13 files changed, 73 insertions(+), 73 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 2c5c6edf..97d098bd 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -21,9 +21,9 @@ class MemResp () extends MemData class ioMem() extends Bundle { - val req_cmd = (new ioDecoupled) { new MemReqCmd() }.flip - val req_data = (new ioDecoupled) { new MemData() }.flip - val resp = (new ioValid) { new MemResp() } + val req_cmd = (new ioDecoupled) { new MemReqCmd() } + val req_data = (new ioDecoupled) { new MemData() } + val resp = (new ioPipe) { new MemResp() } } class HubMemReq extends Bundle { @@ -80,14 +80,14 @@ class TransactionFinish extends Bundle { } class ioTileLink extends Bundle { - val xact_init = (new ioDecoupled) { new TransactionInit() }.flip - val xact_init_data = (new ioDecoupled) { new TransactionInitData() }.flip - val xact_abort = (new ioDecoupled) { new TransactionAbort() } - val probe_req = (new ioDecoupled) { new ProbeRequest() } - val probe_rep = (new ioDecoupled) { new ProbeReply() }.flip - val probe_rep_data = (new ioDecoupled) { new ProbeReplyData() }.flip - val xact_rep = (new ioValid) { new TransactionReply() } - val xact_finish = (new ioDecoupled) { new TransactionFinish() }.flip + val xact_init = (new ioDecoupled) { new TransactionInit() } + val xact_init_data = (new ioDecoupled) { new TransactionInitData() } + val xact_abort = (new ioDecoupled) { new TransactionAbort() }.flip + val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip + val probe_rep = (new ioDecoupled) { new ProbeReply() } + val probe_rep_data = (new ioDecoupled) { new ProbeReplyData() } + val xact_rep = (new ioPipe) { new TransactionReply() } + val xact_finish = (new ioDecoupled) { new TransactionFinish() } } object cpuCmdToRW { @@ 
-181,20 +181,20 @@ trait FourStateCoherence extends CoherencePolicy { class XactTracker(id: Int) extends Component with CoherencePolicy { val io = new Bundle { - val alloc_req = (new ioDecoupled) { new TrackerAllocReq() } + val alloc_req = (new ioDecoupled) { new TrackerAllocReq() }.flip val probe_data = (new TrackerProbeData).asInput val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) val p_rep_cnt_dec = Bits(NTILES, INPUT) val p_req_cnt_inc = Bits(NTILES, INPUT) - val p_rep_data = (new ioDecoupled) { new ProbeReplyData() } - val x_init_data = (new ioDecoupled) { new TransactionInitData() } + val p_rep_data = (new ioDecoupled) { new ProbeReplyData() }.flip + val x_init_data = (new ioDecoupled) { new TransactionInitData() }.flip val sent_x_rep_ack = Bool(INPUT) - val mem_req_cmd = (new ioDecoupled) { new MemReqCmd() }.flip - val mem_req_data = (new ioDecoupled) { new MemData() }.flip + val mem_req_cmd = (new ioDecoupled) { new MemReqCmd() } + val mem_req_data = (new ioDecoupled) { new MemData() } val mem_req_lock = Bool(OUTPUT) - val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip + val probe_req = (new ioDecoupled) { new ProbeRequest() } val busy = Bool(OUTPUT) val addr = Bits(PADDR_BITS, OUTPUT) val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 7cb9090e..b6d1a9e2 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -14,9 +14,9 @@ class ioRocket extends Bundle() { val debug = new ioDebug(); val host = new ioHTIF(); - val imem = new ioImem().flip(); - val vimem = new ioImem().flip(); - val dmem = new ioDmem().flip(); + val imem = new ioImem().flip + val vimem = new ioImem().flip + val dmem = new ioDmem().flip } class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 119d5b1e..ac544a68 100644 --- a/rocket/src/main/scala/ctrl.scala +++ 
b/rocket/src/main/scala/ctrl.scala @@ -77,8 +77,8 @@ class ioCtrlDpath extends Bundle() class ioCtrlAll extends Bundle() { val dpath = new ioCtrlDpath(); - val imem = new ioImem(List("req_val", "resp_val")).flip(); - val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack", "xcpt_ma_ld", "xcpt_ma_st")).flip(); + val imem = new ioImem(List("req_val", "resp_val")).flip + val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack", "xcpt_ma_ld", "xcpt_ma_st")).flip val dtlb_val = Bool(OUTPUT); val dtlb_kill = Bool(OUTPUT); val dtlb_rdy = Bool(INPUT); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 1a3d5cee..817e001b 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -16,15 +16,15 @@ class ioDpathImem extends Bundle() class ioDpathAll extends Bundle() { val host = new ioHTIF(); - val ctrl = new ioCtrlDpath().flip(); + val ctrl = new ioCtrlDpath().flip val debug = new ioDebug(); - val dmem = new ioDmem(List("req_idx", "req_tag", "req_data", "resp_val", "resp_miss", "resp_replay", "resp_type", "resp_tag", "resp_data", "resp_data_subword")).flip(); + val dmem = new ioDmem(List("req_idx", "req_tag", "req_data", "resp_val", "resp_miss", "resp_replay", "resp_type", "resp_tag", "resp_data", "resp_data_subword")).flip val dtlb = new ioDTLB_CPU_req_bundle().asOutput() val imem = new ioDpathImem(); val ptbr_wen = Bool(OUTPUT); val ptbr = UFix(PADDR_BITS, OUTPUT); val fpu = new ioDpathFPU(); - val vec_ctrl = new ioCtrlDpathVec().flip() + val vec_ctrl = new ioCtrlDpathVec().flip val vec_iface = new ioDpathVecInterface() val vec_imul_req = new io_imul_req val vec_imul_resp = Bits(hwacha.Constants.SZ_XLEN, INPUT) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index c68db9de..ddb629ff 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ 
b/rocket/src/main/scala/dpath_vec.scala @@ -17,7 +17,7 @@ class ioDpathVecInterface extends Bundle class ioDpathVec extends Bundle { - val ctrl = new ioCtrlDpathVec().flip() + val ctrl = new ioCtrlDpathVec().flip val iface = new ioDpathVecInterface() val valid = Bool(INPUT) val inst = Bits(32, INPUT) diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index eb7a91cf..50844713 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -17,7 +17,7 @@ class ioDTLB_CPU_req_bundle extends Bundle val asid = Bits(width=ASID_BITS) val vpn = Bits(width=VPN_BITS+1) } -class ioDTLB_CPU_req extends io_ready_valid()( { new ioDTLB_CPU_req_bundle() } ) +class ioDTLB_CPU_req extends hwacha.ioDecoupled()( { new ioDTLB_CPU_req_bundle() } ) class ioDTLB_CPU_resp extends Bundle { @@ -34,7 +34,7 @@ class ioDTLB extends Bundle val status = Bits(17,INPUT) // invalidate all TLB entries val invalidate = Bool(INPUT) - val cpu_req = new ioDTLB_CPU_req().flip() + val cpu_req = new ioDTLB_CPU_req().flip val cpu_resp = new ioDTLB_CPU_resp() val ptw = new ioTLB_PTW() } diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index e45355fa..bf81d49e 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -445,8 +445,8 @@ class rocketFPUDFMAPipe(latency: Int) extends Component class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component { val io = new Bundle { - val ctrl = new ioCtrlFPU().flip() - val dpath = new ioDpathFPU().flip() + val ctrl = new ioCtrlFPU().flip + val dpath = new ioDpathFPU().flip val sfma = new ioFMA(33) val dfma = new ioFMA(65) } diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 551f3d25..0357d359 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -6,8 +6,8 @@ import Constants._; class ioHost(w: Int, view: List[String] = null) extends Bundle(view) { - val in = new 
ioDecoupled()(Bits(width = w)) - val out = new ioDecoupled()(Bits(width = w)).flip() + val in = new ioDecoupled()(Bits(width = w)).flip + val out = new ioDecoupled()(Bits(width = w)) } class ioHTIF extends Bundle @@ -25,7 +25,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component { val io = new Bundle { val host = new ioHost(w) - val cpu = Vec(ncores) { new ioHTIF().flip() } + val cpu = Vec(ncores) { new ioHTIF().flip } val mem = new ioTileLink } diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 0138afd0..a29ab658 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -7,7 +7,7 @@ import hwacha._ import hwacha.Constants._ class ioMultiplier extends Bundle { - val req = new io_imul_req().flip() + val req = new io_imul_req().flip val req_tag = Bits(5, INPUT) val req_kill = Bool(INPUT) val resp_val = Bool(OUTPUT) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 08067420..e928360c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -178,9 +178,9 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { val way_oh = Bits(NWAYS, OUTPUT) val mem_resp_val = Bool(INPUT) - val mem_req = (new ioDecoupled) { new TransactionInit }.flip - val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip - val replay = (new ioDecoupled) { new Replay() }.flip + val mem_req = (new ioDecoupled) { new TransactionInit } + val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } + val replay = (new ioDecoupled) { new Replay() } } val valid = Reg(resetVal = Bool(false)) @@ -253,7 +253,7 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { class MSHRFile extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new MSHRReq } + val req = (new ioDecoupled) { new MSHRReq }.flip val mem_resp_val = Bool(INPUT) val mem_resp_tag = Bits(MEM_TAG_BITS, INPUT) @@ 
-262,9 +262,9 @@ class MSHRFile extends Component { val fence_rdy = Bool(OUTPUT) - val mem_req = (new ioDecoupled) { new TransactionInit }.flip() - val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip() - val data_req = (new ioDecoupled) { new DataReq() }.flip() + val mem_req = (new ioDecoupled) { new TransactionInit } + val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } + val data_req = (new ioDecoupled) { new DataReq() } val cpu_resp_val = Bool(OUTPUT) val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) @@ -349,12 +349,12 @@ class MSHRFile extends Component { class WritebackUnit extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new WritebackReq() } - val data_req = (new ioDecoupled) { new DataArrayArrayReq() }.flip() + val req = (new ioDecoupled) { new WritebackReq() }.flip + val data_req = (new ioDecoupled) { new DataArrayArrayReq() } val data_resp = Bits(MEM_DATA_BITS, INPUT) - val refill_req = (new ioDecoupled) { new TransactionInit } - val mem_req = (new ioDecoupled) { new TransactionInit }.flip - val mem_req_data = (new ioDecoupled) { new TransactionInitData }.flip + val refill_req = (new ioDecoupled) { new TransactionInit }.flip + val mem_req = (new ioDecoupled) { new TransactionInit } + val mem_req_data = (new ioDecoupled) { new TransactionInitData } } val valid = Reg(resetVal = Bool(false)) @@ -391,11 +391,11 @@ class WritebackUnit extends Component { class FlushUnit(lines: Int) extends Component with ThreeStateIncoherence{ val io = new Bundle { - val req = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) } - val resp = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) }.flip() - val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip() + val req = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) }.flip + val resp = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) } + val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } val meta_resp = (new MetaData).asInput() - val wb_req = (new 
ioDecoupled) { new WritebackReq() }.flip() + val wb_req = (new ioDecoupled) { new WritebackReq() } } val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: s_meta_write :: s_done :: Nil = Enum(6) { UFix() } @@ -444,9 +444,9 @@ class FlushUnit(lines: Int) extends Component with ThreeStateIncoherence{ class MetaDataArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new MetaArrayReq() } + val req = (new ioDecoupled) { new MetaArrayReq() }.flip val resp = (new MetaData).asOutput() - val state_req = (new ioDecoupled) { new MetaArrayReq() } + val state_req = (new ioDecoupled) { new MetaArrayReq() }.flip } val permissions_array = Mem(lines){ Bits(width = 2) } @@ -470,9 +470,9 @@ class MetaDataArray(lines: Int) extends Component { class MetaDataArrayArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new MetaArrayArrayReq() } + val req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip val resp = Vec(NWAYS){ (new MetaData).asOutput } - val state_req = (new ioDecoupled) { new MetaArrayArrayReq() } + val state_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip val way_en = Bits(width = NWAYS, dir = OUTPUT) } @@ -501,7 +501,7 @@ class MetaDataArrayArray(lines: Int) extends Component { class DataArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new DataArrayReq() } + val req = (new ioDecoupled) { new DataArrayReq() }.flip val resp = Bits(width = MEM_DATA_BITS, dir = OUTPUT) } @@ -518,7 +518,7 @@ class DataArray(lines: Int) extends Component { class DataArrayArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new DataArrayArrayReq() } + val req = (new ioDecoupled) { new DataArrayArrayReq() }.flip val resp = Vec(NWAYS){ Bits(width = MEM_DATA_BITS, dir = OUTPUT) } val way_en = Bits(width = NWAYS, dir = OUTPUT) } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 
30b37603..524e26bb 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -7,7 +7,7 @@ import scala.math._; class ioDmemArbiter(n: Int) extends Bundle { - val dmem = new ioDmem().flip() + val dmem = new ioDmem().flip val requestor = Vec(n) { new ioDmem() } } @@ -70,9 +70,9 @@ class rocketDmemArbiter(n: Int) extends Component class ioPTW extends Bundle { - val itlb = new ioTLB_PTW().flip(); - val dtlb = new ioTLB_PTW().flip(); - val dmem = new ioDmem().flip() + val itlb = new ioTLB_PTW().flip + val dtlb = new ioTLB_PTW().flip + val dmem = new ioDmem().flip val ptbr = UFix(PADDR_BITS, INPUT); } diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 803b86ef..46f1ddec 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -6,8 +6,8 @@ import Node._; class ioQueue[T <: Data](flushable: Boolean)(data: => T) extends Bundle { val flush = if (flushable) Bool(INPUT) else null - val enq = new ioDecoupled()(data) - val deq = new ioDecoupled()(data).flip + val enq = new ioDecoupled()(data).flip + val deq = new ioDecoupled()(data) } class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = false)(data: => T) extends Component @@ -66,8 +66,8 @@ object Queue class pipereg[T <: Data]()(data: => T) extends Component { val io = new Bundle { - val enq = new ioValid()(data) - val deq = new ioValid()(data).flip + val enq = new ioPipe()(data) + val deq = new ioPipe()(data).flip } //val bits = Reg() { io.enq.bits.clone } @@ -81,7 +81,7 @@ class pipereg[T <: Data]()(data: => T) extends Component object Pipe { - def apply[T <: Data](enq: ioValid[T], latency: Int = 1): ioValid[T] = { + def apply[T <: Data](enq: ioPipe[T], latency: Int = 1): ioPipe[T] = { val q = (new pipereg) { enq.bits.clone } q.io.enq <> enq q.io.deq diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index bbb53e83..4b59d022 100644 --- 
a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -168,20 +168,20 @@ class Mux1H [T <: Data](n: Int)(gen: => T) extends Component class ioDecoupled[+T <: Data]()(data: => T) extends Bundle { - val valid = Bool(INPUT) - val ready = Bool(OUTPUT) - val bits = data.asInput + val ready = Bool(INPUT) + val valid = Bool(OUTPUT) + val bits = data.asOutput } -class ioValid[T <: Data]()(data: => T) extends Bundle +class ioPipe[T <: Data]()(data: => T) extends Bundle { val valid = Bool(INPUT) val bits = data.asInput } class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle { - val in = Vec(n) { (new ioDecoupled()) { data } } - val out = (new ioDecoupled()) { data }.flip() + val in = Vec(n) { (new ioDecoupled()) { data } }.flip + val out = (new ioDecoupled()) { data } } class Arbiter[T <: Data](n: Int)(data: => T) extends Component { @@ -205,9 +205,9 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { } class ioLockingArbiter[T <: Data](n: Int)(data: => T) extends Bundle { - val in = Vec(n) { (new ioDecoupled()) { data } } + val in = Vec(n) { (new ioDecoupled()) { data } }.flip val lock = Vec(n) { Bool() }.asInput - val out = (new ioDecoupled()) { data }.flip() + val out = (new ioDecoupled()) { data } } class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { From 1054cec0871b8a14f5fd6111330905e5e63f38cf Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Fri, 2 Mar 2012 00:43:32 -0800 Subject: [PATCH 0261/1087] add vec countq interface --- rocket/src/main/scala/consts.scala | 5 +++-- rocket/src/main/scala/cpu.scala | 1 + rocket/src/main/scala/ctrl_vec.scala | 4 +++- rocket/src/main/scala/dpath.scala | 3 +++ rocket/src/main/scala/dpath_util.scala | 4 ++-- rocket/src/main/scala/dpath_vec.scala | 6 ++++++ 6 files changed, 18 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index e173aca2..f0c2432a 100644 --- a/rocket/src/main/scala/consts.scala +++ 
b/rocket/src/main/scala/consts.scala @@ -144,8 +144,9 @@ object Constants val PCR_VECBANK = UFix(18, 5); // temporaries for vector, these will go away - val PCR_VEC_TMP1 = UFix(30, 5) - val PCR_VEC_TMP2 = UFix(31, 5) + val PCR_VEC_CNT = UFix(29, 5) + val PCR_VEC_EADDR = UFix(30, 5) + val PCR_VEC_XCPT = UFix(31, 5) // definition of bits in PCR status reg val SR_ET = 0; // enable traps diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index b6d1a9e2..06d27ca4 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -169,6 +169,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.vec_ximm1q.bits := dpath.io.vec_iface.vximm1q_bits vu.io.vec_ximm2q.valid := ctrl.io.vec_iface.vximm2q_valid vu.io.vec_ximm2q.bits := dpath.io.vec_iface.vximm2q_bits + vu.io.vec_cntq <> dpath.io.vec_iface.vcntq // prefetch queues vu.io.vec_pfcmdq.valid := ctrl.io.vec_iface.vpfcmdq_valid diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 8ca0ce68..835478ec 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -10,6 +10,7 @@ class ioCtrlDpathVec extends Bundle val valid = Bool(INPUT) val inst = Bits(32, INPUT) val appvl0 = Bool(INPUT) + val replay_cntq = Bool(INPUT) val wen = Bool(OUTPUT) val fn = Bits(1, OUTPUT) val sel_vcmd = Bits(3, OUTPUT) @@ -160,7 +161,8 @@ class rocketCtrlVec extends Component wb_vec_ximm2q_enq && !io.iface.vximm2q_ready || wb_vec_pfcmdq_enq && !io.iface.vpfcmdq_ready || wb_vec_pfximm1q_enq && !io.iface.vpfximm1q_ready || - wb_vec_pfximm2q_enq && !io.iface.vpfximm2q_ready + wb_vec_pfximm2q_enq && !io.iface.vpfximm2q_ready || + io.dpath.replay_cntq ) val reg_cpfence = Reg(resetVal = Bool(false)) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 817e001b..a09a3f26 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -385,6 
+385,9 @@ class rocketDpath extends Component vec.io.rs2 := wb_reg_rs2 vec.io.vec_eaddr := pcr.io.vec_eaddr vec.io.vec_exception := pcr.io.vec_exception + vec.io.pcr_wport.addr := wb_reg_raddr2 + vec.io.pcr_wport.en := io.ctrl.wen_pcr + vec.io.pcr_wport.data := wb_reg_wdata wb_wdata := Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 269029f4..6afc8a8e 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -211,8 +211,8 @@ class rocketDpathPCR extends Component when (waddr === PCR_K1) { reg_k1 := wdata; } when (waddr === PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } when (waddr === PCR_VECBANK) { reg_vecbank := wdata(7,0) } - when (waddr === PCR_VEC_TMP1) { reg_vec_eaddr := wdata(VADDR_BITS,0) } - when (waddr === PCR_VEC_TMP2) { reg_vec_exception:= wdata(0) } + when (waddr === PCR_VEC_EADDR) { reg_vec_eaddr := wdata(VADDR_BITS,0) } + when (waddr === PCR_VEC_XCPT) { reg_vec_exception:= wdata(0) } } rdata := Bits(0, 64) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index ddb629ff..f801ac5c 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -11,6 +11,7 @@ class ioDpathVecInterface extends Bundle val vcmdq_bits = Bits(SZ_VCMD, OUTPUT) val vximm1q_bits = Bits(SZ_VIMM, OUTPUT) val vximm2q_bits = Bits(SZ_VSTRIDE, OUTPUT) + val vcntq = (new ioDecoupled()){ Bits(width = 11) } val eaddr = Bits(64, OUTPUT) val exception = Bool(OUTPUT) } @@ -29,6 +30,7 @@ class ioDpathVec extends Bundle val rs2 = Bits(64, INPUT) val vec_eaddr = Bits(64, INPUT) val vec_exception = Bool(INPUT) + val pcr_wport = new ioWritePort() val wen = Bool(OUTPUT) val appvl = UFix(12, OUTPUT) } @@ -129,6 +131,10 @@ class rocketDpathVec extends Component io.iface.vximm2q_bits := io.rs2 + io.iface.vcntq.bits := io.pcr_wport.data + 
io.iface.vcntq.valid := io.pcr_wport.en && io.pcr_wport.addr === PCR_VEC_CNT + io.ctrl.replay_cntq := io.iface.vcntq.valid && !io.iface.vcntq.ready + io.iface.eaddr := io.vec_eaddr io.iface.exception := io.vec_exception From 54baa0713c0a4231aeb8880c924fef09b2f85899 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Fri, 2 Mar 2012 02:10:26 -0800 Subject: [PATCH 0262/1087] hack fence.g.cv to support waiting the control processor --- rocket/src/main/scala/cpu.scala | 1 + rocket/src/main/scala/ctrl_vec.scala | 94 +++++++++++++++------------- 2 files changed, 52 insertions(+), 43 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 06d27ca4..0ba3f74b 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -193,6 +193,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) // exceptions vu.io.cpu_exception.addr := dpath.io.vec_iface.eaddr.toUFix vu.io.cpu_exception.exception := dpath.io.vec_iface.exception + ctrl.io.vec_iface.exception_done := vu.io.done // hooking up vector memory interface val storegen = new StoreDataGen diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 835478ec..3e8aefd5 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -35,6 +35,8 @@ class ioCtrlVecInterface extends Bundle val vackq_valid = Bool(INPUT) val vackq_ready = Bool(OUTPUT) + + val exception_done = Bool(INPUT) } class ioCtrlVec extends Bundle @@ -62,52 +64,52 @@ class rocketCtrlVec extends Component // val vcmd vimm | fn | | | | | | vpximm2q // | | | | | | | | | | | | cpfence // | | | | | | | | | | | | | - List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N,Y,Y,N,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N,Y,Y,N,N), - VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N,N,N,N,N), - 
VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, N,VEC_X ,Y,Y,N,N,N,N,N,N), - FENCE_L_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,Y), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,Y), - VLD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VFLSTD-> 
List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N) + List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N,Y,Y,N,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N,Y,Y,N,N,N), + VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N,N,N,N,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, N,VEC_X ,Y,Y,N,N,N,N,N,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,Y,N), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,N,N,N,N,N,N,N,Y), + VLD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VLHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X 
,Y,Y,Y,Y,Y,Y,Y,N,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N) )) val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: veccs1 = veccs0 - val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_cpfence :: Nil = veccs1 + val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_cpfence :: wb_vec_xcptfence :: Nil = veccs1 val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val.toBool && !(wb_vec_appvlmask.toBool && io.dpath.appvl0) @@ -171,5 +173,11 @@ class rocketCtrlVec extends Component when (do_cpfence) { reg_cpfence := Bool(true) } when (io.iface.vackq_valid || io.exception) { reg_cpfence := Bool(false) } - io.cpfence := reg_cpfence + val reg_xcptfence = Reg(resetVal = Bool(false)) + val do_xcptfence = valid_common && wb_vec_xcptfence && !io.replay + + when (do_xcptfence) { reg_xcptfence := Bool(true) } + when (io.iface.exception_done) { reg_xcptfence := Bool(false) } + + io.cpfence := reg_cpfence || reg_xcptfence } From 7406908d4a38063f54df418599d1b60429dc3a4f Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 2 
Mar 2012 12:19:21 -0800 Subject: [PATCH 0263/1087] BroadcastHub can be elaborated by C and vlsi backends --- rocket/src/main/scala/coherence.scala | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 97d098bd..536a4775 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -198,9 +198,9 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { val busy = Bool(OUTPUT) val addr = Bits(PADDR_BITS, OUTPUT) val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) - val p_rep_tile_id = Bits(log2up(NTILES), INPUT) + val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) - val sharer_count = Bits(TILE_ID_BITS, OUTPUT) + val sharer_count = Bits(TILE_ID_BITS+1, OUTPUT) val t_type = Bits(TTYPE_BITS, OUTPUT) val push_p_req = Bits(NTILES, OUTPUT) val pop_p_rep = Bits(NTILES, OUTPUT) @@ -411,8 +411,8 @@ class CoherenceHubBroadcast extends CoherenceHub { val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_cnt_dec_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } - val p_req_cnt_inc_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=NTILES)} } + val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(NTILES){ Wire(){Bool()} } } + val p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ Vec(NTILES){ Wire(){Bool()} } } val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } for( i <- 0 until NGLOBAL_XACTS) { @@ -424,14 +424,16 @@ class CoherenceHubBroadcast extends CoherenceHub { t_type_arr(i) := t.t_type sh_count_arr(i) := t.sharer_count send_x_rep_ack_arr(i) := t.send_x_rep_ack - do_free_arr(i) := Bool(false) - p_rep_cnt_dec_arr(i) := Bits(0) - p_req_cnt_inc_arr(i) := Bits(0) - sent_x_rep_ack_arr(i) := Bool(false) t.xact_finish := do_free_arr(i) - t.p_rep_cnt_dec := p_rep_cnt_dec_arr(i) - t.p_req_cnt_inc 
:= p_req_cnt_inc_arr(i) + t.p_rep_cnt_dec := p_rep_cnt_dec_arr(i).toBits + t.p_req_cnt_inc := p_req_cnt_inc_arr(i).toBits t.sent_x_rep_ack := sent_x_rep_ack_arr(i) + do_free_arr(i) := Bool(false) + sent_x_rep_ack_arr(i) := Bool(false) + for( j <- 0 until NTILES) { + p_rep_cnt_dec_arr(i)(j) := Bool(false) + p_req_cnt_inc_arr(i)(j) := Bool(false) + } } // Free finished transactions @@ -495,8 +497,7 @@ class CoherenceHubBroadcast extends CoherenceHub { trackerList(i).io.p_rep_data.bits := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.bits for( j <- 0 until NTILES) { val p_rep = io.tiles(j).probe_rep - val dec = p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) - p_rep_cnt_dec_arr(UFix(i)) := p_rep_cnt_dec_arr(UFix(i)).bitSet(UFix(j), dec) + p_rep_cnt_dec_arr(i)(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) } } @@ -556,7 +557,7 @@ class CoherenceHubBroadcast extends CoherenceHub { val t = trackerList(i).io p_req_arb_arr(j).io.in(i).bits := t.probe_req.bits p_req_arb_arr(j).io.in(i).valid := t.probe_req.valid && t.push_p_req(j) - p_rep_cnt_dec_arr(i) = p_rep_cnt_dec_arr(i).bitSet(UFix(j), p_req_arb_arr(j).io.in(i).ready) + p_req_cnt_inc_arr(i)(j) := p_req_arb_arr(j).io.in(i).ready } p_req_arb_arr(j).io.out <> io.tiles(j).probe_req } From 1e1926ce63709591538c81524222e71b8382af90 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 2 Mar 2012 16:18:32 -0800 Subject: [PATCH 0264/1087] flip direction of ioPipe to match ioDecoupled --- rocket/src/main/scala/coherence.scala | 4 ++-- rocket/src/main/scala/queues.scala | 4 ++-- rocket/src/main/scala/util.scala | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 536a4775..195234a4 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -23,7 +23,7 @@ class ioMem() extends Bundle { val req_cmd = (new ioDecoupled) { new MemReqCmd() } val req_data = (new 
ioDecoupled) { new MemData() } - val resp = (new ioPipe) { new MemResp() } + val resp = (new ioPipe) { new MemResp() }.flip } class HubMemReq extends Bundle { @@ -86,7 +86,7 @@ class ioTileLink extends Bundle { val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip val probe_rep = (new ioDecoupled) { new ProbeReply() } val probe_rep_data = (new ioDecoupled) { new ProbeReplyData() } - val xact_rep = (new ioPipe) { new TransactionReply() } + val xact_rep = (new ioPipe) { new TransactionReply() }.flip val xact_finish = (new ioDecoupled) { new TransactionFinish() } } diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 46f1ddec..823a031b 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -66,8 +66,8 @@ object Queue class pipereg[T <: Data]()(data: => T) extends Component { val io = new Bundle { - val enq = new ioPipe()(data) - val deq = new ioPipe()(data).flip + val enq = new ioPipe()(data).flip + val deq = new ioPipe()(data) } //val bits = Reg() { io.enq.bits.clone } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 4b59d022..60225a9e 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -175,8 +175,8 @@ class ioDecoupled[+T <: Data]()(data: => T) extends Bundle class ioPipe[T <: Data]()(data: => T) extends Bundle { - val valid = Bool(INPUT) - val bits = data.asInput + val valid = Bool(OUTPUT) + val bits = data.asOutput } class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle { From 00989c58bd6dcde1dbbcd002eae9177efcd796da Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 2 Mar 2012 13:41:01 -0800 Subject: [PATCH 0265/1087] Correction to probe reply w/ data handling --- rocket/src/main/scala/coherence.scala | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 195234a4..638cc0d7 
100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -31,8 +31,7 @@ class HubMemReq extends Bundle { } class TrackerProbeData extends Bundle { - val valid = Bool() - val data_tile_id = Bits(width = log2up(NTILES)) + val tile_id = Bits(width = TILE_ID_BITS) } class TrackerAllocReq extends Bundle { @@ -182,7 +181,7 @@ trait FourStateCoherence extends CoherencePolicy { class XactTracker(id: Int) extends Component with CoherencePolicy { val io = new Bundle { val alloc_req = (new ioDecoupled) { new TrackerAllocReq() }.flip - val probe_data = (new TrackerProbeData).asInput + val p_data = (new ioPipe) { new TrackerProbeData() } val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) val p_rep_cnt_dec = Bits(NTILES, INPUT) @@ -331,9 +330,9 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { state := s_mem } } - when(io.probe_data.valid) { + when(io.p_data.valid) { p_rep_data_needs_write := Bool(true) - p_rep_tile_id_ := io.p_rep_tile_id + p_rep_tile_id_ := io.p_data.bits.tile_id } } is(s_mem) { @@ -413,7 +412,9 @@ class CoherenceHubBroadcast extends CoherenceHub { val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(NTILES){ Wire(){Bool()} } } val p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ Vec(NTILES){ Wire(){Bool()} } } - val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } + val p_data_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bits(width = TILE_ID_BITS)} } + val p_data_valid_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io @@ -425,14 +426,18 @@ class CoherenceHubBroadcast extends CoherenceHub { sh_count_arr(i) := t.sharer_count send_x_rep_ack_arr(i) := t.send_x_rep_ack t.xact_finish := do_free_arr(i) + t.p_data.bits.tile_id := p_data_tile_id_arr(i) + t.p_data.valid := p_data_valid_arr(i) t.p_rep_cnt_dec := 
p_rep_cnt_dec_arr(i).toBits t.p_req_cnt_inc := p_req_cnt_inc_arr(i).toBits t.sent_x_rep_ack := sent_x_rep_ack_arr(i) do_free_arr(i) := Bool(false) sent_x_rep_ack_arr(i) := Bool(false) + p_data_tile_id_arr(i) := Bits(0, width = TILE_ID_BITS) + p_data_valid_arr(i) := Bool(false) for( j <- 0 until NTILES) { - p_rep_cnt_dec_arr(i)(j) := Bool(false) - p_req_cnt_inc_arr(i)(j) := Bool(false) + p_rep_cnt_dec_arr(i)(j) := Bool(false) + p_req_cnt_inc_arr(i)(j) := Bool(false) } } @@ -490,7 +495,9 @@ class CoherenceHubBroadcast extends CoherenceHub { val p_rep_data = io.tiles(j).probe_rep_data val idx = p_rep.bits.global_xact_id p_rep.ready := foldR(trackerList.map(_.io.pop_p_rep(j)))(_ || _) - p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) + p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) + p_data_valid_arr(idx) := p_rep.valid && p_rep.bits.has_data + p_data_tile_id_arr(idx) := UFix(j) } for( i <- 0 until NGLOBAL_XACTS ) { trackerList(i).io.p_rep_data.valid := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.valid From 35f97bf858bda09291c74435a9d4da80bfbd7e13 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 2 Mar 2012 21:58:50 -0800 Subject: [PATCH 0266/1087] Filled out 4 state coherence functions for cache --- rocket/src/main/scala/coherence.scala | 44 ++++++++++++++++----------- rocket/src/main/scala/nbdcache.scala | 2 -- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 638cc0d7..4532794b 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -156,16 +156,26 @@ trait FourStateCoherence extends CoherencePolicy { def newStateOnWriteback() = tileInvalid def newStateOnFlush() = tileInvalid + def newStateOnHit(cmd: Bits, state: UFix): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, tileExclusiveDirty, state) + } + def newStateOnTransactionRep(incoming: 
TransactionReply, outstanding: TransactionInit): UFix = { + MuxLookup(incoming.t_type, tileInvalid, Array( + X_READ_SHARED -> tileShared, + X_READ_EXCLUSIVE -> Mux(outstanding.t_type === X_READ_EXCLUSIVE, tileExclusiveDirty, tileExclusiveClean), + X_READ_EXCLUSIVE_ACK -> tileExclusiveDirty, + X_READ_UNCACHED -> tileInvalid, + X_WRITE_UNCACHED -> tileInvalid + )) + } + def needsSecondaryXact(cmd: Bits, outstanding: TransactionInit): Bool = { + val (read, write) = cpuCmdToRW(cmd) + (read && (outstanding.t_type === X_READ_UNCACHED || outstanding.t_type === X_WRITE_UNCACHED)) || + (write && (outstanding.t_type != X_READ_EXCLUSIVE)) + } - // TODO: New funcs as compared to incoherent protocol: - def newState(cmd: Bits, state: UFix): UFix - def newStateOnHit(cmd: Bits, state: UFix): UFix - def newStateOnPrimaryMiss(cmd: Bits): UFix - def newStateOnSecondaryMiss(cmd: Bits, state: UFix): UFix - - def needsSecondaryXact (cmd: Bits, outstanding: TransactionInit): Bool - - def newStateOnProbe (incoming: ProbeRequest, state: UFix): Bits = { + def newStateOnProbeReq(incoming: ProbeRequest, state: UFix): Bits = { MuxLookup(incoming.p_type, state, Array( probeInvalidate -> tileInvalid, probeDowngrade -> tileShared, @@ -174,26 +184,26 @@ trait FourStateCoherence extends CoherencePolicy { } def replyTypeHasData (reply: TransactionReply): Bool = { - (reply.t_type != X_WRITE_UNCACHED) + (reply.t_type != X_WRITE_UNCACHED && reply.t_type != X_READ_EXCLUSIVE_ACK) } } class XactTracker(id: Int) extends Component with CoherencePolicy { val io = new Bundle { - val alloc_req = (new ioDecoupled) { new TrackerAllocReq() }.flip - val p_data = (new ioPipe) { new TrackerProbeData() } + val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip + val p_data = (new ioPipe) { new TrackerProbeData } val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) val p_rep_cnt_dec = Bits(NTILES, INPUT) val p_req_cnt_inc = Bits(NTILES, INPUT) - val p_rep_data = (new ioDecoupled) { new ProbeReplyData() 
}.flip - val x_init_data = (new ioDecoupled) { new TransactionInitData() }.flip + val p_rep_data = (new ioDecoupled) { new ProbeReplyData }.flip + val x_init_data = (new ioDecoupled) { new TransactionInitData }.flip val sent_x_rep_ack = Bool(INPUT) - val mem_req_cmd = (new ioDecoupled) { new MemReqCmd() } - val mem_req_data = (new ioDecoupled) { new MemData() } + val mem_req_cmd = (new ioDecoupled) { new MemReqCmd } + val mem_req_data = (new ioDecoupled) { new MemData } val mem_req_lock = Bool(OUTPUT) - val probe_req = (new ioDecoupled) { new ProbeRequest() } + val probe_req = (new ioDecoupled) { new ProbeRequest } val busy = Bool(OUTPUT) val addr = Bits(PADDR_BITS, OUTPUT) val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index e928360c..b99701c6 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -605,8 +605,6 @@ abstract class HellaCache extends Component { def newStateOnWriteback(): UFix def newStateOnFlush(): UFix def newStateOnHit(cmd: Bits, state: UFix): UFix - def newStateOnPrimaryMiss(cmd: Bits): UFix - def newStateOnSecondaryMiss(cmd: Bits, state: UFix): UFix } class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { From 1b3307df32f48e2fc1cc83c6bbc8f8170dbf95b9 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 2 Mar 2012 23:51:53 -0800 Subject: [PATCH 0267/1087] Removed has_data fields from all coherence messages, increased message type names to compensate --- rocket/src/main/scala/coherence.scala | 78 ++++++++++++--------- rocket/src/main/scala/consts.scala | 33 ++++++--- rocket/src/main/scala/htif.scala | 3 +- rocket/src/main/scala/icache.scala | 3 +- rocket/src/main/scala/icache_prefetch.scala | 3 +- rocket/src/main/scala/nbdcache.scala | 8 +-- 6 files changed, 73 insertions(+), 55 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 
4532794b..0ce1f1c9 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -42,8 +42,7 @@ class TrackerAllocReq extends Bundle { class TransactionInit extends Bundle { - val t_type = Bits(width = TTYPE_BITS) - val has_data = Bool() + val t_type = Bits(width = X_INIT_TYPE_BITS) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) val address = UFix(width = PADDR_BITS) } @@ -55,21 +54,20 @@ class TransactionAbort extends Bundle { } class ProbeRequest extends Bundle { - val p_type = Bits(width = PTYPE_BITS) + val p_type = Bits(width = P_REQ_TYPE_BITS) val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) val address = Bits(width = PADDR_BITS) } class ProbeReply extends Bundle { - val p_type = Bits(width = PTYPE_BITS) - val has_data = Bool() + val p_type = Bits(width = P_REP_TYPE_BITS) val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) } class ProbeReplyData extends MemData class TransactionReply extends MemData { - val t_type = Bits(width = TTYPE_BITS) + val t_type = Bits(width = X_REP_TYPE_BITS) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) } @@ -162,17 +160,17 @@ trait FourStateCoherence extends CoherencePolicy { } def newStateOnTransactionRep(incoming: TransactionReply, outstanding: TransactionInit): UFix = { MuxLookup(incoming.t_type, tileInvalid, Array( - X_READ_SHARED -> tileShared, - X_READ_EXCLUSIVE -> Mux(outstanding.t_type === X_READ_EXCLUSIVE, tileExclusiveDirty, tileExclusiveClean), - X_READ_EXCLUSIVE_ACK -> tileExclusiveDirty, - X_READ_UNCACHED -> tileInvalid, - X_WRITE_UNCACHED -> tileInvalid + X_REP_READ_SHARED -> tileShared, + X_REP_READ_EXCLUSIVE -> Mux(outstanding.t_type === X_INIT_READ_EXCLUSIVE, tileExclusiveDirty, tileExclusiveClean), + X_REP_READ_EXCLUSIVE_ACK -> tileExclusiveDirty, + X_REP_READ_UNCACHED -> tileInvalid, + X_REP_WRITE_UNCACHED -> tileInvalid )) } def needsSecondaryXact(cmd: Bits, outstanding: TransactionInit): Bool = { val 
(read, write) = cpuCmdToRW(cmd) - (read && (outstanding.t_type === X_READ_UNCACHED || outstanding.t_type === X_WRITE_UNCACHED)) || - (write && (outstanding.t_type != X_READ_EXCLUSIVE)) + (read && (outstanding.t_type === X_INIT_READ_UNCACHED || outstanding.t_type === X_INIT_WRITE_UNCACHED)) || + (write && (outstanding.t_type != X_INIT_READ_EXCLUSIVE)) } def newStateOnProbeReq(incoming: ProbeRequest, state: UFix): Bits = { @@ -183,12 +181,20 @@ trait FourStateCoherence extends CoherencePolicy { )) } - def replyTypeHasData (reply: TransactionReply): Bool = { - (reply.t_type != X_WRITE_UNCACHED && reply.t_type != X_READ_EXCLUSIVE_ACK) + def probeReplyHasData (reply: ProbeReply): Bool = { + (reply.p_type === P_REP_INVALIDATE_DATA || + reply.p_type === P_REP_DOWNGRADE_DATA || + reply.p_type === P_REP_COPY_DATA) + } + def transactionInitHasData (init: TransactionInit): Bool = { + (init.t_type != X_INIT_WRITE_UNCACHED) + } + def transactionReplyHasData (reply: TransactionReply): Bool = { + (reply.t_type != X_REP_WRITE_UNCACHED && reply.t_type != X_REP_READ_EXCLUSIVE_ACK) } } -class XactTracker(id: Int) extends Component with CoherencePolicy { +class XactTracker(id: Int) extends Component with FourStateCoherence { val io = new Bundle { val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip val p_data = (new ioPipe) { new TrackerProbeData } @@ -210,7 +216,7 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) val sharer_count = Bits(TILE_ID_BITS+1, OUTPUT) - val t_type = Bits(TTYPE_BITS, OUTPUT) + val t_type = Bits(X_INIT_TYPE_BITS, OUTPUT) val push_p_req = Bits(NTILES, OUTPUT) val pop_p_rep = Bits(NTILES, OUTPUT) val pop_p_rep_data = Bits(NTILES, OUTPUT) @@ -220,18 +226,20 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { } def sendProbeReqType(t_type: UFix, global_state: UFix): UFix = { - MuxCase(P_COPY, Array((t_type === 
X_READ_SHARED) -> P_DOWNGRADE, - (t_type === X_READ_EXCLUSIVE) -> P_INVALIDATE, - (t_type === X_READ_UNCACHED) -> P_COPY, - (t_type === X_WRITE_UNCACHED) -> P_INVALIDATE)) + MuxLookup(t_type, P_REQ_COPY, Array( + X_INIT_READ_SHARED -> P_REQ_DOWNGRADE, + X_INIT_READ_EXCLUSIVE -> P_REQ_INVALIDATE, + X_INIT_READ_UNCACHED -> P_REQ_COPY, + X_INIT_WRITE_UNCACHED -> P_REQ_INVALIDATE + )) } def needsMemRead(t_type: UFix, global_state: UFix): Bool = { - (t_type != X_WRITE_UNCACHED) + (t_type != X_INIT_WRITE_UNCACHED) } def needsAckRep(t_type: UFix, global_state: UFix): Bool = { - (t_type === X_WRITE_UNCACHED) + (t_type === X_INIT_WRITE_UNCACHED) } val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() } @@ -312,7 +320,7 @@ class XactTracker(id: Int) extends Component with CoherencePolicy { t_type_ := io.alloc_req.bits.xact_init.t_type init_tile_id_ := io.alloc_req.bits.init_tile_id tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id - x_init_data_needs_write := io.alloc_req.bits.xact_init.has_data + x_init_data_needs_write := transactionInitHasData(io.alloc_req.bits.xact_init) x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) p_rep_count := UFix(NTILES-1) p_req_flags := ~( UFix(1) << io.alloc_req.bits.init_tile_id ) @@ -378,7 +386,7 @@ abstract class CoherenceHub extends Component with CoherencePolicy { class CoherenceHubNull extends CoherenceHub { val x_init = io.tiles(0).xact_init - val is_write = x_init.bits.t_type === X_WRITE_UNCACHED + val is_write = x_init.bits.t_type === X_INIT_WRITE_UNCACHED x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) io.mem.req_cmd.bits.rw := is_write @@ -387,7 +395,7 @@ class CoherenceHubNull extends CoherenceHub { io.mem.req_data <> io.tiles(0).xact_init_data val x_rep = io.tiles(0).xact_rep - x_rep.bits.t_type := Mux(io.mem.resp.valid, 
X_READ_EXCLUSIVE, X_WRITE_UNCACHED) + x_rep.bits.t_type := Mux(io.mem.resp.valid, X_REP_READ_EXCLUSIVE, X_REP_WRITE_UNCACHED) x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.data := io.mem.resp.bits.data @@ -395,17 +403,17 @@ class CoherenceHubNull extends CoherenceHub { } -class CoherenceHubBroadcast extends CoherenceHub { +class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ def coherenceConflict(addr1: Bits, addr2: Bits): Bool = { addr1(PADDR_BITS-1, OFFSET_BITS) === addr2(PADDR_BITS-1, OFFSET_BITS) } def getTransactionReplyType(t_type: UFix, count: UFix): Bits = { - MuxLookup(t_type, X_READ_UNCACHED, Array( - X_READ_SHARED -> Mux(count > UFix(0), X_READ_SHARED, X_READ_EXCLUSIVE), - X_READ_EXCLUSIVE -> X_READ_EXCLUSIVE, - X_READ_UNCACHED -> X_READ_UNCACHED, - X_WRITE_UNCACHED -> X_WRITE_UNCACHED + MuxLookup(t_type, X_REP_READ_UNCACHED, Array( + X_INIT_READ_SHARED -> Mux(count > UFix(0), X_REP_READ_SHARED, X_REP_READ_EXCLUSIVE), + X_INIT_READ_EXCLUSIVE -> X_REP_READ_EXCLUSIVE, + X_INIT_READ_UNCACHED -> X_REP_READ_UNCACHED, + X_INIT_WRITE_UNCACHED -> X_REP_WRITE_UNCACHED )) } @@ -415,7 +423,7 @@ class CoherenceHubBroadcast extends CoherenceHub { val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS)} } val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } - val t_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TTYPE_BITS)} } + val t_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_BITS)} } val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } @@ -506,7 +514,7 @@ class CoherenceHubBroadcast extends CoherenceHub { val idx = p_rep.bits.global_xact_id p_rep.ready := foldR(trackerList.map(_.io.pop_p_rep(j)))(_ || _) 
p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) - p_data_valid_arr(idx) := p_rep.valid && p_rep.bits.has_data + p_data_valid_arr(idx) := p_rep.valid && probeReplyHasData(p_rep.bits) p_data_tile_id_arr(idx) := UFix(j) } for( i <- 0 until NGLOBAL_XACTS ) { @@ -527,7 +535,7 @@ class CoherenceHubBroadcast extends CoherenceHub { for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io conflicts(UFix(i), t.busy && coherenceConflict(t.addr, x_init.bits.address) && - !(x_init.bits.has_data && (UFix(j) === t.init_tile_id))) + !(transactionInitHasData(x_init.bits) && (UFix(j) === t.init_tile_id))) // Don't abort writebacks stalled on mem. // TODO: This assumes overlapped writeback init reqs to // the same addr will never be issued; is this ok? diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index f0c2432a..4159e952 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -191,16 +191,31 @@ object Constants val GLOBAL_XACT_ID_BITS = 4 val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS - val TTYPE_BITS = 2 - val X_READ_SHARED = UFix(0, TTYPE_BITS) - val X_READ_EXCLUSIVE = UFix(1, TTYPE_BITS) - val X_READ_UNCACHED = UFix(2, TTYPE_BITS) - val X_WRITE_UNCACHED = UFix(3, TTYPE_BITS) + val X_INIT_TYPE_BITS = 2 + val X_INIT_READ_SHARED = UFix(0, X_INIT_TYPE_BITS) + val X_INIT_READ_EXCLUSIVE = UFix(1, X_INIT_TYPE_BITS) + val X_INIT_READ_UNCACHED = UFix(2, X_INIT_TYPE_BITS) + val X_INIT_WRITE_UNCACHED = UFix(3, X_INIT_TYPE_BITS) - val PTYPE_BITS = 2 - val P_INVALIDATE = UFix(0, PTYPE_BITS) - val P_DOWNGRADE = UFix(1, PTYPE_BITS) - val P_COPY = UFix(2, PTYPE_BITS) + val X_REP_TYPE_BITS = 3 + val X_REP_READ_SHARED = UFix(0, X_REP_TYPE_BITS) + val X_REP_READ_EXCLUSIVE = UFix(1, X_REP_TYPE_BITS) + val X_REP_READ_UNCACHED = UFix(2, X_REP_TYPE_BITS) + val X_REP_WRITE_UNCACHED = UFix(3, X_REP_TYPE_BITS) + val X_REP_READ_EXCLUSIVE_ACK = UFix(4, X_REP_TYPE_BITS) + + val P_REQ_TYPE_BITS = 2 + val 
P_REQ_INVALIDATE = UFix(0, P_REQ_TYPE_BITS) + val P_REQ_DOWNGRADE = UFix(1, P_REQ_TYPE_BITS) + val P_REQ_COPY = UFix(2, P_REQ_TYPE_BITS) + + val P_REP_TYPE_BITS = 3 + val P_REP_INVALIDATE_DATA = UFix(0, P_REP_TYPE_BITS) + val P_REP_DOWNGRADE_DATA = UFix(1, P_REP_TYPE_BITS) + val P_REP_COPY_DATA = UFix(2, P_REP_TYPE_BITS) + val P_REP_INVALIDATE_ACK = UFix(3, P_REP_TYPE_BITS) + val P_REP_DOWNGRADE_ACK = UFix(4, P_REP_TYPE_BITS) + val P_REP_COPY_ACK = UFix(5, P_REP_TYPE_BITS) // external memory interface val MEM_TAG_BITS = 4 diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 0357d359..6dedbc16 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -117,8 +117,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component mem_req_data = Cat(packet_ram.read(idx), mem_req_data) } io.mem.xact_init.valid := state === state_mem_req - io.mem.xact_init.bits.t_type := Mux(cmd === cmd_writemem, X_WRITE_UNCACHED, X_READ_UNCACHED) - io.mem.xact_init.bits.has_data := cmd === cmd_writemem + io.mem.xact_init.bits.t_type := Mux(cmd === cmd_writemem, X_INIT_WRITE_UNCACHED, X_INIT_READ_UNCACHED) io.mem.xact_init.bits.address := addr >> UFix(OFFSET_BITS-3) io.mem.xact_init_data.valid:= state === state_mem_wdata diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 3d18f974..6ca20c3e 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -129,8 +129,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); io.cpu.resp_data := data_mux.io.out io.mem.xact_init.valid := (state === s_request) - io.mem.xact_init.bits.t_type := X_READ_UNCACHED - io.mem.xact_init.bits.has_data := Bool(false) + io.mem.xact_init.bits.t_type := X_INIT_READ_UNCACHED io.mem.xact_init.bits.address := r_cpu_miss_addr(tagmsb,indexlsb).toUFix io.mem.xact_init_data.valid := Bool(false) diff 
--git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index ba666cd9..9ad85d82 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -30,8 +30,7 @@ class rocketIPrefetcher extends Component() { val ip_mem_resp_val = io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id(0) io.mem.xact_init.valid := prefetch_miss || (state === s_req_wait) - io.mem.xact_init.bits.t_type := X_READ_UNCACHED - io.mem.xact_init.bits.has_data := Bool(false) + io.mem.xact_init.bits.t_type := X_INIT_READ_UNCACHED io.mem.xact_init.bits.tile_xact_id := Mux(prefetch_miss, UFix(0), UFix(1)) io.mem.xact_init.bits.address := Mux(prefetch_miss, io.icache.xact_init.bits.address, prefetch_addr); io.mem.xact_init_data.valid := Bool(false) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index b99701c6..a5dc640b 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -240,8 +240,7 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { io.meta_req.bits.way_en := way_oh_ io.mem_req.valid := valid && !requested - io.mem_req.bits.t_type := Mux(needsWriteback(next_state), X_READ_EXCLUSIVE, X_READ_SHARED) - io.mem_req.bits.has_data := Bool(false) + io.mem_req.bits.t_type := Mux(needsWriteback(next_state), X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) io.mem_req.bits.address := Cat(ppn, idx_).toUFix io.mem_req.bits.tile_xact_id := Bits(id) @@ -380,8 +379,7 @@ class WritebackUnit extends Component { val wb_req_val = io.req.valid && !valid io.refill_req.ready := io.mem_req.ready && !wb_req_val io.mem_req.valid := io.refill_req.valid || wb_req_val - io.mem_req.bits.t_type := Mux(wb_req_val, X_WRITE_UNCACHED, io.refill_req.bits.t_type) - io.mem_req.bits.has_data := wb_req_val + io.mem_req.bits.t_type := Mux(wb_req_val, X_INIT_WRITE_UNCACHED, io.refill_req.bits.t_type) io.mem_req.bits.address := Mux(wb_req_val, 
Cat(io.req.bits.ppn, io.req.bits.idx).toUFix, io.refill_req.bits.address) io.mem_req.bits.tile_xact_id := Mux(wb_req_val, Bits(NMSHR), io.refill_req.bits.tile_xact_id) @@ -679,7 +677,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { // refill counter val mem_resp_type = io.mem.xact_rep.bits.t_type - val refill_val = io.mem.xact_rep.valid && (mem_resp_type === X_READ_SHARED || mem_resp_type === X_READ_EXCLUSIVE) + val refill_val = io.mem.xact_rep.valid && (mem_resp_type === X_REP_READ_SHARED || mem_resp_type === X_REP_READ_EXCLUSIVE) val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val rr_count_next = rr_count + UFix(1) when (refill_val) { rr_count := rr_count_next } From f9fb3978caa5eb4a3e4a546a3de8bfd7fb1e7de7 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 3 Mar 2012 15:07:22 -0800 Subject: [PATCH 0268/1087] fix store prefetch bug, it no longer occupies an entry in the sdq --- rocket/src/main/scala/coherence.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 0ce1f1c9..005f17c9 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -92,8 +92,8 @@ object cpuCmdToRW { val store = (cmd === M_XWR) val load = (cmd === M_XRD) val amo = cmd(3).toBool - val read = load || amo || (cmd === M_PFR) - val write = store || amo || (cmd === M_PFW) + val read = load || amo || (cmd === M_PFR) || (cmd === M_PFW) + val write = store || amo (read, write) } } From e28a551368a1a6e2c73b0438986e08d3681e77a1 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 3 Mar 2012 15:09:42 -0800 Subject: [PATCH 0269/1087] refactor code related to vector exceptions - revisied interfaces - new instructions --- rocket/src/main/scala/consts.scala | 5 + rocket/src/main/scala/cpu.scala | 14 +- rocket/src/main/scala/ctrl.scala | 16 +- rocket/src/main/scala/ctrl_vec.scala | 193 +++++++++++++---------- 
rocket/src/main/scala/dpath.scala | 3 - rocket/src/main/scala/dpath_vec.scala | 16 +- rocket/src/main/scala/instructions.scala | 7 + 7 files changed, 158 insertions(+), 96 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 4159e952..1568abb5 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -244,12 +244,17 @@ object Constants val VCMD_TF = UFix(3, 3) val VCMD_MX = UFix(4, 3) val VCMD_MF = UFix(5, 3) + val VCMD_A = UFix(6, 3) val VCMD_X = UFix(0, 3) val VIMM_VLEN = UFix(0, 1) val VIMM_ALU = UFix(1, 1) val VIMM_X = UFix(0, 1) + val VIMM2_RS2 = UFix(0, 1) + val VIMM2_ALU = UFix(1, 1) + val VIMM2_X = UFix(0, 1) + val DTLB_CPU = 0 val DTLB_VEC = 1 val DTLB_VPF = 2 diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 0ba3f74b..90039b1f 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -169,7 +169,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.vec_ximm1q.bits := dpath.io.vec_iface.vximm1q_bits vu.io.vec_ximm2q.valid := ctrl.io.vec_iface.vximm2q_valid vu.io.vec_ximm2q.bits := dpath.io.vec_iface.vximm2q_bits - vu.io.vec_cntq <> dpath.io.vec_iface.vcntq + vu.io.vec_cntq.valid := ctrl.io.vec_iface.vcntq_valid + vu.io.vec_cntq.bits := dpath.io.vec_iface.vcntq_bits // prefetch queues vu.io.vec_pfcmdq.valid := ctrl.io.vec_iface.vpfcmdq_valid @@ -178,22 +179,31 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.vec_pfximm1q.bits := dpath.io.vec_iface.vximm1q_bits vu.io.vec_pfximm2q.valid := ctrl.io.vec_iface.vpfximm2q_valid vu.io.vec_pfximm2q.bits := dpath.io.vec_iface.vximm2q_bits + // vu.io.vec_pfcntq.valid := ctrl.io.vec_iface.vpfcntq_valid + // vu.io.vec_pfcntq.bits := dpath.io.vec_iface.vcntq_bits // don't have to use pf ready signals // if cmdq is not a load or store ctrl.io.vec_iface.vcmdq_ready := vu.io.vec_cmdq.ready 
ctrl.io.vec_iface.vximm1q_ready := vu.io.vec_ximm1q.ready ctrl.io.vec_iface.vximm2q_ready := vu.io.vec_ximm2q.ready + ctrl.io.vec_iface.vcntq_ready := vu.io.vec_cntq.ready ctrl.io.vec_iface.vpfcmdq_ready := vu.io.vec_pfcmdq.ready ctrl.io.vec_iface.vpfximm1q_ready := vu.io.vec_pfximm1q.ready ctrl.io.vec_iface.vpfximm2q_ready := vu.io.vec_pfximm2q.ready + // ctrl.io.vec_iface.vpfcntq_ready := vu.io.vec_pfcntq.ready + ctrl.io.vec_iface.vpfcntq_ready := Bool(true) + ctrl.io.vec_iface.vackq_valid := vu.io.vec_ackq.valid vu.io.vec_ackq.ready := ctrl.io.vec_iface.vackq_ready // exceptions vu.io.cpu_exception.addr := dpath.io.vec_iface.eaddr.toUFix vu.io.cpu_exception.exception := dpath.io.vec_iface.exception - ctrl.io.vec_iface.exception_done := vu.io.done + ctrl.io.vec_iface.exception_ack_valid := vu.io.exception_ack_valid + vu.io.exception_ack_ready := ctrl.io.vec_iface.exception_ack_ready + ctrl.io.vec_iface.kill_ack_valid := vu.io.kill_ack_valid + vu.io.kill_ack_ready := ctrl.io.vec_iface.kill_ack_ready // hooking up vector memory interface val storegen = new StoreDataGen diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ac544a68..cc1d7f69 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -279,7 +279,15 @@ class rocketCtrl extends Component VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N) + VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, 
M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), + + // Vector Supervisor Stuff + VENQCMD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), + VENQIMM1-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), + VENQIMM2-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), + VENQCNT-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), + VWAITXCPT-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), + VWAITKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y) )) val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs @@ -581,7 +589,7 @@ class rocketCtrl extends Component } var vec_replay = Bool(false) - var vec_cpfence = Bool(false) + var vec_stalld = Bool(false) if (HAVE_VEC) { // vector control @@ -594,7 +602,7 @@ class rocketCtrl extends Component vec.io.exception := wb_reg_exception vec_replay = vec.io.replay - vec_cpfence = vec.io.cpfence + vec_stalld = vec.io.stalld } // exception handling @@ -749,7 +757,7 @@ class rocketCtrl extends Component id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || id_vec_val.toBool && !(io.vec_iface.vcmdq_ready && io.vec_iface.vximm1q_ready && io.vec_iface.vximm2q_ready) || // being conservative ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || - vec_cpfence + vec_stalld ); val ctrl_stallf = ctrl_stalld; diff --git a/rocket/src/main/scala/ctrl_vec.scala 
b/rocket/src/main/scala/ctrl_vec.scala index 3e8aefd5..43c6c321 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -10,11 +10,11 @@ class ioCtrlDpathVec extends Bundle val valid = Bool(INPUT) val inst = Bits(32, INPUT) val appvl0 = Bool(INPUT) - val replay_cntq = Bool(INPUT) val wen = Bool(OUTPUT) val fn = Bits(1, OUTPUT) val sel_vcmd = Bits(3, OUTPUT) val sel_vimm = Bits(1, OUTPUT) + val sel_vimm2 = Bits(1, OUTPUT) } class ioCtrlVecInterface extends Bundle @@ -25,6 +25,8 @@ class ioCtrlVecInterface extends Bundle val vximm1q_ready = Bool(INPUT) val vximm2q_valid = Bool(OUTPUT) val vximm2q_ready = Bool(INPUT) + val vcntq_valid = Bool(OUTPUT) + val vcntq_ready = Bool(INPUT) val vpfcmdq_valid = Bool(OUTPUT) val vpfcmdq_ready = Bool(INPUT) @@ -32,11 +34,17 @@ class ioCtrlVecInterface extends Bundle val vpfximm1q_ready = Bool(INPUT) val vpfximm2q_valid = Bool(OUTPUT) val vpfximm2q_ready = Bool(INPUT) + val vpfcntq_valid = Bool(OUTPUT) + val vpfcntq_ready = Bool(INPUT) val vackq_valid = Bool(INPUT) val vackq_ready = Bool(OUTPUT) - val exception_done = Bool(INPUT) + val exception_ack_valid = Bool(INPUT) + val exception_ack_ready = Bool(OUTPUT) + + val kill_ack_valid = Bool(INPUT) + val kill_ack_ready = Bool(OUTPUT) } class ioCtrlVec extends Bundle @@ -46,7 +54,7 @@ class ioCtrlVec extends Bundle val sr_ev = Bool(INPUT) val exception = Bool(INPUT) val replay = Bool(OUTPUT) - val cpfence = Bool(OUTPUT) + val stalld = Bool(OUTPUT) } class rocketCtrlVec extends Component @@ -55,129 +63,156 @@ class rocketCtrlVec extends Component val veccs = ListLookup(io.dpath.inst, - // appvlmask - // | vcmdq - // | | vximm1q - // | | | vximm2q - // | | | | vpfcmdq - // wen | | | | | vpximm1q - // val vcmd vimm | fn | | | | | | vpximm2q - // | | | | | | | | | | | | cpfence - // | | | | | | | | | | | | | - List(N,VCMD_X, VIMM_X, N,VEC_X ,N,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,Y,VEC_CFG,N,Y,Y,N,Y,Y,N,N,N), - VSETVL-> 
List(Y,VCMD_I, VIMM_VLEN,Y,VEC_VL ,N,Y,Y,N,Y,Y,N,N,N), - VF-> List(Y,VCMD_I, VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, N,VEC_X ,Y,Y,N,N,N,N,N,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,N,N,N,N,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, N,VEC_X ,Y,Y,N,N,N,N,N,N,N), - FENCE_L_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,Y,N,N,N,N,N,Y,N), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, N,VEC_X ,N,N,N,N,N,N,N,N,Y), - VLD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,N,Y,Y,N,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X 
,Y,Y,Y,Y,Y,Y,Y,N,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, N,VEC_X ,Y,Y,Y,Y,Y,Y,Y,N,N) + // appvlmask + // | vcmdq + // | | vximm1q + // | | | vximm2q + // | | | | vcntq + // | | | | | vpfcmdq + // | | | | | | vpfximm1q + // | | | | | | | vpfximm2q + // wen | | | | | | | | vpfcntq + // val vcmd vimm vimm2 | fn | | | | | | | | | stalld + // | | | | | | | | | | | | | | | | waitxcpt + // | | | | | | | | | | | | | | | | | + List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFG,N,Y,Y,N,N,Y,Y,N,N,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N), + VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,Y,N), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,Y,N), + VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, 
N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), + VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, 
N,VEC_X, N,Y,N,N,N,Y,N,N,N,N,N), + VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_X, N,N,Y,N,N,N,Y,N,N,N,N), + VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,Y,N,N,N,Y,N,N,N), + VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,Y,N,N,N,Y,N,N), + VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y), + VWAITKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y) )) - val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs - val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: veccs1 = veccs0 - val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_cpfence :: wb_vec_xcptfence :: Nil = veccs1 + val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_sel_vimm2 :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs + val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: wb_vec_cntq_enq :: veccs1 = veccs0 + val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_pfcntq_enq :: veccs2 = veccs1 + val wb_vec_stalld :: wb_vec_waitxcpt :: Nil = veccs2 - val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val.toBool && !(wb_vec_appvlmask.toBool && io.dpath.appvl0) + val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val && !(wb_vec_appvlmask && io.dpath.appvl0) val mask_wb_vec_cmdq_ready = !wb_vec_cmdq_enq || io.iface.vcmdq_ready val mask_wb_vec_ximm1q_ready = !wb_vec_ximm1q_enq || io.iface.vximm1q_ready val mask_wb_vec_ximm2q_ready = !wb_vec_ximm2q_enq || io.iface.vximm2q_ready + val mask_wb_vec_cntq_ready = !wb_vec_cntq_enq || io.iface.vcntq_ready val mask_wb_vec_pfcmdq_ready = !wb_vec_pfcmdq_enq || io.iface.vpfcmdq_ready val mask_wb_vec_pfximm1q_ready = !wb_vec_pfximm1q_enq || io.iface.vpfximm1q_ready val mask_wb_vec_pfximm2q_ready = !wb_vec_pfximm2q_enq || io.iface.vpfximm2q_ready + val mask_wb_vec_pfcntq_ready = !wb_vec_pfcntq_enq || io.iface.vpfcntq_ready io.dpath.wen := 
wb_vec_wen.toBool io.dpath.fn := wb_vec_fn io.dpath.sel_vcmd := wb_sel_vcmd io.dpath.sel_vimm := wb_sel_vimm + io.dpath.sel_vimm2 := wb_sel_vimm2 io.iface.vcmdq_valid := valid_common && - wb_vec_cmdq_enq && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + wb_vec_cmdq_enq && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vximm1q_valid := valid_common && - mask_wb_vec_cmdq_ready && wb_vec_ximm1q_enq && mask_wb_vec_ximm2q_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + mask_wb_vec_cmdq_ready && wb_vec_ximm1q_enq && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vximm2q_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && wb_vec_ximm2q_enq && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && wb_vec_ximm2q_enq && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready + + io.iface.vcntq_valid := + valid_common && + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && wb_vec_cntq_enq && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vpfcmdq_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && - wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + 
wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vpfximm1q_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && - mask_wb_vec_pfcmdq_ready && wb_vec_pfximm1q_enq && mask_wb_vec_pfximm2q_ready + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && wb_vec_pfximm1q_enq && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vpfximm2q_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && wb_vec_pfximm2q_enq + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && wb_vec_pfximm2q_enq && mask_wb_vec_pfcntq_ready + + io.iface.vpfcntq_valid := + valid_common && + mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && wb_vec_pfcntq_enq io.iface.vackq_ready := Bool(true) + io.iface.exception_ack_ready := Bool(true) + io.iface.kill_ack_ready := Bool(true) io.replay := valid_common && ( wb_vec_cmdq_enq && !io.iface.vcmdq_ready || wb_vec_ximm1q_enq && !io.iface.vximm1q_ready || wb_vec_ximm2q_enq && !io.iface.vximm2q_ready || + wb_vec_cntq_enq && !io.iface.vcntq_ready || wb_vec_pfcmdq_enq && !io.iface.vpfcmdq_ready || wb_vec_pfximm1q_enq && !io.iface.vpfximm1q_ready || wb_vec_pfximm2q_enq && !io.iface.vpfximm2q_ready || - io.dpath.replay_cntq + wb_vec_pfcntq_enq && !io.iface.vpfcntq_ready ) - val reg_cpfence = Reg(resetVal = Bool(false)) - val do_cpfence = valid_common && wb_vec_cpfence && !io.replay + val reg_stalld = Reg(resetVal = Bool(false)) + val do_stalld = valid_common && wb_vec_stalld && 
!io.replay - when (do_cpfence) { reg_cpfence := Bool(true) } - when (io.iface.vackq_valid || io.exception) { reg_cpfence := Bool(false) } + when (do_stalld) { reg_stalld := Bool(true) } + when (io.iface.vackq_valid || io.exception) { reg_stalld := Bool(false) } - val reg_xcptfence = Reg(resetVal = Bool(false)) - val do_xcptfence = valid_common && wb_vec_xcptfence && !io.replay + val reg_waitxcpt = Reg(resetVal = Bool(false)) + val do_waitxcpt = valid_common && wb_vec_waitxcpt && !io.replay - when (do_xcptfence) { reg_xcptfence := Bool(true) } - when (io.iface.exception_done) { reg_xcptfence := Bool(false) } + when (do_waitxcpt) { reg_waitxcpt := Bool(true) } + when (io.iface.exception_ack_valid) { reg_waitxcpt := Bool(false) } + when (io.iface.kill_ack_valid) { reg_waitxcpt := Bool(false) } - io.cpfence := reg_cpfence || reg_xcptfence + io.stalld := reg_stalld || reg_waitxcpt } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a09a3f26..817e001b 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -385,9 +385,6 @@ class rocketDpath extends Component vec.io.rs2 := wb_reg_rs2 vec.io.vec_eaddr := pcr.io.vec_eaddr vec.io.vec_exception := pcr.io.vec_exception - vec.io.pcr_wport.addr := wb_reg_raddr2 - vec.io.pcr_wport.en := io.ctrl.wen_pcr - vec.io.pcr_wport.data := wb_reg_wdata wb_wdata := Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index f801ac5c..012507af 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -11,7 +11,7 @@ class ioDpathVecInterface extends Bundle val vcmdq_bits = Bits(SZ_VCMD, OUTPUT) val vximm1q_bits = Bits(SZ_VIMM, OUTPUT) val vximm2q_bits = Bits(SZ_VSTRIDE, OUTPUT) - val vcntq = (new ioDecoupled()){ Bits(width = 11) } + val vcntq_bits = Bits(SZ_VLEN, OUTPUT) val eaddr = Bits(64, OUTPUT) val exception = Bool(OUTPUT) } @@ -30,7 +30,6 @@ 
class ioDpathVec extends Bundle val rs2 = Bits(64, INPUT) val vec_eaddr = Bits(64, INPUT) val vec_exception = Bool(INPUT) - val pcr_wport = new ioWritePort() val wen = Bool(OUTPUT) val appvl = UFix(12, OUTPUT) } @@ -123,17 +122,18 @@ class rocketDpathVec extends Component Mux(io.ctrl.sel_vcmd === VCMD_TF, Cat(Bits(1,2), io.inst(13,8), Bits(1,1), io.waddr, Bits(1,1), io.raddr1), Mux(io.ctrl.sel_vcmd === VCMD_MX, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(0,1), io.waddr, Bits(0,1), io.waddr), Mux(io.ctrl.sel_vcmd === VCMD_MF, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(1,1), io.waddr, Bits(1,1), io.waddr), - Bits(0,20))))))) + Mux(io.ctrl.sel_vcmd === VCMD_A, io.wdata(SZ_VCMD-1, 0), + Bits(0,20)))))))) io.iface.vximm1q_bits := Mux(io.ctrl.sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, io.inst(21,10), vlenm1(10,0)), - io.wdata) // VIMM_ALU + io.wdata) // VIMM_ALU - io.iface.vximm2q_bits := io.rs2 + io.iface.vximm2q_bits := + Mux(io.ctrl.sel_vimm2 === VIMM2_RS2, io.rs2, + io.wdata) // VIMM2_ALU - io.iface.vcntq.bits := io.pcr_wport.data - io.iface.vcntq.valid := io.pcr_wport.en && io.pcr_wport.addr === PCR_VEC_CNT - io.ctrl.replay_cntq := io.iface.vcntq.valid && !io.iface.vcntq.ready + io.iface.vcntq_bits := io.wdata(SZ_VLEN-1, 0) io.iface.eaddr := io.vec_eaddr io.iface.exception := io.vec_exception diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 1511e1f9..4491e80d 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -246,6 +246,13 @@ object Instructions val VTCFGIVL = Bits("b?????_?????_????????????_011_1110011",32); val VSETVL = Bits("b?????_?????_000000000000_101_1110011",32); val VF = Bits("b00000_?????_????????????_111_1110011",32); + // vector supervisor instructions + val VENQCMD = Bits("b00000_?????_00000_1000000000_1111011",32) + val VENQIMM1 = Bits("b00000_?????_00000_1000000001_1111011",32) + val 
VENQIMM2 = Bits("b00000_?????_00000_1000000010_1111011",32) + val VENQCNT = Bits("b00000_?????_00000_1000000011_1111011",32) + val VWAITXCPT = Bits("b00000_00000_00000_1100000000_1111011",32) + val VWAITKILL = Bits("b00000_00000_00000_1100000001_1111011",32) val NOP = ADDI & Bits("b00000000000000000000001111111111", 32); } From d4ec7ff4d9da8f1fe38a56fd4fdd761f5f0f1a89 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 3 Mar 2012 16:11:54 -0800 Subject: [PATCH 0270/1087] refined vector exception interface --- rocket/src/main/scala/consts.scala | 1 - rocket/src/main/scala/cpu.scala | 2 ++ rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/dpath_util.scala | 8 ++++---- rocket/src/main/scala/dpath_vec.scala | 8 ++++++-- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 1568abb5..5ba224bf 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -144,7 +144,6 @@ object Constants val PCR_VECBANK = UFix(18, 5); // temporaries for vector, these will go away - val PCR_VEC_CNT = UFix(29, 5) val PCR_VEC_EADDR = UFix(30, 5) val PCR_VEC_XCPT = UFix(31, 5) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 90039b1f..ea2a2f26 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -200,6 +200,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) // exceptions vu.io.cpu_exception.addr := dpath.io.vec_iface.eaddr.toUFix vu.io.cpu_exception.exception := dpath.io.vec_iface.exception + vu.io.cpu_exception.kill := dpath.io.vec_iface.kill + vu.io.cpu_exception.hold := dpath.io.vec_iface.hold ctrl.io.vec_iface.exception_ack_valid := vu.io.exception_ack_valid vu.io.exception_ack_ready := ctrl.io.vec_iface.exception_ack_ready ctrl.io.vec_iface.kill_ack_valid := vu.io.kill_ack_valid diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala 
index 817e001b..f14f80cb 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -384,7 +384,7 @@ class rocketDpath extends Component vec.io.wdata := wb_reg_vec_wdata vec.io.rs2 := wb_reg_rs2 vec.io.vec_eaddr := pcr.io.vec_eaddr - vec.io.vec_exception := pcr.io.vec_exception + vec.io.vec_xcpt := pcr.io.vec_xcpt wb_wdata := Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 6afc8a8e..7353c91f 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -80,7 +80,7 @@ class ioDpathPCR extends Bundle() val vecbank = Bits(8, OUTPUT) val vecbankcnt = UFix(4, OUTPUT) val vec_eaddr = Bits(VADDR_BITS, OUTPUT) - val vec_exception = Bool(OUTPUT) + val vec_xcpt = Bits(3, OUTPUT) } class rocketDpathPCR extends Component @@ -100,7 +100,7 @@ class rocketDpathPCR extends Component val reg_ptbr = Reg() { UFix() }; val reg_vecbank = Reg(resetVal = Bits("b1111_1111", 8)) val reg_vec_eaddr = Reg() { Bits() } - val reg_vec_exception = Reg() { Bool() } + val reg_vec_xcpt = Reg() { Bits() } val reg_error_mode = Reg(resetVal = Bool(false)); val reg_status_vm = Reg(resetVal = Bool(false)); @@ -143,7 +143,7 @@ class rocketDpathPCR extends Component io.vecbankcnt := cnt(3,0) io.vec_eaddr := reg_vec_eaddr - io.vec_exception := reg_vec_exception + io.vec_xcpt := reg_vec_xcpt val badvaddr_sign = Mux(io.w.data(VADDR_BITS-1), ~io.w.data(63,VADDR_BITS) === UFix(0), io.w.data(63,VADDR_BITS) != UFix(0)) when (io.badvaddr_wen) { @@ -212,7 +212,7 @@ class rocketDpathPCR extends Component when (waddr === PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } when (waddr === PCR_VECBANK) { reg_vecbank := wdata(7,0) } when (waddr === PCR_VEC_EADDR) { reg_vec_eaddr := wdata(VADDR_BITS,0) } - when (waddr === PCR_VEC_XCPT) { reg_vec_exception:= wdata(0) } + when (waddr === PCR_VEC_XCPT) { reg_vec_xcpt := 
wdata(2,0) } } rdata := Bits(0, 64) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 012507af..86334e29 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -14,6 +14,8 @@ class ioDpathVecInterface extends Bundle val vcntq_bits = Bits(SZ_VLEN, OUTPUT) val eaddr = Bits(64, OUTPUT) val exception = Bool(OUTPUT) + val kill = Bool(OUTPUT) + val hold = Bool(OUTPUT) } class ioDpathVec extends Bundle @@ -29,7 +31,7 @@ class ioDpathVec extends Bundle val wdata = Bits(64, INPUT) val rs2 = Bits(64, INPUT) val vec_eaddr = Bits(64, INPUT) - val vec_exception = Bool(INPUT) + val vec_xcpt = Bits(3, INPUT) val wen = Bool(OUTPUT) val appvl = UFix(12, OUTPUT) } @@ -136,7 +138,9 @@ class rocketDpathVec extends Component io.iface.vcntq_bits := io.wdata(SZ_VLEN-1, 0) io.iface.eaddr := io.vec_eaddr - io.iface.exception := io.vec_exception + io.iface.exception := io.vec_xcpt(0) + io.iface.kill := io.vec_xcpt(1) + io.iface.hold := io.vec_xcpt(2) io.ctrl.valid := io.valid io.ctrl.inst := io.inst From a950d526d25edec3dbbf36a42a7adec310adc862 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 5 Mar 2012 12:09:41 -0800 Subject: [PATCH 0271/1087] add prefetch count queue --- rocket/src/main/scala/cpu.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index ea2a2f26..57bd3dba 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -179,8 +179,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.vec_pfximm1q.bits := dpath.io.vec_iface.vximm1q_bits vu.io.vec_pfximm2q.valid := ctrl.io.vec_iface.vpfximm2q_valid vu.io.vec_pfximm2q.bits := dpath.io.vec_iface.vximm2q_bits - // vu.io.vec_pfcntq.valid := ctrl.io.vec_iface.vpfcntq_valid - // vu.io.vec_pfcntq.bits := dpath.io.vec_iface.vcntq_bits + vu.io.vec_pfcntq.valid := ctrl.io.vec_iface.vpfcntq_valid + 
vu.io.vec_pfcntq.bits := dpath.io.vec_iface.vcntq_bits // don't have to use pf ready signals // if cmdq is not a load or store @@ -191,8 +191,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ctrl.io.vec_iface.vpfcmdq_ready := vu.io.vec_pfcmdq.ready ctrl.io.vec_iface.vpfximm1q_ready := vu.io.vec_pfximm1q.ready ctrl.io.vec_iface.vpfximm2q_ready := vu.io.vec_pfximm2q.ready - // ctrl.io.vec_iface.vpfcntq_ready := vu.io.vec_pfcntq.ready - ctrl.io.vec_iface.vpfcntq_ready := Bool(true) + ctrl.io.vec_iface.vpfcntq_ready := vu.io.vec_pfcntq.ready ctrl.io.vec_iface.vackq_valid := vu.io.vec_ackq.valid vu.io.vec_ackq.ready := ctrl.io.vec_iface.vackq_ready From 5c66a6699cf47788c019b1af2a15a1432361d447 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 5 Mar 2012 16:34:27 -0800 Subject: [PATCH 0272/1087] Broadcast hub control logic bugfixes and code cleanup --- rocket/src/main/scala/coherence.scala | 81 ++++++++++++--------------- 1 file changed, 37 insertions(+), 44 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 005f17c9..c6e702cd 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -26,18 +26,13 @@ class ioMem() extends Bundle val resp = (new ioPipe) { new MemResp() }.flip } -class HubMemReq extends Bundle { - val lock = Bool() -} - class TrackerProbeData extends Bundle { val tile_id = Bits(width = TILE_ID_BITS) } class TrackerAllocReq extends Bundle { val xact_init = new TransactionInit() - val init_tile_id = Bits(width = TILE_ID_BITS) - val data_valid = Bool() + val tile_id = Bits(width = TILE_ID_BITS) } @@ -98,8 +93,7 @@ object cpuCmdToRW { } } -trait CoherencePolicy { -} +trait CoherencePolicy { } trait ThreeStateIncoherence extends CoherencePolicy { val tileInvalid :: tileClean :: tileDirty :: Nil = Enum(3){ UFix() } @@ -242,36 +236,18 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { (t_type === 
X_INIT_WRITE_UNCACHED) } - val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() } - val state = Reg(resetVal = s_idle) - val addr_ = Reg{ UFix() } - val t_type_ = Reg{ Bits() } - val init_tile_id_ = Reg{ Bits() } - val tile_xact_id_ = Reg{ Bits() } - val p_rep_count = Reg(resetVal = UFix(0, width = log2up(NTILES))) - val p_req_flags = Reg(resetVal = Bits(0, width = NTILES)) - val p_rep_tile_id_ = Reg{ Bits() } - val x_needs_read = Reg(resetVal = Bool(false)) - val x_init_data_needs_write = Reg(resetVal = Bool(false)) - val p_rep_data_needs_write = Reg(resetVal = Bool(false)) - val mem_cmd_sent = Reg(resetVal = Bool(false)) - val mem_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) - val mem_cnt_next = mem_cnt + UFix(1) - - def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioDecoupled[MemData], trigger: Bool, pop: Bool) { - req_cmd.valid := mem_cmd_sent + def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioDecoupled[MemData], trigger: Bool, pop_data: Bool, cmd_sent: Bool) { + req_cmd.valid := !cmd_sent req_cmd.bits.rw := Bool(true) - //TODO: why does req_data <> data segfault? 
- req_data.valid := data.valid - req_data.bits.data := data.bits.data data.ready := req_data.ready + req_data <> data lock := Bool(true) when(req_cmd.ready && req_cmd.valid) { - mem_cmd_sent := Bool(false) + cmd_sent := Bool(true) } when(req_data.ready && req_data.valid) { - pop := Bool(true) - mem_cnt := mem_cnt_next + pop_data := Bool(true) + mem_cnt := mem_cnt_next } when(mem_cnt === ~UFix(0)) { trigger := Bool(false) @@ -286,6 +262,25 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { } } + val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() } + val state = Reg(resetVal = s_idle) + val addr_ = Reg{ UFix() } + val t_type_ = Reg{ Bits() } + val init_tile_id_ = Reg{ Bits() } + val tile_xact_id_ = Reg{ Bits() } + val p_rep_count = Reg(resetVal = UFix(0, width = log2up(NTILES))) + val p_req_flags = Reg(resetVal = Bits(0, width = NTILES)) + val p_rep_tile_id_ = Reg{ Bits() } + val x_needs_read = Reg(resetVal = Bool(false)) + val x_init_data_needs_write = Reg(resetVal = Bool(false)) + val p_rep_data_needs_write = Reg(resetVal = Bool(false)) + val x_w_mem_cmd_sent = Reg(resetVal = Bool(false)) + val p_w_mem_cmd_sent = Reg(resetVal = Bool(false)) + val mem_cmd_sent = Reg(resetVal = Bool(false)) + val mem_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + val mem_cnt_next = mem_cnt + UFix(1) + val mem_cnt_max = ~UFix(0, width = log2up(REFILL_CYCLES)) + io.busy := state != s_idle io.addr := addr_ io.init_tile_id := init_tile_id_ @@ -318,16 +313,16 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { when( io.alloc_req.valid && io.can_alloc ) { addr_ := io.alloc_req.bits.xact_init.address t_type_ := io.alloc_req.bits.xact_init.t_type - init_tile_id_ := io.alloc_req.bits.init_tile_id + init_tile_id_ := io.alloc_req.bits.tile_id tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := transactionInitHasData(io.alloc_req.bits.xact_init) x_needs_read := 
needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) p_rep_count := UFix(NTILES-1) - p_req_flags := ~( UFix(1) << io.alloc_req.bits.init_tile_id ) - state := Mux(p_req_flags.orR, s_probe, s_mem) + p_req_flags := ~( UFix(1) << io.alloc_req.bits.tile_id ) mem_cnt := UFix(0) mem_cmd_sent := Bool(false) io.pop_x_init := Bool(true) + state := Mux(p_req_flags.orR, s_probe, s_mem) } } is(s_probe) { @@ -342,9 +337,8 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { val p_rep_count_next = p_rep_count - PopCount(io.p_rep_cnt_dec) io.pop_p_rep := io.p_rep_cnt_dec p_rep_count := p_rep_count_next - when(p_rep_count_next === UFix(0)) { - mem_cnt := UFix(0) - mem_cmd_sent := Bool(false) + when(p_rep_count === UFix(0)) { + io.pop_p_rep := Bool(true) state := s_mem } } @@ -355,9 +349,9 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { } is(s_mem) { when (p_rep_data_needs_write) { - doMemReqWrite(io.mem_req_cmd, io.mem_req_data, io.mem_req_lock, io.p_rep_data, p_rep_data_needs_write, io.pop_p_rep_data) + doMemReqWrite(io.mem_req_cmd, io.mem_req_data, io.mem_req_lock, io.p_rep_data, p_rep_data_needs_write, io.pop_p_rep_data, p_w_mem_cmd_sent) } . elsewhen(x_init_data_needs_write) { - doMemReqWrite(io.mem_req_cmd, io.mem_req_data, io.mem_req_lock, io.x_init_data, x_init_data_needs_write, io.pop_x_init_data) + doMemReqWrite(io.mem_req_cmd, io.mem_req_data, io.mem_req_lock, io.x_init_data, x_init_data_needs_write, io.pop_x_init_data, x_w_mem_cmd_sent) } . elsewhen (x_needs_read) { doMemReqRead(io.mem_req_cmd, x_needs_read) } . 
otherwise { @@ -565,10 +559,9 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ val x_init_data = io.tiles(j).xact_init_data init_arb.io.in(j).valid := x_init.valid init_arb.io.in(j).bits.xact_init := x_init.bits - init_arb.io.in(j).bits.init_tile_id := UFix(j) - init_arb.io.in(j).bits.data_valid := x_init_data.valid - x_init.ready := aborting(j) || foldR(trackerList.map(_.io.pop_x_init && init_arb.io.out.bits.init_tile_id === UFix(j)))(_||_) - x_init_data.ready := aborting(j) || foldR(trackerList.map(_.io.pop_x_init_data && init_arb.io.out.bits.init_tile_id === UFix(j)))(_||_) + init_arb.io.in(j).bits.tile_id := UFix(j) + x_init.ready := aborting(j) || foldR(trackerList.map(_.io.pop_x_init && init_arb.io.out.bits.tile_id === UFix(j)))(_||_) + x_init_data.ready := aborting(j) || foldR(trackerList.map(_.io.pop_x_init_data && init_arb.io.out.bits.tile_id === UFix(j)))(_||_) } alloc_arb.io.out.ready := init_arb.io.out.valid && !busy_arr.toBits.andR && From 950b5cd900325913bb65c4f2840996b26b468354 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 5 Mar 2012 17:44:30 -0800 Subject: [PATCH 0273/1087] Added aborted data dequeueing state machine for BroadcastHub --- rocket/src/main/scala/coherence.scala | 58 ++++++++++++++++++++------- 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index c6e702cd..fc4f3570 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -240,7 +240,8 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { req_cmd.valid := !cmd_sent req_cmd.bits.rw := Bool(true) data.ready := req_data.ready - req_data <> data + req_data.bits := data.bits + req_data.valid := data.valid lock := Bool(true) when(req_cmd.ready && req_cmd.valid) { cmd_sent := Bool(true) @@ -521,23 +522,51 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ } // Nack conflicting 
transaction init attempts - val aborting = Bits(0, width = NTILES) + val s_idle :: s_abort_drain :: s_abort_send :: s_abort_complete :: Nil = Enum(4){ UFix() } + val abort_state_arr = Vec(NTILES) { Reg(resetVal = s_idle) } + val want_to_abort_arr = Vec(NTILES) { Wire() { Bool()} } for( j <- 0 until NTILES ) { val x_init = io.tiles(j).xact_init + val x_init_data = io.tiles(j).xact_init_data val x_abort = io.tiles(j).xact_abort val conflicts = Bits(width = NGLOBAL_XACTS) for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io - conflicts(UFix(i), t.busy && coherenceConflict(t.addr, x_init.bits.address) && - !(transactionInitHasData(x_init.bits) && (UFix(j) === t.init_tile_id))) - // Don't abort writebacks stalled on mem. - // TODO: This assumes overlapped writeback init reqs to - // the same addr will never be issued; is this ok? + conflicts(UFix(i), t.busy && x_init.valid && coherenceConflict(t.addr, x_init.bits.address)) } x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id - val want_to_abort = conflicts.orR || busy_arr.toBits.andR - x_abort.valid := want_to_abort && x_init.valid - aborting.bitSet(UFix(j), want_to_abort && x_abort.ready) + val abort_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + want_to_abort_arr(j) := conflicts.orR || busy_arr.toBits.andR + + x_abort.valid := Bool(false) + switch(abort_state_arr(j)) { + is(s_idle) { + when(want_to_abort_arr(j)) { + when(transactionInitHasData(x_init.bits)) { + abort_state_arr(j) := s_abort_drain + } . 
otherwise { + abort_state_arr(j) := s_abort_send + } + } + } + is(s_abort_drain) { // raises x_init_data.ready below + when(x_init_data.valid) { + abort_cnt := abort_cnt + UFix(1) + } + when(abort_cnt === ~UFix(0, width = log2up(REFILL_CYCLES))) { + abort_state_arr(j) := s_abort_send + } + } + is(s_abort_send) { // nothing is dequeued for now + x_abort.valid := Bool(true) + when(x_abort.ready) { + abort_state_arr(j) := s_abort_complete + } + } + is(s_abort_complete) { // raises x_init.ready below + abort_state_arr(j) := s_idle + } + } } // Handle transaction initiation requests @@ -557,15 +586,14 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ for( j <- 0 until NTILES ) { val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data - init_arb.io.in(j).valid := x_init.valid + init_arb.io.in(j).valid := (abort_state_arr(j) === s_idle) && !want_to_abort_arr(j) && x_init.valid init_arb.io.in(j).bits.xact_init := x_init.bits init_arb.io.in(j).bits.tile_id := UFix(j) - x_init.ready := aborting(j) || foldR(trackerList.map(_.io.pop_x_init && init_arb.io.out.bits.tile_id === UFix(j)))(_||_) - x_init_data.ready := aborting(j) || foldR(trackerList.map(_.io.pop_x_init_data && init_arb.io.out.bits.tile_id === UFix(j)))(_||_) + x_init.ready := (abort_state_arr(j) === s_abort_complete) || foldR(trackerList.map(_.io.pop_x_init && init_arb.io.out.bits.tile_id === UFix(j)))(_||_) + x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data && init_arb.io.out.bits.tile_id === UFix(j)))(_||_) } - alloc_arb.io.out.ready := init_arb.io.out.valid && !busy_arr.toBits.andR && - !foldR(trackerList.map(t => t.io.busy && coherenceConflict(t.io.addr, init_arb.io.out.bits.xact_init.address)))(_||_) + alloc_arb.io.out.ready := init_arb.io.out.valid // Handle probe request generation // Must arbitrate for each request port From 5f12990dfb2edde6f01156cbe7cdf5226e22e4d3 Mon Sep 17 00:00:00 2001 From: 
Andrew Waterman Date: Tue, 6 Mar 2012 00:31:44 -0800 Subject: [PATCH 0274/1087] support memory transaction aborts --- rocket/src/main/scala/arbiter.scala | 86 ++++++++++++--------- rocket/src/main/scala/coherence.scala | 2 + rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/htif.scala | 35 ++++++++- rocket/src/main/scala/icache.scala | 27 ++++--- rocket/src/main/scala/icache_prefetch.scala | 24 +++--- rocket/src/main/scala/nbdcache.scala | 78 +++++++++++++++---- rocket/src/main/scala/top.scala | 7 +- 8 files changed, 183 insertions(+), 78 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 9cdeb58f..9c0211a5 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -4,62 +4,74 @@ import Chisel._; import Node._; import Constants._; +class ioUncachedRequestor extends Bundle { + val xact_init = (new ioDecoupled) { new TransactionInit() } + val xact_abort = (new ioDecoupled) { new TransactionAbort() }.flip + val xact_rep = (new ioPipe) { new TransactionReply() }.flip + val xact_finish = (new ioDecoupled) { new TransactionFinish() } +} + class rocketMemArbiter(n: Int) extends Component { val io = new Bundle { - val mem = new ioTileLink - val requestor = Vec(n) { new ioTileLink().flip } + val mem = new ioUncachedRequestor + val requestor = Vec(n) { new ioUncachedRequestor().flip } } - var req_val = Bool(false) - var req_rdy = io.mem.xact_init.ready + var xi_val = Bool(false) + var xi_rdy = io.mem.xact_init.ready for (i <- 0 until n) { - io.requestor(i).xact_init.ready := req_rdy - req_val = req_val || io.requestor(i).xact_init.valid - req_rdy = req_rdy && !io.requestor(i).xact_init.valid + io.requestor(i).xact_init.ready := xi_rdy + xi_val = xi_val || io.requestor(i).xact_init.valid + xi_rdy = xi_rdy && !io.requestor(i).xact_init.valid } - // if more than one requestor at a time can write back, the data - // arbiter needs to be made stateful: one xact's write data must 
- // be sent to the memory system contiguously. - var req_data_val = Bool(false) - var req_data_rdy = io.mem.xact_init_data.ready + var xi_bits = Wire() { new TransactionInit } + xi_bits := io.requestor(n-1).xact_init.bits + xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2up(n))) + for (i <- n-2 to 0 by -1) + { + var my_xi_bits = Wire() { new TransactionInit } + my_xi_bits := io.requestor(i).xact_init.bits + my_xi_bits.tile_xact_id := Cat(io.requestor(i).xact_init.bits.tile_xact_id, UFix(i, log2up(n))) + + xi_bits = Mux(io.requestor(i).xact_init.valid, my_xi_bits, xi_bits) + } + + io.mem.xact_init.valid := xi_val + io.mem.xact_init.bits := xi_bits + + var xf_val = Bool(false) + var xf_rdy = io.mem.xact_finish.ready for (i <- 0 until n) { - io.requestor(i).xact_init_data.ready := req_data_rdy - req_data_val = req_data_val || io.requestor(i).xact_init_data.valid - req_data_rdy = req_data_rdy && !io.requestor(i).xact_init_data.valid + io.requestor(i).xact_finish.ready := xf_rdy + xf_val = xf_val || io.requestor(i).xact_finish.valid + xf_rdy = xf_rdy && !io.requestor(i).xact_finish.valid } - var req_bits = Wire() { new TransactionInit } - req_bits := io.requestor(n-1).xact_init.bits - req_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2up(n))) + var xf_bits = io.requestor(n-1).xact_finish.bits for (i <- n-2 to 0 by -1) - { - var my_req_bits = Wire() { new TransactionInit } - my_req_bits := io.requestor(i).xact_init.bits - my_req_bits.tile_xact_id := Cat(io.requestor(i).xact_init.bits.tile_xact_id, UFix(i, log2up(n))) + xf_bits = Mux(io.requestor(i).xact_finish.valid, io.requestor(i).xact_finish.bits, xf_bits) - req_bits = Mux(io.requestor(i).xact_init.valid, my_req_bits, req_bits) - } - - var req_data_bits = io.requestor(n-1).xact_init_data.bits - for (i <- n-2 to 0 by -1) - req_data_bits = Mux(io.requestor(i).xact_init_data.valid, io.requestor(i).xact_init_data.bits, req_data_bits) - - 
io.mem.xact_init.valid := req_val - io.mem.xact_init.bits := req_bits - - io.mem.xact_init_data.valid := req_data_val - io.mem.xact_init_data.bits := req_data_bits + io.mem.xact_finish.valid := xf_val + io.mem.xact_finish.bits := xf_bits for (i <- 0 until n) { val tag = io.mem.xact_rep.bits.tile_xact_id io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid && tag(log2up(n)-1,0) === UFix(i) - io.requestor(i).xact_rep.bits.data := io.mem.xact_rep.bits.data - io.requestor(i).xact_rep.bits.t_type := io.mem.xact_rep.bits.t_type + io.requestor(i).xact_rep.bits := io.mem.xact_rep.bits io.requestor(i).xact_rep.bits.tile_xact_id := tag >> UFix(log2up(n)) - io.requestor(i).xact_rep.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id } + + for (i <- 0 until n) + { + val tag = io.mem.xact_abort.bits.tile_xact_id + io.requestor(i).xact_abort.valid := io.mem.xact_abort.valid && tag(log2up(n)-1,0) === UFix(i) + io.requestor(i).xact_abort.bits := io.mem.xact_abort.bits + io.requestor(i).xact_abort.bits.tile_xact_id := tag >> UFix(log2up(n)) + } + + io.mem.xact_abort.ready := Bool(true) } diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index fc4f3570..7bcd19c3 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -395,6 +395,8 @@ class CoherenceHubNull extends CoherenceHub { x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.data := io.mem.resp.bits.data x_rep.valid := io.mem.resp.valid || x_init.valid && is_write + + io.tiles(0).xact_abort.valid := Bool(false) } diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 5ba224bf..f7a094fe 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -186,7 +186,7 @@ object Constants val NTILES = 1 val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8 val TILE_ID_BITS = 1 - val TILE_XACT_ID_BITS = log2up(NMSHR)+2 + val TILE_XACT_ID_BITS = log2up(NMSHR)+3 val 
GLOBAL_XACT_ID_BITS = 4 val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 6dedbc16..ca847376 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -78,7 +78,12 @@ class rocketHTIF(w: Int, ncores: Int) extends Component Mux(!nack && cmd === cmd_readcr, UFix(1), UFix(0))) val tx_done = packet_ram_raddr - UFix(1) === tx_size - val state_rx :: state_pcr :: state_mem_req :: state_mem_wdata :: state_mem_rdata :: state_tx :: Nil = Enum(6) { UFix() } + val mem_acked = Reg(resetVal = Bool(false)) + val mem_nacked = Reg(resetVal = Bool(false)) + when (io.mem.xact_rep.valid) { mem_acked := Bool(true) } + when (io.mem.xact_abort.valid) { mem_nacked := Bool(true) } + + val state_rx :: state_pcr :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_tx :: Nil = Enum(7) { UFix() } val state = Reg(resetVal = state_rx) when (state === state_rx && rx_done) { @@ -96,13 +101,35 @@ class rocketHTIF(w: Int, ncores: Int) extends Component when (state === state_mem_req && io.mem.xact_init.ready) { state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) } - when (state === state_mem_wdata && io.mem.xact_init_data.ready || - state === state_mem_rdata && io.mem.xact_rep.valid) { + when (state === state_mem_wdata && io.mem.xact_init_data.ready) { when (mem_cnt.andR) { - state := state_tx + state := state_mem_wresp } mem_cnt := mem_cnt + UFix(1) } + when (state === state_mem_wresp) { + when (mem_nacked) { + state := state_mem_req + mem_nacked := Bool(false) + } + when (mem_acked) { + state := state_tx + mem_acked := Bool(false) + } + } + when (state === state_mem_rdata) { + when (mem_nacked) { + state := state_mem_req + mem_nacked := Bool(false) + } + when (io.mem.xact_rep.valid) { + when (mem_cnt.andR) { + state := state_tx + } + mem_cnt := mem_cnt + UFix(1) + } + mem_acked := Bool(false) + } when (state === state_tx && tx_done) { 
rx_count := UFix(0) tx_count := UFix(0) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 6ca20c3e..b9cf197d 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -20,7 +20,7 @@ class ioImem(view: List[String] = null) extends Bundle (view) class ioRocketICache extends Bundle() { val cpu = new ioImem(); - val mem = new ioTileLink + val mem = new ioUncachedRequestor } // basic direct mapped instruction cache @@ -49,6 +49,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_reset); + val invalidated = Reg() { Bool() } val r_cpu_req_idx = Reg { Bits() } val r_cpu_req_ppn = Reg { Bits() } @@ -78,13 +79,14 @@ class rocketICache(sets: Int, assoc: Int) extends Component { when (io.mem.xact_rep.valid) { refill_count := refill_count + UFix(1); } + val refill_done = io.mem.xact_rep.valid && refill_count.andR val repl_way = LFSR16(state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss && !tag_hit)(log2up(assoc)-1,0) val word_shift = Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2up(databits))).toUFix + val tag_we = (state === s_refill) && refill_done val tag_addr = - Mux((state === s_refill_wait), r_cpu_req_idx(indexmsb,indexlsb), + Mux((state === s_refill), r_cpu_req_idx(indexmsb,indexlsb), io.cpu.req_idx(indexmsb,indexlsb)).toUFix; - val tag_we = (state === s_refill_wait) && io.mem.xact_rep.valid; val data_addr = Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; @@ -102,10 +104,10 @@ class rocketICache(sets: Int, assoc: Int) extends Component { // valid bit array val vb_array = Reg(resetVal = Bits(0, sets)); when (io.cpu.invalidate) { - vb_array := Bits(0,sets); + vb_array := Bits(0) } .elsewhen (tag_we && repl_me) { - vb_array 
:= vb_array.bitSet(r_cpu_req_idx(indexmsb,indexlsb).toUFix, UFix(1,1)); + vb_array := vb_array.bitSet(r_cpu_req_idx(indexmsb,indexlsb).toUFix, !invalidated) } val valid = vb_array(r_cpu_req_idx(indexmsb,indexlsb)).toBool; @@ -131,20 +133,20 @@ class rocketICache(sets: Int, assoc: Int) extends Component { io.mem.xact_init.valid := (state === s_request) io.mem.xact_init.bits.t_type := X_INIT_READ_UNCACHED io.mem.xact_init.bits.address := r_cpu_miss_addr(tagmsb,indexlsb).toUFix - io.mem.xact_init_data.valid := Bool(false) // control state machine + when (io.cpu.invalidate) { + invalidated := Bool(true) + } switch (state) { is (s_reset) { state := s_ready; } is (s_ready) { - when (io.cpu.itlb_miss) { - state := s_ready; - } - .elsewhen (r_cpu_req_val && !tag_hit) { + when (r_cpu_req_val && !tag_hit && !io.cpu.itlb_miss) { state := s_request; } + invalidated := Bool(false) } is (s_request) { @@ -153,12 +155,15 @@ class rocketICache(sets: Int, assoc: Int) extends Component { } } is (s_refill_wait) { + when (io.mem.xact_abort.valid) { + state := s_request + } when (io.mem.xact_rep.valid) { state := s_refill; } } is (s_refill) { - when (io.mem.xact_rep.valid && refill_count.andR) { + when (refill_done) { state := s_ready; } } diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 9ad85d82..b9d96a81 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -18,22 +18,23 @@ class rocketIPrefetcher extends Component() { val s_invalid :: s_valid :: s_refilling :: s_req_wait :: s_resp_wait :: s_bad_resp_wait :: Nil = Enum(6) { UFix() }; val state = Reg(resetVal = s_invalid); + val ip_mem_resp_abort = io.mem.xact_abort.valid && io.mem.xact_abort.bits.tile_xact_id(0) val demand_miss = io.icache.xact_init.valid && io.icache.xact_init.ready val prefetch_addr = Reg() { UFix(width = io.icache.xact_init.bits.address.width) }; val addr_match = (prefetch_addr === 
io.icache.xact_init.bits.address); - val hit = (state != s_invalid) & (state != s_req_wait) & addr_match; + val hit = (state != s_invalid) && (state != s_req_wait) && addr_match && !ip_mem_resp_abort val prefetch_miss = io.icache.xact_init.valid && !hit when (demand_miss) { prefetch_addr := io.icache.xact_init.bits.address + UFix(1); } - + io.icache.xact_init.ready := io.mem.xact_init.ready - val ip_mem_req_rdy = io.mem.xact_init.ready && !prefetch_miss val ip_mem_resp_val = io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id(0) - + val ip_mem_req_rdy = io.mem.xact_init.ready && !prefetch_miss + + io.mem.xact_abort.ready := Bool(true) io.mem.xact_init.valid := prefetch_miss || (state === s_req_wait) io.mem.xact_init.bits.t_type := X_INIT_READ_UNCACHED io.mem.xact_init.bits.tile_xact_id := Mux(prefetch_miss, UFix(0), UFix(1)) io.mem.xact_init.bits.address := Mux(prefetch_miss, io.icache.xact_init.bits.address, prefetch_addr); - io.mem.xact_init_data.valid := Bool(false) val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); when (ip_mem_resp_val.toBool) { fill_cnt := fill_cnt + UFix(1); } @@ -43,8 +44,10 @@ class rocketIPrefetcher extends Component() { val forward_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); when (forward & pdq.io.deq.valid) { forward_cnt := forward_cnt + UFix(1); } val forward_done = (~forward_cnt === UFix(0)) & pdq.io.deq.valid; - forward := (demand_miss & hit | forward & ~forward_done); + forward := demand_miss && hit || forward && !forward_done + io.icache.xact_abort.valid := io.mem.xact_abort.valid && !io.mem.xact_abort.bits.tile_xact_id(0) || + forward && ip_mem_resp_abort io.icache.xact_rep.valid := io.mem.xact_rep.valid && !io.mem.xact_rep.bits.tile_xact_id(0) || (forward && pdq.io.deq.valid) io.icache.xact_rep.bits.data := Mux(forward, pdq.io.deq.bits, io.mem.xact_rep.bits.data) @@ -69,11 +72,14 @@ class rocketIPrefetcher extends Component() { when (ip_mem_req_rdy) { state := 
s_resp_wait; } } is (s_resp_wait) { - when (demand_miss & ~addr_match) { state := s_bad_resp_wait; } - .elsewhen (ip_mem_resp_val.toBool) { state := s_refilling; } + when (ip_mem_resp_abort) { + state := s_invalid + } + .elsewhen (demand_miss && !addr_match) { state := s_bad_resp_wait } + .elsewhen (ip_mem_resp_val) { state := s_refilling } } is (s_bad_resp_wait) { - when (fill_done.toBool & ip_mem_resp_val.toBool) { state := s_req_wait; } + when (fill_done || ip_mem_resp_abort) { state := s_req_wait } } } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index a5dc640b..ef2e7a08 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -178,6 +178,7 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { val way_oh = Bits(NWAYS, OUTPUT) val mem_resp_val = Bool(INPUT) + val mem_abort_val = Bool(INPUT) val mem_req = (new ioDecoupled) { new TransactionInit } val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } val replay = (new ioDecoupled) { new Replay() } @@ -216,6 +217,9 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { when (io.mem_req.valid && io.mem_req.ready) { requested := Bool(true) } + when (io.mem_abort_val) { + requested := Bool(false) + } when (io.mem_resp_val) { refilled := Bool(true) } @@ -264,6 +268,7 @@ class MSHRFile extends Component { val mem_req = (new ioDecoupled) { new TransactionInit } val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } val data_req = (new ioDecoupled) { new DataReq() } + val mem_abort = (new ioPipe) { new TransactionAbort }.flip val cpu_resp_val = Bool(OUTPUT) val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) @@ -310,8 +315,8 @@ class MSHRFile extends Component { mshr.io.mem_req <> mem_req_arb.io.in(i) mshr.io.replay <> replay_arb.io.in(i) - val mem_resp_val = io.mem_resp_val && (UFix(i) === io.mem_resp_tag) - mshr.io.mem_resp_val := mem_resp_val + mshr.io.mem_resp_val := io.mem_resp_val && (UFix(i) 
=== io.mem_resp_tag) + mshr.io.mem_abort_val := io.mem_abort.valid && (UFix(i) === io.mem_abort.bits.tile_xact_id) mem_resp_idx_mux.io.sel(i) := (UFix(i) === io.mem_resp_tag) mem_resp_idx_mux.io.in(i) := mshr.io.idx mem_resp_way_oh_mux.io.sel(i) := (UFix(i) === io.mem_resp_tag) @@ -352,22 +357,55 @@ class WritebackUnit extends Component { val data_req = (new ioDecoupled) { new DataArrayArrayReq() } val data_resp = Bits(MEM_DATA_BITS, INPUT) val refill_req = (new ioDecoupled) { new TransactionInit }.flip - val mem_req = (new ioDecoupled) { new TransactionInit } + val mem_req = (new ioDecoupled) { new TransactionInit } val mem_req_data = (new ioDecoupled) { new TransactionInitData } + val mem_abort = (new ioPipe) { new TransactionAbort }.flip + val mem_rep = (new ioPipe) { new TransactionReply }.flip } val valid = Reg(resetVal = Bool(false)) val data_req_fired = Reg(resetVal = Bool(false)) + val cmd_sent = Reg() { Bool() } val cnt = Reg() { UFix(width = log2up(REFILL_CYCLES+1)) } val addr = Reg() { new WritebackReq() } - data_req_fired := Bool(false) - when (io.data_req.valid && io.data_req.ready) { data_req_fired := Bool(true); cnt := cnt + UFix(1) } - when (data_req_fired && !io.mem_req_data.ready) { data_req_fired := Bool(false); cnt := cnt - UFix(1) } - when ((cnt === UFix(REFILL_CYCLES)) && io.mem_req_data.ready) { valid := Bool(false) } - when (io.req.valid && io.req.ready) { valid := Bool(true); cnt := UFix(0); addr := io.req.bits } + val acked = Reg() { Bool() } + val nacked = Reg() { Bool() } + when (io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(NMSHR)) { acked := Bool(true) } + when (io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(NMSHR)) { nacked := Bool(true) } - io.req.ready := !valid && io.mem_req.ready + data_req_fired := Bool(false) + when (valid && io.mem_req.ready) { + cmd_sent := Bool(true) + } + when (io.data_req.valid && io.data_req.ready) { + data_req_fired := Bool(true) + cnt := cnt + UFix(1) + } + when (data_req_fired 
&& !io.mem_req_data.ready) { + data_req_fired := Bool(false) + cnt := cnt - UFix(1) + } + when ((cnt === UFix(REFILL_CYCLES)) && (!data_req_fired || io.mem_req_data.ready)) { + when (acked) { + valid := Bool(false) + } + when (nacked) { + cmd_sent := Bool(false) + nacked := Bool(false) + cnt := UFix(0) + } + } + when (io.req.valid && io.req.ready) { + valid := Bool(true) + acked := Bool(false) + nacked := Bool(false) + cmd_sent := Bool(false) + cnt := UFix(0) + addr := io.req.bits + } + + io.req.ready := !valid io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) io.data_req.bits.way_en := addr.way_oh io.data_req.bits.inner_req.idx := addr.idx @@ -376,11 +414,12 @@ class WritebackUnit extends Component { io.data_req.bits.inner_req.wmask := Bits(0) io.data_req.bits.inner_req.data := Bits(0) - val wb_req_val = io.req.valid && !valid - io.refill_req.ready := io.mem_req.ready && !wb_req_val - io.mem_req.valid := io.refill_req.valid || wb_req_val + val wb_req_val = valid && !cmd_sent + io.refill_req.ready := io.mem_req.ready && !(valid && !acked) + io.mem_req.valid := io.refill_req.valid && !(valid && !acked) || wb_req_val io.mem_req.bits.t_type := Mux(wb_req_val, X_INIT_WRITE_UNCACHED, io.refill_req.bits.t_type) - io.mem_req.bits.address := Mux(wb_req_val, Cat(io.req.bits.ppn, io.req.bits.idx).toUFix, io.refill_req.bits.address) + io.mem_req.bits.has_data := wb_req_val + io.mem_req.bits.address := Mux(wb_req_val, Cat(addr.ppn, addr.idx).toUFix, io.refill_req.bits.address) io.mem_req.bits.tile_xact_id := Mux(wb_req_val, Bits(NMSHR), io.refill_req.bits.tile_xact_id) io.mem_req_data.valid := data_req_fired @@ -676,8 +715,12 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) // refill counter +<<<<<<< HEAD val mem_resp_type = io.mem.xact_rep.bits.t_type val refill_val = io.mem.xact_rep.valid && (mem_resp_type === X_REP_READ_SHARED || mem_resp_type === X_REP_READ_EXCLUSIVE) 
+======= + val refill_val = io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id < UFix(NMSHR) +>>>>>>> support memory transaction aborts val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val rr_count_next = rr_count + UFix(1) when (refill_val) { rr_count := rr_count_next } @@ -725,6 +768,9 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { wb_arb.io.out <> wb.io.req wb.io.data_req <> data_arb.io.in(3) wb.io.data_resp <> data_resp_mux + wb.io.mem_rep <> io.mem.xact_rep + wb.io.mem_abort.valid := io.mem.xact_abort.valid + wb.io.mem_abort.bits := io.mem.xact_abort.bits // replacement policy val replacer = new RandomReplacementWayGen() @@ -737,9 +783,11 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { // refill response val block_during_refill = !refill_val && (rr_count != UFix(0)) data_arb.io.in(0).bits.inner_req.offset := rr_count + data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx data_arb.io.in(0).bits.inner_req.rw := !block_during_refill data_arb.io.in(0).bits.inner_req.wmask := ~UFix(0, MEM_DATA_BITS/8) data_arb.io.in(0).bits.inner_req.data := io.mem.xact_rep.bits.data + data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh data_arb.io.in(0).valid := refill_val || block_during_refill // load hits @@ -815,10 +863,10 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { mshr.io.mem_resp_val := refill_val && (~rr_count === UFix(0)) mshr.io.mem_resp_tag := io.mem.xact_rep.bits.tile_xact_id + mshr.io.mem_abort.valid := io.mem.xact_abort.valid + mshr.io.mem_abort.bits := io.mem.xact_abort.bits mshr.io.mem_req <> wb.io.refill_req mshr.io.meta_req <> meta_arb.io.in(1) - data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx - data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh replacer.io.pick_new_way := mshr.io.req.valid && mshr.io.req.ready // replays diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 526dc4e8..87cd901c 
100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -29,13 +29,18 @@ class Top() extends Component { val hub = new CoherenceHubNull // connect tile to hub hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init) - hub.io.tiles(0).xact_init_data <> Queue(arbiter.io.mem.xact_init_data) + arbiter.io.mem.xact_abort <> Queue(hub.io.tiles(0).xact_abort) arbiter.io.mem.xact_rep <> Pipe(hub.io.tiles(0).xact_rep) // connect hub to memory io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) io.mem.req_data <> Queue(hub.io.mem.req_data) hub.io.mem.resp <> Pipe(io.mem.resp) + // temporary HTIF data connection + val data_arb = (new Arbiter(2)) { new TransactionInitData } + data_arb.io.in(0) <> Queue(dcache.io.mem.xact_init_data) + data_arb.io.in(1) <> Queue(htif.io.mem.xact_init_data) + hub.io.tiles(0).xact_init_data <> data_arb.io.out if (HAVE_VEC) { From 5f33ab24b0bf05c1037a890dce46a33f1809d863 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 6 Mar 2012 02:02:53 -0800 Subject: [PATCH 0275/1087] fix merge conflict oops :( --- rocket/src/main/scala/nbdcache.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index ef2e7a08..45c5ce4d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -418,7 +418,6 @@ class WritebackUnit extends Component { io.refill_req.ready := io.mem_req.ready && !(valid && !acked) io.mem_req.valid := io.refill_req.valid && !(valid && !acked) || wb_req_val io.mem_req.bits.t_type := Mux(wb_req_val, X_INIT_WRITE_UNCACHED, io.refill_req.bits.t_type) - io.mem_req.bits.has_data := wb_req_val io.mem_req.bits.address := Mux(wb_req_val, Cat(addr.ppn, addr.idx).toUFix, io.refill_req.bits.address) io.mem_req.bits.tile_xact_id := Mux(wb_req_val, Bits(NMSHR), io.refill_req.bits.tile_xact_id) @@ -715,12 +714,7 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val cpu_req_data = 
Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) // refill counter -<<<<<<< HEAD - val mem_resp_type = io.mem.xact_rep.bits.t_type - val refill_val = io.mem.xact_rep.valid && (mem_resp_type === X_REP_READ_SHARED || mem_resp_type === X_REP_READ_EXCLUSIVE) -======= val refill_val = io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id < UFix(NMSHR) ->>>>>>> support memory transaction aborts val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val rr_count_next = rr_count + UFix(1) when (refill_val) { rr_count := rr_count_next } From dba99e07a9522945846dd92869e74eb4707bab84 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 6 Mar 2012 08:54:21 -0800 Subject: [PATCH 0276/1087] set MEM_TAG_BITS to 5 when HAVE_VEC is true, since NMSHR=4 --- rocket/src/main/scala/consts.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index f7a094fe..36fc4053 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -217,7 +217,7 @@ object Constants val P_REP_COPY_ACK = UFix(5, P_REP_TYPE_BITS) // external memory interface - val MEM_TAG_BITS = 4 + val MEM_TAG_BITS = if (HAVE_VEC) 5 else 4 val MEM_DATA_BITS = 128 val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS require(MEM_TAG_BITS >= max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS)) From 6e16b04ada5692b5f013fc222d0725eb989c1ab1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 6 Mar 2012 15:47:19 -0800 Subject: [PATCH 0277/1087] implement transaction finish messages --- rocket/src/main/scala/coherence.scala | 1 + rocket/src/main/scala/htif.scala | 15 ++-- rocket/src/main/scala/icache.scala | 11 ++- rocket/src/main/scala/icache_prefetch.scala | 48 ++++++------ rocket/src/main/scala/nbdcache.scala | 82 +++++++++++++-------- rocket/src/main/scala/top.scala | 1 + 6 files changed, 97 insertions(+), 61 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala 
b/rocket/src/main/scala/coherence.scala index 7bcd19c3..817e6d73 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -397,6 +397,7 @@ class CoherenceHubNull extends CoherenceHub { x_rep.valid := io.mem.resp.valid || x_init.valid && is_write io.tiles(0).xact_abort.valid := Bool(false) + io.tiles(0).xact_finish.ready := Bool(true) } diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index ca847376..3176608e 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -79,11 +79,12 @@ class rocketHTIF(w: Int, ncores: Int) extends Component val tx_done = packet_ram_raddr - UFix(1) === tx_size val mem_acked = Reg(resetVal = Bool(false)) + val mem_gxid = Reg() { Bits() } val mem_nacked = Reg(resetVal = Bool(false)) - when (io.mem.xact_rep.valid) { mem_acked := Bool(true) } + when (io.mem.xact_rep.valid) { mem_acked := Bool(true); mem_gxid := io.mem.xact_rep.bits.global_xact_id } when (io.mem.xact_abort.valid) { mem_nacked := Bool(true) } - val state_rx :: state_pcr :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_tx :: Nil = Enum(7) { UFix() } + val state_rx :: state_pcr :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(8) { UFix() } val state = Reg(resetVal = state_rx) when (state === state_rx && rx_done) { @@ -113,7 +114,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component mem_nacked := Bool(false) } when (mem_acked) { - state := state_tx + state := state_mem_finish mem_acked := Bool(false) } } @@ -124,12 +125,15 @@ class rocketHTIF(w: Int, ncores: Int) extends Component } when (io.mem.xact_rep.valid) { when (mem_cnt.andR) { - state := state_tx + state := state_mem_finish } mem_cnt := mem_cnt + UFix(1) } mem_acked := Bool(false) } + when (state === state_mem_finish && io.mem.xact_finish.ready) { + state := state_tx + } when (state === state_tx && tx_done) { 
rx_count := UFix(0) tx_count := UFix(0) @@ -146,9 +150,10 @@ class rocketHTIF(w: Int, ncores: Int) extends Component io.mem.xact_init.valid := state === state_mem_req io.mem.xact_init.bits.t_type := Mux(cmd === cmd_writemem, X_INIT_WRITE_UNCACHED, X_INIT_READ_UNCACHED) io.mem.xact_init.bits.address := addr >> UFix(OFFSET_BITS-3) - io.mem.xact_init_data.valid:= state === state_mem_wdata io.mem.xact_init_data.bits.data := mem_req_data + io.mem.xact_finish.valid := state === state_mem_finish + io.mem.xact_finish.bits.global_xact_id := mem_gxid pcr_done := Bool(false) val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index b9cf197d..4e9aa368 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -83,7 +83,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { val repl_way = LFSR16(state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss && !tag_hit)(log2up(assoc)-1,0) val word_shift = Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2up(databits))).toUFix - val tag_we = (state === s_refill) && refill_done + val tag_we = refill_done val tag_addr = Mux((state === s_refill), r_cpu_req_idx(indexmsb,indexlsb), io.cpu.req_idx(indexmsb,indexlsb)).toUFix; @@ -126,13 +126,18 @@ class rocketICache(sets: Int, assoc: Int) extends Component { } tag_hit := any_hit + val finish_q = (new queue(1)) { new TransactionFinish } + finish_q.io.enq.valid := refill_done + finish_q.io.enq.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id + // output signals io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_hit; rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); io.cpu.resp_data := data_mux.io.out - io.mem.xact_init.valid := (state === s_request) + io.mem.xact_init.valid := (state === s_request) && finish_q.io.enq.ready io.mem.xact_init.bits.t_type := X_INIT_READ_UNCACHED 
io.mem.xact_init.bits.address := r_cpu_miss_addr(tagmsb,indexlsb).toUFix + io.mem.xact_finish <> finish_q.io.deq // control state machine when (io.cpu.invalidate) { @@ -150,7 +155,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { } is (s_request) { - when (io.mem.xact_init.ready) { + when (io.mem.xact_init.ready && finish_q.io.enq.ready) { state := s_refill_wait; } } diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index b9d96a81..9c2d47e5 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -29,21 +29,30 @@ class rocketIPrefetcher extends Component() { io.icache.xact_init.ready := io.mem.xact_init.ready val ip_mem_resp_val = io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id(0) val ip_mem_req_rdy = io.mem.xact_init.ready && !prefetch_miss - + + val finish_q = (new queue(1)) { new TransactionFinish } io.mem.xact_abort.ready := Bool(true) - io.mem.xact_init.valid := prefetch_miss || (state === s_req_wait) + io.mem.xact_init.valid := prefetch_miss || (state === s_req_wait) && finish_q.io.enq.ready io.mem.xact_init.bits.t_type := X_INIT_READ_UNCACHED io.mem.xact_init.bits.tile_xact_id := Mux(prefetch_miss, UFix(0), UFix(1)) io.mem.xact_init.bits.address := Mux(prefetch_miss, io.icache.xact_init.bits.address, prefetch_addr); + + val finish_arb = (new Arbiter(2)) { new TransactionFinish } + finish_arb.io.in(0) <> io.icache.xact_finish + finish_arb.io.in(1) <> finish_q.io.deq + io.mem.xact_finish <> finish_arb.io.out - val fill_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); - when (ip_mem_resp_val.toBool) { fill_cnt := fill_cnt + UFix(1); } - val fill_done = (~fill_cnt === UFix(0)) & ip_mem_resp_val; + val fill_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + when (ip_mem_resp_val) { fill_cnt := fill_cnt + UFix(1) } + val fill_done = fill_cnt.andR && ip_mem_resp_val + + finish_q.io.enq.valid := fill_done + 
finish_q.io.enq.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id - val forward = Reg(resetVal = Bool(false)); - val forward_cnt = Reg(resetVal = UFix(0, ceil(log(REFILL_CYCLES)/log(2)).toInt)); - when (forward & pdq.io.deq.valid) { forward_cnt := forward_cnt + UFix(1); } - val forward_done = (~forward_cnt === UFix(0)) & pdq.io.deq.valid; + val forward = Reg(resetVal = Bool(false)) + val forward_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + when (forward && pdq.io.deq.valid) { forward_cnt := forward_cnt + UFix(1) } + val forward_done = forward_cnt.andR && pdq.io.deq.valid forward := demand_miss && hit || forward && !forward_done io.icache.xact_abort.valid := io.mem.xact_abort.valid && !io.mem.xact_abort.bits.tile_xact_id(0) || @@ -61,29 +70,24 @@ class rocketIPrefetcher extends Component() { when (demand_miss) { state := s_req_wait; } } is (s_valid) { - when (demand_miss | (forward & forward_done)) { state := s_req_wait; } + when (demand_miss || forward && forward_done) { state := s_req_wait } + .elsewhen (io.invalidate && !forward) { state := s_invalid } } is (s_refilling) { - when (demand_miss & ~addr_match & fill_done.toBool) { state := s_req_wait; } - .elsewhen (demand_miss & ~addr_match) { state := s_bad_resp_wait; } - .elsewhen (fill_done.toBool) { state := s_valid; } + when (demand_miss && !addr_match && fill_done) { state := s_req_wait } + .elsewhen (fill_done) { state := Mux(io.invalidate, s_invalid, s_valid) } + .elsewhen (demand_miss && !addr_match || io.invalidate) { state := s_bad_resp_wait } } is (s_req_wait) { - when (ip_mem_req_rdy) { state := s_resp_wait; } + when (ip_mem_req_rdy && finish_q.io.enq.ready) { state := s_resp_wait } } is (s_resp_wait) { - when (ip_mem_resp_abort) { - state := s_invalid - } - .elsewhen (demand_miss && !addr_match) { state := s_bad_resp_wait } + when (ip_mem_resp_abort) { state := s_invalid } + .elsewhen (demand_miss && !addr_match || io.invalidate) { state := s_bad_resp_wait } .elsewhen 
(ip_mem_resp_val) { state := s_refilling } } is (s_bad_resp_wait) { when (fill_done || ip_mem_resp_abort) { state := s_req_wait } } } - - when (io.invalidate) { - state := s_invalid - } } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 45c5ce4d..1e8f6a94 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -174,20 +174,23 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { val idx_match = Bool(OUTPUT) val idx = Bits(IDX_BITS, OUTPUT) + val refill_count = Bits(log2up(REFILL_CYCLES), OUTPUT) val tag = Bits(TAG_BITS, OUTPUT) val way_oh = Bits(NWAYS, OUTPUT) - val mem_resp_val = Bool(INPUT) - val mem_abort_val = Bool(INPUT) val mem_req = (new ioDecoupled) { new TransactionInit } val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } val replay = (new ioDecoupled) { new Replay() } + val mem_abort = (new ioPipe) { new TransactionAbort }.flip + val mem_rep = (new ioPipe) { new TransactionReply }.flip + val mem_finish = (new ioDecoupled) { new TransactionFinish } } val valid = Reg(resetVal = Bool(false)) val state = Reg { UFix() } val requested = Reg { Bool() } val refilled = Reg { Bool() } + val refill_count = Reg { UFix(width = log2up(REFILL_CYCLES)) } val ppn = Reg { Bits() } val idx_ = Reg { Bits() } val way_oh_ = Reg { Bits() } @@ -204,11 +207,18 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { rpq.io.enq.bits.sdq_id := io.req_sdq_id rpq.io.deq.ready := io.replay.ready && refilled + val refill_done = io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(id) && refill_count.andR + + val finish_q = (new queue(1)) { new TransactionFinish } + finish_q.io.enq.valid := refill_done + finish_q.io.enq.bits := io.mem_rep.bits.global_xact_id + when (io.req_pri_val && io.req_pri_rdy) { valid := Bool(true) state := newStateOnPrimaryMiss(req_cmd) requested := Bool(false) refilled := Bool(false) + refill_count := UFix(0) ppn := io.req_bits.ppn 
idx_ := io.req_bits.idx way_oh_ := io.req_bits.way_oh @@ -217,10 +227,13 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { when (io.mem_req.valid && io.mem_req.ready) { requested := Bool(true) } - when (io.mem_abort_val) { + when (io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(id)) { requested := Bool(false) } - when (io.mem_resp_val) { + when (io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(id)) { + refill_count := refill_count + UFix(1) + } + when (refill_done) { refilled := Bool(true) } when (io.meta_req.valid && io.meta_req.ready) { @@ -233,7 +246,8 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { io.idx := idx_ io.tag := ppn io.way_oh := way_oh_ - io.req_pri_rdy := !valid + io.refill_count := refill_count + io.req_pri_rdy := !valid && finish_q.io.enq.ready io.req_sec_rdy := sec_rdy && rpq.io.enq.ready io.meta_req.valid := valid && refilled && !rpq.io.deq.valid @@ -247,6 +261,7 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { io.mem_req.bits.t_type := Mux(needsWriteback(next_state), X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) io.mem_req.bits.address := Cat(ppn, idx_).toUFix io.mem_req.bits.tile_xact_id := Bits(id) + io.mem_finish <> finish_q.io.deq io.replay.valid := rpq.io.deq.valid && refilled io.replay.bits <> rpq.io.deq.bits @@ -258,9 +273,8 @@ class MSHRFile extends Component { val io = new Bundle { val req = (new ioDecoupled) { new MSHRReq }.flip - val mem_resp_val = Bool(INPUT) - val mem_resp_tag = Bits(MEM_TAG_BITS, INPUT) val mem_resp_idx = Bits(IDX_BITS, OUTPUT) + val mem_resp_offset = Bits(log2up(REFILL_CYCLES), OUTPUT) val mem_resp_way_oh = Bits(NWAYS, OUTPUT) val fence_rdy = Bool(OUTPUT) @@ -269,6 +283,8 @@ class MSHRFile extends Component { val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } val data_req = (new ioDecoupled) { new DataReq() } val mem_abort = (new ioPipe) { new TransactionAbort }.flip + val mem_rep = (new ioPipe) { new TransactionReply 
}.flip + val mem_finish = (new ioDecoupled) { new TransactionFinish } val cpu_resp_val = Bool(OUTPUT) val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) @@ -284,10 +300,10 @@ class MSHRFile extends Component { sdq.setTarget('inst) val tag_mux = (new Mux1H(NMSHR)){ Bits(width = TAG_BITS) } - val mem_resp_idx_mux = (new Mux1H(NMSHR)){ Bits(width = IDX_BITS) } - val mem_resp_way_oh_mux = (new Mux1H(NMSHR)){ Bits(width = NWAYS) } + val mem_resp_mux = (new Mux1H(NMSHR)){ new DataArrayArrayReq } val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } + val mem_finish_arb = (new Arbiter(NMSHR)) { new TransactionFinish } val replay_arb = (new Arbiter(NMSHR)) { new Replay() } val alloc_arb = (new Arbiter(NMSHR)) { Bool() } @@ -313,14 +329,15 @@ class MSHRFile extends Component { mshr.io.meta_req <> meta_req_arb.io.in(i) mshr.io.mem_req <> mem_req_arb.io.in(i) + mshr.io.mem_finish <> mem_finish_arb.io.in(i) mshr.io.replay <> replay_arb.io.in(i) - mshr.io.mem_resp_val := io.mem_resp_val && (UFix(i) === io.mem_resp_tag) - mshr.io.mem_abort_val := io.mem_abort.valid && (UFix(i) === io.mem_abort.bits.tile_xact_id) - mem_resp_idx_mux.io.sel(i) := (UFix(i) === io.mem_resp_tag) - mem_resp_idx_mux.io.in(i) := mshr.io.idx - mem_resp_way_oh_mux.io.sel(i) := (UFix(i) === io.mem_resp_tag) - mem_resp_way_oh_mux.io.in(i) := mshr.io.way_oh + mshr.io.mem_abort <> io.mem_abort + mshr.io.mem_rep <> io.mem_rep + mem_resp_mux.io.sel(i) := UFix(i) === io.mem_rep.bits.tile_xact_id + mem_resp_mux.io.in(i).inner_req.idx := mshr.io.idx + mem_resp_mux.io.in(i).inner_req.offset := mshr.io.refill_count + mem_resp_mux.io.in(i).way_en := mshr.io.way_oh pri_rdy = pri_rdy || mshr.io.req_pri_rdy sec_rdy = sec_rdy || mshr.io.req_sec_rdy @@ -332,10 +349,12 @@ class MSHRFile extends Component { meta_req_arb.io.out <> io.meta_req mem_req_arb.io.out <> io.mem_req + mem_finish_arb.io.out <> io.mem_finish io.req.ready := Mux(idx_match, 
tag_match && sec_rdy, pri_rdy) && sdq_rdy - io.mem_resp_idx := mem_resp_idx_mux.io.out - io.mem_resp_way_oh := mem_resp_way_oh_mux.io.out + io.mem_resp_idx := mem_resp_mux.io.out.inner_req.idx + io.mem_resp_offset := mem_resp_mux.io.out.inner_req.offset + io.mem_resp_way_oh := mem_resp_mux.io.out.way_en io.fence_rdy := !fence val replay = Queue(replay_arb.io.out, 1, pipe = true) @@ -361,6 +380,7 @@ class WritebackUnit extends Component { val mem_req_data = (new ioDecoupled) { new TransactionInitData } val mem_abort = (new ioPipe) { new TransactionAbort }.flip val mem_rep = (new ioPipe) { new TransactionReply }.flip + val mem_finish = (new ioDecoupled) { new TransactionFinish } } val valid = Reg(resetVal = Bool(false)) @@ -374,6 +394,10 @@ class WritebackUnit extends Component { when (io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(NMSHR)) { acked := Bool(true) } when (io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(NMSHR)) { nacked := Bool(true) } + val finish_q = (new queue(1)) { new TransactionFinish } + finish_q.io.enq.valid := io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(NMSHR) + finish_q.io.enq.bits.global_xact_id := io.mem_rep.bits.global_xact_id + data_req_fired := Bool(false) when (valid && io.mem_req.ready) { cmd_sent := Bool(true) @@ -405,7 +429,7 @@ class WritebackUnit extends Component { addr := io.req.bits } - io.req.ready := !valid + io.req.ready := !valid && finish_q.io.enq.ready io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) io.data_req.bits.way_en := addr.way_oh io.data_req.bits.inner_req.idx := addr.idx @@ -420,9 +444,9 @@ class WritebackUnit extends Component { io.mem_req.bits.t_type := Mux(wb_req_val, X_INIT_WRITE_UNCACHED, io.refill_req.bits.t_type) io.mem_req.bits.address := Mux(wb_req_val, Cat(addr.ppn, addr.idx).toUFix, io.refill_req.bits.address) io.mem_req.bits.tile_xact_id := Mux(wb_req_val, Bits(NMSHR), io.refill_req.bits.tile_xact_id) - io.mem_req_data.valid := data_req_fired 
io.mem_req_data.bits.data := io.data_resp + io.mem_finish <> finish_q.io.deq } class FlushUnit(lines: Int) extends Component with ThreeStateIncoherence{ @@ -713,12 +737,6 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { } val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) - // refill counter - val refill_val = io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id < UFix(NMSHR) - val rr_count = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) - val rr_count_next = rr_count + UFix(1) - when (refill_val) { rr_count := rr_count_next } - val misaligned = (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || @@ -757,12 +775,14 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val data_resp_mux = Mux1H(NWAYS, data_resp_way_oh, data.io.resp) // writeback unit + val finish_arb = (new Arbiter(2)) { new TransactionFinish } val wb = new WritebackUnit val wb_arb = (new Arbiter(2)) { new WritebackReq() } wb_arb.io.out <> wb.io.req wb.io.data_req <> data_arb.io.in(3) wb.io.data_resp <> data_resp_mux wb.io.mem_rep <> io.mem.xact_rep + wb.io.mem_finish <> finish_arb.io.in(0) wb.io.mem_abort.valid := io.mem.xact_abort.valid wb.io.mem_abort.bits := io.mem.xact_abort.bits @@ -775,14 +795,13 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { val needs_writeback = needsWriteback(meta_wb_mux.state) // refill response - val block_during_refill = !refill_val && (rr_count != UFix(0)) - data_arb.io.in(0).bits.inner_req.offset := rr_count + data_arb.io.in(0).bits.inner_req.offset := mshr.io.mem_resp_offset data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx - data_arb.io.in(0).bits.inner_req.rw := !block_during_refill + data_arb.io.in(0).bits.inner_req.rw := Bool(true) data_arb.io.in(0).bits.inner_req.wmask := ~UFix(0, MEM_DATA_BITS/8) 
data_arb.io.in(0).bits.inner_req.data := io.mem.xact_rep.bits.data data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh - data_arb.io.in(0).valid := refill_val || block_during_refill + data_arb.io.in(0).valid := io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id < UFix(NMSHR) // load hits data_arb.io.in(4).bits.inner_req.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) @@ -855,8 +874,8 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { mshr.io.req.bits.way_oh := replaced_way_oh mshr.io.req.bits.data := cpu_req_data - mshr.io.mem_resp_val := refill_val && (~rr_count === UFix(0)) - mshr.io.mem_resp_tag := io.mem.xact_rep.bits.tile_xact_id + mshr.io.mem_rep <> io.mem.xact_rep + mshr.io.mem_finish <> finish_arb.io.in(1) mshr.io.mem_abort.valid := io.mem.xact_abort.valid mshr.io.mem_abort.bits := io.mem.xact_abort.bits mshr.io.mem_req <> wb.io.refill_req @@ -939,4 +958,5 @@ class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { io.mem.xact_init <> wb.io.mem_req io.mem.xact_init_data <> wb.io.mem_req_data + io.mem.xact_finish <> finish_arb.io.out } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 87cd901c..2783f951 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -31,6 +31,7 @@ class Top() extends Component { hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init) arbiter.io.mem.xact_abort <> Queue(hub.io.tiles(0).xact_abort) arbiter.io.mem.xact_rep <> Pipe(hub.io.tiles(0).xact_rep) + hub.io.tiles(0).xact_finish <> Queue(arbiter.io.mem.xact_finish) // connect hub to memory io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) io.mem.req_data <> Queue(hub.io.mem.req_data) From 499c5b4a2e71b5aca962f5dda366fbcdf1f139c0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 6 Mar 2012 15:49:28 -0800 Subject: [PATCH 0278/1087] automatically infer MEM_TAG_BITS --- rocket/src/main/scala/consts.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) 
diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 36fc4053..e9af85b7 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -217,10 +217,9 @@ object Constants val P_REP_COPY_ACK = UFix(5, P_REP_TYPE_BITS) // external memory interface - val MEM_TAG_BITS = if (HAVE_VEC) 5 else 4 + val MEM_TAG_BITS = max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS) val MEM_DATA_BITS = 128 val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS - require(MEM_TAG_BITS >= max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS)) val DTLB_ENTRIES = 8; val ITLB_ENTRIES = 8; From 962e5a54af3c2ee0534aebdc3ba823d401e5dd5a Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 6 Mar 2012 13:49:20 -0800 Subject: [PATCH 0279/1087] Added store dependency queues to BroadcastHub. Minor improvements to utils. --- rocket/src/main/scala/coherence.scala | 80 +++++++++++++++++++++------ rocket/src/main/scala/util.scala | 7 ++- 2 files changed, 68 insertions(+), 19 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 817e6d73..51505882 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -35,6 +35,9 @@ class TrackerAllocReq extends Bundle { val tile_id = Bits(width = TILE_ID_BITS) } +class TrackerDependency extends Bundle { + val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) +} class TransactionInit extends Bundle { val t_type = Bits(width = X_INIT_TYPE_BITS) @@ -196,9 +199,11 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { val xact_finish = Bool(INPUT) val p_rep_cnt_dec = Bits(NTILES, INPUT) val p_req_cnt_inc = Bits(NTILES, INPUT) - val p_rep_data = (new ioDecoupled) { new ProbeReplyData }.flip - val x_init_data = (new ioDecoupled) { new TransactionInitData }.flip + val p_rep_data = (new ioPipe) { new ProbeReplyData }.flip + val x_init_data = (new ioPipe) { new TransactionInitData }.flip val sent_x_rep_ack = 
Bool(INPUT) + val p_rep_data_dep = (new ioPipe) { new TrackerDependency }.flip + val x_init_data_dep = (new ioPipe) { new TrackerDependency }.flip val mem_req_cmd = (new ioDecoupled) { new MemReqCmd } val mem_req_data = (new ioDecoupled) { new MemData } @@ -214,8 +219,10 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { val push_p_req = Bits(NTILES, OUTPUT) val pop_p_rep = Bits(NTILES, OUTPUT) val pop_p_rep_data = Bits(NTILES, OUTPUT) + val pop_p_rep_dep = Bits(NTILES, OUTPUT) val pop_x_init = Bool(OUTPUT) val pop_x_init_data = Bool(OUTPUT) + val pop_x_init_dep = Bits(NTILES, OUTPUT) val send_x_rep_ack = Bool(OUTPUT) } @@ -236,13 +243,12 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { (t_type === X_INIT_WRITE_UNCACHED) } - def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioDecoupled[MemData], trigger: Bool, pop_data: Bool, cmd_sent: Bool) { - req_cmd.valid := !cmd_sent + def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioPipe[MemData], trigger: Bool, pop_data: Bool, cmd_sent: Bool, pop_dep: Bool, at_front_of_dep_queue: Bool) { + req_cmd.valid := !cmd_sent && at_front_of_dep_queue req_cmd.bits.rw := Bool(true) - data.ready := req_data.ready + req_data.valid := data.valid && at_front_of_dep_queue req_data.bits := data.bits - req_data.valid := data.valid - lock := Bool(true) + lock := at_front_of_dep_queue when(req_cmd.ready && req_cmd.valid) { cmd_sent := Bool(true) } @@ -250,7 +256,8 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { pop_data := Bool(true) mem_cnt := mem_cnt_next } - when(mem_cnt === ~UFix(0)) { + when(mem_cnt_next === UFix(0)) { + pop_dep := Bool(true) trigger := Bool(false) } } @@ -277,7 +284,6 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { val p_rep_data_needs_write = Reg(resetVal = Bool(false)) val x_w_mem_cmd_sent = Reg(resetVal = Bool(false)) 
val p_w_mem_cmd_sent = Reg(resetVal = Bool(false)) - val mem_cmd_sent = Reg(resetVal = Bool(false)) val mem_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) val mem_cnt_next = mem_cnt + UFix(1) val mem_cnt_max = ~UFix(0, width = log2up(REFILL_CYCLES)) @@ -285,6 +291,7 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { io.busy := state != s_idle io.addr := addr_ io.init_tile_id := init_tile_id_ + io.p_rep_tile_id := p_rep_tile_id_ io.tile_xact_id := tile_xact_id_ io.sharer_count := UFix(NTILES) // TODO: Broadcast only io.t_type := t_type_ @@ -303,11 +310,11 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { io.push_p_req := Bits(0, width = NTILES) io.pop_p_rep := Bits(0, width = NTILES) io.pop_p_rep_data := Bits(0, width = NTILES) + io.pop_p_rep_dep := Bits(0, width = NTILES) io.pop_x_init := Bool(false) io.pop_x_init_data := Bool(false) + io.pop_x_init_dep := Bits(0, width = NTILES) io.send_x_rep_ack := Bool(false) - io.x_init_data.ready := Bool(false) // don't care - io.p_rep_data.ready := Bool(false) // don't care switch (state) { is(s_idle) { @@ -321,7 +328,8 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { p_rep_count := UFix(NTILES-1) p_req_flags := ~( UFix(1) << io.alloc_req.bits.tile_id ) mem_cnt := UFix(0) - mem_cmd_sent := Bool(false) + p_w_mem_cmd_sent := Bool(false) + x_w_mem_cmd_sent := Bool(false) io.pop_x_init := Bool(true) state := Mux(p_req_flags.orR, s_probe, s_mem) } @@ -350,9 +358,25 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { } is(s_mem) { when (p_rep_data_needs_write) { - doMemReqWrite(io.mem_req_cmd, io.mem_req_data, io.mem_req_lock, io.p_rep_data, p_rep_data_needs_write, io.pop_p_rep_data, p_w_mem_cmd_sent) + doMemReqWrite(io.mem_req_cmd, + io.mem_req_data, + io.mem_req_lock, + io.p_rep_data, + p_rep_data_needs_write, + io.pop_p_rep_data, + p_w_mem_cmd_sent, + io.pop_p_rep_dep, + io.p_rep_data_dep.valid && 
(io.p_rep_data_dep.bits.global_xact_id === UFix(id))) } . elsewhen(x_init_data_needs_write) { - doMemReqWrite(io.mem_req_cmd, io.mem_req_data, io.mem_req_lock, io.x_init_data, x_init_data_needs_write, io.pop_x_init_data, x_w_mem_cmd_sent) + doMemReqWrite(io.mem_req_cmd, + io.mem_req_data, + io.mem_req_lock, + io.x_init_data, + x_init_data_needs_write, + io.pop_x_init_data, + x_w_mem_cmd_sent, + io.pop_x_init_dep, + io.x_init_data_dep.valid && (io.x_init_data_dep.bits.global_xact_id === UFix(id))) } . elsewhen (x_needs_read) { doMemReqRead(io.mem_req_cmd, x_needs_read) } . otherwise { @@ -457,6 +481,9 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ } } + val p_rep_data_dep_list = List.fill(NTILES)((new queue(NGLOBAL_XACTS, true)){new TrackerDependency}) // depth must >= NPRIMARY + val x_init_data_dep_list = List.fill(NTILES)((new queue(NGLOBAL_XACTS, true)){new TrackerDependency}) // depth should >= NPRIMARY + // Free finished transactions for( j <- 0 until NTILES ) { val finish = io.tiles(j).xact_finish @@ -510,14 +537,23 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ val p_rep = io.tiles(j).probe_rep val p_rep_data = io.tiles(j).probe_rep_data val idx = p_rep.bits.global_xact_id - p_rep.ready := foldR(trackerList.map(_.io.pop_p_rep(j)))(_ || _) + val pop_p_reps = trackerList.map(_.io.pop_p_rep(j).toBool) + val do_pop = foldR(pop_p_reps)(_ || _) + p_rep.ready := do_pop + p_rep_data_dep_list(j).io.enq.valid := do_pop + p_rep_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_p_reps) p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) p_data_valid_arr(idx) := p_rep.valid && probeReplyHasData(p_rep.bits) p_data_tile_id_arr(idx) := UFix(j) + p_rep_data_dep_list(j).io.deq.ready := foldR(trackerList.map(_.io.pop_p_rep_dep(j).toBool))(_||_) } for( i <- 0 until NGLOBAL_XACTS ) { trackerList(i).io.p_rep_data.valid := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.valid 
trackerList(i).io.p_rep_data.bits := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.bits + + trackerList(i).io.p_rep_data_dep.valid := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.valid, (0 until NTILES).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.valid)) + trackerList(i).io.p_rep_data_dep.bits := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.bits, (0 until NTILES).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.bits)) + for( j <- 0 until NTILES) { val p_rep = io.tiles(j).probe_rep p_rep_cnt_dec_arr(i)(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) @@ -532,14 +568,14 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data val x_abort = io.tiles(j).xact_abort + val abort_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) val conflicts = Bits(width = NGLOBAL_XACTS) for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io conflicts(UFix(i), t.busy && x_init.valid && coherenceConflict(t.addr, x_init.bits.address)) } x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id - val abort_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) - want_to_abort_arr(j) := conflicts.orR || busy_arr.toBits.andR + want_to_abort_arr(j) := conflicts.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && transactionInitHasData(x_init.bits)) x_abort.valid := Bool(false) switch(abort_state_arr(j)) { @@ -585,6 +621,9 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ trackerList(i).io.x_init_data.bits := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.bits trackerList(i).io.x_init_data.valid := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.valid + //TODO trackerList(i).io.x_init_data_dep <> x_init_data_dep_arr(trackerList(i).io.init_tile_id) + trackerList(i).io.x_init_data_dep.bits <> 
MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.bits, (0 until NTILES).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.bits)) + trackerList(i).io.x_init_data_dep.valid := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.valid, (0 until NTILES).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.valid)) } for( j <- 0 until NTILES ) { val x_init = io.tiles(j).xact_init @@ -592,8 +631,13 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ init_arb.io.in(j).valid := (abort_state_arr(j) === s_idle) && !want_to_abort_arr(j) && x_init.valid init_arb.io.in(j).bits.xact_init := x_init.bits init_arb.io.in(j).bits.tile_id := UFix(j) - x_init.ready := (abort_state_arr(j) === s_abort_complete) || foldR(trackerList.map(_.io.pop_x_init && init_arb.io.out.bits.tile_id === UFix(j)))(_||_) + val pop_x_inits = trackerList.map(_.io.pop_x_init && init_arb.io.out.bits.tile_id === UFix(j)) + val do_pop = foldR(pop_x_inits)(_||_) + x_init_data_dep_list(j).io.enq.valid := do_pop + x_init_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_x_inits) + x_init.ready := (abort_state_arr(j) === s_abort_complete) || do_pop x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data && init_arb.io.out.bits.tile_id === UFix(j)))(_||_) + x_init_data_dep_list(j).io.deq.ready := foldR(trackerList.map(_.io.pop_x_init_dep(j).toBool))(_||_) } alloc_arb.io.out.ready := init_arb.io.out.valid diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 60225a9e..7d0db909 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -85,6 +85,11 @@ object OHToUFix val out = MuxCase( UFix(0), (0 until in.getWidth).map( i => (in(i).toBool, UFix(i)))) out.toUFix } + def apply(in: Seq[Bool]): UFix = + { + val out = MuxCase( UFix(0), in.zipWithIndex map {case (b,i) => (b, UFix(i))}) + out.toUFix + } } object UFixToOH @@ -173,7 
+178,7 @@ class ioDecoupled[+T <: Data]()(data: => T) extends Bundle val bits = data.asOutput } -class ioPipe[T <: Data]()(data: => T) extends Bundle +class ioPipe[+T <: Data]()(data: => T) extends Bundle { val valid = Bool(OUTPUT) val bits = data.asOutput From c0ed010bc957dd0fbf5bb0bbcd6b340014da09c9 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 6 Mar 2012 15:54:19 -0800 Subject: [PATCH 0280/1087] newTransactionOnMiss() --- rocket/src/main/scala/coherence.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 51505882..5a52ac44 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -155,6 +155,10 @@ trait FourStateCoherence extends CoherencePolicy { val (read, write) = cpuCmdToRW(cmd) Mux(write, tileExclusiveDirty, state) } + def newTransactionOnMiss(cmd: Bits, state: UFix): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) + } def newStateOnTransactionRep(incoming: TransactionReply, outstanding: TransactionInit): UFix = { MuxLookup(incoming.t_type, tileInvalid, Array( X_REP_READ_SHARED -> tileShared, From 3dd404dcf4af275ea6c04219b2c18a8349d269c0 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 6 Mar 2012 17:01:47 -0800 Subject: [PATCH 0281/1087] hub code cleanup --- rocket/src/main/scala/coherence.scala | 39 +++++++++++++++------------ 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 5a52ac44..a04b7096 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -141,6 +141,9 @@ trait FourStateCoherence extends CoherencePolicy { (write && (state === tileExclusiveClean || state === tileExclusiveDirty))) } + //TODO: do we need isPresent() for determining that a line needs to be + //upgraded but that no replacement is needed? 
+ def isValid (state: UFix): Bool = { state != tileInvalid } @@ -224,8 +227,8 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { val pop_p_rep = Bits(NTILES, OUTPUT) val pop_p_rep_data = Bits(NTILES, OUTPUT) val pop_p_rep_dep = Bits(NTILES, OUTPUT) - val pop_x_init = Bool(OUTPUT) - val pop_x_init_data = Bool(OUTPUT) + val pop_x_init = Bits(NTILES, OUTPUT) + val pop_x_init_data = Bits(NTILES, OUTPUT) val pop_x_init_dep = Bits(NTILES, OUTPUT) val send_x_rep_ack = Bool(OUTPUT) } @@ -247,7 +250,7 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { (t_type === X_INIT_WRITE_UNCACHED) } - def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioPipe[MemData], trigger: Bool, pop_data: Bool, cmd_sent: Bool, pop_dep: Bool, at_front_of_dep_queue: Bool) { + def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioPipe[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { req_cmd.valid := !cmd_sent && at_front_of_dep_queue req_cmd.bits.rw := Bool(true) req_data.valid := data.valid && at_front_of_dep_queue @@ -257,11 +260,11 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { cmd_sent := Bool(true) } when(req_data.ready && req_data.valid) { - pop_data := Bool(true) + pop_data := UFix(1) << tile_id mem_cnt := mem_cnt_next } when(mem_cnt_next === UFix(0)) { - pop_dep := Bool(true) + pop_dep := UFix(1) << tile_id trigger := Bool(false) } } @@ -315,8 +318,8 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { io.pop_p_rep := Bits(0, width = NTILES) io.pop_p_rep_data := Bits(0, width = NTILES) io.pop_p_rep_dep := Bits(0, width = NTILES) - io.pop_x_init := Bool(false) - io.pop_x_init_data := Bool(false) + io.pop_x_init := Bits(0, width = NTILES) + io.pop_x_init_data := Bits(0, width = NTILES) io.pop_x_init_dep := Bits(0, width = NTILES) 
io.send_x_rep_ack := Bool(false) @@ -334,7 +337,7 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { mem_cnt := UFix(0) p_w_mem_cmd_sent := Bool(false) x_w_mem_cmd_sent := Bool(false) - io.pop_x_init := Bool(true) + io.pop_x_init := UFix(1) << io.alloc_req.bits.tile_id state := Mux(p_req_flags.orR, s_probe, s_mem) } } @@ -367,20 +370,22 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { io.mem_req_lock, io.p_rep_data, p_rep_data_needs_write, - io.pop_p_rep_data, p_w_mem_cmd_sent, + io.pop_p_rep_data, io.pop_p_rep_dep, - io.p_rep_data_dep.valid && (io.p_rep_data_dep.bits.global_xact_id === UFix(id))) + io.p_rep_data_dep.valid && (io.p_rep_data_dep.bits.global_xact_id === UFix(id)), + p_rep_tile_id_) } . elsewhen(x_init_data_needs_write) { doMemReqWrite(io.mem_req_cmd, io.mem_req_data, io.mem_req_lock, io.x_init_data, x_init_data_needs_write, - io.pop_x_init_data, x_w_mem_cmd_sent, + io.pop_x_init_data, io.pop_x_init_dep, - io.x_init_data_dep.valid && (io.x_init_data_dep.bits.global_xact_id === UFix(id))) + io.x_init_data_dep.valid && (io.x_init_data_dep.bits.global_xact_id === UFix(id)), + init_tile_id_) } . elsewhen (x_needs_read) { doMemReqRead(io.mem_req_cmd, x_needs_read) } . 
otherwise { @@ -625,23 +630,23 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ trackerList(i).io.x_init_data.bits := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.bits trackerList(i).io.x_init_data.valid := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.valid - //TODO trackerList(i).io.x_init_data_dep <> x_init_data_dep_arr(trackerList(i).io.init_tile_id) - trackerList(i).io.x_init_data_dep.bits <> MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.bits, (0 until NTILES).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.bits)) + trackerList(i).io.x_init_data_dep.bits := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.bits, (0 until NTILES).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.bits)) trackerList(i).io.x_init_data_dep.valid := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.valid, (0 until NTILES).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.valid)) } for( j <- 0 until NTILES ) { val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data + val x_init_data_dep = x_init_data_dep_list(j).io.deq init_arb.io.in(j).valid := (abort_state_arr(j) === s_idle) && !want_to_abort_arr(j) && x_init.valid init_arb.io.in(j).bits.xact_init := x_init.bits init_arb.io.in(j).bits.tile_id := UFix(j) - val pop_x_inits = trackerList.map(_.io.pop_x_init && init_arb.io.out.bits.tile_id === UFix(j)) + val pop_x_inits = trackerList.map(_.io.pop_x_init(j).toBool) val do_pop = foldR(pop_x_inits)(_||_) x_init_data_dep_list(j).io.enq.valid := do_pop x_init_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_x_inits) x_init.ready := (abort_state_arr(j) === s_abort_complete) || do_pop - x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data && init_arb.io.out.bits.tile_id === UFix(j)))(_||_) - x_init_data_dep_list(j).io.deq.ready := 
foldR(trackerList.map(_.io.pop_x_init_dep(j).toBool))(_||_) + x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data(j).toBool))(_||_) + x_init_data_dep.ready := foldR(trackerList.map(_.io.pop_x_init_dep(j).toBool))(_||_) } alloc_arb.io.out.ready := init_arb.io.out.valid From 47a2097507154c67512b625747715456edb532cc Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 6 Mar 2012 17:33:11 -0800 Subject: [PATCH 0282/1087] unified coherence trait functions --- rocket/src/main/scala/coherence.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index a04b7096..bda67671 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -126,7 +126,15 @@ trait ThreeStateIncoherence extends CoherencePolicy { val (read, write) = cpuCmdToRW(cmd) Mux(write, tileDirty, state) } - + def newTransactionOnMiss(cmd: Bits, state: UFix): UFix = X_INIT_READ_EXCLUSIVE + def newStateOnTransactionRep(cmd: Bits, incoming: TransactionReply, outstanding: TransactionInit): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, tileDirty, tileClean) + } + def needsSecondaryXact(cmd: Bits, outstanding: TransactionInit): Bool = Bool(false) + def newStateOnProbeReq(incoming: ProbeRequest, state: UFix): Bits = state + def probeReplyHasData (reply: ProbeReply): Bool = Bool(false) + def transactionInitHasData (init: TransactionInit): Bool = (init.t_type != X_INIT_WRITE_UNCACHED) } trait FourStateCoherence extends CoherencePolicy { @@ -162,7 +170,7 @@ trait FourStateCoherence extends CoherencePolicy { val (read, write) = cpuCmdToRW(cmd) Mux(write, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) } - def newStateOnTransactionRep(incoming: TransactionReply, outstanding: TransactionInit): UFix = { + def newStateOnTransactionRep(cmd: Bits, incoming: TransactionReply, outstanding: TransactionInit): UFix = { 
MuxLookup(incoming.t_type, tileInvalid, Array( X_REP_READ_SHARED -> tileShared, X_REP_READ_EXCLUSIVE -> Mux(outstanding.t_type === X_INIT_READ_EXCLUSIVE, tileExclusiveDirty, tileExclusiveClean), From 81dcb194d34af0a224d226dfb0a0b22f8cf55ddc Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 6 Mar 2012 22:39:15 -0800 Subject: [PATCH 0283/1087] new vector exception interface --- rocket/src/main/scala/cpu.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 57bd3dba..48aaaeff 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -197,14 +197,14 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.vec_ackq.ready := ctrl.io.vec_iface.vackq_ready // exceptions - vu.io.cpu_exception.addr := dpath.io.vec_iface.eaddr.toUFix - vu.io.cpu_exception.exception := dpath.io.vec_iface.exception - vu.io.cpu_exception.kill := dpath.io.vec_iface.kill - vu.io.cpu_exception.hold := dpath.io.vec_iface.hold - ctrl.io.vec_iface.exception_ack_valid := vu.io.exception_ack_valid - vu.io.exception_ack_ready := ctrl.io.vec_iface.exception_ack_ready - ctrl.io.vec_iface.kill_ack_valid := vu.io.kill_ack_valid - vu.io.kill_ack_ready := ctrl.io.vec_iface.kill_ack_ready + vu.io.xcpt_backup.exception := dpath.io.vec_iface.exception + vu.io.xcpt_backup.exception_addr := dpath.io.vec_iface.eaddr.toUFix + ctrl.io.vec_iface.exception_ack_valid := vu.io.xcpt_backup.exception_ack_valid + vu.io.xcpt_backup.exception_ack_ready := ctrl.io.vec_iface.exception_ack_ready + vu.io.xcpt_resume.hold := dpath.io.vec_iface.hold + vu.io.xcpt_kill.kill := dpath.io.vec_iface.kill + ctrl.io.vec_iface.kill_ack_valid := vu.io.xcpt_kill.kill_ack_valid + vu.io.xcpt_kill.kill_ack_ready := ctrl.io.vec_iface.kill_ack_ready // hooking up vector memory interface val storegen = new StoreDataGen From 6e2610b0ada4c54512d0aa709ae8c644be212c00 Mon Sep 17 00:00:00 2001 
From: Andrew Waterman Date: Tue, 6 Mar 2012 23:37:51 -0800 Subject: [PATCH 0284/1087] fix Mux1H for bundles --- rocket/src/main/scala/util.scala | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 7d0db909..df99e432 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -158,16 +158,19 @@ class Mux1H [T <: Data](n: Int)(gen: => T) extends Component val out = gen.asOutput } - if (n > 2) { - var out = io.in(0).toBits & Fill(gen.getWidth, io.sel(0)) - for (i <- 1 to n-1) - out = out | (io.in(i).toBits & Fill(gen.getWidth, io.sel(i))) - io.out := out - } else if (n == 2) { - io.out := Mux(io.sel(1), io.in(1), io.in(0)) - } else { - io.out := io.in(0) + def buildMux(sel: Bits, in: Vec[T], i: Int, n: Int): T = { + if (n == 1) + in(i) + else + { + val half_n = (1 << log2up(n))/2 + val left = buildMux(sel, in, i, half_n) + val right = buildMux(sel, in, i + half_n, n - half_n) + Mux(sel(i+n-1,i+half_n).orR, right, left) + } } + + io.out := buildMux(io.sel.toBits, io.in, 0, n) } From a0c9452b860de17d87cf98a39d236d0be160fd8b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 7 Mar 2012 01:26:35 -0800 Subject: [PATCH 0285/1087] change D$ to use FourStateCoherence protocol instead of ThreeStateIncoherence. 
--- rocket/src/main/scala/coherence.scala | 33 +++++++++-------- rocket/src/main/scala/nbdcache.scala | 52 +++++++++++++-------------- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index bda67671..2b34b8ae 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -121,17 +121,18 @@ trait ThreeStateIncoherence extends CoherencePolicy { Mux(write, tileDirty, Mux(read, Mux(state === tileDirty, tileDirty, tileClean), state)) } def newStateOnHit(cmd: Bits, state: UFix): UFix = newState(cmd, state) - def newStateOnPrimaryMiss(cmd: Bits): UFix = newState(cmd, tileInvalid) - def newStateOnSecondaryMiss(cmd: Bits, state: UFix): UFix = { + def newTransactionOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) - Mux(write, tileDirty, state) + Mux(write, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) } - def newTransactionOnMiss(cmd: Bits, state: UFix): UFix = X_INIT_READ_EXCLUSIVE - def newStateOnTransactionRep(cmd: Bits, incoming: TransactionReply, outstanding: TransactionInit): UFix = { + def newTransactionOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { val (read, write) = cpuCmdToRW(cmd) - Mux(write, tileDirty, tileClean) - } + Mux(write, X_INIT_READ_EXCLUSIVE, outstanding.t_type) + } def needsSecondaryXact(cmd: Bits, outstanding: TransactionInit): Bool = Bool(false) + def newStateOnTransactionRep(incoming: TransactionReply, outstanding: TransactionInit): UFix = { + Mux(outstanding.t_type === X_INIT_READ_EXCLUSIVE, tileDirty, tileClean) + } def newStateOnProbeReq(incoming: ProbeRequest, state: UFix): Bits = state def probeReplyHasData (reply: ProbeReply): Bool = Bool(false) def transactionInitHasData (init: TransactionInit): Bool = (init.t_type != X_INIT_WRITE_UNCACHED) @@ -166,11 +167,20 @@ trait FourStateCoherence extends CoherencePolicy { val (read, write) = cpuCmdToRW(cmd) 
Mux(write, tileExclusiveDirty, state) } - def newTransactionOnMiss(cmd: Bits, state: UFix): UFix = { + def newTransactionOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) Mux(write, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) } - def newStateOnTransactionRep(cmd: Bits, incoming: TransactionReply, outstanding: TransactionInit): UFix = { + def newTransactionOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, X_INIT_READ_EXCLUSIVE, outstanding.t_type) + } + def needsSecondaryXact(cmd: Bits, outstanding: TransactionInit): Bool = { + val (read, write) = cpuCmdToRW(cmd) + (read && (outstanding.t_type === X_INIT_READ_UNCACHED || outstanding.t_type === X_INIT_WRITE_UNCACHED)) || + (write && (outstanding.t_type != X_INIT_READ_EXCLUSIVE)) + } + def newStateOnTransactionRep(incoming: TransactionReply, outstanding: TransactionInit): UFix = { MuxLookup(incoming.t_type, tileInvalid, Array( X_REP_READ_SHARED -> tileShared, X_REP_READ_EXCLUSIVE -> Mux(outstanding.t_type === X_INIT_READ_EXCLUSIVE, tileExclusiveDirty, tileExclusiveClean), @@ -179,11 +189,6 @@ trait FourStateCoherence extends CoherencePolicy { X_REP_WRITE_UNCACHED -> tileInvalid )) } - def needsSecondaryXact(cmd: Bits, outstanding: TransactionInit): Bool = { - val (read, write) = cpuCmdToRW(cmd) - (read && (outstanding.t_type === X_INIT_READ_UNCACHED || outstanding.t_type === X_INIT_WRITE_UNCACHED)) || - (write && (outstanding.t_type != X_INIT_READ_EXCLUSIVE)) - } def newStateOnProbeReq(incoming: ProbeRequest, state: UFix): Bits = { MuxLookup(incoming.p_type, state, Array( diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 1e8f6a94..54615a74 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -163,7 +163,7 @@ class MetaArrayArrayReq extends Bundle { val way_en = Bits(width = NWAYS) } -class MSHR(id: Int) extends 
Component with ThreeStateIncoherence { +class MSHR(id: Int) extends Component with FourStateCoherence { val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) @@ -187,6 +187,7 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { } val valid = Reg(resetVal = Bool(false)) + val xact_type = Reg { UFix() } val state = Reg { UFix() } val requested = Reg { Bool() } val refilled = Reg { Bool() } @@ -196,10 +197,8 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { val way_oh_ = Reg { Bits() } val req_cmd = io.req_bits.cmd - val req_load = (req_cmd === M_XRD) || (req_cmd === M_PFR) val req_use_rpq = (req_cmd != M_PFR) && (req_cmd != M_PFW) - val next_state = Mux(io.req_sec_val && io.req_sec_rdy, newStateOnSecondaryMiss(req_cmd, state), state) - val sec_rdy = io.idx_match && !refilled && (needsWriteback(state) || !requested || req_load) + val sec_rdy = io.idx_match && !refilled && !((requested || io.mem_req.ready) && needsSecondaryXact(req_cmd, io.mem_req.bits)) val rpq = (new queue(NRPQ)) { new RPQEntry } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq @@ -213,9 +212,28 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { finish_q.io.enq.valid := refill_done finish_q.io.enq.bits := io.mem_rep.bits.global_xact_id + when (io.mem_req.valid && io.mem_req.ready) { + requested := Bool(true) + } + when (io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(id)) { + requested := Bool(false) + } + when (io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(id)) { + refill_count := refill_count + UFix(1) + state := newStateOnTransactionRep(io.mem_rep.bits, io.mem_req.bits) + } + when (refill_done) { + refilled := Bool(true) + } + when (io.meta_req.valid && io.meta_req.ready) { + valid := Bool(false) + } + when (io.req_sec_val && io.req_sec_rdy) { + xact_type := newTransactionOnSecondaryMiss(req_cmd, newStateOnFlush(), io.mem_req.bits) + } 
when (io.req_pri_val && io.req_pri_rdy) { valid := Bool(true) - state := newStateOnPrimaryMiss(req_cmd) + xact_type := newTransactionOnPrimaryMiss(req_cmd, newStateOnFlush()) requested := Bool(false) refilled := Bool(false) refill_count := UFix(0) @@ -223,24 +241,6 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { idx_ := io.req_bits.idx way_oh_ := io.req_bits.way_oh } - .otherwise { - when (io.mem_req.valid && io.mem_req.ready) { - requested := Bool(true) - } - when (io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(id)) { - requested := Bool(false) - } - when (io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(id)) { - refill_count := refill_count + UFix(1) - } - when (refill_done) { - refilled := Bool(true) - } - when (io.meta_req.valid && io.meta_req.ready) { - valid := Bool(false) - } - state := next_state - } io.idx_match := valid && (idx_ === io.req_bits.idx) io.idx := idx_ @@ -258,7 +258,7 @@ class MSHR(id: Int) extends Component with ThreeStateIncoherence { io.meta_req.bits.way_en := way_oh_ io.mem_req.valid := valid && !requested - io.mem_req.bits.t_type := Mux(needsWriteback(next_state), X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) + io.mem_req.bits.t_type := xact_type io.mem_req.bits.address := Cat(ppn, idx_).toUFix io.mem_req.bits.tile_xact_id := Bits(id) io.mem_finish <> finish_q.io.deq @@ -449,7 +449,7 @@ class WritebackUnit extends Component { io.mem_finish <> finish_q.io.deq } -class FlushUnit(lines: Int) extends Component with ThreeStateIncoherence{ +class FlushUnit(lines: Int) extends Component with FourStateCoherence{ val io = new Bundle { val req = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) }.flip val resp = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) } @@ -667,7 +667,7 @@ abstract class HellaCache extends Component { def newStateOnHit(cmd: Bits, state: UFix): UFix } -class HellaCacheUniproc extends HellaCache with ThreeStateIncoherence { +class HellaCacheUniproc extends HellaCache with 
FourStateCoherence { val io = new Bundle { val cpu = new ioDmem() val mem = new ioTileLink From c09eeb7fd2f6b67e896a80dbedefc733fd68716e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 7 Mar 2012 01:42:08 -0800 Subject: [PATCH 0286/1087] fix D$ next-state logic it was using the CPU command from the wrong pipeline stage, which was a don't-care with ThreeStateIncoherence. --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 54615a74..6dc262a5 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -764,7 +764,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { meta_arb.io.in(2).bits.way_en := ~UFix(0, NWAYS) val early_tag_nack = !meta_arb.io.in(2).ready val cpu_req_tag = Cat(io.cpu.req_ppn, r_cpu_req_idx)(tagmsb,taglsb) - val tag_match_arr = (0 until NWAYS).map( w => isHit(io.cpu.req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_match_arr = (0 until NWAYS).map( w => isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR val tag_hit = r_cpu_req_val && tag_match val tag_miss = r_cpu_req_val && !tag_match From 7deff5fbe2dafbe71584e4f17f98ef0c47352cec Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 7 Mar 2012 11:40:49 -0800 Subject: [PATCH 0287/1087] Broadcast hub bug fixes for load uncached mem req and store uncached xact rep --- rocket/src/main/scala/coherence.scala | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 2b34b8ae..843ff2be 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -135,7 +135,7 @@ trait ThreeStateIncoherence extends CoherencePolicy { } def newStateOnProbeReq(incoming: 
ProbeRequest, state: UFix): Bits = state def probeReplyHasData (reply: ProbeReply): Bool = Bool(false) - def transactionInitHasData (init: TransactionInit): Bool = (init.t_type != X_INIT_WRITE_UNCACHED) + def transactionInitHasData (init: TransactionInit): Bool = (init.t_type === X_INIT_WRITE_UNCACHED) } trait FourStateCoherence extends CoherencePolicy { @@ -204,7 +204,7 @@ trait FourStateCoherence extends CoherencePolicy { reply.p_type === P_REP_COPY_DATA) } def transactionInitHasData (init: TransactionInit): Bool = { - (init.t_type != X_INIT_WRITE_UNCACHED) + (init.t_type === X_INIT_WRITE_UNCACHED) } def transactionReplyHasData (reply: TransactionReply): Bool = { (reply.t_type != X_REP_WRITE_UNCACHED && reply.t_type != X_REP_READ_EXCLUSIVE_ACK) @@ -346,7 +346,7 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { x_init_data_needs_write := transactionInitHasData(io.alloc_req.bits.xact_init) x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) p_rep_count := UFix(NTILES-1) - p_req_flags := ~( UFix(1) << io.alloc_req.bits.tile_id ) + p_req_flags := ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only mem_cnt := UFix(0) p_w_mem_cmd_sent := Bool(false) x_w_mem_cmd_sent := Bool(false) @@ -516,12 +516,14 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ // Reply to initial requestor // Forward memory responses from mem to tile or arbitrate to ack val mem_idx = io.mem.resp.bits.tag - val ack_idx = PriorityEncoder(send_x_rep_ack_arr.toBits, NGLOBAL_XACTS) + val ack_idx = UFix(0)//PriorityEncoder(send_x_rep_ack_arr.toBits, NGLOBAL_XACTS) + //val ack_idx_ = Reg(ack_idx) for( j <- 0 until NTILES ) { val rep = io.tiles(j).xact_rep rep.bits.t_type := UFix(0) rep.bits.tile_xact_id := UFix(0) rep.bits.global_xact_id := UFix(0) + rep.bits.data := io.mem.resp.bits.data rep.valid := Bool(false) when(io.mem.resp.valid) { rep.bits.t_type := getTransactionReplyType(t_type_arr(mem_idx), 
sh_count_arr(mem_idx)) @@ -534,7 +536,6 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ rep.bits.global_xact_id := ack_idx rep.valid := (UFix(j) === init_tile_id_arr(ack_idx)) && send_x_rep_ack_arr(ack_idx) } - io.tiles(j).xact_rep.bits.data := io.mem.resp.bits.data } sent_x_rep_ack_arr(ack_idx) := !io.mem.resp.valid && send_x_rep_ack_arr(ack_idx) // If there were a ready signal due to e.g. intervening network use: From 941873bad1617efc557e21e1996f6203b44fe068 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 7 Mar 2012 21:03:44 -0800 Subject: [PATCH 0288/1087] coherence hub fixes --- rocket/src/main/scala/coherence.scala | 13 ++++++------- rocket/src/main/scala/consts.scala | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 843ff2be..042ee666 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -516,8 +516,7 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ // Reply to initial requestor // Forward memory responses from mem to tile or arbitrate to ack val mem_idx = io.mem.resp.bits.tag - val ack_idx = UFix(0)//PriorityEncoder(send_x_rep_ack_arr.toBits, NGLOBAL_XACTS) - //val ack_idx_ = Reg(ack_idx) + val ack_idx = PriorityEncoder(send_x_rep_ack_arr.toBits) for( j <- 0 until NTILES ) { val rep = io.tiles(j).xact_rep rep.bits.t_type := UFix(0) @@ -534,10 +533,10 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ rep.bits.t_type := getTransactionReplyType(t_type_arr(ack_idx), sh_count_arr(ack_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) rep.bits.global_xact_id := ack_idx - rep.valid := (UFix(j) === init_tile_id_arr(ack_idx)) && send_x_rep_ack_arr(ack_idx) + rep.valid := (UFix(j) === init_tile_id_arr(ack_idx)) && send_x_rep_ack_arr.toBits.orR } } - sent_x_rep_ack_arr(ack_idx) := !io.mem.resp.valid && 
send_x_rep_ack_arr(ack_idx) + sent_x_rep_ack_arr(ack_idx) := !io.mem.resp.valid // If there were a ready signal due to e.g. intervening network use: //io.mem.resp.ready := io.tiles(init_tile_id_arr.read(mem_idx)).xact_rep.ready @@ -592,13 +591,13 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ val x_init_data = io.tiles(j).xact_init_data val x_abort = io.tiles(j).xact_abort val abort_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) - val conflicts = Bits(width = NGLOBAL_XACTS) + val conflicts = Vec(NGLOBAL_XACTS) { Wire() { Bool() } } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io - conflicts(UFix(i), t.busy && x_init.valid && coherenceConflict(t.addr, x_init.bits.address)) + conflicts(i) := t.busy && x_init.valid && coherenceConflict(t.addr, x_init.bits.address) } x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id - want_to_abort_arr(j) := conflicts.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && transactionInitHasData(x_init.bits)) + want_to_abort_arr(j) := conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && transactionInitHasData(x_init.bits)) x_abort.valid := Bool(false) switch(abort_state_arr(j)) { diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index e9af85b7..dbc5f7c7 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -187,7 +187,7 @@ object Constants val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8 val TILE_ID_BITS = 1 val TILE_XACT_ID_BITS = log2up(NMSHR)+3 - val GLOBAL_XACT_ID_BITS = 4 + val GLOBAL_XACT_ID_BITS = 2 val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS val X_INIT_TYPE_BITS = 2 From 5a7c5772a8a3fb07cc44e25dcb254e56f347591a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 7 Mar 2012 23:11:17 -0800 Subject: [PATCH 0289/1087] clearly distinguish PPN and cache tag --- rocket/src/main/scala/nbdcache.scala | 38 ++++++++++++++-------------- 1 file 
changed, 19 insertions(+), 19 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 6dc262a5..b6594aa0 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -95,14 +95,14 @@ class LoadDataGen extends Component { } class MSHRReq extends Bundle { - val ppn = Bits(width = TAG_BITS) + val tag = Bits(width = TAG_BITS) val idx = Bits(width = IDX_BITS) val way_oh = Bits(width = NWAYS) val offset = Bits(width = OFFSET_BITS) val cmd = Bits(width = 4) val typ = Bits(width = 3) - val tag = Bits(width = DCACHE_TAG_BITS) + val cpu_tag = Bits(width = DCACHE_TAG_BITS) val data = Bits(width = CPU_DATA_BITS) } @@ -111,7 +111,7 @@ class RPQEntry extends Bundle { val cmd = Bits(width = 4) val typ = Bits(width = 3) val sdq_id = UFix(width = log2up(NSDQ)) - val tag = Bits(width = DCACHE_TAG_BITS) + val cpu_tag = Bits(width = DCACHE_TAG_BITS) } class Replay extends RPQEntry { @@ -142,7 +142,7 @@ class DataArrayArrayReq extends Bundle { } class WritebackReq extends Bundle { - val ppn = Bits(width = TAG_BITS) + val tag = Bits(width = TAG_BITS) val idx = Bits(width = IDX_BITS) val way_oh = Bits(width = NWAYS) } @@ -192,7 +192,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { val requested = Reg { Bool() } val refilled = Reg { Bool() } val refill_count = Reg { UFix(width = log2up(REFILL_CYCLES)) } - val ppn = Reg { Bits() } + val tag = Reg { Bits() } val idx_ = Reg { Bits() } val way_oh_ = Reg { Bits() } @@ -237,14 +237,14 @@ class MSHR(id: Int) extends Component with FourStateCoherence { requested := Bool(false) refilled := Bool(false) refill_count := UFix(0) - ppn := io.req_bits.ppn + tag := io.req_bits.tag idx_ := io.req_bits.idx way_oh_ := io.req_bits.way_oh } io.idx_match := valid && (idx_ === io.req_bits.idx) io.idx := idx_ - io.tag := ppn + io.tag := tag io.way_oh := way_oh_ io.refill_count := refill_count io.req_pri_rdy := !valid && finish_q.io.enq.ready @@ -254,12 
+254,12 @@ class MSHR(id: Int) extends Component with FourStateCoherence { io.meta_req.bits.inner_req.rw := Bool(true) io.meta_req.bits.inner_req.idx := idx_ io.meta_req.bits.inner_req.data.state := state - io.meta_req.bits.inner_req.data.tag := ppn + io.meta_req.bits.inner_req.data.tag := tag io.meta_req.bits.way_en := way_oh_ io.mem_req.valid := valid && !requested io.mem_req.bits.t_type := xact_type - io.mem_req.bits.address := Cat(ppn, idx_).toUFix + io.mem_req.bits.address := Cat(tag, idx_).toUFix io.mem_req.bits.tile_xact_id := Bits(id) io.mem_finish <> finish_q.io.deq @@ -307,7 +307,7 @@ class MSHRFile extends Component { val replay_arb = (new Arbiter(NMSHR)) { new Replay() } val alloc_arb = (new Arbiter(NMSHR)) { Bool() } - val tag_match = tag_mux.io.out === io.req.bits.ppn + val tag_match = tag_mux.io.out === io.req.bits.tag var idx_match = Bool(false) var pri_rdy = Bool(false) @@ -367,7 +367,7 @@ class MSHRFile extends Component { io.data_req.bits.data := sdq.read(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) io.cpu_resp_val := Reg(replay.valid && replay.ready && replay_read, resetVal = Bool(false)) - io.cpu_resp_tag := Reg(replay.bits.tag) + io.cpu_resp_tag := Reg(replay.bits.cpu_tag) } class WritebackUnit extends Component { @@ -442,7 +442,7 @@ class WritebackUnit extends Component { io.refill_req.ready := io.mem_req.ready && !(valid && !acked) io.mem_req.valid := io.refill_req.valid && !(valid && !acked) || wb_req_val io.mem_req.bits.t_type := Mux(wb_req_val, X_INIT_WRITE_UNCACHED, io.refill_req.bits.t_type) - io.mem_req.bits.address := Mux(wb_req_val, Cat(addr.ppn, addr.idx).toUFix, io.refill_req.bits.address) + io.mem_req.bits.address := Mux(wb_req_val, Cat(addr.tag, addr.idx).toUFix, io.refill_req.bits.address) io.mem_req.bits.tile_xact_id := Mux(wb_req_val, Bits(NMSHR), io.refill_req.bits.tile_xact_id) io.mem_req_data.valid := data_req_fired io.mem_req_data.bits.data := io.data_resp @@ -460,7 +460,7 @@ class 
FlushUnit(lines: Int) extends Component with FourStateCoherence{ val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: s_meta_write :: s_done :: Nil = Enum(6) { UFix() } val state = Reg(resetVal = s_reset) - val tag = Reg() { Bits() } + val cpu_tag = Reg() { Bits() } val idx_cnt = Reg(resetVal = UFix(0, log2up(lines))) val next_idx_cnt = idx_cnt + UFix(1) val way_cnt = Reg(resetVal = UFix(0, log2up(NWAYS))) @@ -474,7 +474,7 @@ class FlushUnit(lines: Int) extends Component with FourStateCoherence{ way_cnt := next_way_cnt; } } - is(s_ready) { when (io.req.valid) { state := s_meta_read; tag := io.req.bits } } + is(s_ready) { when (io.req.valid) { state := s_meta_read; cpu_tag := io.req.bits } } is(s_meta_read) { when (io.meta_req.ready) { state := s_meta_wait } } is(s_meta_wait) { state := Mux(needsWriteback(io.meta_resp.state) && !io.wb_req.ready, s_meta_read, s_meta_write) } is(s_meta_write) { @@ -489,7 +489,7 @@ class FlushUnit(lines: Int) extends Component with FourStateCoherence{ io.req.ready := state === s_ready io.resp.valid := state === s_done - io.resp.bits := tag + io.resp.bits := cpu_tag io.meta_req.valid := (state === s_meta_read) || (state === s_meta_write) || (state === s_reset) io.meta_req.bits.way_en := UFixToOH(way_cnt, NWAYS) io.meta_req.bits.inner_req.idx := idx_cnt @@ -497,7 +497,7 @@ class FlushUnit(lines: Int) extends Component with FourStateCoherence{ io.meta_req.bits.inner_req.data.state := newStateOnFlush() io.meta_req.bits.inner_req.data.tag := UFix(0) io.wb_req.valid := state === s_meta_wait && needsWriteback(io.meta_resp.state) - io.wb_req.bits.ppn := io.meta_resp.tag + io.wb_req.bits.tag := io.meta_resp.tag io.wb_req.bits.idx := idx_cnt io.wb_req.bits.way_oh := UFixToOH(way_cnt, NWAYS) } @@ -835,7 +835,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // writeback val wb_rdy = wb_arb.io.in(1).ready && !p_store_idx_match wb_arb.io.in(1).valid := tag_miss && r_req_readwrite && needs_writeback && !p_store_idx_match - 
wb_arb.io.in(1).bits.ppn := meta_wb_mux.tag + wb_arb.io.in(1).bits.tag := meta_wb_mux.tag wb_arb.io.in(1).bits.idx := r_cpu_req_idx(indexmsb,indexlsb) wb_arb.io.in(1).bits.way_oh := replaced_way_oh @@ -865,9 +865,9 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // miss handling mshr.io.req.valid := tag_miss && r_req_readwrite && (!needs_writeback || wb_rdy) - mshr.io.req.bits.ppn := cpu_req_tag + mshr.io.req.bits.tag := cpu_req_tag mshr.io.req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) - mshr.io.req.bits.tag := r_cpu_req_tag + mshr.io.req.bits.cpu_tag := r_cpu_req_tag mshr.io.req.bits.offset := r_cpu_req_idx(offsetmsb,0) mshr.io.req.bits.cmd := r_cpu_req_cmd mshr.io.req.bits.typ := r_cpu_req_type From 7f43dee0c91c4e537013ef433e13ed906bb3e552 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 8 Mar 2012 01:04:26 -0800 Subject: [PATCH 0290/1087] PriorityEncoder apply() no longer has recursive depth param --- rocket/src/main/scala/util.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index df99e432..fbc00ebb 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -259,10 +259,11 @@ class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { object PriorityEncoder { - def apply(in: Bits, n: Int = 0): UFix = { + def apply(in: Bits): UFix = doApply(in, 0) + def doApply(in: Bits, n: Int = 0): UFix = { if (n >= in.getWidth-1) UFix(n) else - Mux(in(n), UFix(n), PriorityEncoder(in, n+1)) + Mux(in(n), UFix(n), doApply(in, n+1)) } } From 788ad327da04794430a9de462b7b0e6d6db9d3db Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 8 Mar 2012 11:36:10 -0800 Subject: [PATCH 0291/1087] Fixed dependency queue bug in Broadcast Hub --- rocket/src/main/scala/coherence.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala 
index 042ee666..14504062 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -638,7 +638,7 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ for( i <- 0 until NGLOBAL_XACTS ) { alloc_arb.io.in(i).valid := !trackerList(i).io.busy trackerList(i).io.can_alloc := alloc_arb.io.in(i).ready - trackerList(i).io.alloc_req.bits <> init_arb.io.out.bits + trackerList(i).io.alloc_req.bits := init_arb.io.out.bits trackerList(i).io.alloc_req.valid := init_arb.io.out.valid trackerList(i).io.x_init_data.bits := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.bits @@ -655,7 +655,7 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ init_arb.io.in(j).bits.tile_id := UFix(j) val pop_x_inits = trackerList.map(_.io.pop_x_init(j).toBool) val do_pop = foldR(pop_x_inits)(_||_) - x_init_data_dep_list(j).io.enq.valid := do_pop + x_init_data_dep_list(j).io.enq.valid := do_pop && transactionInitHasData(x_init.bits) && (abort_state_arr(j) === s_idle) x_init_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_x_inits) x_init.ready := (abort_state_arr(j) === s_abort_complete) || do_pop x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data(j).toBool))(_||_) From 35c4bd4084e995be0812b7171e832b428ec65ca2 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 8 Mar 2012 16:39:05 -0800 Subject: [PATCH 0292/1087] Hub addr comparison bug fix --- rocket/src/main/scala/coherence.scala | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 14504062..e3827c72 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -42,7 +42,7 @@ class TrackerDependency extends Bundle { class TransactionInit extends Bundle { val t_type = Bits(width = X_INIT_TYPE_BITS) val tile_xact_id = Bits(width = 
TILE_XACT_ID_BITS) - val address = UFix(width = PADDR_BITS) + val address = UFix(width = PADDR_BITS - OFFSET_BITS) } class TransactionInitData extends MemData @@ -54,7 +54,7 @@ class TransactionAbort extends Bundle { class ProbeRequest extends Bundle { val p_type = Bits(width = P_REQ_TYPE_BITS) val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) - val address = Bits(width = PADDR_BITS) + val address = Bits(width = PADDR_BITS - OFFSET_BITS) } class ProbeReply extends Bundle { @@ -230,7 +230,7 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { val mem_req_lock = Bool(OUTPUT) val probe_req = (new ioDecoupled) { new ProbeRequest } val busy = Bool(OUTPUT) - val addr = Bits(PADDR_BITS, OUTPUT) + val addr = Bits(PADDR_BITS - OFFSET_BITS, OUTPUT) val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) @@ -449,9 +449,8 @@ class CoherenceHubNull extends CoherenceHub { class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ - def coherenceConflict(addr1: Bits, addr2: Bits): Bool = { - addr1(PADDR_BITS-1, OFFSET_BITS) === addr2(PADDR_BITS-1, OFFSET_BITS) - } + def coherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) + def getTransactionReplyType(t_type: UFix, count: UFix): Bits = { MuxLookup(t_type, X_REP_READ_UNCACHED, Array( X_INIT_READ_SHARED -> Mux(count > UFix(0), X_REP_READ_SHARED, X_REP_READ_EXCLUSIVE), @@ -464,7 +463,7 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS)} } + val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } val 
t_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_BITS)} } From 4d2e7172f6ac8648f189c59bfe6ae8152444f9b3 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 8 Mar 2012 18:07:44 -0800 Subject: [PATCH 0293/1087] Added require_ack field to TransactionReply bundle --- rocket/src/main/scala/coherence.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index e3827c72..a2c6ba5f 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -68,6 +68,7 @@ class TransactionReply extends MemData { val t_type = Bits(width = X_REP_TYPE_BITS) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) + val require_ack = Bool() } class TransactionFinish extends Bundle { @@ -440,6 +441,7 @@ class CoherenceHubNull extends CoherenceHub { x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.data := io.mem.resp.bits.data + x_rep.bits.require_ack := Bool(true) x_rep.valid := io.mem.resp.valid || x_init.valid && is_write io.tiles(0).xact_abort.valid := Bool(false) @@ -522,6 +524,7 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ rep.bits.tile_xact_id := UFix(0) rep.bits.global_xact_id := UFix(0) rep.bits.data := io.mem.resp.bits.data + rep.bits.require_ack := Bool(true) rep.valid := Bool(false) when(io.mem.resp.valid) { rep.bits.t_type := getTransactionReplyType(t_type_arr(mem_idx), sh_count_arr(mem_idx)) From 22726ae646c64fa9e2074539bd705f4d78d3c0d3 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 8 Mar 2012 18:47:32 -0800 Subject: [PATCH 0294/1087] icache and htif now obey require_ack field of TransactionReply. 
Avoids extraneous TransactionFinish on prefetcher-supplied icache data --- rocket/src/main/scala/htif.scala | 9 +++++++-- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/icache_prefetch.scala | 3 ++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 3176608e..702fe5db 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -80,8 +80,13 @@ class rocketHTIF(w: Int, ncores: Int) extends Component val mem_acked = Reg(resetVal = Bool(false)) val mem_gxid = Reg() { Bits() } + val mem_needs_ack = Reg() { Bool() } val mem_nacked = Reg(resetVal = Bool(false)) - when (io.mem.xact_rep.valid) { mem_acked := Bool(true); mem_gxid := io.mem.xact_rep.bits.global_xact_id } + when (io.mem.xact_rep.valid) { + mem_acked := Bool(true) + mem_gxid := io.mem.xact_rep.bits.global_xact_id + mem_needs_ack := io.mem.xact_rep.bits.require_ack + } when (io.mem.xact_abort.valid) { mem_nacked := Bool(true) } val state_rx :: state_pcr :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(8) { UFix() } @@ -152,7 +157,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component io.mem.xact_init.bits.address := addr >> UFix(OFFSET_BITS-3) io.mem.xact_init_data.valid:= state === state_mem_wdata io.mem.xact_init_data.bits.data := mem_req_data - io.mem.xact_finish.valid := state === state_mem_finish + io.mem.xact_finish.valid := (state === state_mem_finish) && mem_needs_ack io.mem.xact_finish.bits.global_xact_id := mem_gxid pcr_done := Bool(false) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 4e9aa368..b1178f97 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -127,7 +127,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { tag_hit := any_hit val finish_q = (new queue(1)) { new TransactionFinish } - 
finish_q.io.enq.valid := refill_done + finish_q.io.enq.valid := refill_done && io.mem.xact_rep.bits.require_ack finish_q.io.enq.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id // output signals diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 9c2d47e5..425f95d6 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -46,7 +46,7 @@ class rocketIPrefetcher extends Component() { when (ip_mem_resp_val) { fill_cnt := fill_cnt + UFix(1) } val fill_done = fill_cnt.andR && ip_mem_resp_val - finish_q.io.enq.valid := fill_done + finish_q.io.enq.valid := fill_done && io.mem.xact_rep.bits.require_ack finish_q.io.enq.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id val forward = Reg(resetVal = Bool(false)) @@ -59,6 +59,7 @@ class rocketIPrefetcher extends Component() { forward && ip_mem_resp_abort io.icache.xact_rep.valid := io.mem.xact_rep.valid && !io.mem.xact_rep.bits.tile_xact_id(0) || (forward && pdq.io.deq.valid) io.icache.xact_rep.bits.data := Mux(forward, pdq.io.deq.bits, io.mem.xact_rep.bits.data) + io.icache.xact_rep.bits.require_ack := !forward && io.mem.xact_rep.bits.require_ack pdq.io.flush := Reg(demand_miss && !hit || (state === s_bad_resp_wait), resetVal = Bool(false)) pdq.io.enq.bits := io.mem.xact_rep.bits.data From 8acbe98f53186ef59b55b03fe1608877ad903f62 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 8 Mar 2012 23:31:57 -0800 Subject: [PATCH 0295/1087] change how fence.*.cv works, now control processor stalls on the fence instruction --- rocket/src/main/scala/cpu.scala | 4 +- rocket/src/main/scala/ctrl.scala | 421 ++++++++++++++------------- rocket/src/main/scala/ctrl_vec.scala | 21 +- 3 files changed, 235 insertions(+), 211 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 48aaaeff..c9ed016d 100644 --- a/rocket/src/main/scala/cpu.scala +++ 
b/rocket/src/main/scala/cpu.scala @@ -193,8 +193,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ctrl.io.vec_iface.vpfximm2q_ready := vu.io.vec_pfximm2q.ready ctrl.io.vec_iface.vpfcntq_ready := vu.io.vec_pfcntq.ready - ctrl.io.vec_iface.vackq_valid := vu.io.vec_ackq.valid - vu.io.vec_ackq.ready := ctrl.io.vec_iface.vackq_ready + // fences + ctrl.io.vec_iface.vfence_ready := vu.io.vec_fence_ready // exceptions vu.io.xcpt_backup.exception := dpath.io.vec_iface.exception diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index cc1d7f69..3c812b3e 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -91,208 +91,237 @@ class ioCtrlAll extends Bundle() val vec_iface = new ioCtrlVecInterface() } +object rocketCtrlDecode +{ + val xpr64 = Y; + + val decode_default = + // vfence_cv + // | eret + // | | syscall + // vec_val mem_val mul_val div_val renpcr | | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N) + + val xdecode = Array( + // vfence_cv + // | eret + // | | syscall + // vec_val mem_val mul_val div_val renpcr | | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,BR_NE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + BEQ-> List(Y, N,BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + BLT-> List(Y, N,BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, 
N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + BLTU-> List(Y, N,BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + BGE-> List(Y, N,BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + BGEU-> List(Y, N,BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + + J-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + JAL-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + JALR_C-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + JALR_J-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + JALR_R-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + RDNPC-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + + LB-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + LH-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + LW-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + LD-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X 
,SYNC_N,N,N,N,N,N), + LBU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + LHU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + LWU-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SW-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SD-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + + AMOADD_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOSWAP_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOAND_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOOR_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOMIN_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOMINU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOMAX_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOMAXU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOADD_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOSWAP_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOAND_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOOR_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOMAX_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + + LUI-> List(Y, N,BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + ADDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SLTI -> List(Y, N,BR_N, 
REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SLTIU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + ANDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + ORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + XORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SLLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SRLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SRAI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + ADD-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SUB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SLT-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SLTU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + riscvAND-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + riscvOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, 
N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + riscvXOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SLL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SRL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SRA-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + + ADDIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SLLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SRLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SRAIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + ADDW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SUBW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SLLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SRLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X 
,SYNC_N,N,N,N,N,N), + + MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_H, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + MULHSU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + + DIV-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + DIVU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + REM-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + REMU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + DIVW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + DIVUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + REMW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + REMUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + + SYSCALL-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,N,N), + EI-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,N,Y,Y), + DI-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,N,Y,Y), + ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,Y,N), + FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N,N), + FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N,N), + CFLUSH-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,Y,Y), + MFPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,N,Y,N), + MTPCR-> List(Y, N,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,N,Y,Y), + RDTIME-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + RDCYCLE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + RDINSTRET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N)) + + val fdecode = Array( + // vfence_cv + // | eret + // | | syscall + // vec_val mem_val mul_val div_val renpcr | | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | | | + MFTX_S-> 
List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + MFTX_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_W_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_W_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_WU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_WU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_L_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_L_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_LU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_LU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FEQ_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FEQ_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FLT_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FLT_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, 
N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FLE_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FLE_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + MXTF_S-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + MXTF_D-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_S_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_D_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_S_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_D_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_S_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_D_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_S_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FCVT_D_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + 
MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FLW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FLD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FSW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + FSD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N)) + + val vdecode = Array( + // vfence_cv + // | eret + // | | syscall + // vec_val mem_val mul_val div_val renpcr | | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | | | + VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y), + VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y), + VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + FENCE_L_V-> 
List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + FENCE_G_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + FENCE_L_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,Y,N,N,N,N), + FENCE_G_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,Y,N,N,N,N), + VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLBU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, 
DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, 
DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + + VENQCMD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,Y), + VENQIMM1-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,Y), + VENQIMM2-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,Y), + VENQCNT-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,Y), + VWAITXCPT-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y), + VWAITKILL-> List(VEC_Y,Y,BR_N, 
REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y)) +} + class rocketCtrl extends Component { val io = new ioCtrlAll(); - val xpr64 = Y; - val cs = - ListLookup(io.dpath.inst, - // eret - // | syscall - // vec_val mem_val mul_val div_val renpcr | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N),Array( - BNE-> List(Y, N,BR_NE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BEQ-> List(Y, N,BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BLT-> List(Y, N,BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BLTU-> List(Y, N,BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BGE-> List(Y, N,BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - BGEU-> List(Y, N,BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), + var decode_table = rocketCtrlDecode.xdecode + if (HAVE_FPU) decode_table ++= rocketCtrlDecode.fdecode + if (HAVE_VEC) decode_table ++= rocketCtrlDecode.vdecode - J-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JAL-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_C-> List(Y, N,BR_JR, 
REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_J-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - JALR_R-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDNPC-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - - LB-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LH-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LW-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LD-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LBU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LHU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - LWU-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SW-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, 
WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SD-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - - AMOADD_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOSWAP_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOAND_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOOR_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMIN_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMINU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAX_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAXU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOADD_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOAND_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOOR_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, 
M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAX_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - - LUI-> List(Y, N,BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTI -> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTIU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ANDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - XORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADD-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SUB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLT-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLTU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvAND-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - riscvXOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRA-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - - ADDIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLIW-> 
List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - ADDW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SUBW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SLLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - - MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_H, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULHSU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - - DIV-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, 
REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REM-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - DIVUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - REMUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - - SYSCALL-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,N), - EI-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,Y,Y), - DI-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,Y,Y), - ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,Y,N,Y,N), - FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N), - FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N), - CFLUSH-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,Y), - MFPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,Y,N), - MTPCR-> List(Y, N,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,Y,Y), - RDTIME-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDCYCLE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - RDINSTRET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - - MFTX_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MFTX_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_W_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_W_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_WU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_WU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_L_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_L_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_LU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_LU_D-> List(FPU_Y,N,BR_N, 
REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FEQ_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FEQ_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLT_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLT_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLE_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLE_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MXTF_S-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MXTF_D-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_S_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_D_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_S_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_D_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_S_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X 
,SYNC_N,N,N,N,N), - FCVT_D_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_S_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FCVT_D_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FLD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - FSD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N), - - // Vector Stuff - VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), - VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), - VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VMSV-> List(VEC_Y,Y,BR_N, 
REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - FENCE_L_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - FENCE_G_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - FENCE_L_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), - FENCE_G_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), - VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLBU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, 
SYNC_N,N,N,N,N), - VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N), - - // Vector Supervisor Stuff - VENQCMD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), - VENQIMM1-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), - VENQIMM2-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), - VENQCNT-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,Y,Y), - VWAITXCPT-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y), - VWAITKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y) - )) + val cs = ListLookup(io.dpath.inst, rocketCtrlDecode.decode_default, decode_table) val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 - val id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 + val id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_vfence_cv :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst, resetVal = Bool(false)); @@ -602,7 +631,7 @@ class rocketCtrl extends Component vec.io.exception := wb_reg_exception vec_replay = vec.io.replay - vec_stalld = vec.io.stalld + vec_stalld = vec.io.stalld || id_vfence_cv && !vec.io.vfence_ready } // exception handling @@ -655,7 +684,7 @@ class rocketCtrl extends Component // control transfer from ex/mem val take_pc_ex = ex_reg_btb_hit != br_taken || jr_taken - val take_pc_wb = wb_reg_replay || wb_reg_exception || wb_reg_eret; + val take_pc_wb = wb_reg_replay || vec_replay || wb_reg_exception || wb_reg_eret take_pc := take_pc_ex || take_pc_wb; // replay mem stage PC on a DTLB miss or a long-latency writeback diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 43c6c321..5a7deedb 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -37,8 +37,7 @@ class ioCtrlVecInterface extends Bundle val vpfcntq_valid = Bool(OUTPUT) val 
vpfcntq_ready = Bool(INPUT) - val vackq_valid = Bool(INPUT) - val vackq_ready = Bool(OUTPUT) + val vfence_ready = Bool(INPUT) val exception_ack_valid = Bool(INPUT) val exception_ack_ready = Bool(OUTPUT) @@ -55,6 +54,7 @@ class ioCtrlVec extends Bundle val exception = Bool(INPUT) val replay = Bool(OUTPUT) val stalld = Bool(OUTPUT) + val vfence_ready = Bool(OUTPUT) } class rocketCtrlVec extends Component @@ -72,7 +72,7 @@ class rocketCtrlVec extends Component // | | | | | | vpfximm1q // | | | | | | | vpfximm2q // wen | | | | | | | | vpfcntq - // val vcmd vimm vimm2 | fn | | | | | | | | | stalld + // val vcmd vimm vimm2 | fn | | | | | | | | | fence_cv // | | | | | | | | | | | | | | | | waitxcpt // | | | | | | | | | | | | | | | | | List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N),Array( @@ -127,7 +127,7 @@ class rocketCtrlVec extends Component val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_sel_vimm2 :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: wb_vec_cntq_enq :: veccs1 = veccs0 val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_pfcntq_enq :: veccs2 = veccs1 - val wb_vec_stalld :: wb_vec_waitxcpt :: Nil = veccs2 + val wb_vec_fence_cv :: wb_vec_waitxcpt :: Nil = veccs2 val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val && !(wb_vec_appvlmask && io.dpath.appvl0) @@ -186,7 +186,6 @@ class rocketCtrlVec extends Component mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && wb_vec_pfcntq_enq - io.iface.vackq_ready := Bool(true) io.iface.exception_ack_ready := Bool(true) io.iface.kill_ack_ready := Bool(true) @@ -198,15 +197,10 @@ class rocketCtrlVec extends Component wb_vec_pfcmdq_enq && !io.iface.vpfcmdq_ready || wb_vec_pfximm1q_enq && !io.iface.vpfximm1q_ready || wb_vec_pfximm2q_enq && 
!io.iface.vpfximm2q_ready || - wb_vec_pfcntq_enq && !io.iface.vpfcntq_ready + wb_vec_pfcntq_enq && !io.iface.vpfcntq_ready || + wb_vec_fence_cv && !io.iface.vfence_ready ) - val reg_stalld = Reg(resetVal = Bool(false)) - val do_stalld = valid_common && wb_vec_stalld && !io.replay - - when (do_stalld) { reg_stalld := Bool(true) } - when (io.iface.vackq_valid || io.exception) { reg_stalld := Bool(false) } - val reg_waitxcpt = Reg(resetVal = Bool(false)) val do_waitxcpt = valid_common && wb_vec_waitxcpt && !io.replay @@ -214,5 +208,6 @@ class rocketCtrlVec extends Component when (io.iface.exception_ack_valid) { reg_waitxcpt := Bool(false) } when (io.iface.kill_ack_valid) { reg_waitxcpt := Bool(false) } - io.stalld := reg_stalld || reg_waitxcpt + io.stalld := reg_waitxcpt + io.vfence_ready := io.iface.vfence_ready } From a1b30282ddd8eb74b1c5ff301bc9419814e0a1ae Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Fri, 9 Mar 2012 01:09:22 -0800 Subject: [PATCH 0296/1087] major refactoring on vector exception interface --- rocket/src/main/scala/consts.scala | 5 +++-- rocket/src/main/scala/cpu.scala | 15 +++++++-------- rocket/src/main/scala/ctrl_vec.scala | 13 +++++-------- rocket/src/main/scala/dpath.scala | 6 ++++-- rocket/src/main/scala/dpath_util.scala | 13 +++++-------- rocket/src/main/scala/dpath_vec.scala | 16 ++++++++-------- 6 files changed, 32 insertions(+), 36 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index dbc5f7c7..a3b1e4ea 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -144,8 +144,9 @@ object Constants val PCR_VECBANK = UFix(18, 5); // temporaries for vector, these will go away - val PCR_VEC_EADDR = UFix(30, 5) - val PCR_VEC_XCPT = UFix(31, 5) + val PCR_VEC_BACKUP = UFix(29, 5) + val PCR_VEC_KILL = UFix(30, 5) + val PCR_VEC_HOLD = UFix(31, 5) // definition of bits in PCR status reg val SR_ET = 0; // enable traps diff --git a/rocket/src/main/scala/cpu.scala 
b/rocket/src/main/scala/cpu.scala index c9ed016d..068d2de6 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -197,14 +197,13 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ctrl.io.vec_iface.vfence_ready := vu.io.vec_fence_ready // exceptions - vu.io.xcpt_backup.exception := dpath.io.vec_iface.exception - vu.io.xcpt_backup.exception_addr := dpath.io.vec_iface.eaddr.toUFix - ctrl.io.vec_iface.exception_ack_valid := vu.io.xcpt_backup.exception_ack_valid - vu.io.xcpt_backup.exception_ack_ready := ctrl.io.vec_iface.exception_ack_ready - vu.io.xcpt_resume.hold := dpath.io.vec_iface.hold - vu.io.xcpt_kill.kill := dpath.io.vec_iface.kill - ctrl.io.vec_iface.kill_ack_valid := vu.io.xcpt_kill.kill_ack_valid - vu.io.xcpt_kill.kill_ack_ready := ctrl.io.vec_iface.kill_ack_ready + vu.io.xcpt.exception := ctrl.io.vec_iface.exception + ctrl.io.vec_iface.exception_ack_valid := vu.io.xcpt.exception_ack_valid + vu.io.xcpt.exception_ack_ready := ctrl.io.vec_iface.exception_ack_ready + vu.io.xcpt.backup := dpath.io.vec_iface.backup + vu.io.xcpt.backup_addr := dpath.io.vec_iface.backup_addr.toUFix + vu.io.xcpt.kill := dpath.io.vec_iface.kill + vu.io.xcpt.hold := dpath.io.vec_iface.hold // hooking up vector memory interface val storegen = new StoreDataGen diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 5a7deedb..2423e426 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -39,11 +39,9 @@ class ioCtrlVecInterface extends Bundle val vfence_ready = Bool(INPUT) + val exception = Bool(OUTPUT) val exception_ack_valid = Bool(INPUT) val exception_ack_ready = Bool(OUTPUT) - - val kill_ack_valid = Bool(INPUT) - val kill_ack_ready = Bool(OUTPUT) } class ioCtrlVec extends Bundle @@ -186,9 +184,6 @@ class rocketCtrlVec extends Component mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && 
mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && wb_vec_pfcntq_enq - io.iface.exception_ack_ready := Bool(true) - io.iface.kill_ack_ready := Bool(true) - io.replay := valid_common && ( wb_vec_cmdq_enq && !io.iface.vcmdq_ready || wb_vec_ximm1q_enq && !io.iface.vximm1q_ready || @@ -206,8 +201,10 @@ class rocketCtrlVec extends Component when (do_waitxcpt) { reg_waitxcpt := Bool(true) } when (io.iface.exception_ack_valid) { reg_waitxcpt := Bool(false) } - when (io.iface.kill_ack_valid) { reg_waitxcpt := Bool(false) } + + io.iface.exception := io.exception && io.sr_ev + io.iface.exception_ack_ready := reg_waitxcpt io.stalld := reg_waitxcpt - io.vfence_ready := io.iface.vfence_ready + io.vfence_ready := !io.sr_ev || io.iface.vfence_ready } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f14f80cb..1486c3d4 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -383,8 +383,10 @@ class rocketDpath extends Component vec.io.vecbankcnt := pcr.io.vecbankcnt vec.io.wdata := wb_reg_vec_wdata vec.io.rs2 := wb_reg_rs2 - vec.io.vec_eaddr := pcr.io.vec_eaddr - vec.io.vec_xcpt := pcr.io.vec_xcpt + vec.io.vechold := pcr.io.vechold + vec.io.pcrw.addr := wb_reg_raddr2 + vec.io.pcrw.en := io.ctrl.wen_pcr + vec.io.pcrw.data := wb_reg_wdata wb_wdata := Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 7353c91f..2748048c 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -79,8 +79,7 @@ class ioDpathPCR extends Bundle() val irq_ipi = Bool(OUTPUT); val vecbank = Bits(8, OUTPUT) val vecbankcnt = UFix(4, OUTPUT) - val vec_eaddr = Bits(VADDR_BITS, OUTPUT) - val vec_xcpt = Bits(3, OUTPUT) + val vechold = Bool(OUTPUT) } class rocketDpathPCR extends Component @@ -99,8 +98,7 @@ class rocketDpathPCR extends Component val reg_k1 = Reg() { Bits() 
}; val reg_ptbr = Reg() { UFix() }; val reg_vecbank = Reg(resetVal = Bits("b1111_1111", 8)) - val reg_vec_eaddr = Reg() { Bits() } - val reg_vec_xcpt = Reg() { Bits() } + val reg_vechold = Reg() { Bool() } val reg_error_mode = Reg(resetVal = Bool(false)); val reg_status_vm = Reg(resetVal = Bool(false)); @@ -142,8 +140,7 @@ class rocketDpathPCR extends Component cnt = cnt + reg_vecbank(i) io.vecbankcnt := cnt(3,0) - io.vec_eaddr := reg_vec_eaddr - io.vec_xcpt := reg_vec_xcpt + io.vechold := reg_vechold val badvaddr_sign = Mux(io.w.data(VADDR_BITS-1), ~io.w.data(63,VADDR_BITS) === UFix(0), io.w.data(63,VADDR_BITS) != UFix(0)) when (io.badvaddr_wen) { @@ -174,6 +171,7 @@ class rocketDpathPCR extends Component when (io.eret) { reg_status_s := reg_status_ps; reg_status_et := Bool(true); + reg_vechold := Bool(false) } when (reg_count === reg_compare) { @@ -211,8 +209,7 @@ class rocketDpathPCR extends Component when (waddr === PCR_K1) { reg_k1 := wdata; } when (waddr === PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } when (waddr === PCR_VECBANK) { reg_vecbank := wdata(7,0) } - when (waddr === PCR_VEC_EADDR) { reg_vec_eaddr := wdata(VADDR_BITS,0) } - when (waddr === PCR_VEC_XCPT) { reg_vec_xcpt := wdata(2,0) } + when (waddr === PCR_VEC_HOLD) { reg_vechold := reg_status_ev && wdata(0) } } rdata := Bits(0, 64) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 86334e29..1ef4dd92 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -12,8 +12,8 @@ class ioDpathVecInterface extends Bundle val vximm1q_bits = Bits(SZ_VIMM, OUTPUT) val vximm2q_bits = Bits(SZ_VSTRIDE, OUTPUT) val vcntq_bits = Bits(SZ_VLEN, OUTPUT) - val eaddr = Bits(64, OUTPUT) - val exception = Bool(OUTPUT) + val backup = Bool(OUTPUT) + val backup_addr = Bits(64, OUTPUT) val kill = Bool(OUTPUT) val hold = Bool(OUTPUT) } @@ -30,8 +30,8 @@ class ioDpathVec extends Bundle val 
vecbankcnt = UFix(4, INPUT) val wdata = Bits(64, INPUT) val rs2 = Bits(64, INPUT) - val vec_eaddr = Bits(64, INPUT) - val vec_xcpt = Bits(3, INPUT) + val vechold = Bool(INPUT) + val pcrw = new ioWritePort() val wen = Bool(OUTPUT) val appvl = UFix(12, OUTPUT) } @@ -137,10 +137,10 @@ class rocketDpathVec extends Component io.iface.vcntq_bits := io.wdata(SZ_VLEN-1, 0) - io.iface.eaddr := io.vec_eaddr - io.iface.exception := io.vec_xcpt(0) - io.iface.kill := io.vec_xcpt(1) - io.iface.hold := io.vec_xcpt(2) + io.iface.backup := io.pcrw.en && (io.pcrw.addr === PCR_VEC_BACKUP) + io.iface.backup_addr := io.pcrw.data + io.iface.kill := io.pcrw.en && (io.pcrw.addr === PCR_VEC_KILL) + io.iface.hold := io.vechold io.ctrl.valid := io.valid io.ctrl.inst := io.inst From 766bac88f8e404066af5f5d62a3e70b07b39cba1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 9 Mar 2012 02:55:46 -0800 Subject: [PATCH 0297/1087] refactor D$ writebacks and flushes MSHRs now arbitrate for writebacks and handle flushes. 
--- rocket/src/main/scala/nbdcache.scala | 325 +++++++++++++-------------- rocket/src/main/scala/util.scala | 2 +- 2 files changed, 158 insertions(+), 169 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index b6594aa0..cf788a9d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -12,14 +12,12 @@ class ioReplacementWayGen extends Bundle { class RandomReplacementWayGen extends Component { val io = new ioReplacementWayGen() //TODO: Actually limit selection based on which ways are allowed (io.ways_en) + io.way_id := UFix(0) if(NWAYS > 1) { - val rand_way_id = UFix(width = log2up(NWAYS)) - rand_way_id := LFSR16(io.pick_new_way)(log2up(NWAYS)-1,0) - when (rand_way_id >= UFix(NWAYS, width = log2up(NWAYS)+1)) { io.way_id := UFix(0, width = log2up(NWAYS)) } - .otherwise { io.way_id := rand_way_id } + val rand_way_id = LFSR16(io.pick_new_way)(log2up(NWAYS)-1,0) + when (rand_way_id < UFix(NWAYS)) { io.way_id := rand_way_id } } - else io.way_id := UFix(0) } class StoreMaskGen extends Component { @@ -95,6 +93,9 @@ class LoadDataGen extends Component { } class MSHRReq extends Bundle { + val old_state = UFix(width = 2) + val old_tag = Bits(width = TAG_BITS) + val tag = Bits(width = TAG_BITS) val idx = Bits(width = IDX_BITS) val way_oh = Bits(width = NWAYS) @@ -145,6 +146,7 @@ class WritebackReq extends Bundle { val tag = Bits(width = TAG_BITS) val idx = Bits(width = IDX_BITS) val way_oh = Bits(width = NWAYS) + val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) } class MetaData extends Bundle { @@ -184,89 +186,109 @@ class MSHR(id: Int) extends Component with FourStateCoherence { val mem_abort = (new ioPipe) { new TransactionAbort }.flip val mem_rep = (new ioPipe) { new TransactionReply }.flip val mem_finish = (new ioDecoupled) { new TransactionFinish } + val wb_req = (new ioDecoupled) { new WritebackReq } } - val valid = Reg(resetVal = Bool(false)) + val s_invalid :: 
s_meta_invalidate :: s_wb_req :: s_wb_resp :: s_refill_req :: s_refill_resp :: s_drain_rpq :: s_write_meta :: Nil = Enum(8) { UFix() } + val state = Reg(resetVal = s_invalid) + val flush = Reg { Bool() } + val xact_type = Reg { UFix() } - val state = Reg { UFix() } - val requested = Reg { Bool() } - val refilled = Reg { Bool() } + val line_state = Reg { UFix() } val refill_count = Reg { UFix(width = log2up(REFILL_CYCLES)) } - val tag = Reg { Bits() } - val idx_ = Reg { Bits() } - val way_oh_ = Reg { Bits() } + val req = Reg { new MSHRReq() } val req_cmd = io.req_bits.cmd - val req_use_rpq = (req_cmd != M_PFR) && (req_cmd != M_PFW) - val sec_rdy = io.idx_match && !refilled && !((requested || io.mem_req.ready) && needsSecondaryXact(req_cmd, io.mem_req.bits)) + val req_use_rpq = (req_cmd != M_PFR) && (req_cmd != M_PFW) && (req_cmd != M_FLA) + val idx_match = req.idx === io.req_bits.idx + val sec_rdy = idx_match && !flush && (state === s_meta_invalidate || state === s_wb_req || state === s_wb_resp || (state === s_refill_req || state === s_refill_resp) && !needsSecondaryXact(req_cmd, io.mem_req.bits)) val rpq = (new queue(NRPQ)) { new RPQEntry } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq rpq.io.enq.bits := io.req_bits rpq.io.enq.bits.sdq_id := io.req_sdq_id - rpq.io.deq.ready := io.replay.ready && refilled + rpq.io.deq.ready := io.replay.ready && (state === s_drain_rpq) - val refill_done = io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(id) && refill_count.andR + val abort = io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(id) + val reply = io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(id) + val refill_done = reply && refill_count.andR + val wb_done = reply && (state === s_wb_resp) - val finish_q = (new queue(1)) { new TransactionFinish } - finish_q.io.enq.valid := refill_done + val finish_q = (new queue(2 /* wb + refill */)) { new TransactionFinish } + finish_q.io.enq.valid := 
wb_done || refill_done finish_q.io.enq.bits := io.mem_rep.bits.global_xact_id - when (io.mem_req.valid && io.mem_req.ready) { - requested := Bool(true) + when (state === s_write_meta && io.meta_req.ready) { + state := s_invalid } - when (io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(id)) { - requested := Bool(false) + when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid) { + state := s_write_meta } - when (io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(id)) { - refill_count := refill_count + UFix(1) - state := newStateOnTransactionRep(io.mem_rep.bits, io.mem_req.bits) + when (state === s_refill_resp) { + when (refill_done) { state := s_drain_rpq } + when (reply) { + refill_count := refill_count + UFix(1) + line_state := newStateOnTransactionRep(io.mem_rep.bits, io.mem_req.bits) + } + when (abort) { state := s_refill_req } } - when (refill_done) { - refilled := Bool(true) + when (state === s_refill_req && io.mem_req.ready) { + state := Mux(flush, s_write_meta, s_refill_resp) } - when (io.meta_req.valid && io.meta_req.ready) { - valid := Bool(false) + when (state === s_wb_resp) { + when (reply) { state := s_refill_req } + when (abort) { state := s_wb_req } } - when (io.req_sec_val && io.req_sec_rdy) { + when (state === s_wb_req && io.wb_req.ready) { + state := s_wb_resp + } + when (state === s_meta_invalidate && io.meta_req.ready) { + state := Mux(needsWriteback(req.old_state), s_wb_req, s_refill_req) + } + + when (io.req_sec_val && io.req_sec_rdy) { // s_meta_invalidate, s_wb_req, s_wb_resp, s_refill_req xact_type := newTransactionOnSecondaryMiss(req_cmd, newStateOnFlush(), io.mem_req.bits) } - when (io.req_pri_val && io.req_pri_rdy) { - valid := Bool(true) - xact_type := newTransactionOnPrimaryMiss(req_cmd, newStateOnFlush()) - requested := Bool(false) - refilled := Bool(false) + when ((state === s_invalid) && io.req_pri_val) { + state := s_meta_invalidate + flush := req_cmd === M_FLA + line_state := 
newStateOnFlush() refill_count := UFix(0) - tag := io.req_bits.tag - idx_ := io.req_bits.idx - way_oh_ := io.req_bits.way_oh + xact_type := newTransactionOnPrimaryMiss(req_cmd, newStateOnFlush()) + req := io.req_bits } - io.idx_match := valid && (idx_ === io.req_bits.idx) - io.idx := idx_ - io.tag := tag - io.way_oh := way_oh_ + io.idx_match := (state != s_invalid) && idx_match + io.idx := req.idx + io.tag := req.tag + io.way_oh := req.way_oh io.refill_count := refill_count - io.req_pri_rdy := !valid && finish_q.io.enq.ready + io.req_pri_rdy := (state === s_invalid) io.req_sec_rdy := sec_rdy && rpq.io.enq.ready - io.meta_req.valid := valid && refilled && !rpq.io.deq.valid + io.meta_req.valid := (state === s_write_meta) || (state === s_meta_invalidate) io.meta_req.bits.inner_req.rw := Bool(true) - io.meta_req.bits.inner_req.idx := idx_ - io.meta_req.bits.inner_req.data.state := state - io.meta_req.bits.inner_req.data.tag := tag - io.meta_req.bits.way_en := way_oh_ + io.meta_req.bits.inner_req.idx := req.idx + io.meta_req.bits.inner_req.data.state := line_state + io.meta_req.bits.inner_req.data.tag := req.tag + io.meta_req.bits.way_en := req.way_oh - io.mem_req.valid := valid && !requested + io.wb_req.valid := (state === s_wb_req) + io.wb_req.bits.tag := req.old_tag + io.wb_req.bits.idx := req.idx + io.wb_req.bits.way_oh := req.way_oh + io.wb_req.bits.tile_xact_id := Bits(id) + + io.mem_req.valid := (state === s_refill_req) && !flush io.mem_req.bits.t_type := xact_type - io.mem_req.bits.address := Cat(tag, idx_).toUFix + io.mem_req.bits.address := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := Bits(id) io.mem_finish <> finish_q.io.deq - io.replay.valid := rpq.io.deq.valid && refilled + io.replay.valid := (state === s_drain_rpq) && rpq.io.deq.valid io.replay.bits <> rpq.io.deq.bits - io.replay.bits.idx := idx_ - io.replay.bits.way_oh := way_oh_ + io.replay.bits.idx := req.idx + io.replay.bits.way_oh := req.way_oh } class MSHRFile extends Component { @@ 
-285,6 +307,7 @@ class MSHRFile extends Component { val mem_abort = (new ioPipe) { new TransactionAbort }.flip val mem_rep = (new ioPipe) { new TransactionReply }.flip val mem_finish = (new ioDecoupled) { new TransactionFinish } + val wb_req = (new ioDecoupled) { new WritebackReq } val cpu_resp_val = Bool(OUTPUT) val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) @@ -304,6 +327,7 @@ class MSHRFile extends Component { val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } val mem_finish_arb = (new Arbiter(NMSHR)) { new TransactionFinish } + val wb_req_arb = (new Arbiter(NMSHR)) { new WritebackReq } val replay_arb = (new Arbiter(NMSHR)) { new Replay() } val alloc_arb = (new Arbiter(NMSHR)) { Bool() } @@ -330,6 +354,7 @@ class MSHRFile extends Component { mshr.io.meta_req <> meta_req_arb.io.in(i) mshr.io.mem_req <> mem_req_arb.io.in(i) mshr.io.mem_finish <> mem_finish_arb.io.in(i) + mshr.io.wb_req <> wb_req_arb.io.in(i) mshr.io.replay <> replay_arb.io.in(i) mshr.io.mem_abort <> io.mem_abort @@ -350,6 +375,7 @@ class MSHRFile extends Component { meta_req_arb.io.out <> io.meta_req mem_req_arb.io.out <> io.mem_req mem_finish_arb.io.out <> io.mem_finish + wb_req_arb.io.out <> io.wb_req io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.mem_resp_idx := mem_resp_mux.io.out.inner_req.idx @@ -375,28 +401,15 @@ class WritebackUnit extends Component { val req = (new ioDecoupled) { new WritebackReq() }.flip val data_req = (new ioDecoupled) { new DataArrayArrayReq() } val data_resp = Bits(MEM_DATA_BITS, INPUT) - val refill_req = (new ioDecoupled) { new TransactionInit }.flip val mem_req = (new ioDecoupled) { new TransactionInit } val mem_req_data = (new ioDecoupled) { new TransactionInitData } - val mem_abort = (new ioPipe) { new TransactionAbort }.flip - val mem_rep = (new ioPipe) { new TransactionReply }.flip - val mem_finish = (new ioDecoupled) { new TransactionFinish } } val 
valid = Reg(resetVal = Bool(false)) val data_req_fired = Reg(resetVal = Bool(false)) val cmd_sent = Reg() { Bool() } val cnt = Reg() { UFix(width = log2up(REFILL_CYCLES+1)) } - val addr = Reg() { new WritebackReq() } - - val acked = Reg() { Bool() } - val nacked = Reg() { Bool() } - when (io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(NMSHR)) { acked := Bool(true) } - when (io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(NMSHR)) { nacked := Bool(true) } - - val finish_q = (new queue(1)) { new TransactionFinish } - finish_q.io.enq.valid := io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(NMSHR) - finish_q.io.enq.bits.global_xact_id := io.mem_rep.bits.global_xact_id + val req = Reg() { new WritebackReq() } data_req_fired := Bool(false) when (valid && io.mem_req.ready) { @@ -410,96 +423,80 @@ class WritebackUnit extends Component { data_req_fired := Bool(false) cnt := cnt - UFix(1) } - when ((cnt === UFix(REFILL_CYCLES)) && (!data_req_fired || io.mem_req_data.ready)) { - when (acked) { - valid := Bool(false) - } - when (nacked) { - cmd_sent := Bool(false) - nacked := Bool(false) - cnt := UFix(0) - } + when ((cnt === UFix(REFILL_CYCLES)) && io.mem_req_data.ready) { + valid := Bool(false) } when (io.req.valid && io.req.ready) { valid := Bool(true) - acked := Bool(false) - nacked := Bool(false) cmd_sent := Bool(false) cnt := UFix(0) - addr := io.req.bits + req := io.req.bits } - io.req.ready := !valid && finish_q.io.enq.ready + io.req.ready := !valid io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) - io.data_req.bits.way_en := addr.way_oh - io.data_req.bits.inner_req.idx := addr.idx + io.data_req.bits.way_en := req.way_oh + io.data_req.bits.inner_req.idx := req.idx io.data_req.bits.inner_req.offset := cnt io.data_req.bits.inner_req.rw := Bool(false) io.data_req.bits.inner_req.wmask := Bits(0) io.data_req.bits.inner_req.data := Bits(0) - val wb_req_val = valid && !cmd_sent - io.refill_req.ready := io.mem_req.ready && !(valid && 
!acked) - io.mem_req.valid := io.refill_req.valid && !(valid && !acked) || wb_req_val - io.mem_req.bits.t_type := Mux(wb_req_val, X_INIT_WRITE_UNCACHED, io.refill_req.bits.t_type) - io.mem_req.bits.address := Mux(wb_req_val, Cat(addr.tag, addr.idx).toUFix, io.refill_req.bits.address) - io.mem_req.bits.tile_xact_id := Mux(wb_req_val, Bits(NMSHR), io.refill_req.bits.tile_xact_id) + io.mem_req.valid := valid && !cmd_sent + io.mem_req.bits.t_type := X_INIT_WRITE_UNCACHED + io.mem_req.bits.address := Cat(req.tag, req.idx).toUFix + io.mem_req.bits.tile_xact_id := req.tile_xact_id io.mem_req_data.valid := data_req_fired io.mem_req_data.bits.data := io.data_resp - io.mem_finish <> finish_q.io.deq } class FlushUnit(lines: Int) extends Component with FourStateCoherence{ val io = new Bundle { - val req = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) }.flip - val resp = (new ioDecoupled) { Bits(width = DCACHE_TAG_BITS) } - val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } - val meta_resp = (new MetaData).asInput() - val wb_req = (new ioDecoupled) { new WritebackReq() } + val req = (new ioDecoupled) { Bool() }.flip + val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } + val mshr_req = (new ioDecoupled) { Bool() }.flip } - val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: s_meta_write :: s_done :: Nil = Enum(6) { UFix() } + val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: Nil = Enum(4) { UFix() } val state = Reg(resetVal = s_reset) - val cpu_tag = Reg() { Bits() } val idx_cnt = Reg(resetVal = UFix(0, log2up(lines))) val next_idx_cnt = idx_cnt + UFix(1) - val way_cnt = Reg(resetVal = UFix(0, log2up(NWAYS))) + val way_cnt = if (NWAYS == 1) UFix(0) else Reg(resetVal = UFix(0, log2up(NWAYS))) val next_way_cnt = way_cnt + UFix(1) switch (state) { is(s_reset) { when (io.meta_req.ready) { - state := Mux(~way_cnt === UFix(0) && ~idx_cnt === UFix(0), s_ready, s_reset); - when (~way_cnt === UFix(0)) { idx_cnt := next_idx_cnt }; - way_cnt := 
next_way_cnt; + state := Mux(way_cnt === UFix(NWAYS-1) && idx_cnt.andR, s_ready, s_reset); + when (way_cnt === UFix(NWAYS-1)) { idx_cnt := next_idx_cnt }; + if (NWAYS > 1) way_cnt := next_way_cnt; } } - is(s_ready) { when (io.req.valid) { state := s_meta_read; cpu_tag := io.req.bits } } + is(s_ready) { when (io.req.valid) { state := s_meta_read } } is(s_meta_read) { when (io.meta_req.ready) { state := s_meta_wait } } - is(s_meta_wait) { state := Mux(needsWriteback(io.meta_resp.state) && !io.wb_req.ready, s_meta_read, s_meta_write) } - is(s_meta_write) { - when (io.meta_req.ready) { - state := Mux(~way_cnt === UFix(0) && ~idx_cnt === UFix(0), s_done, s_meta_read); - when (~way_cnt === UFix(0)) { idx_cnt := next_idx_cnt }; - way_cnt := next_way_cnt; + is(s_meta_wait) { + state := s_meta_read + when (io.mshr_req.ready) { + state := s_meta_read + when (way_cnt === UFix(NWAYS-1)) { + when (idx_cnt.andR) { + state := s_ready + } + idx_cnt := next_idx_cnt + } + if (NWAYS > 1) way_cnt := next_way_cnt; } } - is(s_done) { when (io.resp.ready) { state := s_ready } } } io.req.ready := state === s_ready - io.resp.valid := state === s_done - io.resp.bits := cpu_tag - io.meta_req.valid := (state === s_meta_read) || (state === s_meta_write) || (state === s_reset) + io.mshr_req.valid := state === s_meta_wait + io.meta_req.valid := (state === s_meta_read) || (state === s_reset) io.meta_req.bits.way_en := UFixToOH(way_cnt, NWAYS) io.meta_req.bits.inner_req.idx := idx_cnt - io.meta_req.bits.inner_req.rw := (state === s_meta_write) || (state === s_reset) + io.meta_req.bits.inner_req.rw := (state === s_reset) io.meta_req.bits.inner_req.data.state := newStateOnFlush() io.meta_req.bits.inner_req.data.tag := UFix(0) - io.wb_req.valid := state === s_meta_wait && needsWriteback(io.meta_resp.state) - io.wb_req.bits.tag := io.meta_resp.tag - io.wb_req.bits.idx := idx_cnt - io.wb_req.bits.way_oh := UFixToOH(way_cnt, NWAYS) } class MetaDataArray(lines: Int) extends Component { @@ -718,9 +715,17 
@@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val r_req_read = r_req_load || r_req_amo val r_req_write = r_req_store || r_req_amo val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch + val nack_hit = Wire() { Bool() } val mshr = new MSHRFile() val replay_amo_val = mshr.io.data_req.valid && mshr.io.data_req.bits.cmd(3).toBool + + // reset and flush unit + val flusher = new FlushUnit(lines) + val flushed = Reg(resetVal = Bool(true)) + flushed := flushed && (!r_cpu_req_val_ || r_req_flush) || r_cpu_req_val_ && r_req_flush && mshr.io.fence_rdy && flusher.io.req.ready + flusher.io.req.valid := r_cpu_req_val_ && r_req_flush && mshr.io.fence_rdy && !flushed + flusher.io.mshr_req.ready := mshr.io.req.ready when (io.cpu.req_val) { r_cpu_req_idx := io.cpu.req_idx @@ -735,6 +740,11 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { r_amo_replay_data := mshr.io.data_req.bits.data r_way_oh := mshr.io.data_req.bits.way_oh } + when (flusher.io.meta_req.valid) { + r_cpu_req_idx := Cat(flusher.io.meta_req.bits.inner_req.idx, mshr.io.data_req.bits.offset) + r_cpu_req_cmd := M_FLA + r_way_oh := flusher.io.meta_req.bits.way_en + } val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) val misaligned = @@ -748,6 +758,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // tags val meta = new MetaDataArrayArray(lines) val meta_arb = (new Arbiter(3)) { new MetaArrayArrayReq() } + flusher.io.meta_req <> meta_arb.io.in(0) meta_arb.io.out <> meta.io.req // data @@ -777,22 +788,15 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // writeback unit val finish_arb = (new Arbiter(2)) { new TransactionFinish } val wb = new WritebackUnit - val wb_arb = (new Arbiter(2)) { new WritebackReq() } - wb_arb.io.out <> wb.io.req + wb.io.req <> mshr.io.wb_req wb.io.data_req <> data_arb.io.in(3) wb.io.data_resp <> data_resp_mux - wb.io.mem_rep <> io.mem.xact_rep - wb.io.mem_finish <> 
finish_arb.io.in(0) - wb.io.mem_abort.valid := io.mem.xact_abort.valid - wb.io.mem_abort.bits := io.mem.xact_abort.bits // replacement policy val replacer = new RandomReplacementWayGen() replacer.io.way_en := ~UFix(0, NWAYS) - val replaced_way_id = replacer.io.way_id - val replaced_way_oh = UFixToOH(replaced_way_id, NWAYS) - val meta_wb_mux = meta.io.resp(replaced_way_id) - val needs_writeback = needsWriteback(meta_wb_mux.state) + val replaced_way_oh = Mux(flusher.io.mshr_req.valid, r_way_oh, UFixToOH(replacer.io.way_id, NWAYS)) + val meta_wb_mux = Mux1H(NWAYS, replaced_way_oh, meta.io.resp) // refill response data_arb.io.in(0).bits.inner_req.offset := mshr.io.mem_resp_offset @@ -801,7 +805,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { data_arb.io.in(0).bits.inner_req.wmask := ~UFix(0, MEM_DATA_BITS/8) data_arb.io.in(0).bits.inner_req.data := io.mem.xact_rep.bits.data data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh - data_arb.io.in(0).valid := io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id < UFix(NMSHR) + data_arb.io.in(0).valid := io.mem.xact_rep.valid && (io.mem.xact_rep.bits.t_type === X_REP_READ_SHARED || io.mem.xact_rep.bits.t_type === X_REP_READ_EXCLUSIVE) // load hits data_arb.io.in(4).bits.inner_req.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) @@ -820,7 +824,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb)) val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) val p_store_match = r_cpu_req_val_ && r_req_read && p_store_idx_match && p_store_offset_match - val drain_store_val = (p_store_valid && (!io.cpu.req_val || !req_read || Reg(tag_miss))) || p_store_match + val drain_store_val = (p_store_valid && (!io.cpu.req_val || !req_read || Reg(wb.io.req.valid || mshr.io.data_req.valid))) || p_store_match 
data_arb.io.in(2).bits.inner_req.offset := p_store_idx(offsetmsb,ramindexlsb) data_arb.io.in(2).bits.inner_req.idx := p_store_idx(indexmsb,indexlsb) data_arb.io.in(2).bits.inner_req.rw := Bool(true) @@ -829,30 +833,21 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val drain_store = drain_store_val && data_arb.io.in(2).ready val p_amo = Reg(resetVal = Bool(false)) val p_store_rdy = !(p_store_valid && !drain_store) && !(mshr.io.data_req.valid || r_replay_amo || p_amo) - p_amo := tag_hit && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo - p_store_valid := p_store_valid && !drain_store || (tag_hit && r_req_store && p_store_rdy) || p_amo + p_amo := tag_hit && mshr.io.req.ready && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo + p_store_valid := p_store_valid && !drain_store || (tag_hit && mshr.io.req.ready && r_req_store && p_store_rdy) || p_amo - // writeback - val wb_rdy = wb_arb.io.in(1).ready && !p_store_idx_match - wb_arb.io.in(1).valid := tag_miss && r_req_readwrite && needs_writeback && !p_store_idx_match - wb_arb.io.in(1).bits.tag := meta_wb_mux.tag - wb_arb.io.in(1).bits.idx := r_cpu_req_idx(indexmsb,indexlsb) - wb_arb.io.in(1).bits.way_oh := replaced_way_oh - - // tag update after a miss or a store to an exclusive clean line. - val set_wb_state = tag_miss && r_req_readwrite && isValid(meta_wb_mux.state) && (!needs_writeback || wb_rdy) - //val set_hit_state = tag_hit && meta_resp_mux.state != newStateOnHit(r_cpu_req_cmd) + // tag update after a store to an exclusive clean line. 
val new_hit_state = newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) val set_hit_state = tag_hit && meta_resp_mux.state != new_hit_state meta.io.state_req.bits.inner_req.rw := Bool(true) meta.io.state_req.bits.inner_req.idx := r_cpu_req_idx(indexmsb,indexlsb) - meta.io.state_req.bits.inner_req.data.state := Mux(set_wb_state, newStateOnWriteback(), new_hit_state) - meta.io.state_req.bits.way_en := Mux(set_wb_state, replaced_way_oh, hit_way_oh) - meta.io.state_req.valid := set_wb_state || set_hit_state + meta.io.state_req.bits.inner_req.data.state := new_hit_state + meta.io.state_req.bits.way_en := hit_way_oh + meta.io.state_req.valid := set_hit_state // pending store data, also used for AMO RHS val amoalu = new AMOALU - when (tag_hit && r_req_write && p_store_rdy || r_replay_amo) { + when (r_cpu_req_val_ && r_req_write && p_store_rdy || r_replay_amo) { p_store_idx := r_cpu_req_idx p_store_type := r_cpu_req_type p_store_cmd := r_cpu_req_cmd @@ -864,7 +859,9 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { } // miss handling - mshr.io.req.valid := tag_miss && r_req_readwrite && (!needs_writeback || wb_rdy) + mshr.io.req.valid := tag_miss && r_req_readwrite && !nack_hit || flusher.io.mshr_req.valid + mshr.io.req.bits.old_state := meta_wb_mux.state + mshr.io.req.bits.old_tag := meta_wb_mux.tag mshr.io.req.bits.tag := cpu_req_tag mshr.io.req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) mshr.io.req.bits.cpu_tag := r_cpu_req_tag @@ -878,20 +875,19 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { mshr.io.mem_finish <> finish_arb.io.in(1) mshr.io.mem_abort.valid := io.mem.xact_abort.valid mshr.io.mem_abort.bits := io.mem.xact_abort.bits - mshr.io.mem_req <> wb.io.refill_req mshr.io.meta_req <> meta_arb.io.in(1) replacer.io.pick_new_way := mshr.io.req.valid && mshr.io.req.ready // replays val replay = mshr.io.data_req.bits - val stall_replay = r_replay_amo || p_amo || p_store_valid + val stall_replay = r_replay_amo || p_amo || 
p_store_valid || flusher.io.meta_req.valid val replay_val = mshr.io.data_req.valid + val replay_fire = replay_val && !stall_replay val replay_rdy = data_arb.io.in(1).ready && !stall_replay - val replay_fire = replay_val && replay_rdy data_arb.io.in(1).bits.inner_req.offset := replay.offset(offsetmsb,ramindexlsb) data_arb.io.in(1).bits.inner_req.idx := replay.idx data_arb.io.in(1).bits.inner_req.rw := replay.cmd === M_XWR - data_arb.io.in(1).valid := replay_val && !stall_replay + data_arb.io.in(1).valid := replay_fire data_arb.io.in(1).bits.way_en := mshr.io.data_req.bits.way_oh mshr.io.data_req.ready := replay_rdy r_replay_amo := replay_amo_val && replay_rdy @@ -925,38 +921,31 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { early_nack := early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo - // reset and flush unit - val flusher = new FlushUnit(lines) - val flushed = Reg(resetVal = Bool(true)) - val flush_rdy = mshr.io.fence_rdy && wb_rdy && !p_store_valid - flushed := flushed && !r_cpu_req_val_ || r_cpu_req_val_ && r_req_flush && flush_rdy && flusher.io.req.ready - flusher.io.req.valid := r_cpu_req_val_ && r_req_flush && flush_rdy && !flushed - flusher.io.wb_req <> wb_arb.io.in(0) - flusher.io.meta_req <> meta_arb.io.in(0) - flusher.io.meta_resp <> meta_resp_mux - flusher.io.resp.ready := Bool(true) // we don't respond to flush requests - // we usually nack rather than reporting that the cache is not ready. // fences and flushes are the exceptions. 
val pending_fence = Reg(resetVal = Bool(false)) - pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !flush_rdy - val nack_hit = p_store_match || replay_val || r_req_write && !p_store_rdy - val nack_miss = needs_writeback && !wb_rdy || !mshr.io.req.ready - val nack_flush = !flush_rdy && (r_req_fence || r_req_flush) || + pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !mshr.io.fence_rdy + nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy + val nack_miss = !mshr.io.req.ready + val nack_flush = !mshr.io.fence_rdy && (r_req_fence || r_req_flush) || !flushed && r_req_flush - val nack = early_nack || r_req_readwrite && Mux(tag_match, nack_hit, nack_miss) || nack_flush + val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack - io.cpu.resp_val := (tag_hit && !nack_hit && r_req_read) || mshr.io.cpu_resp_val + io.cpu.resp_val := (tag_hit && !nack && r_req_read) || mshr.io.cpu_resp_val io.cpu.resp_replay := mshr.io.cpu_resp_val - io.cpu.resp_miss := tag_miss && !nack_miss && r_req_read + io.cpu.resp_miss := r_cpu_req_val_ && !tag_match && r_req_read io.cpu.resp_tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag) io.cpu.resp_type := loadgen.io.typ io.cpu.resp_data := loadgen.io.dout io.cpu.resp_data_subword := loadgen.io.r_dout_subword - io.mem.xact_init <> wb.io.mem_req + val xact_init_arb = (new Arbiter(2)) { new TransactionInit } + xact_init_arb.io.in(0) <> wb.io.mem_req + xact_init_arb.io.in(1) <> mshr.io.mem_req + io.mem.xact_init <> xact_init_arb.io.out + io.mem.xact_init_data <> wb.io.mem_req_data io.mem.xact_finish <> finish_arb.io.out } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index fbc00ebb..76c93390 100644 --- a/rocket/src/main/scala/util.scala +++ 
b/rocket/src/main/scala/util.scala @@ -13,7 +13,7 @@ object foldR object log2up { - def apply(in: Int) = if (in == 1) 1 else ceil(log(in)/log(2)).toInt + def apply(in: Int) = ceil(log(in)/log(2)).toInt } object ispow2 From 85504f0ddc09933477d878d97139b0703233c709 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 9 Mar 2012 03:26:05 -0800 Subject: [PATCH 0298/1087] fix bug in fence.i and improve test --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 3c812b3e..4683febc 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -205,7 +205,7 @@ object rocketCtrlDecode DI-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,N,Y,Y), ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,Y,N), FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N,N), - FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N,N), + FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N,Y), CFLUSH-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,Y,Y), MFPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,N,Y,N), MTPCR-> List(Y, N,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,N,Y,Y), From 9319130483b1f9c10a8e065242e68a2b6b1b8751 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 9 Mar 2012 
11:04:58 -0800 Subject: [PATCH 0299/1087] Special cased NTILES == 1 due to log2up revision --- rocket/src/main/scala/coherence.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index a2c6ba5f..275e427b 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -297,7 +297,7 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { val t_type_ = Reg{ Bits() } val init_tile_id_ = Reg{ Bits() } val tile_xact_id_ = Reg{ Bits() } - val p_rep_count = Reg(resetVal = UFix(0, width = log2up(NTILES))) + val p_rep_count = if (NTILES == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2up(NTILES))) val p_req_flags = Reg(resetVal = Bits(0, width = NTILES)) val p_rep_tile_id_ = Reg{ Bits() } val x_needs_read = Reg(resetVal = Bool(false)) @@ -346,7 +346,7 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := transactionInitHasData(io.alloc_req.bits.xact_init) x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) - p_rep_count := UFix(NTILES-1) + if(NTILES > 1) p_rep_count := UFix(NTILES-1) p_req_flags := ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only mem_cnt := UFix(0) p_w_mem_cmd_sent := Bool(false) @@ -366,7 +366,7 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { when(io.p_rep_cnt_dec.orR) { val p_rep_count_next = p_rep_count - PopCount(io.p_rep_cnt_dec) io.pop_p_rep := io.p_rep_cnt_dec - p_rep_count := p_rep_count_next + if(NTILES > 1) p_rep_count := p_rep_count_next when(p_rep_count === UFix(0)) { io.pop_p_rep := Bool(true) state := s_mem From e591d83e919f2d52479e0c38d360470bf49d15a3 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 9 Mar 2012 11:05:44 -0800 Subject: [PATCH 0300/1087] Fixed global_xact_id propagation bug --- 
rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index cf788a9d..4e96b6ed 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -216,7 +216,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { val finish_q = (new queue(2 /* wb + refill */)) { new TransactionFinish } finish_q.io.enq.valid := wb_done || refill_done - finish_q.io.enq.bits := io.mem_rep.bits.global_xact_id + finish_q.io.enq.bits.global_xact_id := io.mem_rep.bits.global_xact_id when (state === s_write_meta && io.meta_req.ready) { state := s_invalid From e3a68848e0f2caa20db4f7d72dffd029547c1a0c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 9 Mar 2012 20:01:47 -0800 Subject: [PATCH 0301/1087] fix D$ critical paths and fix verilog build --- rocket/src/main/scala/nbdcache.scala | 73 +++++++++++++--------------- rocket/src/main/scala/util.scala | 69 +++++++++++--------------- 2 files changed, 63 insertions(+), 79 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 4e96b6ed..0013f025 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -93,6 +93,7 @@ class LoadDataGen extends Component { } class MSHRReq extends Bundle { + val tag_miss = Bool() val old_state = UFix(width = 2) val old_tag = Bits(width = TAG_BITS) @@ -189,7 +190,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { val wb_req = (new ioDecoupled) { new WritebackReq } } - val s_invalid :: s_meta_invalidate :: s_wb_req :: s_wb_resp :: s_refill_req :: s_refill_resp :: s_drain_rpq :: s_write_meta :: Nil = Enum(8) { UFix() } + val s_invalid :: s_wb_req :: s_wb_resp :: s_refill_req :: s_refill_resp :: s_drain_rpq :: Nil = Enum(6) { UFix() } val state = Reg(resetVal = s_invalid) val flush = Reg { Bool() } @@ -201,13 +202,13 @@ class MSHR(id: 
Int) extends Component with FourStateCoherence { val req_cmd = io.req_bits.cmd val req_use_rpq = (req_cmd != M_PFR) && (req_cmd != M_PFW) && (req_cmd != M_FLA) val idx_match = req.idx === io.req_bits.idx - val sec_rdy = idx_match && !flush && (state === s_meta_invalidate || state === s_wb_req || state === s_wb_resp || (state === s_refill_req || state === s_refill_resp) && !needsSecondaryXact(req_cmd, io.mem_req.bits)) + val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || (state === s_refill_req || state === s_refill_resp) && !needsSecondaryXact(req_cmd, io.mem_req.bits)) val rpq = (new queue(NRPQ)) { new RPQEntry } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq rpq.io.enq.bits := io.req_bits rpq.io.enq.bits.sdq_id := io.req_sdq_id - rpq.io.deq.ready := io.replay.ready && (state === s_drain_rpq) + rpq.io.deq.ready := io.replay.ready && (state === s_drain_rpq) || (state === s_invalid) val abort = io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(id) val reply = io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(id) @@ -218,12 +219,9 @@ class MSHR(id: Int) extends Component with FourStateCoherence { finish_q.io.enq.valid := wb_done || refill_done finish_q.io.enq.bits.global_xact_id := io.mem_rep.bits.global_xact_id - when (state === s_write_meta && io.meta_req.ready) { + when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid && io.meta_req.ready) { state := s_invalid } - when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid) { - state := s_write_meta - } when (state === s_refill_resp) { when (refill_done) { state := s_drain_rpq } when (reply) { @@ -233,7 +231,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { when (abort) { state := s_refill_req } } when (state === s_refill_req && io.mem_req.ready) { - state := Mux(flush, s_write_meta, s_refill_resp) + state := Mux(flush, s_drain_rpq, s_refill_resp) } when 
(state === s_wb_resp) { when (reply) { state := s_refill_req } @@ -242,20 +240,20 @@ class MSHR(id: Int) extends Component with FourStateCoherence { when (state === s_wb_req && io.wb_req.ready) { state := s_wb_resp } - when (state === s_meta_invalidate && io.meta_req.ready) { - state := Mux(needsWriteback(req.old_state), s_wb_req, s_refill_req) - } - when (io.req_sec_val && io.req_sec_rdy) { // s_meta_invalidate, s_wb_req, s_wb_resp, s_refill_req + when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req xact_type := newTransactionOnSecondaryMiss(req_cmd, newStateOnFlush(), io.mem_req.bits) } when ((state === s_invalid) && io.req_pri_val) { - state := s_meta_invalidate flush := req_cmd === M_FLA line_state := newStateOnFlush() refill_count := UFix(0) xact_type := newTransactionOnPrimaryMiss(req_cmd, newStateOnFlush()) req := io.req_bits + + when (io.req_bits.tag_miss) { + state := Mux(needsWriteback(io.req_bits.old_state), s_wb_req, s_refill_req) + } } io.idx_match := (state != s_invalid) && idx_match @@ -266,7 +264,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { io.req_pri_rdy := (state === s_invalid) io.req_sec_rdy := sec_rdy && rpq.io.enq.ready - io.meta_req.valid := (state === s_write_meta) || (state === s_meta_invalidate) + io.meta_req.valid := (state === s_drain_rpq) && !rpq.io.deq.valid && !finish_q.io.deq.valid io.meta_req.bits.inner_req.rw := Bool(true) io.meta_req.bits.inner_req.idx := req.idx io.meta_req.bits.inner_req.data.state := line_state @@ -389,7 +387,8 @@ class MSHRFile extends Component { val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd) val sdq_free = replay.valid && replay.ready && replay_write - sdq_val := sdq_val & ~(sdq_free.toUFix << replay.bits.sdq_id) | (sdq_enq.toUFix << sdq_alloc_id) + sdq_val := sdq_val & ~(sdq_free.toUFix << replay.bits.sdq_id) | + PriorityEncoderOH(~sdq_val(NSDQ-1,0)) & Fill(NSDQ, sdq_enq && io.req.bits.tag_miss) io.data_req.bits.data := 
sdq.read(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) io.cpu_resp_val := Reg(replay.valid && replay.ready && replay_read, resetVal = Bool(false)) @@ -454,7 +453,7 @@ class FlushUnit(lines: Int) extends Component with FourStateCoherence{ val io = new Bundle { val req = (new ioDecoupled) { Bool() }.flip val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } - val mshr_req = (new ioDecoupled) { Bool() }.flip + val mshr_req = (new ioDecoupled) { Bool() } } val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: Nil = Enum(4) { UFix() } @@ -506,14 +505,12 @@ class MetaDataArray(lines: Int) extends Component { val state_req = (new ioDecoupled) { new MetaArrayReq() }.flip } - val permissions_array = Mem(lines){ Bits(width = 2) } - permissions_array.setReadLatency(1); + val permissions_array = Mem(lines){ UFix(width = 2) } permissions_array.write(io.state_req.bits.idx, io.state_req.bits.data.state, io.state_req.valid && io.state_req.bits.rw) - val permissions_rdata1 = permissions_array.rw(io.req.bits.idx, io.req.bits.data.state, io.req.valid && io.req.bits.rw) - - // don't allow reading and writing of vd_array in same cycle. - // this could be eliminated if the read port were combinational. 
- val permissions_conflict = io.state_req.valid && (io.req.bits.idx === io.state_req.bits.idx) + permissions_array.write(io.req.bits.idx, io.req.bits.data.state, io.req.valid && io.req.bits.rw) + val raddr = Reg() { Bits() } + when (io.req.valid && !io.req.bits.rw) { raddr := io.req.bits.idx } + val permissions_rdata1 = permissions_array.read(raddr) val tag_array = Mem(lines){ Bits(width=TAG_BITS) } tag_array.setReadLatency(1); @@ -522,7 +519,7 @@ class MetaDataArray(lines: Int) extends Component { io.resp.state := permissions_rdata1.toUFix io.resp.tag := tag_rdata - io.req.ready := !permissions_conflict + io.req.ready := Bool(true) } class MetaDataArrayArray(lines: Int) extends Component { @@ -782,11 +779,10 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val hit_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), hit_way_oh, meta.io.way_en) val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_oh, data.io.way_en) - val meta_resp_mux = Mux1H(NWAYS, meta_resp_way_oh, meta.io.resp) - val data_resp_mux = Mux1H(NWAYS, data_resp_way_oh, data.io.resp) + val meta_resp_mux = Mux1H(meta_resp_way_oh, meta.io.resp) + val data_resp_mux = Mux1H(data_resp_way_oh, data.io.resp) // writeback unit - val finish_arb = (new Arbiter(2)) { new TransactionFinish } val wb = new WritebackUnit wb.io.req <> mshr.io.wb_req wb.io.data_req <> data_arb.io.in(3) @@ -796,7 +792,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val replacer = new RandomReplacementWayGen() replacer.io.way_en := ~UFix(0, NWAYS) val replaced_way_oh = Mux(flusher.io.mshr_req.valid, r_way_oh, UFixToOH(replacer.io.way_id, NWAYS)) - val meta_wb_mux = Mux1H(NWAYS, replaced_way_oh, meta.io.resp) + val meta_wb_mux = Mux1H(replaced_way_oh, meta.io.resp) // refill response data_arb.io.in(0).bits.inner_req.offset := mshr.io.mem_resp_offset @@ -833,17 +829,17 @@ class 
HellaCacheUniproc extends HellaCache with FourStateCoherence { val drain_store = drain_store_val && data_arb.io.in(2).ready val p_amo = Reg(resetVal = Bool(false)) val p_store_rdy = !(p_store_valid && !drain_store) && !(mshr.io.data_req.valid || r_replay_amo || p_amo) - p_amo := tag_hit && mshr.io.req.ready && r_req_amo && p_store_rdy && !p_store_match || r_replay_amo - p_store_valid := p_store_valid && !drain_store || (tag_hit && mshr.io.req.ready && r_req_store && p_store_rdy) || p_amo + p_amo := tag_hit && r_req_amo && mshr.io.req.ready && !nack_hit || r_replay_amo + p_store_valid := p_store_valid && !drain_store || (tag_hit && r_req_store && mshr.io.req.ready && !nack_hit) || p_amo // tag update after a store to an exclusive clean line. val new_hit_state = newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) val set_hit_state = tag_hit && meta_resp_mux.state != new_hit_state meta.io.state_req.bits.inner_req.rw := Bool(true) - meta.io.state_req.bits.inner_req.idx := r_cpu_req_idx(indexmsb,indexlsb) - meta.io.state_req.bits.inner_req.data.state := new_hit_state - meta.io.state_req.bits.way_en := hit_way_oh - meta.io.state_req.valid := set_hit_state + meta.io.state_req.bits.inner_req.idx := Reg(r_cpu_req_idx(indexmsb,indexlsb)) + meta.io.state_req.bits.inner_req.data.state := Reg(new_hit_state) + meta.io.state_req.bits.way_en := Reg(hit_way_oh) + meta.io.state_req.valid := Reg(set_hit_state, resetVal = Bool(false)) // pending store data, also used for AMO RHS val amoalu = new AMOALU @@ -859,7 +855,8 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { } // miss handling - mshr.io.req.valid := tag_miss && r_req_readwrite && !nack_hit || flusher.io.mshr_req.valid + mshr.io.req.valid := r_cpu_req_val && r_req_readwrite && !nack_hit || flusher.io.mshr_req.valid + mshr.io.req.bits.tag_miss := tag_miss || flusher.io.mshr_req.valid mshr.io.req.bits.old_state := meta_wb_mux.state mshr.io.req.bits.old_tag := meta_wb_mux.tag mshr.io.req.bits.tag := 
cpu_req_tag @@ -872,7 +869,6 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { mshr.io.req.bits.data := cpu_req_data mshr.io.mem_rep <> io.mem.xact_rep - mshr.io.mem_finish <> finish_arb.io.in(1) mshr.io.mem_abort.valid := io.mem.xact_abort.valid mshr.io.mem_abort.bits := io.mem.xact_abort.bits mshr.io.meta_req <> meta_arb.io.in(1) @@ -925,7 +921,8 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // fences and flushes are the exceptions. val pending_fence = Reg(resetVal = Bool(false)) pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !mshr.io.fence_rdy - nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy + nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy || + p_store_idx_match && meta.io.state_req.valid val nack_miss = !mshr.io.req.ready val nack_flush = !mshr.io.fence_rdy && (r_req_fence || r_req_flush) || !flushed && r_req_flush @@ -947,5 +944,5 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { io.mem.xact_init <> xact_init_arb.io.out io.mem.xact_init_data <> wb.io.mem_req_data - io.mem.xact_finish <> finish_arb.io.out + io.mem.xact_finish <> mshr.io.mem_finish } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 76c93390..b584696f 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -119,46 +119,7 @@ object ShiftRegister object Mux1H { -//TODO: cloning in(0) is unsafe if other elements have different widths, but -//is that even allowable? 
- def apply [T <: Data](n: Int, sel: Vec[Bool], in: Vec[T]): T = { - MuxCase(in(0), (0 until n).map( i => (sel(i), in(i)))) -// val mux = (new Mux1H(n)){ in(0).clone } -// mux.io.sel <> sel -// mux.io.in <> in -// mux.io.out.asInstanceOf[T] - } - - def apply [T <: Data](n: Int, sel: Seq[Bool], in: Vec[T]): T = { - MuxCase(in(0), (0 until n).map( i => (sel(i), in(i)))) -// val mux = (new Mux1H(n)){ in(0).clone } -// for(i <- 0 until n) { -// mux.io.sel(i) := sel(i) -// } -// mux.io.in <> in.asOutput -// mux.io.out.asInstanceOf[T] - } - - def apply [T <: Data](n: Int, sel: Bits, in: Vec[T]): T = { - MuxCase(in(0), (0 until n).map( i => (sel(i).toBool, in(i)))) -// val mux = (new Mux1H(n)){ in(0).clone } -// for(i <- 0 until n) { -// mux.io.sel(i) := sel(i).toBool -// } -// mux.io.in := in -// mux.io.out - } -} - -class Mux1H [T <: Data](n: Int)(gen: => T) extends Component -{ - val io = new Bundle { - val sel = Vec(n) { Bool(dir = INPUT) } - val in = Vec(n) { gen }.asInput - val out = gen.asOutput - } - - def buildMux(sel: Bits, in: Vec[T], i: Int, n: Int): T = { + def buildMux[T <: Data](sel: Bits, in: Vec[T], i: Int, n: Int): T = { if (n == 1) in(i) else @@ -170,7 +131,19 @@ class Mux1H [T <: Data](n: Int)(gen: => T) extends Component } } - io.out := buildMux(io.sel.toBits, io.in, 0, n) + def apply [T <: Data](sel: Bits, in: Vec[T]): T = buildMux(sel, in, 0, sel.getWidth) + def apply [T <: Data](sel: Vec[Bool], in: Vec[T]): T = apply(sel.toBits, in) +} + +class Mux1H [T <: Data](n: Int)(gen: => T) extends Component +{ + val io = new Bundle { + val sel = Vec(n) { Bool(dir = INPUT) } + val in = Vec(n) { gen }.asInput + val out = gen.asOutput + } + + io.out := Mux1H(io.sel, io.in) } @@ -267,3 +240,17 @@ object PriorityEncoder Mux(in(n), UFix(n), doApply(in, n+1)) } } + +object PriorityEncoderOH +{ + def apply(in: Bits): UFix = doApply(in, 0) + def doApply(in: Bits, n: Int = 0): UFix = { + val out = Vec(in.getWidth) { Wire() { Bool() } } + var none_hot = Bool(true) + 
for (i <- 0 until in.getWidth) { + out(i) := none_hot && in(i) + none_hot = none_hot && !in(i) + } + out.toBits + } +} From 7eb73c325ea3173de1b9a557603e3275399deea2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 10 Mar 2012 00:21:51 -0800 Subject: [PATCH 0302/1087] fix signedness of zero fmul results We were using the FMA unit to compute rs1 * rs2 + 0.0 for fmul, which incorrectly computes +0.0 when rs1 * rs2 == -0.0. Now we add -0.0 if rs1*rs2 is negative. --- rocket/src/main/scala/fpu.scala | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index bf81d49e..8696f5a2 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -390,12 +390,15 @@ class rocketFPUSFMAPipe(latency: Int) extends Component io.cmd === FCMD_NMADD || io.cmd === FCMD_NMSUB val cmd_addsub = io.cmd === FCMD_ADD || io.cmd === FCMD_SUB + val one = Bits("h80000000") + val zero = Cat(io.in1(32) ^ io.in2(32), Bits(0, 32)) + when (io.valid) { cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) rm := io.rm in1 := io.in1 - in2 := Mux(cmd_addsub, Bits("h80000000"), io.in2) - in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, Bits(0))) + in2 := Mux(cmd_addsub, one, io.in2) + in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, zero)) } val fma = new hardfloat.mulAddSubRecodedFloat32_1 @@ -423,12 +426,15 @@ class rocketFPUDFMAPipe(latency: Int) extends Component io.cmd === FCMD_NMADD || io.cmd === FCMD_NMSUB val cmd_addsub = io.cmd === FCMD_ADD || io.cmd === FCMD_SUB + val one = Bits("h8000000000000000") + val zero = Cat(io.in1(64) ^ io.in2(64), Bits(0, 64)) + when (io.valid) { cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) rm := io.rm in1 := io.in1 - in2 := Mux(cmd_addsub, Bits("h8000000000000000"), io.in2) - in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, Bits(0))) + in2 := Mux(cmd_addsub, one, io.in2) + in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, 
io.in2, zero)) } val fma = new hardfloat.mulAddSubRecodedFloat64_1 From 44ff22a26f33dc7c587be036d6ad17e66454d1db Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 10 Mar 2012 12:54:36 -0800 Subject: [PATCH 0303/1087] vector exception handler now handles prefetches correctly --- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/ctrl.scala | 8 +- rocket/src/main/scala/ctrl_vec.scala | 135 ++++++++++++----------- rocket/src/main/scala/dpath_vec.scala | 1 + rocket/src/main/scala/instructions.scala | 8 +- 5 files changed, 81 insertions(+), 73 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index a3b1e4ea..c53904fc 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -7,7 +7,7 @@ object Constants { val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = false + val HAVE_VEC = true val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 4683febc..d44c88c5 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -301,10 +301,10 @@ object rocketCtrlDecode VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VENQCMD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,Y), - VENQIMM1-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,Y), - VENQIMM2-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,Y), - VENQCNT-> List(VEC_Y,Y,BR_N, 
REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,Y), + VENQCMD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), + VENQIMM1-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), + VENQIMM2-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), + VENQCNT-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), VWAITXCPT-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y), VWAITKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y)) } diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 2423e426..62f7f8c5 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -10,6 +10,7 @@ class ioCtrlDpathVec extends Bundle val valid = Bool(INPUT) val inst = Bits(32, INPUT) val appvl0 = Bool(INPUT) + val pfq = Bool(INPUT) val wen = Bool(OUTPUT) val fn = Bits(1, OUTPUT) val sel_vcmd = Bits(3, OUTPUT) @@ -69,74 +70,80 @@ class rocketCtrlVec extends Component // | | | | | vpfcmdq // | | | | | | vpfximm1q // | | | | | | | vpfximm2q - // wen | | | | | | | | vpfcntq - // val vcmd vimm vimm2 | fn | | | | | | | | | fence_cv - // | | | | | | | | | | | | | | | | waitxcpt - // | | | | | | | | | | | | | | | | | - List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFG,N,Y,Y,N,N,Y,Y,N,N,N,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, 
N,Y,Y,N,N,Y,Y,N,N,N,N), - VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N), - FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,Y,N), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,Y,N), - VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, 
Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N), - VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,Y,N,N,N,N,N), - VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_X, N,N,Y,N,N,N,Y,N,N,N,N), - VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,Y,N,N,N,Y,N,N,N), - VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,Y,N,N,N,Y,N,N), - VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y), - VWAITKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y) + // | | | | | | | | vpfcntq + // wen | | | | | | | | | pfq + // val vcmd vimm vimm2 | fn | | | | | | | | | | fence_cv + // | | | | | | | | | | | | | | | | | waitxcpt + // | | | | | | | | | | | | | | | | | | + List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFG,N,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N), + VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_X, 
Y,Y,N,N,N,N,N,N,N,N,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,Y,N), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,Y,N), + VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTHU-> 
List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,Y,N,N,N,Y,N,N), + VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_X, N,N,Y,N,N,N,Y,N,N,Y,N,N), + VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,Y,N,N,N,Y,N,Y,N,N), + VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,Y,N,N,N,Y,Y,N,N), + VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y), + VWAITKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y) )) val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_sel_vimm2 :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: wb_vec_cntq_enq :: veccs1 = veccs0 val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_pfcntq_enq :: veccs2 = veccs1 - val wb_vec_fence_cv :: wb_vec_waitxcpt :: Nil = veccs2 + val wb_vec_pfaq :: wb_vec_fence_cv :: wb_vec_waitxcpt :: Nil = veccs2 val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val && !(wb_vec_appvlmask && io.dpath.appvl0) + val wb_vec_pfcmdq_enq_mask_pfq = wb_vec_pfcmdq_enq && (!wb_vec_pfaq || io.dpath.pfq) 
+ val wb_vec_pfximm1q_enq_mask_pfq = wb_vec_pfximm1q_enq && (!wb_vec_pfaq || io.dpath.pfq) + val wb_vec_pfximm2q_enq_mask_pfq = wb_vec_pfximm2q_enq && (!wb_vec_pfaq || io.dpath.pfq) + val wb_vec_pfcntq_enq_mask_pfq = wb_vec_pfcntq_enq && (!wb_vec_pfaq || io.dpath.pfq) + val mask_wb_vec_cmdq_ready = !wb_vec_cmdq_enq || io.iface.vcmdq_ready val mask_wb_vec_ximm1q_ready = !wb_vec_ximm1q_enq || io.iface.vximm1q_ready val mask_wb_vec_ximm2q_ready = !wb_vec_ximm2q_enq || io.iface.vximm2q_ready val mask_wb_vec_cntq_ready = !wb_vec_cntq_enq || io.iface.vcntq_ready - val mask_wb_vec_pfcmdq_ready = !wb_vec_pfcmdq_enq || io.iface.vpfcmdq_ready - val mask_wb_vec_pfximm1q_ready = !wb_vec_pfximm1q_enq || io.iface.vpfximm1q_ready - val mask_wb_vec_pfximm2q_ready = !wb_vec_pfximm2q_enq || io.iface.vpfximm2q_ready - val mask_wb_vec_pfcntq_ready = !wb_vec_pfcntq_enq || io.iface.vpfcntq_ready + val mask_wb_vec_pfcmdq_ready = !wb_vec_pfcmdq_enq_mask_pfq || io.iface.vpfcmdq_ready + val mask_wb_vec_pfximm1q_ready = !wb_vec_pfximm1q_enq_mask_pfq || io.iface.vpfximm1q_ready + val mask_wb_vec_pfximm2q_ready = !wb_vec_pfximm2q_enq_mask_pfq || io.iface.vpfximm2q_ready + val mask_wb_vec_pfcntq_ready = !wb_vec_pfcntq_enq_mask_pfq || io.iface.vpfcntq_ready io.dpath.wen := wb_vec_wen.toBool io.dpath.fn := wb_vec_fn @@ -167,32 +174,32 @@ class rocketCtrlVec extends Component io.iface.vpfcmdq_valid := valid_common && mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && - wb_vec_pfcmdq_enq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready + wb_vec_pfcmdq_enq_mask_pfq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vpfximm1q_valid := valid_common && mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && - mask_wb_vec_pfcmdq_ready && wb_vec_pfximm1q_enq && mask_wb_vec_pfximm2q_ready && 
mask_wb_vec_pfcntq_ready + mask_wb_vec_pfcmdq_ready && wb_vec_pfximm1q_enq_mask_pfq && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready io.iface.vpfximm2q_valid := valid_common && mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && wb_vec_pfximm2q_enq && mask_wb_vec_pfcntq_ready + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && wb_vec_pfximm2q_enq_mask_pfq && mask_wb_vec_pfcntq_ready io.iface.vpfcntq_valid := valid_common && mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && wb_vec_pfcntq_enq + mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && wb_vec_pfcntq_enq_mask_pfq io.replay := valid_common && ( wb_vec_cmdq_enq && !io.iface.vcmdq_ready || wb_vec_ximm1q_enq && !io.iface.vximm1q_ready || wb_vec_ximm2q_enq && !io.iface.vximm2q_ready || wb_vec_cntq_enq && !io.iface.vcntq_ready || - wb_vec_pfcmdq_enq && !io.iface.vpfcmdq_ready || - wb_vec_pfximm1q_enq && !io.iface.vpfximm1q_ready || - wb_vec_pfximm2q_enq && !io.iface.vpfximm2q_ready || - wb_vec_pfcntq_enq && !io.iface.vpfcntq_ready || + wb_vec_pfcmdq_enq_mask_pfq && !io.iface.vpfcmdq_ready || + wb_vec_pfximm1q_enq_mask_pfq && !io.iface.vpfximm1q_ready || + wb_vec_pfximm2q_enq_mask_pfq && !io.iface.vpfximm2q_ready || + wb_vec_pfcntq_enq_mask_pfq && !io.iface.vpfcntq_ready || wb_vec_fence_cv && !io.iface.vfence_ready ) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 1ef4dd92..7d045fe4 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -145,4 +145,5 @@ class rocketDpathVec extends Component io.ctrl.valid := io.valid io.ctrl.inst := io.inst io.ctrl.appvl0 := reg_appvl0 + io.ctrl.pfq := io.rs2(0) } diff --git 
a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 4491e80d..5b5a7b89 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -247,10 +247,10 @@ object Instructions val VSETVL = Bits("b?????_?????_000000000000_101_1110011",32); val VF = Bits("b00000_?????_????????????_111_1110011",32); // vector supervisor instructions - val VENQCMD = Bits("b00000_?????_00000_1000000000_1111011",32) - val VENQIMM1 = Bits("b00000_?????_00000_1000000001_1111011",32) - val VENQIMM2 = Bits("b00000_?????_00000_1000000010_1111011",32) - val VENQCNT = Bits("b00000_?????_00000_1000000011_1111011",32) + val VENQCMD = Bits("b00000_?????_?????_1000000000_1111011",32) + val VENQIMM1 = Bits("b00000_?????_?????_1000000001_1111011",32) + val VENQIMM2 = Bits("b00000_?????_?????_1000000010_1111011",32) + val VENQCNT = Bits("b00000_?????_?????_1000000011_1111011",32) val VWAITXCPT = Bits("b00000_00000_00000_1100000000_1111011",32) val VWAITKILL = Bits("b00000_00000_00000_1100000001_1111011",32) From 4f4b990a4f4d8719e4b4c1a957a191edeff062f2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 10 Mar 2012 15:18:57 -0800 Subject: [PATCH 0304/1087] fix null hub store ack bug --- rocket/src/main/scala/coherence.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 275e427b..41959915 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -124,7 +124,7 @@ trait ThreeStateIncoherence extends CoherencePolicy { def newStateOnHit(cmd: Bits, state: UFix): UFix = newState(cmd, state) def newTransactionOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) - Mux(write, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) + Mux(write || cmd === M_PFW, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) } def newTransactionOnSecondaryMiss(cmd: Bits, state: UFix, 
outstanding: TransactionInit): UFix = { val (read, write) = cpuCmdToRW(cmd) @@ -170,7 +170,7 @@ trait FourStateCoherence extends CoherencePolicy { } def newTransactionOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) - Mux(write, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) + Mux(write || cmd === M_PFW, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) } def newTransactionOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { val (read, write) = cpuCmdToRW(cmd) @@ -442,7 +442,7 @@ class CoherenceHubNull extends CoherenceHub { x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.data := io.mem.resp.bits.data x_rep.bits.require_ack := Bool(true) - x_rep.valid := io.mem.resp.valid || x_init.valid && is_write + x_rep.valid := io.mem.resp.valid || x_init.valid && is_write && io.mem.req_cmd.ready io.tiles(0).xact_abort.valid := Bool(false) io.tiles(0).xact_finish.ready := Bool(true) From 8ffdac9526b73ce7a54adb94b2f16106d47f9876 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 10 Mar 2012 15:50:10 -0800 Subject: [PATCH 0305/1087] fix D$ store-upgrade bug loads to the same address as stores that cause an upgrade could return the old value --- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 6 ++++-- rocket/src/main/scala/top.scala | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index c53904fc..a3b1e4ea 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -7,7 +7,7 @@ object Constants { val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = true + val HAVE_VEC = false val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 0013f025..fd76bbaf 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -292,6 +292,7 @@ class 
MSHR(id: Int) extends Component with FourStateCoherence { class MSHRFile extends Component { val io = new Bundle { val req = (new ioDecoupled) { new MSHRReq }.flip + val secondary_miss = Bool(OUTPUT) val mem_resp_idx = Bits(IDX_BITS, OUTPUT) val mem_resp_offset = Bits(log2up(REFILL_CYCLES), OUTPUT) @@ -376,6 +377,7 @@ class MSHRFile extends Component { wb_req_arb.io.out <> io.wb_req io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy + io.secondary_miss := idx_match io.mem_resp_idx := mem_resp_mux.io.out.inner_req.idx io.mem_resp_offset := mem_resp_mux.io.out.inner_req.offset io.mem_resp_way_oh := mem_resp_mux.io.out.way_en @@ -930,9 +932,9 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack - io.cpu.resp_val := (tag_hit && !nack && r_req_read) || mshr.io.cpu_resp_val + io.cpu.resp_val := (tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val io.cpu.resp_replay := mshr.io.cpu_resp_val - io.cpu.resp_miss := r_cpu_req_val_ && !tag_match && r_req_read + io.cpu.resp_miss := r_cpu_req_val_ && (!tag_match || mshr.io.secondary_miss) && r_req_read io.cpu.resp_tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag) io.cpu.resp_type := loadgen.io.typ io.cpu.resp_data := loadgen.io.dout diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 2783f951..1384d89d 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -26,7 +26,7 @@ class Top() extends Component { arbiter.io.requestor(1) <> icache_pf.io.mem arbiter.io.requestor(2) <> htif.io.mem - val hub = new CoherenceHubNull + val hub = new CoherenceHubBroadcast // connect tile to hub hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init) arbiter.io.mem.xact_abort <> Queue(hub.io.tiles(0).xact_abort) From 
1aa4b0e93d32222a53ef8ca3213cd3c032cf75f5 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 10 Mar 2012 20:16:20 -0800 Subject: [PATCH 0306/1087] going back to null coherence hub --- rocket/src/main/scala/top.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 1384d89d..2783f951 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -26,7 +26,7 @@ class Top() extends Component { arbiter.io.requestor(1) <> icache_pf.io.mem arbiter.io.requestor(2) <> htif.io.mem - val hub = new CoherenceHubBroadcast + val hub = new CoherenceHubNull // connect tile to hub hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init) arbiter.io.mem.xact_abort <> Queue(hub.io.tiles(0).xact_abort) From a4d0025187e973982c1032f7ff2c067689f47715 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 11 Mar 2012 00:48:29 -0800 Subject: [PATCH 0307/1087] fix icache prefetch global_xact_id bug --- rocket/src/main/scala/icache_prefetch.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 425f95d6..1781175e 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -60,11 +60,12 @@ class rocketIPrefetcher extends Component() { io.icache.xact_rep.valid := io.mem.xact_rep.valid && !io.mem.xact_rep.bits.tile_xact_id(0) || (forward && pdq.io.deq.valid) io.icache.xact_rep.bits.data := Mux(forward, pdq.io.deq.bits, io.mem.xact_rep.bits.data) io.icache.xact_rep.bits.require_ack := !forward && io.mem.xact_rep.bits.require_ack - + io.icache.xact_rep.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id + pdq.io.flush := Reg(demand_miss && !hit || (state === s_bad_resp_wait), resetVal = Bool(false)) pdq.io.enq.bits := io.mem.xact_rep.bits.data - pdq.io.enq.valid := ip_mem_resp_val.toBool; - pdq.io.deq.ready := 
forward; + pdq.io.enq.valid := ip_mem_resp_val + pdq.io.deq.ready := forward switch (state) { is (s_invalid) { From 4ebf637642d14309cc500d85484302f56b9fb8a2 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 11 Mar 2012 14:17:27 -0700 Subject: [PATCH 0308/1087] More broadcast hub bugfixes --- rocket/src/main/scala/coherence.scala | 15 ++++---- rocket/src/main/scala/util.scala | 55 ++++++++++++--------------- 2 files changed, 32 insertions(+), 38 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 41959915..f128036a 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -276,17 +276,17 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { when(req_data.ready && req_data.valid) { pop_data := UFix(1) << tile_id mem_cnt := mem_cnt_next - } - when(mem_cnt_next === UFix(0)) { - pop_dep := UFix(1) << tile_id - trigger := Bool(false) + when(mem_cnt_next === UFix(0)) { + pop_dep := UFix(1) << tile_id + trigger := Bool(false) + } } } def doMemReqRead(req_cmd: ioDecoupled[MemReqCmd], trigger: Bool) { req_cmd.valid := Bool(true) req_cmd.bits.rw := Bool(false) - when(req_cmd.ready ) { + when(req_cmd.ready) { trigger := Bool(false) } } @@ -535,10 +535,11 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ rep.bits.t_type := getTransactionReplyType(t_type_arr(ack_idx), sh_count_arr(ack_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) rep.bits.global_xact_id := ack_idx - rep.valid := (UFix(j) === init_tile_id_arr(ack_idx)) && send_x_rep_ack_arr.toBits.orR + val do_send_ack = (UFix(j) === init_tile_id_arr(ack_idx)) && send_x_rep_ack_arr.toBits.orR + rep.valid := do_send_ack + sent_x_rep_ack_arr(ack_idx) := do_send_ack } } - sent_x_rep_ack_arr(ack_idx) := !io.mem.resp.valid // If there were a ready signal due to e.g. 
intervening network use: //io.mem.resp.ready := io.tiles(init_tile_id_arr.read(mem_idx)).xact_rep.ready diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index b584696f..abd66b04 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -194,40 +194,33 @@ class ioLockingArbiter[T <: Data](n: Int)(data: => T) extends Bundle { class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { val io = new ioLockingArbiter(n)(data) val locked = Vec(n) { Reg(resetVal = Bool(false)) } - var dout = io.in(0).bits - var vout = Bool(false) - - for (i <- 0 until n) { - io.in(i).ready := io.out.ready - } - val any_lock_held = (locked.toBits & io.lock.toBits).orR - when(any_lock_held) { - vout = io.in(0).valid && locked(0) - for (i <- 0 until n) { - io.in(i).ready := io.out.ready && locked(i) - dout = Mux(locked(i), io.in(i).bits, dout) - vout = vout || io.in(i).valid && locked(i) - } - } .otherwise { - io.in(0).ready := io.out.ready - locked(0) := io.out.ready && io.lock(0) - for (i <- 1 until n) { - io.in(i).ready := !io.in(i-1).valid && io.in(i-1).ready - locked(i) := !io.in(i-1).valid && io.in(i-1).ready && io.lock(i) - } - - dout = io.in(n-1).bits - for (i <- 1 until n) - dout = Mux(io.in(n-1-i).valid, io.in(n-1-i).bits, dout) - - vout = io.in(0).valid - for (i <- 1 until n) - vout = vout || io.in(i).valid + val valid_arr = Vec(n) { Wire() { Bool() } } + val bits_arr = Vec(n) { Wire() { data } } + for(i <- 0 until n) { + valid_arr(i) := io.in(i).valid + bits_arr(i) := io.in(i).bits } - vout <> io.out.valid - dout <> io.out.bits + io.in(0).ready := Mux(any_lock_held, io.out.ready && locked(0), io.out.ready) + locked(0) := Mux(any_lock_held, locked(0), io.in(0).ready && io.lock(0)) + for (i <- 1 until n) { + io.in(i).ready := Mux(any_lock_held, io.out.ready && locked(i), + !io.in(i-1).valid && io.in(i-1).ready) + locked(i) := Mux(any_lock_held, locked(i), io.in(i).ready) + } + + var dout = io.in(n-1).bits + 
for (i <- 1 until n) + dout = Mux(io.in(n-1-i).valid, io.in(n-1-i).bits, dout) + + var vout = io.in(0).valid + for (i <- 1 until n) + vout = vout || io.in(i).valid + + val lock_idx = PriorityEncoder(locked.toBits) + io.out.valid := Mux(any_lock_held, valid_arr(lock_idx), vout) + io.out.bits := Mux(any_lock_held, bits_arr(lock_idx), dout) } object PriorityEncoder From c5dd37ae80f9fe28c2066825f35296f3bb2d2231 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 11 Mar 2012 15:47:27 -0700 Subject: [PATCH 0309/1087] bugfix in locking arbiter --- rocket/src/main/scala/util.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index abd66b04..10f618c2 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -207,7 +207,7 @@ class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { for (i <- 1 until n) { io.in(i).ready := Mux(any_lock_held, io.out.ready && locked(i), !io.in(i-1).valid && io.in(i-1).ready) - locked(i) := Mux(any_lock_held, locked(i), io.in(i).ready) + locked(i) := Mux(any_lock_held, locked(i), io.in(i).ready && io.lock(i)) } var dout = io.in(n-1).bits From e42a4c767edbb78a5a1ba2ee0857364f92c355fb Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 11 Mar 2012 16:28:32 -0700 Subject: [PATCH 0310/1087] don't stall on vector fences, keep replaying --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index d44c88c5..4cd4e94a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -631,7 +631,7 @@ class rocketCtrl extends Component vec.io.exception := wb_reg_exception vec_replay = vec.io.replay - vec_stalld = vec.io.stalld || id_vfence_cv && !vec.io.vfence_ready + vec_stalld = vec.io.stalld // || id_vfence_cv && !vec.io.vfence_ready } // exception handling From 
113a94a21d07a396faf18a6b4bbbabb545be2078 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 11 Mar 2012 16:28:56 -0700 Subject: [PATCH 0311/1087] add vector hold waits --- rocket/src/main/scala/cpu.scala | 1 + rocket/src/main/scala/ctrl_vec.scala | 105 ++++++++++++++------------- 2 files changed, 57 insertions(+), 49 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 068d2de6..4ad88674 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -195,6 +195,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) // fences ctrl.io.vec_iface.vfence_ready := vu.io.vec_fence_ready + ctrl.io.vec_iface.vhold_ready := vu.io.vec_hold_ready // exceptions vu.io.xcpt.exception := ctrl.io.vec_iface.exception diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 62f7f8c5..48ea0d60 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -39,6 +39,7 @@ class ioCtrlVecInterface extends Bundle val vpfcntq_ready = Bool(INPUT) val vfence_ready = Bool(INPUT) + val vhold_ready = Bool(INPUT) val exception = Bool(OUTPUT) val exception_ack_valid = Bool(INPUT) @@ -75,59 +76,59 @@ class rocketCtrlVec extends Component // val vcmd vimm vimm2 | fn | | | | | | | | | | fence_cv // | | | | | | | | | | | | | | | | | waitxcpt // | | | | | | | | | | | | | | | | | | - List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFG,N,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N), - VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N,N), - FENCE_L_V-> 
List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,Y,N), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,Y,N), - VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, 
N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,Y,N,N,N,Y,N,N), - VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_X, N,N,Y,N,N,N,Y,N,N,Y,N,N), - VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,Y,N,N,N,Y,N,Y,N,N), - VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,Y,N,N,N,Y,Y,N,N), - VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y), - VWAITKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y) + List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFG,N,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y,N,N), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, 
VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y,N,N), + VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VSSTH-> 
List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), + VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,Y,N,N,N,Y,N,N,N), + VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_X, N,N,Y,N,N,N,Y,N,N,Y,N,N,N), + VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,Y,N,N,N,Y,N,Y,N,N,N), + VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,Y,N,N,N,Y,Y,N,N,N), + VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y,N), + VWAITKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N,Y) )) val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_sel_vimm2 :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: wb_vec_cntq_enq :: veccs1 = veccs0 val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_pfcntq_enq :: veccs2 = veccs1 - val wb_vec_pfaq :: wb_vec_fence_cv :: wb_vec_waitxcpt :: Nil = veccs2 + val wb_vec_pfaq :: wb_vec_fence_cv :: wb_vec_waitxcpt :: wb_vec_waithold :: Nil = veccs2 val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val && !(wb_vec_appvlmask && io.dpath.appvl0) @@ -212,6 +213,12 @@ class rocketCtrlVec extends Component io.iface.exception := io.exception && io.sr_ev io.iface.exception_ack_ready := reg_waitxcpt - io.stalld := reg_waitxcpt + val reg_waithold = Reg(resetVal = Bool(false)) + val do_waithold = valid_common && wb_vec_waithold && !io.replay + + when (do_waithold) { reg_waithold := Bool(true) } + when (io.iface.vhold_ready) { reg_waithold := Bool(false) } + + io.stalld 
:= reg_waitxcpt || reg_waithold io.vfence_ready := !io.sr_ev || io.iface.vfence_ready } From 1ba5e7b8650c427017de8dbdcfbdd75c78a0e762 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 11 Mar 2012 21:38:47 -0700 Subject: [PATCH 0312/1087] changes to the vector exception interface --- rocket/src/main/scala/cpu.scala | 6 +- rocket/src/main/scala/ctrl.scala | 1 + rocket/src/main/scala/ctrl_vec.scala | 115 +++++++++++++-------------- 3 files changed, 62 insertions(+), 60 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 4ad88674..08ef77fa 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -193,9 +193,13 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ctrl.io.vec_iface.vpfximm2q_ready := vu.io.vec_pfximm2q.ready ctrl.io.vec_iface.vpfcntq_ready := vu.io.vec_pfcntq.ready + // user level vector command queue ready signals + ctrl.io.vec_iface.vcmdq_user_ready := vu.io.vec_cmdq_user_ready + ctrl.io.vec_iface.vximm1q_user_ready := vu.io.vec_ximm1q_user_ready + ctrl.io.vec_iface.vximm2q_user_ready := vu.io.vec_ximm2q_user_ready + // fences ctrl.io.vec_iface.vfence_ready := vu.io.vec_fence_ready - ctrl.io.vec_iface.vhold_ready := vu.io.vec_hold_ready // exceptions vu.io.xcpt.exception := ctrl.io.vec_iface.exception diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 4cd4e94a..49683f96 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -627,6 +627,7 @@ class rocketCtrl extends Component io.vec_dpath <> vec.io.dpath io.vec_iface <> vec.io.iface + vec.io.s := io.dpath.status(SR_S) vec.io.sr_ev := io.dpath.status(SR_EV) vec.io.exception := wb_reg_exception diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 48ea0d60..20dfa34a 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -38,8 +38,10 @@ class 
ioCtrlVecInterface extends Bundle val vpfcntq_valid = Bool(OUTPUT) val vpfcntq_ready = Bool(INPUT) + val vcmdq_user_ready = Bool(INPUT) + val vximm1q_user_ready = Bool(INPUT) + val vximm2q_user_ready = Bool(INPUT) val vfence_ready = Bool(INPUT) - val vhold_ready = Bool(INPUT) val exception = Bool(OUTPUT) val exception_ack_valid = Bool(INPUT) @@ -50,6 +52,7 @@ class ioCtrlVec extends Bundle { val dpath = new ioCtrlDpathVec() val iface = new ioCtrlVecInterface() + val s = Bool(INPUT) val sr_ev = Bool(INPUT) val exception = Bool(INPUT) val replay = Bool(OUTPUT) @@ -76,59 +79,59 @@ class rocketCtrlVec extends Component // val vcmd vimm vimm2 | fn | | | | | | | | | | fence_cv // | | | | | | | | | | | | | | | | | waitxcpt // | | | | | | | | | | | | | | | | | | - List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFG,N,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N,N,N), - FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y,N,N), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y,N,N), - VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VLHU-> 
List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, 
Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N), - VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,Y,N,N,N,Y,N,N,N), - VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_X, N,N,Y,N,N,N,Y,N,N,Y,N,N,N), - VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,Y,N,N,N,Y,N,Y,N,N,N), - VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,Y,N,N,N,Y,Y,N,N,N), - VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y,N), - VWAITKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N,Y) + List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFG,N,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N), + VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y,N), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y,N), + VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, 
Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,Y,N,N,N,Y,N,N), + VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_X, N,N,Y,N,N,N,Y,N,N,Y,N,N), 
+ VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,Y,N,N,N,Y,N,Y,N,N), + VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,Y,N,N,N,Y,Y,N,N), + VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y), + VWAITKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y) )) val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_sel_vimm2 :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: wb_vec_cntq_enq :: veccs1 = veccs0 val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_pfcntq_enq :: veccs2 = veccs1 - val wb_vec_pfaq :: wb_vec_fence_cv :: wb_vec_waitxcpt :: wb_vec_waithold :: Nil = veccs2 + val wb_vec_pfaq :: wb_vec_fence_cv :: wb_vec_waitxcpt :: Nil = veccs2 val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val && !(wb_vec_appvlmask && io.dpath.appvl0) @@ -137,9 +140,9 @@ class rocketCtrlVec extends Component val wb_vec_pfximm2q_enq_mask_pfq = wb_vec_pfximm2q_enq && (!wb_vec_pfaq || io.dpath.pfq) val wb_vec_pfcntq_enq_mask_pfq = wb_vec_pfcntq_enq && (!wb_vec_pfaq || io.dpath.pfq) - val mask_wb_vec_cmdq_ready = !wb_vec_cmdq_enq || io.iface.vcmdq_ready - val mask_wb_vec_ximm1q_ready = !wb_vec_ximm1q_enq || io.iface.vximm1q_ready - val mask_wb_vec_ximm2q_ready = !wb_vec_ximm2q_enq || io.iface.vximm2q_ready + val mask_wb_vec_cmdq_ready = !wb_vec_cmdq_enq || io.s && io.iface.vcmdq_ready || !io.s && io.iface.vcmdq_user_ready + val mask_wb_vec_ximm1q_ready = !wb_vec_ximm1q_enq || io.s && io.iface.vximm1q_ready || !io.s && io.iface.vximm1q_user_ready + val mask_wb_vec_ximm2q_ready = !wb_vec_ximm2q_enq || io.s && io.iface.vximm2q_ready || !io.s && io.iface.vximm2q_user_ready val mask_wb_vec_cntq_ready = !wb_vec_cntq_enq || io.iface.vcntq_ready val mask_wb_vec_pfcmdq_ready = !wb_vec_pfcmdq_enq_mask_pfq || io.iface.vpfcmdq_ready val mask_wb_vec_pfximm1q_ready = !wb_vec_pfximm1q_enq_mask_pfq || io.iface.vpfximm1q_ready 
@@ -213,12 +216,6 @@ class rocketCtrlVec extends Component io.iface.exception := io.exception && io.sr_ev io.iface.exception_ack_ready := reg_waitxcpt - val reg_waithold = Reg(resetVal = Bool(false)) - val do_waithold = valid_common && wb_vec_waithold && !io.replay - - when (do_waithold) { reg_waithold := Bool(true) } - when (io.iface.vhold_ready) { reg_waithold := Bool(false) } - - io.stalld := reg_waitxcpt || reg_waithold + io.stalld := reg_waitxcpt io.vfence_ready := !io.sr_ev || io.iface.vfence_ready } From ea0775643b0a30567aaf374feb4c5c4b80c9c7a9 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 11 Mar 2012 17:13:01 -0700 Subject: [PATCH 0313/1087] fixed abort bug --- rocket/src/main/scala/coherence.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index f128036a..3db3ac46 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -600,7 +600,7 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ conflicts(i) := t.busy && x_init.valid && coherenceConflict(t.addr, x_init.bits.address) } x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id - want_to_abort_arr(j) := conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && transactionInitHasData(x_init.bits)) + want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && transactionInitHasData(x_init.bits))) x_abort.valid := Bool(false) switch(abort_state_arr(j)) { From 6229a33dc488731da5ba2bf5166146b319581a97 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 11 Mar 2012 18:36:26 -0700 Subject: [PATCH 0314/1087] fixed cache controller flush unit deadlock --- rocket/src/main/scala/nbdcache.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 
fd76bbaf..4c9542d5 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -722,8 +722,8 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // reset and flush unit val flusher = new FlushUnit(lines) val flushed = Reg(resetVal = Bool(true)) - flushed := flushed && (!r_cpu_req_val_ || r_req_flush) || r_cpu_req_val_ && r_req_flush && mshr.io.fence_rdy && flusher.io.req.ready - flusher.io.req.valid := r_cpu_req_val_ && r_req_flush && mshr.io.fence_rdy && !flushed + flushed := flushed && (!r_cpu_req_val || r_req_flush) || r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && flusher.io.req.ready + flusher.io.req.valid := r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && !flushed flusher.io.mshr_req.ready := mshr.io.req.ready when (io.cpu.req_val) { From cbf7b133410ea53e51c8fd5892604fd36b8947ab Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 12 Mar 2012 10:38:37 -0700 Subject: [PATCH 0315/1087] fix hit logic for amos --- rocket/src/main/scala/coherence.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 3db3ac46..653ab6dc 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -147,8 +147,8 @@ trait FourStateCoherence extends CoherencePolicy { def isHit ( cmd: Bits, state: UFix): Bool = { val (read, write) = cpuCmdToRW(cmd) - ((read && ( state === tileShared || state === tileExclusiveClean || state === tileExclusiveDirty)) || - (write && (state === tileExclusiveClean || state === tileExclusiveDirty))) + Mux(write, (state === tileExclusiveClean || state === tileExclusiveDirty), + (state === tileShared || state === tileExclusiveClean || state === tileExclusiveDirty)) } //TODO: do we need isPresent() for determining that a line needs to be From fd29e00db03e765ccfcc3d73c59421d8ccac0a12 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 13 Mar 2012 
01:56:10 -0700 Subject: [PATCH 0316/1087] support non-power-of-2 queue sizes need to manually wrap queue pointers. --- rocket/src/main/scala/queues.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 823a031b..0d92ddfc 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -24,12 +24,13 @@ class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = { enq_ptr = Reg(resetVal = UFix(0, log2up(entries))) deq_ptr = Reg(resetVal = UFix(0, log2up(entries))) + val pow2 = Bool((entries & (entries-1)) == 0) when (do_deq) { - deq_ptr := deq_ptr + UFix(1) + deq_ptr := Mux(!pow2 && deq_ptr === UFix(entries-1), UFix(0), deq_ptr + UFix(1)) } when (do_enq) { - enq_ptr := enq_ptr + UFix(1) + enq_ptr := Mux(!pow2 && enq_ptr === UFix(entries-1), UFix(0), enq_ptr + UFix(1)) } if (flushable) { when (io.flush) { From d76b05bde147f3fd76e10a047a24b476b2f882bc Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 13 Mar 2012 02:21:02 -0700 Subject: [PATCH 0317/1087] fix way selection on D$ write upgrades --- rocket/src/main/scala/nbdcache.scala | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 4c9542d5..3e38b505 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -94,7 +94,7 @@ class LoadDataGen extends Component { class MSHRReq extends Bundle { val tag_miss = Bool() - val old_state = UFix(width = 2) + val old_dirty = Bool() val old_tag = Bits(width = TAG_BITS) val tag = Bits(width = TAG_BITS) @@ -252,7 +252,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { req := io.req_bits when (io.req_bits.tag_miss) { - state := Mux(needsWriteback(io.req_bits.old_state), s_wb_req, s_refill_req) + state := Mux(io.req_bits.old_dirty, s_wb_req, 
s_refill_req) } } @@ -774,11 +774,11 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { meta_arb.io.in(2).bits.way_en := ~UFix(0, NWAYS) val early_tag_nack = !meta_arb.io.in(2).ready val cpu_req_tag = Cat(io.cpu.req_ppn, r_cpu_req_idx)(tagmsb,taglsb) - val tag_match_arr = (0 until NWAYS).map( w => isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_match_arr = (0 until NWAYS).map( w => isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR - val tag_hit = r_cpu_req_val && tag_match - val tag_miss = r_cpu_req_val && !tag_match val hit_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec + val tag_hit_arr = (0 until NWAYS).map( w => isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_hit = Cat(Bits(0),tag_match_arr:_*).orR val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), hit_way_oh, meta.io.way_en) val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_oh, data.io.way_en) val meta_resp_mux = Mux1H(meta_resp_way_oh, meta.io.resp) @@ -809,8 +809,6 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { data_arb.io.in(4).bits.inner_req.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) data_arb.io.in(4).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) data_arb.io.in(4).bits.inner_req.rw := Bool(false) - data_arb.io.in(4).bits.inner_req.wmask := UFix(0) // don't care - data_arb.io.in(4).bits.inner_req.data := io.mem.xact_rep.bits.data // don't care data_arb.io.in(4).valid := io.cpu.req_val && req_read data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check val early_load_nack = req_read && !data_arb.io.in(4).ready @@ -831,12 +829,12 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val drain_store = drain_store_val && data_arb.io.in(2).ready val p_amo = 
Reg(resetVal = Bool(false)) val p_store_rdy = !(p_store_valid && !drain_store) && !(mshr.io.data_req.valid || r_replay_amo || p_amo) - p_amo := tag_hit && r_req_amo && mshr.io.req.ready && !nack_hit || r_replay_amo - p_store_valid := p_store_valid && !drain_store || (tag_hit && r_req_store && mshr.io.req.ready && !nack_hit) || p_amo + p_amo := r_cpu_req_val && tag_hit && r_req_amo && mshr.io.req.ready && !nack_hit || r_replay_amo + p_store_valid := p_store_valid && !drain_store || (r_cpu_req_val && tag_hit && r_req_store && mshr.io.req.ready && !nack_hit) || p_amo // tag update after a store to an exclusive clean line. val new_hit_state = newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) - val set_hit_state = tag_hit && meta_resp_mux.state != new_hit_state + val set_hit_state = r_cpu_req_val && tag_hit && meta_resp_mux.state != new_hit_state meta.io.state_req.bits.inner_req.rw := Bool(true) meta.io.state_req.bits.inner_req.idx := Reg(r_cpu_req_idx(indexmsb,indexlsb)) meta.io.state_req.bits.inner_req.data.state := Reg(new_hit_state) @@ -858,8 +856,8 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // miss handling mshr.io.req.valid := r_cpu_req_val && r_req_readwrite && !nack_hit || flusher.io.mshr_req.valid - mshr.io.req.bits.tag_miss := tag_miss || flusher.io.mshr_req.valid - mshr.io.req.bits.old_state := meta_wb_mux.state + mshr.io.req.bits.tag_miss := !tag_hit || flusher.io.mshr_req.valid + mshr.io.req.bits.old_dirty := needsWriteback(meta_wb_mux.state) && !tag_match // don't wb upgrades mshr.io.req.bits.old_tag := meta_wb_mux.tag mshr.io.req.bits.tag := cpu_req_tag mshr.io.req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) @@ -867,7 +865,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { mshr.io.req.bits.offset := r_cpu_req_idx(offsetmsb,0) mshr.io.req.bits.cmd := r_cpu_req_cmd mshr.io.req.bits.typ := r_cpu_req_type - mshr.io.req.bits.way_oh := replaced_way_oh + mshr.io.req.bits.way_oh := Mux(tag_match, hit_way_oh, 
replaced_way_oh) mshr.io.req.bits.data := cpu_req_data mshr.io.mem_rep <> io.mem.xact_rep @@ -932,9 +930,9 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack - io.cpu.resp_val := (tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val + io.cpu.resp_val := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val io.cpu.resp_replay := mshr.io.cpu_resp_val - io.cpu.resp_miss := r_cpu_req_val_ && (!tag_match || mshr.io.secondary_miss) && r_req_read + io.cpu.resp_miss := r_cpu_req_val_ && (!tag_hit || mshr.io.secondary_miss) && r_req_read io.cpu.resp_tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag) io.cpu.resp_type := loadgen.io.typ io.cpu.resp_data := loadgen.io.dout From 287bc1c2625f236002b7a20b51d56bcedd3eef54 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 13 Mar 2012 11:48:12 -0700 Subject: [PATCH 0318/1087] Further refinement of tag_match/tag_hit signals --- rocket/src/main/scala/nbdcache.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3e38b505..f0058175 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -776,11 +776,11 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val cpu_req_tag = Cat(io.cpu.req_ppn, r_cpu_req_idx)(tagmsb,taglsb) val tag_match_arr = (0 until NWAYS).map( w => isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR - val hit_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec + val tag_match_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec val tag_hit_arr = (0 until 
NWAYS).map( w => isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) - val tag_hit = Cat(Bits(0),tag_match_arr:_*).orR - val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), hit_way_oh, meta.io.way_en) - val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), hit_way_oh, data.io.way_en) + val tag_hit = Cat(Bits(0),tag_hit_arr:_*).orR + val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), tag_match_way_oh, meta.io.way_en) + val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), tag_match_way_oh, data.io.way_en) val meta_resp_mux = Mux1H(meta_resp_way_oh, meta.io.resp) val data_resp_mux = Mux1H(data_resp_way_oh, data.io.resp) @@ -838,7 +838,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { meta.io.state_req.bits.inner_req.rw := Bool(true) meta.io.state_req.bits.inner_req.idx := Reg(r_cpu_req_idx(indexmsb,indexlsb)) meta.io.state_req.bits.inner_req.data.state := Reg(new_hit_state) - meta.io.state_req.bits.way_en := Reg(hit_way_oh) + meta.io.state_req.bits.way_en := Reg(tag_match_way_oh) meta.io.state_req.valid := Reg(set_hit_state, resetVal = Bool(false)) // pending store data, also used for AMO RHS @@ -847,7 +847,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { p_store_idx := r_cpu_req_idx p_store_type := r_cpu_req_type p_store_cmd := r_cpu_req_cmd - p_store_way_oh := Mux(r_replay_amo, r_way_oh, hit_way_oh) + p_store_way_oh := Mux(r_replay_amo, r_way_oh, tag_match_way_oh) p_store_data := cpu_req_data } when (p_amo) { @@ -865,7 +865,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { mshr.io.req.bits.offset := r_cpu_req_idx(offsetmsb,0) mshr.io.req.bits.cmd := r_cpu_req_cmd mshr.io.req.bits.typ := r_cpu_req_type - mshr.io.req.bits.way_oh := Mux(tag_match, hit_way_oh, replaced_way_oh) + mshr.io.req.bits.way_oh := Mux(tag_match, tag_match_way_oh, replaced_way_oh) mshr.io.req.bits.data := cpu_req_data mshr.io.mem_rep <> 
io.mem.xact_rep From 6fd15274761a9d8a8883d4a36b87746c22017351 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Tue, 13 Mar 2012 12:34:02 -0700 Subject: [PATCH 0319/1087] fix to rocket vec_dpath, updating makefiles to run xcpt test cases --- rocket/src/main/scala/ctrl_vec.scala | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 20dfa34a..13565b53 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -105,24 +105,24 @@ class rocketCtrlVec extends Component VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, 
VIMM2_X, N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,Y,N,N,N,Y,N,N), VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_X, N,N,Y,N,N,N,Y,N,N,Y,N,N), - VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,Y,N,N,N,Y,N,Y,N,N), + VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_ALU,N,VEC_X, N,N,N,Y,N,N,N,Y,N,Y,N,N), VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,Y,N,N,N,Y,Y,N,N), VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y), VWAITKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y) From b0f798962c6a09867b4d865c29d8974f4db89fdb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 13 Mar 2012 16:43:35 -0700 Subject: [PATCH 0320/1087] add probe unit --- 
rocket/src/main/scala/coherence.scala | 25 +++++ rocket/src/main/scala/nbdcache.scala | 148 ++++++++++++++++++++++---- rocket/src/main/scala/top.scala | 3 + 3 files changed, 155 insertions(+), 21 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 653ab6dc..de158417 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -135,6 +135,12 @@ trait ThreeStateIncoherence extends CoherencePolicy { Mux(outstanding.t_type === X_INIT_READ_EXCLUSIVE, tileDirty, tileClean) } def newStateOnProbeReq(incoming: ProbeRequest, state: UFix): Bits = state + def newProbeReply (incoming: ProbeRequest, has_data: Bool): ProbeReply = { + val reply = Wire() { new ProbeReply() } + reply.p_type := P_REP_INVALIDATE_ACK + reply.global_xact_id := UFix(0) + reply + } def probeReplyHasData (reply: ProbeReply): Bool = Bool(false) def transactionInitHasData (init: TransactionInit): Bool = (init.t_type === X_INIT_WRITE_UNCACHED) } @@ -199,6 +205,22 @@ trait FourStateCoherence extends CoherencePolicy { )) } + def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { + val reply = Wire() { new ProbeReply() } + val with_data = MuxLookup(incoming.p_type, state, Array( + probeInvalidate -> P_REP_INVALIDATE_DATA, + probeDowngrade -> P_REP_DOWNGRADE_DATA, + probeCopy -> P_REP_COPY_DATA + )) + val without_data = MuxLookup(incoming.p_type, state, Array( + probeInvalidate -> P_REP_INVALIDATE_ACK, + probeDowngrade -> P_REP_DOWNGRADE_ACK, + probeCopy -> P_REP_COPY_ACK + )) + reply.p_type := Mux(needsWriteback(state), with_data, without_data) + reply.global_xact_id := incoming.global_xact_id + reply + } def probeReplyHasData (reply: ProbeReply): Bool = { (reply.p_type === P_REP_INVALIDATE_DATA || reply.p_type === P_REP_DOWNGRADE_DATA || @@ -446,6 +468,9 @@ class CoherenceHubNull extends CoherenceHub { io.tiles(0).xact_abort.valid := Bool(false) io.tiles(0).xact_finish.ready := Bool(true) + 
io.tiles(0).probe_req.valid := Bool(false) + io.tiles(0).probe_rep.ready := Bool(true) + io.tiles(0).probe_rep_data.ready := Bool(true) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index f0058175..c90b4fc8 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -188,6 +188,8 @@ class MSHR(id: Int) extends Component with FourStateCoherence { val mem_rep = (new ioPipe) { new TransactionReply }.flip val mem_finish = (new ioDecoupled) { new TransactionFinish } val wb_req = (new ioDecoupled) { new WritebackReq } + val probe_writeback = (new ioDecoupled) { Bool() }.flip + val probe_refill = (new ioDecoupled) { Bool() }.flip } val s_invalid :: s_wb_req :: s_wb_resp :: s_refill_req :: s_refill_resp :: s_drain_rpq :: Nil = Enum(6) { UFix() } @@ -238,6 +240,8 @@ class MSHR(id: Int) extends Component with FourStateCoherence { when (abort) { state := s_wb_req } } when (state === s_wb_req && io.wb_req.ready) { + when (io.probe_writeback.valid && idx_match) { state := s_refill_req } + when (io.wb_req.ready) { state := s_wb_resp } state := s_wb_resp } @@ -277,6 +281,9 @@ class MSHR(id: Int) extends Component with FourStateCoherence { io.wb_req.bits.way_oh := req.way_oh io.wb_req.bits.tile_xact_id := Bits(id) + io.probe_writeback.ready := (state != s_wb_resp) || !idx_match + io.probe_refill.ready := (state != s_refill_resp) || !idx_match + io.mem_req.valid := (state === s_refill_req) && !flush io.mem_req.bits.t_type := xact_type io.mem_req.bits.address := Cat(req.tag, req.idx).toUFix @@ -307,6 +314,7 @@ class MSHRFile extends Component { val mem_rep = (new ioPipe) { new TransactionReply }.flip val mem_finish = (new ioDecoupled) { new TransactionFinish } val wb_req = (new ioDecoupled) { new WritebackReq } + val probe = (new ioDecoupled) { Bool() }.flip val cpu_resp_val = Bool(OUTPUT) val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) @@ -322,6 +330,7 @@ class MSHRFile extends Component { 
sdq.setTarget('inst) val tag_mux = (new Mux1H(NMSHR)){ Bits(width = TAG_BITS) } + val wb_probe_mux = (new Mux1H(NMSHR)) { new WritebackReq } val mem_resp_mux = (new Mux1H(NMSHR)){ new DataArrayArrayReq } val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } @@ -331,17 +340,22 @@ class MSHRFile extends Component { val alloc_arb = (new Arbiter(NMSHR)) { Bool() } val tag_match = tag_mux.io.out === io.req.bits.tag + val wb_probe_match = wb_probe_mux.io.out.tag === io.req.bits.tag var idx_match = Bool(false) var pri_rdy = Bool(false) var fence = Bool(false) var sec_rdy = Bool(false) + var writeback_probe_rdy = Bool(true) + var refill_probe_rdy = Bool(true) for (i <- 0 to NMSHR-1) { val mshr = new MSHR(i) tag_mux.io.sel(i) := mshr.io.idx_match tag_mux.io.in(i) := mshr.io.tag + wb_probe_mux.io.sel(i) := mshr.io.idx_match + wb_probe_mux.io.in(i) := mshr.io.wb_req.bits alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy mshr.io.req_pri_val := alloc_arb.io.in(i).ready @@ -355,6 +369,8 @@ class MSHRFile extends Component { mshr.io.mem_finish <> mem_finish_arb.io.in(i) mshr.io.wb_req <> wb_req_arb.io.in(i) mshr.io.replay <> replay_arb.io.in(i) + mshr.io.probe_refill.valid := io.probe.valid && tag_match + mshr.io.probe_writeback.valid := io.probe.valid && wb_probe_match mshr.io.mem_abort <> io.mem_abort mshr.io.mem_rep <> io.mem_rep @@ -367,6 +383,8 @@ class MSHRFile extends Component { sec_rdy = sec_rdy || mshr.io.req_sec_rdy fence = fence || !mshr.io.req_pri_rdy idx_match = idx_match || mshr.io.idx_match + refill_probe_rdy = refill_probe_rdy && mshr.io.probe_refill.ready + writeback_probe_rdy = writeback_probe_rdy && mshr.io.probe_writeback.ready } alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match @@ -382,6 +400,7 @@ class MSHRFile extends Component { io.mem_resp_offset := mem_resp_mux.io.out.inner_req.offset io.mem_resp_way_oh := mem_resp_mux.io.out.way_en io.fence_rdy := !fence + 
io.probe.ready := (refill_probe_rdy || !tag_match) && (writeback_probe_rdy || !wb_probe_match) val replay = Queue(replay_arb.io.out, 1, pipe = true) replay.ready := io.data_req.ready @@ -399,19 +418,23 @@ class MSHRFile extends Component { class WritebackUnit extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new WritebackReq() }.flip - val data_req = (new ioDecoupled) { new DataArrayArrayReq() } + val req = (new ioDecoupled) { new WritebackReq() }.flip + val probe = (new ioDecoupled) { new WritebackReq() }.flip + val data_req = (new ioDecoupled) { new DataArrayArrayReq() } val data_resp = Bits(MEM_DATA_BITS, INPUT) val mem_req = (new ioDecoupled) { new TransactionInit } val mem_req_data = (new ioDecoupled) { new TransactionInitData } + val probe_rep_data = (new ioDecoupled) { new ProbeReplyData } } val valid = Reg(resetVal = Bool(false)) + val is_probe = Reg() { Bool() } val data_req_fired = Reg(resetVal = Bool(false)) val cmd_sent = Reg() { Bool() } val cnt = Reg() { UFix(width = log2up(REFILL_CYCLES+1)) } val req = Reg() { new WritebackReq() } + val dout_rdy = Mux(is_probe, io.probe_rep_data.ready, io.mem_req_data.ready) data_req_fired := Bool(false) when (valid && io.mem_req.ready) { cmd_sent := Bool(true) @@ -420,21 +443,30 @@ class WritebackUnit extends Component { data_req_fired := Bool(true) cnt := cnt + UFix(1) } - when (data_req_fired && !io.mem_req_data.ready) { + when (data_req_fired && !dout_rdy) { data_req_fired := Bool(false) cnt := cnt - UFix(1) } - when ((cnt === UFix(REFILL_CYCLES)) && io.mem_req_data.ready) { + .elsewhen (cmd_sent && (cnt === UFix(REFILL_CYCLES))) { valid := Bool(false) } + when (io.probe.valid && io.probe.ready) { + valid := Bool(true) + is_probe := Bool(true) + cmd_sent := Bool(true) + cnt := UFix(0) + req := io.probe.bits + } when (io.req.valid && io.req.ready) { valid := Bool(true) + is_probe := Bool(false) cmd_sent := Bool(false) cnt := UFix(0) req := io.req.bits } - io.req.ready := !valid + 
io.req.ready := !valid && !io.probe.valid + io.probe.ready := !valid io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) io.data_req.bits.way_en := req.way_oh io.data_req.bits.inner_req.idx := req.idx @@ -447,8 +479,70 @@ class WritebackUnit extends Component { io.mem_req.bits.t_type := X_INIT_WRITE_UNCACHED io.mem_req.bits.address := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := req.tile_xact_id - io.mem_req_data.valid := data_req_fired + io.mem_req_data.valid := data_req_fired && !is_probe io.mem_req_data.bits.data := io.data_resp + io.probe_rep_data.valid := data_req_fired && is_probe + io.probe_rep_data.bits.data := io.data_resp +} + +class ProbeUnit extends Component with FourStateCoherence { + val io = new Bundle { + val req = (new ioDecoupled) { new ProbeRequest }.flip + val rep = (new ioDecoupled) { new ProbeReply } + val meta_req = (new ioDecoupled) { new MetaArrayArrayReq } + val mshr_req = (new ioDecoupled) { Bool() } + val wb_req = (new ioDecoupled) { new WritebackReq } + val hit_way_oh = Bits(NWAYS, INPUT) + val line_state = UFix(2, INPUT) + val address = Bits(PADDR_BITS-OFFSET_BITS, OUTPUT) + } + + val s_invalid :: s_meta_req :: s_meta_resp :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: Nil = Enum(6) { UFix() } + val state = Reg(resetVal = s_invalid) + val line_state = Reg() { UFix() } + val way_oh = Reg() { Bits() } + val req = Reg() { new ProbeRequest() } + + when ((state === s_writeback_resp) && io.wb_req.ready) { + state := s_invalid + } + when ((state === s_writeback_req) && io.wb_req.ready) { + state := s_writeback_resp + } + when ((state === s_probe_rep) && io.meta_req.ready && io.rep.ready) { + state := Mux(way_oh.orR && needsWriteback(line_state), s_writeback_req, s_invalid) + } + when (state === s_meta_resp) { + way_oh := io.hit_way_oh + line_state := io.line_state + state := Mux(!io.mshr_req.ready, s_meta_req, s_probe_rep) + } + when ((state === s_meta_req) && io.meta_req.ready) { + state := s_meta_resp + } + 
when ((state === s_invalid) && io.req.valid) { + state := s_meta_req + req := io.req.bits + } + + io.req.ready := state === s_invalid + io.rep.valid := state === s_probe_rep && io.meta_req.ready + io.rep.bits := newProbeReply(req, line_state) + + val new_state = newStateOnProbeReq(req, line_state) + io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_probe_rep && new_state != line_state + io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, ~UFix(0, NWAYS)) + io.meta_req.bits.inner_req.rw := state === s_probe_rep + io.meta_req.bits.inner_req.idx := req.address + io.meta_req.bits.inner_req.data.state := new_state + io.meta_req.bits.inner_req.data.tag := req.address >> UFix(OFFSET_BITS) + io.mshr_req.valid := state === s_meta_resp + io.address := req.address + + io.wb_req.valid := state === s_writeback_req + io.wb_req.bits.way_oh := way_oh + io.wb_req.bits.idx := req.address + io.wb_req.bits.tag := req.address >> UFix(OFFSET_BITS) } class FlushUnit(lines: Int) extends Component with FourStateCoherence{ @@ -716,11 +810,13 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch val nack_hit = Wire() { Bool() } + val wb = new WritebackUnit + val prober = new ProbeUnit val mshr = new MSHRFile() + val flusher = new FlushUnit(lines) val replay_amo_val = mshr.io.data_req.valid && mshr.io.data_req.bits.cmd(3).toBool // reset and flush unit - val flusher = new FlushUnit(lines) val flushed = Reg(resetVal = Bool(true)) flushed := flushed && (!r_cpu_req_val || r_req_flush) || r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && flusher.io.req.ready flusher.io.req.valid := r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && !flushed @@ -756,7 +852,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // tags val meta = new MetaDataArrayArray(lines) - val meta_arb = (new Arbiter(3)) { new MetaArrayArrayReq() } + val meta_arb = (new 
Arbiter(4)) { new MetaArrayArrayReq() } flusher.io.meta_req <> meta_arb.io.in(0) meta_arb.io.out <> meta.io.req @@ -766,14 +862,13 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { data_arb.io.out <> data.io.req // cpu tag check - meta_arb.io.in(2).valid := io.cpu.req_val - meta_arb.io.in(2).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) - meta_arb.io.in(2).bits.inner_req.rw := Bool(false) - meta_arb.io.in(2).bits.inner_req.data.state := UFix(0) // don't care - meta_arb.io.in(2).bits.inner_req.data.tag := UFix(0) // don't care - meta_arb.io.in(2).bits.way_en := ~UFix(0, NWAYS) - val early_tag_nack = !meta_arb.io.in(2).ready - val cpu_req_tag = Cat(io.cpu.req_ppn, r_cpu_req_idx)(tagmsb,taglsb) + meta_arb.io.in(3).valid := io.cpu.req_val + meta_arb.io.in(3).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) + meta_arb.io.in(3).bits.inner_req.rw := Bool(false) + meta_arb.io.in(3).bits.way_en := ~UFix(0, NWAYS) + val early_tag_nack = !meta_arb.io.in(3).ready + val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.address >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req_ppn) + val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) val tag_match_arr = (0 until NWAYS).map( w => isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR val tag_match_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec @@ -785,10 +880,19 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val data_resp_mux = Mux1H(data_resp_way_oh, data.io.resp) // writeback unit - val wb = new WritebackUnit wb.io.req <> mshr.io.wb_req wb.io.data_req <> data_arb.io.in(3) wb.io.data_resp <> data_resp_mux + wb.io.probe_rep_data <> io.mem.probe_rep_data + + // probes + prober.io.req <> io.mem.probe_req + prober.io.rep <> io.mem.probe_rep + prober.io.meta_req <> meta_arb.io.in(2) + prober.io.mshr_req <> mshr.io.probe + prober.io.wb_req <> wb.io.probe + 
prober.io.hit_way_oh := hit_way_oh + prober.io.line_state := meta_resp_mux.state // replacement policy val replacer = new RandomReplacementWayGen() @@ -820,7 +924,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb)) val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) val p_store_match = r_cpu_req_val_ && r_req_read && p_store_idx_match && p_store_offset_match - val drain_store_val = (p_store_valid && (!io.cpu.req_val || !req_read || Reg(wb.io.req.valid || mshr.io.data_req.valid))) || p_store_match + val drain_store_val = (p_store_valid && (!io.cpu.req_val || !req_read || wb.io.data_req.valid || mshr.io.data_req.valid)) || p_store_match data_arb.io.in(2).bits.inner_req.offset := p_store_idx(offsetmsb,ramindexlsb) data_arb.io.in(2).bits.inner_req.idx := p_store_idx(indexmsb,indexlsb) data_arb.io.in(2).bits.inner_req.rw := Bool(true) @@ -857,7 +961,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // miss handling mshr.io.req.valid := r_cpu_req_val && r_req_readwrite && !nack_hit || flusher.io.mshr_req.valid mshr.io.req.bits.tag_miss := !tag_hit || flusher.io.mshr_req.valid - mshr.io.req.bits.old_dirty := needsWriteback(meta_wb_mux.state) && !tag_match // don't wb upgrades + mshr.io.req.bits.old_dirty := needsWriteback(meta_wb_mux.state) && (!tag_match || flusher.io.mshr_req.valid) // don't wb upgrades mshr.io.req.bits.old_tag := meta_wb_mux.tag mshr.io.req.bits.tag := cpu_req_tag mshr.io.req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) @@ -876,7 +980,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // replays val replay = mshr.io.data_req.bits - val stall_replay = r_replay_amo || p_amo || p_store_valid || flusher.io.meta_req.valid + val stall_replay = r_replay_amo || p_amo || flusher.io.meta_req.valid || p_store_valid val replay_val = 
mshr.io.data_req.valid val replay_fire = replay_val && !stall_replay val replay_rdy = data_arb.io.in(1).ready && !stall_replay @@ -940,7 +1044,9 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val xact_init_arb = (new Arbiter(2)) { new TransactionInit } xact_init_arb.io.in(0) <> wb.io.mem_req - xact_init_arb.io.in(1) <> mshr.io.mem_req + xact_init_arb.io.in(1).valid := mshr.io.mem_req.valid && prober.io.req.ready + mshr.io.mem_req.ready := xact_init_arb.io.in(1).ready && prober.io.req.ready + xact_init_arb.io.in(1).bits := mshr.io.mem_req.bits io.mem.xact_init <> xact_init_arb.io.out io.mem.xact_init_data <> wb.io.mem_req_data diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 2783f951..c2932513 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -32,6 +32,9 @@ class Top() extends Component { arbiter.io.mem.xact_abort <> Queue(hub.io.tiles(0).xact_abort) arbiter.io.mem.xact_rep <> Pipe(hub.io.tiles(0).xact_rep) hub.io.tiles(0).xact_finish <> Queue(arbiter.io.mem.xact_finish) + dcache.io.mem.probe_req <> Queue(hub.io.tiles(0).probe_req) + hub.io.tiles(0).probe_rep <> Queue(dcache.io.mem.probe_rep, 1) + hub.io.tiles(0).probe_rep_data <> Queue(dcache.io.mem.probe_rep_data) // connect hub to memory io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) io.mem.req_data <> Queue(hub.io.mem.req_data) From 1492457df5f441d7e46b1982e064faefbd86f28d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 13 Mar 2012 16:56:47 -0700 Subject: [PATCH 0321/1087] add probe replies to HTIF --- rocket/src/main/scala/htif.scala | 9 ++++++++- rocket/src/main/scala/nbdcache.scala | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 702fe5db..967b57a0 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -21,7 +21,7 @@ class ioHTIF extends Bundle val pcr_rdata = Bits(64, OUTPUT) 
} -class rocketHTIF(w: Int, ncores: Int) extends Component +class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence { val io = new Bundle { val host = new ioHost(w) @@ -160,6 +160,13 @@ class rocketHTIF(w: Int, ncores: Int) extends Component io.mem.xact_finish.valid := (state === state_mem_finish) && mem_needs_ack io.mem.xact_finish.bits.global_xact_id := mem_gxid + val probe_q = (new queue(1, pipe=true)) { new TransactionReply } + probe_q.io.enq.valid := io.mem.probe_req.valid + io.mem.probe_req.ready := probe_q.io.enq.ready + probe_q.io.enq.bits := newProbeReply(io.mem.probe_req.bits, newStateOnFlush()) + io.mem.probe_rep <> probe_q.io.deq + io.mem.probe_rep_data.valid := Bool(false) + pcr_done := Bool(false) val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } for (i <- 0 until ncores) { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c90b4fc8..4e2fe515 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -492,7 +492,7 @@ class ProbeUnit extends Component with FourStateCoherence { val meta_req = (new ioDecoupled) { new MetaArrayArrayReq } val mshr_req = (new ioDecoupled) { Bool() } val wb_req = (new ioDecoupled) { new WritebackReq } - val hit_way_oh = Bits(NWAYS, INPUT) + val tag_match_way_oh = Bits(NWAYS, INPUT) val line_state = UFix(2, INPUT) val address = Bits(PADDR_BITS-OFFSET_BITS, OUTPUT) } @@ -513,7 +513,7 @@ class ProbeUnit extends Component with FourStateCoherence { state := Mux(way_oh.orR && needsWriteback(line_state), s_writeback_req, s_invalid) } when (state === s_meta_resp) { - way_oh := io.hit_way_oh + way_oh := io.tag_match_way_oh line_state := io.line_state state := Mux(!io.mshr_req.ready, s_meta_req, s_probe_rep) } @@ -891,7 +891,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { prober.io.meta_req <> meta_arb.io.in(2) prober.io.mshr_req <> mshr.io.probe prober.io.wb_req <> wb.io.probe - prober.io.hit_way_oh 
:= hit_way_oh + prober.io.tag_match_way_oh := tag_match_way_oh prober.io.line_state := meta_resp_mux.state // replacement policy From 1788c341134f1433c581f0b85cd3e45b126fb68a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 13 Mar 2012 17:12:01 -0700 Subject: [PATCH 0322/1087] parameterize broadcast hub by # of tiles --- rocket/src/main/scala/coherence.scala | 94 +++++++++++++-------------- rocket/src/main/scala/consts.scala | 1 - 2 files changed, 47 insertions(+), 48 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index de158417..ae9321b1 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -234,14 +234,14 @@ trait FourStateCoherence extends CoherencePolicy { } } -class XactTracker(id: Int) extends Component with FourStateCoherence { +class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherence { val io = new Bundle { val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip val p_data = (new ioPipe) { new TrackerProbeData } val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) - val p_rep_cnt_dec = Bits(NTILES, INPUT) - val p_req_cnt_inc = Bits(NTILES, INPUT) + val p_rep_cnt_dec = Bits(ntiles, INPUT) + val p_req_cnt_inc = Bits(ntiles, INPUT) val p_rep_data = (new ioPipe) { new ProbeReplyData }.flip val x_init_data = (new ioPipe) { new TransactionInitData }.flip val sent_x_rep_ack = Bool(INPUT) @@ -259,13 +259,13 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) val sharer_count = Bits(TILE_ID_BITS+1, OUTPUT) val t_type = Bits(X_INIT_TYPE_BITS, OUTPUT) - val push_p_req = Bits(NTILES, OUTPUT) - val pop_p_rep = Bits(NTILES, OUTPUT) - val pop_p_rep_data = Bits(NTILES, OUTPUT) - val pop_p_rep_dep = Bits(NTILES, OUTPUT) - val pop_x_init = Bits(NTILES, OUTPUT) - val pop_x_init_data = Bits(NTILES, OUTPUT) - val pop_x_init_dep = Bits(NTILES, OUTPUT) + val 
push_p_req = Bits(ntiles, OUTPUT) + val pop_p_rep = Bits(ntiles, OUTPUT) + val pop_p_rep_data = Bits(ntiles, OUTPUT) + val pop_p_rep_dep = Bits(ntiles, OUTPUT) + val pop_x_init = Bits(ntiles, OUTPUT) + val pop_x_init_data = Bits(ntiles, OUTPUT) + val pop_x_init_dep = Bits(ntiles, OUTPUT) val send_x_rep_ack = Bool(OUTPUT) } @@ -319,8 +319,8 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { val t_type_ = Reg{ Bits() } val init_tile_id_ = Reg{ Bits() } val tile_xact_id_ = Reg{ Bits() } - val p_rep_count = if (NTILES == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2up(NTILES))) - val p_req_flags = Reg(resetVal = Bits(0, width = NTILES)) + val p_rep_count = if (ntiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2up(ntiles))) + val p_req_flags = Reg(resetVal = Bits(0, width = ntiles)) val p_rep_tile_id_ = Reg{ Bits() } val x_needs_read = Reg(resetVal = Bool(false)) val x_init_data_needs_write = Reg(resetVal = Bool(false)) @@ -336,7 +336,7 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { io.init_tile_id := init_tile_id_ io.p_rep_tile_id := p_rep_tile_id_ io.tile_xact_id := tile_xact_id_ - io.sharer_count := UFix(NTILES) // TODO: Broadcast only + io.sharer_count := UFix(ntiles) // TODO: Broadcast only io.t_type := t_type_ io.mem_req_cmd.valid := Bool(false) @@ -350,13 +350,13 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { io.probe_req.bits.p_type := sendProbeReqType(t_type_, UFix(0)) io.probe_req.bits.global_xact_id := UFix(id) io.probe_req.bits.address := addr_ - io.push_p_req := Bits(0, width = NTILES) - io.pop_p_rep := Bits(0, width = NTILES) - io.pop_p_rep_data := Bits(0, width = NTILES) - io.pop_p_rep_dep := Bits(0, width = NTILES) - io.pop_x_init := Bits(0, width = NTILES) - io.pop_x_init_data := Bits(0, width = NTILES) - io.pop_x_init_dep := Bits(0, width = NTILES) + io.push_p_req := Bits(0, width = ntiles) + io.pop_p_rep := Bits(0, width = ntiles) + io.pop_p_rep_data := 
Bits(0, width = ntiles) + io.pop_p_rep_dep := Bits(0, width = ntiles) + io.pop_x_init := Bits(0, width = ntiles) + io.pop_x_init_data := Bits(0, width = ntiles) + io.pop_x_init_dep := Bits(0, width = ntiles) io.send_x_rep_ack := Bool(false) switch (state) { @@ -368,7 +368,7 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := transactionInitHasData(io.alloc_req.bits.xact_init) x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) - if(NTILES > 1) p_rep_count := UFix(NTILES-1) + if(ntiles > 1) p_rep_count := UFix(ntiles-1) p_req_flags := ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only mem_cnt := UFix(0) p_w_mem_cmd_sent := Bool(false) @@ -388,7 +388,7 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { when(io.p_rep_cnt_dec.orR) { val p_rep_count_next = p_rep_count - PopCount(io.p_rep_cnt_dec) io.pop_p_rep := io.p_rep_cnt_dec - if(NTILES > 1) p_rep_count := p_rep_count_next + if(ntiles > 1) p_rep_count := p_rep_count_next when(p_rep_count === UFix(0)) { io.pop_p_rep := Bool(true) state := s_mem @@ -440,14 +440,14 @@ class XactTracker(id: Int) extends Component with FourStateCoherence { } } -abstract class CoherenceHub extends Component with CoherencePolicy { +abstract class CoherenceHub(ntiles: Int) extends Component with CoherencePolicy { val io = new Bundle { - val tiles = Vec(NTILES) { new ioTileLink() }.flip + val tiles = Vec(ntiles) { new ioTileLink() }.flip val mem = new ioMem } } -class CoherenceHubNull extends CoherenceHub { +class CoherenceHubNull extends CoherenceHub(1) { val x_init = io.tiles(0).xact_init val is_write = x_init.bits.t_type === X_INIT_WRITE_UNCACHED @@ -474,7 +474,7 @@ class CoherenceHubNull extends CoherenceHub { } -class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ +class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with 
FourStateCoherence{ def coherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) @@ -487,7 +487,7 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ )) } - val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(_)) + val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(ntiles, _)) val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } @@ -498,8 +498,8 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(NTILES){ Wire(){Bool()} } } - val p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ Vec(NTILES){ Wire(){Bool()} } } + val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Wire(){Bool()} } } + val p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Wire(){Bool()} } } val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } val p_data_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bits(width = TILE_ID_BITS)} } val p_data_valid_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } @@ -523,17 +523,17 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ sent_x_rep_ack_arr(i) := Bool(false) p_data_tile_id_arr(i) := Bits(0, width = TILE_ID_BITS) p_data_valid_arr(i) := Bool(false) - for( j <- 0 until NTILES) { + for( j <- 0 until ntiles) { p_rep_cnt_dec_arr(i)(j) := Bool(false) p_req_cnt_inc_arr(i)(j) := Bool(false) } } - val p_rep_data_dep_list = List.fill(NTILES)((new queue(NGLOBAL_XACTS, true)){new TrackerDependency}) // depth must >= NPRIMARY - val x_init_data_dep_list = List.fill(NTILES)((new queue(NGLOBAL_XACTS, true)){new TrackerDependency}) // depth should >= NPRIMARY + val p_rep_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS, true)){new TrackerDependency}) // depth must >= NPRIMARY + val x_init_data_dep_list = 
List.fill(ntiles)((new queue(NGLOBAL_XACTS, true)){new TrackerDependency}) // depth should >= NPRIMARY // Free finished transactions - for( j <- 0 until NTILES ) { + for( j <- 0 until ntiles ) { val finish = io.tiles(j).xact_finish do_free_arr(finish.bits.global_xact_id) := finish.valid finish.ready := Bool(true) @@ -543,7 +543,7 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ // Forward memory responses from mem to tile or arbitrate to ack val mem_idx = io.mem.resp.bits.tag val ack_idx = PriorityEncoder(send_x_rep_ack_arr.toBits) - for( j <- 0 until NTILES ) { + for( j <- 0 until ntiles ) { val rep = io.tiles(j).xact_rep rep.bits.t_type := UFix(0) rep.bits.tile_xact_id := UFix(0) @@ -583,7 +583,7 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ io.mem.req_data <> mem_req_data_arb.io.out // Handle probe replies, which may or may not have data - for( j <- 0 until NTILES ) { + for( j <- 0 until ntiles ) { val p_rep = io.tiles(j).probe_rep val p_rep_data = io.tiles(j).probe_rep_data val idx = p_rep.bits.global_xact_id @@ -601,10 +601,10 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ trackerList(i).io.p_rep_data.valid := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.valid trackerList(i).io.p_rep_data.bits := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.bits - trackerList(i).io.p_rep_data_dep.valid := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.valid, (0 until NTILES).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.valid)) - trackerList(i).io.p_rep_data_dep.bits := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.bits, (0 until NTILES).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.bits)) + trackerList(i).io.p_rep_data_dep.valid := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.valid, (0 until ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.valid)) + 
trackerList(i).io.p_rep_data_dep.bits := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.bits)) - for( j <- 0 until NTILES) { + for( j <- 0 until ntiles) { val p_rep = io.tiles(j).probe_rep p_rep_cnt_dec_arr(i)(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) } @@ -612,9 +612,9 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ // Nack conflicting transaction init attempts val s_idle :: s_abort_drain :: s_abort_send :: s_abort_complete :: Nil = Enum(4){ UFix() } - val abort_state_arr = Vec(NTILES) { Reg(resetVal = s_idle) } - val want_to_abort_arr = Vec(NTILES) { Wire() { Bool()} } - for( j <- 0 until NTILES ) { + val abort_state_arr = Vec(ntiles) { Reg(resetVal = s_idle) } + val want_to_abort_arr = Vec(ntiles) { Wire() { Bool()} } + for( j <- 0 until ntiles ) { val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data val x_abort = io.tiles(j).xact_abort @@ -662,7 +662,7 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ // Only one allocation per cycle // Init requests may or may not have data val alloc_arb = (new Arbiter(NGLOBAL_XACTS)) { Bool() } - val init_arb = (new Arbiter(NTILES)) { new TrackerAllocReq() } + val init_arb = (new Arbiter(ntiles)) { new TrackerAllocReq() } for( i <- 0 until NGLOBAL_XACTS ) { alloc_arb.io.in(i).valid := !trackerList(i).io.busy trackerList(i).io.can_alloc := alloc_arb.io.in(i).ready @@ -671,10 +671,10 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ trackerList(i).io.x_init_data.bits := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.bits trackerList(i).io.x_init_data.valid := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.valid - trackerList(i).io.x_init_data_dep.bits := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.bits, (0 until NTILES).map( j => UFix(j) -> 
x_init_data_dep_list(j).io.deq.bits)) - trackerList(i).io.x_init_data_dep.valid := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.valid, (0 until NTILES).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.valid)) + trackerList(i).io.x_init_data_dep.bits := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.bits)) + trackerList(i).io.x_init_data_dep.valid := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.valid, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.valid)) } - for( j <- 0 until NTILES ) { + for( j <- 0 until ntiles ) { val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data val x_init_data_dep = x_init_data_dep_list(j).io.deq @@ -694,8 +694,8 @@ class CoherenceHubBroadcast extends CoherenceHub with FourStateCoherence{ // Handle probe request generation // Must arbitrate for each request port - val p_req_arb_arr = List.fill(NTILES)((new Arbiter(NGLOBAL_XACTS)) { new ProbeRequest() }) - for( j <- 0 until NTILES ) { + val p_req_arb_arr = List.fill(ntiles)((new Arbiter(NGLOBAL_XACTS)) { new ProbeRequest() }) + for( j <- 0 until ntiles ) { for( i <- 0 until NGLOBAL_XACTS ) { val t = trackerList(i).io p_req_arb_arr(j).io.in(i).bits := t.probe_req.bits diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index a3b1e4ea..ca4d6dbd 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -184,7 +184,6 @@ object Constants require(IDX_BITS+OFFSET_BITS <= PGIDX_BITS); // coherence parameters - val NTILES = 1 val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8 val TILE_ID_BITS = 1 val TILE_XACT_ID_BITS = log2up(NMSHR)+3 From ab6c9350db2291d45145116094e6ad5e31a5e585 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 13 Mar 2012 19:10:54 -0700 Subject: [PATCH 0323/1087] fix minor coherence bugs --- 
rocket/src/main/scala/coherence.scala | 17 +++++++++-------- rocket/src/main/scala/htif.scala | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index ae9321b1..fc32b6eb 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -207,12 +207,12 @@ trait FourStateCoherence extends CoherencePolicy { def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { val reply = Wire() { new ProbeReply() } - val with_data = MuxLookup(incoming.p_type, state, Array( + val with_data = MuxLookup(incoming.p_type, P_REP_INVALIDATE_DATA, Array( probeInvalidate -> P_REP_INVALIDATE_DATA, probeDowngrade -> P_REP_DOWNGRADE_DATA, probeCopy -> P_REP_COPY_DATA )) - val without_data = MuxLookup(incoming.p_type, state, Array( + val without_data = MuxLookup(incoming.p_type, P_REP_INVALIDATE_ACK, Array( probeInvalidate -> P_REP_INVALIDATE_ACK, probeDowngrade -> P_REP_DOWNGRADE_ACK, probeCopy -> P_REP_COPY_ACK @@ -369,12 +369,13 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc x_init_data_needs_write := transactionInitHasData(io.alloc_req.bits.xact_init) x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) if(ntiles > 1) p_rep_count := UFix(ntiles-1) - p_req_flags := ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only + val p_req_initial_flags = ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only + p_req_flags := p_req_initial_flags mem_cnt := UFix(0) p_w_mem_cmd_sent := Bool(false) x_w_mem_cmd_sent := Bool(false) io.pop_x_init := UFix(1) << io.alloc_req.bits.tile_id - state := Mux(p_req_flags.orR, s_probe, s_mem) + state := Mux(p_req_initial_flags.orR, s_probe, s_mem) } } is(s_probe) { @@ -389,7 +390,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc val p_rep_count_next = p_rep_count - PopCount(io.p_rep_cnt_dec) io.pop_p_rep := 
io.p_rep_cnt_dec if(ntiles > 1) p_rep_count := p_rep_count_next - when(p_rep_count === UFix(0)) { + when(p_rep_count === UFix(1)) { io.pop_p_rep := Bool(true) state := s_mem } @@ -529,8 +530,8 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS } } - val p_rep_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS, true)){new TrackerDependency}) // depth must >= NPRIMARY - val x_init_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS, true)){new TrackerDependency}) // depth should >= NPRIMARY + val p_rep_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth must >= NPRIMARY + val x_init_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth should >= NPRIMARY // Free finished transactions for( j <- 0 until ntiles ) { @@ -589,7 +590,7 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS val idx = p_rep.bits.global_xact_id val pop_p_reps = trackerList.map(_.io.pop_p_rep(j).toBool) val do_pop = foldR(pop_p_reps)(_ || _) - p_rep.ready := do_pop + p_rep.ready := Bool(true) p_rep_data_dep_list(j).io.enq.valid := do_pop p_rep_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_p_reps) p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 967b57a0..bcf11b09 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -160,7 +160,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence io.mem.xact_finish.valid := (state === state_mem_finish) && mem_needs_ack io.mem.xact_finish.bits.global_xact_id := mem_gxid - val probe_q = (new queue(1, pipe=true)) { new TransactionReply } + val probe_q = (new queue(1)) { new ProbeReply } probe_q.io.enq.valid := io.mem.probe_req.valid io.mem.probe_req.ready := probe_q.io.enq.ready probe_q.io.enq.bits := 
newProbeReply(io.mem.probe_req.bits, newStateOnFlush()) From 5655dbd5daab407d0b32538f5767532f882b6c3b Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 13 Mar 2012 21:10:03 -0700 Subject: [PATCH 0324/1087] add vvcfg and vtcfg instructions --- rocket/src/main/scala/consts.scala | 11 +- rocket/src/main/scala/ctrl.scala | 4 +- rocket/src/main/scala/ctrl_vec.scala | 122 +++++++++++------------ rocket/src/main/scala/dpath_vec.scala | 38 +++++-- rocket/src/main/scala/instructions.scala | 3 +- 5 files changed, 101 insertions(+), 77 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index ca4d6dbd..105b6911 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -7,7 +7,7 @@ object Constants { val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = false + val HAVE_VEC = true val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); @@ -142,6 +142,7 @@ object Constants val PCR_TOHOST = UFix(16, 5); val PCR_FROMHOST = UFix(17, 5); val PCR_VECBANK = UFix(18, 5); + val PCR_VECCFG = UFix(19, 5); // temporaries for vector, these will go away val PCR_VEC_BACKUP = UFix(29, 5) @@ -232,9 +233,11 @@ object Constants val VEC_N = UFix(0, 1); val VEC_Y = if (HAVE_VEC) UFix(1, 1) else VEC_N; - val VEC_X = UFix(0, 1) - val VEC_VL = UFix(0, 1) - val VEC_CFG = UFix(1, 1) + val VEC_X = UFix(0, 2) + val VEC_FN_N = UFix(0, 2) + val VEC_VL = UFix(1, 2) + val VEC_CFG = UFix(2, 2) + val VEC_CFGVL = UFix(3, 2) val VCMD_I = UFix(0, 3) val VCMD_F = UFix(1, 3) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 49683f96..e9eff28a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -261,6 +261,7 @@ object rocketCtrlDecode // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | | replay_next // | | | | | | | | | | | | | | | | | | | | | | | | | | | VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, 
DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y), + VVCFG-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y), VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y), VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), @@ -305,8 +306,7 @@ object rocketCtrlDecode VENQIMM1-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), VENQIMM2-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), VENQCNT-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), - VWAITXCPT-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y), - VWAITKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y)) + VWAITXCPT-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y)) } class rocketCtrl extends Component diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 13565b53..fe76ddeb 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -12,7 +12,7 @@ class ioCtrlDpathVec extends Bundle val appvl0 = Bool(INPUT) val pfq = Bool(INPUT) 
val wen = Bool(OUTPUT) - val fn = Bits(1, OUTPUT) + val fn = Bits(2, OUTPUT) val sel_vcmd = Bits(3, OUTPUT) val sel_vimm = Bits(1, OUTPUT) val sel_vimm2 = Bits(1, OUTPUT) @@ -66,66 +66,66 @@ class rocketCtrlVec extends Component val veccs = ListLookup(io.dpath.inst, - // appvlmask - // | vcmdq - // | | vximm1q - // | | | vximm2q - // | | | | vcntq - // | | | | | vpfcmdq - // | | | | | | vpfximm1q - // | | | | | | | vpfximm2q - // | | | | | | | | vpfcntq - // wen | | | | | | | | | pfq - // val vcmd vimm vimm2 | fn | | | | | | | | | | fence_cv - // | | | | | | | | | | | | | | | | | waitxcpt - // | | | | | | | | | | | | | | | | | | - List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFG,N,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N), - VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,N,N,N,N,N,N,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_X, Y,Y,N,N,N,N,N,N,N,N,N,N), - FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,N,N,N,N,N,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y,N), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,Y,N), - VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLBU-> 
List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_X, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_X, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_X, N,Y,N,N,N,Y,N,N,N,Y,N,N), - VENQIMM1-> 
List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_X, N,N,Y,N,N,N,Y,N,N,Y,N,N), - VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_ALU,N,VEC_X, N,N,N,Y,N,N,N,Y,N,Y,N,N), - VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,Y,N,N,N,Y,Y,N,N), - VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y), - VWAITKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,Y) + // appvlmask + // | vcmdq + // | | vximm1q + // | | | vximm2q + // | | | | vcntq + // | | | | | vpfcmdq + // | | | | | | vpfximm1q + // | | | | | | | vpfximm2q + // | | | | | | | | vpfcntq + // wen | | | | | | | | | pfq + // val vcmd vimm vimm2 | fn | | | | | | | | | | fence_cv + // | | | | | | | | | | | | | | | | | waitxcpt + // | | | | | | | | | | | | | | | | | | + List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFGVL,N,Y,Y,N,N,Y,Y,N,N,N,N,N), + VVCFG-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, N,VEC_CFG, N,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N), + VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,N,N,N,N,N,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,N,N,N,N,N,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N), + VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLH-> 
List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, 
VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), + VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,Y,N,N,N,Y,N,N), + VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_FN_N, N,N,Y,N,N,N,Y,N,N,Y,N,N), + VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_ALU,N,VEC_FN_N, N,N,N,Y,N,N,N,Y,N,Y,N,N), + VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,Y,N,N,N,Y,Y,N,N), + VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,Y) )) val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_sel_vimm2 :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 7d045fe4..d557341a 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -40,8 +40,8 @@ class rocketDpathVec extends Component { val io = new ioDpathVec() - val nxregs = Cat(UFix(0,1),io.inst(15,10).toUFix) // FIXME: to make the nregs width 7 bits - val nfregs = io.inst(21,16).toUFix + val nxregs = Mux(io.ctrl.fn === VEC_CFG, io.wdata(5,0), io.inst(15,10)).toUFix + UFix(0,7) + val nfregs = Mux(io.ctrl.fn === VEC_CFG, io.rs2(5,0), io.inst(21,16)).toUFix + UFix(0,7) val nregs = nxregs + nfregs val uts_per_bank = MuxLookup( @@ -104,18 +104,38 @@ class rocketDpathVec extends Component val reg_hwvl = Reg(resetVal = UFix(32, 12)) val reg_appvl0 = Reg(resetVal = Bool(true)) val hwvl_vcfg = (uts_per_bank * io.vecbankcnt)(11,0) - val hwvl = Mux(io.ctrl.fn === VEC_CFG, hwvl_vcfg, reg_hwvl) - val appvl = Mux(io.wdata(11,0) < hwvl, io.wdata(11,0), hwvl).toUFix - when (io.valid && io.ctrl.wen) + val hwvl = + Mux(io.ctrl.fn === VEC_CFG || io.ctrl.fn === VEC_CFGVL, hwvl_vcfg, + reg_hwvl) + + val appvl = + Mux(io.ctrl.fn === VEC_CFG, UFix(0), + Mux(io.wdata(11,0) < hwvl, io.wdata(11,0).toUFix, + 
hwvl.toUFix)) + + val reg_nxregs = Reg(resetVal = UFix(32, 6)) + val reg_nfregs = Reg(resetVal = UFix(32, 6)) + val reg_appvl = Reg(resetVal = UFix(32, 12)) + + when (io.valid) { - when (io.ctrl.fn === VEC_CFG) { reg_hwvl := hwvl_vcfg } - reg_appvl0 := !(appvl.orR()) + when (io.ctrl.fn === VEC_CFG || io.ctrl.fn === VEC_CFGVL) + { + reg_hwvl := hwvl_vcfg + reg_nxregs := nxregs + reg_nfregs := nfregs + } + when (io.ctrl.fn === VEC_VL || io.ctrl.fn === VEC_CFGVL) + { + reg_appvl0 := !(appvl.orR()) + reg_appvl := appvl + } } io.wen := io.valid && io.ctrl.wen io.appvl := appvl - val vlenm1 = appvl - Bits(1,1) + val appvlm1 = appvl - UFix(1) io.iface.vcmdq_bits := Mux(io.ctrl.sel_vcmd === VCMD_I, Cat(Bits(0,2), Bits(0,4), io.inst(9,8), Bits(0,6), Bits(0,6)), @@ -128,7 +148,7 @@ class rocketDpathVec extends Component Bits(0,20)))))))) io.iface.vximm1q_bits := - Mux(io.ctrl.sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, io.inst(21,10), vlenm1(10,0)), + Mux(io.ctrl.sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, nfregs, nxregs, appvlm1(10,0)), io.wdata) // VIMM_ALU io.iface.vximm2q_bits := diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 5b5a7b89..a6e3c839 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -244,6 +244,8 @@ object Instructions val VFMTS = Bits("b?????_?????_?????_0000110010_1110011",32); val VVCFGIVL = Bits("b?????_?????_????????????_001_1110011",32); val VTCFGIVL = Bits("b?????_?????_????????????_011_1110011",32); + val VVCFG = Bits("b00000_?????_?????_0000001000_1110011",32); + val VTCFG = Bits("b00000_?????_?????_0000011000_1110011",32); val VSETVL = Bits("b?????_?????_000000000000_101_1110011",32); val VF = Bits("b00000_?????_????????????_111_1110011",32); // vector supervisor instructions @@ -252,7 +254,6 @@ object Instructions val VENQIMM2 = Bits("b00000_?????_?????_1000000010_1111011",32) val VENQCNT = 
Bits("b00000_?????_?????_1000000011_1111011",32) val VWAITXCPT = Bits("b00000_00000_00000_1100000000_1111011",32) - val VWAITKILL = Bits("b00000_00000_00000_1100000001_1111011",32) val NOP = ADDI & Bits("b00000000000000000000001111111111", 32); } From b100544b250d1c8f96c3ef3038a8645846057eed Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 13 Mar 2012 22:21:26 -0700 Subject: [PATCH 0325/1087] datapath to read out vector state --- rocket/src/main/scala/dpath.scala | 8 ++++++++ rocket/src/main/scala/dpath_util.scala | 4 ++++ rocket/src/main/scala/dpath_vec.scala | 9 +++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 1486c3d4..bcdea1e6 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -388,6 +388,10 @@ class rocketDpath extends Component vec.io.pcrw.en := io.ctrl.wen_pcr vec.io.pcrw.data := wb_reg_wdata + pcr.io.vec_appvl := vec.io.appvl + pcr.io.vec_nxregs := vec.io.nxregs + pcr.io.vec_nfregs := vec.io.nfregs + wb_wdata := Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), Mux(wb_src_dmem, io.dmem.resp_data_subword, @@ -395,6 +399,10 @@ class rocketDpath extends Component } else { + pcr.io.vec_appvl := UFix(0) + pcr.io.vec_nxregs := UFix(0) + pcr.io.vec_nfregs := UFix(0) + wb_wdata := Mux(wb_src_dmem, io.dmem.resp_data_subword, wb_reg_wdata) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 2748048c..b5986bfe 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -80,6 +80,9 @@ class ioDpathPCR extends Bundle() val vecbank = Bits(8, OUTPUT) val vecbankcnt = UFix(4, OUTPUT) val vechold = Bool(OUTPUT) + val vec_appvl = UFix(12, INPUT) + val vec_nxregs = UFix(6, INPUT) + val vec_nfregs = UFix(6, INPUT) } class rocketDpathPCR extends Component @@ -229,6 +232,7 @@ class rocketDpathPCR extends Component is (PCR_K1) { rdata := reg_k1; } is 
(PCR_PTBR) { rdata := Cat(Bits(0,64-PADDR_BITS), reg_ptbr); } is (PCR_VECBANK) { rdata := Cat(Bits(0, 56), reg_vecbank) } + is (PCR_VECCFG) { rdata := Cat(Bits(0, 40), io.vec_nfregs, io.vec_nxregs, io.vec_appvl) } } } } diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index d557341a..95653b98 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -34,6 +34,8 @@ class ioDpathVec extends Bundle val pcrw = new ioWritePort() val wen = Bool(OUTPUT) val appvl = UFix(12, OUTPUT) + val nxregs = UFix(6, OUTPUT) + val nfregs = UFix(6, OUTPUT) } class rocketDpathVec extends Component @@ -134,7 +136,10 @@ class rocketDpathVec extends Component } io.wen := io.valid && io.ctrl.wen - io.appvl := appvl + io.appvl := Mux(io.ctrl.fn === VEC_VL || io.ctrl.fn === VEC_CFGVL, appvl, reg_appvl) + io.nxregs := reg_nxregs + io.nfregs := reg_nfregs + val appvlm1 = appvl - UFix(1) io.iface.vcmdq_bits := @@ -148,7 +153,7 @@ class rocketDpathVec extends Component Bits(0,20)))))))) io.iface.vximm1q_bits := - Mux(io.ctrl.sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, nfregs, nxregs, appvlm1(10,0)), + Mux(io.ctrl.sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, nfregs(5,0), nxregs(5,0), appvlm1(10,0)), io.wdata) // VIMM_ALU io.iface.vximm2q_bits := From 040d62f372a254d3d1bbff05094a3f0bfd53c6a5 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 13 Mar 2012 23:45:10 -0700 Subject: [PATCH 0326/1087] refactored vector exception handling interface --- rocket/src/main/scala/consts.scala | 7 +- rocket/src/main/scala/cpu.scala | 8 +- rocket/src/main/scala/ctrl.scala | 6 +- rocket/src/main/scala/ctrl_vec.scala | 127 +++++++++++++---------- rocket/src/main/scala/dpath.scala | 4 - rocket/src/main/scala/dpath_util.scala | 6 -- rocket/src/main/scala/dpath_vec.scala | 12 +-- rocket/src/main/scala/instructions.scala | 11 +- 8 files changed, 91 insertions(+), 90 deletions(-) diff --git 
a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 105b6911..3649e6a0 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -7,7 +7,7 @@ object Constants { val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = true + val HAVE_VEC = false val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); @@ -144,11 +144,6 @@ object Constants val PCR_VECBANK = UFix(18, 5); val PCR_VECCFG = UFix(19, 5); - // temporaries for vector, these will go away - val PCR_VEC_BACKUP = UFix(29, 5) - val PCR_VEC_KILL = UFix(30, 5) - val PCR_VEC_HOLD = UFix(31, 5) - // definition of bits in PCR status reg val SR_ET = 0; // enable traps val SR_EF = 1; // enable floating point diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 08ef77fa..c2cc4893 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -205,10 +205,10 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.xcpt.exception := ctrl.io.vec_iface.exception ctrl.io.vec_iface.exception_ack_valid := vu.io.xcpt.exception_ack_valid vu.io.xcpt.exception_ack_ready := ctrl.io.vec_iface.exception_ack_ready - vu.io.xcpt.backup := dpath.io.vec_iface.backup - vu.io.xcpt.backup_addr := dpath.io.vec_iface.backup_addr.toUFix - vu.io.xcpt.kill := dpath.io.vec_iface.kill - vu.io.xcpt.hold := dpath.io.vec_iface.hold + vu.io.xcpt.evac := ctrl.io.vec_iface.evac + vu.io.xcpt.evac_addr := dpath.io.vec_iface.evac_addr.toUFix + vu.io.xcpt.kill := ctrl.io.vec_iface.kill + vu.io.xcpt.hold := ctrl.io.vec_iface.hold // hooking up vector memory interface val storegen = new StoreDataGen diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index e9eff28a..d71d8b02 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -306,7 +306,10 @@ object rocketCtrlDecode VENQIMM1-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), VENQIMM2-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), VENQCNT-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), - VWAITXCPT-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y)) + VXCPTEVAC-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), + VXCPTKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), + VXCPTWAIT-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,Y), + VXCPTHOLD-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N)) } class rocketCtrl extends Component @@ -630,6 +633,7 @@ class rocketCtrl extends Component vec.io.s := io.dpath.status(SR_S) vec.io.sr_ev := io.dpath.status(SR_EV) vec.io.exception := wb_reg_exception + vec.io.eret := wb_reg_eret vec_replay = vec.io.replay vec_stalld = vec.io.stalld // || id_vfence_cv && !vec.io.vfence_ready diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index fe76ddeb..3bc73e38 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -46,6 +46,10 @@ class ioCtrlVecInterface extends Bundle val exception = Bool(OUTPUT) val exception_ack_valid = Bool(INPUT) val exception_ack_ready = Bool(OUTPUT) + + val evac = Bool(OUTPUT) + val kill = Bool(OUTPUT) + val hold = Bool(OUTPUT) } class ioCtrlVec extends Bundle @@ -55,6 +59,7 @@ class ioCtrlVec extends Bundle val s = 
Bool(INPUT) val sr_ev = Bool(INPUT) val exception = Bool(INPUT) + val eret = Bool(INPUT) val replay = Bool(OUTPUT) val stalld = Bool(OUTPUT) val vfence_ready = Bool(OUTPUT) @@ -77,61 +82,64 @@ class rocketCtrlVec extends Component // | | | | | | | | vpfcntq // wen | | | | | | | | | pfq // val vcmd vimm vimm2 | fn | | | | | | | | | | fence_cv - // | | | | | | | | | | | | | | | | | waitxcpt + // | | | | | | | | | | | | | | | | | xcptwait // | | | | | | | | | | | | | | | | | | - List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFGVL,N,Y,Y,N,N,Y,Y,N,N,N,N,N), - VVCFG-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, N,VEC_CFG, N,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N), - VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N), - FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,N,N,N,N,N,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,N,N,N,N,N,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N), - VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, 
Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N), - VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_FN_N, 
N,Y,N,N,N,Y,N,N,N,Y,N,N), - VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_FN_N, N,N,Y,N,N,N,Y,N,N,Y,N,N), - VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_ALU,N,VEC_FN_N, N,N,N,Y,N,N,N,Y,N,Y,N,N), - VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,Y,N,N,N,Y,Y,N,N), - VWAITXCPT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,Y) + List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFGVL,N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VVCFG-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, N,VEC_CFG, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N,N,N,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N,N,N,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N,N), + FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,N,N,N,N,N,N,N,N,N,N), + FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,N,N,N,N,N,N,N,N,N,N), + FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N,N), + FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N,N), + VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, 
Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VFSSTW-> 
List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), + VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,Y,N,N,N,Y,N,N,N,N,N), + VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_FN_N, N,N,Y,N,N,N,Y,N,N,Y,N,N,N,N,N), + VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_ALU,N,VEC_FN_N, N,N,N,Y,N,N,N,Y,N,Y,N,N,N,N,N), + VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,Y,N,N,N,Y,Y,N,N,N,N,N), + VXCPTEVAC-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,Y,N,N,N), + VXCPTKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,Y,N,N), + VXCPTWAIT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,N,Y,N), + VXCPTHOLD-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,N,N,Y) )) val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_sel_vimm2 :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: wb_vec_cntq_enq :: veccs1 = veccs0 val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_pfcntq_enq :: veccs2 = veccs1 - val wb_vec_pfaq :: wb_vec_fence_cv :: wb_vec_waitxcpt :: Nil = veccs2 + val wb_vec_pfaq :: wb_vec_fence_cv :: wb_vec_xcptevac :: wb_vec_xcptkill :: wb_vec_xcptwait :: wb_vec_xcpthold :: Nil = veccs2 val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val && !(wb_vec_appvlmask && io.dpath.appvl0) @@ -207,15 +215,24 @@ class rocketCtrlVec extends Component wb_vec_fence_cv && !io.iface.vfence_ready ) - val reg_waitxcpt = Reg(resetVal = Bool(false)) - val do_waitxcpt = valid_common && wb_vec_waitxcpt && !io.replay + val reg_xcptwait = Reg(resetVal = Bool(false)) + val do_xcptwait = valid_common && wb_vec_xcptwait && !io.replay - when (do_waitxcpt) { reg_waitxcpt := Bool(true) } - when (io.iface.exception_ack_valid) { reg_waitxcpt := Bool(false) } + when (do_xcptwait) { reg_xcptwait := Bool(true) } + when (io.iface.exception_ack_valid) { reg_xcptwait := 
Bool(false) } io.iface.exception := io.exception && io.sr_ev - io.iface.exception_ack_ready := reg_waitxcpt + io.iface.exception_ack_ready := reg_xcptwait - io.stalld := reg_waitxcpt + val reg_hold = Reg(resetVal = Bool(false)) + + when (wb_vec_xcpthold) { reg_hold := Bool(true) } + when (io.eret) { reg_hold := Bool(false) } + + io.iface.evac := wb_vec_xcptevac.toBool + io.iface.kill := wb_vec_xcptkill.toBool + io.iface.hold := reg_hold + + io.stalld := reg_xcptwait io.vfence_ready := !io.sr_ev || io.iface.vfence_ready } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index bcdea1e6..fa69077b 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -383,10 +383,6 @@ class rocketDpath extends Component vec.io.vecbankcnt := pcr.io.vecbankcnt vec.io.wdata := wb_reg_vec_wdata vec.io.rs2 := wb_reg_rs2 - vec.io.vechold := pcr.io.vechold - vec.io.pcrw.addr := wb_reg_raddr2 - vec.io.pcrw.en := io.ctrl.wen_pcr - vec.io.pcrw.data := wb_reg_wdata pcr.io.vec_appvl := vec.io.appvl pcr.io.vec_nxregs := vec.io.nxregs diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index b5986bfe..a3564b69 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -79,7 +79,6 @@ class ioDpathPCR extends Bundle() val irq_ipi = Bool(OUTPUT); val vecbank = Bits(8, OUTPUT) val vecbankcnt = UFix(4, OUTPUT) - val vechold = Bool(OUTPUT) val vec_appvl = UFix(12, INPUT) val vec_nxregs = UFix(6, INPUT) val vec_nfregs = UFix(6, INPUT) @@ -101,7 +100,6 @@ class rocketDpathPCR extends Component val reg_k1 = Reg() { Bits() }; val reg_ptbr = Reg() { UFix() }; val reg_vecbank = Reg(resetVal = Bits("b1111_1111", 8)) - val reg_vechold = Reg() { Bool() } val reg_error_mode = Reg(resetVal = Bool(false)); val reg_status_vm = Reg(resetVal = Bool(false)); @@ -143,8 +141,6 @@ class rocketDpathPCR extends Component cnt = cnt + reg_vecbank(i) io.vecbankcnt := cnt(3,0) - 
io.vechold := reg_vechold - val badvaddr_sign = Mux(io.w.data(VADDR_BITS-1), ~io.w.data(63,VADDR_BITS) === UFix(0), io.w.data(63,VADDR_BITS) != UFix(0)) when (io.badvaddr_wen) { reg_badvaddr := Cat(badvaddr_sign, io.w.data(VADDR_BITS-1,0)).toUFix; @@ -174,7 +170,6 @@ class rocketDpathPCR extends Component when (io.eret) { reg_status_s := reg_status_ps; reg_status_et := Bool(true); - reg_vechold := Bool(false) } when (reg_count === reg_compare) { @@ -212,7 +207,6 @@ class rocketDpathPCR extends Component when (waddr === PCR_K1) { reg_k1 := wdata; } when (waddr === PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } when (waddr === PCR_VECBANK) { reg_vecbank := wdata(7,0) } - when (waddr === PCR_VEC_HOLD) { reg_vechold := reg_status_ev && wdata(0) } } rdata := Bits(0, 64) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 95653b98..f8270002 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -12,10 +12,7 @@ class ioDpathVecInterface extends Bundle val vximm1q_bits = Bits(SZ_VIMM, OUTPUT) val vximm2q_bits = Bits(SZ_VSTRIDE, OUTPUT) val vcntq_bits = Bits(SZ_VLEN, OUTPUT) - val backup = Bool(OUTPUT) - val backup_addr = Bits(64, OUTPUT) - val kill = Bool(OUTPUT) - val hold = Bool(OUTPUT) + val evac_addr = Bits(64, OUTPUT) } class ioDpathVec extends Bundle @@ -30,8 +27,6 @@ class ioDpathVec extends Bundle val vecbankcnt = UFix(4, INPUT) val wdata = Bits(64, INPUT) val rs2 = Bits(64, INPUT) - val vechold = Bool(INPUT) - val pcrw = new ioWritePort() val wen = Bool(OUTPUT) val appvl = UFix(12, OUTPUT) val nxregs = UFix(6, OUTPUT) @@ -162,10 +157,7 @@ class rocketDpathVec extends Component io.iface.vcntq_bits := io.wdata(SZ_VLEN-1, 0) - io.iface.backup := io.pcrw.en && (io.pcrw.addr === PCR_VEC_BACKUP) - io.iface.backup_addr := io.pcrw.data - io.iface.kill := io.pcrw.en && (io.pcrw.addr === PCR_VEC_KILL) - io.iface.hold := io.vechold + 
io.iface.evac_addr := io.wdata io.ctrl.valid := io.valid io.ctrl.inst := io.inst diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index a6e3c839..bf985739 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -33,11 +33,11 @@ object Instructions val SLL = Bits("b?????_?????_?????_0000000001_0110011",32); val SLT = Bits("b?????_?????_?????_0000000010_0110011",32); val SLTU = Bits("b?????_?????_?????_0000000011_0110011",32); - val riscvXOR = Bits("b?????_?????_?????_0000000100_0110011",32); + val riscvXOR = Bits("b?????_?????_?????_0000000100_0110011",32); val SRL = Bits("b?????_?????_?????_0000000101_0110011",32); val SRA = Bits("b?????_?????_?????_1000000101_0110011",32); - val riscvOR = Bits("b?????_?????_?????_0000000110_0110011",32); - val riscvAND = Bits("b?????_?????_?????_0000000111_0110011",32); + val riscvOR = Bits("b?????_?????_?????_0000000110_0110011",32); + val riscvAND = Bits("b?????_?????_?????_0000000111_0110011",32); val MUL = Bits("b?????_?????_?????_0000001000_0110011",32); val MULH = Bits("b?????_?????_?????_0000001001_0110011",32); val MULHSU = Bits("b?????_?????_?????_0000001010_0110011",32); @@ -253,7 +253,10 @@ object Instructions val VENQIMM1 = Bits("b00000_?????_?????_1000000001_1111011",32) val VENQIMM2 = Bits("b00000_?????_?????_1000000010_1111011",32) val VENQCNT = Bits("b00000_?????_?????_1000000011_1111011",32) - val VWAITXCPT = Bits("b00000_00000_00000_1100000000_1111011",32) + val VXCPTEVAC = Bits("b00000_?????_00000_1100000000_1111011",32) + val VXCPTKILL = Bits("b00000_00000_00000_1100000001_1111011",32) + val VXCPTWAIT = Bits("b00000_00000_00000_1100000010_1111011",32) + val VXCPTHOLD = Bits("b00000_00000_00000_1100000011_1111011",32) val NOP = ADDI & Bits("b00000000000000000000001111111111", 32); } From b19d783fbdbd3ee570b51f786cf09a378abbec88 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 14 Mar 2012 14:15:28 -0700 Subject: 
[PATCH 0327/1087] add vector irq handler --- rocket/src/main/scala/consts.scala | 1 + rocket/src/main/scala/cpu.scala | 5 +++++ rocket/src/main/scala/ctrl.scala | 13 +++++++++++-- rocket/src/main/scala/ctrl_vec.scala | 7 +++++++ rocket/src/main/scala/dpath.scala | 3 +++ rocket/src/main/scala/dpath_util.scala | 8 ++++++++ rocket/src/main/scala/dpath_vec.scala | 3 +++ 7 files changed, 38 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 3649e6a0..5e2a5142 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -143,6 +143,7 @@ object Constants val PCR_FROMHOST = UFix(17, 5); val PCR_VECBANK = UFix(18, 5); val PCR_VECCFG = UFix(19, 5); + val PCR_VECIRQAUX= UFix(20, 5) // definition of bits in PCR status reg val SR_ET = 0; // enable traps diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index c2cc4893..8adaef61 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -201,6 +201,11 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) // fences ctrl.io.vec_iface.vfence_ready := vu.io.vec_fence_ready + // irqs + ctrl.io.vec_iface.irq := vu.io.irq + ctrl.io.vec_iface.irq_cause := vu.io.irq_cause + dpath.io.vec_iface.irq_aux := vu.io.irq_aux + // exceptions vu.io.xcpt.exception := ctrl.io.vec_iface.exception ctrl.io.vec_iface.exception_ack_valid := vu.io.xcpt.exception_ack_valid diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index d71d8b02..801198ef 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -50,6 +50,7 @@ class ioCtrlDpath extends Bundle() val exception = Bool(OUTPUT); val cause = UFix(5,OUTPUT); val badvaddr_wen = Bool(OUTPUT); // high for a load/store access fault + val vec_irq_aux_wen = Bool(OUTPUT) // inputs from datapath val xcpt_ma_inst = Bool(INPUT); // high on a misaligned/illegal virtual PC val 
btb_hit = Bool(INPUT); @@ -622,6 +623,8 @@ class rocketCtrl extends Component var vec_replay = Bool(false) var vec_stalld = Bool(false) + var vec_irq = Bool(false) + var vec_irq_cause = UFix(0,5) if (HAVE_VEC) { // vector control @@ -637,21 +640,26 @@ class rocketCtrl extends Component vec_replay = vec.io.replay vec_stalld = vec.io.stalld // || id_vfence_cv && !vec.io.vfence_ready + vec_irq = vec.io.irq + vec_irq_cause = vec.io.irq_cause } // exception handling // FIXME: verify PC in MEM stage points to valid, restartable instruction val p_irq_timer = (io.dpath.status(15).toBool && io.dpath.irq_timer); val p_irq_ipi = (io.dpath.status(13).toBool && io.dpath.irq_ipi); + val p_irq_vec = (io.dpath.status(8) && vec_irq) val interrupt = io.dpath.status(SR_ET).toBool && mem_reg_valid && ((io.dpath.status(15).toBool && io.dpath.irq_timer) || - (io.dpath.status(13).toBool && io.dpath.irq_ipi)); + (io.dpath.status(13).toBool && io.dpath.irq_ipi) || + p_irq_vec); val interrupt_cause = Mux(p_irq_ipi, UFix(21,5), Mux(p_irq_timer, UFix(23,5), - UFix(0,5))); + Mux(p_irq_vec, vec_irq_cause, + UFix(0,5)))) val mem_xcpt_ma_ld = io.dmem.xcpt_ma_ld && !mem_reg_kill val mem_xcpt_ma_st = io.dmem.xcpt_ma_st && !mem_reg_kill @@ -723,6 +731,7 @@ class rocketCtrl extends Component io.dpath.exception := wb_reg_exception; io.dpath.cause := wb_reg_cause; io.dpath.badvaddr_wen := wb_badvaddr_wen; + io.dpath.vec_irq_aux_wen := wb_reg_exception && wb_reg_cause >= UFix(24) io.dpath.sel_pc := Mux(wb_reg_exception, PC_EVEC, // exception diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 3bc73e38..5a9f5576 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -43,6 +43,9 @@ class ioCtrlVecInterface extends Bundle val vximm2q_user_ready = Bool(INPUT) val vfence_ready = Bool(INPUT) + val irq = Bool(INPUT) + val irq_cause = UFix(5, INPUT) + val exception = Bool(OUTPUT) val exception_ack_valid = Bool(INPUT) val 
exception_ack_ready = Bool(OUTPUT) @@ -63,6 +66,8 @@ class ioCtrlVec extends Bundle val replay = Bool(OUTPUT) val stalld = Bool(OUTPUT) val vfence_ready = Bool(OUTPUT) + val irq = Bool(OUTPUT) + val irq_cause = UFix(5, OUTPUT) } class rocketCtrlVec extends Component @@ -235,4 +240,6 @@ class rocketCtrlVec extends Component io.stalld := reg_xcptwait io.vfence_ready := !io.sr_ev || io.iface.vfence_ready + io.irq := io.iface.irq + io.irq_cause := io.iface.irq_cause } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index fa69077b..9299982d 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -384,6 +384,7 @@ class rocketDpath extends Component vec.io.wdata := wb_reg_vec_wdata vec.io.rs2 := wb_reg_rs2 + pcr.io.vec_irq_aux := vec.io.irq_aux pcr.io.vec_appvl := vec.io.appvl pcr.io.vec_nxregs := vec.io.nxregs pcr.io.vec_nfregs := vec.io.nfregs @@ -395,6 +396,7 @@ class rocketDpath extends Component } else { + pcr.io.vec_irq_aux := UFix(0) pcr.io.vec_appvl := UFix(0) pcr.io.vec_nxregs := UFix(0) pcr.io.vec_nfregs := UFix(0) @@ -429,4 +431,5 @@ class rocketDpath extends Component pcr.io.cause := io.ctrl.cause; pcr.io.pc := wb_reg_pc; pcr.io.badvaddr_wen := io.ctrl.badvaddr_wen; + pcr.io.vec_irq_aux_wen := io.ctrl.vec_irq_aux_wen } diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index a3564b69..fae2fca9 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -70,6 +70,8 @@ class ioDpathPCR extends Bundle() val exception = Bool(INPUT); val cause = UFix(5, INPUT); val badvaddr_wen = Bool(INPUT); + val vec_irq_aux = Bits(64, INPUT) + val vec_irq_aux_wen = Bool(INPUT) val pc = UFix(VADDR_BITS+1, INPUT); val eret = Bool(INPUT); val ei = Bool(INPUT); @@ -100,6 +102,7 @@ class rocketDpathPCR extends Component val reg_k1 = Reg() { Bits() }; val reg_ptbr = Reg() { UFix() }; val reg_vecbank = Reg(resetVal = Bits("b1111_1111", 
8)) + val reg_vec_irq_aux = Reg() { Bits() } val reg_error_mode = Reg(resetVal = Bool(false)); val reg_status_vm = Reg(resetVal = Bool(false)); @@ -145,6 +148,9 @@ class rocketDpathPCR extends Component when (io.badvaddr_wen) { reg_badvaddr := Cat(badvaddr_sign, io.w.data(VADDR_BITS-1,0)).toUFix; } + when (io.vec_irq_aux_wen) { + reg_vec_irq_aux := io.vec_irq_aux + } when (io.exception) { when (!reg_status_et) { @@ -207,6 +213,7 @@ class rocketDpathPCR extends Component when (waddr === PCR_K1) { reg_k1 := wdata; } when (waddr === PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } when (waddr === PCR_VECBANK) { reg_vecbank := wdata(7,0) } + when (waddr === PCR_VECIRQAUX) { reg_vec_irq_aux := wdata } } rdata := Bits(0, 64) @@ -227,6 +234,7 @@ class rocketDpathPCR extends Component is (PCR_PTBR) { rdata := Cat(Bits(0,64-PADDR_BITS), reg_ptbr); } is (PCR_VECBANK) { rdata := Cat(Bits(0, 56), reg_vecbank) } is (PCR_VECCFG) { rdata := Cat(Bits(0, 40), io.vec_nfregs, io.vec_nxregs, io.vec_appvl) } + is (PCR_VECIRQAUX){ rdata := reg_vec_irq_aux } } } } diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index f8270002..48ae3d34 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -13,6 +13,7 @@ class ioDpathVecInterface extends Bundle val vximm2q_bits = Bits(SZ_VSTRIDE, OUTPUT) val vcntq_bits = Bits(SZ_VLEN, OUTPUT) val evac_addr = Bits(64, OUTPUT) + val irq_aux = Bits(64, INPUT) } class ioDpathVec extends Bundle @@ -28,6 +29,7 @@ class ioDpathVec extends Bundle val wdata = Bits(64, INPUT) val rs2 = Bits(64, INPUT) val wen = Bool(OUTPUT) + val irq_aux = Bits(64, OUTPUT) val appvl = UFix(12, OUTPUT) val nxregs = UFix(6, OUTPUT) val nfregs = UFix(6, OUTPUT) @@ -131,6 +133,7 @@ class rocketDpathVec extends Component } io.wen := io.valid && io.ctrl.wen + io.irq_aux := io.iface.irq_aux io.appvl := Mux(io.ctrl.fn === VEC_VL || io.ctrl.fn === VEC_CFGVL, 
appvl, reg_appvl) io.nxregs := reg_nxregs io.nfregs := reg_nfregs From 7dde7099d2a6984a6c8a88d6294f74728766006e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 14 Mar 2012 06:13:16 -0700 Subject: [PATCH 0328/1087] use broadcast hub and coherent HTIF --- rocket/src/main/scala/coherence.scala | 29 ++++++++++++++++----------- rocket/src/main/scala/nbdcache.scala | 19 +++++++++--------- rocket/src/main/scala/top.scala | 16 ++++++--------- rocket/src/main/scala/util.scala | 27 +++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 31 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index fc32b6eb..7cf36e8a 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -237,7 +237,7 @@ trait FourStateCoherence extends CoherencePolicy { class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherence { val io = new Bundle { val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip - val p_data = (new ioPipe) { new TrackerProbeData } + val p_data = (new ioPipe) { new TrackerProbeData }.flip val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) val p_rep_cnt_dec = Bits(ntiles, INPUT) @@ -387,10 +387,10 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc p_req_flags := p_req_flags & ~io.p_req_cnt_inc // unflag sent reqs } when(io.p_rep_cnt_dec.orR) { - val p_rep_count_next = p_rep_count - PopCount(io.p_rep_cnt_dec) + val dec = PopCount(io.p_rep_cnt_dec) io.pop_p_rep := io.p_rep_cnt_dec - if(ntiles > 1) p_rep_count := p_rep_count_next - when(p_rep_count === UFix(1)) { + if(ntiles > 1) p_rep_count := p_rep_count - dec + when(p_rep_count === dec) { io.pop_p_rep := Bool(true) state := s_mem } @@ -536,7 +536,9 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS // Free finished transactions for( j <- 0 until ntiles ) { val finish = io.tiles(j).xact_finish - 
do_free_arr(finish.bits.global_xact_id) := finish.valid + when (finish.valid) { + do_free_arr(finish.bits.global_xact_id) := Bool(true) + } finish.ready := Bool(true) } @@ -552,18 +554,19 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS rep.bits.data := io.mem.resp.bits.data rep.bits.require_ack := Bool(true) rep.valid := Bool(false) - when(io.mem.resp.valid) { + when(io.mem.resp.valid && (UFix(j) === init_tile_id_arr(mem_idx))) { rep.bits.t_type := getTransactionReplyType(t_type_arr(mem_idx), sh_count_arr(mem_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx) rep.bits.global_xact_id := mem_idx - rep.valid := (UFix(j) === init_tile_id_arr(mem_idx)) + rep.valid := Bool(true) } . otherwise { rep.bits.t_type := getTransactionReplyType(t_type_arr(ack_idx), sh_count_arr(ack_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) rep.bits.global_xact_id := ack_idx - val do_send_ack = (UFix(j) === init_tile_id_arr(ack_idx)) && send_x_rep_ack_arr.toBits.orR - rep.valid := do_send_ack - sent_x_rep_ack_arr(ack_idx) := do_send_ack + when (UFix(j) === init_tile_id_arr(ack_idx)) { + rep.valid := send_x_rep_ack_arr.toBits.orR + sent_x_rep_ack_arr(ack_idx) := Bool(true) + } } } // If there were a ready signal due to e.g. 
intervening network use: @@ -594,8 +597,10 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS p_rep_data_dep_list(j).io.enq.valid := do_pop p_rep_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_p_reps) p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) - p_data_valid_arr(idx) := p_rep.valid && probeReplyHasData(p_rep.bits) - p_data_tile_id_arr(idx) := UFix(j) + when (p_rep.valid) { + p_data_valid_arr(idx) := probeReplyHasData(p_rep.bits) + p_data_tile_id_arr(idx) := UFix(j) + } p_rep_data_dep_list(j).io.deq.ready := foldR(trackerList.map(_.io.pop_p_rep_dep(j).toBool))(_||_) } for( i <- 0 until NGLOBAL_XACTS ) { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 4e2fe515..6ba02488 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -232,8 +232,9 @@ class MSHR(id: Int) extends Component with FourStateCoherence { } when (abort) { state := s_refill_req } } - when (state === s_refill_req && io.mem_req.ready) { - state := Mux(flush, s_drain_rpq, s_refill_resp) + when (state === s_refill_req) { + when (flush) { state := s_drain_rpq } + .elsewhen (io.mem_req.ready) { state := s_refill_resp } } when (state === s_wb_resp) { when (reply) { state := s_refill_req } @@ -502,6 +503,7 @@ class ProbeUnit extends Component with FourStateCoherence { val line_state = Reg() { UFix() } val way_oh = Reg() { Bits() } val req = Reg() { new ProbeRequest() } + val hit = way_oh.orR when ((state === s_writeback_resp) && io.wb_req.ready) { state := s_invalid @@ -510,7 +512,7 @@ class ProbeUnit extends Component with FourStateCoherence { state := s_writeback_resp } when ((state === s_probe_rep) && io.meta_req.ready && io.rep.ready) { - state := Mux(way_oh.orR && needsWriteback(line_state), s_writeback_req, s_invalid) + state := Mux(hit && needsWriteback(line_state), s_writeback_req, s_invalid) } when (state === s_meta_resp) { way_oh := 
io.tag_match_way_oh @@ -527,22 +529,21 @@ class ProbeUnit extends Component with FourStateCoherence { io.req.ready := state === s_invalid io.rep.valid := state === s_probe_rep && io.meta_req.ready - io.rep.bits := newProbeReply(req, line_state) + io.rep.bits := newProbeReply(req, Mux(hit, line_state, newStateOnFlush())) - val new_state = newStateOnProbeReq(req, line_state) - io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_probe_rep && new_state != line_state + io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_probe_rep && hit io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, ~UFix(0, NWAYS)) io.meta_req.bits.inner_req.rw := state === s_probe_rep io.meta_req.bits.inner_req.idx := req.address - io.meta_req.bits.inner_req.data.state := new_state - io.meta_req.bits.inner_req.data.tag := req.address >> UFix(OFFSET_BITS) + io.meta_req.bits.inner_req.data.state := newStateOnProbeReq(req, line_state) + io.meta_req.bits.inner_req.data.tag := req.address >> UFix(IDX_BITS) io.mshr_req.valid := state === s_meta_resp io.address := req.address io.wb_req.valid := state === s_writeback_req io.wb_req.bits.way_oh := way_oh io.wb_req.bits.idx := req.address - io.wb_req.bits.tag := req.address >> UFix(OFFSET_BITS) + io.wb_req.bits.tag := req.address >> UFix(IDX_BITS) } class FlushUnit(lines: Int) extends Component with FourStateCoherence{ diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index c2932513..a0743ead 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -21,35 +21,31 @@ class Top() extends Component { val icache_pf = new rocketIPrefetcher(); val dcache = new HellaCacheUniproc(); - val arbiter = new rocketMemArbiter(3 + (if (HAVE_VEC) 1 else 0)); + val arbiter = new rocketMemArbiter(2 + (if (HAVE_VEC) 1 else 0)); arbiter.io.requestor(0) <> dcache.io.mem arbiter.io.requestor(1) <> icache_pf.io.mem - arbiter.io.requestor(2) <> htif.io.mem 
- val hub = new CoherenceHubNull + val hub = new CoherenceHubBroadcast(2) // connect tile to hub hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init) + hub.io.tiles(0).xact_init_data <> Queue(dcache.io.mem.xact_init_data) arbiter.io.mem.xact_abort <> Queue(hub.io.tiles(0).xact_abort) arbiter.io.mem.xact_rep <> Pipe(hub.io.tiles(0).xact_rep) hub.io.tiles(0).xact_finish <> Queue(arbiter.io.mem.xact_finish) dcache.io.mem.probe_req <> Queue(hub.io.tiles(0).probe_req) hub.io.tiles(0).probe_rep <> Queue(dcache.io.mem.probe_rep, 1) hub.io.tiles(0).probe_rep_data <> Queue(dcache.io.mem.probe_rep_data) + // connect HTIF to hub + hub.io.tiles(1) <> htif.io.mem // connect hub to memory io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) io.mem.req_data <> Queue(hub.io.mem.req_data) hub.io.mem.resp <> Pipe(io.mem.resp) - // temporary HTIF data connection - val data_arb = (new Arbiter(2)) { new TransactionInitData } - data_arb.io.in(0) <> Queue(dcache.io.mem.xact_init_data) - data_arb.io.in(1) <> Queue(htif.io.mem.xact_init_data) - hub.io.tiles(0).xact_init_data <> data_arb.io.out - if (HAVE_VEC) { val vicache = new rocketICache(128, 2); // 128 sets x 2 ways - arbiter.io.requestor(3) <> vicache.io.mem + arbiter.io.requestor(2) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu; } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 10f618c2..9d8ed9e1 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -185,6 +185,33 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { dout <> io.out.bits } +class RRArbiter[T <: Data](n: Int)(data: => T) extends Component { + val io = new ioArbiter(n)(data) + + val last_grant = Reg(resetVal = UFix(0, log2up(n))) + var valid = io.in(n-1).valid + var next_grant = UFix(n-1) + var mux = (new Mux1H(n)) { data } + + for (i <- n-2 to 0 by -1) { + valid = valid || io.in(i).valid + next_grant = Mux(io.in(i).valid, UFix(i), next_grant) + } + for (i <- n-1 to 1 by -1) + 
next_grant = Mux(last_grant < UFix(i) && io.in(i).valid, UFix(i), next_grant) + for (i <- 0 until n) { + mux.io.sel(i) := next_grant === UFix(i) + mux.io.in(i) := io.in(i).bits + io.in(i).ready := io.out.ready && next_grant === UFix(i) + } + when (valid && io.out.ready) { + last_grant := next_grant + } + + io.out.valid := valid + io.out.bits := mux.io.out +} + class ioLockingArbiter[T <: Data](n: Int)(data: => T) extends Bundle { val in = Vec(n) { (new ioDecoupled()) { data } }.flip val lock = Vec(n) { Bool() }.asInput From b5fa86e84493d05862df69a827250aad2a61f026 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 14 Mar 2012 17:51:12 -0700 Subject: [PATCH 0329/1087] 4-way associative by default --- rocket/src/main/scala/consts.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 5e2a5142..f671e29f 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -177,7 +177,7 @@ object Constants val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) val IDX_BITS = 7; val TAG_BITS = PADDR_BITS - OFFSET_BITS - IDX_BITS; - val NWAYS = 1; + val NWAYS = 4 require(IDX_BITS+OFFSET_BITS <= PGIDX_BITS); // coherence parameters From f972977da19cf563b113821fb0cf9d048a23cec1 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 15 Mar 2012 01:10:17 -0700 Subject: [PATCH 0330/1087] refactored VMU, now uses one skid buffer --- rocket/src/main/scala/cpu.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 8adaef61..3f902049 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -221,7 +221,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) storegen.io.din := vu.io.dmem_req.bits.data arb.io.requestor(DMEM_VU).req_val := vu.io.dmem_req.valid - arb.io.requestor(DMEM_VU).req_kill := 
Reg(vu.io.dmem_req.bits.kill) + arb.io.requestor(DMEM_VU).req_kill := vu.io.dmem_req.bits.kill arb.io.requestor(DMEM_VU).req_cmd := vu.io.dmem_req.bits.cmd arb.io.requestor(DMEM_VU).req_type := vu.io.dmem_req.bits.typ arb.io.requestor(DMEM_VU).req_idx := vu.io.dmem_req.bits.idx @@ -229,10 +229,9 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) arb.io.requestor(DMEM_VU).req_data := Reg(storegen.io.dout) arb.io.requestor(DMEM_VU).req_tag := vu.io.dmem_req.bits.tag + vu.io.dmem_req.ready := arb.io.requestor(DMEM_VU).req_rdy vu.io.dmem_resp.valid := Reg(arb.io.requestor(DMEM_VU).resp_val) - // the vu doesn't look at the ready signal, it's simply a nack - // but should be delayed one cycle to match the nack semantics - vu.io.dmem_resp.bits.nack := arb.io.requestor(DMEM_VU).resp_nack || Reg(!arb.io.requestor(DMEM_VU).req_rdy) + vu.io.dmem_resp.bits.nack := arb.io.requestor(DMEM_VU).resp_nack vu.io.dmem_resp.bits.data := arb.io.requestor(DMEM_VU).resp_data_subword vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DMEM_VU).resp_tag) vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DMEM_VU).resp_type) From 72006160dc4688f06e76f1e3616acf2edf8d22ad Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 15 Mar 2012 02:09:31 -0700 Subject: [PATCH 0331/1087] fix vxcptwait inst bug, it was incorrect when exception_valid was on before do_xcptwait --- rocket/src/main/scala/ctrl_vec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 5a9f5576..55466cd2 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -223,8 +223,8 @@ class rocketCtrlVec extends Component val reg_xcptwait = Reg(resetVal = Bool(false)) val do_xcptwait = valid_common && wb_vec_xcptwait && !io.replay - when (do_xcptwait) { reg_xcptwait := Bool(true) } when (io.iface.exception_ack_valid) { reg_xcptwait := Bool(false) } + when (do_xcptwait) { 
reg_xcptwait := Bool(true) } io.iface.exception := io.exception && io.sr_ev io.iface.exception_ack_ready := reg_xcptwait From ba566f246e9ac27aa88ab2b35c108366e4e8dacb Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 15 Mar 2012 15:35:12 -0700 Subject: [PATCH 0332/1087] change icache parameters --- rocket/src/main/scala/top.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index a0743ead..6c62a428 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -17,7 +17,7 @@ class Top() extends Component { val htif = new rocketHTIF(htif_width, 1) val cpu = new rocketProc(resetSignal = htif.io.cpu(0).reset); - val icache = new rocketICache(128, 4) // 128 sets x 4 ways (32KB) val icache_pf = new rocketIPrefetcher(); val dcache = new HellaCacheUniproc(); @@ -44,7 +44,7 @@ class Top() extends Component { if (HAVE_VEC) { - val vicache = new rocketICache(128, 2); // 128 sets x 2 ways + val vicache = new rocketICache(128, 1); // 128 sets x 1 ways (8KB) arbiter.io.requestor(2) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu; } From 2b0bc8df2b0f66c3fc5a9c73ebbc22ea969e2e34 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 15 Mar 2012 18:36:07 -0700 Subject: [PATCH 0333/1087] use divided clk for htif. UPDATE YOUR FESVR by default, we now load programs via a backdoor, because otherwise it takes too long to simulate. 
--- rocket/src/main/scala/slowio.scala | 49 ++++++++++++++++++++++++++++++ rocket/src/main/scala/top.scala | 10 +++++- 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 rocket/src/main/scala/slowio.scala diff --git a/rocket/src/main/scala/slowio.scala b/rocket/src/main/scala/slowio.scala new file mode 100644 index 00000000..c1535044 --- /dev/null +++ b/rocket/src/main/scala/slowio.scala @@ -0,0 +1,49 @@ +package rocket + +import Chisel._ +import Constants._ + +class slowIO[T <: Data](divisor: Int, hold_cycles: Int)(data: => T) extends Component +{ + val io = new Bundle { + val out_fast = new ioDecoupled()(data).flip + val out_slow = new ioDecoupled()(data) + + val in_fast = new ioDecoupled()(data) + val in_slow = new ioDecoupled()(data).flip + + val clk_slow = Bool(OUTPUT) + } + + require((divisor & (divisor-1)) == 0) + require(hold_cycles < divisor/2 && hold_cycles >= 2) + + val cnt = Reg() { UFix(width = log2up(divisor)) } + cnt := cnt + UFix(1) + val out_en = cnt === UFix(divisor/2+hold_cycles-1) // rising edge + hold time + val in_en = cnt === UFix(divisor/2-1) // rising edge + + val in_slow_rdy = Reg(resetVal = Bool(false)) + val out_slow_val = Reg(resetVal = Bool(false)) + val out_slow_bits = Reg() { data } + + val fromhost_q = new queue(1)(data) + fromhost_q.io.enq.valid := in_en && io.in_slow.valid && in_slow_rdy + fromhost_q.io.enq.bits := io.in_slow.bits + fromhost_q.io.deq <> io.in_fast + + val tohost_q = new queue(1)(data) + tohost_q.io.enq <> io.out_fast + tohost_q.io.deq.ready := in_en && io.out_slow.ready && out_slow_val + + when (out_en) { + in_slow_rdy := fromhost_q.io.enq.ready + out_slow_val := tohost_q.io.deq.valid + out_slow_bits := tohost_q.io.deq.bits + } + + io.in_slow.ready := in_slow_rdy + io.out_slow.valid := out_slow_val + io.out_slow.bits := out_slow_bits + io.clk_slow := cnt(log2up(divisor)-1).toBool +} diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 6c62a428..dcd394dc 100644 --- 
a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -7,6 +7,7 @@ import Constants._; class ioTop(htif_width: Int) extends Bundle { val debug = new ioDebug(); val host = new ioHost(htif_width); + val host_clk = Bool(OUTPUT) val mem = new ioMem } @@ -49,7 +50,14 @@ class Top() extends Component { cpu.io.vimem <> vicache.io.cpu; } - htif.io.host <> io.host + // pad out the HTIF using a divided clock + val slow_io = (new slowIO(64, 16)) { Bits(width = htif_width) } + htif.io.host.out <> slow_io.io.out_fast + io.host.out <> slow_io.io.out_slow + htif.io.host.in <> slow_io.io.in_fast + io.host.in <> slow_io.io.in_slow + io.host_clk := slow_io.io.clk_slow + cpu.io.host <> htif.io.cpu(0); cpu.io.debug <> io.debug; From 4684171ac6b246e664ebf4c561002b56a12b80cc Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 15 Mar 2012 21:23:21 -0700 Subject: [PATCH 0334/1087] fix fence.i for associative caches --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 6ba02488..8e813f01 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -970,7 +970,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { mshr.io.req.bits.offset := r_cpu_req_idx(offsetmsb,0) mshr.io.req.bits.cmd := r_cpu_req_cmd mshr.io.req.bits.typ := r_cpu_req_type - mshr.io.req.bits.way_oh := Mux(tag_match, tag_match_way_oh, replaced_way_oh) + mshr.io.req.bits.way_oh := Mux(tag_match && !flusher.io.mshr_req.valid, tag_match_way_oh, replaced_way_oh) mshr.io.req.bits.data := cpu_req_data mshr.io.mem_rep <> io.mem.xact_rep From 820884c7e6d8997b953a3e1fd477504d9717be02 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 15 Mar 2012 23:08:30 -0700 Subject: [PATCH 0335/1087] fix probes for smaller cache sizes address bits (pgidx_bits-1,taglsb) were omitted from tag checks. 
--- rocket/src/main/scala/nbdcache.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 8e813f01..df454ed4 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -836,6 +836,9 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { r_amo_replay_data := mshr.io.data_req.bits.data r_way_oh := mshr.io.data_req.bits.way_oh } + when (prober.io.meta_req.valid) { + r_cpu_req_idx := Cat(prober.io.meta_req.bits.inner_req.data.tag, prober.io.meta_req.bits.inner_req.idx, mshr.io.data_req.bits.offset)(PGIDX_BITS-1,0) + } when (flusher.io.meta_req.valid) { r_cpu_req_idx := Cat(flusher.io.meta_req.bits.inner_req.idx, mshr.io.data_req.bits.offset) r_cpu_req_cmd := M_FLA @@ -981,7 +984,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // replays val replay = mshr.io.data_req.bits - val stall_replay = r_replay_amo || p_amo || flusher.io.meta_req.valid || p_store_valid + val stall_replay = r_replay_amo || p_amo || flusher.io.meta_req.valid || prober.io.meta_req.valid || p_store_valid val replay_val = mshr.io.data_req.valid val replay_fire = replay_val && !stall_replay val replay_rdy = data_arb.io.in(1).ready && !stall_replay From cfca2d141197147a321b379994ea59c6e1de5332 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 16 Mar 2012 00:44:16 -0700 Subject: [PATCH 0336/1087] clean up cache interfaces; avoid reserved keywords --- rocket/src/main/scala/dpath_alu.scala | 4 +- rocket/src/main/scala/nbdcache.scala | 161 ++++++++++++-------------- 2 files changed, 74 insertions(+), 91 deletions(-) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index c15b08a8..38ae335a 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -38,7 +38,7 @@ class rocketDpathALU extends Component val shin = Mux(shright, shin_r, 
Reverse(shin_r)) val shout_r = (Cat(sra & shin_r(63), shin).toFix >> shamt)(63,0) - val logic = + val bitwise_logic = Mux(io.fn === FN_AND, io.in1 & io.in2, Mux(io.fn === FN_OR, io.in1 | io.in2, Mux(io.fn === FN_XOR, io.in1 ^ io.in2, @@ -49,7 +49,7 @@ class rocketDpathALU extends Component Mux(io.fn === FN_SLT || io.fn === FN_SLTU, less, Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, Mux(io.fn === FN_SL, Reverse(shout_r), - logic)))) + bitwise_logic)))) val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) io.out := Cat(out_hi, out64(31,0)).toUFix diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index df454ed4..97639207 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -29,9 +29,9 @@ class StoreMaskGen extends Component { val word = (io.typ === MT_W) || (io.typ === MT_WU) val half = (io.typ === MT_H) || (io.typ === MT_HU) - val byte = (io.typ === MT_B) || (io.typ === MT_BU) + val byte_ = (io.typ === MT_B) || (io.typ === MT_BU) - io.wmask := Mux(byte, Bits( 1,1) << io.addr(2,0).toUFix, + io.wmask := Mux(byte_, Bits( 1,1) << io.addr(2,0).toUFix, Mux(half, Bits( 3,2) << Cat(io.addr(2,1), Bits(0,1)).toUFix, Mux(word, Bits( 15,4) << Cat(io.addr(2), Bits(0,2)).toUFix, Bits(255,8)))); @@ -46,9 +46,9 @@ class StoreDataGen extends Component { val word = (io.typ === MT_W) || (io.typ === MT_WU) val half = (io.typ === MT_H) || (io.typ === MT_HU) - val byte = (io.typ === MT_B) || (io.typ === MT_BU) + val byte_ = (io.typ === MT_B) || (io.typ === MT_BU) - io.dout := Mux(byte, Fill(8, io.din( 7,0)), + io.dout := Mux(byte_, Fill(8, io.din( 7,0)), Mux(half, Fill(4, io.din(15,0)), Mux(word, Fill(2, io.din(31,0)), io.din))) @@ -69,7 +69,7 @@ class LoadDataGen extends Component { (io.typ === MT_W) || (io.typ === MT_D) val word = (io.typ === MT_W) || (io.typ === MT_WU) val half = (io.typ === MT_H) || (io.typ === MT_HU) - val byte = (io.typ === MT_B) || (io.typ === MT_BU) + val byte_ = 
(io.typ === MT_B) || (io.typ === MT_BU) val shifted = io.din >> Cat(io.addr(io.addr.width-1,2), Bits(0, 5)).toUFix val extended = @@ -78,7 +78,7 @@ class LoadDataGen extends Component { val r_extended = Reg(extended) val r_sext = Reg(sext) val r_half = Reg(half) - val r_byte = Reg(byte) + val r_byte = Reg(byte_) val r_addr = Reg(io.addr) val shifted_subword = r_extended >> Cat(r_addr(1,0), Bits(0, 3)).toUFix @@ -131,6 +131,7 @@ class DataReq extends Bundle { } class DataArrayReq extends Bundle { + val way_en = Bits(width = NWAYS) val idx = Bits(width = IDX_BITS) val offset = Bits(width = log2up(REFILL_CYCLES)) val rw = Bool() @@ -138,11 +139,6 @@ class DataArrayReq extends Bundle { val data = Bits(width = MEM_DATA_BITS) } -class DataArrayArrayReq extends Bundle { - val inner_req = new DataArrayReq() - val way_en = Bits(width = NWAYS) -} - class WritebackReq extends Bundle { val tag = Bits(width = TAG_BITS) val idx = Bits(width = IDX_BITS) @@ -156,16 +152,12 @@ class MetaData extends Bundle { } class MetaArrayReq extends Bundle { + val way_en = Bits(width = NWAYS) val idx = Bits(width = IDX_BITS) val rw = Bool() val data = new MetaData() } -class MetaArrayArrayReq extends Bundle { - val inner_req = new MetaArrayReq() - val way_en = Bits(width = NWAYS) -} - class MSHR(id: Int) extends Component with FourStateCoherence { val io = new Bundle { val req_pri_val = Bool(INPUT) @@ -182,7 +174,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { val way_oh = Bits(NWAYS, OUTPUT) val mem_req = (new ioDecoupled) { new TransactionInit } - val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } + val meta_req = (new ioDecoupled) { new MetaArrayReq() } val replay = (new ioDecoupled) { new Replay() } val mem_abort = (new ioPipe) { new TransactionAbort }.flip val mem_rep = (new ioPipe) { new TransactionReply }.flip @@ -270,10 +262,10 @@ class MSHR(id: Int) extends Component with FourStateCoherence { io.req_sec_rdy := sec_rdy && rpq.io.enq.ready io.meta_req.valid 
:= (state === s_drain_rpq) && !rpq.io.deq.valid && !finish_q.io.deq.valid - io.meta_req.bits.inner_req.rw := Bool(true) - io.meta_req.bits.inner_req.idx := req.idx - io.meta_req.bits.inner_req.data.state := line_state - io.meta_req.bits.inner_req.data.tag := req.tag + io.meta_req.bits.rw := Bool(true) + io.meta_req.bits.idx := req.idx + io.meta_req.bits.data.state := line_state + io.meta_req.bits.data.tag := req.tag io.meta_req.bits.way_en := req.way_oh io.wb_req.valid := (state === s_wb_req) @@ -309,7 +301,7 @@ class MSHRFile extends Component { val fence_rdy = Bool(OUTPUT) val mem_req = (new ioDecoupled) { new TransactionInit } - val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } + val meta_req = (new ioDecoupled) { new MetaArrayReq() } val data_req = (new ioDecoupled) { new DataReq() } val mem_abort = (new ioPipe) { new TransactionAbort }.flip val mem_rep = (new ioPipe) { new TransactionReply }.flip @@ -332,8 +324,8 @@ class MSHRFile extends Component { val tag_mux = (new Mux1H(NMSHR)){ Bits(width = TAG_BITS) } val wb_probe_mux = (new Mux1H(NMSHR)) { new WritebackReq } - val mem_resp_mux = (new Mux1H(NMSHR)){ new DataArrayArrayReq } - val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayArrayReq() } + val mem_resp_mux = (new Mux1H(NMSHR)){ new DataArrayReq } + val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() } val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } val mem_finish_arb = (new Arbiter(NMSHR)) { new TransactionFinish } val wb_req_arb = (new Arbiter(NMSHR)) { new WritebackReq } @@ -376,8 +368,8 @@ class MSHRFile extends Component { mshr.io.mem_abort <> io.mem_abort mshr.io.mem_rep <> io.mem_rep mem_resp_mux.io.sel(i) := UFix(i) === io.mem_rep.bits.tile_xact_id - mem_resp_mux.io.in(i).inner_req.idx := mshr.io.idx - mem_resp_mux.io.in(i).inner_req.offset := mshr.io.refill_count + mem_resp_mux.io.in(i).idx := mshr.io.idx + mem_resp_mux.io.in(i).offset := mshr.io.refill_count mem_resp_mux.io.in(i).way_en := mshr.io.way_oh 
pri_rdy = pri_rdy || mshr.io.req_pri_rdy @@ -397,8 +389,8 @@ class MSHRFile extends Component { io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.secondary_miss := idx_match - io.mem_resp_idx := mem_resp_mux.io.out.inner_req.idx - io.mem_resp_offset := mem_resp_mux.io.out.inner_req.offset + io.mem_resp_idx := mem_resp_mux.io.out.idx + io.mem_resp_offset := mem_resp_mux.io.out.offset io.mem_resp_way_oh := mem_resp_mux.io.out.way_en io.fence_rdy := !fence io.probe.ready := (refill_probe_rdy || !tag_match) && (writeback_probe_rdy || !wb_probe_match) @@ -421,7 +413,7 @@ class WritebackUnit extends Component { val io = new Bundle { val req = (new ioDecoupled) { new WritebackReq() }.flip val probe = (new ioDecoupled) { new WritebackReq() }.flip - val data_req = (new ioDecoupled) { new DataArrayArrayReq() } + val data_req = (new ioDecoupled) { new DataArrayReq() } val data_resp = Bits(MEM_DATA_BITS, INPUT) val mem_req = (new ioDecoupled) { new TransactionInit } val mem_req_data = (new ioDecoupled) { new TransactionInitData } @@ -470,11 +462,11 @@ class WritebackUnit extends Component { io.probe.ready := !valid io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) io.data_req.bits.way_en := req.way_oh - io.data_req.bits.inner_req.idx := req.idx - io.data_req.bits.inner_req.offset := cnt - io.data_req.bits.inner_req.rw := Bool(false) - io.data_req.bits.inner_req.wmask := Bits(0) - io.data_req.bits.inner_req.data := Bits(0) + io.data_req.bits.idx := req.idx + io.data_req.bits.offset := cnt + io.data_req.bits.rw := Bool(false) + io.data_req.bits.wmask := Bits(0) + io.data_req.bits.data := Bits(0) io.mem_req.valid := valid && !cmd_sent io.mem_req.bits.t_type := X_INIT_WRITE_UNCACHED @@ -490,7 +482,7 @@ class ProbeUnit extends Component with FourStateCoherence { val io = new Bundle { val req = (new ioDecoupled) { new ProbeRequest }.flip val rep = (new ioDecoupled) { new ProbeReply } - val meta_req = (new ioDecoupled) { new MetaArrayArrayReq } + val 
meta_req = (new ioDecoupled) { new MetaArrayReq } val mshr_req = (new ioDecoupled) { Bool() } val wb_req = (new ioDecoupled) { new WritebackReq } val tag_match_way_oh = Bits(NWAYS, INPUT) @@ -533,10 +525,10 @@ class ProbeUnit extends Component with FourStateCoherence { io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_probe_rep && hit io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, ~UFix(0, NWAYS)) - io.meta_req.bits.inner_req.rw := state === s_probe_rep - io.meta_req.bits.inner_req.idx := req.address - io.meta_req.bits.inner_req.data.state := newStateOnProbeReq(req, line_state) - io.meta_req.bits.inner_req.data.tag := req.address >> UFix(IDX_BITS) + io.meta_req.bits.rw := state === s_probe_rep + io.meta_req.bits.idx := req.address + io.meta_req.bits.data.state := newStateOnProbeReq(req, line_state) + io.meta_req.bits.data.tag := req.address >> UFix(IDX_BITS) io.mshr_req.valid := state === s_meta_resp io.address := req.address @@ -549,7 +541,7 @@ class ProbeUnit extends Component with FourStateCoherence { class FlushUnit(lines: Int) extends Component with FourStateCoherence{ val io = new Bundle { val req = (new ioDecoupled) { Bool() }.flip - val meta_req = (new ioDecoupled) { new MetaArrayArrayReq() } + val meta_req = (new ioDecoupled) { new MetaArrayReq() } val mshr_req = (new ioDecoupled) { Bool() } } @@ -589,10 +581,10 @@ class FlushUnit(lines: Int) extends Component with FourStateCoherence{ io.mshr_req.valid := state === s_meta_wait io.meta_req.valid := (state === s_meta_read) || (state === s_reset) io.meta_req.bits.way_en := UFixToOH(way_cnt, NWAYS) - io.meta_req.bits.inner_req.idx := idx_cnt - io.meta_req.bits.inner_req.rw := (state === s_reset) - io.meta_req.bits.inner_req.data.state := newStateOnFlush() - io.meta_req.bits.inner_req.data.tag := UFix(0) + io.meta_req.bits.idx := idx_cnt + io.meta_req.bits.rw := (state === s_reset) + io.meta_req.bits.data.state := newStateOnFlush() + io.meta_req.bits.data.tag := 
UFix(0) } class MetaDataArray(lines: Int) extends Component { @@ -621,9 +613,9 @@ class MetaDataArray(lines: Int) extends Component { class MetaDataArrayArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip + val req = (new ioDecoupled) { new MetaArrayReq() }.flip val resp = Vec(NWAYS){ (new MetaData).asOutput } - val state_req = (new ioDecoupled) { new MetaArrayArrayReq() }.flip + val state_req = (new ioDecoupled) { new MetaArrayReq() }.flip val way_en = Bits(width = NWAYS, dir = OUTPUT) } @@ -632,22 +624,18 @@ class MetaDataArrayArray(lines: Int) extends Component { way_en_ := io.req.bits.way_en } - var tag_ready = Bool(true) - var state_ready = Bool(true) for(w <- 0 until NWAYS) { val way = new MetaDataArray(lines) - way.io.req.bits <> io.req.bits.inner_req - tag_ready = tag_ready && way.io.req.ready + way.io.req.bits <> io.req.bits way.io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool - way.io.state_req.bits <> io.state_req.bits.inner_req - state_ready = state_ready && way.io.state_req.ready + way.io.state_req.bits <> io.state_req.bits way.io.state_req.valid := io.state_req.valid && io.state_req.bits.way_en(w).toBool way.io.resp <> io.resp(w) } io.way_en := way_en_ - io.req.ready := tag_ready - io.state_req.ready := state_ready + io.req.ready := Bool(true) + io.state_req.ready := Bool(true) } class DataArray(lines: Int) extends Component { @@ -669,7 +657,7 @@ class DataArray(lines: Int) extends Component { class DataArrayArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new DataArrayArrayReq() }.flip + val req = (new ioDecoupled) { new DataArrayReq() }.flip val resp = Vec(NWAYS){ Bits(width = MEM_DATA_BITS, dir = OUTPUT) } val way_en = Bits(width = NWAYS, dir = OUTPUT) } @@ -679,20 +667,15 @@ class DataArrayArray(lines: Int) extends Component { way_en_ := io.req.bits.way_en } - //val data_ready_arr = Vec(NWAYS){ Bool() } - var data_ready = 
Bool(true) for(w <- 0 until NWAYS) { val way = new DataArray(lines) - way.io.req.bits <> io.req.bits.inner_req - //data_ready_arr(w) := way.io.req.ready - data_ready = data_ready && way.io.req.ready + way.io.req.bits <> io.req.bits way.io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool way.io.resp <> io.resp(w) } io.way_en := way_en_ - //io.req.ready := Cat(data_ready_arr).andR.toBool - io.req.ready := data_ready + io.req.ready := Bool(true) } class AMOALU extends Component { @@ -837,10 +820,10 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { r_way_oh := mshr.io.data_req.bits.way_oh } when (prober.io.meta_req.valid) { - r_cpu_req_idx := Cat(prober.io.meta_req.bits.inner_req.data.tag, prober.io.meta_req.bits.inner_req.idx, mshr.io.data_req.bits.offset)(PGIDX_BITS-1,0) + r_cpu_req_idx := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, mshr.io.data_req.bits.offset)(PGIDX_BITS-1,0) } when (flusher.io.meta_req.valid) { - r_cpu_req_idx := Cat(flusher.io.meta_req.bits.inner_req.idx, mshr.io.data_req.bits.offset) + r_cpu_req_idx := Cat(flusher.io.meta_req.bits.idx, mshr.io.data_req.bits.offset) r_cpu_req_cmd := M_FLA r_way_oh := flusher.io.meta_req.bits.way_en } @@ -856,19 +839,19 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // tags val meta = new MetaDataArrayArray(lines) - val meta_arb = (new Arbiter(4)) { new MetaArrayArrayReq() } + val meta_arb = (new Arbiter(4)) { new MetaArrayReq() } flusher.io.meta_req <> meta_arb.io.in(0) meta_arb.io.out <> meta.io.req // data val data = new DataArrayArray(lines) - val data_arb = (new Arbiter(5)) { new DataArrayArrayReq() } + val data_arb = (new Arbiter(5)) { new DataArrayReq() } data_arb.io.out <> data.io.req // cpu tag check meta_arb.io.in(3).valid := io.cpu.req_val - meta_arb.io.in(3).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) - meta_arb.io.in(3).bits.inner_req.rw := Bool(false) + meta_arb.io.in(3).bits.idx := 
io.cpu.req_idx(indexmsb,indexlsb) + meta_arb.io.in(3).bits.rw := Bool(false) meta_arb.io.in(3).bits.way_en := ~UFix(0, NWAYS) val early_tag_nack = !meta_arb.io.in(3).ready val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.address >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req_ppn) @@ -905,18 +888,18 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val meta_wb_mux = Mux1H(replaced_way_oh, meta.io.resp) // refill response - data_arb.io.in(0).bits.inner_req.offset := mshr.io.mem_resp_offset - data_arb.io.in(0).bits.inner_req.idx := mshr.io.mem_resp_idx - data_arb.io.in(0).bits.inner_req.rw := Bool(true) - data_arb.io.in(0).bits.inner_req.wmask := ~UFix(0, MEM_DATA_BITS/8) - data_arb.io.in(0).bits.inner_req.data := io.mem.xact_rep.bits.data + data_arb.io.in(0).bits.offset := mshr.io.mem_resp_offset + data_arb.io.in(0).bits.idx := mshr.io.mem_resp_idx + data_arb.io.in(0).bits.rw := Bool(true) + data_arb.io.in(0).bits.wmask := ~UFix(0, MEM_DATA_BITS/8) + data_arb.io.in(0).bits.data := io.mem.xact_rep.bits.data data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh data_arb.io.in(0).valid := io.mem.xact_rep.valid && (io.mem.xact_rep.bits.t_type === X_REP_READ_SHARED || io.mem.xact_rep.bits.t_type === X_REP_READ_EXCLUSIVE) // load hits - data_arb.io.in(4).bits.inner_req.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) - data_arb.io.in(4).bits.inner_req.idx := io.cpu.req_idx(indexmsb,indexlsb) - data_arb.io.in(4).bits.inner_req.rw := Bool(false) + data_arb.io.in(4).bits.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) + data_arb.io.in(4).bits.idx := io.cpu.req_idx(indexmsb,indexlsb) + data_arb.io.in(4).bits.rw := Bool(false) data_arb.io.in(4).valid := io.cpu.req_val && req_read data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check val early_load_nack = req_read && !data_arb.io.in(4).ready @@ -929,9 +912,9 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val p_store_offset_match = 
(r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) val p_store_match = r_cpu_req_val_ && r_req_read && p_store_idx_match && p_store_offset_match val drain_store_val = (p_store_valid && (!io.cpu.req_val || !req_read || wb.io.data_req.valid || mshr.io.data_req.valid)) || p_store_match - data_arb.io.in(2).bits.inner_req.offset := p_store_idx(offsetmsb,ramindexlsb) - data_arb.io.in(2).bits.inner_req.idx := p_store_idx(indexmsb,indexlsb) - data_arb.io.in(2).bits.inner_req.rw := Bool(true) + data_arb.io.in(2).bits.offset := p_store_idx(offsetmsb,ramindexlsb) + data_arb.io.in(2).bits.idx := p_store_idx(indexmsb,indexlsb) + data_arb.io.in(2).bits.rw := Bool(true) data_arb.io.in(2).valid := drain_store_val data_arb.io.in(2).bits.way_en := p_store_way_oh val drain_store = drain_store_val && data_arb.io.in(2).ready @@ -943,9 +926,9 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // tag update after a store to an exclusive clean line. val new_hit_state = newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) val set_hit_state = r_cpu_req_val && tag_hit && meta_resp_mux.state != new_hit_state - meta.io.state_req.bits.inner_req.rw := Bool(true) - meta.io.state_req.bits.inner_req.idx := Reg(r_cpu_req_idx(indexmsb,indexlsb)) - meta.io.state_req.bits.inner_req.data.state := Reg(new_hit_state) + meta.io.state_req.bits.rw := Bool(true) + meta.io.state_req.bits.idx := Reg(r_cpu_req_idx(indexmsb,indexlsb)) + meta.io.state_req.bits.data.state := Reg(new_hit_state) meta.io.state_req.bits.way_en := Reg(tag_match_way_oh) meta.io.state_req.valid := Reg(set_hit_state, resetVal = Bool(false)) @@ -988,9 +971,9 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val replay_val = mshr.io.data_req.valid val replay_fire = replay_val && !stall_replay val replay_rdy = data_arb.io.in(1).ready && !stall_replay - data_arb.io.in(1).bits.inner_req.offset := replay.offset(offsetmsb,ramindexlsb) - data_arb.io.in(1).bits.inner_req.idx := 
replay.idx - data_arb.io.in(1).bits.inner_req.rw := replay.cmd === M_XWR + data_arb.io.in(1).bits.offset := replay.offset(offsetmsb,ramindexlsb) + data_arb.io.in(1).bits.idx := replay.idx + data_arb.io.in(1).bits.rw := replay.cmd === M_XWR data_arb.io.in(1).valid := replay_fire data_arb.io.in(1).bits.way_en := mshr.io.data_req.bits.way_oh mshr.io.data_req.ready := replay_rdy @@ -1005,10 +988,10 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val store_wmask_wide = maskgen.io.wmask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2up(CPU_DATA_BITS/8))).toUFix val store_data = Mux(!replay_fire, p_store_data, replay.data) val store_data_wide = Fill(MEM_DATA_BITS/CPU_DATA_BITS, store_data) - data_arb.io.in(1).bits.inner_req.data := store_data_wide - data_arb.io.in(1).bits.inner_req.wmask := store_wmask_wide - data_arb.io.in(2).bits.inner_req.data := store_data_wide - data_arb.io.in(2).bits.inner_req.wmask := store_wmask_wide + data_arb.io.in(1).bits.data := store_data_wide + data_arb.io.in(1).bits.wmask := store_wmask_wide + data_arb.io.in(2).bits.data := store_data_wide + data_arb.io.in(2).bits.wmask := store_wmask_wide // load data subword mux/sign extension. // subword loads are delayed by one cycle. 
From f0157b9e2a0d2e871f4bba001e1dce785d631a92 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 16 Mar 2012 01:24:07 -0700 Subject: [PATCH 0337/1087] fix coherence bug popping wrong store dependence queue --- rocket/src/main/scala/coherence.scala | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 7cf36e8a..9160a84b 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -295,12 +295,14 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc when(req_cmd.ready && req_cmd.valid) { cmd_sent := Bool(true) } - when(req_data.ready && req_data.valid) { - pop_data := UFix(1) << tile_id - mem_cnt := mem_cnt_next - when(mem_cnt_next === UFix(0)) { - pop_dep := UFix(1) << tile_id - trigger := Bool(false) + when(req_data.ready && at_front_of_dep_queue) { + pop_data := UFix(1) << tile_id + when (data.valid) { + mem_cnt := mem_cnt_next + when(mem_cnt_next === UFix(0)) { + pop_dep := UFix(1) << tile_id + trigger := Bool(false) + } } } } @@ -391,7 +393,6 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc io.pop_p_rep := io.p_rep_cnt_dec if(ntiles > 1) p_rep_count := p_rep_count - dec when(p_rep_count === dec) { - io.pop_p_rep := Bool(true) state := s_mem } } From d38603a4ee0d5b1c3f13e814eac1035ac5fd894d Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Fri, 16 Mar 2012 17:08:03 -0700 Subject: [PATCH 0338/1087] change number of tlb entries --- rocket/src/main/scala/consts.scala | 3 ++- rocket/src/main/scala/cpu.scala | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index f671e29f..a5a497df 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -218,8 +218,9 @@ object Constants val MEM_DATA_BITS = 128 val REFILL_CYCLES = (1 << 
OFFSET_BITS)*8/MEM_DATA_BITS - val DTLB_ENTRIES = 8; + val DTLB_ENTRIES = 16 val ITLB_ENTRIES = 8; + val VITLB_ENTRIES = 4 val START_ADDR = 0x2000; diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 3f902049..7acacb29 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -28,7 +28,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) val dtlb = new rocketDTLB(DTLB_ENTRIES); val itlb = new rocketITLB(ITLB_ENTRIES); - val vitlb = new rocketITLB(ITLB_ENTRIES); + val vitlb = new rocketITLB(VITLB_ENTRIES) val ptw = new rocketPTW(); val arb = new rocketDmemArbiter(DCACHE_PORTS) From 6c26921766bc251453f21a8878baba5f7bc54b8a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 16 Mar 2012 17:14:43 -0700 Subject: [PATCH 0339/1087] reduce D$ critical path through page table walker costs an extra cycle per page table level to resolve a TLB miss. too bad. --- rocket/src/main/scala/ptw.scala | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 524e26bb..c7229c96 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -108,11 +108,12 @@ class rocketPTW extends Component r_req_dest := Bool(false); req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; } - - when (io.dmem.resp_val) { - req_addr := Cat(io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)).toUFix; - r_resp_perm := io.dmem.resp_data(9,4); - r_resp_ppn := io.dmem.resp_data(PADDR_BITS-1, PGIDX_BITS); + + val dmem_resp_val = Reg(io.dmem.resp_val, resetVal = Bool(false)) + when (dmem_resp_val) { + req_addr := Cat(io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)).toUFix; + r_resp_perm := io.dmem.resp_data_subword(9,4); + r_resp_ppn := io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS); } io.dmem.req_val 
:= @@ -129,8 +130,8 @@ class rocketPTW extends Component val resp_val = (state === s_done) || (state === s_l1_fake) || (state === s_l2_fake); val resp_err = (state === s_error); - val resp_ptd = (io.dmem.resp_data(1,0) === Bits(1,2)); - val resp_pte = (io.dmem.resp_data(1,0) === Bits(2,2)); + val resp_ptd = (io.dmem.resp_data_subword(1,0) === Bits(1,2)); + val resp_pte = (io.dmem.resp_data_subword(1,0) === Bits(2,2)); io.dtlb.req_rdy := (state === s_ready) && !io.itlb.req_val; io.itlb.req_rdy := (state === s_ready); @@ -166,7 +167,7 @@ class rocketPTW extends Component when (io.dmem.resp_nack) { state := s_l1_req } - when (io.dmem.resp_val) { + when (dmem_resp_val) { when (resp_ptd) { // page table descriptor state := s_l2_req; } @@ -191,7 +192,7 @@ class rocketPTW extends Component when (io.dmem.resp_nack) { state := s_l2_req } - when (io.dmem.resp_val) { + when (dmem_resp_val) { when (resp_ptd) { // page table descriptor state := s_l3_req; } @@ -216,7 +217,7 @@ class rocketPTW extends Component when (io.dmem.resp_nack) { state := s_l3_req } - when (io.dmem.resp_val) { + when (dmem_resp_val) { when (resp_pte) { // page table entry state := s_done; } From a47eeb95714a071750498055ac4bdf0f834ceeab Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 16 Mar 2012 18:34:40 -0700 Subject: [PATCH 0340/1087] retime D$ bypass into beginning of EX stage --- rocket/src/main/scala/dpath.scala | 46 +++++++++++++++++++------------ 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 9299982d..463b154c 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -82,7 +82,6 @@ class rocketDpath extends Component val mem_reg_wdata = Reg() { Bits() }; val mem_reg_raddr1 = Reg() { UFix() }; val mem_reg_raddr2 = Reg() { UFix() }; - val mem_wdata = Wire() { Bits() }; // writeback definitions val wb_reg_pc = Reg() { UFix() }; @@ -90,6 +89,7 @@ class rocketDpath 
extends Component val wb_reg_rs2 = Reg() { Bits() }; val wb_reg_waddr = Reg() { UFix() } val wb_reg_wdata = Reg() { Bits() } + val wb_reg_dmem_wdata = Reg() { Bits() } val wb_reg_vec_waddr = Reg() { UFix() } val wb_reg_vec_wdata = Reg() { Bits() } val wb_reg_raddr1 = Reg() { UFix() }; @@ -163,15 +163,23 @@ class rocketDpath extends Component RA); // WA_RA // bypass muxes + val id_rs1_dmem_bypass = + Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, Bool(false), + Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, io.ctrl.mem_load, + Bool(false))) val id_rs1 = Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, ex_wdata, - Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, mem_wdata, + Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata, Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, id_rdata1))) + val id_rs2_dmem_bypass = + Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, Bool(false), + Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, io.ctrl.mem_load, + Bool(false))) val id_rs2 = Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, ex_wdata, - Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, mem_wdata, + Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata, Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, id_rdata2))) @@ -191,6 +199,7 @@ class rocketDpath extends Component Mux(id_imm_ibz, Cat(Fill(20, id_imm_sign), id_imm_small), Cat(Fill(7, id_imm_sign), id_reg_inst(31,7))))) // A2_JTYPE + val id_op2_dmem_bypass = id_rs2_dmem_bypass && io.ctrl.sel_alu2 === A2_RTYPE val id_op2 = Mux(io.ctrl.sel_alu2 === A2_RTYPE, id_rs2, id_imm) io.ctrl.inst := id_reg_inst @@ -223,19 +232,23 @@ class rocketDpath extends Component ex_reg_ctrl_eret := io.ctrl.id_eret; } + val ex_rs1 = Mux(Reg(id_rs1_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs1) + val ex_rs2 = Mux(Reg(id_rs2_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs2) + val ex_op2 = Mux(Reg(id_op2_dmem_bypass), wb_reg_dmem_wdata, ex_reg_op2) + 
alu.io.dw := ex_reg_ctrl_fn_dw; alu.io.fn := ex_reg_ctrl_fn_alu; - alu.io.in2 := ex_reg_op2.toUFix; - alu.io.in1 := ex_reg_rs1.toUFix; + alu.io.in2 := ex_op2.toUFix + alu.io.in1 := ex_rs1.toUFix - io.fpu.fromint_data := ex_reg_rs1 + io.fpu.fromint_data := ex_rs1 // divider val div = new rocketDivider(64) div.io.req.valid := ex_reg_ctrl_div_val div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_div_fn) - div.io.req.bits.in0 := ex_reg_rs1 - div.io.req.bits.in1 := ex_reg_rs2 + div.io.req.bits.in0 := ex_rs1 + div.io.req.bits.in1 := ex_rs2 div.io.req_tag := ex_reg_waddr div.io.req_kill := io.ctrl.killm div.io.resp_rdy := !dmem_resp_replay @@ -253,8 +266,8 @@ class rocketDpath extends Component } mul_io.req.valid := ex_reg_ctrl_mul_val; mul_io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_mul_fn) - mul_io.req.bits.in0 := ex_reg_rs1 - mul_io.req.bits.in1 := ex_reg_rs2 + mul_io.req.bits.in0 := ex_rs1 + mul_io.req.bits.in1 := ex_rs2 mul_io.req_tag := ex_reg_waddr mul_io.req_kill := io.ctrl.killm mul_io.resp_rdy := !dmem_resp_replay && !div.io.resp_val @@ -286,11 +299,11 @@ class rocketDpath extends Component io.debug.error_mode := pcr.io.debug.error_mode; // branch resolution logic - io.ctrl.br_eq := (ex_reg_rs1 === ex_reg_rs2); - io.ctrl.br_ltu := (ex_reg_rs1.toUFix < ex_reg_rs2.toUFix); + io.ctrl.br_eq := (ex_rs1 === ex_rs2) + io.ctrl.br_ltu := (ex_rs1.toUFix < ex_rs2.toUFix) io.ctrl.br_lt := - (~(ex_reg_rs1(63) ^ ex_reg_rs2(63)) & io.ctrl.br_ltu | - ex_reg_rs1(63) & ~ex_reg_rs2(63)).toBool; + (~(ex_rs1(63) ^ ex_rs2(63)) & io.ctrl.br_ltu | + ex_rs1(63) & ~ex_rs2(63)).toBool // time stamp counter val tsc_reg = Reg(resetVal = UFix(0,64)); @@ -310,7 +323,7 @@ class rocketDpath extends Component // subword store data generation val storegen = new StoreDataGen storegen.io.typ := io.ctrl.ex_mem_type - storegen.io.din := ex_reg_rs2 + storegen.io.din := ex_rs2 // memory stage mem_reg_pc := ex_reg_pc; @@ -324,8 +337,6 @@ class rocketDpath extends Component // for 
load/use hazard detection (load byte/halfword) io.ctrl.mem_waddr := mem_reg_waddr; - mem_wdata := Mux(io.ctrl.mem_load, io.dmem.resp_data, mem_reg_wdata) - // 32/64 bit load handling (moved to earlier in file) // writeback arbitration @@ -359,6 +370,7 @@ class rocketDpath extends Component wb_reg_rs2 := mem_reg_rs2 wb_reg_waddr := mem_ll_waddr wb_reg_wdata := mem_ll_wdata + wb_reg_dmem_wdata := io.dmem.resp_data wb_reg_vec_waddr := mem_reg_waddr wb_reg_vec_wdata := mem_reg_wdata wb_reg_raddr1 := mem_reg_raddr1 From 3b4680a8349444db47d9462f7119bce39260d6d7 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 17 Mar 2012 14:03:33 -0700 Subject: [PATCH 0341/1087] add vitlb exception port --- rocket/src/main/scala/cpu.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 7acacb29..21a27d3e 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -159,7 +159,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.imem_req.ready := Bool(true) vu.io.imem_resp.valid := io.vimem.resp_val vu.io.imem_resp.bits := io.vimem.resp_data - // handle vitlb.io.cpu.exception + vu.io.vitlb_exception := vitlb.io.cpu.exception io.vimem.itlb_miss := vitlb.io.cpu.resp_miss // hooking up vector command queues From 8c50c81b81aacb0a8d18f88060349f4b578af8e9 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 17 Mar 2012 14:03:57 -0700 Subject: [PATCH 0342/1087] drop vec_irq_aux pcr register, now everything goes through badvaddr --- rocket/src/main/scala/consts.scala | 1 - rocket/src/main/scala/dpath_util.scala | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index a5a497df..5fea1889 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -143,7 +143,6 @@ object Constants val PCR_FROMHOST = UFix(17, 5); val PCR_VECBANK = 
UFix(18, 5); val PCR_VECCFG = UFix(19, 5); - val PCR_VECIRQAUX= UFix(20, 5) // definition of bits in PCR status reg val SR_ET = 0; // enable traps diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index fae2fca9..3856beb8 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -102,7 +102,6 @@ class rocketDpathPCR extends Component val reg_k1 = Reg() { Bits() }; val reg_ptbr = Reg() { UFix() }; val reg_vecbank = Reg(resetVal = Bits("b1111_1111", 8)) - val reg_vec_irq_aux = Reg() { Bits() } val reg_error_mode = Reg(resetVal = Bool(false)); val reg_status_vm = Reg(resetVal = Bool(false)); @@ -149,7 +148,7 @@ class rocketDpathPCR extends Component reg_badvaddr := Cat(badvaddr_sign, io.w.data(VADDR_BITS-1,0)).toUFix; } when (io.vec_irq_aux_wen) { - reg_vec_irq_aux := io.vec_irq_aux + reg_badvaddr := io.vec_irq_aux.toUFix } when (io.exception) { @@ -213,7 +212,6 @@ class rocketDpathPCR extends Component when (waddr === PCR_K1) { reg_k1 := wdata; } when (waddr === PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } when (waddr === PCR_VECBANK) { reg_vecbank := wdata(7,0) } - when (waddr === PCR_VECIRQAUX) { reg_vec_irq_aux := wdata } } rdata := Bits(0, 64) @@ -234,7 +232,6 @@ class rocketDpathPCR extends Component is (PCR_PTBR) { rdata := Cat(Bits(0,64-PADDR_BITS), reg_ptbr); } is (PCR_VECBANK) { rdata := Cat(Bits(0, 56), reg_vecbank) } is (PCR_VECCFG) { rdata := Cat(Bits(0, 40), io.vec_nfregs, io.vec_nxregs, io.vec_appvl) } - is (PCR_VECIRQAUX){ rdata := reg_vec_irq_aux } } } } From 8a4f95e617744f8ca9c64d0ea9a07cb6f4a27ce2 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 17 Mar 2012 17:50:37 -0700 Subject: [PATCH 0343/1087] changes to xcpt handling --- rocket/src/main/scala/cpu.scala | 2 +- rocket/src/main/scala/dpath_vec.scala | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/cpu.scala 
b/rocket/src/main/scala/cpu.scala index 21a27d3e..b9ab1058 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -170,7 +170,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.vec_ximm2q.valid := ctrl.io.vec_iface.vximm2q_valid vu.io.vec_ximm2q.bits := dpath.io.vec_iface.vximm2q_bits vu.io.vec_cntq.valid := ctrl.io.vec_iface.vcntq_valid - vu.io.vec_cntq.bits := dpath.io.vec_iface.vcntq_bits + vu.io.vec_cntq.bits := Cat(dpath.io.vec_iface.vcntq_last, dpath.io.vec_iface.vcntq_bits) // prefetch queues vu.io.vec_pfcmdq.valid := ctrl.io.vec_iface.vpfcmdq_valid diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 48ae3d34..f36bff23 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -12,6 +12,7 @@ class ioDpathVecInterface extends Bundle val vximm1q_bits = Bits(SZ_VIMM, OUTPUT) val vximm2q_bits = Bits(SZ_VSTRIDE, OUTPUT) val vcntq_bits = Bits(SZ_VLEN, OUTPUT) + val vcntq_last = Bool(OUTPUT) val evac_addr = Bits(64, OUTPUT) val irq_aux = Bits(64, INPUT) } @@ -159,6 +160,7 @@ class rocketDpathVec extends Component io.wdata) // VIMM2_ALU io.iface.vcntq_bits := io.wdata(SZ_VLEN-1, 0) + io.iface.vcntq_last := io.rs2(1) io.iface.evac_addr := io.wdata From b793d63182f94e0dda34f8809e281d1d84c28c6b Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 17 Mar 2012 23:00:27 -0700 Subject: [PATCH 0344/1087] no vector interrupt masking --- rocket/src/main/scala/ctrl.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 801198ef..14b74cbc 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -648,17 +648,16 @@ class rocketCtrl extends Component // FIXME: verify PC in MEM stage points to valid, restartable instruction val p_irq_timer = (io.dpath.status(15).toBool && io.dpath.irq_timer); val p_irq_ipi = 
(io.dpath.status(13).toBool && io.dpath.irq_ipi); - val p_irq_vec = (io.dpath.status(8) && vec_irq) val interrupt = io.dpath.status(SR_ET).toBool && mem_reg_valid && ((io.dpath.status(15).toBool && io.dpath.irq_timer) || (io.dpath.status(13).toBool && io.dpath.irq_ipi) || - p_irq_vec); + vec_irq); val interrupt_cause = Mux(p_irq_ipi, UFix(21,5), Mux(p_irq_timer, UFix(23,5), - Mux(p_irq_vec, vec_irq_cause, + Mux(vec_irq, vec_irq_cause, UFix(0,5)))) val mem_xcpt_ma_ld = io.dmem.xcpt_ma_ld && !mem_reg_kill From 62ada5ea9e7f7158387d922afc3d8c7bede0bcca Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 17 Mar 2012 23:00:51 -0700 Subject: [PATCH 0345/1087] hookup vitlb ptw port --- rocket/src/main/scala/cpu.scala | 1 + rocket/src/main/scala/ptw.scala | 49 +++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index b9ab1058..bb772ecc 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -100,6 +100,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) ptw.io.dtlb <> dtlb.io.ptw; ptw.io.itlb <> itlb.io.ptw; + ptw.io.vitlb <> vitlb.io.ptw ptw.io.ptbr := dpath.io.ptbr; arb.io.requestor(DMEM_PTW) <> ptw.io.dmem arb.io.dmem <> io.dmem diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index c7229c96..938a942e 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -72,6 +72,7 @@ class ioPTW extends Bundle { val itlb = new ioTLB_PTW().flip val dtlb = new ioTLB_PTW().flip + val vitlb = new ioTLB_PTW().flip val dmem = new ioDmem().flip val ptbr = UFix(PADDR_BITS, INPUT); } @@ -84,31 +85,38 @@ class rocketPTW extends Component val state = Reg(resetVal = s_ready); val r_req_vpn = Reg() { Bits() } - val r_req_dest = Reg() { Bool() } + val r_req_dest = Reg() { Bits() } val 
req_addr = Reg() { UFix() }; val r_resp_ppn = Reg() { Bits() }; val r_resp_perm = Reg() { Bits() }; val vpn_idx = Mux(state === s_l2_wait, r_req_vpn(9,0), r_req_vpn(19,10)); - val req_val = io.itlb.req_val || io.dtlb.req_val; + val req_val = io.itlb.req_val || io.dtlb.req_val || io.vitlb.req_val // give ITLB requests priority over DTLB requests val req_itlb_val = io.itlb.req_val; val req_dtlb_val = io.dtlb.req_val && !io.itlb.req_val; - - when ((state === s_ready) && req_dtlb_val) { - r_req_vpn := io.dtlb.req_vpn; - r_req_dest := Bool(true); - req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; - } + val req_vitlb_val = io.vitlb.req_val && !io.itlb.req_val && !io.dtlb.req_val when ((state === s_ready) && req_itlb_val) { r_req_vpn := io.itlb.req_vpn; - r_req_dest := Bool(false); + r_req_dest := Bits(0) req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; } + when ((state === s_ready) && req_dtlb_val) { + r_req_vpn := io.dtlb.req_vpn; + r_req_dest := Bits(1) + req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; + } + + when ((state === s_ready) && req_vitlb_val) { + r_req_vpn := io.vitlb.req_vpn; + r_req_dest := Bits(2) + req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.vitlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; + } + val dmem_resp_val = Reg(io.dmem.resp_val, resetVal = Bool(false)) when (dmem_resp_val) { req_addr := Cat(io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)).toUFix; @@ -133,22 +141,27 @@ class rocketPTW extends Component val resp_ptd = (io.dmem.resp_data_subword(1,0) === Bits(1,2)); val resp_pte = (io.dmem.resp_data_subword(1,0) === Bits(2,2)); - io.dtlb.req_rdy := (state === s_ready) && !io.itlb.req_val; - io.itlb.req_rdy := (state === s_ready); - io.dtlb.resp_val := r_req_dest && resp_val; - io.itlb.resp_val := !r_req_dest && resp_val; - 
io.dtlb.resp_err := r_req_dest && resp_err; - io.itlb.resp_err := !r_req_dest && resp_err; - io.dtlb.resp_perm := r_resp_perm; - io.itlb.resp_perm := r_resp_perm; + io.itlb.req_rdy := (state === s_ready) + io.dtlb.req_rdy := (state === s_ready) && !io.itlb.req_val + io.vitlb.req_rdy := (state === s_ready) && !io.itlb.req_val && !io.dtlb.req_val + io.itlb.resp_val := r_req_dest === Bits(0) && resp_val + io.dtlb.resp_val := r_req_dest === Bits(1) && resp_val + io.vitlb.resp_val := r_req_dest === Bits(2) && resp_val + io.itlb.resp_err := r_req_dest === Bits(0) && resp_err + io.dtlb.resp_err := r_req_dest === Bits(1) && resp_err + io.vitlb.resp_err := r_req_dest === Bits(2) && resp_err + io.itlb.resp_perm := r_resp_perm + io.dtlb.resp_perm := r_resp_perm + io.vitlb.resp_perm:= r_resp_perm val resp_ppn = Mux(state === s_l1_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-7), r_req_vpn(VPN_BITS-11, 0)), Mux(state === s_l2_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-17), r_req_vpn(VPN_BITS-21, 0)), r_resp_ppn)); - io.dtlb.resp_ppn := resp_ppn; io.itlb.resp_ppn := resp_ppn; + io.dtlb.resp_ppn := resp_ppn; + io.vitlb.resp_ppn := resp_ppn; // control state machine switch (state) { From 7493d55d3f1bd2650e9293e75ef01b4e335f69de Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 18 Mar 2012 15:06:39 -0700 Subject: [PATCH 0346/1087] add pf fault handling --- rocket/src/main/scala/cpu.scala | 4 ++-- rocket/src/main/scala/dtlb.scala | 23 +++++++++-------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index bb772ecc..73917c6e 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -50,15 +50,15 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu.io.vec_tlb_resp.xcpt_ld := chosen_vec && dtlb.io.cpu_resp.xcpt_ld vu.io.vec_tlb_resp.xcpt_st := chosen_vec && dtlb.io.cpu_resp.xcpt_st + vu.io.vec_tlb_resp.xcpt_pf := Bool(false) 
vu.io.vec_tlb_resp.miss := chosen_vec && dtlb.io.cpu_resp.miss vu.io.vec_tlb_resp.ppn := dtlb.io.cpu_resp.ppn - // vector prefetch doesn't care about exceptions - // and shouldn't cause any anyways dtlbarb.io.in(DTLB_VPF) <> vu.io.vec_pftlb_req vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) vu.io.vec_pftlb_resp.xcpt_st := Bool(false) + vu.io.vec_pftlb_resp.xcpt_pf := chosen_pf && dtlb.io.cpu_resp.xcpt_pf vu.io.vec_pftlb_resp.miss := chosen_pf && dtlb.io.cpu_resp.miss vu.io.vec_pftlb_resp.ppn := dtlb.io.cpu_resp.ppn diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 50844713..bd27bf59 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -26,6 +26,7 @@ class ioDTLB_CPU_resp extends Bundle val ppn = Bits(PPN_BITS, OUTPUT) val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) + val xcpt_pf = Bool(OUTPUT) } class ioDTLB extends Bundle @@ -138,21 +139,15 @@ class rocketDTLB(entries: Int) extends Component } } - val access_fault_ld = - tlb_hit && (req_load || req_amo) && - ((status_s && !sr_array(tag_hit_addr).toBool) || - (status_u && !ur_array(tag_hit_addr).toBool) || - bad_va); + val access_fault_common = + tlb_hit && + ((status_s && !sr_array(tag_hit_addr)) || + (status_u && !ur_array(tag_hit_addr)) || + bad_va) - io.cpu_resp.xcpt_ld := access_fault_ld; - - val access_fault_st = - tlb_hit && (req_store || req_amo) && - ((status_s && !sw_array(tag_hit_addr).toBool) || - (status_u && !uw_array(tag_hit_addr).toBool) || - bad_va); - - io.cpu_resp.xcpt_st := access_fault_st; + io.cpu_resp.xcpt_ld := access_fault_common && (req_load || req_amo) + io.cpu_resp.xcpt_st := access_fault_common && (req_store || req_amo) + io.cpu_resp.xcpt_pf := access_fault_common && req_pf io.cpu_req.ready := (state === s_ready) && !tlb_miss; io.cpu_resp.miss := tlb_miss; From 98e10ddc3c6f502b01912084524c99864357550b Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 18 Mar 2012 16:36:12 -0700 Subject: [PATCH 0347/1087] update 
vector exception instructions --- rocket/src/main/scala/instructions.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index bf985739..093f14e7 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -249,14 +249,14 @@ object Instructions val VSETVL = Bits("b?????_?????_000000000000_101_1110011",32); val VF = Bits("b00000_?????_????????????_111_1110011",32); // vector supervisor instructions - val VENQCMD = Bits("b00000_?????_?????_1000000000_1111011",32) - val VENQIMM1 = Bits("b00000_?????_?????_1000000001_1111011",32) - val VENQIMM2 = Bits("b00000_?????_?????_1000000010_1111011",32) - val VENQCNT = Bits("b00000_?????_?????_1000000011_1111011",32) + val VENQCMD = Bits("b00000_?????_?????_1100000011_1111011",32) + val VENQIMM1 = Bits("b00000_?????_?????_1100000100_1111011",32) + val VENQIMM2 = Bits("b00000_?????_?????_1100000101_1111011",32) + val VENQCNT = Bits("b00000_?????_?????_1100000110_1111011",32) val VXCPTEVAC = Bits("b00000_?????_00000_1100000000_1111011",32) - val VXCPTKILL = Bits("b00000_00000_00000_1100000001_1111011",32) - val VXCPTWAIT = Bits("b00000_00000_00000_1100000010_1111011",32) - val VXCPTHOLD = Bits("b00000_00000_00000_1100000011_1111011",32) + val VXCPTKILL = Bits("b00000_00000_00000_1000000010_1111011",32) + val VXCPTWAIT = Bits("b00000_00000_00000_1100000001_1111011",32) + val VXCPTHOLD = Bits("b00000_00000_00000_1100000010_1111011",32) val NOP = ADDI & Bits("b00000000000000000000001111111111", 32); } From 2a01f558ba8d6830fe5810a5bf3949e6ea4c7a6e Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 18 Mar 2012 19:55:24 -0700 Subject: [PATCH 0348/1087] fix unmasked valid bug in ctrl_vec --- rocket/src/main/scala/ctrl_vec.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala 
index 55466cd2..df3b15d5 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -231,11 +231,11 @@ class rocketCtrlVec extends Component val reg_hold = Reg(resetVal = Bool(false)) - when (wb_vec_xcpthold) { reg_hold := Bool(true) } + when (valid_common && wb_vec_xcpthold) { reg_hold := Bool(true) } when (io.eret) { reg_hold := Bool(false) } - io.iface.evac := wb_vec_xcptevac.toBool - io.iface.kill := wb_vec_xcptkill.toBool + io.iface.evac := valid_common && wb_vec_xcptevac.toBool + io.iface.kill := valid_common && wb_vec_xcptkill.toBool io.iface.hold := reg_hold io.stalld := reg_xcptwait From c4a91303fba83a6315233a51199a56b269c0140d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 18 Mar 2012 20:42:38 -0700 Subject: [PATCH 0349/1087] update vector fence names and encoding --- rocket/src/main/scala/ctrl.scala | 6 ++---- rocket/src/main/scala/ctrl_vec.scala | 6 ++---- rocket/src/main/scala/instructions.scala | 6 ++---- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 14b74cbc..cd99bce4 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -268,10 +268,8 @@ object rocketCtrlDecode VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - FENCE_L_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - FENCE_G_V-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, 
SYNC_N,N,N,N,N,N), - FENCE_L_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,Y,N,N,N,N), - FENCE_G_CV->List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,Y,N,N,N,N), + FENCE_V_L-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,Y,N,N,N,N), + FENCE_V_G-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,Y,N,N,N,N), VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index df3b15d5..62dba213 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -97,10 +97,8 @@ class rocketCtrlVec extends Component VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N,N), VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N,N,N,N), VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N,N), - FENCE_L_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,N,N,N,N,N,N,N,N,N,N), - FENCE_G_V-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,N,N,N,N,N,N,N,N,N,N), - FENCE_L_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N,N), - FENCE_G_CV->List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N,N), + FENCE_V_L-> List(Y,VCMD_F, VIMM_X, VIMM2_X, 
N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N,N), + FENCE_V_G-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N,N), VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 093f14e7..1093b5f9 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -166,10 +166,8 @@ object Instructions val FNMSUB_D = Bits("b?????_?????_?????_?????_???_01_1001011",32); val FNMADD_D = Bits("b?????_?????_?????_?????_???_01_1001111",32); // vector instructions - val FENCE_L_V = Bits("b?????_?????_????????????_100_0101111",32); - val FENCE_G_V = Bits("b?????_?????_????????????_101_0101111",32); - val FENCE_L_CV = Bits("b?????_?????_????????????_110_0101111",32); - val FENCE_G_CV = Bits("b?????_?????_????????????_111_0101111",32); + val FENCE_V_L = Bits("b?????_?????_????????????_100_0101111",32); + val FENCE_V_G = Bits("b?????_?????_????????????_101_0101111",32); val MOVZ = Bits("b?????_?????_?????_0000000101_1110111",32); val MOVN = Bits("b?????_?????_?????_0000001101_1110111",32); val STOP = Bits("b00000_00000_00000_0000000010_1110111",32); From ba06cd953e8a9aeb80ef39fc59a4a6a2e0a2a9e4 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 18 Mar 2012 20:43:08 -0700 Subject: [PATCH 0350/1087] add chosen --- rocket/src/main/scala/cpu.scala | 2 +- rocket/src/main/scala/util.scala | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 73917c6e..e8a0d6da 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -37,7 +37,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) { vu = new vu() // 
cpu, vector prefetch, and vector use the DTLB - val dtlbarb = new hwacha.Arbiter(3)({new ioDTLB_CPU_req()}) + val dtlbarb = new Arbiter(3)({new ioDTLB_CPU_req_bundle()}) val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2up(3))) when( dtlb.io.cpu_req.ready && dtlbarb.io.out.valid ) { dtlbchosen := dtlbarb.io.chosen } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 9d8ed9e1..e6a2308f 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -163,6 +163,7 @@ class ioPipe[+T <: Data]()(data: => T) extends Bundle class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle { val in = Vec(n) { (new ioDecoupled()) { data } }.flip val out = (new ioDecoupled()) { data } + val chosen = Bits(log2up(n), OUTPUT) } class Arbiter[T <: Data](n: Int)(data: => T) extends Component { @@ -174,8 +175,11 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { } var dout = io.in(n-1).bits - for (i <- 1 to n-1) + var choose = Bits(n-1) + for (i <- 1 to n-1) { dout = Mux(io.in(n-1-i).valid, io.in(n-1-i).bits, dout) + choose = Mux(io.in(n-1-i).valid, Bits(n-1-i), choose) + } var vout = io.in(0).valid for (i <- 1 to n-1) @@ -183,6 +187,7 @@ class Arbiter[T <: Data](n: Int)(data: => T) extends Component { vout <> io.out.valid dout <> io.out.bits + choose <> io.chosen } class RRArbiter[T <: Data](n: Int)(data: => T) extends Component { From 2ed0be65f9c0b8e8d88dd9cf03a36c95ed47a7ce Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 19 Mar 2012 00:19:33 -0700 Subject: [PATCH 0351/1087] fix RRArbiter --- rocket/src/main/scala/util.scala | 63 ++++++++++++++++---------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index e6a2308f..a5137174 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -166,55 +166,54 @@ class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle { val chosen = 
Bits(log2up(n), OUTPUT) } +object ArbiterCtrl +{ + def apply(request: Seq[Bool]) = { + Bool(true) +: (1 until request.length).map(i => !foldR(request.slice(0, i))(_||_)) + } +} + class Arbiter[T <: Data](n: Int)(data: => T) extends Component { val io = new ioArbiter(n)(data) - io.in(0).ready := io.out.ready - for (i <- 1 to n-1) { - io.in(i).ready := !io.in(i-1).valid && io.in(i-1).ready - } + val grant = ArbiterCtrl(io.in.map(_.valid)) + (0 until n).map(i => io.in(i).ready := grant(i) && io.out.ready) var dout = io.in(n-1).bits var choose = Bits(n-1) - for (i <- 1 to n-1) { - dout = Mux(io.in(n-1-i).valid, io.in(n-1-i).bits, dout) - choose = Mux(io.in(n-1-i).valid, Bits(n-1-i), choose) + for (i <- n-2 to 0 by -1) { + dout = Mux(io.in(i).valid, io.in(i).bits, dout) + choose = Mux(io.in(i).valid, Bits(i), choose) } - var vout = io.in(0).valid - for (i <- 1 to n-1) - vout = vout || io.in(i).valid - - vout <> io.out.valid - dout <> io.out.bits - choose <> io.chosen + io.out.valid := foldR(io.in.map(_.valid))(_||_) + io.out.bits <> dout + io.chosen := choose } class RRArbiter[T <: Data](n: Int)(data: => T) extends Component { val io = new ioArbiter(n)(data) - val last_grant = Reg(resetVal = UFix(0, log2up(n))) - var valid = io.in(n-1).valid - var next_grant = UFix(n-1) - var mux = (new Mux1H(n)) { data } + val last_grant = Reg(resetVal = Bits(0, log2up(n))) + val g = ArbiterCtrl((0 until n).map(i => io.in(i).valid && UFix(i) > last_grant) ++ io.in.map(_.valid)) + val grant = (0 until n).map(i => g(i) && UFix(i) > last_grant || g(i+n)) + (0 until n).map(i => io.in(i).ready := grant(i) && io.out.ready) - for (i <- n-2 to 0 by -1) { - valid = valid || io.in(i).valid - next_grant = Mux(io.in(i).valid, UFix(i), next_grant) - } + var choose = Bits(n-1) + for (i <- n-2 to 0 by -1) + choose = Mux(io.in(i).valid, Bits(i), choose) for (i <- n-1 to 1 by -1) - next_grant = Mux(last_grant < UFix(i) && io.in(i).valid, UFix(i), next_grant) - for (i <- 0 until n) { - mux.io.sel(i) := 
next_grant === UFix(i) - mux.io.in(i) := io.in(i).bits - io.in(i).ready := io.out.ready && next_grant === UFix(i) - } - when (valid && io.out.ready) { - last_grant := next_grant + choose = Mux(io.in(i).valid && UFix(i) > last_grant, Bits(i), choose) + when (io.out.valid && io.out.ready) { + last_grant := choose } - io.out.valid := valid - io.out.bits := mux.io.out + val dvec = Vec(n) { Wire() { data } } + (0 until n).map(i => dvec(i) := io.in(i).bits ) + + io.out.valid := foldR(io.in.map(_.valid))(_||_) + io.out.bits := dvec(choose) + io.chosen := choose } class ioLockingArbiter[T <: Data](n: Int)(data: => T) extends Bundle { From bd27d0fab222cc9a291eac5756c1705e8a374851 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 19 Mar 2012 01:02:06 -0700 Subject: [PATCH 0352/1087] can now take interrupts on stalled instructions --- rocket/src/main/scala/ctrl.scala | 146 ++++++++++++++++--------------- 1 file changed, 76 insertions(+), 70 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index cd99bce4..9af0c14d 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -358,6 +358,8 @@ class rocketCtrl extends Component val ex_reg_inst_di = Reg(resetVal = Bool(false)); val ex_reg_inst_ei = Reg(resetVal = Bool(false)); val ex_reg_flush_inst = Reg(resetVal = Bool(false)); + val ex_reg_xcpt_interrupt = Reg(resetVal = Bool(false)); + val ex_reg_cause = Reg(){UFix()} val ex_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val ex_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val ex_reg_xcpt_illegal = Reg(resetVal = Bool(false)); @@ -376,6 +378,8 @@ class rocketCtrl extends Component val mem_reg_inst_di = Reg(resetVal = Bool(false)); val mem_reg_inst_ei = Reg(resetVal = Bool(false)); val mem_reg_flush_inst = Reg(resetVal = Bool(false)); + val mem_reg_xcpt_interrupt = Reg(resetVal = Bool(false)); + val mem_reg_cause = Reg(){UFix()} val mem_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val 
mem_reg_xcpt_itlb = Reg(resetVal = Bool(false)); val mem_reg_xcpt_illegal = Reg(resetVal = Bool(false)); @@ -402,7 +406,8 @@ class rocketCtrl extends Component val wb_reg_fp_val = Reg(resetVal = Bool(false)); val wb_reg_fp_sboard_set = Reg(resetVal = Bool(false)); - val take_pc = Wire() { Bool() }; + val take_pc = Wire(){Bool()} + val take_pc_wb = Wire(){Bool()} when (!io.dpath.stalld) { when (io.dpath.killf) { @@ -421,12 +426,47 @@ class rocketCtrl extends Component } id_reg_icmiss := !io.imem.resp_val; } + + var vec_replay = Bool(false) + var vec_stalld = Bool(false) + var vec_irq = Bool(false) + var vec_irq_cause = UFix(23,5) // don't care + if (HAVE_VEC) + { + // vector control + val vec = new rocketCtrlVec() + + io.vec_dpath <> vec.io.dpath + io.vec_iface <> vec.io.iface + + vec.io.s := io.dpath.status(SR_S) + vec.io.sr_ev := io.dpath.status(SR_EV) + vec.io.exception := wb_reg_exception + vec.io.eret := wb_reg_eret + + vec_replay = vec.io.replay + vec_stalld = vec.io.stalld // || id_vfence_cv && !vec.io.vfence_ready + vec_irq = vec.io.irq + vec_irq_cause = vec.io.irq_cause + } // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim) val illegal_inst = !(id_int_val.toBool || io.fpu.dec.valid || id_vec_val.toBool) || (id_eret.toBool && io.dpath.status(SR_ET).toBool); - + + val p_irq_timer = (io.dpath.status(15).toBool && io.dpath.irq_timer); + val p_irq_ipi = (io.dpath.status(13).toBool && io.dpath.irq_ipi); + val id_interrupt = + io.dpath.status(SR_ET).toBool && mem_reg_valid && + ((io.dpath.status(15).toBool && io.dpath.irq_timer) || + (io.dpath.status(13).toBool && io.dpath.irq_ipi) || + vec_irq); + val id_cause = + Mux(p_irq_ipi, UFix(21,5), + Mux(p_irq_timer, UFix(23,5), + vec_irq_cause)) + when (reset.toBool || io.dpath.killd) { ex_reg_br_type := BR_N; ex_reg_btb_hit := Bool(false); @@ -479,6 +519,8 @@ class rocketCtrl extends Component } ex_reg_mem_cmd := id_mem_cmd ex_reg_mem_type := id_mem_type.toUFix + 
ex_reg_xcpt_interrupt := id_reg_valid && id_interrupt && !take_pc + ex_reg_cause := id_cause val beq = io.dpath.br_eq; val bne = ~io.dpath.br_eq; @@ -547,6 +589,8 @@ class rocketCtrl extends Component } mem_reg_mem_cmd := ex_reg_mem_cmd; mem_reg_mem_type := ex_reg_mem_type; + mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb + mem_reg_cause := ex_reg_cause when (io.dpath.killm) { wb_reg_valid := Bool(false) @@ -619,82 +663,44 @@ class rocketCtrl extends Component io.fpu.dec.wen && fp_sboard.io.r(3).data } - var vec_replay = Bool(false) - var vec_stalld = Bool(false) - var vec_irq = Bool(false) - var vec_irq_cause = UFix(0,5) - if (HAVE_VEC) - { - // vector control - val vec = new rocketCtrlVec() - - io.vec_dpath <> vec.io.dpath - io.vec_iface <> vec.io.iface - - vec.io.s := io.dpath.status(SR_S) - vec.io.sr_ev := io.dpath.status(SR_EV) - vec.io.exception := wb_reg_exception - vec.io.eret := wb_reg_eret - - vec_replay = vec.io.replay - vec_stalld = vec.io.stalld // || id_vfence_cv && !vec.io.vfence_ready - vec_irq = vec.io.irq - vec_irq_cause = vec.io.irq_cause - } - // exception handling - // FIXME: verify PC in MEM stage points to valid, restartable instruction - val p_irq_timer = (io.dpath.status(15).toBool && io.dpath.irq_timer); - val p_irq_ipi = (io.dpath.status(13).toBool && io.dpath.irq_ipi); - val interrupt = - io.dpath.status(SR_ET).toBool && mem_reg_valid && - ((io.dpath.status(15).toBool && io.dpath.irq_timer) || - (io.dpath.status(13).toBool && io.dpath.irq_ipi) || - vec_irq); - - val interrupt_cause = - Mux(p_irq_ipi, UFix(21,5), - Mux(p_irq_timer, UFix(23,5), - Mux(vec_irq, vec_irq_cause, - UFix(0,5)))) - val mem_xcpt_ma_ld = io.dmem.xcpt_ma_ld && !mem_reg_kill val mem_xcpt_ma_st = io.dmem.xcpt_ma_st && !mem_reg_kill val mem_xcpt_dtlb_ld = io.xcpt_dtlb_ld && !mem_reg_kill val mem_xcpt_dtlb_st = io.xcpt_dtlb_st && !mem_reg_kill - val mem_exception = - interrupt || - mem_xcpt_ma_ld || - mem_xcpt_ma_st || - mem_xcpt_dtlb_ld || - 
mem_xcpt_dtlb_st || - mem_reg_xcpt_illegal || - mem_reg_xcpt_privileged || - mem_reg_xcpt_fpu || - mem_reg_xcpt_vec || - mem_reg_xcpt_syscall || - mem_reg_xcpt_itlb || - mem_reg_xcpt_ma_inst; - - val mem_cause = - Mux(interrupt, interrupt_cause, // asynchronous interrupt - Mux(mem_reg_xcpt_itlb, UFix(1,5), // instruction access fault - Mux(mem_reg_xcpt_illegal, UFix(2,5), // illegal instruction - Mux(mem_reg_xcpt_privileged, UFix(3,5), // privileged instruction - Mux(mem_reg_xcpt_fpu, UFix(4,5), // FPU disabled - Mux(mem_reg_xcpt_syscall, UFix(6,5), // system call - // breakpoint - Mux(mem_xcpt_ma_ld, UFix(8,5), // misaligned load - Mux(mem_xcpt_ma_st, UFix(9,5), // misaligned store - Mux(mem_xcpt_dtlb_ld, UFix(10,5), // load fault - Mux(mem_xcpt_dtlb_st, UFix(11,5), // store fault - Mux(mem_reg_xcpt_vec, UFix(12,5), // vector disabled - UFix(0,5)))))))))))); // instruction address misaligned + val mem_exception = + mem_reg_xcpt_interrupt || + mem_xcpt_ma_ld || + mem_xcpt_ma_st || + mem_xcpt_dtlb_ld || + mem_xcpt_dtlb_st || + mem_reg_xcpt_illegal || + mem_reg_xcpt_privileged || + mem_reg_xcpt_fpu || + mem_reg_xcpt_vec || + mem_reg_xcpt_syscall || + mem_reg_xcpt_itlb || + mem_reg_xcpt_ma_inst; + + val mem_cause = + Mux(mem_reg_xcpt_interrupt, mem_reg_cause, // asynchronous interrupt + Mux(mem_reg_xcpt_itlb, UFix(1,5), // instruction access fault + Mux(mem_reg_xcpt_illegal, UFix(2,5), // illegal instruction + Mux(mem_reg_xcpt_privileged, UFix(3,5), // privileged instruction + Mux(mem_reg_xcpt_fpu, UFix(4,5), // FPU disabled + Mux(mem_reg_xcpt_syscall, UFix(6,5), // system call + // breakpoint + Mux(mem_xcpt_ma_ld, UFix(8,5), // misaligned load + Mux(mem_xcpt_ma_st, UFix(9,5), // misaligned store + Mux(mem_xcpt_dtlb_ld, UFix(10,5), // load fault + Mux(mem_xcpt_dtlb_st, UFix(11,5), // store fault + Mux(mem_reg_xcpt_vec, UFix(12,5), // vector disabled + UFix(0,5)))))))))))); // instruction address misaligned // control transfer from ex/mem val take_pc_ex = 
ex_reg_btb_hit != br_taken || jr_taken - val take_pc_wb = wb_reg_replay || vec_replay || wb_reg_exception || wb_reg_eret + take_pc_wb := wb_reg_replay || vec_replay || wb_reg_exception || wb_reg_eret take_pc := take_pc_ex || take_pc_wb; // replay mem stage PC on a DTLB miss or a long-latency writeback @@ -703,7 +709,7 @@ class rocketCtrl extends Component val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill - + // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || From 264732556f7268559f8f962dad3f2b07242ec44d Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 19 Mar 2012 03:08:53 -0700 Subject: [PATCH 0353/1087] fixes to match verilog X semantics --- rocket/src/main/scala/ctrl.scala | 20 ++++++++++---------- rocket/src/main/scala/ctrl_util.scala | 11 ++++++----- rocket/src/main/scala/nbdcache.scala | 2 +- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 9af0c14d..398275c7 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -333,7 +333,7 @@ class rocketCtrl extends Component val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27)); val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) - val wb_reg_dcache_miss = Reg(io.dmem.resp_miss, resetVal = Bool(false)); + val wb_reg_dcache_miss = Reg(io.dmem.resp_miss || io.dmem.resp_nack, resetVal = Bool(false)); val id_reg_valid = Reg(resetVal = Bool(false)); val id_reg_btb_hit = Reg(resetVal = Bool(false)); @@ -529,15 +529,15 @@ class rocketCtrl extends Component val bge = ~io.dpath.br_lt; val bgeu = 
~io.dpath.br_ltu; - val br_taken = - (ex_reg_br_type === BR_EQ) & beq | - (ex_reg_br_type === BR_NE) & bne | - (ex_reg_br_type === BR_LT) & blt | - (ex_reg_br_type === BR_LTU) & bltu | - (ex_reg_br_type === BR_GE) & bge | - (ex_reg_br_type === BR_GEU) & bgeu | - (ex_reg_br_type === BR_J); // treat J/JAL like taken branches - val jr_taken = ex_reg_br_type === BR_JR + val br_taken = !(wb_reg_dcache_miss && ex_reg_load_use) && + ((ex_reg_br_type === BR_EQ) && beq || + (ex_reg_br_type === BR_NE) && bne || + (ex_reg_br_type === BR_LT) && blt || + (ex_reg_br_type === BR_LTU) && bltu || + (ex_reg_br_type === BR_GE) && bge || + (ex_reg_br_type === BR_GEU) && bgeu || + (ex_reg_br_type === BR_J)) // treat J/JAL like taken branches + val jr_taken = !(wb_reg_dcache_miss && ex_reg_load_use) && ex_reg_br_type === BR_JR val mem_reg_div_mul_val = Reg(){Bool()}; val mem_reg_eret = Reg(){Bool()}; diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index f0b577e4..5e85ba60 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -20,13 +20,14 @@ class rocketCtrlSboard(entries: Int, nread: Int, nwrite: Int) extends Component val w = Vec(nwrite) { new write_port() } } - val busybits = Reg(resetVal = Bits(0, entries)); + val busybits = Vec(entries) { Reg(resetVal = Bool(false)) } for (i <- 0 until nread) io.r(i).data := busybits(io.r(i).addr) - var wdata = busybits - for (i <- 0 until nwrite) - wdata = wdata.bitSet(io.w(i).addr, Mux(io.w(i).en, io.w(i).data, wdata(io.w(i).addr))) - busybits := wdata + for (i <- 0 until nwrite) { + when (io.w(i).en) { + busybits(io.w(i).addr) := io.w(i).data + } + } } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 97639207..319a43aa 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -401,7 +401,7 @@ class MSHRFile extends Component { val (replay_read, replay_write) = 
cpuCmdToRW(replay.bits.cmd) val sdq_free = replay.valid && replay.ready && replay_write - sdq_val := sdq_val & ~(sdq_free.toUFix << replay.bits.sdq_id) | + sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, NSDQ)) | PriorityEncoderOH(~sdq_val(NSDQ-1,0)) & Fill(NSDQ, sdq_enq && io.req.bits.tag_miss) io.data_req.bits.data := sdq.read(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) From 0edea00166af65f5f4d2c5e1dcf1b5171eec0c21 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 19 Mar 2012 03:09:23 -0700 Subject: [PATCH 0354/1087] now HAVE_VEC is true, since it passes the emulator --- rocket/src/main/scala/consts.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 5fea1889..1fdba6cd 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -7,7 +7,7 @@ object Constants { val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = false + val HAVE_VEC = true val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); From c036fff79c660e54c2f014557991f5ef72a5227a Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 19 Mar 2012 15:13:57 -0700 Subject: [PATCH 0355/1087] fix id interrupt signal --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 398275c7..7f9a4e6f 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -458,7 +458,7 @@ class rocketCtrl extends Component val p_irq_timer = (io.dpath.status(15).toBool && io.dpath.irq_timer); val p_irq_ipi = (io.dpath.status(13).toBool && io.dpath.irq_ipi); val id_interrupt = - io.dpath.status(SR_ET).toBool && mem_reg_valid && + io.dpath.status(SR_ET).toBool && ((io.dpath.status(15).toBool && io.dpath.irq_timer) || (io.dpath.status(13).toBool && io.dpath.irq_ipi) || vec_irq); From 
56cb9b7a634c2422e392bfea96821606b04bc9fc Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 20 Mar 2012 00:18:32 -0700 Subject: [PATCH 0356/1087] fix bug in coherence hub, specifically in abort handling logic --- rocket/src/main/scala/coherence.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 9160a84b..9cd8fc7c 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -648,9 +648,9 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS is(s_abort_drain) { // raises x_init_data.ready below when(x_init_data.valid) { abort_cnt := abort_cnt + UFix(1) - } - when(abort_cnt === ~UFix(0, width = log2up(REFILL_CYCLES))) { - abort_state_arr(j) := s_abort_send + when(abort_cnt === ~UFix(0, width = log2up(REFILL_CYCLES))) { + abort_state_arr(j) := s_abort_send + } } } is(s_abort_send) { // nothing is dequeued for now From 1cddd5de56b0090739ac659a3d8a081163baaa7d Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 20 Mar 2012 02:12:24 -0700 Subject: [PATCH 0357/1087] fix amo locking up problem --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 319a43aa..cf2db3c8 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -911,7 +911,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb)) val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) val p_store_match = r_cpu_req_val_ && r_req_read && p_store_idx_match && p_store_offset_match - val drain_store_val = (p_store_valid && (!io.cpu.req_val || !req_read || wb.io.data_req.valid || 
mshr.io.data_req.valid)) || p_store_match + val drain_store_val = (p_store_valid && (!io.cpu.req_val || req_write || wb.io.data_req.valid || mshr.io.data_req.valid)) || p_store_match data_arb.io.in(2).bits.offset := p_store_idx(offsetmsb,ramindexlsb) data_arb.io.in(2).bits.idx := p_store_idx(indexmsb,indexlsb) data_arb.io.in(2).bits.rw := Bool(true) From 7d7d7f49f98ff94328df7efa2109b7d13c2f8e52 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 20 Mar 2012 15:21:36 -0700 Subject: [PATCH 0358/1087] change the tlb arbiter to a round robin one --- rocket/src/main/scala/cpu.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index e8a0d6da..aa03f75a 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -37,7 +37,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) { vu = new vu() // cpu, vector prefetch, and vector use the DTLB - val dtlbarb = new Arbiter(3)({new ioDTLB_CPU_req_bundle()}) + val dtlbarb = new RRArbiter(3)({new ioDTLB_CPU_req_bundle()}) val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2up(3))) when( dtlb.io.cpu_req.ready && dtlbarb.io.out.valid ) { dtlbchosen := dtlbarb.io.chosen } From e450e3aa40fb78e74530d83eefc02e4fe83330d0 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 20 Mar 2012 17:09:54 -0700 Subject: [PATCH 0359/1087] fix irt counter bug regarding vector stuff --- rocket/src/main/scala/ctrl.scala | 3 ++- rocket/src/main/scala/ctrl_vec.scala | 4 ++-- rocket/src/main/scala/dpath_vec.scala | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 7f9a4e6f..f3cc2284 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -439,6 +439,7 @@ class rocketCtrl extends Component io.vec_dpath <> vec.io.dpath io.vec_iface <> vec.io.iface + vec.io.valid := wb_reg_valid vec.io.s := 
io.dpath.status(SR_S) vec.io.sr_ev := io.dpath.status(SR_EV) vec.io.exception := wb_reg_exception @@ -833,7 +834,7 @@ class rocketCtrl extends Component io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; io.dpath.wb_wen := wb_reg_wen; - io.dpath.wb_valid := wb_reg_valid; + io.dpath.wb_valid := wb_reg_valid && !vec_replay io.dpath.sel_wa := id_sel_wa.toBool; io.dpath.sel_wb := id_sel_wb; io.dpath.ren_pcr := id_ren_pcr.toBool; diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 62dba213..4786cb6f 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -7,7 +7,6 @@ import Instructions._ class ioCtrlDpathVec extends Bundle { - val valid = Bool(INPUT) val inst = Bits(32, INPUT) val appvl0 = Bool(INPUT) val pfq = Bool(INPUT) @@ -59,6 +58,7 @@ class ioCtrlVec extends Bundle { val dpath = new ioCtrlDpathVec() val iface = new ioCtrlVecInterface() + val valid = Bool(INPUT) val s = Bool(INPUT) val sr_ev = Bool(INPUT) val exception = Bool(INPUT) @@ -144,7 +144,7 @@ class rocketCtrlVec extends Component val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_pfcntq_enq :: veccs2 = veccs1 val wb_vec_pfaq :: wb_vec_fence_cv :: wb_vec_xcptevac :: wb_vec_xcptkill :: wb_vec_xcptwait :: wb_vec_xcpthold :: Nil = veccs2 - val valid_common = io.dpath.valid && io.sr_ev && wb_vec_val && !(wb_vec_appvlmask && io.dpath.appvl0) + val valid_common = io.valid && io.sr_ev && wb_vec_val && !(wb_vec_appvlmask && io.dpath.appvl0) val wb_vec_pfcmdq_enq_mask_pfq = wb_vec_pfcmdq_enq && (!wb_vec_pfaq || io.dpath.pfq) val wb_vec_pfximm1q_enq_mask_pfq = wb_vec_pfximm1q_enq && (!wb_vec_pfaq || io.dpath.pfq) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index f36bff23..5fcd20a7 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -164,7 +164,6 @@ class rocketDpathVec extends Component 
io.iface.evac_addr := io.wdata - io.ctrl.valid := io.valid io.ctrl.inst := io.inst io.ctrl.appvl0 := reg_appvl0 io.ctrl.pfq := io.rs2(0) From 023734175d6eebceb134eb153a709f8eea842427 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 20 Mar 2012 17:10:05 -0700 Subject: [PATCH 0360/1087] now fence stalls in decode --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f3cc2284..230b0798 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -446,7 +446,7 @@ class rocketCtrl extends Component vec.io.eret := wb_reg_eret vec_replay = vec.io.replay - vec_stalld = vec.io.stalld // || id_vfence_cv && !vec.io.vfence_ready + vec_stalld = vec.io.stalld || id_vfence_cv && !vec.io.vfence_ready vec_irq = vec.io.irq vec_irq_cause = vec.io.irq_cause } From aaed0241af71071a05b614e9c82e4c8e214241a9 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 21 Mar 2012 15:08:48 -0700 Subject: [PATCH 0361/1087] get rid of vxcptwait --- rocket/src/main/scala/cpu.scala | 2 - rocket/src/main/scala/ctrl.scala | 32 ++- rocket/src/main/scala/ctrl_vec.scala | 274 +++++++++++++---------- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/instructions.scala | 1 - 5 files changed, 183 insertions(+), 128 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index aa03f75a..e546cd63 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -209,8 +209,6 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) // exceptions vu.io.xcpt.exception := ctrl.io.vec_iface.exception - ctrl.io.vec_iface.exception_ack_valid := vu.io.xcpt.exception_ack_valid - vu.io.xcpt.exception_ack_ready := ctrl.io.vec_iface.exception_ack_ready vu.io.xcpt.evac := ctrl.io.vec_iface.evac vu.io.xcpt.evac_addr := dpath.io.vec_iface.evac_addr.toUFix vu.io.xcpt.kill := 
ctrl.io.vec_iface.kill diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 230b0798..7ae0a6bc 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -97,7 +97,7 @@ object rocketCtrlDecode val xpr64 = Y; val decode_default = - // vfence_cv + // vfence // | eret // | | syscall // vec_val mem_val mul_val div_val renpcr | | | privileged @@ -106,7 +106,7 @@ object rocketCtrlDecode List(N, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N) val xdecode = Array( - // vfence_cv + // vfence // | eret // | | syscall // vec_val mem_val mul_val div_val renpcr | | | privileged @@ -215,7 +215,7 @@ object rocketCtrlDecode RDINSTRET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N)) val fdecode = Array( - // vfence_cv + // vfence // | eret // | | syscall // vec_val mem_val mul_val div_val renpcr | | | privileged @@ -255,7 +255,7 @@ object rocketCtrlDecode FSD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N)) val vdecode = Array( - // vfence_cv + // vfence // | eret // | | syscall // vec_val mem_val mul_val div_val renpcr | | | privileged @@ -307,7 +307,6 @@ object rocketCtrlDecode VENQCNT-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), VXCPTEVAC-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), VXCPTKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), - VXCPTWAIT-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, 
SYNC_N,N,N,N,Y,Y), VXCPTHOLD-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N)) } @@ -323,7 +322,7 @@ class rocketCtrl extends Component val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 - val id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_vfence_cv :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 + val id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_vfence :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst, resetVal = Bool(false)); @@ -445,8 +444,26 @@ class rocketCtrl extends Component vec.io.exception := wb_reg_exception vec.io.eret := wb_reg_eret + val vec_dec = new rocketCtrlVecDecoder() + vec_dec.io.inst := io.dpath.inst + + val s = io.dpath.status(SR_S) + val mask_cmdq_ready = !vec_dec.io.sigs.enq_cmdq || s && io.vec_iface.vcmdq_ready || !s && io.vec_iface.vcmdq_user_ready + val mask_ximm1q_ready = !vec_dec.io.sigs.enq_ximm1q || s && io.vec_iface.vximm1q_ready || !s && io.vec_iface.vximm1q_user_ready + val mask_ximm2q_ready = !vec_dec.io.sigs.enq_ximm2q || s && io.vec_iface.vximm2q_ready || !s && io.vec_iface.vximm2q_user_ready + val mask_cntq_ready = !vec_dec.io.sigs.enq_cntq || io.vec_iface.vcntq_ready + val mask_pfcmdq_ready = !vec_dec.io.sigs.enq_pfcmdq || io.vec_iface.vpfcmdq_ready + val mask_pfximm1q_ready = !vec_dec.io.sigs.enq_pfximm1q || io.vec_iface.vpfximm1q_ready + val mask_pfximm2q_ready = !vec_dec.io.sigs.enq_pfximm2q || io.vec_iface.vpfximm2q_ready + val mask_pfcntq_ready = !vec_dec.io.sigs.enq_pfcntq || io.vec_iface.vpfcntq_ready + + vec_stalld = + vec_dec.io.sigs.valid && ( + !mask_cmdq_ready || !mask_ximm1q_ready || !mask_ximm2q_ready || 
!mask_cntq_ready || + !mask_pfcmdq_ready || !mask_pfximm1q_ready || !mask_pfximm2q_ready || !mask_pfcntq_ready) || + id_vec_val && id_vfence && !vec.io.vfence_ready + vec_replay = vec.io.replay - vec_stalld = vec.io.stalld || id_vfence_cv && !vec.io.vfence_ready vec_irq = vec.io.irq vec_irq_cause = vec.io.irq_cause } @@ -802,7 +819,6 @@ class rocketCtrl extends Component id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || id_stall_fpu || id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || - id_vec_val.toBool && !(io.vec_iface.vcmdq_ready && io.vec_iface.vximm1q_ready && io.vec_iface.vximm2q_ready) || // being conservative ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || vec_stalld ); diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 4786cb6f..0ca55f01 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -46,8 +46,6 @@ class ioCtrlVecInterface extends Bundle val irq_cause = UFix(5, INPUT) val exception = Bool(OUTPUT) - val exception_ack_valid = Bool(INPUT) - val exception_ack_ready = Bool(OUTPUT) val evac = Bool(OUTPUT) val kill = Bool(OUTPUT) @@ -64,18 +62,45 @@ class ioCtrlVec extends Bundle val exception = Bool(INPUT) val eret = Bool(INPUT) val replay = Bool(OUTPUT) - val stalld = Bool(OUTPUT) val vfence_ready = Bool(OUTPUT) val irq = Bool(OUTPUT) val irq_cause = UFix(5, OUTPUT) } -class rocketCtrlVec extends Component +class rocketCtrlVecSigs extends Bundle { - val io = new ioCtrlVec() + val valid = Bool() + val sel_vcmd = Bits(width = 3) + val sel_vimm = Bits(width = 1) + val sel_vimm2 = Bits(width = 1) + val wen = Bool() + val fn = Bits(width = 2) + val appvlmask = Bool() + val enq_cmdq = Bool() + val enq_ximm1q = Bool() + val enq_ximm2q = Bool() + val enq_cntq = Bool() + val enq_pfcmdq = Bool() + val enq_pfximm1q = Bool() + val enq_pfximm2q = Bool() + val enq_pfcntq = Bool() + val pfaq = Bool() + val vfence = Bool() + val xcptevac = Bool() + 
val xcptkill = Bool() + val xcpthold = Bool() +} + +class rocketCtrlVecDecoder extends Component +{ + val io = new Bundle + { + val inst = Bits(32, INPUT) + val sigs = new rocketCtrlVecSigs().asOutput + } val veccs = - ListLookup(io.dpath.inst, + ListLookup(io.inst, // appvlmask // | vcmdq // | | vximm1q @@ -85,158 +110,175 @@ class rocketCtrlVec extends Component // | | | | | | vpfximm1q // | | | | | | | vpfximm2q // | | | | | | | | vpfcntq - // wen | | | | | | | | | pfq - // val vcmd vimm vimm2 | fn | | | | | | | | | | fence_cv - // | | | | | | | | | | | | | | | | | xcptwait - // | | | | | | | | | | | | | | | | | | - List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFGVL,N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VVCFG-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, N,VEC_CFG, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N,N,N,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N,N,N,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N,N), - FENCE_V_L-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N,N), - FENCE_V_G-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N,N), - VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, 
Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VFSSTD-> 
List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N,N), - VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,Y,N,N,N,Y,N,N,N,N,N), - VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_FN_N, N,N,Y,N,N,N,Y,N,N,Y,N,N,N,N,N), - VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_ALU,N,VEC_FN_N, N,N,N,Y,N,N,N,Y,N,Y,N,N,N,N,N), - VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,Y,N,N,N,Y,Y,N,N,N,N,N), - VXCPTEVAC-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,Y,N,N,N), - VXCPTKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,Y,N,N), - VXCPTWAIT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,N,Y,N), - VXCPTHOLD-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,N,N,Y) + // | | | | | | | | | pfq + // | | | | | | | | | | vfence + // | | | | | | | | | | | xcptevac + // | | | | | | | | | | | | xcptkill + // wen | | | | | | | | | | | | | xcpthold + // val vcmd vimm vimm2 | fn | | | | | | | | | | | | | | + // | | | | | | | | | | | | | | | | | | | | + List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N,N,N),Array( + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFGVL,N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VVCFG-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, N,VEC_CFG, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N,N,N), + VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N), + VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N,N,N), + VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N), + FENCE_V_L-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N), + FENCE_V_G-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, 
N,N,N,N,N,N,N,N,N,N,Y,N,N,N), + VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + 
VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), + VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,Y,N,N,N,Y,N,N,N,N), + VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_FN_N, N,N,Y,N,N,N,Y,N,N,Y,N,N,N,N), + VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_ALU,N,VEC_FN_N, N,N,N,Y,N,N,N,Y,N,Y,N,N,N,N), + VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,Y,N,N,N,Y,Y,N,N,N,N), + VXCPTEVAC-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,Y,N,N), + VXCPTKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,Y,N), + VXCPTHOLD-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,N,Y) )) - val wb_vec_val :: wb_sel_vcmd :: wb_sel_vimm :: wb_sel_vimm2 :: wb_vec_wen :: wb_vec_fn :: wb_vec_appvlmask :: veccs0 = veccs - val wb_vec_cmdq_enq :: wb_vec_ximm1q_enq :: wb_vec_ximm2q_enq :: wb_vec_cntq_enq :: veccs1 = veccs0 - val wb_vec_pfcmdq_enq :: wb_vec_pfximm1q_enq :: wb_vec_pfximm2q_enq :: wb_vec_pfcntq_enq :: veccs2 = veccs1 - val wb_vec_pfaq :: wb_vec_fence_cv :: wb_vec_xcptevac :: wb_vec_xcptkill :: wb_vec_xcptwait :: wb_vec_xcpthold :: Nil = veccs2 + val valid :: sel_vcmd :: sel_vimm :: sel_vimm2 :: wen :: fn :: appvlmask :: veccs0 = veccs + val enq_cmdq :: enq_ximm1q :: enq_ximm2q :: enq_cntq :: veccs1 = veccs0 + val enq_pfcmdq :: enq_pfximm1q :: enq_pfximm2q :: enq_pfcntq :: veccs2 = veccs1 + val pfaq :: vfence :: xcptevac :: xcptkill :: 
xcpthold :: Nil = veccs2 - val valid_common = io.valid && io.sr_ev && wb_vec_val && !(wb_vec_appvlmask && io.dpath.appvl0) + io.sigs.valid := valid.toBool + io.sigs.sel_vcmd := sel_vcmd + io.sigs.sel_vimm := sel_vimm + io.sigs.sel_vimm2 := sel_vimm2 + io.sigs.wen := wen.toBool + io.sigs.fn := fn + io.sigs.appvlmask := appvlmask.toBool + io.sigs.enq_cmdq := enq_cmdq.toBool + io.sigs.enq_ximm1q := enq_ximm1q.toBool + io.sigs.enq_ximm2q := enq_ximm2q.toBool + io.sigs.enq_cntq := enq_cntq.toBool + io.sigs.enq_pfcmdq := enq_pfcmdq.toBool + io.sigs.enq_pfximm1q := enq_pfximm1q.toBool + io.sigs.enq_pfximm2q := enq_pfximm2q.toBool + io.sigs.enq_pfcntq := enq_pfcntq.toBool + io.sigs.pfaq := pfaq.toBool + io.sigs.vfence := vfence.toBool + io.sigs.xcptevac := xcptevac.toBool + io.sigs.xcptkill := xcptkill.toBool + io.sigs.xcpthold := xcpthold.toBool +} - val wb_vec_pfcmdq_enq_mask_pfq = wb_vec_pfcmdq_enq && (!wb_vec_pfaq || io.dpath.pfq) - val wb_vec_pfximm1q_enq_mask_pfq = wb_vec_pfximm1q_enq && (!wb_vec_pfaq || io.dpath.pfq) - val wb_vec_pfximm2q_enq_mask_pfq = wb_vec_pfximm2q_enq && (!wb_vec_pfaq || io.dpath.pfq) - val wb_vec_pfcntq_enq_mask_pfq = wb_vec_pfcntq_enq && (!wb_vec_pfaq || io.dpath.pfq) +class rocketCtrlVec extends Component +{ + val io = new ioCtrlVec() - val mask_wb_vec_cmdq_ready = !wb_vec_cmdq_enq || io.s && io.iface.vcmdq_ready || !io.s && io.iface.vcmdq_user_ready - val mask_wb_vec_ximm1q_ready = !wb_vec_ximm1q_enq || io.s && io.iface.vximm1q_ready || !io.s && io.iface.vximm1q_user_ready - val mask_wb_vec_ximm2q_ready = !wb_vec_ximm2q_enq || io.s && io.iface.vximm2q_ready || !io.s && io.iface.vximm2q_user_ready - val mask_wb_vec_cntq_ready = !wb_vec_cntq_enq || io.iface.vcntq_ready - val mask_wb_vec_pfcmdq_ready = !wb_vec_pfcmdq_enq_mask_pfq || io.iface.vpfcmdq_ready - val mask_wb_vec_pfximm1q_ready = !wb_vec_pfximm1q_enq_mask_pfq || io.iface.vpfximm1q_ready - val mask_wb_vec_pfximm2q_ready = !wb_vec_pfximm2q_enq_mask_pfq || io.iface.vpfximm2q_ready - val 
mask_wb_vec_pfcntq_ready = !wb_vec_pfcntq_enq_mask_pfq || io.iface.vpfcntq_ready + val dec = new rocketCtrlVecDecoder() + dec.io.inst := io.dpath.inst - io.dpath.wen := wb_vec_wen.toBool - io.dpath.fn := wb_vec_fn - io.dpath.sel_vcmd := wb_sel_vcmd - io.dpath.sel_vimm := wb_sel_vimm - io.dpath.sel_vimm2 := wb_sel_vimm2 + val valid_common = io.valid && io.sr_ev && dec.io.sigs.valid && !(dec.io.sigs.appvlmask && io.dpath.appvl0) + + val enq_pfcmdq_mask_pfq = dec.io.sigs.enq_pfcmdq && (!dec.io.sigs.pfaq || io.dpath.pfq) + val enq_pfximm1q_mask_pfq = dec.io.sigs.enq_pfximm1q && (!dec.io.sigs.pfaq || io.dpath.pfq) + val enq_pfximm2q_mask_pfq = dec.io.sigs.enq_pfximm2q && (!dec.io.sigs.pfaq || io.dpath.pfq) + val enq_pfcntq_mask_pfq = dec.io.sigs.enq_pfcntq && (!dec.io.sigs.pfaq || io.dpath.pfq) + + val mask_cmdq_ready = !dec.io.sigs.enq_cmdq || io.s && io.iface.vcmdq_ready || !io.s && io.iface.vcmdq_user_ready + val mask_ximm1q_ready = !dec.io.sigs.enq_ximm1q || io.s && io.iface.vximm1q_ready || !io.s && io.iface.vximm1q_user_ready + val mask_ximm2q_ready = !dec.io.sigs.enq_ximm2q || io.s && io.iface.vximm2q_ready || !io.s && io.iface.vximm2q_user_ready + val mask_cntq_ready = !dec.io.sigs.enq_cntq || io.iface.vcntq_ready + val mask_pfcmdq_ready = !enq_pfcmdq_mask_pfq || io.iface.vpfcmdq_ready + val mask_pfximm1q_ready = !enq_pfximm1q_mask_pfq || io.iface.vpfximm1q_ready + val mask_pfximm2q_ready = !enq_pfximm2q_mask_pfq || io.iface.vpfximm2q_ready + val mask_pfcntq_ready = !enq_pfcntq_mask_pfq || io.iface.vpfcntq_ready + + io.dpath.wen := dec.io.sigs.wen + io.dpath.fn := dec.io.sigs.fn + io.dpath.sel_vcmd := dec.io.sigs.sel_vcmd + io.dpath.sel_vimm := dec.io.sigs.sel_vimm + io.dpath.sel_vimm2 := dec.io.sigs.sel_vimm2 io.iface.vcmdq_valid := valid_common && - wb_vec_cmdq_enq && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && 
mask_wb_vec_pfcntq_ready + dec.io.sigs.enq_cmdq && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && + mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready io.iface.vximm1q_valid := valid_common && - mask_wb_vec_cmdq_ready && wb_vec_ximm1q_enq && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready + mask_cmdq_ready && dec.io.sigs.enq_ximm1q && mask_ximm2q_ready && mask_cntq_ready && + mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready io.iface.vximm2q_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && wb_vec_ximm2q_enq && mask_wb_vec_cntq_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready + mask_cmdq_ready && mask_ximm1q_ready && dec.io.sigs.enq_ximm2q && mask_cntq_ready && + mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready io.iface.vcntq_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && wb_vec_cntq_enq && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready + mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && dec.io.sigs.enq_cntq && + mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready io.iface.vpfcmdq_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && - wb_vec_pfcmdq_enq_mask_pfq && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready + mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && + enq_pfcmdq_mask_pfq && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready io.iface.vpfximm1q_valid := valid_common && - 
mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && - mask_wb_vec_pfcmdq_ready && wb_vec_pfximm1q_enq_mask_pfq && mask_wb_vec_pfximm2q_ready && mask_wb_vec_pfcntq_ready + mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && + mask_pfcmdq_ready && enq_pfximm1q_mask_pfq && mask_pfximm2q_ready && mask_pfcntq_ready io.iface.vpfximm2q_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && wb_vec_pfximm2q_enq_mask_pfq && mask_wb_vec_pfcntq_ready + mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && + mask_pfcmdq_ready && mask_pfximm1q_ready && enq_pfximm2q_mask_pfq && mask_pfcntq_ready io.iface.vpfcntq_valid := valid_common && - mask_wb_vec_cmdq_ready && mask_wb_vec_ximm1q_ready && mask_wb_vec_ximm2q_ready && mask_wb_vec_cntq_ready && - mask_wb_vec_pfcmdq_ready && mask_wb_vec_pfximm1q_ready && mask_wb_vec_pfximm2q_ready && wb_vec_pfcntq_enq_mask_pfq + mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && + mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && enq_pfcntq_mask_pfq io.replay := valid_common && ( - wb_vec_cmdq_enq && !io.iface.vcmdq_ready || - wb_vec_ximm1q_enq && !io.iface.vximm1q_ready || - wb_vec_ximm2q_enq && !io.iface.vximm2q_ready || - wb_vec_cntq_enq && !io.iface.vcntq_ready || - wb_vec_pfcmdq_enq_mask_pfq && !io.iface.vpfcmdq_ready || - wb_vec_pfximm1q_enq_mask_pfq && !io.iface.vpfximm1q_ready || - wb_vec_pfximm2q_enq_mask_pfq && !io.iface.vpfximm2q_ready || - wb_vec_pfcntq_enq_mask_pfq && !io.iface.vpfcntq_ready || - wb_vec_fence_cv && !io.iface.vfence_ready + !mask_cmdq_ready || !mask_ximm1q_ready || !mask_ximm2q_ready || !mask_cntq_ready || + !mask_pfcmdq_ready || !mask_pfximm1q_ready || !mask_pfximm2q_ready || !mask_pfcntq_ready || + dec.io.sigs.vfence && 
!io.iface.vfence_ready ) - val reg_xcptwait = Reg(resetVal = Bool(false)) - val do_xcptwait = valid_common && wb_vec_xcptwait && !io.replay - - when (io.iface.exception_ack_valid) { reg_xcptwait := Bool(false) } - when (do_xcptwait) { reg_xcptwait := Bool(true) } - io.iface.exception := io.exception && io.sr_ev - io.iface.exception_ack_ready := reg_xcptwait val reg_hold = Reg(resetVal = Bool(false)) - when (valid_common && wb_vec_xcpthold) { reg_hold := Bool(true) } + when (valid_common && dec.io.sigs.xcpthold) { reg_hold := Bool(true) } when (io.eret) { reg_hold := Bool(false) } - io.iface.evac := valid_common && wb_vec_xcptevac.toBool - io.iface.kill := valid_common && wb_vec_xcptkill.toBool + io.iface.evac := valid_common && dec.io.sigs.xcptevac + io.iface.kill := valid_common && dec.io.sigs.xcptkill io.iface.hold := reg_hold - io.stalld := reg_xcptwait io.vfence_ready := !io.sr_ev || io.iface.vfence_ready io.irq := io.iface.irq io.irq_cause := io.iface.irq_cause diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 463b154c..3212b53e 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -387,7 +387,7 @@ class rocketDpath extends Component vec.io.ctrl <> io.vec_ctrl io.vec_iface <> vec.io.iface - vec.io.valid := io.ctrl.wb_valid + vec.io.valid := io.ctrl.wb_valid && pcr.io.status(SR_EV) vec.io.inst := wb_reg_inst vec.io.waddr := wb_reg_vec_waddr vec.io.raddr1 := wb_reg_raddr1 diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 1093b5f9..614b0fb5 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -253,7 +253,6 @@ object Instructions val VENQCNT = Bits("b00000_?????_?????_1100000110_1111011",32) val VXCPTEVAC = Bits("b00000_?????_00000_1100000000_1111011",32) val VXCPTKILL = Bits("b00000_00000_00000_1000000010_1111011",32) - val VXCPTWAIT = Bits("b00000_00000_00000_1100000001_1111011",32) val 
VXCPTHOLD = Bits("b00000_00000_00000_1100000010_1111011",32) val NOP = ADDI & Bits("b00000000000000000000001111111111", 32); From 65929a62e33a208b2f64256e4bb4dc7ccd6a0410 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 22 Mar 2012 15:32:04 -0700 Subject: [PATCH 0362/1087] fix reset value for appvl --- rocket/src/main/scala/dpath_vec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 5fcd20a7..251fd629 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -116,7 +116,7 @@ class rocketDpathVec extends Component val reg_nxregs = Reg(resetVal = UFix(32, 6)) val reg_nfregs = Reg(resetVal = UFix(32, 6)) - val reg_appvl = Reg(resetVal = UFix(32, 12)) + val reg_appvl = Reg(resetVal = UFix(0, 12)) when (io.valid) { From 54fa6f660ddf758e241961791a2e3b2bded0331e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Mar 2012 13:03:31 -0700 Subject: [PATCH 0363/1087] new supervisor mode --- rocket/src/main/scala/consts.scala | 29 +- rocket/src/main/scala/ctrl.scala | 463 +++++++++++------------ rocket/src/main/scala/dpath.scala | 28 +- rocket/src/main/scala/dpath_util.scala | 56 ++- rocket/src/main/scala/dtlb.scala | 2 +- rocket/src/main/scala/htif.scala | 4 +- rocket/src/main/scala/instructions.scala | 22 +- rocket/src/main/scala/itlb.scala | 2 +- 8 files changed, 287 insertions(+), 319 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 1fdba6cd..cd6d3882 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -63,7 +63,6 @@ object Constants val WB_X = UFix(0, 3); val WB_PC = UFix(0, 3); - val WB_PCR = UFix(1, 3); val WB_ALU = UFix(2, 3); val WB_TSC = UFix(4, 3); val WB_IRT = UFix(5, 3); @@ -117,10 +116,12 @@ object Constants val M_XA_MAX = Bits("b1101", 4); val M_XA_MINU = Bits("b1110", 4); val M_XA_MAXU = Bits("b1111", 4); - - 
val I_X = Bits(0,2); - val I_DI = Bits(1,2); - val I_EI = Bits(2,2); + + val PCR_N = Bits(0,3) + val PCR_F = Bits(1,3) // mfpcr + val PCR_T = Bits(4,3) // mtpcr + val PCR_C = Bits(6,3) // clearpcr + val PCR_S = Bits(7,3) // setpcr val SYNC_N = Bits(0,2); val SYNC_D = Bits(1,2); @@ -137,12 +138,14 @@ object Constants val PCR_SEND_IPI = UFix( 8, 5); val PCR_CLR_IPI = UFix( 9, 5); val PCR_COREID = UFix(10, 5); + val PCR_IMPL = UFix(11, 5); val PCR_K0 = UFix(12, 5); val PCR_K1 = UFix(13, 5); - val PCR_TOHOST = UFix(16, 5); - val PCR_FROMHOST = UFix(17, 5); val PCR_VECBANK = UFix(18, 5); val PCR_VECCFG = UFix(19, 5); + val PCR_RESET = UFix(29, 5); + val PCR_TOHOST = UFix(30, 5); + val PCR_FROMHOST = UFix(31, 5); // definition of bits in PCR status reg val SR_ET = 0; // enable traps @@ -151,9 +154,15 @@ object Constants val SR_EC = 3; // enable compressed instruction encoding val SR_PS = 4; // mode stack bit val SR_S = 5; // user/supervisor mode - val SR_UX = 6; // 64 bit user mode - val SR_SX = 7; // 64 bit supervisor mode - val SR_VM = 16; // VM enable + val SR_U64 = 6; // 64 bit user mode + val SR_S64 = 7; // 64 bit supervisor mode + val SR_VM = 8 // VM enable + val SR_IM = 16 // interrupt mask + val SR_IM_WIDTH = 8 + + val CAUSE_INTERRUPT = 32 + val IRQ_IPI = 5 + val IRQ_TIMER = 7 val COREID = 0; val PADDR_BITS = 40; diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 7ae0a6bc..2f942df9 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -30,8 +30,7 @@ class ioCtrlDpath extends Bundle() val div_fn = UFix(2, OUTPUT); val sel_wa = Bool(OUTPUT); val sel_wb = UFix(3, OUTPUT); - val ren_pcr = Bool(OUTPUT); - val wen_pcr = Bool(OUTPUT); + val pcr = UFix(3, OUTPUT) val id_eret = Bool(OUTPUT); val wb_eret = Bool(OUTPUT); val mem_load = Bool(OUTPUT); @@ -43,12 +42,9 @@ class ioCtrlDpath extends Bundle() val wb_valid = Bool(OUTPUT) val flush_inst = Bool(OUTPUT); val ex_mem_type = UFix(3,OUTPUT) - // 
enable/disable interrupts - val irq_enable = Bool(OUTPUT); - val irq_disable = Bool(OUTPUT); // exception handling val exception = Bool(OUTPUT); - val cause = UFix(5,OUTPUT); + val cause = UFix(6,OUTPUT); val badvaddr_wen = Bool(OUTPUT); // high for a load/store access fault val vec_irq_aux_wen = Bool(OUTPUT) // inputs from datapath @@ -66,7 +62,7 @@ class ioCtrlDpath extends Bundle() val ex_waddr = UFix(5,INPUT); // write addr from execute stage val mem_waddr = UFix(5,INPUT); // write addr from memory stage val wb_waddr = UFix(5,INPUT); // write addr from writeback stage - val status = Bits(17, INPUT); + val status = Bits(32, INPUT); val sboard_clr = Bool(INPUT); val sboard_clra = UFix(5, INPUT); val fp_sboard_clr = Bool(INPUT); @@ -97,217 +93,217 @@ object rocketCtrlDecode val xpr64 = Y; val decode_default = - // vfence - // | eret - // | | syscall - // vec_val mem_val mul_val div_val renpcr | | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N) + // vfence + // | eret + // | | syscall + // vec_val mem_val mul_val div_val pcr | | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N) val xdecode = Array( - // vfence - // | eret - // | | syscall - // vec_val mem_val mul_val div_val renpcr | | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,BR_NE, 
REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - BEQ-> List(Y, N,BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - BLT-> List(Y, N,BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - BLTU-> List(Y, N,BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - BGE-> List(Y, N,BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - BGEU-> List(Y, N,BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + // vfence + // | eret + // | | syscall + // vec_val mem_val mul_val div_val pcr | | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wsync | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,BR_NE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + BEQ-> List(Y, N,BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + BLT-> List(Y, N,BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + BLTU-> List(Y, N,BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + BGE-> List(Y, N,BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + BGEU-> List(Y, N,BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - J-> List(Y, N,BR_J, 
REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - JAL-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - JALR_C-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - JALR_J-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - JALR_R-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - RDNPC-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + J-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + JAL-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, PCR_N,SYNC_N,N,N,N,N,N), + JALR_C-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N,N), + JALR_J-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N,N), + JALR_R-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N,N), + RDNPC-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N,N), - LB-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - LH-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X 
,SYNC_N,N,N,N,N,N), - LW-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - LD-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - LBU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - LHU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - LWU-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SW-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SD-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + LB-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + LH-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + LW-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + LD-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + LBU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, 
MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + LHU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + LWU-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SW-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SD-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOADD_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOSWAP_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOAND_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOOR_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOMIN_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOMINU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOMAX_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X 
,SYNC_N,N,N,N,N,N), - AMOMAXU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOADD_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOAND_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOOR_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOMAX_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + AMOADD_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOSWAP_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOAND_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOOR_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, 
N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOMIN_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOMINU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOMAX_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOMAXU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOADD_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOSWAP_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOAND_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOOR_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOMAX_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - LUI-> List(Y, N,BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X 
,SYNC_N,N,N,N,N,N), - ADDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SLTI -> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SLTIU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - ANDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - ORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - XORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SLLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SRLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SRAI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - ADD-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SUB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SLT-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SLTU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - riscvAND-> List(Y, N,BR_N, 
REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - riscvOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - riscvXOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SLL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SRL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SRA-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + LUI-> List(Y, N,BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + ADDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SLTI -> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SLTIU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + ANDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + ORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + XORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SLLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SRLI-> List(Y, N,BR_N, 
REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SRAI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + ADD-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SUB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SLT-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SLTU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + riscvAND-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + riscvOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + riscvXOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SLL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SRL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SRA-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - ADDIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SLLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SRLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, 
M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SRAIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - ADDW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SUBW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SLLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SRLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + ADDIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SLLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SRLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SRAIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + ADDW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SUBW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SLLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SRLW-> 
List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_H, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - MULHSU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_H, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + MULHSU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - DIV-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - DIVU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - REM-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, 
DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - REMU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - DIVW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - DIVUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - REMW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - REMUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), + DIV-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + DIVU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + REM-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + REMU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + DIVW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + DIVUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + REMW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + REMUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - SYSCALL-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, 
WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,Y,N,N), - EI-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_EI,SYNC_N,N,N,N,Y,Y), - DI-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_DI,SYNC_N,N,N,N,Y,Y), - ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_PCR,REN_N,WEN_N,I_X ,SYNC_N,N,Y,N,Y,N), - FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_D,N,N,N,N,N), - FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_I,N,N,N,N,Y), - CFLUSH-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,Y,Y), - MFPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PCR,REN_Y,WEN_N,I_X ,SYNC_N,N,N,N,Y,N), - MTPCR-> List(Y, N,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_Y,I_X ,SYNC_N,N,N,N,Y,Y), - RDTIME-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - RDCYCLE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - RDINSTRET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N)) + SYSCALL-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N,N), + SETPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_ITYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_S,SYNC_N,N,N,N,Y,Y), + CLEARPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_ITYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, 
N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_C,SYNC_N,N,N,N,Y,Y), + ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,Y,N), + FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N,N), + FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,N,Y), + CFLUSH-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,Y,Y), + MFPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_F,SYNC_N,N,N,N,Y,Y), + MTPCR-> List(Y, N,BR_N, REN_Y,REN_N,A2_RTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_T,SYNC_N,N,N,N,Y,Y), + RDTIME-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N,N), + RDCYCLE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N,N), + RDINSTRET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N,N)) val fdecode = Array( - // vfence - // | eret - // | | syscall - // vec_val mem_val mul_val div_val renpcr | | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | | | - MFTX_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - MFTX_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_W_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, 
WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_W_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_WU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_WU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_L_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_L_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_LU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_LU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FEQ_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FEQ_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FLT_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FLT_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FLE_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FLE_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - MXTF_S-> List(FPU_Y,N,BR_N, 
REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - MXTF_D-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_S_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_D_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_S_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_D_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_S_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_D_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_S_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FCVT_D_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FLW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FLD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, 
N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FSW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N), - FSD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X ,SYNC_N,N,N,N,N,N)) + // vfence + // | eret + // | | syscall + // vec_val mem_val mul_val div_val pcr | | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + MFTX_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + MFTX_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_W_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_W_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_WU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_WU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_L_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_L_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_LU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_LU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, 
PCR_N,SYNC_N,N,N,N,N,N), + FEQ_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FEQ_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FLT_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FLT_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FLE_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FLE_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + MXTF_S-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + MXTF_D-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_S_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_D_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_S_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_D_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_S_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_D_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_S_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, 
M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FCVT_D_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FLW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + FLD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + FSW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + FSD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N)) val vdecode = Array( - // vfence - // | eret - // | | syscall - // vec_val mem_val mul_val div_val renpcr | | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wenpcr irq sync | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | | | - VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y), - VVCFG-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y), - VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,Y), - VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VMVV-> 
List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - FENCE_V_L-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,Y,N,N,N,N), - FENCE_V_G-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,Y,N,N,N,N), - VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLBU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, 
DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, 
M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), - VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,N,N), + // vfence + // | eret + // | | syscall + // vec_val mem_val mul_val div_val pcr | | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,Y), + VVCFG-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,Y), + VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,Y), + VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + FENCE_V_L-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,N,N,N), + FENCE_V_G-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,N,N,N), + VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLBU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + 
VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VSSTD-> 
List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VENQCMD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), - VENQIMM1-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), - VENQIMM2-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), - VENQCNT-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), - VXCPTEVAC-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), - VXCPTKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, 
N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N), - VXCPTHOLD-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, REN_N,WEN_N,I_X, SYNC_N,N,N,N,Y,N)) + VENQCMD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y,N), + VENQIMM1-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y,N), + VENQIMM2-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y,N), + VENQCNT-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y,N), + VXCPTEVAC-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y,N), + VXCPTKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,Y,N), + VXCPTHOLD-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,Y,N)) } class rocketCtrl extends Component @@ -322,7 +318,7 @@ class rocketCtrl extends Component val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 - val id_ren_pcr :: id_wen_pcr :: id_irq :: id_sync :: id_vfence :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 + val id_pcr :: id_sync :: id_vfence :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst, resetVal = Bool(false)); @@ -350,12 +346,10 @@ class rocketCtrl extends Component val ex_reg_mem_cmd = Reg(){UFix(width = 
4)}; val ex_reg_mem_type = Reg(){UFix(width = 3)}; val ex_reg_valid = Reg(resetVal = Bool(false)); - val ex_reg_wen_pcr = Reg(resetVal = Bool(false)); + val ex_reg_pcr = Reg(resetVal = PCR_N); val ex_reg_wen = Reg(resetVal = Bool(false)); val ex_reg_fp_wen = Reg(resetVal = Bool(false)); val ex_reg_eret = Reg(resetVal = Bool(false)); - val ex_reg_inst_di = Reg(resetVal = Bool(false)); - val ex_reg_inst_ei = Reg(resetVal = Bool(false)); val ex_reg_flush_inst = Reg(resetVal = Bool(false)); val ex_reg_xcpt_interrupt = Reg(resetVal = Bool(false)); val ex_reg_cause = Reg(){UFix()} @@ -371,11 +365,9 @@ class rocketCtrl extends Component val ex_reg_load_use = Reg(resetVal = Bool(false)); val mem_reg_valid = Reg(resetVal = Bool(false)); - val mem_reg_wen_pcr = Reg(resetVal = Bool(false)); + val mem_reg_pcr = Reg(resetVal = PCR_N); val mem_reg_wen = Reg(resetVal = Bool(false)); val mem_reg_fp_wen = Reg(resetVal = Bool(false)); - val mem_reg_inst_di = Reg(resetVal = Bool(false)); - val mem_reg_inst_ei = Reg(resetVal = Bool(false)); val mem_reg_flush_inst = Reg(resetVal = Bool(false)); val mem_reg_xcpt_interrupt = Reg(resetVal = Bool(false)); val mem_reg_cause = Reg(){UFix()} @@ -392,11 +384,9 @@ class rocketCtrl extends Component val mem_reg_fp_sboard_set = Reg(resetVal = Bool(false)); val wb_reg_valid = Reg(resetVal = Bool(false)); - val wb_reg_wen_pcr = Reg(resetVal = Bool(false)); + val wb_reg_pcr = Reg(resetVal = PCR_N); val wb_reg_wen = Reg(resetVal = Bool(false)); val wb_reg_fp_wen = Reg(resetVal = Bool(false)); - val wb_reg_inst_di = Reg(resetVal = Bool(false)); - val wb_reg_inst_ei = Reg(resetVal = Bool(false)); val wb_reg_flush_inst = Reg(resetVal = Bool(false)); val wb_reg_eret = Reg(resetVal = Bool(false)); val wb_reg_exception = Reg(resetVal = Bool(false)); @@ -429,7 +419,7 @@ class rocketCtrl extends Component var vec_replay = Bool(false) var vec_stalld = Bool(false) var vec_irq = Bool(false) - var vec_irq_cause = UFix(23,5) // don't care + var vec_irq_cause = 
UFix(CAUSE_INTERRUPT+IRQ_IPI) // don't care if (HAVE_VEC) { // vector control @@ -473,16 +463,16 @@ class rocketCtrl extends Component !(id_int_val.toBool || io.fpu.dec.valid || id_vec_val.toBool) || (id_eret.toBool && io.dpath.status(SR_ET).toBool); - val p_irq_timer = (io.dpath.status(15).toBool && io.dpath.irq_timer); - val p_irq_ipi = (io.dpath.status(13).toBool && io.dpath.irq_ipi); + val p_irq_timer = (io.dpath.status(SR_IM+IRQ_TIMER).toBool && io.dpath.irq_timer); + val p_irq_ipi = (io.dpath.status(SR_IM+IRQ_IPI).toBool && io.dpath.irq_ipi); val id_interrupt = io.dpath.status(SR_ET).toBool && - ((io.dpath.status(15).toBool && io.dpath.irq_timer) || - (io.dpath.status(13).toBool && io.dpath.irq_ipi) || + ((io.dpath.status(SR_IM+IRQ_TIMER).toBool && io.dpath.irq_timer) || + (io.dpath.status(SR_IM+IRQ_IPI).toBool && io.dpath.irq_ipi) || vec_irq); val id_cause = - Mux(p_irq_ipi, UFix(21,5), - Mux(p_irq_timer, UFix(23,5), + Mux(p_irq_ipi, UFix(CAUSE_INTERRUPT+IRQ_IPI,6), + Mux(p_irq_timer, UFix(CAUSE_INTERRUPT+IRQ_IPI,6), vec_irq_cause)) when (reset.toBool || io.dpath.killd) { @@ -492,12 +482,10 @@ class rocketCtrl extends Component ex_reg_mul_val := Bool(false); ex_reg_mem_val := Bool(false); ex_reg_valid := Bool(false); - ex_reg_wen_pcr := Bool(false) + ex_reg_pcr := PCR_N ex_reg_wen := Bool(false); ex_reg_fp_wen := Bool(false); ex_reg_eret := Bool(false); - ex_reg_inst_di := Bool(false); - ex_reg_inst_ei := Bool(false); ex_reg_flush_inst := Bool(false); ex_reg_xcpt_ma_inst := Bool(false); ex_reg_xcpt_itlb := Bool(false); @@ -517,12 +505,10 @@ class rocketCtrl extends Component ex_reg_mul_val := id_mul_val.toBool && id_waddr != UFix(0); ex_reg_mem_val := id_mem_val.toBool; ex_reg_valid := id_reg_valid - ex_reg_wen_pcr := id_wen_pcr + ex_reg_pcr := id_pcr ex_reg_wen := id_wen.toBool && id_waddr != UFix(0); ex_reg_fp_wen := io.fpu.dec.wen; ex_reg_eret := id_eret.toBool; - ex_reg_inst_di := (id_irq === I_DI); - ex_reg_inst_ei := (id_irq === I_EI); 
ex_reg_flush_inst := (id_sync === SYNC_I); ex_reg_xcpt_ma_inst := id_reg_xcpt_ma_inst; ex_reg_xcpt_itlb := id_reg_xcpt_itlb; @@ -565,14 +551,12 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killx) { mem_reg_valid := Bool(false); - mem_reg_wen_pcr := Bool(false) + mem_reg_pcr := PCR_N mem_reg_div_mul_val := Bool(false); mem_reg_wen := Bool(false); mem_reg_fp_wen := Bool(false); mem_reg_eret := Bool(false); mem_reg_mem_val := Bool(false); - mem_reg_inst_di := Bool(false); - mem_reg_inst_ei := Bool(false); mem_reg_flush_inst := Bool(false); mem_reg_xcpt_ma_inst := Bool(false); mem_reg_xcpt_itlb := Bool(false); @@ -586,14 +570,12 @@ class rocketCtrl extends Component } .otherwise { mem_reg_valid := ex_reg_valid - mem_reg_wen_pcr := ex_reg_wen_pcr + mem_reg_pcr := ex_reg_pcr mem_reg_div_mul_val := ex_reg_div_val || ex_reg_mul_val; mem_reg_wen := ex_reg_wen; mem_reg_fp_wen := ex_reg_fp_wen; mem_reg_eret := ex_reg_eret; mem_reg_mem_val := ex_reg_mem_val; - mem_reg_inst_di := ex_reg_inst_di; - mem_reg_inst_ei := ex_reg_inst_ei; mem_reg_flush_inst := ex_reg_flush_inst; mem_reg_xcpt_ma_inst := ex_reg_xcpt_ma_inst; mem_reg_xcpt_itlb := ex_reg_xcpt_itlb; @@ -612,12 +594,10 @@ class rocketCtrl extends Component when (io.dpath.killm) { wb_reg_valid := Bool(false) - wb_reg_wen_pcr := Bool(false) + wb_reg_pcr := PCR_N wb_reg_wen := Bool(false); wb_reg_fp_wen := Bool(false); wb_reg_eret := Bool(false); - wb_reg_inst_di := Bool(false); - wb_reg_inst_ei := Bool(false); wb_reg_flush_inst := Bool(false); wb_reg_div_mul_val := Bool(false); wb_reg_fp_val := Bool(false) @@ -625,12 +605,10 @@ class rocketCtrl extends Component } .otherwise { wb_reg_valid := mem_reg_valid - wb_reg_wen_pcr := mem_reg_wen_pcr + wb_reg_pcr := mem_reg_pcr wb_reg_wen := mem_reg_wen; wb_reg_fp_wen := mem_reg_fp_wen; wb_reg_eret := mem_reg_eret; - wb_reg_inst_di := mem_reg_inst_di; - wb_reg_inst_ei := mem_reg_inst_ei; wb_reg_flush_inst := mem_reg_flush_inst; wb_reg_div_mul_val := 
mem_reg_div_mul_val; wb_reg_fp_val := mem_reg_fp_val @@ -752,10 +730,10 @@ class rocketCtrl extends Component io.dpath.exception := wb_reg_exception; io.dpath.cause := wb_reg_cause; io.dpath.badvaddr_wen := wb_badvaddr_wen; - io.dpath.vec_irq_aux_wen := wb_reg_exception && wb_reg_cause >= UFix(24) + io.dpath.vec_irq_aux_wen := wb_reg_exception && wb_reg_cause >= UFix(24) && wb_reg_cause < UFix(32) io.dpath.sel_pc := - Mux(wb_reg_exception, PC_EVEC, // exception + Mux(wb_reg_exception, PC_PCR, // exception Mux(replay_wb, PC_WB, // replay Mux(wb_reg_eret, PC_PCR, // eret instruction Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch @@ -853,12 +831,9 @@ class rocketCtrl extends Component io.dpath.wb_valid := wb_reg_valid && !vec_replay io.dpath.sel_wa := id_sel_wa.toBool; io.dpath.sel_wb := id_sel_wb; - io.dpath.ren_pcr := id_ren_pcr.toBool; - io.dpath.wen_pcr := wb_reg_wen_pcr + io.dpath.pcr := wb_reg_pcr.toUFix io.dpath.id_eret := id_eret.toBool; io.dpath.wb_eret := wb_reg_eret; - io.dpath.irq_disable := wb_reg_inst_di; - io.dpath.irq_enable := wb_reg_inst_ei; io.dpath.ex_mem_type := ex_reg_mem_type io.fpu.valid := !io.dpath.killd && io.fpu.dec.valid diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 3212b53e..286b746f 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -71,7 +71,6 @@ class rocketDpath extends Component val ex_reg_ctrl_div_val = Reg(resetVal = Bool(false)); val ex_reg_ctrl_div_fn = Reg() { UFix() }; val ex_reg_ctrl_sel_wb = Reg() { UFix() }; - val ex_reg_ctrl_ren_pcr = Reg(resetVal = Bool(false)); val ex_wdata = Wire() { Bits() }; // memory definitions @@ -116,10 +115,9 @@ class rocketDpath extends Component Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, Mux(io.ctrl.sel_pc === PC_JR, ex_effective_address, - Mux(io.ctrl.sel_pc === PC_PCR, wb_reg_wdata(VADDR_BITS,0), // only used for ERET - Mux(io.ctrl.sel_pc 
=== PC_EVEC, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec), + Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec), Mux(io.ctrl.sel_pc === PC_WB, wb_reg_pc, - if_pc_plus4))))))); // PC_4 + if_pc_plus4)))))) // PC_4 when (!io.ctrl.stallf) { if_reg_pc := if_next_pc.toUFix; @@ -219,7 +217,6 @@ class rocketDpath extends Component ex_reg_ctrl_mul_fn := io.ctrl.mul_fn; ex_reg_ctrl_div_fn := io.ctrl.div_fn; ex_reg_ctrl_sel_wb := io.ctrl.sel_wb; - ex_reg_ctrl_ren_pcr := io.ctrl.ren_pcr; when(io.ctrl.killd) { ex_reg_ctrl_div_val := Bool(false); @@ -284,10 +281,8 @@ class rocketDpath extends Component io.dtlb.vpn := ex_effective_address >> UFix(PGIDX_BITS) // processor control regfile read - pcr.io.r.en := ex_reg_ctrl_ren_pcr | ex_reg_ctrl_eret; - pcr.io.r.addr := - Mux(ex_reg_ctrl_eret, PCR_EPC, - ex_reg_raddr2); + pcr.io.r.en := io.ctrl.pcr != PCR_N + pcr.io.r.addr := wb_reg_raddr1 pcr.io.host <> io.host @@ -315,10 +310,9 @@ class rocketDpath extends Component // writeback select mux ex_wdata := Mux(ex_reg_ctrl_sel_wb === WB_PC, Cat(Fill(64-VADDR_BITS, ex_pc_plus4(VADDR_BITS-1)), ex_pc_plus4), - Mux(ex_reg_ctrl_sel_wb === WB_PCR, ex_pcr, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, - ex_alu_out)))).toBits; // WB_ALU + ex_alu_out))).toBits // WB_ALU // subword store data generation val storegen = new StoreDataGen @@ -420,7 +414,7 @@ class rocketDpath extends Component rfile.io.w0.addr := wb_reg_waddr rfile.io.w0.en := io.ctrl.wb_wen || wb_reg_ll_wb - rfile.io.w0.data := wb_wdata + rfile.io.w0.data := Mux(io.ctrl.pcr != PCR_N, pcr.io.r.data, wb_wdata) io.ctrl.wb_waddr := wb_reg_waddr io.ctrl.mem_wb := dmem_resp_replay; @@ -432,12 +426,12 @@ class rocketDpath extends Component io.ctrl.fp_sboard_clra := r_dmem_resp_waddr // processor control regfile write - pcr.io.w.addr := wb_reg_raddr2; - pcr.io.w.en := io.ctrl.wen_pcr - pcr.io.w.data := wb_reg_wdata + pcr.io.w.addr := wb_reg_raddr1 + pcr.io.w.en := io.ctrl.pcr 
=== PCR_T || io.ctrl.pcr === PCR_S || io.ctrl.pcr === PCR_C + pcr.io.w.data := Mux(io.ctrl.pcr === PCR_S, pcr.io.r.data | wb_reg_wdata, + Mux(io.ctrl.pcr === PCR_C, pcr.io.r.data & ~wb_reg_wdata, + wb_reg_wdata)) - pcr.io.di := io.ctrl.irq_disable; - pcr.io.ei := io.ctrl.irq_enable; pcr.io.eret := io.ctrl.wb_eret; pcr.io.exception := io.ctrl.exception; pcr.io.cause := io.ctrl.cause; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 3856beb8..5583d6ed 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -64,11 +64,11 @@ class ioDpathPCR extends Bundle() val r = new ioReadPort(); val w = new ioWritePort(); - val status = Bits(17, OUTPUT); + val status = Bits(32, OUTPUT); val ptbr = UFix(PADDR_BITS, OUTPUT); val evec = UFix(VADDR_BITS, OUTPUT); val exception = Bool(INPUT); - val cause = UFix(5, INPUT); + val cause = UFix(6, INPUT); val badvaddr_wen = Bool(INPUT); val vec_irq_aux = Bits(64, INPUT) val vec_irq_aux_wen = Bool(INPUT) @@ -105,7 +105,7 @@ class rocketDpathPCR extends Component val reg_error_mode = Reg(resetVal = Bool(false)); val reg_status_vm = Reg(resetVal = Bool(false)); - val reg_status_im = Reg(resetVal = Bits(0,8)); + val reg_status_im = Reg(resetVal = Bits(0,SR_IM_WIDTH)); val reg_status_sx = Reg(resetVal = Bool(true)); val reg_status_ux = Reg(resetVal = Bool(true)); val reg_status_ec = Reg(resetVal = Bool(false)); @@ -118,7 +118,6 @@ class rocketDpathPCR extends Component val r_irq_timer = Reg(resetVal = Bool(false)); val r_irq_ipi = Reg(resetVal = Bool(false)); - val reg_status = Cat(reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, reg_status_ec, reg_status_ev, reg_status_ef, reg_status_et); val rdata = Wire() { Bits() }; val ren = io.r.en || io.host.pcr_ren @@ -131,8 +130,8 @@ class rocketDpathPCR extends Component io.host.pcr_rdy := Mux(io.host.pcr_wen, !io.w.en, !io.r.en) io.ptbr_wen := reg_status_vm.toBool && wen && (waddr === PCR_PTBR); - 
io.status := Cat(reg_status_vm, reg_status_im, reg_status); - io.evec := reg_ebase; + io.status := Cat(reg_status_im, Bits(0,7), reg_status_vm, reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, reg_status_ec, reg_status_ev, reg_status_ef, reg_status_et); + io.evec := Mux(io.exception, reg_ebase, reg_epc) io.ptbr := reg_ptbr; io.debug.error_mode := reg_error_mode; io.r.data := rdata; @@ -164,14 +163,6 @@ class rocketDpathPCR extends Component } } - when (io.di) { - reg_status_et := Bool(false); - } - - when (io.ei) { - reg_status_et := Bool(true); - } - when (io.eret) { reg_status_s := reg_status_ps; reg_status_et := Bool(true); @@ -188,9 +179,9 @@ class rocketDpathPCR extends Component when (wen) { when (waddr === PCR_STATUS) { reg_status_vm := wdata(SR_VM).toBool; - reg_status_im := wdata(15,8); - reg_status_sx := wdata(SR_SX).toBool; - reg_status_ux := wdata(SR_UX).toBool; + reg_status_im := wdata(SR_IM_WIDTH+SR_IM,SR_IM); + reg_status_sx := wdata(SR_S64).toBool; + reg_status_ux := wdata(SR_U64).toBool; reg_status_s := wdata(SR_S).toBool; reg_status_ps := wdata(SR_PS).toBool; reg_status_ev := Bool(HAVE_VEC) && wdata(SR_EV).toBool; @@ -198,33 +189,32 @@ class rocketDpathPCR extends Component reg_status_ec := Bool(HAVE_RVC) && wdata(SR_EC).toBool; reg_status_et := wdata(SR_ET).toBool; } - when (waddr === PCR_EPC) { reg_epc := wdata(VADDR_BITS,0).toUFix; } - when (waddr === PCR_BADVADDR) { reg_badvaddr := wdata(VADDR_BITS,0).toUFix; } - when (waddr === PCR_EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toUFix; } - when (waddr === PCR_COUNT) { reg_count := wdata(31,0).toUFix; } - when (waddr === PCR_COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } - when (waddr === PCR_CAUSE) { reg_cause := wdata(4,0); } - when (waddr === PCR_TOHOST) { reg_tohost := wdata; reg_fromhost := Bits(0) } - when (waddr === PCR_FROMHOST) { reg_fromhost := wdata; reg_tohost := Bits(0) } - when (waddr === PCR_SEND_IPI) { r_irq_ipi := Bool(true); } - when (waddr 
=== PCR_CLR_IPI) { r_irq_ipi := Bool(false); } - when (waddr === PCR_K0) { reg_k0 := wdata; } - when (waddr === PCR_K1) { reg_k1 := wdata; } - when (waddr === PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } - when (waddr === PCR_VECBANK) { reg_vecbank := wdata(7,0) } + when (waddr === PCR_EPC) { reg_epc := wdata(VADDR_BITS,0).toUFix; } + when (waddr === PCR_EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toUFix; } + when (waddr === PCR_COUNT) { reg_count := wdata(31,0).toUFix; } + when (waddr === PCR_COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } + when (waddr === PCR_FROMHOST) { reg_fromhost := wdata; reg_tohost := Bits(0) } + when (waddr === PCR_TOHOST) { reg_tohost := wdata; reg_fromhost := Bits(0) } + when (waddr === PCR_SEND_IPI) { r_irq_ipi := Bool(true); } + when (waddr === PCR_CLR_IPI) { r_irq_ipi := Bool(false); } + when (waddr === PCR_K0) { reg_k0 := wdata; } + when (waddr === PCR_K1) { reg_k1 := wdata; } + when (waddr === PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } + when (waddr === PCR_VECBANK) { reg_vecbank:= wdata(7,0) } } rdata := Bits(0, 64) when (ren) { switch (raddr) { - is (PCR_STATUS) { rdata := Cat(Bits(0,47), reg_status_vm, reg_status_im, reg_status); } + is (PCR_STATUS) { rdata := io.status } is (PCR_EPC) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_epc(VADDR_BITS)), reg_epc); } is (PCR_BADVADDR) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_badvaddr(VADDR_BITS)), reg_badvaddr); } is (PCR_EVEC) { rdata := Cat(Fill(64-VADDR_BITS, reg_ebase(VADDR_BITS-1)), reg_ebase); } is (PCR_COUNT) { rdata := Cat(Fill(32, reg_count(31)), reg_count); } is (PCR_COMPARE) { rdata := Cat(Fill(32, reg_compare(31)), reg_compare); } - is (PCR_CAUSE) { rdata := Cat(Bits(0,59), reg_cause); } + is (PCR_CAUSE) { rdata := Cat(reg_cause(5), Bits(0,58), reg_cause(4,0)); } is (PCR_COREID) { rdata := Bits(COREID,64); } + is (PCR_IMPL) { rdata := Bits(2) } is 
(PCR_FROMHOST) { rdata := reg_fromhost; } is (PCR_TOHOST) { rdata := reg_tohost; } is (PCR_K0) { rdata := reg_k0; } diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index bd27bf59..cd1917c0 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -32,7 +32,7 @@ class ioDTLB_CPU_resp extends Bundle class ioDTLB extends Bundle { // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(17,INPUT) + val status = Bits(32, INPUT) // invalidate all TLB entries val invalidate = Bool(INPUT) val cpu_req = new ioDTLB_CPU_req().flip diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index bcf11b09..a5c6cd0a 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -177,7 +177,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence val my_reset = Reg(resetVal = Bool(true)) when (io.cpu(i).pcr_wen && io.cpu(i).pcr_rdy) { - when (io.cpu(i).pcr_addr === UFix(15)) { my_reset := io.cpu(i).pcr_wdata(0) } + when (io.cpu(i).pcr_addr === PCR_RESET) { my_reset := io.cpu(i).pcr_wdata(0) } pcr_done := Bool(true) } io.cpu(i).reset := my_reset @@ -186,7 +186,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence val rdata = Reg() { Bits() } when (io.cpu(i).pcr_ren && io.cpu(i).pcr_rdy) { rdata := io.cpu(i).pcr_rdata - when (io.cpu(i).pcr_addr === UFix(15)) { rdata := my_reset } + when (io.cpu(i).pcr_addr === PCR_RESET) { rdata := my_reset } pcr_done := Bool(true) } pcr_mux.io.sel(i) := Reg(me) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 614b0fb5..3bd73b80 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -94,10 +94,10 @@ object Instructions val RDCYCLE = Bits("b?????_00000_00000_0000000100_1110111",32); val RDTIME = 
Bits("b?????_00000_00000_0000001100_1110111",32); val RDINSTRET = Bits("b?????_00000_00000_0000010100_1110111",32); - val EI = Bits("b?????_00000_00000_0000000000_1111011",32); - val DI = Bits("b?????_00000_00000_0000000001_1111011",32); - val MFPCR = Bits("b?????_00000_?????_0000000010_1111011",32); - val MTPCR = Bits("b00000_?????_?????_0000000011_1111011",32); + val CLEARPCR = Bits("b?????_?????_????????????_000_1111011",32); + val SETPCR = Bits("b?????_?????_????????????_001_1111011",32); + val MFPCR = Bits("b?????_?????_00000_0000000010_1111011",32); + val MTPCR = Bits("b?????_?????_?????_0000000011_1111011",32); val ERET = Bits("b00000_00000_00000_0000000100_1111011",32); val CFLUSH = Bits("b00000_00000_00000_0000000101_1111011",32); // floating point instructions @@ -247,13 +247,13 @@ object Instructions val VSETVL = Bits("b?????_?????_000000000000_101_1110011",32); val VF = Bits("b00000_?????_????????????_111_1110011",32); // vector supervisor instructions - val VENQCMD = Bits("b00000_?????_?????_1100000011_1111011",32) - val VENQIMM1 = Bits("b00000_?????_?????_1100000100_1111011",32) - val VENQIMM2 = Bits("b00000_?????_?????_1100000101_1111011",32) - val VENQCNT = Bits("b00000_?????_?????_1100000110_1111011",32) - val VXCPTEVAC = Bits("b00000_?????_00000_1100000000_1111011",32) - val VXCPTKILL = Bits("b00000_00000_00000_1000000010_1111011",32) - val VXCPTHOLD = Bits("b00000_00000_00000_1100000010_1111011",32) + val VENQCMD = Bits("b00000_?????_?????_0001010110_1111011",32) + val VENQIMM1 = Bits("b00000_?????_?????_0001011110_1111011",32) + val VENQIMM2 = Bits("b00000_?????_?????_0001100110_1111011",32) + val VENQCNT = Bits("b00000_?????_?????_0001101110_1111011",32) + val VXCPTKILL = Bits("b00000_00000_00000_0000010110_1111011",32) + val VXCPTEVAC = Bits("b00000_?????_00000_0001000110_1111011",32) + val VXCPTHOLD = Bits("b00000_00000_00000_0001001110_1111011",32) val NOP = ADDI & Bits("b00000000000000000000001111111111", 32); } diff --git 
a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 06cd6e2b..2178aa00 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -62,7 +62,7 @@ class ioTLB_PTW extends Bundle class ioITLB_CPU(view: List[String] = null) extends Bundle(view) { // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(17, INPUT); + val status = Bits(32, INPUT); // invalidate all TLB entries val invalidate = Bool(INPUT); // lookup requests From 3a487ac89bfbeac2070a5a37548b8ef1b6a20abe Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Mar 2012 16:23:29 -0700 Subject: [PATCH 0364/1087] improve htif<->pcr interface --- rocket/src/main/scala/dpath_util.scala | 51 ++++++++++++------------- rocket/src/main/scala/htif.scala | 52 ++++++++++++++------------ 2 files changed, 53 insertions(+), 50 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 5583d6ed..3e4fe3d2 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -120,14 +120,14 @@ class rocketDpathPCR extends Component val rdata = Wire() { Bits() }; - val ren = io.r.en || io.host.pcr_ren - val raddr = Mux(io.r.en, io.r.addr, io.host.pcr_addr) - io.host.pcr_rdata := rdata + val raddr = Mux(io.r.en, io.r.addr, io.host.pcr_req.bits.addr) + io.host.pcr_rep.valid := io.host.pcr_req.valid && !io.r.en && !io.host.pcr_req.bits.rw + io.host.pcr_rep.bits := rdata - val wen = io.w.en || io.host.pcr_wen - val waddr = Mux(io.w.en, io.w.addr, io.host.pcr_addr) - val wdata = Mux(io.w.en, io.w.data, io.host.pcr_wdata) - io.host.pcr_rdy := Mux(io.host.pcr_wen, !io.w.en, !io.r.en) + val wen = io.w.en || io.host.pcr_req.valid && io.host.pcr_req.bits.rw + val waddr = Mux(io.w.en, io.w.addr, io.host.pcr_req.bits.addr) + val wdata = Mux(io.w.en, io.w.data, io.host.pcr_req.bits.data) + io.host.pcr_req.ready := Mux(io.host.pcr_req.bits.rw, !io.w.en, 
!io.r.en) io.ptbr_wen := reg_status_vm.toBool && wen && (waddr === PCR_PTBR); io.status := Cat(reg_status_im, Bits(0,7), reg_status_vm, reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, reg_status_ec, reg_status_ev, reg_status_ef, reg_status_et); @@ -203,26 +203,23 @@ class rocketDpathPCR extends Component when (waddr === PCR_VECBANK) { reg_vecbank:= wdata(7,0) } } - rdata := Bits(0, 64) - when (ren) { - switch (raddr) { - is (PCR_STATUS) { rdata := io.status } - is (PCR_EPC) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_epc(VADDR_BITS)), reg_epc); } - is (PCR_BADVADDR) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_badvaddr(VADDR_BITS)), reg_badvaddr); } - is (PCR_EVEC) { rdata := Cat(Fill(64-VADDR_BITS, reg_ebase(VADDR_BITS-1)), reg_ebase); } - is (PCR_COUNT) { rdata := Cat(Fill(32, reg_count(31)), reg_count); } - is (PCR_COMPARE) { rdata := Cat(Fill(32, reg_compare(31)), reg_compare); } - is (PCR_CAUSE) { rdata := Cat(reg_cause(5), Bits(0,58), reg_cause(4,0)); } - is (PCR_COREID) { rdata := Bits(COREID,64); } - is (PCR_IMPL) { rdata := Bits(2) } - is (PCR_FROMHOST) { rdata := reg_fromhost; } - is (PCR_TOHOST) { rdata := reg_tohost; } - is (PCR_K0) { rdata := reg_k0; } - is (PCR_K1) { rdata := reg_k1; } - is (PCR_PTBR) { rdata := Cat(Bits(0,64-PADDR_BITS), reg_ptbr); } - is (PCR_VECBANK) { rdata := Cat(Bits(0, 56), reg_vecbank) } - is (PCR_VECCFG) { rdata := Cat(Bits(0, 40), io.vec_nfregs, io.vec_nxregs, io.vec_appvl) } - } + rdata := io.status // raddr === PCR_STATUS + switch (raddr) { + is (PCR_EPC) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_epc(VADDR_BITS)), reg_epc); } + is (PCR_BADVADDR) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_badvaddr(VADDR_BITS)), reg_badvaddr); } + is (PCR_EVEC) { rdata := Cat(Fill(64-VADDR_BITS, reg_ebase(VADDR_BITS-1)), reg_ebase); } + is (PCR_COUNT) { rdata := Cat(Fill(32, reg_count(31)), reg_count); } + is (PCR_COMPARE) { rdata := Cat(Fill(32, reg_compare(31)), reg_compare); } + is (PCR_CAUSE) { rdata := Cat(reg_cause(5), Bits(0,58), 
reg_cause(4,0)); } + is (PCR_COREID) { rdata := Bits(COREID,64); } + is (PCR_IMPL) { rdata := Bits(2) } + is (PCR_FROMHOST) { rdata := reg_fromhost; } + is (PCR_TOHOST) { rdata := reg_tohost; } + is (PCR_K0) { rdata := reg_k0; } + is (PCR_K1) { rdata := reg_k1; } + is (PCR_PTBR) { rdata := Cat(Bits(0,64-PADDR_BITS), reg_ptbr); } + is (PCR_VECBANK) { rdata := Cat(Bits(0, 56), reg_vecbank) } + is (PCR_VECCFG) { rdata := Cat(Bits(0, 40), io.vec_nfregs, io.vec_nxregs, io.vec_appvl) } } } diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index a5c6cd0a..b29d095a 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -10,15 +10,18 @@ class ioHost(w: Int, view: List[String] = null) extends Bundle(view) val out = new ioDecoupled()(Bits(width = w)) } +class PCRReq extends Bundle +{ + val rw = Bool() + val addr = Bits(width = 5) + val data = Bits(width = 64) +} + class ioHTIF extends Bundle { val reset = Bool(INPUT) - val pcr_wen = Bool(INPUT) - val pcr_ren = Bool(INPUT) - val pcr_rdy = Bool(OUTPUT) - val pcr_addr = Bits(5, INPUT) - val pcr_wdata = Bits(64, INPUT) - val pcr_rdata = Bits(64, OUTPUT) + val pcr_req = (new ioDecoupled) { new PCRReq }.flip + val pcr_rep = (new ioPipe) { Bits(width = 64) } } class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence @@ -170,27 +173,30 @@ class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence pcr_done := Bool(false) val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } for (i <- 0 until ncores) { - val me = pcr_coreid === UFix(i) - io.cpu(i).pcr_wen := Reg(state === state_pcr && cmd === cmd_writecr && me, resetVal = Bool(false)) - io.cpu(i).pcr_addr := Reg(pcr_addr) - io.cpu(i).pcr_wdata := Reg(pcr_wdata) - val my_reset = Reg(resetVal = Bool(true)) - when (io.cpu(i).pcr_wen && io.cpu(i).pcr_rdy) { - when (io.cpu(i).pcr_addr === PCR_RESET) { my_reset := io.cpu(i).pcr_wdata(0) } - pcr_done := Bool(true) - } - io.cpu(i).reset 
:= my_reset - - io.cpu(i).pcr_ren := Reg(state === state_pcr && cmd === cmd_readcr && me, resetVal = Bool(false)) val rdata = Reg() { Bits() } - when (io.cpu(i).pcr_ren && io.cpu(i).pcr_rdy) { - rdata := io.cpu(i).pcr_rdata - when (io.cpu(i).pcr_addr === PCR_RESET) { rdata := my_reset } + + val cpu = io.cpu(i) + val me = pcr_coreid === UFix(i) + cpu.pcr_req.valid := state === state_pcr && me + cpu.pcr_req.bits.rw := cmd === cmd_writecr + cpu.pcr_req.bits.addr := pcr_addr + cpu.pcr_req.bits.data := pcr_wdata + cpu.reset := my_reset + + when (cpu.pcr_req.valid && cpu.pcr_req.ready && cpu.pcr_req.bits.rw) { pcr_done := Bool(true) + when (cpu.pcr_req.bits.addr === PCR_RESET) { + my_reset := cpu.pcr_req.bits.data(0) + } } - pcr_mux.io.sel(i) := Reg(me) - pcr_mux.io.in(i) := rdata + when (cpu.pcr_rep.valid) { + pcr_done := Bool(true) + rdata := cpu.pcr_rep.bits + } + + pcr_mux.io.sel(i) := me + pcr_mux.io.in(i) := Mux(pcr_addr === PCR_RESET, my_reset, rdata) } val tx_cmd = Mux(nack, cmd_nack, cmd_ack) From 86d56ff67b2bc08793056203700e70bacf887928 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Mar 2012 16:56:59 -0700 Subject: [PATCH 0365/1087] refactor cpu/i$/d$ into Tile (rather than Top) --- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/cpu.scala | 7 ---- rocket/src/main/scala/dpath.scala | 2 -- rocket/src/main/scala/dpath_util.scala | 3 +- rocket/src/main/scala/htif.scala | 6 ++++ rocket/src/main/scala/tile.scala | 45 ++++++++++++++++++++++++++ rocket/src/main/scala/top.scala | 41 ++++------------------- 7 files changed, 59 insertions(+), 47 deletions(-) create mode 100644 rocket/src/main/scala/tile.scala diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index cd6d3882..5adfaf7e 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -7,7 +7,7 @@ object Constants { val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = true + val HAVE_VEC = false val BR_N = UFix(0, 
4); val BR_EQ = UFix(1, 4); diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index e546cd63..1468b07d 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -5,14 +5,8 @@ import Node._; import Constants._; import hwacha._ -class ioDebug(view: List[String] = null) extends Bundle(view) -{ - val error_mode = Bool(OUTPUT); -} - class ioRocket extends Bundle() { - val debug = new ioDebug(); val host = new ioHTIF(); val imem = new ioImem().flip val vimem = new ioImem().flip @@ -107,7 +101,6 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ctrl.io.dpath <> dpath.io.ctrl; dpath.io.host <> io.host; - dpath.io.debug <> io.debug; // FIXME: try to make this more compact diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 286b746f..77a1018a 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -17,7 +17,6 @@ class ioDpathAll extends Bundle() { val host = new ioHTIF(); val ctrl = new ioCtrlDpath().flip - val debug = new ioDebug(); val dmem = new ioDmem(List("req_idx", "req_tag", "req_data", "resp_val", "resp_miss", "resp_replay", "resp_type", "resp_tag", "resp_data", "resp_data_subword")).flip val dtlb = new ioDTLB_CPU_req_bundle().asOutput() val imem = new ioDpathImem(); @@ -291,7 +290,6 @@ class rocketDpath extends Component io.ctrl.status := pcr.io.status; io.ptbr := pcr.io.ptbr; io.ptbr_wen := pcr.io.ptbr_wen; - io.debug.error_mode := pcr.io.debug.error_mode; // branch resolution logic io.ctrl.br_eq := (ex_rs1 === ex_rs2) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 3e4fe3d2..30a2959e 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -60,7 +60,6 @@ class rocketDpathBTB(entries: Int) extends Component class ioDpathPCR extends Bundle() { val host = new ioHTIF() - val debug = new ioDebug(List("error_mode", 
"log_control")); val r = new ioReadPort(); val w = new ioWritePort(); @@ -133,7 +132,7 @@ class rocketDpathPCR extends Component io.status := Cat(reg_status_im, Bits(0,7), reg_status_vm, reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, reg_status_ec, reg_status_ev, reg_status_ef, reg_status_et); io.evec := Mux(io.exception, reg_ebase, reg_epc) io.ptbr := reg_ptbr; - io.debug.error_mode := reg_error_mode; + io.host.debug.error_mode := reg_error_mode; io.r.data := rdata; io.vecbank := reg_vecbank diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index b29d095a..85ff9506 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -4,6 +4,11 @@ import Chisel._ import Node._; import Constants._; +class ioDebug(view: List[String] = null) extends Bundle(view) +{ + val error_mode = Bool(OUTPUT); +} + class ioHost(w: Int, view: List[String] = null) extends Bundle(view) { val in = new ioDecoupled()(Bits(width = w)).flip @@ -20,6 +25,7 @@ class PCRReq extends Bundle class ioHTIF extends Bundle { val reset = Bool(INPUT) + val debug = new ioDebug val pcr_req = (new ioDecoupled) { new PCRReq }.flip val pcr_rep = (new ioPipe) { Bits(width = 64) } } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala new file mode 100644 index 00000000..2ebabc7d --- /dev/null +++ b/rocket/src/main/scala/tile.scala @@ -0,0 +1,45 @@ +package rocket + +import Chisel._ +import Node._ +import Constants._ + +class Tile extends Component +{ + val io = new Bundle { + val tilelink = new ioTileLink + val host = new ioHTIF + } + + val cpu = new rocketProc(resetSignal = io.host.reset) + val icache = new rocketICache(128, 4) // 128 sets x 4 ways (32KB) + val icache_pf = new rocketIPrefetcher + val dcache = new HellaCacheUniproc + + val arbiter = new rocketMemArbiter(2 + (if (HAVE_VEC) 1 else 0)) + arbiter.io.requestor(0) <> dcache.io.mem + arbiter.io.requestor(1) <> icache_pf.io.mem + + io.tilelink.xact_init <> 
Queue(arbiter.io.mem.xact_init) + io.tilelink.xact_init_data <> Queue(dcache.io.mem.xact_init_data) + arbiter.io.mem.xact_abort <> Queue(io.tilelink.xact_abort) + arbiter.io.mem.xact_rep <> Pipe(io.tilelink.xact_rep) + io.tilelink.xact_finish <> Queue(arbiter.io.mem.xact_finish) + dcache.io.mem.probe_req <> Queue(io.tilelink.probe_req) + io.tilelink.probe_rep <> Queue(dcache.io.mem.probe_rep, 1) + io.tilelink.probe_rep_data <> Queue(dcache.io.mem.probe_rep_data) + + if (HAVE_VEC) + { + val vicache = new rocketICache(128, 1) // 128 sets x 1 ways (8KB) + arbiter.io.requestor(2) <> vicache.io.mem + cpu.io.vimem <> vicache.io.cpu + } + + cpu.io.host <> io.host + + icache_pf.io.invalidate := cpu.io.imem.invalidate + icache.io.mem <> icache_pf.io.icache + cpu.io.imem <> icache.io.cpu + cpu.io.dmem <> dcache.io.cpu +} diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index dcd394dc..2491a8af 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -15,41 +15,18 @@ class Top() extends Component { val htif_width = 16 val io = new ioTop(htif_width); + + val tile = new Tile val htif = new rocketHTIF(htif_width, 1) - val cpu = new rocketProc(resetSignal = htif.io.cpu(0).reset); - val icache = new rocketICache(128, 4) // 128 sets x 4 ways (32KB) - val icache_pf = new rocketIPrefetcher(); - val dcache = new HellaCacheUniproc(); - - val arbiter = new rocketMemArbiter(2 + (if (HAVE_VEC) 1 else 0)); - arbiter.io.requestor(0) <> dcache.io.mem - arbiter.io.requestor(1) <> icache_pf.io.mem - val hub = new CoherenceHubBroadcast(2) - // connect tile to hub - hub.io.tiles(0).xact_init <> Queue(arbiter.io.mem.xact_init) - hub.io.tiles(0).xact_init_data <> Queue(dcache.io.mem.xact_init_data) - arbiter.io.mem.xact_abort <> Queue(hub.io.tiles(0).xact_abort) - arbiter.io.mem.xact_rep <> Pipe(hub.io.tiles(0).xact_rep) - hub.io.tiles(0).xact_finish <> Queue(arbiter.io.mem.xact_finish) - dcache.io.mem.probe_req <> 
Queue(hub.io.tiles(0).probe_req) - hub.io.tiles(0).probe_rep <> Queue(dcache.io.mem.probe_rep, 1) - hub.io.tiles(0).probe_rep_data <> Queue(dcache.io.mem.probe_rep_data) - // connect HTIF to hub + hub.io.tiles(0) <> tile.io.tilelink hub.io.tiles(1) <> htif.io.mem - // connect hub to memory + io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) io.mem.req_data <> Queue(hub.io.mem.req_data) hub.io.mem.resp <> Pipe(io.mem.resp) - if (HAVE_VEC) - { - val vicache = new rocketICache(128, 1); // 128 sets x 1 ways (8KB) - arbiter.io.requestor(2) <> vicache.io.mem - cpu.io.vimem <> vicache.io.cpu; - } - // pad out the HTIF using a divided clock val slow_io = (new slowIO(64, 16)) { Bits(width = htif_width) } htif.io.host.out <> slow_io.io.out_fast @@ -58,14 +35,8 @@ class Top() extends Component { io.host.in <> slow_io.io.in_slow io.host_clk := slow_io.io.clk_slow - cpu.io.host <> htif.io.cpu(0); - cpu.io.debug <> io.debug; - - icache_pf.io.invalidate := cpu.io.imem.invalidate - icache.io.mem <> icache_pf.io.icache; - cpu.io.imem <> icache.io.cpu; - cpu.io.dmem <> dcache.io.cpu; - + tile.io.host <> htif.io.cpu(0) + io.debug <> tile.io.host.debug } object top_main { From 1f33f6bb5823d26862e17f15176a25b2a634bb98 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 24 Mar 2012 20:54:43 -0700 Subject: [PATCH 0366/1087] HAVE_VEC is on --- rocket/src/main/scala/consts.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 5adfaf7e..cd6d3882 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -7,7 +7,7 @@ object Constants { val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = false + val HAVE_VEC = true val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); From 7fa93da4f5e3f9edd3ea029e7dc5685655a95016 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 Mar 2012 15:49:32 -0700 Subject: [PATCH 0367/1087] add backup memory port (disabled for now) --- 
rocket/src/main/scala/htif.scala | 27 ++++++++------- rocket/src/main/scala/top.scala | 59 +++++++++++++++++++++++++------- 2 files changed, 61 insertions(+), 25 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 85ff9506..253fdf14 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -58,8 +58,10 @@ class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence val rx_count_words = rx_count >> UFix(log2up(short_request_bits/w)) val packet_ram_wen = rx_count(log2up(short_request_bits/w)-1,0).andR && io.host.in.valid && io.host.in.ready - val packet_ram = Mem(long_request_bits/short_request_bits-1, - packet_ram_wen, rx_count_words - UFix(1), rx_shifter_in) + val packet_ram = Vec(long_request_bits/short_request_bits-1) { Reg() { Bits(width = short_request_bits) } } + when (packet_ram_wen) { + packet_ram(rx_count_words - UFix(1)) := rx_shifter_in + } val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } val cmd = header(3,0) @@ -67,9 +69,9 @@ class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence val seqno = header(23,16) val addr = header(63,24).toUFix - val pcr_addr = addr(19,0) - val pcr_coreid = addr(39,20) - val pcr_wdata = packet_ram(UFix(0)) + val pcr_addr = addr(4,0) + val pcr_coreid = if (ncores == 1) UFix(0) else addr(20+log2up(ncores),20) + val pcr_wdata = packet_ram(0) val nack = Mux(cmd === cmd_readmem || cmd === cmd_writemem, size != UFix((1 << OFFSET_BITS)/8), Mux(cmd === cmd_readcr || cmd === cmd_writecr, size != UFix(1), @@ -157,9 +159,10 @@ class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence var mem_req_data: Bits = null for (i <- 0 until MEM_DATA_BITS/short_request_bits) { val idx = Cat(mem_cnt, UFix(i, log2up(MEM_DATA_BITS/short_request_bits))) - packet_ram.write(idx, io.mem.xact_rep.bits.data((i+1)*short_request_bits-1, 
i*short_request_bits), - state === state_mem_rdata && io.mem.xact_rep.valid) - mem_req_data = Cat(packet_ram.read(idx), mem_req_data) + when (state === state_mem_rdata && io.mem.xact_rep.valid) { + packet_ram(idx) := io.mem.xact_rep.bits.data((i+1)*short_request_bits-1, i*short_request_bits) + } + mem_req_data = Cat(packet_ram(idx), mem_req_data) } io.mem.xact_init.valid := state === state_mem_req io.mem.xact_init.bits.t_type := Mux(cmd === cmd_writemem, X_INIT_WRITE_UNCACHED, X_INIT_READ_UNCACHED) @@ -190,10 +193,10 @@ class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence cpu.pcr_req.bits.data := pcr_wdata cpu.reset := my_reset - when (cpu.pcr_req.valid && cpu.pcr_req.ready && cpu.pcr_req.bits.rw) { - pcr_done := Bool(true) - when (cpu.pcr_req.bits.addr === PCR_RESET) { - my_reset := cpu.pcr_req.bits.data(0) + when (state === state_pcr && me && cmd === cmd_writecr) { + pcr_done := cpu.pcr_req.ready + when (pcr_addr === PCR_RESET) { + my_reset := pcr_wdata(0) } } when (cpu.pcr_rep.valid) { diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 2491a8af..dd02d544 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -4,17 +4,21 @@ import Chisel._ import Node._; import Constants._; -class ioTop(htif_width: Int) extends Bundle { +class ioTop(htif_width: Int, mem_backup_width: Int) extends Bundle { val debug = new ioDebug(); val host = new ioHost(htif_width); val host_clk = Bool(OUTPUT) + val mem_backup = new ioMemSerialized(mem_backup_width) + val mem_backup_en = Bool(INPUT) val mem = new ioMem } -class Top() extends Component { - +class Top() extends Component +{ + val clkdiv = 32 val htif_width = 16 - val io = new ioTop(htif_width); + val mem_backup_width = 16 + val io = new ioTop(htif_width, mem_backup_width); val tile = new Tile val htif = new rocketHTIF(htif_width, 1) @@ -23,17 +27,46 @@ class Top() extends Component { hub.io.tiles(0) <> tile.io.tilelink hub.io.tiles(1) <> 
htif.io.mem - io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) - io.mem.req_data <> Queue(hub.io.mem.req_data) - hub.io.mem.resp <> Pipe(io.mem.resp) + // mux between main and backup memory ports + val mem_serdes = new MemSerdes(mem_backup_width) + val mem_cmdq = (new queue(1)) { new MemReqCmd } + mem_cmdq.io.enq <> hub.io.mem.req_cmd + mem_cmdq.io.deq.ready := Mux(io.mem_backup_en, mem_serdes.io.wide.req_cmd.ready, io.mem.req_cmd.ready) + io.mem.req_cmd.valid := mem_cmdq.io.deq.valid && !io.mem_backup_en + io.mem.req_cmd.bits := mem_cmdq.io.deq.bits + mem_serdes.io.wide.req_cmd.valid := mem_cmdq.io.deq.valid && io.mem_backup_en + mem_serdes.io.wide.req_cmd.bits := mem_cmdq.io.deq.bits + + val mem_dataq = (new queue(2)) { new MemData } + mem_dataq.io.enq <> hub.io.mem.req_data + mem_dataq.io.deq.ready := Mux(io.mem_backup_en, mem_serdes.io.wide.req_data.ready, io.mem.req_data.ready) + io.mem.req_data.valid := mem_dataq.io.deq.valid && !io.mem_backup_en + io.mem.req_data.bits := mem_dataq.io.deq.bits + mem_serdes.io.wide.req_data.valid := mem_dataq.io.deq.valid && io.mem_backup_en + mem_serdes.io.wide.req_data.bits := mem_dataq.io.deq.bits + + // only the main or backup port may respond at any one time + hub.io.mem.resp.valid := io.mem.resp.valid || mem_serdes.io.wide.resp.valid + hub.io.mem.resp.bits := Mux(io.mem.resp.valid, io.mem.resp.bits, mem_serdes.io.wide.resp.bits) // pad out the HTIF using a divided clock - val slow_io = (new slowIO(64, 16)) { Bits(width = htif_width) } - htif.io.host.out <> slow_io.io.out_fast - io.host.out <> slow_io.io.out_slow - htif.io.host.in <> slow_io.io.in_fast - io.host.in <> slow_io.io.in_slow - io.host_clk := slow_io.io.clk_slow + val hio = (new slowIO(clkdiv, 4)) { Bits(width = htif_width) } + htif.io.host.out <> hio.io.out_fast + io.host.out.valid := hio.io.out_slow.valid + hio.io.out_slow.ready := io.host.out.ready + io.host.out.bits := Mux(reset, io.host.in.bits, hio.io.out_slow.bits) + htif.io.host.in <> hio.io.in_fast + 
io.host.in <> hio.io.in_slow + io.host_clk := hio.io.clk_slow + + // pad out the backup memory link with the HTIF divided clk + val mio = (new slowIO(clkdiv, 4)) { Bits(width = mem_backup_width) } + mem_serdes.io.narrow.req <> mio.io.out_fast + io.mem_backup.req <> mio.io.out_slow + mem_serdes.io.narrow.resp.valid := mio.io.in_fast.valid + mio.io.in_fast.ready := Bool(true) + mem_serdes.io.narrow.resp.bits := mio.io.in_fast.bits + io.mem_backup.resp <> mio.io.in_slow tile.io.host <> htif.io.cpu(0) io.debug <> tile.io.host.debug From 88bf8a4f23948f43326c88a5c7440deaa50e5790 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 Mar 2012 17:03:58 -0700 Subject: [PATCH 0368/1087] add mem serdes unit --- rocket/src/main/scala/memserdes.scala | 89 +++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 rocket/src/main/scala/memserdes.scala diff --git a/rocket/src/main/scala/memserdes.scala b/rocket/src/main/scala/memserdes.scala new file mode 100644 index 00000000..2305ed69 --- /dev/null +++ b/rocket/src/main/scala/memserdes.scala @@ -0,0 +1,89 @@ +package rocket + +import Chisel._ +import Node._ +import Constants._ +import scala.math._ + +class ioMemSerialized(w: Int) extends Bundle +{ + val req = (new ioDecoupled) { Bits(width = w) } + val resp = (new ioPipe) { Bits(width = w) }.flip +} + +class MemSerdes(w: Int) extends Component +{ + val io = new Bundle { + val wide = new ioMem().flip + val narrow = new ioMemSerialized(w) + } + val abits = io.wide.req_cmd.bits.toBits.getWidth + val dbits = io.wide.req_data.bits.toBits.getWidth + val rbits = io.wide.resp.bits.getWidth + + val out_buf = Reg() { Bits() } + val in_buf = Reg() { Bits() } + + val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(5) { UFix() } + val state = Reg(resetVal = s_idle) + val send_cnt = Reg(resetVal = UFix(0, log2up(max(abits, dbits)))) + val data_send_cnt = Reg(resetVal = UFix(0, log2up(MEM_DATA_BITS))) + val adone = 
io.narrow.req.ready && send_cnt === UFix((abits-1)/w) + val ddone = io.narrow.req.ready && send_cnt === UFix((dbits-1)/w) + + when (state === s_idle) { + when (io.wide.req_cmd.valid) { + state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr) + } + } + when (state === s_read_addr && adone) { + state := s_idle + send_cnt := UFix(0) + } + when (state === s_write_addr && adone) { + state := s_write_idle + send_cnt := UFix(0) + } + when (state === s_write_idle && io.wide.req_data.valid) { + state := s_write_data + } + when (state === s_write_data && ddone) { + data_send_cnt := data_send_cnt + UFix(1) + state := Mux(data_send_cnt === UFix(REFILL_CYCLES-1), s_idle, s_write_idle) + } + + when (io.narrow.req.valid && io.narrow.req.ready) { + send_cnt := Mux(adone, UFix(0), send_cnt + UFix(1)) + out_buf := out_buf >> UFix(w) + } + when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { + out_buf := io.wide.req_cmd.bits.toBits + } + when (io.wide.req_data.valid && io.wide.req_data.ready) { + out_buf := io.wide.req_data.bits.toBits + } + + io.wide.req_cmd.ready := state === s_idle + io.wide.req_data.ready := state === s_write_idle + io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data + io.narrow.req.bits := out_buf + + val recv_cnt = Reg() { UFix(width = log2up(rbits)) } + val data_recv_cnt = Reg(resetVal = UFix(0, log2up(MEM_DATA_BITS))) + val resp_val = Reg(resetVal = Bool(false)) + + resp_val := Bool(false) + when (io.narrow.resp.valid) { + recv_cnt := recv_cnt + UFix(1) + when (recv_cnt === UFix((rbits-1)/w)) { + recv_cnt := UFix(0) + data_recv_cnt := data_recv_cnt + UFix(1) + resp_val := Bool(true) + } + in_buf := Cat(io.narrow.resp.bits, in_buf(rbits-1,w)) + } + + io.wide.resp.valid := resp_val + io.wide.resp.bits.tag := in_buf(io.wide.resp.bits.tag.width-1,0) + io.wide.resp.bits.data := in_buf >> UFix(io.wide.resp.bits.tag.width) +} From f62a02ab54cf783db4b55bfe7122411408e0a07f Mon Sep 17 00:00:00 2001 From: Andrew 
Waterman Date: Sun, 25 Mar 2012 21:29:36 -0700 Subject: [PATCH 0369/1087] remove dumb stuff in top.scala --- rocket/src/main/scala/top.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index dd02d544..3f0aa583 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -52,9 +52,7 @@ class Top() extends Component // pad out the HTIF using a divided clock val hio = (new slowIO(clkdiv, 4)) { Bits(width = htif_width) } htif.io.host.out <> hio.io.out_fast - io.host.out.valid := hio.io.out_slow.valid - hio.io.out_slow.ready := io.host.out.ready - io.host.out.bits := Mux(reset, io.host.in.bits, hio.io.out_slow.bits) + io.host.out <> hio.io.out_slow htif.io.host.in <> hio.io.in_fast io.host.in <> hio.io.in_slow io.host_clk := hio.io.clk_slow From 1666d3fbd790e2fca9407717486ed0700bed1259 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 Mar 2012 21:45:10 -0700 Subject: [PATCH 0370/1087] loop host.in to host.out during reset --- rocket/src/main/scala/slowio.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/slowio.scala b/rocket/src/main/scala/slowio.scala index c1535044..395bdc8d 100644 --- a/rocket/src/main/scala/slowio.scala +++ b/rocket/src/main/scala/slowio.scala @@ -28,7 +28,7 @@ class slowIO[T <: Data](divisor: Int, hold_cycles: Int)(data: => T) extends Comp val out_slow_bits = Reg() { data } val fromhost_q = new queue(1)(data) - fromhost_q.io.enq.valid := in_en && io.in_slow.valid && in_slow_rdy + fromhost_q.io.enq.valid := in_en && (io.in_slow.valid && in_slow_rdy || reset) fromhost_q.io.enq.bits := io.in_slow.bits fromhost_q.io.deq <> io.in_fast @@ -39,7 +39,7 @@ class slowIO[T <: Data](divisor: Int, hold_cycles: Int)(data: => T) extends Comp when (out_en) { in_slow_rdy := fromhost_q.io.enq.ready out_slow_val := tohost_q.io.deq.valid - out_slow_bits := tohost_q.io.deq.bits + out_slow_bits := 
Mux(reset, fromhost_q.io.deq.bits, tohost_q.io.deq.bits) } io.in_slow.ready := in_slow_rdy From 31f0b600fd5c72312f9d8e8d31d132f625976b8b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 Mar 2012 23:03:20 -0700 Subject: [PATCH 0371/1087] add dessert --- rocket/src/main/scala/memserdes.scala | 117 ++++++++++++++++++++------ rocket/src/main/scala/top.scala | 5 +- 2 files changed, 95 insertions(+), 27 deletions(-) diff --git a/rocket/src/main/scala/memserdes.scala b/rocket/src/main/scala/memserdes.scala index 2305ed69..f68d36e8 100644 --- a/rocket/src/main/scala/memserdes.scala +++ b/rocket/src/main/scala/memserdes.scala @@ -27,14 +27,28 @@ class MemSerdes(w: Int) extends Component val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(5) { UFix() } val state = Reg(resetVal = s_idle) val send_cnt = Reg(resetVal = UFix(0, log2up(max(abits, dbits)))) - val data_send_cnt = Reg(resetVal = UFix(0, log2up(MEM_DATA_BITS))) + val data_send_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/w) val ddone = io.narrow.req.ready && send_cnt === UFix((dbits-1)/w) - when (state === s_idle) { - when (io.wide.req_cmd.valid) { - state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr) - } + when (io.narrow.req.valid && io.narrow.req.ready) { + send_cnt := send_cnt + UFix(1) + out_buf := out_buf >> UFix(w) + } + when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { + out_buf := io.wide.req_cmd.bits.toBits + } + when (io.wide.req_data.valid && io.wide.req_data.ready) { + out_buf := io.wide.req_data.bits.toBits + } + + io.wide.req_cmd.ready := state === s_idle + io.wide.req_data.ready := state === s_write_idle + io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data + io.narrow.req.bits := out_buf + + when (state === s_idle && io.wide.req_cmd.valid) { + state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr) } when (state 
=== s_read_addr && adone) { state := s_idle @@ -50,26 +64,11 @@ class MemSerdes(w: Int) extends Component when (state === s_write_data && ddone) { data_send_cnt := data_send_cnt + UFix(1) state := Mux(data_send_cnt === UFix(REFILL_CYCLES-1), s_idle, s_write_idle) + send_cnt := UFix(0) } - when (io.narrow.req.valid && io.narrow.req.ready) { - send_cnt := Mux(adone, UFix(0), send_cnt + UFix(1)) - out_buf := out_buf >> UFix(w) - } - when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { - out_buf := io.wide.req_cmd.bits.toBits - } - when (io.wide.req_data.valid && io.wide.req_data.ready) { - out_buf := io.wide.req_data.bits.toBits - } - - io.wide.req_cmd.ready := state === s_idle - io.wide.req_data.ready := state === s_write_idle - io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data - io.narrow.req.bits := out_buf - - val recv_cnt = Reg() { UFix(width = log2up(rbits)) } - val data_recv_cnt = Reg(resetVal = UFix(0, log2up(MEM_DATA_BITS))) + val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+w-1)/w))) + val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val resp_val = Reg(resetVal = Bool(false)) resp_val := Bool(false) @@ -80,10 +79,80 @@ class MemSerdes(w: Int) extends Component data_recv_cnt := data_recv_cnt + UFix(1) resp_val := Bool(true) } - in_buf := Cat(io.narrow.resp.bits, in_buf(rbits-1,w)) + in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+w-1)/w*w-1,w)) } io.wide.resp.valid := resp_val io.wide.resp.bits.tag := in_buf(io.wide.resp.bits.tag.width-1,0) io.wide.resp.bits.data := in_buf >> UFix(io.wide.resp.bits.tag.width) } + +class MemDessert(w: Int) extends Component // test rig side +{ + val io = new Bundle { + val narrow = new ioMemSerialized(w).flip + val wide = new ioMem + } + val abits = io.wide.req_cmd.bits.toBits.getWidth + val dbits = io.wide.req_data.bits.toBits.getWidth + val rbits = io.wide.resp.bits.getWidth + + require(dbits >= abits && rbits >= dbits) + val recv_cnt = Reg(resetVal = 
UFix(0, log2up(rbits))) + val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val adone = io.narrow.req.valid && recv_cnt === UFix((abits-1)/w) + val ddone = io.narrow.req.valid && recv_cnt === UFix((dbits-1)/w) + val rdone = io.narrow.resp.valid && recv_cnt === UFix((rbits-1)/w) + + val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(5) { UFix() } + val state = Reg(resetVal = s_cmd_recv) + + val in_buf = Reg() { Bits() } + when (io.narrow.req.valid && io.narrow.req.ready || io.narrow.resp.valid) { + recv_cnt := recv_cnt + UFix(1) + in_buf := Cat(io.narrow.req.bits, in_buf((rbits+w-1)/w*w-1,w)) + } + io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv + + when (state === s_cmd_recv && adone) { + state := s_cmd + recv_cnt := UFix(0) + } + when (state === s_cmd && io.wide.req_cmd.ready) { + state := Mux(io.wide.req_cmd.bits.rw, s_data_recv, s_reply) + } + when (state === s_data_recv && ddone) { + state := s_data + recv_cnt := UFix(0) + } + when (state === s_data && io.wide.req_data.ready) { + state := s_data_recv + when (data_recv_cnt === UFix(REFILL_CYCLES-1)) { + state := s_cmd_recv + } + data_recv_cnt := data_recv_cnt + UFix(1) + } + when (rdone) { // state === s_reply + when (data_recv_cnt === UFix(REFILL_CYCLES-1)) { + state := s_cmd_recv + } + recv_cnt := UFix(0) + data_recv_cnt := data_recv_cnt + UFix(1) + } + + val req_cmd = in_buf >> UFix(((rbits+w-1)/w - (abits+w-1)/w)*w) + io.wide.req_cmd.valid := state === s_cmd + io.wide.req_cmd.bits.tag := req_cmd + io.wide.req_cmd.bits.addr := req_cmd.toUFix >> UFix(io.wide.req_cmd.bits.tag.width) + io.wide.req_cmd.bits.rw := req_cmd(io.wide.req_cmd.bits.tag.width + io.wide.req_cmd.bits.addr.width) + + io.wide.req_data.valid := state === s_data + io.wide.req_data.bits.data := in_buf >> UFix(((rbits+w-1)/w - (dbits+w-1)/w)*w) + + val dataq = (new queue(REFILL_CYCLES)) { new MemResp } + dataq.io.enq <> io.wide.resp + dataq.io.deq.ready := recv_cnt === 
UFix((rbits-1)/w) + + io.narrow.resp.valid := dataq.io.deq.valid + io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UFix(w)) +} diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 3f0aa583..ca07147d 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -45,9 +45,8 @@ class Top() extends Component mem_serdes.io.wide.req_data.valid := mem_dataq.io.deq.valid && io.mem_backup_en mem_serdes.io.wide.req_data.bits := mem_dataq.io.deq.bits - // only the main or backup port may respond at any one time - hub.io.mem.resp.valid := io.mem.resp.valid || mem_serdes.io.wide.resp.valid - hub.io.mem.resp.bits := Mux(io.mem.resp.valid, io.mem.resp.bits, mem_serdes.io.wide.resp.bits) + hub.io.mem.resp.valid := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.valid, io.mem.resp.valid) + hub.io.mem.resp.bits := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.bits, io.mem.resp.bits) // pad out the HTIF using a divided clock val hio = (new slowIO(clkdiv, 4)) { Bits(width = htif_width) } From ef505de017a63da6657008c5d78bb55310870ace Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 Mar 2012 23:49:45 -0700 Subject: [PATCH 0372/1087] reduce HTIF width --- rocket/src/main/scala/top.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index ca07147d..f0f6752c 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -16,7 +16,7 @@ class ioTop(htif_width: Int, mem_backup_width: Int) extends Bundle { class Top() extends Component { val clkdiv = 32 - val htif_width = 16 + val htif_width = 8 val mem_backup_width = 16 val io = new ioTop(htif_width, mem_backup_width); From 5f53cd4ac1d4169196177d886925da0c0662193d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 Mar 2012 23:49:59 -0700 Subject: [PATCH 0373/1087] reduce HTIF width --- rocket/src/main/scala/consts.scala | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index cd6d3882..5adfaf7e 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -7,7 +7,7 @@ object Constants { val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = true + val HAVE_VEC = false val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); From e6b0e565dec1ab2796aafc5e3433130ac4e5fccc Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 26 Mar 2012 01:21:39 -0700 Subject: [PATCH 0374/1087] turn HAVE_VEC on --- rocket/src/main/scala/consts.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 5adfaf7e..cd6d3882 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -7,7 +7,7 @@ object Constants { val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = false + val HAVE_VEC = true val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); From e2fe525fb6dc8117535c7427ff65290be2f9b12f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 26 Mar 2012 14:18:57 -0700 Subject: [PATCH 0375/1087] remove bug from dessert --- rocket/src/main/scala/memserdes.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/memserdes.scala b/rocket/src/main/scala/memserdes.scala index f68d36e8..c1d1fccb 100644 --- a/rocket/src/main/scala/memserdes.scala +++ b/rocket/src/main/scala/memserdes.scala @@ -26,7 +26,7 @@ class MemSerdes(w: Int) extends Component val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(5) { UFix() } val state = Reg(resetVal = s_idle) - val send_cnt = Reg(resetVal = UFix(0, log2up(max(abits, dbits)))) + val send_cnt = Reg(resetVal = UFix(0, log2up((max(abits, dbits)+w-1)/w))) val data_send_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/w) val ddone 
= io.narrow.req.ready && send_cnt === UFix((dbits-1)/w) @@ -98,7 +98,7 @@ class MemDessert(w: Int) extends Component // test rig side val rbits = io.wide.resp.bits.getWidth require(dbits >= abits && rbits >= dbits) - val recv_cnt = Reg(resetVal = UFix(0, log2up(rbits))) + val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+w-1)/w))) val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val adone = io.narrow.req.valid && recv_cnt === UFix((abits-1)/w) val ddone = io.narrow.req.valid && recv_cnt === UFix((dbits-1)/w) From 32d95e9594aa90a382e081bedf060a3e61ea5884 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 26 Mar 2012 17:00:01 -0700 Subject: [PATCH 0376/1087] fix -1:0 index problem for direct map case --- rocket/src/main/scala/icache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index b1178f97..79ee7a23 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -81,7 +81,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { } val refill_done = io.mem.xact_rep.valid && refill_count.andR - val repl_way = LFSR16(state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss && !tag_hit)(log2up(assoc)-1,0) + val repl_way = if (assoc == 1) UFix(0) else LFSR16(state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss && !tag_hit)(log2up(assoc)-1,0) val word_shift = Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2up(databits))).toUFix val tag_we = refill_done val tag_addr = From a70f0414fa79ab7d44d6a3987e16fadec472c825 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 26 Mar 2012 20:51:54 -0700 Subject: [PATCH 0377/1087] fix a workaroundable bug --- rocket/src/main/scala/dpath.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 77a1018a..297bde30 100644 --- a/rocket/src/main/scala/dpath.scala +++ 
b/rocket/src/main/scala/dpath.scala @@ -412,7 +412,7 @@ class rocketDpath extends Component rfile.io.w0.addr := wb_reg_waddr rfile.io.w0.en := io.ctrl.wb_wen || wb_reg_ll_wb - rfile.io.w0.data := Mux(io.ctrl.pcr != PCR_N, pcr.io.r.data, wb_wdata) + rfile.io.w0.data := Mux(io.ctrl.pcr != PCR_N && io.ctrl.wb_wen, pcr.io.r.data, wb_wdata) io.ctrl.wb_waddr := wb_reg_waddr io.ctrl.mem_wb := dmem_resp_replay; From 6bda8674bd93ca24e37a11452567eb93390a1eb9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 26 Mar 2012 23:50:09 -0700 Subject: [PATCH 0378/1087] no dessert tonight :( --- rocket/src/main/scala/consts.scala | 2 ++ rocket/src/main/scala/memserdes.scala | 46 +++++++++++++-------------- rocket/src/main/scala/top.scala | 17 +++++----- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index cd6d3882..2adcd2f5 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -9,6 +9,8 @@ object Constants val HAVE_FPU = true val HAVE_VEC = true + val MEM_BACKUP_WIDTH = 16 + val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); val BR_NE = UFix(2, 4); diff --git a/rocket/src/main/scala/memserdes.scala b/rocket/src/main/scala/memserdes.scala index c1d1fccb..27bbaa95 100644 --- a/rocket/src/main/scala/memserdes.scala +++ b/rocket/src/main/scala/memserdes.scala @@ -5,17 +5,17 @@ import Node._ import Constants._ import scala.math._ -class ioMemSerialized(w: Int) extends Bundle +class ioMemSerialized extends Bundle { - val req = (new ioDecoupled) { Bits(width = w) } - val resp = (new ioPipe) { Bits(width = w) }.flip + val req = (new ioDecoupled) { Bits(width = MEM_BACKUP_WIDTH) } + val resp = (new ioPipe) { Bits(width = MEM_BACKUP_WIDTH) }.flip } -class MemSerdes(w: Int) extends Component +class MemSerdes extends Component { val io = new Bundle { val wide = new ioMem().flip - val narrow = new ioMemSerialized(w) + val narrow = new ioMemSerialized } val abits = 
io.wide.req_cmd.bits.toBits.getWidth val dbits = io.wide.req_data.bits.toBits.getWidth @@ -26,14 +26,14 @@ class MemSerdes(w: Int) extends Component val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(5) { UFix() } val state = Reg(resetVal = s_idle) - val send_cnt = Reg(resetVal = UFix(0, log2up((max(abits, dbits)+w-1)/w))) + val send_cnt = Reg(resetVal = UFix(0, log2up((max(abits, dbits)+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) val data_send_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) - val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/w) - val ddone = io.narrow.req.ready && send_cnt === UFix((dbits-1)/w) + val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/MEM_BACKUP_WIDTH) + val ddone = io.narrow.req.ready && send_cnt === UFix((dbits-1)/MEM_BACKUP_WIDTH) when (io.narrow.req.valid && io.narrow.req.ready) { send_cnt := send_cnt + UFix(1) - out_buf := out_buf >> UFix(w) + out_buf := out_buf >> UFix(MEM_BACKUP_WIDTH) } when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { out_buf := io.wide.req_cmd.bits.toBits @@ -67,19 +67,19 @@ class MemSerdes(w: Int) extends Component send_cnt := UFix(0) } - val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+w-1)/w))) + val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) val resp_val = Reg(resetVal = Bool(false)) resp_val := Bool(false) when (io.narrow.resp.valid) { recv_cnt := recv_cnt + UFix(1) - when (recv_cnt === UFix((rbits-1)/w)) { + when (recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH)) { recv_cnt := UFix(0) data_recv_cnt := data_recv_cnt + UFix(1) resp_val := Bool(true) } - in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+w-1)/w*w-1,w)) + in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH*MEM_BACKUP_WIDTH-1,MEM_BACKUP_WIDTH)) } io.wide.resp.valid := resp_val @@ -87,10 +87,10 @@ class MemSerdes(w: Int) extends 
Component io.wide.resp.bits.data := in_buf >> UFix(io.wide.resp.bits.tag.width) } -class MemDessert(w: Int) extends Component // test rig side +class MemDessert extends Component // test rig side { val io = new Bundle { - val narrow = new ioMemSerialized(w).flip + val narrow = new ioMemSerialized().flip val wide = new ioMem } val abits = io.wide.req_cmd.bits.toBits.getWidth @@ -98,11 +98,11 @@ class MemDessert(w: Int) extends Component // test rig side val rbits = io.wide.resp.bits.getWidth require(dbits >= abits && rbits >= dbits) - val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+w-1)/w))) + val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) - val adone = io.narrow.req.valid && recv_cnt === UFix((abits-1)/w) - val ddone = io.narrow.req.valid && recv_cnt === UFix((dbits-1)/w) - val rdone = io.narrow.resp.valid && recv_cnt === UFix((rbits-1)/w) + val adone = io.narrow.req.valid && recv_cnt === UFix((abits-1)/MEM_BACKUP_WIDTH) + val ddone = io.narrow.req.valid && recv_cnt === UFix((dbits-1)/MEM_BACKUP_WIDTH) + val rdone = io.narrow.resp.valid && recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH) val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(5) { UFix() } val state = Reg(resetVal = s_cmd_recv) @@ -110,7 +110,7 @@ class MemDessert(w: Int) extends Component // test rig side val in_buf = Reg() { Bits() } when (io.narrow.req.valid && io.narrow.req.ready || io.narrow.resp.valid) { recv_cnt := recv_cnt + UFix(1) - in_buf := Cat(io.narrow.req.bits, in_buf((rbits+w-1)/w*w-1,w)) + in_buf := Cat(io.narrow.req.bits, in_buf((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH*MEM_BACKUP_WIDTH-1,MEM_BACKUP_WIDTH)) } io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv @@ -140,19 +140,19 @@ class MemDessert(w: Int) extends Component // test rig side data_recv_cnt := data_recv_cnt + UFix(1) } - val req_cmd = in_buf >> UFix(((rbits+w-1)/w - 
(abits+w-1)/w)*w) + val req_cmd = in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (abits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH) io.wide.req_cmd.valid := state === s_cmd io.wide.req_cmd.bits.tag := req_cmd io.wide.req_cmd.bits.addr := req_cmd.toUFix >> UFix(io.wide.req_cmd.bits.tag.width) io.wide.req_cmd.bits.rw := req_cmd(io.wide.req_cmd.bits.tag.width + io.wide.req_cmd.bits.addr.width) io.wide.req_data.valid := state === s_data - io.wide.req_data.bits.data := in_buf >> UFix(((rbits+w-1)/w - (dbits+w-1)/w)*w) + io.wide.req_data.bits.data := in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (dbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH) val dataq = (new queue(REFILL_CYCLES)) { new MemResp } dataq.io.enq <> io.wide.resp - dataq.io.deq.ready := recv_cnt === UFix((rbits-1)/w) + dataq.io.deq.ready := recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH) io.narrow.resp.valid := dataq.io.deq.valid - io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UFix(w)) + io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UFix(MEM_BACKUP_WIDTH)) } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index f0f6752c..a74c9ff4 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -4,21 +4,21 @@ import Chisel._ import Node._; import Constants._; -class ioTop(htif_width: Int, mem_backup_width: Int) extends Bundle { +class ioTop(htif_width: Int) extends Bundle { val debug = new ioDebug(); val host = new ioHost(htif_width); val host_clk = Bool(OUTPUT) - val mem_backup = new ioMemSerialized(mem_backup_width) + val mem_backup = new ioMemSerialized val mem_backup_en = Bool(INPUT) + val mem_backup_clk = Bool(OUTPUT) val mem = new ioMem } -class Top() extends Component +class Top extends Component { val clkdiv = 32 val htif_width = 8 - val mem_backup_width = 16 - val io = new ioTop(htif_width, mem_backup_width); + val io = new ioTop(htif_width) val tile = new Tile 
val htif = new rocketHTIF(htif_width, 1) @@ -28,7 +28,7 @@ class Top() extends Component hub.io.tiles(1) <> htif.io.mem // mux between main and backup memory ports - val mem_serdes = new MemSerdes(mem_backup_width) + val mem_serdes = new MemSerdes val mem_cmdq = (new queue(1)) { new MemReqCmd } mem_cmdq.io.enq <> hub.io.mem.req_cmd mem_cmdq.io.deq.ready := Mux(io.mem_backup_en, mem_serdes.io.wide.req_cmd.ready, io.mem.req_cmd.ready) @@ -57,13 +57,14 @@ class Top() extends Component io.host_clk := hio.io.clk_slow // pad out the backup memory link with the HTIF divided clk - val mio = (new slowIO(clkdiv, 4)) { Bits(width = mem_backup_width) } + val mio = (new slowIO(clkdiv, 4)) { Bits(width = MEM_BACKUP_WIDTH) } mem_serdes.io.narrow.req <> mio.io.out_fast io.mem_backup.req <> mio.io.out_slow mem_serdes.io.narrow.resp.valid := mio.io.in_fast.valid mio.io.in_fast.ready := Bool(true) mem_serdes.io.narrow.resp.bits := mio.io.in_fast.bits io.mem_backup.resp <> mio.io.in_slow + io.mem_backup_clk := mio.io.clk_slow tile.io.host <> htif.io.cpu(0) io.debug <> tile.io.host.debug @@ -71,6 +72,6 @@ class Top() extends Component object top_main { def main(args: Array[String]) = { - chiselMain(args, () => new Top()); + chiselMain(args.drop(1), () => Class.forName(args(0)).newInstance.asInstanceOf[Component]) } } From bb704dc0c93cebe6f9d63d0cf5a8096496fe0799 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 27 Mar 2012 12:04:07 -0700 Subject: [PATCH 0379/1087] fix vector length calc bug, thanks chris and andrew --- rocket/src/main/scala/dpath_vec.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 251fd629..52f72618 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -40,8 +40,10 @@ class rocketDpathVec extends Component { val io = new ioDpathVec() - val nxregs = Mux(io.ctrl.fn === VEC_CFG, io.wdata(5,0), 
io.inst(15,10)).toUFix + UFix(0,7) - val nfregs = Mux(io.ctrl.fn === VEC_CFG, io.rs2(5,0), io.inst(21,16)).toUFix + UFix(0,7) + val nxregs_stage = Mux(io.ctrl.fn === VEC_CFG, io.wdata(5,0), io.inst(15,10)) + val nfregs_stage = Mux(io.ctrl.fn === VEC_CFG, io.rs2(5,0), io.inst(21,16)) + val nxregs = Mux(nxregs_stage(5), Bits(32), Mux(nxregs_stage === Bits(0), Bits(1), nxregs_stage)) + UFix(0,7) + val nfregs = Mux(nfregs_stage(5), Bits(32), nfregs_stage) + UFix(0,7) val nregs = nxregs + nfregs val uts_per_bank = MuxLookup( From 452876af37a71bc6cb88b93f4fd1d4759a67fe11 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 27 Mar 2012 14:48:30 -0700 Subject: [PATCH 0380/1087] fence on vvcfg; implement fence.v.g correctly --- rocket/src/main/scala/ctrl.scala | 392 +++++++++++++-------------- rocket/src/main/scala/ctrl_vec.scala | 4 +- 2 files changed, 196 insertions(+), 200 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2f942df9..2cfdbf26 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -93,217 +93,213 @@ object rocketCtrlDecode val xpr64 = Y; val decode_default = - // vfence - // | eret - // | | syscall - // vec_val mem_val mul_val div_val pcr | | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N) + // eret + // | syscall + // vec_val mem_val mul_val div_val pcr | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | + List(N, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N) val xdecode = Array( - // vfence - // | eret - // | | 
syscall - // vec_val mem_val mul_val div_val pcr | | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wsync | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,BR_NE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - BEQ-> List(Y, N,BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - BLT-> List(Y, N,BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - BLTU-> List(Y, N,BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - BGE-> List(Y, N,BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - BGEU-> List(Y, N,BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), + // eret + // | syscall + // vec_val mem_val mul_val div_val pcr | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wsync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,BR_NE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BEQ-> List(Y, N,BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BLT-> List(Y, N,BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BLTU-> List(Y, N,BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BGE-> List(Y, N,BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BGEU-> 
List(Y, N,BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - J-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - JAL-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, PCR_N,SYNC_N,N,N,N,N,N), - JALR_C-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N,N), - JALR_J-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N,N), - JALR_R-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N,N), - RDNPC-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N,N), + J-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + JAL-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_C-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_J-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_R-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + RDNPC-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - LB-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - LH-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, 
N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - LW-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - LD-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - LBU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - LHU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - LWU-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SW-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SD-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + LB-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LH-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LW-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LD-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LBU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LHU-> 
List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LWU-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SW-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SD-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOADD_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOSWAP_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOAND_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOOR_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOMIN_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOMINU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOMAX_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOMAXU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - 
AMOADD_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOAND_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOOR_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOMAX_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + AMOADD_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOSWAP_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOAND_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOOR_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMIN_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMINU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, 
M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAX_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAXU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOADD_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOSWAP_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOAND_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOOR_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAX_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LUI-> List(Y, N,BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - ADDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SLTI -> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SLTIU-> List(Y, N,BR_N, 
REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - ANDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - ORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - XORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SLLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SRLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SRAI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - ADD-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SUB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SLT-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SLTU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - riscvAND-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - riscvOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - riscvXOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SLL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, 
N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SRL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SRA-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + LUI-> List(Y, N,BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTI -> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTIU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ANDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + XORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADD-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SUB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLT-> List(Y, N,BR_N, 
REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvAND-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvXOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRA-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SLLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SRLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SRAIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - ADDW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SUBW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SLLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, 
N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SRLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + ADDIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADDW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SUBW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_H, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - MULHSU-> List(Y, N,BR_N, 
REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_H, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULHSU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIV-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - DIVU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - REM-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - REMU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - DIVW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - DIVUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - REMW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - REMUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), + DIV-> List(Y, N,BR_N, 
REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REM-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - SYSCALL-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N,N), - SETPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_ITYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_S,SYNC_N,N,N,N,Y,Y), - CLEARPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_ITYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_C,SYNC_N,N,N,N,Y,Y), - ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,Y,N), - FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N,N), - FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,N,Y), - CFLUSH-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,Y,Y), - MFPCR-> List(Y, 
N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_F,SYNC_N,N,N,N,Y,Y), - MTPCR-> List(Y, N,BR_N, REN_Y,REN_N,A2_RTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_T,SYNC_N,N,N,N,Y,Y), - RDTIME-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N,N), - RDCYCLE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N,N), - RDINSTRET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N,N)) + SYSCALL-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,N), + SETPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_ITYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), + CLEARPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_ITYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_C,SYNC_N,N,N,Y,Y), + ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,Y,N), + FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), + FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,Y), + CFLUSH-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,Y), + MFPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_F,SYNC_N,N,N,Y,Y), + MTPCR-> List(Y, N,BR_N, REN_Y,REN_N,A2_RTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_T,SYNC_N,N,N,Y,Y), + RDTIME-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), + RDCYCLE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), + RDINSTRET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N)) val fdecode = Array( - // vfence - // | eret - // | | syscall - // vec_val mem_val mul_val div_val pcr | | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - MFTX_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - MFTX_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_W_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_W_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_WU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_WU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_L_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_L_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_LU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_LU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FEQ_S-> 
List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FEQ_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FLT_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FLT_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FLE_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FLE_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - MXTF_S-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - MXTF_D-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_S_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_D_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_S_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_D_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_S_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_D_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_S_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FCVT_D_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FLW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - FLD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - FSW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - FSD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N)) + // eret + // | syscall + // vec_val mem_val mul_val div_val pcr | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | + MFTX_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + MFTX_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_W_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_W_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_WU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_WU_D-> List(FPU_Y,N,BR_N, 
REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_L_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_L_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_LU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_LU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FEQ_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FEQ_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FLT_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FLT_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FLE_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FLE_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + MXTF_S-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + MXTF_D-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + 
FCVT_S_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FLW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FLD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FSW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FSD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N)) val vdecode = Array( - // vfence - // | eret - // | | syscall - // vec_val mem_val mul_val div_val pcr | | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, 
DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,Y), - VVCFG-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,Y), - VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,Y), - VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N,N), - FENCE_V_L-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,N,N,N), - FENCE_V_G-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,N,N,N), - VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, 
N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLBU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, 
WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), - VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N,N), + // eret + // | syscall + // vec_val mem_val mul_val div_val pcr | | privileged + // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | + VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VVCFG-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FENCE_V_L-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FENCE_V_G-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), + VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLBU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSD-> List(VEC_Y,Y,BR_N, 
REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, 
MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VENQCMD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y,N), - VENQIMM1-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y,N), - VENQIMM2-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y,N), - VENQCNT-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y,N), - VXCPTEVAC-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y,N), - VXCPTKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, 
N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,Y,N), - VXCPTHOLD-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,Y,N)) + VENQCMD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQIMM1-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQIMM2-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQCNT-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VXCPTEVAC-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VXCPTKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N), + VXCPTHOLD-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N)) } class rocketCtrl extends Component @@ -318,7 +314,7 @@ class rocketCtrl extends Component val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 - val id_pcr :: id_sync :: id_vfence :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 + val id_pcr :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst, resetVal = Bool(false)); @@ -451,7 +447,7 @@ class rocketCtrl extends Component vec_dec.io.sigs.valid && ( !mask_cmdq_ready || !mask_ximm1q_ready || !mask_ximm2q_ready || !mask_cntq_ready || 
!mask_pfcmdq_ready || !mask_pfximm1q_ready || !mask_pfximm2q_ready || !mask_pfcntq_ready) || - id_vec_val && id_vfence && !vec.io.vfence_ready + id_vec_val && vec_dec.io.sigs.vfence && !vec.io.vfence_ready vec_replay = vec.io.replay vec_irq = vec.io.irq diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 0ca55f01..a52be58a 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -118,8 +118,8 @@ class rocketCtrlVecDecoder extends Component // val vcmd vimm vimm2 | fn | | | | | | | | | | | | | | // | | | | | | | | | | | | | | | | | | | | List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFGVL,N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VVCFG-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, N,VEC_CFG, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), + VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFGVL,N,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,N), + VVCFG-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, N,VEC_CFG, N,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,N), VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N,N,N), VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N), From a09e8d1c5550efd2a8ed8f1861d8ed65ba342c2e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 27 Mar 2012 15:43:56 -0700 Subject: [PATCH 0381/1087] remove I$ prefetcher for now there's a bug in it, and I don't have time to fix it at the moment. 
--- rocket/src/main/scala/tile.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 2ebabc7d..1b7f6e05 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -13,12 +13,11 @@ class Tile extends Component val cpu = new rocketProc(resetSignal = io.host.reset) val icache = new rocketICache(128, 4) // 128 sets x 4 ways (32KB) - val icache_pf = new rocketIPrefetcher val dcache = new HellaCacheUniproc val arbiter = new rocketMemArbiter(2 + (if (HAVE_VEC) 1 else 0)) arbiter.io.requestor(0) <> dcache.io.mem - arbiter.io.requestor(1) <> icache_pf.io.mem + arbiter.io.requestor(1) <> icache.io.mem io.tilelink.xact_init <> Queue(arbiter.io.mem.xact_init) io.tilelink.xact_init_data <> Queue(dcache.io.mem.xact_init_data) @@ -38,8 +37,6 @@ class Tile extends Component cpu.io.host <> io.host - icache_pf.io.invalidate := cpu.io.imem.invalidate - icache.io.mem <> icache_pf.io.icache cpu.io.imem <> icache.io.cpu cpu.io.dmem <> dcache.io.cpu } From c7c35322c2709617fbad2860bb79fc33ceefe459 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Sat, 31 Mar 2012 22:23:51 -0700 Subject: [PATCH 0382/1087] two bug fixes to fpu --- rocket/src/main/scala/ctrl.scala | 6 ++++-- rocket/src/main/scala/dpath.scala | 4 ++++ rocket/src/main/scala/fpu.scala | 4 +++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2cfdbf26..d32bd231 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -67,6 +67,7 @@ class ioCtrlDpath extends Bundle() val sboard_clra = UFix(5, INPUT); val fp_sboard_clr = Bool(INPUT); val fp_sboard_clra = UFix(5, INPUT); + val fp_sboard_wb_waddr = UFix(5, INPUT); val irq_timer = Bool(INPUT); val irq_ipi = Bool(INPUT); } @@ -639,7 +640,8 @@ class rocketCtrl extends Component fp_sboard.io.w(0).en := wb_reg_dcache_miss && wb_reg_fp_wen || 
wb_reg_fp_sboard_set fp_sboard.io.w(0).data := Bool(true) - fp_sboard.io.w(0).addr := io.dpath.wb_waddr + //fp_sboard.io.w(0).addr := io.dpath.wb_waddr + fp_sboard.io.w(0).addr := io.dpath.fp_sboard_wb_waddr fp_sboard.io.w(1).en := io.dpath.fp_sboard_clr fp_sboard.io.w(1).data := Bool(false) @@ -698,7 +700,7 @@ class rocketCtrl extends Component // replay mem stage PC on a DTLB miss or a long-latency writeback val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp_nack) - val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay + val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || mem_reg_fp_val && io.fpu.nack_mem val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 297bde30..420917cb 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -423,6 +423,10 @@ class rocketDpath extends Component io.ctrl.fp_sboard_clr := r_dmem_fp_replay io.ctrl.fp_sboard_clra := r_dmem_resp_waddr + val r_mem_reg_waddr = Reg(){UFix(width = 5)} + r_mem_reg_waddr := mem_reg_waddr + io.ctrl.fp_sboard_wb_waddr := r_mem_reg_waddr + // processor control regfile write pcr.io.w.addr := wb_reg_raddr1 pcr.io.w.en := io.ctrl.pcr === PCR_T || io.ctrl.pcr === PCR_S || io.ctrl.pcr === PCR_C diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 8696f5a2..f1c36303 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -171,6 +171,7 @@ class ioDpathFPU extends Bundle { class ioCtrlFPU extends Bundle { val valid = Bool(OUTPUT) val nack = Bool(INPUT) + val nack_mem = Bool(INPUT) val illegal_rm = Bool(INPUT) val killx = 
Bool(OUTPUT) val killm = Bool(OUTPUT) @@ -440,7 +441,7 @@ class rocketFPUDFMAPipe(latency: Int) extends Component val fma = new hardfloat.mulAddSubRecodedFloat64_1 fma.io.op := cmd fma.io.roundingMode := rm - fma.io.a := in1 + fma.io.a := in1 fma.io.b := in2 fma.io.c := in3 @@ -637,6 +638,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val fsr_busy = ctrl.rdfsr && fp_inflight || mem_reg_valid && mem_ctrl.wrfsr || wb_reg_valid && wb_ctrl.wrfsr val units_busy = mem_reg_valid && mem_ctrl.fma && (io.sfma.valid && mem_ctrl.single || io.dfma.valid && !mem_ctrl.single) io.ctrl.nack := fsr_busy || units_busy || write_port_busy + io.ctrl.nack_mem := units_busy io.ctrl.dec <> fp_decoder.io.sigs // we don't currently support round-max-magnitude (rm=4) io.ctrl.illegal_rm := ex_rm(2) From 7f254d9670e360a267a0df67081ad5489d56d10e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 1 Apr 2012 14:52:33 -0700 Subject: [PATCH 0383/1087] refine FP bugfixes --- rocket/src/main/scala/ctrl.scala | 4 +--- rocket/src/main/scala/dpath.scala | 5 +---- rocket/src/main/scala/fpu.scala | 16 +++++++--------- 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index d32bd231..cfe027da 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -640,7 +640,6 @@ class rocketCtrl extends Component fp_sboard.io.w(0).en := wb_reg_dcache_miss && wb_reg_fp_wen || wb_reg_fp_sboard_set fp_sboard.io.w(0).data := Bool(true) - //fp_sboard.io.w(0).addr := io.dpath.wb_waddr fp_sboard.io.w(0).addr := io.dpath.fp_sboard_wb_waddr fp_sboard.io.w(1).en := io.dpath.fp_sboard_clr @@ -709,8 +708,7 @@ class rocketCtrl extends Component val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) || ex_reg_div_val && !io.dpath.div_rdy || - ex_reg_mul_val && !io.dpath.mul_rdy || - ex_reg_fp_val 
&& io.fpu.nack + ex_reg_mul_val && !io.dpath.mul_rdy val kill_ex = take_pc_wb || replay_ex mem_reg_replay := replay_ex && !take_pc_wb; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 420917cb..180d4427 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -422,10 +422,7 @@ class rocketDpath extends Component io.ctrl.sboard_clra := mem_ll_waddr io.ctrl.fp_sboard_clr := r_dmem_fp_replay io.ctrl.fp_sboard_clra := r_dmem_resp_waddr - - val r_mem_reg_waddr = Reg(){UFix(width = 5)} - r_mem_reg_waddr := mem_reg_waddr - io.ctrl.fp_sboard_wb_waddr := r_mem_reg_waddr + io.ctrl.fp_sboard_wb_waddr := Reg(mem_reg_waddr) // processor control regfile write pcr.io.w.addr := wb_reg_raddr1 diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index f1c36303..54a463df 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -170,7 +170,6 @@ class ioDpathFPU extends Bundle { class ioCtrlFPU extends Bundle { val valid = Bool(OUTPUT) - val nack = Bool(INPUT) val nack_mem = Bool(INPUT) val illegal_rm = Bool(INPUT) val killx = Bool(OUTPUT) @@ -441,7 +440,7 @@ class rocketFPUDFMAPipe(latency: Int) extends Component val fma = new hardfloat.mulAddSubRecodedFloat64_1 fma.io.op := cmd fma.io.roundingMode := rm - fma.io.a := in1 + fma.io.a := in1 fma.io.b := in2 fma.io.c := in3 @@ -590,9 +589,9 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component Mux(ctrl.single, UFix(sfma_latency-1), UFix(dfma_latency-1))) val mem_fu_latency = Reg(ex_stage_fu_latency - UFix(1)) - val write_port_busy = ctrl.fastpipe && wen(fastpipe_latency) || + val write_port_busy = Reg(ctrl.fastpipe && wen(fastpipe_latency) || Bool(sfma_latency < dfma_latency) && ctrl.fma && ctrl.single && wen(sfma_latency) || - mem_wen && mem_fu_latency === ex_stage_fu_latency + mem_wen && mem_fu_latency === ex_stage_fu_latency) mem_wen := ex_reg_valid && !io.ctrl.killx && (ctrl.fma || 
ctrl.fastpipe) val ex_stage_wsrc = Cat(ctrl.fastpipe, ctrl.single) val mem_winfo = Reg(Cat(ex_reg_inst(31,27), ex_stage_wsrc)) @@ -606,7 +605,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component wen := (wen >> UFix(1)) | (UFix(1) << mem_fu_latency) } for (i <- 0 until dfma_latency-1) { - when (UFix(i) === mem_fu_latency) { + when (!write_port_busy && UFix(i) === mem_fu_latency) { winfo(i) := mem_winfo } } @@ -634,11 +633,10 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component fsr_rm := fastpipe.io.out_s(7,5) } - val fp_inflight = mem_reg_valid && mem_ctrl.toint || wb_reg_valid && wb_ctrl.toint || mem_wen || wen.orR - val fsr_busy = ctrl.rdfsr && fp_inflight || mem_reg_valid && mem_ctrl.wrfsr || wb_reg_valid && wb_ctrl.wrfsr + val fp_inflight = wb_reg_valid && wb_ctrl.toint || wen.orR + val fsr_busy = mem_ctrl.rdfsr && fp_inflight || wb_reg_valid && wb_ctrl.wrfsr val units_busy = mem_reg_valid && mem_ctrl.fma && (io.sfma.valid && mem_ctrl.single || io.dfma.valid && !mem_ctrl.single) - io.ctrl.nack := fsr_busy || units_busy || write_port_busy - io.ctrl.nack_mem := units_busy + io.ctrl.nack_mem := fsr_busy || units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs // we don't currently support round-max-magnitude (rm=4) io.ctrl.illegal_rm := ex_rm(2) From c9c3bd02bcfb126307af0873e52a8e2d0dcc681a Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Sun, 1 Apr 2012 17:02:32 -0700 Subject: [PATCH 0384/1087] kill mem stage if fpu nacks in mem stage --- rocket/src/main/scala/ctrl.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index cfe027da..5dfeff12 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -699,8 +699,9 @@ class rocketCtrl extends Component // replay mem stage PC on a DTLB miss or a long-latency writeback val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || 
io.dpath.div_result_val val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp_nack) - val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || mem_reg_fp_val && io.fpu.nack_mem - val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill + val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem + val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem + val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill || fpu_kill_mem val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill // replay execute stage PC when the D$ is blocked, when the D$ misses, From aee937871273c7e92c855d18d5127af81ce4d8d3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 9 Apr 2012 21:40:35 -0700 Subject: [PATCH 0385/1087] fix coherence bug with multiple probe replies --- rocket/src/main/scala/coherence.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 9cd8fc7c..678f83ab 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -598,8 +598,8 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS p_rep_data_dep_list(j).io.enq.valid := do_pop p_rep_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_p_reps) p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) - when (p_rep.valid) { - p_data_valid_arr(idx) := probeReplyHasData(p_rep.bits) + when (p_rep.valid && probeReplyHasData(p_rep.bits)) { + p_data_valid_arr(idx) := Bool(true) p_data_tile_id_arr(idx) := UFix(j) } p_rep_data_dep_list(j).io.deq.ready := foldR(trackerList.map(_.io.pop_p_rep_dep(j).toBool))(_||_) From ed79ec98f71fd1c72eab1880a7e864b4e0f6b0b6 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 3 Apr 2012 
12:03:05 -0700 Subject: [PATCH 0386/1087] Refactored coherence better from uncore hub, better coherence function names --- rocket/src/main/scala/arbiter.scala | 10 +- rocket/src/main/scala/coherence.scala | 632 ++++---------------------- rocket/src/main/scala/consts.scala | 21 - rocket/src/main/scala/nbdcache.scala | 14 +- rocket/src/main/scala/uncore.scala | 503 ++++++++++++++++++++ 5 files changed, 603 insertions(+), 577 deletions(-) create mode 100644 rocket/src/main/scala/uncore.scala diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 9c0211a5..ac741a9d 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -5,16 +5,16 @@ import Node._; import Constants._; class ioUncachedRequestor extends Bundle { - val xact_init = (new ioDecoupled) { new TransactionInit() } - val xact_abort = (new ioDecoupled) { new TransactionAbort() }.flip - val xact_rep = (new ioPipe) { new TransactionReply() }.flip - val xact_finish = (new ioDecoupled) { new TransactionFinish() } + val xact_init = (new ioDecoupled) { new TransactionInit } + val xact_abort = (new ioDecoupled) { new TransactionAbort }.flip + val xact_rep = (new ioPipe) { new TransactionReply }.flip + val xact_finish = (new ioDecoupled) { new TransactionFinish } } class rocketMemArbiter(n: Int) extends Component { val io = new Bundle { val mem = new ioUncachedRequestor - val requestor = Vec(n) { new ioUncachedRequestor().flip } + val requestor = Vec(n) { new ioUncachedRequestor }.flip } var xi_val = Bool(false) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 678f83ab..ee51e469 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -3,42 +3,6 @@ package rocket import Chisel._ import Constants._ -class MemData extends Bundle { - val data = Bits(width = MEM_DATA_BITS) -} - -class MemReqCmd() extends Bundle -{ - val rw = Bool() - val addr = UFix(width = 
PADDR_BITS - OFFSET_BITS) - val tag = Bits(width = MEM_TAG_BITS) -} - -class MemResp () extends MemData -{ - val tag = Bits(width = MEM_TAG_BITS) -} - -class ioMem() extends Bundle -{ - val req_cmd = (new ioDecoupled) { new MemReqCmd() } - val req_data = (new ioDecoupled) { new MemData() } - val resp = (new ioPipe) { new MemResp() }.flip -} - -class TrackerProbeData extends Bundle { - val tile_id = Bits(width = TILE_ID_BITS) -} - -class TrackerAllocReq extends Bundle { - val xact_init = new TransactionInit() - val tile_id = Bits(width = TILE_ID_BITS) -} - -class TrackerDependency extends Bundle { - val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) -} - class TransactionInit extends Bundle { val t_type = Bits(width = X_INIT_TYPE_BITS) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) @@ -75,17 +39,6 @@ class TransactionFinish extends Bundle { val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) } -class ioTileLink extends Bundle { - val xact_init = (new ioDecoupled) { new TransactionInit() } - val xact_init_data = (new ioDecoupled) { new TransactionInitData() } - val xact_abort = (new ioDecoupled) { new TransactionAbort() }.flip - val probe_req = (new ioDecoupled) { new ProbeRequest() }.flip - val probe_rep = (new ioDecoupled) { new ProbeReply() } - val probe_rep_data = (new ioDecoupled) { new ProbeReplyData() } - val xact_rep = (new ioPipe) { new TransactionReply() }.flip - val xact_finish = (new ioDecoupled) { new TransactionFinish() } -} - object cpuCmdToRW { def apply(cmd: Bits): (Bool, Bool) = { val store = (cmd === M_XWR) @@ -101,6 +54,13 @@ trait CoherencePolicy { } trait ThreeStateIncoherence extends CoherencePolicy { val tileInvalid :: tileClean :: tileDirty :: Nil = Enum(3){ UFix() } + val X_INIT_READ_SHARED = UFix(0, 2) + val X_INIT_READ_EXCLUSIVE = UFix(1, 2) + val X_INIT_WRITE_UNCACHED = UFix(3, 2) + val X_REP_READ_SHARED = UFix(0, X_REP_TYPE_BITS) + val X_REP_READ_EXCLUSIVE = UFix(1, X_REP_TYPE_BITS) + val X_REP_WRITE_UNCACHED = UFix(3, 
X_REP_TYPE_BITS) + val P_REP_INVALIDATE_ACK = UFix(3, P_REP_TYPE_BITS) def isHit ( cmd: Bits, state: UFix): Bool = { val (read, write) = cpuCmdToRW(cmd) @@ -116,25 +76,25 @@ trait ThreeStateIncoherence extends CoherencePolicy { } def newStateOnWriteback() = tileInvalid - def newStateOnFlush() = tileInvalid + def newStateOnCacheControl(cmd: Bits) = tileInvalid def newState(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) Mux(write, tileDirty, Mux(read, Mux(state === tileDirty, tileDirty, tileClean), state)) } def newStateOnHit(cmd: Bits, state: UFix): UFix = newState(cmd, state) - def newTransactionOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { + def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) Mux(write || cmd === M_PFW, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) } - def newTransactionOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { + def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { val (read, write) = cpuCmdToRW(cmd) Mux(write, X_INIT_READ_EXCLUSIVE, outstanding.t_type) } - def needsSecondaryXact(cmd: Bits, outstanding: TransactionInit): Bool = Bool(false) - def newStateOnTransactionRep(incoming: TransactionReply, outstanding: TransactionInit): UFix = { + def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = Bool(false) + def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix = { Mux(outstanding.t_type === X_INIT_READ_EXCLUSIVE, tileDirty, tileClean) } - def newStateOnProbeReq(incoming: ProbeRequest, state: UFix): Bits = state + def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = state def newProbeReply (incoming: ProbeRequest, has_data: Bool): ProbeReply = { val reply = Wire() { new ProbeReply() } reply.p_type := P_REP_INVALIDATE_ACK @@ -151,43 +111,66 @@ trait FourStateCoherence extends CoherencePolicy { 
val globalInvalid :: globalShared :: globalExclusiveClean :: Nil = Enum(3){ UFix() } val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(3){ UFix() } + val X_INIT_READ_SHARED = UFix(0, X_INIT_TYPE_BITS) + val X_INIT_READ_EXCLUSIVE = UFix(1, X_INIT_TYPE_BITS) + val X_INIT_READ_UNCACHED = UFix(2, X_INIT_TYPE_BITS) + val X_INIT_WRITE_UNCACHED = UFix(3, X_INIT_TYPE_BITS) + + val X_REP_READ_SHARED = UFix(0, X_REP_TYPE_BITS) + val X_REP_READ_EXCLUSIVE = UFix(1, X_REP_TYPE_BITS) + val X_REP_READ_UNCACHED = UFix(2, X_REP_TYPE_BITS) + val X_REP_WRITE_UNCACHED = UFix(3, X_REP_TYPE_BITS) + val X_REP_READ_EXCLUSIVE_ACK = UFix(4, X_REP_TYPE_BITS) + + val P_REQ_INVALIDATE = UFix(0, P_REQ_TYPE_BITS) + val P_REQ_DOWNGRADE = UFix(1, P_REQ_TYPE_BITS) + val P_REQ_COPY = UFix(2, P_REQ_TYPE_BITS) + + val P_REP_INVALIDATE_DATA = UFix(0, P_REP_TYPE_BITS) + val P_REP_DOWNGRADE_DATA = UFix(1, P_REP_TYPE_BITS) + val P_REP_COPY_DATA = UFix(2, P_REP_TYPE_BITS) + val P_REP_INVALIDATE_ACK = UFix(3, P_REP_TYPE_BITS) + val P_REP_DOWNGRADE_ACK = UFix(4, P_REP_TYPE_BITS) + val P_REP_COPY_ACK = UFix(5, P_REP_TYPE_BITS) + + def isHit ( cmd: Bits, state: UFix): Bool = { val (read, write) = cpuCmdToRW(cmd) Mux(write, (state === tileExclusiveClean || state === tileExclusiveDirty), (state === tileShared || state === tileExclusiveClean || state === tileExclusiveDirty)) } - - //TODO: do we need isPresent() for determining that a line needs to be - //upgraded but that no replacement is needed? 
- def isValid (state: UFix): Bool = { state != tileInvalid } - def needsWriteback (state: UFix): Bool = { - state === tileExclusiveDirty - } - - def newStateOnWriteback() = tileInvalid - def newStateOnFlush() = tileInvalid - def newStateOnHit(cmd: Bits, state: UFix): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, tileExclusiveDirty, state) - } - def newTransactionOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write || cmd === M_PFW, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) - } - def newTransactionOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, X_INIT_READ_EXCLUSIVE, outstanding.t_type) - } - def needsSecondaryXact(cmd: Bits, outstanding: TransactionInit): Bool = { + def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = { val (read, write) = cpuCmdToRW(cmd) (read && (outstanding.t_type === X_INIT_READ_UNCACHED || outstanding.t_type === X_INIT_WRITE_UNCACHED)) || (write && (outstanding.t_type != X_INIT_READ_EXCLUSIVE)) } - def newStateOnTransactionRep(incoming: TransactionReply, outstanding: TransactionInit): UFix = { + def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { + MuxLookup(cmd, (state === tileExclusiveDirty), Array( + M_INV -> (state === tileExclusiveDirty), + M_CLN -> (state === tileExclusiveDirty) + )) + } + def needsWriteback (state: UFix): Bool = { + needsTransactionOnCacheControl(M_INV, state) + } + + def newStateOnHit(cmd: Bits, state: UFix): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, tileExclusiveDirty, state) + } + def newStateOnCacheControl(cmd: Bits) = { + MuxLookup(cmd, tileInvalid, Array( + M_INV -> tileInvalid, + M_CLN -> tileExclusiveClean + )) + } + def newStateOnWriteback() = newStateOnCacheControl(M_INV) + def newStateOnFlush() = newStateOnCacheControl(M_INV) + def newStateOnTransactionReply(incoming: TransactionReply, outstanding: 
TransactionInit): UFix = { MuxLookup(incoming.t_type, tileInvalid, Array( X_REP_READ_SHARED -> tileShared, X_REP_READ_EXCLUSIVE -> Mux(outstanding.t_type === X_INIT_READ_EXCLUSIVE, tileExclusiveDirty, tileExclusiveClean), @@ -196,8 +179,7 @@ trait FourStateCoherence extends CoherencePolicy { X_REP_WRITE_UNCACHED -> tileInvalid )) } - - def newStateOnProbeReq(incoming: ProbeRequest, state: UFix): Bits = { + def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { MuxLookup(incoming.p_type, state, Array( probeInvalidate -> tileInvalid, probeDowngrade -> tileShared, @@ -205,6 +187,17 @@ trait FourStateCoherence extends CoherencePolicy { )) } + def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write || cmd === M_PFW, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) + } + def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, X_INIT_READ_EXCLUSIVE, outstanding.t_type) + } + def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = X_INIT_WRITE_UNCACHED + def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) + def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { val reply = Wire() { new ProbeReply() } val with_data = MuxLookup(incoming.p_type, P_REP_INVALIDATE_DATA, Array( @@ -221,55 +214,31 @@ trait FourStateCoherence extends CoherencePolicy { reply.global_xact_id := incoming.global_xact_id reply } - def probeReplyHasData (reply: ProbeReply): Bool = { + + def hasData (reply: ProbeReply): Bool = { (reply.p_type === P_REP_INVALIDATE_DATA || reply.p_type === P_REP_DOWNGRADE_DATA || reply.p_type === P_REP_COPY_DATA) } - def transactionInitHasData (init: TransactionInit): Bool = { + def hasData (init: TransactionInit): Bool = { (init.t_type === X_INIT_WRITE_UNCACHED) } - def transactionReplyHasData (reply: TransactionReply): Bool = { + def 
hasData (reply: TransactionReply): Bool = { (reply.t_type != X_REP_WRITE_UNCACHED && reply.t_type != X_REP_READ_EXCLUSIVE_ACK) } -} -class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherence { - val io = new Bundle { - val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip - val p_data = (new ioPipe) { new TrackerProbeData }.flip - val can_alloc = Bool(INPUT) - val xact_finish = Bool(INPUT) - val p_rep_cnt_dec = Bits(ntiles, INPUT) - val p_req_cnt_inc = Bits(ntiles, INPUT) - val p_rep_data = (new ioPipe) { new ProbeReplyData }.flip - val x_init_data = (new ioPipe) { new TransactionInitData }.flip - val sent_x_rep_ack = Bool(INPUT) - val p_rep_data_dep = (new ioPipe) { new TrackerDependency }.flip - val x_init_data_dep = (new ioPipe) { new TrackerDependency }.flip + def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) - val mem_req_cmd = (new ioDecoupled) { new MemReqCmd } - val mem_req_data = (new ioDecoupled) { new MemData } - val mem_req_lock = Bool(OUTPUT) - val probe_req = (new ioDecoupled) { new ProbeRequest } - val busy = Bool(OUTPUT) - val addr = Bits(PADDR_BITS - OFFSET_BITS, OUTPUT) - val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) - val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) - val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) - val sharer_count = Bits(TILE_ID_BITS+1, OUTPUT) - val t_type = Bits(X_INIT_TYPE_BITS, OUTPUT) - val push_p_req = Bits(ntiles, OUTPUT) - val pop_p_rep = Bits(ntiles, OUTPUT) - val pop_p_rep_data = Bits(ntiles, OUTPUT) - val pop_p_rep_dep = Bits(ntiles, OUTPUT) - val pop_x_init = Bits(ntiles, OUTPUT) - val pop_x_init_data = Bits(ntiles, OUTPUT) - val pop_x_init_dep = Bits(ntiles, OUTPUT) - val send_x_rep_ack = Bool(OUTPUT) + def getTransactionReplyType(t_type: UFix, count: UFix): Bits = { + MuxLookup(t_type, X_REP_READ_UNCACHED, Array( + X_INIT_READ_SHARED -> Mux(count > UFix(0), X_REP_READ_SHARED, X_REP_READ_EXCLUSIVE), + X_INIT_READ_EXCLUSIVE -> X_REP_READ_EXCLUSIVE, + 
X_INIT_READ_UNCACHED -> X_REP_READ_UNCACHED, + X_INIT_WRITE_UNCACHED -> X_REP_WRITE_UNCACHED + )) } - def sendProbeReqType(t_type: UFix, global_state: UFix): UFix = { + def getProbeRequestType(t_type: UFix, global_state: UFix): UFix = { MuxLookup(t_type, P_REQ_COPY, Array( X_INIT_READ_SHARED -> P_REQ_DOWNGRADE, X_INIT_READ_EXCLUSIVE -> P_REQ_INVALIDATE, @@ -281,435 +250,10 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc def needsMemRead(t_type: UFix, global_state: UFix): Bool = { (t_type != X_INIT_WRITE_UNCACHED) } - - def needsAckRep(t_type: UFix, global_state: UFix): Bool = { + def needsMemWrite(t_type: UFix, global_state: UFix): Bool = { (t_type === X_INIT_WRITE_UNCACHED) } - - def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioPipe[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { - req_cmd.valid := !cmd_sent && at_front_of_dep_queue - req_cmd.bits.rw := Bool(true) - req_data.valid := data.valid && at_front_of_dep_queue - req_data.bits := data.bits - lock := at_front_of_dep_queue - when(req_cmd.ready && req_cmd.valid) { - cmd_sent := Bool(true) - } - when(req_data.ready && at_front_of_dep_queue) { - pop_data := UFix(1) << tile_id - when (data.valid) { - mem_cnt := mem_cnt_next - when(mem_cnt_next === UFix(0)) { - pop_dep := UFix(1) << tile_id - trigger := Bool(false) - } - } - } - } - - def doMemReqRead(req_cmd: ioDecoupled[MemReqCmd], trigger: Bool) { - req_cmd.valid := Bool(true) - req_cmd.bits.rw := Bool(false) - when(req_cmd.ready) { - trigger := Bool(false) - } - } - - val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() } - val state = Reg(resetVal = s_idle) - val addr_ = Reg{ UFix() } - val t_type_ = Reg{ Bits() } - val init_tile_id_ = Reg{ Bits() } - val tile_xact_id_ = Reg{ Bits() } - val p_rep_count = if (ntiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = 
log2up(ntiles))) - val p_req_flags = Reg(resetVal = Bits(0, width = ntiles)) - val p_rep_tile_id_ = Reg{ Bits() } - val x_needs_read = Reg(resetVal = Bool(false)) - val x_init_data_needs_write = Reg(resetVal = Bool(false)) - val p_rep_data_needs_write = Reg(resetVal = Bool(false)) - val x_w_mem_cmd_sent = Reg(resetVal = Bool(false)) - val p_w_mem_cmd_sent = Reg(resetVal = Bool(false)) - val mem_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) - val mem_cnt_next = mem_cnt + UFix(1) - val mem_cnt_max = ~UFix(0, width = log2up(REFILL_CYCLES)) - - io.busy := state != s_idle - io.addr := addr_ - io.init_tile_id := init_tile_id_ - io.p_rep_tile_id := p_rep_tile_id_ - io.tile_xact_id := tile_xact_id_ - io.sharer_count := UFix(ntiles) // TODO: Broadcast only - io.t_type := t_type_ - - io.mem_req_cmd.valid := Bool(false) - io.mem_req_cmd.bits.rw := Bool(false) - io.mem_req_cmd.bits.addr := addr_ - io.mem_req_cmd.bits.tag := UFix(id) - io.mem_req_data.valid := Bool(false) - io.mem_req_data.bits.data := UFix(0) - io.mem_req_lock := Bool(false) - io.probe_req.valid := Bool(false) - io.probe_req.bits.p_type := sendProbeReqType(t_type_, UFix(0)) - io.probe_req.bits.global_xact_id := UFix(id) - io.probe_req.bits.address := addr_ - io.push_p_req := Bits(0, width = ntiles) - io.pop_p_rep := Bits(0, width = ntiles) - io.pop_p_rep_data := Bits(0, width = ntiles) - io.pop_p_rep_dep := Bits(0, width = ntiles) - io.pop_x_init := Bits(0, width = ntiles) - io.pop_x_init_data := Bits(0, width = ntiles) - io.pop_x_init_dep := Bits(0, width = ntiles) - io.send_x_rep_ack := Bool(false) - - switch (state) { - is(s_idle) { - when( io.alloc_req.valid && io.can_alloc ) { - addr_ := io.alloc_req.bits.xact_init.address - t_type_ := io.alloc_req.bits.xact_init.t_type - init_tile_id_ := io.alloc_req.bits.tile_id - tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id - x_init_data_needs_write := transactionInitHasData(io.alloc_req.bits.xact_init) - x_needs_read := 
needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) - if(ntiles > 1) p_rep_count := UFix(ntiles-1) - val p_req_initial_flags = ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only - p_req_flags := p_req_initial_flags - mem_cnt := UFix(0) - p_w_mem_cmd_sent := Bool(false) - x_w_mem_cmd_sent := Bool(false) - io.pop_x_init := UFix(1) << io.alloc_req.bits.tile_id - state := Mux(p_req_initial_flags.orR, s_probe, s_mem) - } - } - is(s_probe) { - when(p_req_flags.orR) { - io.push_p_req := p_req_flags - io.probe_req.valid := Bool(true) - } - when(io.p_req_cnt_inc.orR) { - p_req_flags := p_req_flags & ~io.p_req_cnt_inc // unflag sent reqs - } - when(io.p_rep_cnt_dec.orR) { - val dec = PopCount(io.p_rep_cnt_dec) - io.pop_p_rep := io.p_rep_cnt_dec - if(ntiles > 1) p_rep_count := p_rep_count - dec - when(p_rep_count === dec) { - state := s_mem - } - } - when(io.p_data.valid) { - p_rep_data_needs_write := Bool(true) - p_rep_tile_id_ := io.p_data.bits.tile_id - } - } - is(s_mem) { - when (p_rep_data_needs_write) { - doMemReqWrite(io.mem_req_cmd, - io.mem_req_data, - io.mem_req_lock, - io.p_rep_data, - p_rep_data_needs_write, - p_w_mem_cmd_sent, - io.pop_p_rep_data, - io.pop_p_rep_dep, - io.p_rep_data_dep.valid && (io.p_rep_data_dep.bits.global_xact_id === UFix(id)), - p_rep_tile_id_) - } . elsewhen(x_init_data_needs_write) { - doMemReqWrite(io.mem_req_cmd, - io.mem_req_data, - io.mem_req_lock, - io.x_init_data, - x_init_data_needs_write, - x_w_mem_cmd_sent, - io.pop_x_init_data, - io.pop_x_init_dep, - io.x_init_data_dep.valid && (io.x_init_data_dep.bits.global_xact_id === UFix(id)), - init_tile_id_) - } . elsewhen (x_needs_read) { - doMemReqRead(io.mem_req_cmd, x_needs_read) - } . 
otherwise { - state := Mux(needsAckRep(t_type_, UFix(0)), s_ack, s_busy) - } - } - is(s_ack) { - io.send_x_rep_ack := Bool(true) - when(io.sent_x_rep_ack) { state := s_busy } - } - is(s_busy) { // Nothing left to do but wait for transaction to complete - when (io.xact_finish) { - state := s_idle - } - } + def needsAckReply(t_type: UFix, global_state: UFix): Bool = { + (t_type === X_INIT_WRITE_UNCACHED) } } - -abstract class CoherenceHub(ntiles: Int) extends Component with CoherencePolicy { - val io = new Bundle { - val tiles = Vec(ntiles) { new ioTileLink() }.flip - val mem = new ioMem - } -} - -class CoherenceHubNull extends CoherenceHub(1) { - - val x_init = io.tiles(0).xact_init - val is_write = x_init.bits.t_type === X_INIT_WRITE_UNCACHED - x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp - io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) - io.mem.req_cmd.bits.rw := is_write - io.mem.req_cmd.bits.tag := x_init.bits.tile_xact_id - io.mem.req_cmd.bits.addr := x_init.bits.address - io.mem.req_data <> io.tiles(0).xact_init_data - - val x_rep = io.tiles(0).xact_rep - x_rep.bits.t_type := Mux(io.mem.resp.valid, X_REP_READ_EXCLUSIVE, X_REP_WRITE_UNCACHED) - x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) - x_rep.bits.global_xact_id := UFix(0) // don't care - x_rep.bits.data := io.mem.resp.bits.data - x_rep.bits.require_ack := Bool(true) - x_rep.valid := io.mem.resp.valid || x_init.valid && is_write && io.mem.req_cmd.ready - - io.tiles(0).xact_abort.valid := Bool(false) - io.tiles(0).xact_finish.ready := Bool(true) - io.tiles(0).probe_req.valid := Bool(false) - io.tiles(0).probe_rep.ready := Bool(true) - io.tiles(0).probe_rep_data.ready := Bool(true) -} - - -class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourStateCoherence{ - - def coherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === 
addr2) - - def getTransactionReplyType(t_type: UFix, count: UFix): Bits = { - MuxLookup(t_type, X_REP_READ_UNCACHED, Array( - X_INIT_READ_SHARED -> Mux(count > UFix(0), X_REP_READ_SHARED, X_REP_READ_EXCLUSIVE), - X_INIT_READ_EXCLUSIVE -> X_REP_READ_EXCLUSIVE, - X_INIT_READ_UNCACHED -> X_REP_READ_UNCACHED, - X_INIT_WRITE_UNCACHED -> X_REP_WRITE_UNCACHED - )) - } - - val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(ntiles, _)) - - val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } - val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } - val t_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_BITS)} } - val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - - val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Wire(){Bool()} } } - val p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Wire(){Bool()} } } - val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } - val p_data_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bits(width = TILE_ID_BITS)} } - val p_data_valid_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } - - for( i <- 0 until NGLOBAL_XACTS) { - val t = trackerList(i).io - busy_arr(i) := t.busy - addr_arr(i) := t.addr - init_tile_id_arr(i) := t.init_tile_id - tile_xact_id_arr(i) := t.tile_xact_id - t_type_arr(i) := t.t_type - sh_count_arr(i) := t.sharer_count - send_x_rep_ack_arr(i) := t.send_x_rep_ack - t.xact_finish := do_free_arr(i) - t.p_data.bits.tile_id := p_data_tile_id_arr(i) - t.p_data.valid := p_data_valid_arr(i) - t.p_rep_cnt_dec := p_rep_cnt_dec_arr(i).toBits - t.p_req_cnt_inc := p_req_cnt_inc_arr(i).toBits - t.sent_x_rep_ack := sent_x_rep_ack_arr(i) - do_free_arr(i) := Bool(false) - 
sent_x_rep_ack_arr(i) := Bool(false) - p_data_tile_id_arr(i) := Bits(0, width = TILE_ID_BITS) - p_data_valid_arr(i) := Bool(false) - for( j <- 0 until ntiles) { - p_rep_cnt_dec_arr(i)(j) := Bool(false) - p_req_cnt_inc_arr(i)(j) := Bool(false) - } - } - - val p_rep_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth must >= NPRIMARY - val x_init_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth should >= NPRIMARY - - // Free finished transactions - for( j <- 0 until ntiles ) { - val finish = io.tiles(j).xact_finish - when (finish.valid) { - do_free_arr(finish.bits.global_xact_id) := Bool(true) - } - finish.ready := Bool(true) - } - - // Reply to initial requestor - // Forward memory responses from mem to tile or arbitrate to ack - val mem_idx = io.mem.resp.bits.tag - val ack_idx = PriorityEncoder(send_x_rep_ack_arr.toBits) - for( j <- 0 until ntiles ) { - val rep = io.tiles(j).xact_rep - rep.bits.t_type := UFix(0) - rep.bits.tile_xact_id := UFix(0) - rep.bits.global_xact_id := UFix(0) - rep.bits.data := io.mem.resp.bits.data - rep.bits.require_ack := Bool(true) - rep.valid := Bool(false) - when(io.mem.resp.valid && (UFix(j) === init_tile_id_arr(mem_idx))) { - rep.bits.t_type := getTransactionReplyType(t_type_arr(mem_idx), sh_count_arr(mem_idx)) - rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx) - rep.bits.global_xact_id := mem_idx - rep.valid := Bool(true) - } . otherwise { - rep.bits.t_type := getTransactionReplyType(t_type_arr(ack_idx), sh_count_arr(ack_idx)) - rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) - rep.bits.global_xact_id := ack_idx - when (UFix(j) === init_tile_id_arr(ack_idx)) { - rep.valid := send_x_rep_ack_arr.toBits.orR - sent_x_rep_ack_arr(ack_idx) := Bool(true) - } - } - } - // If there were a ready signal due to e.g. 
intervening network use: - //io.mem.resp.ready := io.tiles(init_tile_id_arr.read(mem_idx)).xact_rep.ready - - // Create an arbiter for the one memory port - // We have to arbitrate between the different trackers' memory requests - // and once we have picked a request, get the right write data - val mem_req_cmd_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemReqCmd() } - val mem_req_data_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemData() } - for( i <- 0 until NGLOBAL_XACTS ) { - mem_req_cmd_arb.io.in(i) <> trackerList(i).io.mem_req_cmd - mem_req_cmd_arb.io.lock(i) <> trackerList(i).io.mem_req_lock - mem_req_data_arb.io.in(i) <> trackerList(i).io.mem_req_data - mem_req_data_arb.io.lock(i) <> trackerList(i).io.mem_req_lock - } - io.mem.req_cmd <> mem_req_cmd_arb.io.out - io.mem.req_data <> mem_req_data_arb.io.out - - // Handle probe replies, which may or may not have data - for( j <- 0 until ntiles ) { - val p_rep = io.tiles(j).probe_rep - val p_rep_data = io.tiles(j).probe_rep_data - val idx = p_rep.bits.global_xact_id - val pop_p_reps = trackerList.map(_.io.pop_p_rep(j).toBool) - val do_pop = foldR(pop_p_reps)(_ || _) - p_rep.ready := Bool(true) - p_rep_data_dep_list(j).io.enq.valid := do_pop - p_rep_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_p_reps) - p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) - when (p_rep.valid && probeReplyHasData(p_rep.bits)) { - p_data_valid_arr(idx) := Bool(true) - p_data_tile_id_arr(idx) := UFix(j) - } - p_rep_data_dep_list(j).io.deq.ready := foldR(trackerList.map(_.io.pop_p_rep_dep(j).toBool))(_||_) - } - for( i <- 0 until NGLOBAL_XACTS ) { - trackerList(i).io.p_rep_data.valid := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.valid - trackerList(i).io.p_rep_data.bits := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.bits - - trackerList(i).io.p_rep_data_dep.valid := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.valid, (0 until 
ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.valid)) - trackerList(i).io.p_rep_data_dep.bits := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.bits)) - - for( j <- 0 until ntiles) { - val p_rep = io.tiles(j).probe_rep - p_rep_cnt_dec_arr(i)(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) - } - } - - // Nack conflicting transaction init attempts - val s_idle :: s_abort_drain :: s_abort_send :: s_abort_complete :: Nil = Enum(4){ UFix() } - val abort_state_arr = Vec(ntiles) { Reg(resetVal = s_idle) } - val want_to_abort_arr = Vec(ntiles) { Wire() { Bool()} } - for( j <- 0 until ntiles ) { - val x_init = io.tiles(j).xact_init - val x_init_data = io.tiles(j).xact_init_data - val x_abort = io.tiles(j).xact_abort - val abort_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) - val conflicts = Vec(NGLOBAL_XACTS) { Wire() { Bool() } } - for( i <- 0 until NGLOBAL_XACTS) { - val t = trackerList(i).io - conflicts(i) := t.busy && x_init.valid && coherenceConflict(t.addr, x_init.bits.address) - } - x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id - want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && transactionInitHasData(x_init.bits))) - - x_abort.valid := Bool(false) - switch(abort_state_arr(j)) { - is(s_idle) { - when(want_to_abort_arr(j)) { - when(transactionInitHasData(x_init.bits)) { - abort_state_arr(j) := s_abort_drain - } . 
otherwise { - abort_state_arr(j) := s_abort_send - } - } - } - is(s_abort_drain) { // raises x_init_data.ready below - when(x_init_data.valid) { - abort_cnt := abort_cnt + UFix(1) - when(abort_cnt === ~UFix(0, width = log2up(REFILL_CYCLES))) { - abort_state_arr(j) := s_abort_send - } - } - } - is(s_abort_send) { // nothing is dequeued for now - x_abort.valid := Bool(true) - when(x_abort.ready) { - abort_state_arr(j) := s_abort_complete - } - } - is(s_abort_complete) { // raises x_init.ready below - abort_state_arr(j) := s_idle - } - } - } - - // Handle transaction initiation requests - // Only one allocation per cycle - // Init requests may or may not have data - val alloc_arb = (new Arbiter(NGLOBAL_XACTS)) { Bool() } - val init_arb = (new Arbiter(ntiles)) { new TrackerAllocReq() } - for( i <- 0 until NGLOBAL_XACTS ) { - alloc_arb.io.in(i).valid := !trackerList(i).io.busy - trackerList(i).io.can_alloc := alloc_arb.io.in(i).ready - trackerList(i).io.alloc_req.bits := init_arb.io.out.bits - trackerList(i).io.alloc_req.valid := init_arb.io.out.valid - - trackerList(i).io.x_init_data.bits := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.bits - trackerList(i).io.x_init_data.valid := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.valid - trackerList(i).io.x_init_data_dep.bits := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.bits)) - trackerList(i).io.x_init_data_dep.valid := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.valid, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.valid)) - } - for( j <- 0 until ntiles ) { - val x_init = io.tiles(j).xact_init - val x_init_data = io.tiles(j).xact_init_data - val x_init_data_dep = x_init_data_dep_list(j).io.deq - init_arb.io.in(j).valid := (abort_state_arr(j) === s_idle) && !want_to_abort_arr(j) && x_init.valid - init_arb.io.in(j).bits.xact_init := x_init.bits - 
init_arb.io.in(j).bits.tile_id := UFix(j) - val pop_x_inits = trackerList.map(_.io.pop_x_init(j).toBool) - val do_pop = foldR(pop_x_inits)(_||_) - x_init_data_dep_list(j).io.enq.valid := do_pop && transactionInitHasData(x_init.bits) && (abort_state_arr(j) === s_idle) - x_init_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_x_inits) - x_init.ready := (abort_state_arr(j) === s_abort_complete) || do_pop - x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data(j).toBool))(_||_) - x_init_data_dep.ready := foldR(trackerList.map(_.io.pop_x_init_dep(j).toBool))(_||_) - } - - alloc_arb.io.out.ready := init_arb.io.out.valid - - // Handle probe request generation - // Must arbitrate for each request port - val p_req_arb_arr = List.fill(ntiles)((new Arbiter(NGLOBAL_XACTS)) { new ProbeRequest() }) - for( j <- 0 until ntiles ) { - for( i <- 0 until NGLOBAL_XACTS ) { - val t = trackerList(i).io - p_req_arb_arr(j).io.in(i).bits := t.probe_req.bits - p_req_arb_arr(j).io.in(i).valid := t.probe_req.valid && t.push_p_req(j) - p_req_cnt_inc_arr(i)(j) := p_req_arb_arr(j).io.in(i).ready - } - p_req_arb_arr(j).io.out <> io.tiles(j).probe_req - } - -} diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 2adcd2f5..9cac98ef 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -198,30 +198,9 @@ object Constants val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS val X_INIT_TYPE_BITS = 2 - val X_INIT_READ_SHARED = UFix(0, X_INIT_TYPE_BITS) - val X_INIT_READ_EXCLUSIVE = UFix(1, X_INIT_TYPE_BITS) - val X_INIT_READ_UNCACHED = UFix(2, X_INIT_TYPE_BITS) - val X_INIT_WRITE_UNCACHED = UFix(3, X_INIT_TYPE_BITS) - val X_REP_TYPE_BITS = 3 - val X_REP_READ_SHARED = UFix(0, X_REP_TYPE_BITS) - val X_REP_READ_EXCLUSIVE = UFix(1, X_REP_TYPE_BITS) - val X_REP_READ_UNCACHED = UFix(2, X_REP_TYPE_BITS) - val X_REP_WRITE_UNCACHED = UFix(3, X_REP_TYPE_BITS) - val 
X_REP_READ_EXCLUSIVE_ACK = UFix(4, X_REP_TYPE_BITS) - val P_REQ_TYPE_BITS = 2 - val P_REQ_INVALIDATE = UFix(0, P_REQ_TYPE_BITS) - val P_REQ_DOWNGRADE = UFix(1, P_REQ_TYPE_BITS) - val P_REQ_COPY = UFix(2, P_REQ_TYPE_BITS) - val P_REP_TYPE_BITS = 3 - val P_REP_INVALIDATE_DATA = UFix(0, P_REP_TYPE_BITS) - val P_REP_DOWNGRADE_DATA = UFix(1, P_REP_TYPE_BITS) - val P_REP_COPY_DATA = UFix(2, P_REP_TYPE_BITS) - val P_REP_INVALIDATE_ACK = UFix(3, P_REP_TYPE_BITS) - val P_REP_DOWNGRADE_ACK = UFix(4, P_REP_TYPE_BITS) - val P_REP_COPY_ACK = UFix(5, P_REP_TYPE_BITS) // external memory interface val MEM_TAG_BITS = max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index cf2db3c8..aed8fb82 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -196,7 +196,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { val req_cmd = io.req_bits.cmd val req_use_rpq = (req_cmd != M_PFR) && (req_cmd != M_PFW) && (req_cmd != M_FLA) val idx_match = req.idx === io.req_bits.idx - val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || (state === s_refill_req || state === s_refill_resp) && !needsSecondaryXact(req_cmd, io.mem_req.bits)) + val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || (state === s_refill_req || state === s_refill_resp) && !needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) val rpq = (new queue(NRPQ)) { new RPQEntry } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq @@ -220,7 +220,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { when (refill_done) { state := s_drain_rpq } when (reply) { refill_count := refill_count + UFix(1) - line_state := newStateOnTransactionRep(io.mem_rep.bits, io.mem_req.bits) + line_state := newStateOnTransactionReply(io.mem_rep.bits, io.mem_req.bits) } when (abort) { state := 
s_refill_req } } @@ -239,13 +239,13 @@ class MSHR(id: Int) extends Component with FourStateCoherence { } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req - xact_type := newTransactionOnSecondaryMiss(req_cmd, newStateOnFlush(), io.mem_req.bits) + xact_type := getTransactionInitTypeOnSecondaryMiss(req_cmd, newStateOnFlush(), io.mem_req.bits) } when ((state === s_invalid) && io.req_pri_val) { flush := req_cmd === M_FLA line_state := newStateOnFlush() refill_count := UFix(0) - xact_type := newTransactionOnPrimaryMiss(req_cmd, newStateOnFlush()) + xact_type := getTransactionInitTypeOnPrimaryMiss(req_cmd, newStateOnFlush()) req := io.req_bits when (io.req_bits.tag_miss) { @@ -409,7 +409,7 @@ class MSHRFile extends Component { io.cpu_resp_tag := Reg(replay.bits.cpu_tag) } -class WritebackUnit extends Component { +class WritebackUnit extends Component with FourStateCoherence{ val io = new Bundle { val req = (new ioDecoupled) { new WritebackReq() }.flip val probe = (new ioDecoupled) { new WritebackReq() }.flip @@ -469,7 +469,7 @@ class WritebackUnit extends Component { io.data_req.bits.data := Bits(0) io.mem_req.valid := valid && !cmd_sent - io.mem_req.bits.t_type := X_INIT_WRITE_UNCACHED + io.mem_req.bits.t_type := getTransactionInitTypeOnWriteback() io.mem_req.bits.address := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := req.tile_xact_id io.mem_req_data.valid := data_req_fired && !is_probe @@ -527,7 +527,7 @@ class ProbeUnit extends Component with FourStateCoherence { io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, ~UFix(0, NWAYS)) io.meta_req.bits.rw := state === s_probe_rep io.meta_req.bits.idx := req.address - io.meta_req.bits.data.state := newStateOnProbeReq(req, line_state) + io.meta_req.bits.data.state := newStateOnProbeRequest(req, line_state) io.meta_req.bits.data.tag := req.address >> UFix(IDX_BITS) io.mshr_req.valid := state === s_meta_resp io.address := req.address diff --git 
a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala new file mode 100644 index 00000000..fa6e2b1e --- /dev/null +++ b/rocket/src/main/scala/uncore.scala @@ -0,0 +1,503 @@ +package rocket + +import Chisel._ +import Constants._ + +class MemData extends Bundle { + val data = Bits(width = MEM_DATA_BITS) +} + +class MemReqCmd() extends Bundle +{ + val rw = Bool() + val addr = UFix(width = PADDR_BITS - OFFSET_BITS) + val tag = Bits(width = MEM_TAG_BITS) +} + +class MemResp () extends MemData +{ + val tag = Bits(width = MEM_TAG_BITS) +} + +class ioMem() extends Bundle +{ + val req_cmd = (new ioDecoupled) { new MemReqCmd() } + val req_data = (new ioDecoupled) { new MemData() } + val resp = (new ioPipe) { new MemResp() }.flip +} + +class TrackerProbeData extends Bundle { + val tile_id = Bits(width = TILE_ID_BITS) +} + +class TrackerAllocReq extends Bundle { + val xact_init = new TransactionInit() + val tile_id = Bits(width = TILE_ID_BITS) +} + +class TrackerDependency extends Bundle { + val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) +} + +class ioTileLink extends Bundle { + val xact_init = (new ioDecoupled) { new TransactionInit } + val xact_init_data = (new ioDecoupled) { new TransactionInitData } + val xact_abort = (new ioDecoupled) { new TransactionAbort }.flip + val probe_req = (new ioDecoupled) { new ProbeRequest }.flip + val probe_rep = (new ioDecoupled) { new ProbeReply } + val probe_rep_data = (new ioDecoupled) { new ProbeReplyData } + val xact_rep = (new ioPipe) { new TransactionReply }.flip + val xact_finish = (new ioDecoupled) { new TransactionFinish } +} + +class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherence { + val io = new Bundle { + val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip + val p_data = (new ioPipe) { new TrackerProbeData }.flip + val can_alloc = Bool(INPUT) + val xact_finish = Bool(INPUT) + val p_rep_cnt_dec = Bits(ntiles, INPUT) + val p_req_cnt_inc = Bits(ntiles, INPUT) + val 
p_rep_data = (new ioPipe) { new ProbeReplyData }.flip + val x_init_data = (new ioPipe) { new TransactionInitData }.flip + val sent_x_rep_ack = Bool(INPUT) + val p_rep_data_dep = (new ioPipe) { new TrackerDependency }.flip + val x_init_data_dep = (new ioPipe) { new TrackerDependency }.flip + + val mem_req_cmd = (new ioDecoupled) { new MemReqCmd } + val mem_req_data = (new ioDecoupled) { new MemData } + val mem_req_lock = Bool(OUTPUT) + val probe_req = (new ioDecoupled) { new ProbeRequest } + val busy = Bool(OUTPUT) + val addr = Bits(PADDR_BITS - OFFSET_BITS, OUTPUT) + val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) + val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) + val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) + val sharer_count = Bits(TILE_ID_BITS+1, OUTPUT) + val t_type = Bits(X_INIT_TYPE_BITS, OUTPUT) + val push_p_req = Bits(ntiles, OUTPUT) + val pop_p_rep = Bits(ntiles, OUTPUT) + val pop_p_rep_data = Bits(ntiles, OUTPUT) + val pop_p_rep_dep = Bits(ntiles, OUTPUT) + val pop_x_init = Bits(ntiles, OUTPUT) + val pop_x_init_data = Bits(ntiles, OUTPUT) + val pop_x_init_dep = Bits(ntiles, OUTPUT) + val send_x_rep_ack = Bool(OUTPUT) + } + + def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioPipe[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { + req_cmd.valid := !cmd_sent && at_front_of_dep_queue + req_cmd.bits.rw := Bool(true) + req_data.valid := data.valid && at_front_of_dep_queue + req_data.bits := data.bits + lock := at_front_of_dep_queue + when(req_cmd.ready && req_cmd.valid) { + cmd_sent := Bool(true) + } + when(req_data.ready && at_front_of_dep_queue) { + pop_data := UFix(1) << tile_id + when (data.valid) { + mem_cnt := mem_cnt_next + when(mem_cnt_next === UFix(0)) { + pop_dep := UFix(1) << tile_id + trigger := Bool(false) + } + } + } + } + + def doMemReqRead(req_cmd: ioDecoupled[MemReqCmd], trigger: Bool) { + req_cmd.valid := Bool(true) + 
req_cmd.bits.rw := Bool(false) + when(req_cmd.ready) { + trigger := Bool(false) + } + } + + val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() } + val state = Reg(resetVal = s_idle) + val addr_ = Reg{ UFix() } + val t_type_ = Reg{ Bits() } + val init_tile_id_ = Reg{ Bits() } + val tile_xact_id_ = Reg{ Bits() } + val p_rep_count = if (ntiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2up(ntiles))) + val p_req_flags = Reg(resetVal = Bits(0, width = ntiles)) + val p_rep_tile_id_ = Reg{ Bits() } + val x_needs_read = Reg(resetVal = Bool(false)) + val x_init_data_needs_write = Reg(resetVal = Bool(false)) + val p_rep_data_needs_write = Reg(resetVal = Bool(false)) + val x_w_mem_cmd_sent = Reg(resetVal = Bool(false)) + val p_w_mem_cmd_sent = Reg(resetVal = Bool(false)) + val mem_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + val mem_cnt_next = mem_cnt + UFix(1) + val mem_cnt_max = ~UFix(0, width = log2up(REFILL_CYCLES)) + + io.busy := state != s_idle + io.addr := addr_ + io.init_tile_id := init_tile_id_ + io.p_rep_tile_id := p_rep_tile_id_ + io.tile_xact_id := tile_xact_id_ + io.sharer_count := UFix(ntiles) // TODO: Broadcast only + io.t_type := t_type_ + + io.mem_req_cmd.valid := Bool(false) + io.mem_req_cmd.bits.rw := Bool(false) + io.mem_req_cmd.bits.addr := addr_ + io.mem_req_cmd.bits.tag := UFix(id) + io.mem_req_data.valid := Bool(false) + io.mem_req_data.bits.data := UFix(0) + io.mem_req_lock := Bool(false) + io.probe_req.valid := Bool(false) + io.probe_req.bits.p_type := getProbeRequestType(t_type_, UFix(0)) + io.probe_req.bits.global_xact_id := UFix(id) + io.probe_req.bits.address := addr_ + io.push_p_req := Bits(0, width = ntiles) + io.pop_p_rep := Bits(0, width = ntiles) + io.pop_p_rep_data := Bits(0, width = ntiles) + io.pop_p_rep_dep := Bits(0, width = ntiles) + io.pop_x_init := Bits(0, width = ntiles) + io.pop_x_init_data := Bits(0, width = ntiles) + io.pop_x_init_dep := Bits(0, width = ntiles) + 
io.send_x_rep_ack := Bool(false) + + switch (state) { + is(s_idle) { + when( io.alloc_req.valid && io.can_alloc ) { + addr_ := io.alloc_req.bits.xact_init.address + t_type_ := io.alloc_req.bits.xact_init.t_type + init_tile_id_ := io.alloc_req.bits.tile_id + tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id + x_init_data_needs_write := hasData(io.alloc_req.bits.xact_init) + x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) + if(ntiles > 1) p_rep_count := UFix(ntiles-1) + val p_req_initial_flags = ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only + p_req_flags := p_req_initial_flags + mem_cnt := UFix(0) + p_w_mem_cmd_sent := Bool(false) + x_w_mem_cmd_sent := Bool(false) + io.pop_x_init := UFix(1) << io.alloc_req.bits.tile_id + state := Mux(p_req_initial_flags.orR, s_probe, s_mem) + } + } + is(s_probe) { + when(p_req_flags.orR) { + io.push_p_req := p_req_flags + io.probe_req.valid := Bool(true) + } + when(io.p_req_cnt_inc.orR) { + p_req_flags := p_req_flags & ~io.p_req_cnt_inc // unflag sent reqs + } + when(io.p_rep_cnt_dec.orR) { + val dec = PopCount(io.p_rep_cnt_dec) + io.pop_p_rep := io.p_rep_cnt_dec + if(ntiles > 1) p_rep_count := p_rep_count - dec + when(p_rep_count === dec) { + state := s_mem + } + } + when(io.p_data.valid) { + p_rep_data_needs_write := Bool(true) + p_rep_tile_id_ := io.p_data.bits.tile_id + } + } + is(s_mem) { + when (p_rep_data_needs_write) { + doMemReqWrite(io.mem_req_cmd, + io.mem_req_data, + io.mem_req_lock, + io.p_rep_data, + p_rep_data_needs_write, + p_w_mem_cmd_sent, + io.pop_p_rep_data, + io.pop_p_rep_dep, + io.p_rep_data_dep.valid && (io.p_rep_data_dep.bits.global_xact_id === UFix(id)), + p_rep_tile_id_) + } . 
elsewhen(x_init_data_needs_write) { + doMemReqWrite(io.mem_req_cmd, + io.mem_req_data, + io.mem_req_lock, + io.x_init_data, + x_init_data_needs_write, + x_w_mem_cmd_sent, + io.pop_x_init_data, + io.pop_x_init_dep, + io.x_init_data_dep.valid && (io.x_init_data_dep.bits.global_xact_id === UFix(id)), + init_tile_id_) + } . elsewhen (x_needs_read) { + doMemReqRead(io.mem_req_cmd, x_needs_read) + } . otherwise { + state := Mux(needsAckReply(t_type_, UFix(0)), s_ack, s_busy) + } + } + is(s_ack) { + io.send_x_rep_ack := Bool(true) + when(io.sent_x_rep_ack) { state := s_busy } + } + is(s_busy) { // Nothing left to do but wait for transaction to complete + when (io.xact_finish) { + state := s_idle + } + } + } +} + +abstract class CoherenceHub(ntiles: Int) extends Component with CoherencePolicy { + val io = new Bundle { + val tiles = Vec(ntiles) { new ioTileLink() }.flip + val mem = new ioMem + } +} + +class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence +{ + val x_init = io.tiles(0).xact_init + val is_write = x_init.bits.t_type === X_INIT_WRITE_UNCACHED + x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp + io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) + io.mem.req_cmd.bits.rw := is_write + io.mem.req_cmd.bits.tag := x_init.bits.tile_xact_id + io.mem.req_cmd.bits.addr := x_init.bits.address + io.mem.req_data <> io.tiles(0).xact_init_data + + val x_rep = io.tiles(0).xact_rep + x_rep.bits.t_type := Mux(io.mem.resp.valid, X_REP_READ_EXCLUSIVE, X_REP_WRITE_UNCACHED) + x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) + x_rep.bits.global_xact_id := UFix(0) // don't care + x_rep.bits.data := io.mem.resp.bits.data + x_rep.bits.require_ack := Bool(true) + x_rep.valid := io.mem.resp.valid || x_init.valid && is_write && io.mem.req_cmd.ready + + io.tiles(0).xact_abort.valid := Bool(false) + 
io.tiles(0).xact_finish.ready := Bool(true) + io.tiles(0).probe_req.valid := Bool(false) + io.tiles(0).probe_rep.ready := Bool(true) + io.tiles(0).probe_rep_data.ready := Bool(true) +} + + +class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourStateCoherence +{ + val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(ntiles, _)) + + val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } + val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } + val t_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_BITS)} } + val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } + val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + + val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Wire(){Bool()} } } + val p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Wire(){Bool()} } } + val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } + val p_data_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bits(width = TILE_ID_BITS)} } + val p_data_valid_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } + + for( i <- 0 until NGLOBAL_XACTS) { + val t = trackerList(i).io + busy_arr(i) := t.busy + addr_arr(i) := t.addr + init_tile_id_arr(i) := t.init_tile_id + tile_xact_id_arr(i) := t.tile_xact_id + t_type_arr(i) := t.t_type + sh_count_arr(i) := t.sharer_count + send_x_rep_ack_arr(i) := t.send_x_rep_ack + t.xact_finish := do_free_arr(i) + t.p_data.bits.tile_id := p_data_tile_id_arr(i) + t.p_data.valid := p_data_valid_arr(i) + t.p_rep_cnt_dec := p_rep_cnt_dec_arr(i).toBits + t.p_req_cnt_inc := p_req_cnt_inc_arr(i).toBits + t.sent_x_rep_ack := sent_x_rep_ack_arr(i) + do_free_arr(i) := Bool(false) + sent_x_rep_ack_arr(i) := Bool(false) + p_data_tile_id_arr(i) := Bits(0, width = 
TILE_ID_BITS) + p_data_valid_arr(i) := Bool(false) + for( j <- 0 until ntiles) { + p_rep_cnt_dec_arr(i)(j) := Bool(false) + p_req_cnt_inc_arr(i)(j) := Bool(false) + } + } + + val p_rep_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth must >= NPRIMARY + val x_init_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth should >= NPRIMARY + + // Free finished transactions + for( j <- 0 until ntiles ) { + val finish = io.tiles(j).xact_finish + when (finish.valid) { + do_free_arr(finish.bits.global_xact_id) := Bool(true) + } + finish.ready := Bool(true) + } + + // Reply to initial requestor + // Forward memory responses from mem to tile or arbitrate to ack + val mem_idx = io.mem.resp.bits.tag + val ack_idx = PriorityEncoder(send_x_rep_ack_arr.toBits) + for( j <- 0 until ntiles ) { + val rep = io.tiles(j).xact_rep + rep.bits.t_type := UFix(0) + rep.bits.tile_xact_id := UFix(0) + rep.bits.global_xact_id := UFix(0) + rep.bits.data := io.mem.resp.bits.data + rep.bits.require_ack := Bool(true) + rep.valid := Bool(false) + when(io.mem.resp.valid && (UFix(j) === init_tile_id_arr(mem_idx))) { + rep.bits.t_type := getTransactionReplyType(t_type_arr(mem_idx), sh_count_arr(mem_idx)) + rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx) + rep.bits.global_xact_id := mem_idx + rep.valid := Bool(true) + } . otherwise { + rep.bits.t_type := getTransactionReplyType(t_type_arr(ack_idx), sh_count_arr(ack_idx)) + rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) + rep.bits.global_xact_id := ack_idx + when (UFix(j) === init_tile_id_arr(ack_idx)) { + rep.valid := send_x_rep_ack_arr.toBits.orR + sent_x_rep_ack_arr(ack_idx) := Bool(true) + } + } + } + // If there were a ready signal due to e.g. 
intervening network use: + //io.mem.resp.ready := io.tiles(init_tile_id_arr.read(mem_idx)).xact_rep.ready + + // Create an arbiter for the one memory port + // We have to arbitrate between the different trackers' memory requests + // and once we have picked a request, get the right write data + val mem_req_cmd_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemReqCmd() } + val mem_req_data_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemData() } + for( i <- 0 until NGLOBAL_XACTS ) { + mem_req_cmd_arb.io.in(i) <> trackerList(i).io.mem_req_cmd + mem_req_cmd_arb.io.lock(i) <> trackerList(i).io.mem_req_lock + mem_req_data_arb.io.in(i) <> trackerList(i).io.mem_req_data + mem_req_data_arb.io.lock(i) <> trackerList(i).io.mem_req_lock + } + io.mem.req_cmd <> mem_req_cmd_arb.io.out + io.mem.req_data <> mem_req_data_arb.io.out + + // Handle probe replies, which may or may not have data + for( j <- 0 until ntiles ) { + val p_rep = io.tiles(j).probe_rep + val p_rep_data = io.tiles(j).probe_rep_data + val idx = p_rep.bits.global_xact_id + val pop_p_reps = trackerList.map(_.io.pop_p_rep(j).toBool) + val do_pop = foldR(pop_p_reps)(_ || _) + p_rep.ready := Bool(true) + p_rep_data_dep_list(j).io.enq.valid := do_pop + p_rep_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_p_reps) + p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) + when (p_rep.valid && co.messageHasData(p_rep.bits)) { + p_data_valid_arr(idx) := Bool(true) + p_data_tile_id_arr(idx) := UFix(j) + } + p_rep_data_dep_list(j).io.deq.ready := foldR(trackerList.map(_.io.pop_p_rep_dep(j).toBool))(_||_) + } + for( i <- 0 until NGLOBAL_XACTS ) { + trackerList(i).io.p_rep_data.valid := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.valid + trackerList(i).io.p_rep_data.bits := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.bits + + trackerList(i).io.p_rep_data_dep.valid := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.valid, (0 until 
ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.valid)) + trackerList(i).io.p_rep_data_dep.bits := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.bits)) + + for( j <- 0 until ntiles) { + val p_rep = io.tiles(j).probe_rep + p_rep_cnt_dec_arr(i)(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) + } + } + + // Nack conflicting transaction init attempts + val s_idle :: s_abort_drain :: s_abort_send :: s_abort_complete :: Nil = Enum(4){ UFix() } + val abort_state_arr = Vec(ntiles) { Reg(resetVal = s_idle) } + val want_to_abort_arr = Vec(ntiles) { Wire() { Bool()} } + for( j <- 0 until ntiles ) { + val x_init = io.tiles(j).xact_init + val x_init_data = io.tiles(j).xact_init_data + val x_abort = io.tiles(j).xact_abort + val abort_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + val conflicts = Vec(NGLOBAL_XACTS) { Wire() { Bool() } } + for( i <- 0 until NGLOBAL_XACTS) { + val t = trackerList(i).io + conflicts(i) := t.busy && x_init.valid && isCoherenceConflict(t.addr, x_init.bits.address) + } + x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id + want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && hasData(x_init.bits))) + + x_abort.valid := Bool(false) + switch(abort_state_arr(j)) { + is(s_idle) { + when(want_to_abort_arr(j)) { + when(hasData(x_init.bits)) { + abort_state_arr(j) := s_abort_drain + } . 
otherwise { + abort_state_arr(j) := s_abort_send + } + } + } + is(s_abort_drain) { // raises x_init_data.ready below + when(x_init_data.valid) { + abort_cnt := abort_cnt + UFix(1) + when(abort_cnt === ~UFix(0, width = log2up(REFILL_CYCLES))) { + abort_state_arr(j) := s_abort_send + } + } + } + is(s_abort_send) { // nothing is dequeued for now + x_abort.valid := Bool(true) + when(x_abort.ready) { + abort_state_arr(j) := s_abort_complete + } + } + is(s_abort_complete) { // raises x_init.ready below + abort_state_arr(j) := s_idle + } + } + } + + // Handle transaction initiation requests + // Only one allocation per cycle + // Init requests may or may not have data + val alloc_arb = (new Arbiter(NGLOBAL_XACTS)) { Bool() } + val init_arb = (new Arbiter(ntiles)) { new TrackerAllocReq() } + for( i <- 0 until NGLOBAL_XACTS ) { + alloc_arb.io.in(i).valid := !trackerList(i).io.busy + trackerList(i).io.can_alloc := alloc_arb.io.in(i).ready + trackerList(i).io.alloc_req.bits := init_arb.io.out.bits + trackerList(i).io.alloc_req.valid := init_arb.io.out.valid + + trackerList(i).io.x_init_data.bits := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.bits + trackerList(i).io.x_init_data.valid := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.valid + trackerList(i).io.x_init_data_dep.bits := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.bits)) + trackerList(i).io.x_init_data_dep.valid := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.valid, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.valid)) + } + for( j <- 0 until ntiles ) { + val x_init = io.tiles(j).xact_init + val x_init_data = io.tiles(j).xact_init_data + val x_init_data_dep = x_init_data_dep_list(j).io.deq + init_arb.io.in(j).valid := (abort_state_arr(j) === s_idle) && !want_to_abort_arr(j) && x_init.valid + init_arb.io.in(j).bits.xact_init := x_init.bits + 
init_arb.io.in(j).bits.tile_id := UFix(j) + val pop_x_inits = trackerList.map(_.io.pop_x_init(j).toBool) + val do_pop = foldR(pop_x_inits)(_||_) + x_init_data_dep_list(j).io.enq.valid := do_pop && hasData(x_init.bits) && (abort_state_arr(j) === s_idle) + x_init_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_x_inits) + x_init.ready := (abort_state_arr(j) === s_abort_complete) || do_pop + x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data(j).toBool))(_||_) + x_init_data_dep.ready := foldR(trackerList.map(_.io.pop_x_init_dep(j).toBool))(_||_) + } + + alloc_arb.io.out.ready := init_arb.io.out.valid + + // Handle probe request generation + // Must arbitrate for each request port + val p_req_arb_arr = List.fill(ntiles)((new Arbiter(NGLOBAL_XACTS)) { new ProbeRequest() }) + for( j <- 0 until ntiles ) { + for( i <- 0 until NGLOBAL_XACTS ) { + val t = trackerList(i).io + p_req_arb_arr(j).io.in(i).bits := t.probe_req.bits + p_req_arb_arr(j).io.in(i).valid := t.probe_req.valid && t.push_p_req(j) + p_req_cnt_inc_arr(i)(j) := p_req_arb_arr(j).io.in(i).ready + } + p_req_arb_arr(j).io.out <> io.tiles(j).probe_req + } + +} From 0b4937f70ff94c068b3026c73f00d7d3ca8e1828 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 3 Apr 2012 18:06:02 -0700 Subject: [PATCH 0387/1087] changed coherence message type names --- rocket/src/main/scala/coherence.scala | 147 +++++++++----------- rocket/src/main/scala/htif.scala | 2 +- rocket/src/main/scala/icache.scala | 5 +- rocket/src/main/scala/icache_prefetch.scala | 5 +- rocket/src/main/scala/nbdcache.scala | 12 +- rocket/src/main/scala/uncore.scala | 28 ++-- 6 files changed, 91 insertions(+), 108 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index ee51e469..7af9c903 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -4,7 +4,7 @@ import Chisel._ import Constants._ class 
TransactionInit extends Bundle { - val t_type = Bits(width = X_INIT_TYPE_BITS) + val x_type = Bits(width = X_INIT_TYPE_BITS) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) val address = UFix(width = PADDR_BITS - OFFSET_BITS) } @@ -29,7 +29,7 @@ class ProbeReply extends Bundle { class ProbeReplyData extends MemData class TransactionReply extends MemData { - val t_type = Bits(width = X_REP_TYPE_BITS) + val x_type = Bits(width = X_REP_TYPE_BITS) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) val require_ack = Bool() @@ -54,13 +54,13 @@ trait CoherencePolicy { } trait ThreeStateIncoherence extends CoherencePolicy { val tileInvalid :: tileClean :: tileDirty :: Nil = Enum(3){ UFix() } - val X_INIT_READ_SHARED = UFix(0, 2) - val X_INIT_READ_EXCLUSIVE = UFix(1, 2) - val X_INIT_WRITE_UNCACHED = UFix(3, 2) - val X_REP_READ_SHARED = UFix(0, X_REP_TYPE_BITS) - val X_REP_READ_EXCLUSIVE = UFix(1, X_REP_TYPE_BITS) - val X_REP_WRITE_UNCACHED = UFix(3, X_REP_TYPE_BITS) - val P_REP_INVALIDATE_ACK = UFix(3, P_REP_TYPE_BITS) + val xactInitReadShared = UFix(0, 2) + val xactInitReadExclusive = UFix(1, 2) + val xactInitWriteUncached = UFix(3, 2) + val xactReplyReadShared = UFix(0, X_REP_TYPE_BITS) + val xactReplyReadExclusive = UFix(1, X_REP_TYPE_BITS) + val xactReplyWriteUncached = UFix(3, X_REP_TYPE_BITS) + val probeRepInvalidateAck = UFix(3, P_REP_TYPE_BITS) def isHit ( cmd: Bits, state: UFix): Bool = { val (read, write) = cpuCmdToRW(cmd) @@ -84,57 +84,38 @@ trait ThreeStateIncoherence extends CoherencePolicy { def newStateOnHit(cmd: Bits, state: UFix): UFix = newState(cmd, state) def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) - Mux(write || cmd === M_PFW, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) + Mux(write || cmd === M_PFW, xactInitReadExclusive, xactInitReadShared) } def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: 
TransactionInit): UFix = { val (read, write) = cpuCmdToRW(cmd) - Mux(write, X_INIT_READ_EXCLUSIVE, outstanding.t_type) + Mux(write, xactInitReadExclusive, outstanding.x_type) } def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = Bool(false) def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix = { - Mux(outstanding.t_type === X_INIT_READ_EXCLUSIVE, tileDirty, tileClean) + Mux(outstanding.x_type === xactInitReadExclusive, tileDirty, tileClean) } def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = state def newProbeReply (incoming: ProbeRequest, has_data: Bool): ProbeReply = { val reply = Wire() { new ProbeReply() } - reply.p_type := P_REP_INVALIDATE_ACK + reply.p_type := probeRepInvalidateAck reply.global_xact_id := UFix(0) reply } def probeReplyHasData (reply: ProbeReply): Bool = Bool(false) - def transactionInitHasData (init: TransactionInit): Bool = (init.t_type === X_INIT_WRITE_UNCACHED) + def transactionInitHasData (init: TransactionInit): Bool = (init.x_type === xactInitWriteUncached) } trait FourStateCoherence extends CoherencePolicy { val tileInvalid :: tileShared :: tileExclusiveClean :: tileExclusiveDirty :: Nil = Enum(4){ UFix() } val globalInvalid :: globalShared :: globalExclusiveClean :: Nil = Enum(3){ UFix() } - val probeInvalidate :: probeDowngrade :: probeCopy :: Nil = Enum(3){ UFix() } - val X_INIT_READ_SHARED = UFix(0, X_INIT_TYPE_BITS) - val X_INIT_READ_EXCLUSIVE = UFix(1, X_INIT_TYPE_BITS) - val X_INIT_READ_UNCACHED = UFix(2, X_INIT_TYPE_BITS) - val X_INIT_WRITE_UNCACHED = UFix(3, X_INIT_TYPE_BITS) + val xactInitReadShared :: xactInitReadExclusive :: xactInitReadUncached :: xactInitWriteUncached :: Nil = Enum(4){ UFix() } + val xactReplyReadShared :: xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: Nil = Enum(5){ UFix() } + val probeReqInvalidate :: probeReqDowngrade :: probeReqCopy :: Nil = 
Enum(3){ UFix() } + val probeRepInvalidateData :: probeRepDowngradeData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepDowngradeAck :: probeRepCopyAck :: Nil = Enum(6){ UFix() } - val X_REP_READ_SHARED = UFix(0, X_REP_TYPE_BITS) - val X_REP_READ_EXCLUSIVE = UFix(1, X_REP_TYPE_BITS) - val X_REP_READ_UNCACHED = UFix(2, X_REP_TYPE_BITS) - val X_REP_WRITE_UNCACHED = UFix(3, X_REP_TYPE_BITS) - val X_REP_READ_EXCLUSIVE_ACK = UFix(4, X_REP_TYPE_BITS) - - val P_REQ_INVALIDATE = UFix(0, P_REQ_TYPE_BITS) - val P_REQ_DOWNGRADE = UFix(1, P_REQ_TYPE_BITS) - val P_REQ_COPY = UFix(2, P_REQ_TYPE_BITS) - - val P_REP_INVALIDATE_DATA = UFix(0, P_REP_TYPE_BITS) - val P_REP_DOWNGRADE_DATA = UFix(1, P_REP_TYPE_BITS) - val P_REP_COPY_DATA = UFix(2, P_REP_TYPE_BITS) - val P_REP_INVALIDATE_ACK = UFix(3, P_REP_TYPE_BITS) - val P_REP_DOWNGRADE_ACK = UFix(4, P_REP_TYPE_BITS) - val P_REP_COPY_ACK = UFix(5, P_REP_TYPE_BITS) - - - def isHit ( cmd: Bits, state: UFix): Bool = { + def isHit (cmd: Bits, state: UFix): Bool = { val (read, write) = cpuCmdToRW(cmd) Mux(write, (state === tileExclusiveClean || state === tileExclusiveDirty), (state === tileShared || state === tileExclusiveClean || state === tileExclusiveDirty)) @@ -145,8 +126,8 @@ trait FourStateCoherence extends CoherencePolicy { def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = { val (read, write) = cpuCmdToRW(cmd) - (read && (outstanding.t_type === X_INIT_READ_UNCACHED || outstanding.t_type === X_INIT_WRITE_UNCACHED)) || - (write && (outstanding.t_type != X_INIT_READ_EXCLUSIVE)) + (read && (outstanding.x_type === xactInitReadUncached || outstanding.x_type === xactInitWriteUncached)) || + (write && (outstanding.x_type != xactInitReadExclusive)) } def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { MuxLookup(cmd, (state === tileExclusiveDirty), Array( @@ -171,44 +152,44 @@ trait FourStateCoherence extends CoherencePolicy { def newStateOnWriteback() = 
newStateOnCacheControl(M_INV) def newStateOnFlush() = newStateOnCacheControl(M_INV) def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix = { - MuxLookup(incoming.t_type, tileInvalid, Array( - X_REP_READ_SHARED -> tileShared, - X_REP_READ_EXCLUSIVE -> Mux(outstanding.t_type === X_INIT_READ_EXCLUSIVE, tileExclusiveDirty, tileExclusiveClean), - X_REP_READ_EXCLUSIVE_ACK -> tileExclusiveDirty, - X_REP_READ_UNCACHED -> tileInvalid, - X_REP_WRITE_UNCACHED -> tileInvalid + MuxLookup(incoming.x_type, tileInvalid, Array( + xactReplyReadShared -> tileShared, + xactReplyReadExclusive -> Mux(outstanding.x_type === xactInitReadExclusive, tileExclusiveDirty, tileExclusiveClean), + xactReplyReadExclusiveAck -> tileExclusiveDirty, + xactReplyReadUncached -> tileInvalid, + xactReplyWriteUncached -> tileInvalid )) } def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { MuxLookup(incoming.p_type, state, Array( - probeInvalidate -> tileInvalid, - probeDowngrade -> tileShared, - probeCopy -> state + probeReqInvalidate -> tileInvalid, + probeReqDowngrade -> tileShared, + probeReqCopy -> state )) } def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) - Mux(write || cmd === M_PFW, X_INIT_READ_EXCLUSIVE, X_INIT_READ_SHARED) + Mux(write || cmd === M_PFW, xactInitReadExclusive, xactInitReadShared) } def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { val (read, write) = cpuCmdToRW(cmd) - Mux(write, X_INIT_READ_EXCLUSIVE, outstanding.t_type) + Mux(write, xactInitReadExclusive, outstanding.x_type) } - def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = X_INIT_WRITE_UNCACHED + def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteUncached def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { 
val reply = Wire() { new ProbeReply() } - val with_data = MuxLookup(incoming.p_type, P_REP_INVALIDATE_DATA, Array( - probeInvalidate -> P_REP_INVALIDATE_DATA, - probeDowngrade -> P_REP_DOWNGRADE_DATA, - probeCopy -> P_REP_COPY_DATA + val with_data = MuxLookup(incoming.p_type, probeRepInvalidateData, Array( + probeReqInvalidate -> probeRepInvalidateData, + probeReqDowngrade -> probeRepDowngradeData, + probeReqCopy -> probeRepCopyData )) - val without_data = MuxLookup(incoming.p_type, P_REP_INVALIDATE_ACK, Array( - probeInvalidate -> P_REP_INVALIDATE_ACK, - probeDowngrade -> P_REP_DOWNGRADE_ACK, - probeCopy -> P_REP_COPY_ACK + val without_data = MuxLookup(incoming.p_type, probeRepInvalidateAck, Array( + probeReqInvalidate -> probeRepInvalidateAck, + probeReqDowngrade -> probeRepDowngradeAck, + probeReqCopy -> probeRepCopyAck )) reply.p_type := Mux(needsWriteback(state), with_data, without_data) reply.global_xact_id := incoming.global_xact_id @@ -216,44 +197,44 @@ trait FourStateCoherence extends CoherencePolicy { } def hasData (reply: ProbeReply): Bool = { - (reply.p_type === P_REP_INVALIDATE_DATA || - reply.p_type === P_REP_DOWNGRADE_DATA || - reply.p_type === P_REP_COPY_DATA) + (reply.p_type === probeRepInvalidateData || + reply.p_type === probeRepDowngradeData || + reply.p_type === probeRepCopyData) } def hasData (init: TransactionInit): Bool = { - (init.t_type === X_INIT_WRITE_UNCACHED) + (init.x_type === xactInitWriteUncached) } def hasData (reply: TransactionReply): Bool = { - (reply.t_type != X_REP_WRITE_UNCACHED && reply.t_type != X_REP_READ_EXCLUSIVE_ACK) + (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck) } def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) - def getTransactionReplyType(t_type: UFix, count: UFix): Bits = { - MuxLookup(t_type, X_REP_READ_UNCACHED, Array( - X_INIT_READ_SHARED -> Mux(count > UFix(0), X_REP_READ_SHARED, X_REP_READ_EXCLUSIVE), - X_INIT_READ_EXCLUSIVE -> 
X_REP_READ_EXCLUSIVE, - X_INIT_READ_UNCACHED -> X_REP_READ_UNCACHED, - X_INIT_WRITE_UNCACHED -> X_REP_WRITE_UNCACHED + def getTransactionReplyType(x_type: UFix, count: UFix): Bits = { + MuxLookup(x_type, xactReplyReadUncached, Array( + xactInitReadShared -> Mux(count > UFix(0), xactReplyReadShared, xactReplyReadExclusive), + xactInitReadExclusive -> xactReplyReadExclusive, + xactInitReadUncached -> xactReplyReadUncached, + xactInitWriteUncached -> xactReplyWriteUncached )) } - def getProbeRequestType(t_type: UFix, global_state: UFix): UFix = { - MuxLookup(t_type, P_REQ_COPY, Array( - X_INIT_READ_SHARED -> P_REQ_DOWNGRADE, - X_INIT_READ_EXCLUSIVE -> P_REQ_INVALIDATE, - X_INIT_READ_UNCACHED -> P_REQ_COPY, - X_INIT_WRITE_UNCACHED -> P_REQ_INVALIDATE + def getProbeRequestType(x_type: UFix, global_state: UFix): UFix = { + MuxLookup(x_type, probeReqCopy, Array( + xactInitReadShared -> probeReqDowngrade, + xactInitReadExclusive -> probeReqInvalidate, + xactInitReadUncached -> probeReqCopy, + xactInitWriteUncached -> probeReqInvalidate )) } - def needsMemRead(t_type: UFix, global_state: UFix): Bool = { - (t_type != X_INIT_WRITE_UNCACHED) + def needsMemRead(x_type: UFix, global_state: UFix): Bool = { + (x_type != xactInitWriteUncached) } - def needsMemWrite(t_type: UFix, global_state: UFix): Bool = { - (t_type === X_INIT_WRITE_UNCACHED) + def needsMemWrite(x_type: UFix, global_state: UFix): Bool = { + (x_type === xactInitWriteUncached) } - def needsAckReply(t_type: UFix, global_state: UFix): Bool = { - (t_type === X_INIT_WRITE_UNCACHED) + def needsAckReply(x_type: UFix, global_state: UFix): Bool = { + (x_type === xactInitWriteUncached) } } diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 253fdf14..0b312136 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -165,7 +165,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence mem_req_data = Cat(packet_ram(idx), mem_req_data) 
} io.mem.xact_init.valid := state === state_mem_req - io.mem.xact_init.bits.t_type := Mux(cmd === cmd_writemem, X_INIT_WRITE_UNCACHED, X_INIT_READ_UNCACHED) + io.mem.xact_init.bits.x_type := Mux(cmd === cmd_writemem, xactInitWriteUncached, xactInitReadUncached) io.mem.xact_init.bits.address := addr >> UFix(OFFSET_BITS-3) io.mem.xact_init_data.valid:= state === state_mem_wdata io.mem.xact_init_data.bits.data := mem_req_data diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 79ee7a23..08d44ae7 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -27,7 +27,8 @@ class ioRocketICache extends Bundle() // 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines // parameters : // lines = # cache lines -class rocketICache(sets: Int, assoc: Int) extends Component { +class rocketICache(sets: Int, assoc: Int) extends Component with FourStateCoherence +{ val io = new ioRocketICache(); val lines = sets * assoc; @@ -135,7 +136,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component { rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); io.cpu.resp_data := data_mux.io.out io.mem.xact_init.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.xact_init.bits.t_type := X_INIT_READ_UNCACHED + io.mem.xact_init.bits.x_type := xactInitReadUncached io.mem.xact_init.bits.address := r_cpu_miss_addr(tagmsb,indexlsb).toUFix io.mem.xact_finish <> finish_q.io.deq diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 1781175e..9e2b52f5 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -11,7 +11,8 @@ class ioIPrefetcher extends Bundle() { val invalidate = Bool(INPUT) } -class rocketIPrefetcher extends Component() { +class rocketIPrefetcher extends Component with FourStateCoherence +{ val io = new ioIPrefetcher(); val pdq = (new queue(REFILL_CYCLES, 
flushable = true)) { Bits(width = MEM_DATA_BITS) }; @@ -33,7 +34,7 @@ class rocketIPrefetcher extends Component() { val finish_q = (new queue(1)) { new TransactionFinish } io.mem.xact_abort.ready := Bool(true) io.mem.xact_init.valid := prefetch_miss || (state === s_req_wait) && finish_q.io.enq.ready - io.mem.xact_init.bits.t_type := X_INIT_READ_UNCACHED + io.mem.xact_init.bits.x_type := xactInitReadUncached io.mem.xact_init.bits.tile_xact_id := Mux(prefetch_miss, UFix(0), UFix(1)) io.mem.xact_init.bits.address := Mux(prefetch_miss, io.icache.xact_init.bits.address, prefetch_addr); diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index aed8fb82..a26f6634 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -188,7 +188,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { val state = Reg(resetVal = s_invalid) val flush = Reg { Bool() } - val xact_type = Reg { UFix() } + val xacx_type = Reg { UFix() } val line_state = Reg { UFix() } val refill_count = Reg { UFix(width = log2up(REFILL_CYCLES)) } val req = Reg { new MSHRReq() } @@ -239,13 +239,13 @@ class MSHR(id: Int) extends Component with FourStateCoherence { } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req - xact_type := getTransactionInitTypeOnSecondaryMiss(req_cmd, newStateOnFlush(), io.mem_req.bits) + xacx_type := getTransactionInitTypeOnSecondaryMiss(req_cmd, newStateOnFlush(), io.mem_req.bits) } when ((state === s_invalid) && io.req_pri_val) { flush := req_cmd === M_FLA line_state := newStateOnFlush() refill_count := UFix(0) - xact_type := getTransactionInitTypeOnPrimaryMiss(req_cmd, newStateOnFlush()) + xacx_type := getTransactionInitTypeOnPrimaryMiss(req_cmd, newStateOnFlush()) req := io.req_bits when (io.req_bits.tag_miss) { @@ -278,7 +278,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { io.probe_refill.ready := (state != s_refill_resp) || !idx_match 
io.mem_req.valid := (state === s_refill_req) && !flush - io.mem_req.bits.t_type := xact_type + io.mem_req.bits.x_type := xacx_type io.mem_req.bits.address := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := Bits(id) io.mem_finish <> finish_q.io.deq @@ -469,7 +469,7 @@ class WritebackUnit extends Component with FourStateCoherence{ io.data_req.bits.data := Bits(0) io.mem_req.valid := valid && !cmd_sent - io.mem_req.bits.t_type := getTransactionInitTypeOnWriteback() + io.mem_req.bits.x_type := getTransactionInitTypeOnWriteback() io.mem_req.bits.address := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := req.tile_xact_id io.mem_req_data.valid := data_req_fired && !is_probe @@ -894,7 +894,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { data_arb.io.in(0).bits.wmask := ~UFix(0, MEM_DATA_BITS/8) data_arb.io.in(0).bits.data := io.mem.xact_rep.bits.data data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh - data_arb.io.in(0).valid := io.mem.xact_rep.valid && (io.mem.xact_rep.bits.t_type === X_REP_READ_SHARED || io.mem.xact_rep.bits.t_type === X_REP_READ_EXCLUSIVE) + data_arb.io.in(0).valid := io.mem.xact_rep.valid && (io.mem.xact_rep.bits.x_type === xactReplyReadShared || io.mem.xact_rep.bits.x_type === xactReplyReadExclusive) // load hits data_arb.io.in(4).bits.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index fa6e2b1e..8dc88d44 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -74,7 +74,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) val sharer_count = Bits(TILE_ID_BITS+1, OUTPUT) - val t_type = Bits(X_INIT_TYPE_BITS, OUTPUT) + val x_type = Bits(X_INIT_TYPE_BITS, OUTPUT) val push_p_req = Bits(ntiles, OUTPUT) val pop_p_rep = Bits(ntiles, OUTPUT) val 
pop_p_rep_data = Bits(ntiles, OUTPUT) @@ -117,7 +117,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() } val state = Reg(resetVal = s_idle) val addr_ = Reg{ UFix() } - val t_type_ = Reg{ Bits() } + val x_type_ = Reg{ Bits() } val init_tile_id_ = Reg{ Bits() } val tile_xact_id_ = Reg{ Bits() } val p_rep_count = if (ntiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2up(ntiles))) @@ -138,7 +138,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc io.p_rep_tile_id := p_rep_tile_id_ io.tile_xact_id := tile_xact_id_ io.sharer_count := UFix(ntiles) // TODO: Broadcast only - io.t_type := t_type_ + io.x_type := x_type_ io.mem_req_cmd.valid := Bool(false) io.mem_req_cmd.bits.rw := Bool(false) @@ -148,7 +148,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc io.mem_req_data.bits.data := UFix(0) io.mem_req_lock := Bool(false) io.probe_req.valid := Bool(false) - io.probe_req.bits.p_type := getProbeRequestType(t_type_, UFix(0)) + io.probe_req.bits.p_type := getProbeRequestType(x_type_, UFix(0)) io.probe_req.bits.global_xact_id := UFix(id) io.probe_req.bits.address := addr_ io.push_p_req := Bits(0, width = ntiles) @@ -164,11 +164,11 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc is(s_idle) { when( io.alloc_req.valid && io.can_alloc ) { addr_ := io.alloc_req.bits.xact_init.address - t_type_ := io.alloc_req.bits.xact_init.t_type + x_type_ := io.alloc_req.bits.xact_init.x_type init_tile_id_ := io.alloc_req.bits.tile_id tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := hasData(io.alloc_req.bits.xact_init) - x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.t_type, UFix(0)) + x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) if(ntiles > 1) p_rep_count := UFix(ntiles-1) val p_req_initial_flags = ~( 
UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only p_req_flags := p_req_initial_flags @@ -226,7 +226,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc } . elsewhen (x_needs_read) { doMemReqRead(io.mem_req_cmd, x_needs_read) } . otherwise { - state := Mux(needsAckReply(t_type_, UFix(0)), s_ack, s_busy) + state := Mux(needsAckReply(x_type_, UFix(0)), s_ack, s_busy) } } is(s_ack) { @@ -251,7 +251,7 @@ abstract class CoherenceHub(ntiles: Int) extends Component with CoherencePolicy class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence { val x_init = io.tiles(0).xact_init - val is_write = x_init.bits.t_type === X_INIT_WRITE_UNCACHED + val is_write = x_init.bits.x_type === xactInitWriteUncached x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) io.mem.req_cmd.bits.rw := is_write @@ -260,7 +260,7 @@ class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence io.mem.req_data <> io.tiles(0).xact_init_data val x_rep = io.tiles(0).xact_rep - x_rep.bits.t_type := Mux(io.mem.resp.valid, X_REP_READ_EXCLUSIVE, X_REP_WRITE_UNCACHED) + x_rep.bits.x_type := Mux(io.mem.resp.valid, xactReplyReadExclusive, xactReplyWriteUncached) x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.data := io.mem.resp.bits.data @@ -283,7 +283,7 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } - val t_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_BITS)} } + val x_type_arr = 
Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_BITS)} } val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } @@ -300,7 +300,7 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS addr_arr(i) := t.addr init_tile_id_arr(i) := t.init_tile_id tile_xact_id_arr(i) := t.tile_xact_id - t_type_arr(i) := t.t_type + x_type_arr(i) := t.x_type sh_count_arr(i) := t.sharer_count send_x_rep_ack_arr(i) := t.send_x_rep_ack t.xact_finish := do_free_arr(i) @@ -337,19 +337,19 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS val ack_idx = PriorityEncoder(send_x_rep_ack_arr.toBits) for( j <- 0 until ntiles ) { val rep = io.tiles(j).xact_rep - rep.bits.t_type := UFix(0) + rep.bits.x_type := UFix(0) rep.bits.tile_xact_id := UFix(0) rep.bits.global_xact_id := UFix(0) rep.bits.data := io.mem.resp.bits.data rep.bits.require_ack := Bool(true) rep.valid := Bool(false) when(io.mem.resp.valid && (UFix(j) === init_tile_id_arr(mem_idx))) { - rep.bits.t_type := getTransactionReplyType(t_type_arr(mem_idx), sh_count_arr(mem_idx)) + rep.bits.x_type := getTransactionReplyType(x_type_arr(mem_idx), sh_count_arr(mem_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx) rep.bits.global_xact_id := mem_idx rep.valid := Bool(true) } . 
otherwise { - rep.bits.t_type := getTransactionReplyType(t_type_arr(ack_idx), sh_count_arr(ack_idx)) + rep.bits.x_type := getTransactionReplyType(x_type_arr(ack_idx), sh_count_arr(ack_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) rep.bits.global_xact_id := ack_idx when (UFix(j) === init_tile_id_arr(ack_idx)) { From 551e09c9d5e1bc59a59bc683d2064765a5181143 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 4 Apr 2012 13:57:08 -0700 Subject: [PATCH 0388/1087] changed coherence type width names to represent max sizes for all protocols --- rocket/src/main/scala/coherence.scala | 16 ++++++++-------- rocket/src/main/scala/consts.scala | 8 ++++---- rocket/src/main/scala/uncore.scala | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 7af9c903..be6638aa 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -4,7 +4,7 @@ import Chisel._ import Constants._ class TransactionInit extends Bundle { - val x_type = Bits(width = X_INIT_TYPE_BITS) + val x_type = Bits(width = X_INIT_TYPE_MAX_BITS) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) val address = UFix(width = PADDR_BITS - OFFSET_BITS) } @@ -16,20 +16,20 @@ class TransactionAbort extends Bundle { } class ProbeRequest extends Bundle { - val p_type = Bits(width = P_REQ_TYPE_BITS) + val p_type = Bits(width = P_REQ_TYPE_MAX_BITS) val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) val address = Bits(width = PADDR_BITS - OFFSET_BITS) } class ProbeReply extends Bundle { - val p_type = Bits(width = P_REP_TYPE_BITS) + val p_type = Bits(width = P_REP_TYPE_MAX_BITS) val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) } class ProbeReplyData extends MemData class TransactionReply extends MemData { - val x_type = Bits(width = X_REP_TYPE_BITS) + val x_type = Bits(width = X_REP_TYPE_MAX_BITS) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) val global_xact_id = 
Bits(width = GLOBAL_XACT_ID_BITS) val require_ack = Bool() @@ -57,10 +57,10 @@ trait ThreeStateIncoherence extends CoherencePolicy { val xactInitReadShared = UFix(0, 2) val xactInitReadExclusive = UFix(1, 2) val xactInitWriteUncached = UFix(3, 2) - val xactReplyReadShared = UFix(0, X_REP_TYPE_BITS) - val xactReplyReadExclusive = UFix(1, X_REP_TYPE_BITS) - val xactReplyWriteUncached = UFix(3, X_REP_TYPE_BITS) - val probeRepInvalidateAck = UFix(3, P_REP_TYPE_BITS) + val xactReplyReadShared = UFix(0, X_REP_TYPE_MAX_BITS) + val xactReplyReadExclusive = UFix(1, X_REP_TYPE_MAX_BITS) + val xactReplyWriteUncached = UFix(3, X_REP_TYPE_MAX_BITS) + val probeRepInvalidateAck = UFix(3, P_REP_TYPE_MAX_BITS) def isHit ( cmd: Bits, state: UFix): Bool = { val (read, write) = cpuCmdToRW(cmd) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 9cac98ef..7dbe5aef 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -197,10 +197,10 @@ object Constants val GLOBAL_XACT_ID_BITS = 2 val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS - val X_INIT_TYPE_BITS = 2 - val X_REP_TYPE_BITS = 3 - val P_REQ_TYPE_BITS = 2 - val P_REP_TYPE_BITS = 3 + val X_INIT_TYPE_MAX_BITS = 2 + val X_REP_TYPE_MAX_BITS = 3 + val P_REQ_TYPE_MAX_BITS = 2 + val P_REP_TYPE_MAX_BITS = 3 // external memory interface val MEM_TAG_BITS = max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS) diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 8dc88d44..b907c51d 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -74,7 +74,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) val sharer_count = Bits(TILE_ID_BITS+1, OUTPUT) - val x_type = Bits(X_INIT_TYPE_BITS, OUTPUT) + val x_type = Bits(X_INIT_TYPE_MAX_BITS, OUTPUT) val push_p_req = Bits(ntiles, OUTPUT) val 
pop_p_rep = Bits(ntiles, OUTPUT) val pop_p_rep_data = Bits(ntiles, OUTPUT) @@ -283,7 +283,7 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } - val x_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_BITS)} } + val x_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_MAX_BITS)} } val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } From 9c8f849f5077c6b3add4267a99e24299ddc28eee Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 4 Apr 2012 15:51:33 -0700 Subject: [PATCH 0389/1087] defined abstract coherence traits in base trait, added Incoherent trait, cleaned up incoherent policy --- rocket/src/main/scala/coherence.scala | 114 +++++++++++++++++--------- rocket/src/main/scala/uncore.scala | 4 +- 2 files changed, 79 insertions(+), 39 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index be6638aa..1bd19bbd 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -50,59 +50,99 @@ object cpuCmdToRW { } } -trait CoherencePolicy { } +trait CoherencePolicy { + def isHit (cmd: Bits, state: UFix): Bool + def isValid (state: UFix): Bool -trait ThreeStateIncoherence extends CoherencePolicy { + def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool + def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool + def needsWriteback (state: UFix): Bool + + def newStateOnHit(cmd: Bits, state: UFix): UFix + def newStateOnCacheControl(cmd: Bits): UFix + def newStateOnWriteback(): UFix + def newStateOnFlush(): UFix + def newStateOnTransactionReply(incoming: TransactionReply, 
outstanding: TransactionInit): UFix + def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits + + def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix + def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix + def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits + def getTransactionInitTypeOnWriteback(): Bits + + def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply + + def hasData (reply: ProbeReply): Bool + def hasData (init: TransactionInit): Bool + def hasData (reply: TransactionReply): Bool + + def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool + def getTransactionReplyType(x_type: UFix, count: UFix): Bits + def getProbeRequestType(x_type: UFix, global_state: UFix): UFix + def needsMemRead(x_type: UFix, global_state: UFix): Bool + def needsMemWrite(x_type: UFix, global_state: UFix): Bool + def needsAckReply(x_type: UFix, global_state: UFix): Bool +} + +trait IncoherentPolicy extends CoherencePolicy { + // UNIMPLEMENTED + def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = state + def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { + val reply = Wire() { new ProbeReply() } + reply.p_type := UFix(0) + reply.global_xact_id := UFix(0) + reply + } + def hasData (reply: ProbeReply) = Bool(false) + def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = Bool(false) + def getTransactionReplyType(x_type: UFix, count: UFix): Bits = Bits(0) + def getProbeRequestType(x_type: UFix, global_state: UFix): UFix = UFix(0) + def needsMemRead(x_type: UFix, global_state: UFix): Bool = Bool(false) + def needsMemWrite(x_type: UFix, global_state: UFix): Bool = Bool(false) + def needsAckReply(x_type: UFix, global_state: UFix): Bool = Bool(false) +} + +trait ThreeStateIncoherence extends IncoherentPolicy { val tileInvalid :: tileClean :: tileDirty :: Nil = Enum(3){ UFix() } - val xactInitReadShared = UFix(0, 2) - val xactInitReadExclusive = 
UFix(1, 2) - val xactInitWriteUncached = UFix(3, 2) - val xactReplyReadShared = UFix(0, X_REP_TYPE_MAX_BITS) - val xactReplyReadExclusive = UFix(1, X_REP_TYPE_MAX_BITS) - val xactReplyWriteUncached = UFix(3, X_REP_TYPE_MAX_BITS) - val probeRepInvalidateAck = UFix(3, P_REP_TYPE_MAX_BITS) + val xactInitReadClean :: xactInitReadDirty :: xactInitWriteback :: Nil = Enum(3){ UFix() } + val xactReplyData :: xactReplyAck :: Nil = Enum(2){ UFix() } + val probeRepInvalidateAck :: Nil = Enum(1){ UFix() } - def isHit ( cmd: Bits, state: UFix): Bool = { - val (read, write) = cpuCmdToRW(cmd) - ( state === tileClean || state === tileDirty) - } + def isHit ( cmd: Bits, state: UFix): Bool = (state === tileClean || state === tileDirty) + def isValid (state: UFix): Bool = state != tileInvalid - def isValid (state: UFix): Bool = { - state != tileInvalid - } + def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit) = Bool(false) + def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = state === tileDirty + def needsWriteback (state: UFix): Bool = state === tileDirty - def needsWriteback (state: UFix): Bool = { - state === tileDirty - } - - def newStateOnWriteback() = tileInvalid - def newStateOnCacheControl(cmd: Bits) = tileInvalid def newState(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) Mux(write, tileDirty, Mux(read, Mux(state === tileDirty, tileDirty, tileClean), state)) } def newStateOnHit(cmd: Bits, state: UFix): UFix = newState(cmd, state) + def newStateOnCacheControl(cmd: Bits) = tileInvalid //TODO + def newStateOnWriteback() = tileInvalid + def newStateOnFlush() = tileInvalid + def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit) = { + MuxLookup(incoming.x_type, tileInvalid, Array( + xactReplyData -> Mux(outstanding.x_type === xactInitReadDirty, tileDirty, tileClean), + xactReplyAck -> tileInvalid + )) + } + def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val 
(read, write) = cpuCmdToRW(cmd) - Mux(write || cmd === M_PFW, xactInitReadExclusive, xactInitReadShared) + Mux(write || cmd === M_PFW, xactInitReadDirty, xactInitReadClean) } def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { val (read, write) = cpuCmdToRW(cmd) - Mux(write, xactInitReadExclusive, outstanding.x_type) + Mux(write, xactInitReadDirty, outstanding.x_type) } - def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = Bool(false) - def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix = { - Mux(outstanding.x_type === xactInitReadExclusive, tileDirty, tileClean) - } - def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = state - def newProbeReply (incoming: ProbeRequest, has_data: Bool): ProbeReply = { - val reply = Wire() { new ProbeReply() } - reply.p_type := probeRepInvalidateAck - reply.global_xact_id := UFix(0) - reply - } - def probeReplyHasData (reply: ProbeReply): Bool = Bool(false) - def transactionInitHasData (init: TransactionInit): Bool = (init.x_type === xactInitWriteUncached) + def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteback //TODO + def getTransactionInitTypeOnWriteback(): Bits = xactInitWriteback + + def hasData (init: TransactionInit): Bool = (init.x_type === xactInitWriteback) + def hasData (reply: TransactionReply) = (reply.x_type === xactReplyData) } trait FourStateCoherence extends CoherencePolicy { diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index b907c51d..14164f7f 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -251,7 +251,7 @@ abstract class CoherenceHub(ntiles: Int) extends Component with CoherencePolicy class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence { val x_init = io.tiles(0).xact_init - val is_write = x_init.bits.x_type === xactInitWriteUncached + val 
is_write = x_init.bits.x_type === xactInitWriteback x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) io.mem.req_cmd.bits.rw := is_write @@ -260,7 +260,7 @@ class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence io.mem.req_data <> io.tiles(0).xact_init_data val x_rep = io.tiles(0).xact_rep - x_rep.bits.x_type := Mux(io.mem.resp.valid, xactReplyReadExclusive, xactReplyWriteUncached) + x_rep.bits.x_type := Mux(io.mem.resp.valid, xactReplyData, xactReplyAck) x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.data := io.mem.resp.bits.data From 3cdd166153894a40e44e0d944221d519158b0d3b Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 10 Apr 2012 00:09:58 -0700 Subject: [PATCH 0390/1087] Refactored coherence as member rather than trait. MI and MEI protocols. 
--- rocket/src/main/scala/coherence.scala | 284 +++++++++++++++++++- rocket/src/main/scala/htif.scala | 6 +- rocket/src/main/scala/icache.scala | 4 +- rocket/src/main/scala/icache_prefetch.scala | 4 +- rocket/src/main/scala/nbdcache.scala | 62 ++--- rocket/src/main/scala/tile.scala | 8 +- rocket/src/main/scala/top.scala | 7 +- rocket/src/main/scala/uncore.scala | 36 +-- 8 files changed, 331 insertions(+), 80 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 1bd19bbd..e0f5e5c8 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -50,7 +50,7 @@ object cpuCmdToRW { } } -trait CoherencePolicy { +abstract class CoherencePolicy { def isHit (cmd: Bits, state: UFix): Bool def isValid (state: UFix): Bool @@ -72,9 +72,10 @@ trait CoherencePolicy { def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply - def hasData (reply: ProbeReply): Bool - def hasData (init: TransactionInit): Bool - def hasData (reply: TransactionReply): Bool + def messageHasData (reply: ProbeReply): Bool + def messageHasData (init: TransactionInit): Bool + def messageHasData (reply: TransactionReply): Bool + def messageUpdatesDataArray (reply: TransactionReply): Bool def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool def getTransactionReplyType(x_type: UFix, count: UFix): Bits @@ -84,7 +85,14 @@ trait CoherencePolicy { def needsAckReply(x_type: UFix, global_state: UFix): Bool } -trait IncoherentPolicy extends CoherencePolicy { +trait UncachedTransactions { + def getTransactionInitTypeOnUncachedRead(): UFix + def getTransactionInitTypeOnUncachedWrite(): UFix +} + +abstract class CoherencePolicyWithUncached extends CoherencePolicy with UncachedTransactions + +abstract class IncoherentPolicy extends CoherencePolicy { // UNIMPLEMENTED def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = state def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { @@ 
-93,7 +101,7 @@ trait IncoherentPolicy extends CoherencePolicy { reply.global_xact_id := UFix(0) reply } - def hasData (reply: ProbeReply) = Bool(false) + def messageHasData (reply: ProbeReply) = Bool(false) def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = Bool(false) def getTransactionReplyType(x_type: UFix, count: UFix): Bits = Bits(0) def getProbeRequestType(x_type: UFix, global_state: UFix): UFix = UFix(0) @@ -102,7 +110,7 @@ trait IncoherentPolicy extends CoherencePolicy { def needsAckReply(x_type: UFix, global_state: UFix): Bool = Bool(false) } -trait ThreeStateIncoherence extends IncoherentPolicy { +class ThreeStateIncoherence extends IncoherentPolicy { val tileInvalid :: tileClean :: tileDirty :: Nil = Enum(3){ UFix() } val xactInitReadClean :: xactInitReadDirty :: xactInitWriteback :: Nil = Enum(3){ UFix() } val xactReplyData :: xactReplyAck :: Nil = Enum(2){ UFix() } @@ -141,11 +149,256 @@ trait ThreeStateIncoherence extends IncoherentPolicy { def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteback //TODO def getTransactionInitTypeOnWriteback(): Bits = xactInitWriteback - def hasData (init: TransactionInit): Bool = (init.x_type === xactInitWriteback) - def hasData (reply: TransactionReply) = (reply.x_type === xactReplyData) + def messageHasData (init: TransactionInit): Bool = (init.x_type === xactInitWriteback) + def messageHasData (reply: TransactionReply) = (reply.x_type === xactReplyData) + def messageUpdatesDataArray (reply: TransactionReply) = (reply.x_type === xactReplyData) } -trait FourStateCoherence extends CoherencePolicy { +class TwoStateCoherence extends CoherencePolicyWithUncached { + + val tileInvalid :: tileValid :: Nil = Enum(2){ UFix() } + val globalInvalid :: globalValid :: Nil = Enum(2){ UFix() } + + val xactInitReadExclusive :: xactInitReadUncached :: xactInitWriteUncached :: Nil = Enum(3){ UFix() } + val xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: Nil = Enum(3){ UFix() } 
+ val probeReqInvalidate :: probeReqCopy :: Nil = Enum(2){ UFix() } + val probeRepInvalidateData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepCopyAck :: Nil = Enum(4){ UFix() } + + def isHit (cmd: Bits, state: UFix): Bool = state != tileInvalid + def isValid (state: UFix): Bool = state != tileInvalid + + def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = (outstanding.x_type != xactInitReadExclusive) + def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { + MuxLookup(cmd, (state === tileValid), Array( + M_INV -> (state === tileValid), + M_CLN -> (state === tileValid) + )) + } + def needsWriteback (state: UFix): Bool = { + needsTransactionOnCacheControl(M_INV, state) + } + + def newStateOnHit(cmd: Bits, state: UFix): UFix = state + def newStateOnCacheControl(cmd: Bits) = { + MuxLookup(cmd, tileInvalid, Array( + M_INV -> tileInvalid, + M_CLN -> tileValid + )) + } + def newStateOnWriteback() = newStateOnCacheControl(M_INV) + def newStateOnFlush() = newStateOnCacheControl(M_INV) + def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix = { + MuxLookup(incoming.x_type, tileInvalid, Array( + xactReplyReadExclusive -> tileValid, + xactReplyReadUncached -> tileInvalid, + xactReplyWriteUncached -> tileInvalid + )) + } + def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { + MuxLookup(incoming.p_type, state, Array( + probeReqInvalidate -> tileInvalid, + probeReqCopy -> state + )) + } + + def getTransactionInitTypeOnUncachedRead() = xactInitReadUncached + def getTransactionInitTypeOnUncachedWrite() = xactInitWriteUncached + def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = xactInitReadExclusive + def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = xactInitReadExclusive + def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteUncached + def 
getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) + + def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { + val reply = Wire() { new ProbeReply() } + val with_data = MuxLookup(incoming.p_type, probeRepInvalidateData, Array( + probeReqInvalidate -> probeRepInvalidateData, + probeReqCopy -> probeRepCopyData + )) + val without_data = MuxLookup(incoming.p_type, probeRepInvalidateAck, Array( + probeReqInvalidate -> probeRepInvalidateAck, + probeReqCopy -> probeRepCopyAck + )) + reply.p_type := Mux(needsWriteback(state), with_data, without_data) + reply.global_xact_id := incoming.global_xact_id + reply + } + + def messageHasData (reply: ProbeReply): Bool = { + (reply.p_type === probeRepInvalidateData || + reply.p_type === probeRepCopyData) + } + def messageHasData (init: TransactionInit): Bool = { + (init.x_type === xactInitWriteUncached) + } + def messageHasData (reply: TransactionReply): Bool = { + (reply.x_type != xactReplyWriteUncached) + } + def messageUpdatesDataArray (reply: TransactionReply): Bool = { + (reply.x_type === xactReplyReadExclusive) + } + + def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) + + def getTransactionReplyType(x_type: UFix, count: UFix): Bits = { + MuxLookup(x_type, xactReplyReadUncached, Array( + xactInitReadExclusive -> xactReplyReadExclusive, + xactInitReadUncached -> xactReplyReadUncached, + xactInitWriteUncached -> xactReplyWriteUncached + )) + } + + def getProbeRequestType(x_type: UFix, global_state: UFix): UFix = { + MuxLookup(x_type, probeReqCopy, Array( + xactInitReadExclusive -> probeReqInvalidate, + xactInitReadUncached -> probeReqCopy, + xactInitWriteUncached -> probeReqInvalidate + )) + } + + def needsMemRead(x_type: UFix, global_state: UFix): Bool = { + (x_type != xactInitWriteUncached) + } + def needsMemWrite(x_type: UFix, global_state: UFix): Bool = { + (x_type === xactInitWriteUncached) + } + def needsAckReply(x_type: UFix, global_state: 
UFix): Bool = { + (x_type === xactInitWriteUncached) + } +} + +class ThreeStateCoherence extends CoherencePolicyWithUncached { //MEI + + val tileInvalid :: tileExclusiveClean :: tileExclusiveDirty :: Nil = Enum(3){ UFix() } + val globalInvalid :: globalExclusiveClean :: Nil = Enum(2){ UFix() } + + val xactInitReadExclusiveClean :: xactInitReadExclusiveDirty :: xactInitReadUncached :: xactInitWriteUncached :: Nil = Enum(4){ UFix() } + val xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: Nil = Enum(4){ UFix() } + val probeReqInvalidate :: probeReqDowngrade :: probeReqCopy :: Nil = Enum(3){ UFix() } + val probeRepInvalidateData :: probeRepDowngradeData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepDowngradeAck :: probeRepCopyAck :: Nil = Enum(6){ UFix() } + + def isHit (cmd: Bits, state: UFix): Bool = state != tileInvalid + def isValid (state: UFix): Bool = state != tileInvalid + + def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = { + val (read, write) = cpuCmdToRW(cmd) + (read && (outstanding.x_type === xactInitReadUncached || outstanding.x_type === xactInitWriteUncached)) || + (write && (outstanding.x_type != xactInitReadExclusiveDirty)) + } + def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { + MuxLookup(cmd, (state === tileExclusiveDirty), Array( + M_INV -> (state === tileExclusiveDirty), + M_CLN -> (state === tileExclusiveDirty) + )) + } + def needsWriteback (state: UFix): Bool = { + needsTransactionOnCacheControl(M_INV, state) + } + + def newStateOnHit(cmd: Bits, state: UFix): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, tileExclusiveDirty, state) + } + def newStateOnCacheControl(cmd: Bits) = { + MuxLookup(cmd, tileInvalid, Array( + M_INV -> tileInvalid, + M_CLN -> tileExclusiveClean + )) + } + def newStateOnWriteback() = newStateOnCacheControl(M_INV) + def newStateOnFlush() = newStateOnCacheControl(M_INV) + def 
newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix = { + MuxLookup(incoming.x_type, tileInvalid, Array( + xactReplyReadExclusive -> Mux(outstanding.x_type === xactInitReadExclusiveDirty, tileExclusiveDirty, tileExclusiveClean), + xactReplyReadExclusiveAck -> tileExclusiveDirty, + xactReplyReadUncached -> tileInvalid, + xactReplyWriteUncached -> tileInvalid + )) + } + def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { + MuxLookup(incoming.p_type, state, Array( + probeReqInvalidate -> tileInvalid, + probeReqDowngrade -> tileExclusiveClean, + probeReqCopy -> state + )) + } + + def getTransactionInitTypeOnUncachedRead() = xactInitReadUncached + def getTransactionInitTypeOnUncachedWrite() = xactInitWriteUncached + def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, xactInitReadExclusiveDirty, xactInitReadExclusiveClean) + } + def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, xactInitReadExclusiveDirty, outstanding.x_type) + } + def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteUncached + def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) + + def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { + val reply = Wire() { new ProbeReply() } + val with_data = MuxLookup(incoming.p_type, probeRepInvalidateData, Array( + probeReqInvalidate -> probeRepInvalidateData, + probeReqDowngrade -> probeRepDowngradeData, + probeReqCopy -> probeRepCopyData + )) + val without_data = MuxLookup(incoming.p_type, probeRepInvalidateAck, Array( + probeReqInvalidate -> probeRepInvalidateAck, + probeReqDowngrade -> probeRepDowngradeAck, + probeReqCopy -> probeRepCopyAck + )) + reply.p_type := Mux(needsWriteback(state), with_data, without_data) + reply.global_xact_id := 
incoming.global_xact_id + reply + } + + def messageHasData (reply: ProbeReply): Bool = { + (reply.p_type === probeRepInvalidateData || + reply.p_type === probeRepDowngradeData || + reply.p_type === probeRepCopyData) + } + def messageHasData (init: TransactionInit): Bool = { + (init.x_type === xactInitWriteUncached) + } + def messageHasData (reply: TransactionReply): Bool = { + (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck) + } + def messageUpdatesDataArray (reply: TransactionReply): Bool = { + (reply.x_type === xactReplyReadExclusive) + } + + def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) + + def getTransactionReplyType(x_type: UFix, count: UFix): Bits = { + MuxLookup(x_type, xactReplyReadUncached, Array( + xactInitReadExclusiveClean -> xactReplyReadExclusive, + xactInitReadExclusiveDirty -> xactReplyReadExclusive, + xactInitReadUncached -> xactReplyReadUncached, + xactInitWriteUncached -> xactReplyWriteUncached + )) + } + + def getProbeRequestType(x_type: UFix, global_state: UFix): UFix = { + MuxLookup(x_type, probeReqCopy, Array( + xactInitReadExclusiveClean -> probeReqInvalidate, + xactInitReadExclusiveDirty -> probeReqInvalidate, + xactInitReadUncached -> probeReqCopy, + xactInitWriteUncached -> probeReqInvalidate + )) + } + + def needsMemRead(x_type: UFix, global_state: UFix): Bool = { + (x_type != xactInitWriteUncached) + } + def needsMemWrite(x_type: UFix, global_state: UFix): Bool = { + (x_type === xactInitWriteUncached) + } + def needsAckReply(x_type: UFix, global_state: UFix): Bool = { + (x_type === xactInitWriteUncached) + } +} + +class FourStateCoherence extends CoherencePolicyWithUncached { val tileInvalid :: tileShared :: tileExclusiveClean :: tileExclusiveDirty :: Nil = Enum(4){ UFix() } val globalInvalid :: globalShared :: globalExclusiveClean :: Nil = Enum(3){ UFix() } @@ -208,6 +461,8 @@ trait FourStateCoherence extends CoherencePolicy { )) } + def 
getTransactionInitTypeOnUncachedRead() = xactInitReadUncached + def getTransactionInitTypeOnUncachedWrite() = xactInitWriteUncached def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) Mux(write || cmd === M_PFW, xactInitReadExclusive, xactInitReadShared) @@ -236,17 +491,20 @@ trait FourStateCoherence extends CoherencePolicy { reply } - def hasData (reply: ProbeReply): Bool = { + def messageHasData (reply: ProbeReply): Bool = { (reply.p_type === probeRepInvalidateData || reply.p_type === probeRepDowngradeData || reply.p_type === probeRepCopyData) } - def hasData (init: TransactionInit): Bool = { + def messageHasData (init: TransactionInit): Bool = { (init.x_type === xactInitWriteUncached) } - def hasData (reply: TransactionReply): Bool = { + def messageHasData (reply: TransactionReply): Bool = { (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck) } + def messageUpdatesDataArray (reply: TransactionReply): Bool = { + (reply.x_type === xactReplyReadShared || reply.x_type === xactReplyReadExclusive) + } def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 0b312136..a8bf52cc 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -30,7 +30,7 @@ class ioHTIF extends Bundle val pcr_rep = (new ioPipe) { Bits(width = 64) } } -class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence +class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends Component { val io = new Bundle { val host = new ioHost(w) @@ -165,7 +165,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence mem_req_data = Cat(packet_ram(idx), mem_req_data) } io.mem.xact_init.valid := state === state_mem_req - io.mem.xact_init.bits.x_type := Mux(cmd === cmd_writemem, xactInitWriteUncached, xactInitReadUncached) + 
io.mem.xact_init.bits.x_type := Mux(cmd === cmd_writemem, co.getTransactionInitTypeOnUncachedWrite, co.getTransactionInitTypeOnUncachedRead) io.mem.xact_init.bits.address := addr >> UFix(OFFSET_BITS-3) io.mem.xact_init_data.valid:= state === state_mem_wdata io.mem.xact_init_data.bits.data := mem_req_data @@ -175,7 +175,7 @@ class rocketHTIF(w: Int, ncores: Int) extends Component with FourStateCoherence val probe_q = (new queue(1)) { new ProbeReply } probe_q.io.enq.valid := io.mem.probe_req.valid io.mem.probe_req.ready := probe_q.io.enq.ready - probe_q.io.enq.bits := newProbeReply(io.mem.probe_req.bits, newStateOnFlush()) + probe_q.io.enq.bits := co.newProbeReply(io.mem.probe_req.bits, co.newStateOnFlush()) io.mem.probe_rep <> probe_q.io.deq io.mem.probe_rep_data.valid := Bool(false) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 08d44ae7..0b716563 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -27,7 +27,7 @@ class ioRocketICache extends Bundle() // 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines // parameters : // lines = # cache lines -class rocketICache(sets: Int, assoc: Int) extends Component with FourStateCoherence +class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) extends Component { val io = new ioRocketICache(); @@ -136,7 +136,7 @@ class rocketICache(sets: Int, assoc: Int) extends Component with FourStateCohere rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); io.cpu.resp_data := data_mux.io.out io.mem.xact_init.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.xact_init.bits.x_type := xactInitReadUncached + io.mem.xact_init.bits.x_type := co.getTransactionInitTypeOnUncachedRead io.mem.xact_init.bits.address := r_cpu_miss_addr(tagmsb,indexlsb).toUFix io.mem.xact_finish <> finish_q.io.deq diff --git a/rocket/src/main/scala/icache_prefetch.scala 
b/rocket/src/main/scala/icache_prefetch.scala index 9e2b52f5..956dd08f 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -11,7 +11,7 @@ class ioIPrefetcher extends Bundle() { val invalidate = Bool(INPUT) } -class rocketIPrefetcher extends Component with FourStateCoherence +class rocketIPrefetcher(co: CoherencePolicyWithUncached) extends Component { val io = new ioIPrefetcher(); val pdq = (new queue(REFILL_CYCLES, flushable = true)) { Bits(width = MEM_DATA_BITS) }; @@ -34,7 +34,7 @@ class rocketIPrefetcher extends Component with FourStateCoherence val finish_q = (new queue(1)) { new TransactionFinish } io.mem.xact_abort.ready := Bool(true) io.mem.xact_init.valid := prefetch_miss || (state === s_req_wait) && finish_q.io.enq.ready - io.mem.xact_init.bits.x_type := xactInitReadUncached + io.mem.xact_init.bits.x_type := co.getTransactionInitTypeOnUncachedRead io.mem.xact_init.bits.tile_xact_id := Mux(prefetch_miss, UFix(0), UFix(1)) io.mem.xact_init.bits.address := Mux(prefetch_miss, io.icache.xact_init.bits.address, prefetch_addr); diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index a26f6634..cc91f1de 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -158,7 +158,7 @@ class MetaArrayReq extends Bundle { val data = new MetaData() } -class MSHR(id: Int) extends Component with FourStateCoherence { +class MSHR(id: Int, co: CoherencePolicy) extends Component { val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) @@ -196,7 +196,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { val req_cmd = io.req_bits.cmd val req_use_rpq = (req_cmd != M_PFR) && (req_cmd != M_PFW) && (req_cmd != M_FLA) val idx_match = req.idx === io.req_bits.idx - val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || (state === s_refill_req || state === s_refill_resp) && 
!needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) + val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || (state === s_refill_req || state === s_refill_resp) && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) val rpq = (new queue(NRPQ)) { new RPQEntry } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq @@ -220,7 +220,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { when (refill_done) { state := s_drain_rpq } when (reply) { refill_count := refill_count + UFix(1) - line_state := newStateOnTransactionReply(io.mem_rep.bits, io.mem_req.bits) + line_state := co.newStateOnTransactionReply(io.mem_rep.bits, io.mem_req.bits) } when (abort) { state := s_refill_req } } @@ -239,13 +239,13 @@ class MSHR(id: Int) extends Component with FourStateCoherence { } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req - xacx_type := getTransactionInitTypeOnSecondaryMiss(req_cmd, newStateOnFlush(), io.mem_req.bits) + xacx_type := co.getTransactionInitTypeOnSecondaryMiss(req_cmd, co.newStateOnFlush(), io.mem_req.bits) } when ((state === s_invalid) && io.req_pri_val) { flush := req_cmd === M_FLA - line_state := newStateOnFlush() + line_state := co.newStateOnFlush() refill_count := UFix(0) - xacx_type := getTransactionInitTypeOnPrimaryMiss(req_cmd, newStateOnFlush()) + xacx_type := co.getTransactionInitTypeOnPrimaryMiss(req_cmd, co.newStateOnFlush()) req := io.req_bits when (io.req_bits.tag_miss) { @@ -289,7 +289,7 @@ class MSHR(id: Int) extends Component with FourStateCoherence { io.replay.bits.way_oh := req.way_oh } -class MSHRFile extends Component { +class MSHRFile(co: CoherencePolicy) extends Component { val io = new Bundle { val req = (new ioDecoupled) { new MSHRReq }.flip val secondary_miss = Bool(OUTPUT) @@ -343,7 +343,7 @@ class MSHRFile extends Component { var refill_probe_rdy = Bool(true) for (i <- 0 to NMSHR-1) { - val mshr = new 
MSHR(i) + val mshr = new MSHR(i, co) tag_mux.io.sel(i) := mshr.io.idx_match tag_mux.io.in(i) := mshr.io.tag @@ -409,7 +409,8 @@ class MSHRFile extends Component { io.cpu_resp_tag := Reg(replay.bits.cpu_tag) } -class WritebackUnit extends Component with FourStateCoherence{ + +class WritebackUnit(co: CoherencePolicy) extends Component { val io = new Bundle { val req = (new ioDecoupled) { new WritebackReq() }.flip val probe = (new ioDecoupled) { new WritebackReq() }.flip @@ -469,7 +470,7 @@ class WritebackUnit extends Component with FourStateCoherence{ io.data_req.bits.data := Bits(0) io.mem_req.valid := valid && !cmd_sent - io.mem_req.bits.x_type := getTransactionInitTypeOnWriteback() + io.mem_req.bits.x_type := co.getTransactionInitTypeOnWriteback() io.mem_req.bits.address := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := req.tile_xact_id io.mem_req_data.valid := data_req_fired && !is_probe @@ -478,7 +479,7 @@ class WritebackUnit extends Component with FourStateCoherence{ io.probe_rep_data.bits.data := io.data_resp } -class ProbeUnit extends Component with FourStateCoherence { +class ProbeUnit(co: CoherencePolicy) extends Component { val io = new Bundle { val req = (new ioDecoupled) { new ProbeRequest }.flip val rep = (new ioDecoupled) { new ProbeReply } @@ -504,7 +505,7 @@ class ProbeUnit extends Component with FourStateCoherence { state := s_writeback_resp } when ((state === s_probe_rep) && io.meta_req.ready && io.rep.ready) { - state := Mux(hit && needsWriteback(line_state), s_writeback_req, s_invalid) + state := Mux(hit && co.needsWriteback(line_state), s_writeback_req, s_invalid) } when (state === s_meta_resp) { way_oh := io.tag_match_way_oh @@ -521,13 +522,13 @@ class ProbeUnit extends Component with FourStateCoherence { io.req.ready := state === s_invalid io.rep.valid := state === s_probe_rep && io.meta_req.ready - io.rep.bits := newProbeReply(req, Mux(hit, line_state, newStateOnFlush())) + io.rep.bits := co.newProbeReply(req, Mux(hit, 
line_state, co.newStateOnFlush())) io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_probe_rep && hit io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, ~UFix(0, NWAYS)) io.meta_req.bits.rw := state === s_probe_rep io.meta_req.bits.idx := req.address - io.meta_req.bits.data.state := newStateOnProbeRequest(req, line_state) + io.meta_req.bits.data.state := co.newStateOnProbeRequest(req, line_state) io.meta_req.bits.data.tag := req.address >> UFix(IDX_BITS) io.mshr_req.valid := state === s_meta_resp io.address := req.address @@ -538,7 +539,7 @@ class ProbeUnit extends Component with FourStateCoherence { io.wb_req.bits.tag := req.address >> UFix(IDX_BITS) } -class FlushUnit(lines: Int) extends Component with FourStateCoherence{ +class FlushUnit(lines: Int, co: CoherencePolicy) extends Component { val io = new Bundle { val req = (new ioDecoupled) { Bool() }.flip val meta_req = (new ioDecoupled) { new MetaArrayReq() } @@ -583,7 +584,7 @@ class FlushUnit(lines: Int) extends Component with FourStateCoherence{ io.meta_req.bits.way_en := UFixToOH(way_cnt, NWAYS) io.meta_req.bits.idx := idx_cnt io.meta_req.bits.rw := (state === s_reset) - io.meta_req.bits.data.state := newStateOnFlush() + io.meta_req.bits.data.state := co.newStateOnFlush() io.meta_req.bits.data.tag := UFix(0) } @@ -732,16 +733,7 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { val resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT); } -abstract class HellaCache extends Component { - def isHit ( cmd: Bits, state: UFix): Bool - def isValid (state: UFix): Bool - def needsWriteback (state: UFix): Bool - def newStateOnWriteback(): UFix - def newStateOnFlush(): UFix - def newStateOnHit(cmd: Bits, state: UFix): UFix -} - -class HellaCacheUniproc extends HellaCache with FourStateCoherence { +class HellaCache(co: CoherencePolicy) extends Component { val io = new Bundle { val cpu = new ioDmem() val mem = new ioTileLink @@ -794,10 +786,10 @@ class HellaCacheUniproc 
extends HellaCache with FourStateCoherence { val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch val nack_hit = Wire() { Bool() } - val wb = new WritebackUnit - val prober = new ProbeUnit - val mshr = new MSHRFile() - val flusher = new FlushUnit(lines) + val wb = new WritebackUnit(co) + val prober = new ProbeUnit(co) + val mshr = new MSHRFile(co) + val flusher = new FlushUnit(lines, co) val replay_amo_val = mshr.io.data_req.valid && mshr.io.data_req.bits.cmd(3).toBool // reset and flush unit @@ -856,10 +848,10 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { val early_tag_nack = !meta_arb.io.in(3).ready val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.address >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req_ppn) val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) - val tag_match_arr = (0 until NWAYS).map( w => isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_match_arr = (0 until NWAYS).map( w => co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR val tag_match_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec - val tag_hit_arr = (0 until NWAYS).map( w => isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_hit_arr = (0 until NWAYS).map( w => co.isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_hit = Cat(Bits(0),tag_hit_arr:_*).orR val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), tag_match_way_oh, meta.io.way_en) val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), tag_match_way_oh, data.io.way_en) @@ -894,7 +886,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { data_arb.io.in(0).bits.wmask := ~UFix(0, MEM_DATA_BITS/8) data_arb.io.in(0).bits.data := io.mem.xact_rep.bits.data data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh - 
data_arb.io.in(0).valid := io.mem.xact_rep.valid && (io.mem.xact_rep.bits.x_type === xactReplyReadShared || io.mem.xact_rep.bits.x_type === xactReplyReadExclusive) + data_arb.io.in(0).valid := io.mem.xact_rep.valid && co.messageUpdatesDataArray(io.mem.xact_rep.bits) // load hits data_arb.io.in(4).bits.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) @@ -924,7 +916,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { p_store_valid := p_store_valid && !drain_store || (r_cpu_req_val && tag_hit && r_req_store && mshr.io.req.ready && !nack_hit) || p_amo // tag update after a store to an exclusive clean line. - val new_hit_state = newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) + val new_hit_state = co.newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) val set_hit_state = r_cpu_req_val && tag_hit && meta_resp_mux.state != new_hit_state meta.io.state_req.bits.rw := Bool(true) meta.io.state_req.bits.idx := Reg(r_cpu_req_idx(indexmsb,indexlsb)) @@ -948,7 +940,7 @@ class HellaCacheUniproc extends HellaCache with FourStateCoherence { // miss handling mshr.io.req.valid := r_cpu_req_val && r_req_readwrite && !nack_hit || flusher.io.mshr_req.valid mshr.io.req.bits.tag_miss := !tag_hit || flusher.io.mshr_req.valid - mshr.io.req.bits.old_dirty := needsWriteback(meta_wb_mux.state) && (!tag_match || flusher.io.mshr_req.valid) // don't wb upgrades + mshr.io.req.bits.old_dirty := co.needsWriteback(meta_wb_mux.state) && (!tag_match || flusher.io.mshr_req.valid) // don't wb upgrades mshr.io.req.bits.old_tag := meta_wb_mux.tag mshr.io.req.bits.tag := cpu_req_tag mshr.io.req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 1b7f6e05..ef368a85 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -4,7 +4,7 @@ import Chisel._ import Node._ import Constants._ -class Tile extends Component +class Tile(co: CoherencePolicyWithUncached) extends Component { val 
io = new Bundle { val tilelink = new ioTileLink @@ -12,8 +12,8 @@ class Tile extends Component } val cpu = new rocketProc(resetSignal = io.host.reset) - val icache = new rocketICache(128, 4) // 128 sets x 4 ways (32KB) - val dcache = new HellaCacheUniproc + val icache = new rocketICache(128, 4, co) // 128 sets x 4 ways (32KB) + val dcache = new HellaCache(co) val arbiter = new rocketMemArbiter(2 + (if (HAVE_VEC) 1 else 0)) arbiter.io.requestor(0) <> dcache.io.mem @@ -30,7 +30,7 @@ class Tile extends Component if (HAVE_VEC) { - val vicache = new rocketICache(128, 1) // 128 sets x 1 ways (8KB) + val vicache = new rocketICache(128, 1, co) // 128 sets x 1 ways (8KB) arbiter.io.requestor(2) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index a74c9ff4..bc90cf99 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -20,10 +20,11 @@ class Top extends Component val htif_width = 8 val io = new ioTop(htif_width) - val tile = new Tile - val htif = new rocketHTIF(htif_width, 1) + val co = new FourStateCoherence + val tile = new Tile(co) + val htif = new rocketHTIF(htif_width, 1, co) - val hub = new CoherenceHubBroadcast(2) + val hub = new CoherenceHubBroadcast(2, co) hub.io.tiles(0) <> tile.io.tilelink hub.io.tiles(1) <> htif.io.mem diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 14164f7f..a8fc9432 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -50,7 +50,7 @@ class ioTileLink extends Bundle { val xact_finish = (new ioDecoupled) { new TransactionFinish } } -class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherence { +class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val io = new Bundle { val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip val p_data = (new ioPipe) { new TrackerProbeData }.flip @@ -140,7 +140,7 @@ 
class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc io.sharer_count := UFix(ntiles) // TODO: Broadcast only io.x_type := x_type_ - io.mem_req_cmd.valid := Bool(false) + io.mem_req_cmd.valid := Bool(false) io.mem_req_cmd.bits.rw := Bool(false) io.mem_req_cmd.bits.addr := addr_ io.mem_req_cmd.bits.tag := UFix(id) @@ -148,7 +148,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc io.mem_req_data.bits.data := UFix(0) io.mem_req_lock := Bool(false) io.probe_req.valid := Bool(false) - io.probe_req.bits.p_type := getProbeRequestType(x_type_, UFix(0)) + io.probe_req.bits.p_type := co.getProbeRequestType(x_type_, UFix(0)) io.probe_req.bits.global_xact_id := UFix(id) io.probe_req.bits.address := addr_ io.push_p_req := Bits(0, width = ntiles) @@ -167,8 +167,8 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc x_type_ := io.alloc_req.bits.xact_init.x_type init_tile_id_ := io.alloc_req.bits.tile_id tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id - x_init_data_needs_write := hasData(io.alloc_req.bits.xact_init) - x_needs_read := needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) + x_init_data_needs_write := co.messageHasData(io.alloc_req.bits.xact_init) + x_needs_read := co.needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) if(ntiles > 1) p_rep_count := UFix(ntiles-1) val p_req_initial_flags = ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only p_req_flags := p_req_initial_flags @@ -226,7 +226,7 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc } . elsewhen (x_needs_read) { doMemReqRead(io.mem_req_cmd, x_needs_read) } . 
otherwise { - state := Mux(needsAckReply(x_type_, UFix(0)), s_ack, s_busy) + state := Mux(co.needsAckReply(x_type_, UFix(0)), s_ack, s_busy) } } is(s_ack) { @@ -241,17 +241,17 @@ class XactTracker(ntiles: Int, id: Int) extends Component with FourStateCoherenc } } -abstract class CoherenceHub(ntiles: Int) extends Component with CoherencePolicy { +abstract class CoherenceHub(ntiles: Int, co: CoherencePolicy) extends Component { val io = new Bundle { val tiles = Vec(ntiles) { new ioTileLink() }.flip val mem = new ioMem } } -class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence +class CoherenceHubNull(co: ThreeStateIncoherence) extends CoherenceHub(1, co) { val x_init = io.tiles(0).xact_init - val is_write = x_init.bits.x_type === xactInitWriteback + val is_write = x_init.bits.x_type === co.xactInitWriteback x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) io.mem.req_cmd.bits.rw := is_write @@ -260,7 +260,7 @@ class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence io.mem.req_data <> io.tiles(0).xact_init_data val x_rep = io.tiles(0).xact_rep - x_rep.bits.x_type := Mux(io.mem.resp.valid, xactReplyData, xactReplyAck) + x_rep.bits.x_type := Mux(io.mem.resp.valid, co.xactReplyData, co.xactReplyAck) x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) x_rep.bits.global_xact_id := UFix(0) // don't care x_rep.bits.data := io.mem.resp.bits.data @@ -275,9 +275,9 @@ class CoherenceHubNull extends CoherenceHub(1) with ThreeStateIncoherence } -class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourStateCoherence +class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceHub(ntiles, co) { - val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(ntiles, _)) + val trackerList = (0 until 
NGLOBAL_XACTS).map(new XactTracker(ntiles, _, co)) val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } @@ -344,12 +344,12 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS rep.bits.require_ack := Bool(true) rep.valid := Bool(false) when(io.mem.resp.valid && (UFix(j) === init_tile_id_arr(mem_idx))) { - rep.bits.x_type := getTransactionReplyType(x_type_arr(mem_idx), sh_count_arr(mem_idx)) + rep.bits.x_type := co.getTransactionReplyType(x_type_arr(mem_idx), sh_count_arr(mem_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx) rep.bits.global_xact_id := mem_idx rep.valid := Bool(true) } . otherwise { - rep.bits.x_type := getTransactionReplyType(x_type_arr(ack_idx), sh_count_arr(ack_idx)) + rep.bits.x_type := co.getTransactionReplyType(x_type_arr(ack_idx), sh_count_arr(ack_idx)) rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) rep.bits.global_xact_id := ack_idx when (UFix(j) === init_tile_id_arr(ack_idx)) { @@ -417,16 +417,16 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS val conflicts = Vec(NGLOBAL_XACTS) { Wire() { Bool() } } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io - conflicts(i) := t.busy && x_init.valid && isCoherenceConflict(t.addr, x_init.bits.address) + conflicts(i) := t.busy && x_init.valid && co.isCoherenceConflict(t.addr, x_init.bits.address) } x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id - want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && hasData(x_init.bits))) + want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && co.messageHasData(x_init.bits))) x_abort.valid := Bool(false) switch(abort_state_arr(j)) { is(s_idle) { when(want_to_abort_arr(j)) { - when(hasData(x_init.bits)) { + when(co.messageHasData(x_init.bits)) { 
abort_state_arr(j) := s_abort_drain } . otherwise { abort_state_arr(j) := s_abort_send @@ -478,7 +478,7 @@ class CoherenceHubBroadcast(ntiles: Int) extends CoherenceHub(ntiles) with FourS init_arb.io.in(j).bits.tile_id := UFix(j) val pop_x_inits = trackerList.map(_.io.pop_x_init(j).toBool) val do_pop = foldR(pop_x_inits)(_||_) - x_init_data_dep_list(j).io.enq.valid := do_pop && hasData(x_init.bits) && (abort_state_arr(j) === s_idle) + x_init_data_dep_list(j).io.enq.valid := do_pop && co.messageHasData(x_init.bits) && (abort_state_arr(j) === s_idle) x_init_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_x_inits) x_init.ready := (abort_state_arr(j) === s_abort_complete) || do_pop x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data(j).toBool))(_||_) From c0ec3794bfb54be479cd1239050028beacb5eb34 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 10 Apr 2012 02:22:45 -0700 Subject: [PATCH 0391/1087] coherence mostly works now --- rocket/src/main/scala/consts.scala | 1 + rocket/src/main/scala/nbdcache.scala | 4 ++-- rocket/src/main/scala/top.scala | 19 +++++++++++-------- rocket/src/main/scala/uncore.scala | 8 ++++---- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 7dbe5aef..eb1f4c1d 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -5,6 +5,7 @@ import scala.math._ object Constants { + val NTILES = 1 val HAVE_RVC = false val HAVE_FPU = true val HAVE_VEC = true diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index cc91f1de..37f82a55 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -826,8 +826,8 @@ class HellaCache(co: CoherencePolicy) extends Component { (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || ((r_cpu_req_type === MT_D) && 
(r_cpu_req_idx(2,0) != Bits(0))); - io.cpu.xcpt_ma_ld := r_cpu_req_val_ && r_req_read && misaligned - io.cpu.xcpt_ma_st := r_cpu_req_val_ && r_req_write && misaligned + io.cpu.xcpt_ma_ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned + io.cpu.xcpt_ma_st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned // tags val meta = new MetaDataArrayArray(lines) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index bc90cf99..db85eb0d 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -21,12 +21,9 @@ class Top extends Component val io = new ioTop(htif_width) val co = new FourStateCoherence - val tile = new Tile(co) - val htif = new rocketHTIF(htif_width, 1, co) - - val hub = new CoherenceHubBroadcast(2, co) - hub.io.tiles(0) <> tile.io.tilelink - hub.io.tiles(1) <> htif.io.mem + val htif = new rocketHTIF(htif_width, NTILES, co) + val hub = new CoherenceHubBroadcast(NTILES+1, co) + hub.io.tiles(NTILES) <> htif.io.mem // mux between main and backup memory ports val mem_serdes = new MemSerdes @@ -67,8 +64,14 @@ class Top extends Component io.mem_backup.resp <> mio.io.in_slow io.mem_backup_clk := mio.io.clk_slow - tile.io.host <> htif.io.cpu(0) - io.debug <> tile.io.host.debug + var error_mode = Bool(false) + for (i <- 0 until NTILES) { + val tile = new Tile(co) + tile.io.host <> htif.io.cpu(i) + hub.io.tiles(i) <> tile.io.tilelink + error_mode = error_mode || tile.io.host.debug.error_mode + } + io.debug.error_mode := error_mode } object top_main { diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index a8fc9432..756c6e7b 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -86,11 +86,11 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { } def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: ioDecoupled[MemData], lock: Bool, data: ioPipe[MemData], trigger: Bool, cmd_sent: Bool, 
pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { - req_cmd.valid := !cmd_sent && at_front_of_dep_queue + req_cmd.valid := !cmd_sent && data.valid && at_front_of_dep_queue req_cmd.bits.rw := Bool(true) req_data.valid := data.valid && at_front_of_dep_queue req_data.bits := data.bits - lock := at_front_of_dep_queue + lock := data.valid && at_front_of_dep_queue when(req_cmd.ready && req_cmd.valid) { cmd_sent := Bool(true) } @@ -383,8 +383,8 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH val pop_p_reps = trackerList.map(_.io.pop_p_rep(j).toBool) val do_pop = foldR(pop_p_reps)(_ || _) p_rep.ready := Bool(true) - p_rep_data_dep_list(j).io.enq.valid := do_pop - p_rep_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_p_reps) + p_rep_data_dep_list(j).io.enq.valid := p_rep.valid && co.messageHasData(p_rep.bits) + p_rep_data_dep_list(j).io.enq.bits.global_xact_id := p_rep.bits.global_xact_id p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) when (p_rep.valid && co.messageHasData(p_rep.bits)) { p_data_valid_arr(idx) := Bool(true) From fef58f1b3a692d44c8b753248372ee734d355ac6 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 11 Apr 2012 17:56:59 -0700 Subject: [PATCH 0392/1087] Policy determined by constants. MSI policy added. 
--- rocket/src/main/scala/coherence.scala | 147 +++++++++++++++++++++++++- rocket/src/main/scala/consts.scala | 7 +- rocket/src/main/scala/top.scala | 8 +- 3 files changed, 155 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index e0f5e5c8..b4b23b4b 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -154,7 +154,7 @@ class ThreeStateIncoherence extends IncoherentPolicy { def messageUpdatesDataArray (reply: TransactionReply) = (reply.x_type === xactReplyData) } -class TwoStateCoherence extends CoherencePolicyWithUncached { +class MICoherence extends CoherencePolicyWithUncached { val tileInvalid :: tileValid :: Nil = Enum(2){ UFix() } val globalInvalid :: globalValid :: Nil = Enum(2){ UFix() } @@ -266,7 +266,7 @@ class TwoStateCoherence extends CoherencePolicyWithUncached { } } -class ThreeStateCoherence extends CoherencePolicyWithUncached { //MEI +class MEICoherence extends CoherencePolicyWithUncached { val tileInvalid :: tileExclusiveClean :: tileExclusiveDirty :: Nil = Enum(3){ UFix() } val globalInvalid :: globalExclusiveClean :: Nil = Enum(2){ UFix() } @@ -398,7 +398,146 @@ class ThreeStateCoherence extends CoherencePolicyWithUncached { //MEI } } -class FourStateCoherence extends CoherencePolicyWithUncached { +class MSICoherence extends CoherencePolicyWithUncached { + + val tileInvalid :: tileShared :: tileExclusiveDirty :: Nil = Enum(3){ UFix() } + val globalInvalid :: globalShared :: globalExclusive :: Nil = Enum(3){ UFix() } + + val xactInitReadShared :: xactInitReadExclusive :: xactInitReadUncached :: xactInitWriteUncached :: Nil = Enum(4){ UFix() } + val xactReplyReadShared :: xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: Nil = Enum(5){ UFix() } + val probeReqInvalidate :: probeReqDowngrade :: probeReqCopy :: Nil = Enum(3){ UFix() } + val probeRepInvalidateData :: 
probeRepDowngradeData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepDowngradeAck :: probeRepCopyAck :: Nil = Enum(6){ UFix() } + + def isHit (cmd: Bits, state: UFix): Bool = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, (state === tileExclusiveDirty), + (state === tileShared || state === tileExclusiveDirty)) + } + def isValid (state: UFix): Bool = { + state != tileInvalid + } + + def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = { + val (read, write) = cpuCmdToRW(cmd) + (read && (outstanding.x_type === xactInitReadUncached || outstanding.x_type === xactInitWriteUncached)) || + (write && (outstanding.x_type != xactInitReadExclusive)) + } + def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { + MuxLookup(cmd, (state === tileExclusiveDirty), Array( + M_INV -> (state === tileExclusiveDirty), + M_CLN -> (state === tileExclusiveDirty) + )) + } + def needsWriteback (state: UFix): Bool = { + needsTransactionOnCacheControl(M_INV, state) + } + + def newStateOnHit(cmd: Bits, state: UFix): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, tileExclusiveDirty, state) + } + def newStateOnCacheControl(cmd: Bits) = { + MuxLookup(cmd, tileInvalid, Array( + M_INV -> tileInvalid, + M_CLN -> tileShared + )) + } + def newStateOnWriteback() = newStateOnCacheControl(M_INV) + def newStateOnFlush() = newStateOnCacheControl(M_INV) + def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix = { + MuxLookup(incoming.x_type, tileInvalid, Array( + xactReplyReadShared -> tileShared, + xactReplyReadExclusive -> tileExclusiveDirty, + xactReplyReadExclusiveAck -> tileExclusiveDirty, + xactReplyReadUncached -> tileInvalid, + xactReplyWriteUncached -> tileInvalid + )) + } + def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { + MuxLookup(incoming.p_type, state, Array( + probeReqInvalidate -> tileInvalid, + probeReqDowngrade -> tileShared, + probeReqCopy -> state + )) 
+ } + + def getTransactionInitTypeOnUncachedRead() = xactInitReadUncached + def getTransactionInitTypeOnUncachedWrite() = xactInitWriteUncached + def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write || cmd === M_PFW, xactInitReadExclusive, xactInitReadShared) + } + def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { + val (read, write) = cpuCmdToRW(cmd) + Mux(write, xactInitReadExclusive, outstanding.x_type) + } + def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteUncached + def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) + + def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { + val reply = Wire() { new ProbeReply() } + val with_data = MuxLookup(incoming.p_type, probeRepInvalidateData, Array( + probeReqInvalidate -> probeRepInvalidateData, + probeReqDowngrade -> probeRepDowngradeData, + probeReqCopy -> probeRepCopyData + )) + val without_data = MuxLookup(incoming.p_type, probeRepInvalidateAck, Array( + probeReqInvalidate -> probeRepInvalidateAck, + probeReqDowngrade -> probeRepDowngradeAck, + probeReqCopy -> probeRepCopyAck + )) + reply.p_type := Mux(needsWriteback(state), with_data, without_data) + reply.global_xact_id := incoming.global_xact_id + reply + } + + def messageHasData (reply: ProbeReply): Bool = { + (reply.p_type === probeRepInvalidateData || + reply.p_type === probeRepDowngradeData || + reply.p_type === probeRepCopyData) + } + def messageHasData (init: TransactionInit): Bool = { + (init.x_type === xactInitWriteUncached) + } + def messageHasData (reply: TransactionReply): Bool = { + (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck) + } + def messageUpdatesDataArray (reply: TransactionReply): Bool = { + (reply.x_type === xactReplyReadShared || reply.x_type === xactReplyReadExclusive) + } + + def 
isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) + + def getTransactionReplyType(x_type: UFix, count: UFix): Bits = { + MuxLookup(x_type, xactReplyReadUncached, Array( + xactInitReadShared -> Mux(count > UFix(0), xactReplyReadShared, xactReplyReadExclusive), + xactInitReadExclusive -> xactReplyReadExclusive, + xactInitReadUncached -> xactReplyReadUncached, + xactInitWriteUncached -> xactReplyWriteUncached + )) + } + + def getProbeRequestType(x_type: UFix, global_state: UFix): UFix = { + MuxLookup(x_type, probeReqCopy, Array( + xactInitReadShared -> probeReqDowngrade, + xactInitReadExclusive -> probeReqInvalidate, + xactInitReadUncached -> probeReqCopy, + xactInitWriteUncached -> probeReqInvalidate + )) + } + + def needsMemRead(x_type: UFix, global_state: UFix): Bool = { + (x_type != xactInitWriteUncached) + } + def needsMemWrite(x_type: UFix, global_state: UFix): Bool = { + (x_type === xactInitWriteUncached) + } + def needsAckReply(x_type: UFix, global_state: UFix): Bool = { + (x_type === xactInitWriteUncached) + } +} + +class MESICoherence extends CoherencePolicyWithUncached { val tileInvalid :: tileShared :: tileExclusiveClean :: tileExclusiveDirty :: Nil = Enum(4){ UFix() } val globalInvalid :: globalShared :: globalExclusiveClean :: Nil = Enum(3){ UFix() } @@ -439,7 +578,7 @@ class FourStateCoherence extends CoherencePolicyWithUncached { def newStateOnCacheControl(cmd: Bits) = { MuxLookup(cmd, tileInvalid, Array( M_INV -> tileInvalid, - M_CLN -> tileExclusiveClean + M_CLN -> tileShared )) } def newStateOnWriteback() = newStateOnCacheControl(M_INV) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index eb1f4c1d..256c169e 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -192,10 +192,13 @@ object Constants require(IDX_BITS+OFFSET_BITS <= PGIDX_BITS); // coherence parameters + val ENABLE_SHARING = true + val ENABLE_CLEAN_EXCLUSIVE = true + val COHERENCE_DATA_BITS = 
(1 << OFFSET_BITS)*8 - val TILE_ID_BITS = 1 + val TILE_ID_BITS = log2up(NTILES)+1 val TILE_XACT_ID_BITS = log2up(NMSHR)+3 - val GLOBAL_XACT_ID_BITS = 2 + val GLOBAL_XACT_ID_BITS = log2up(NTILES*NMSHR)+1 val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS val X_INIT_TYPE_MAX_BITS = 2 diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index db85eb0d..5ee443b6 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -20,7 +20,13 @@ class Top extends Component val htif_width = 8 val io = new ioTop(htif_width) - val co = new FourStateCoherence + val co = if(ENABLE_SHARING) { + if(ENABLE_CLEAN_EXCLUSIVE) new MESICoherence + else new MSICoherence + } else { + if(ENABLE_CLEAN_EXCLUSIVE) new MEICoherence + else new MICoherence + } val htif = new rocketHTIF(htif_width, NTILES, co) val hub = new CoherenceHubBroadcast(NTILES+1, co) hub.io.tiles(NTILES) <> htif.io.mem From 00d934cfacee8fe226931bb20e5e27ab58ca6da8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 12 Apr 2012 21:57:37 -0700 Subject: [PATCH 0393/1087] fix coherence bugs in cache --- rocket/src/main/scala/nbdcache.scala | 35 ++++++++++++++++------------ 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 37f82a55..79fd774b 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -184,7 +184,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { val probe_refill = (new ioDecoupled) { Bool() }.flip } - val s_invalid :: s_wb_req :: s_wb_resp :: s_refill_req :: s_refill_resp :: s_drain_rpq :: Nil = Enum(6) { UFix() } + val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_drain_rpq :: Nil = Enum(7) { UFix() } val state = Reg(resetVal = s_invalid) val flush = Reg { Bool() } @@ -196,7 +196,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { val req_cmd = io.req_bits.cmd 
val req_use_rpq = (req_cmd != M_PFR) && (req_cmd != M_PFW) && (req_cmd != M_FLA) val idx_match = req.idx === io.req_bits.idx - val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || (state === s_refill_req || state === s_refill_resp) && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) + val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) val rpq = (new queue(NRPQ)) { new RPQEntry } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq @@ -228,14 +228,16 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { when (flush) { state := s_drain_rpq } .elsewhen (io.mem_req.ready) { state := s_refill_resp } } + when (state === s_meta_clear && io.meta_req.ready) { + state := s_refill_req + } when (state === s_wb_resp) { - when (reply) { state := s_refill_req } + when (reply) { state := s_meta_clear } when (abort) { state := s_wb_req } } - when (state === s_wb_req && io.wb_req.ready) { + when (state === s_wb_req && io.meta_req.ready) { when (io.probe_writeback.valid && idx_match) { state := s_refill_req } - when (io.wb_req.ready) { state := s_wb_resp } - state := s_wb_resp + .elsewhen (io.wb_req.ready) { state := s_wb_resp } } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req @@ -261,21 +263,21 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { io.req_pri_rdy := (state === s_invalid) io.req_sec_rdy := sec_rdy && rpq.io.enq.ready - io.meta_req.valid := (state === s_drain_rpq) && !rpq.io.deq.valid && !finish_q.io.deq.valid + io.meta_req.valid := (state === s_drain_rpq) && !rpq.io.deq.valid && !finish_q.io.deq.valid || (state === s_meta_clear) io.meta_req.bits.rw := Bool(true) io.meta_req.bits.idx := req.idx - io.meta_req.bits.data.state := line_state + 
io.meta_req.bits.data.state := Mux(state === s_meta_clear, co.newStateOnFlush(), line_state) io.meta_req.bits.data.tag := req.tag io.meta_req.bits.way_en := req.way_oh - io.wb_req.valid := (state === s_wb_req) + io.wb_req.valid := (state === s_wb_req) && !(io.probe_writeback.valid && idx_match) io.wb_req.bits.tag := req.old_tag io.wb_req.bits.idx := req.idx io.wb_req.bits.way_oh := req.way_oh io.wb_req.bits.tile_xact_id := Bits(id) - io.probe_writeback.ready := (state != s_wb_resp) || !idx_match - io.probe_refill.ready := (state != s_refill_resp) || !idx_match + io.probe_writeback.ready := (state != s_wb_resp && state != s_meta_clear) || !idx_match + io.probe_refill.ready := (state != s_refill_resp && state != s_drain_rpq) || !idx_match io.mem_req.valid := (state === s_refill_req) && !flush io.mem_req.bits.x_type := xacx_type @@ -491,7 +493,7 @@ class ProbeUnit(co: CoherencePolicy) extends Component { val address = Bits(PADDR_BITS-OFFSET_BITS, OUTPUT) } - val s_invalid :: s_meta_req :: s_meta_resp :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: Nil = Enum(6) { UFix() } + val s_invalid :: s_meta_req :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: Nil = Enum(7) { UFix() } val state = Reg(resetVal = s_invalid) val line_state = Reg() { UFix() } val way_oh = Reg() { Bits() } @@ -507,10 +509,13 @@ class ProbeUnit(co: CoherencePolicy) extends Component { when ((state === s_probe_rep) && io.meta_req.ready && io.rep.ready) { state := Mux(hit && co.needsWriteback(line_state), s_writeback_req, s_invalid) } + when ((state === s_mshr_req) && io.mshr_req.ready) { + state := s_meta_req + } when (state === s_meta_resp) { way_oh := io.tag_match_way_oh line_state := io.line_state - state := Mux(!io.mshr_req.ready, s_meta_req, s_probe_rep) + state := Mux(!io.mshr_req.ready, s_mshr_req, s_probe_rep) } when ((state === s_meta_req) && io.meta_req.ready) { state := s_meta_resp @@ -524,13 +529,13 @@ class ProbeUnit(co: CoherencePolicy) 
extends Component { io.rep.valid := state === s_probe_rep && io.meta_req.ready io.rep.bits := co.newProbeReply(req, Mux(hit, line_state, co.newStateOnFlush())) - io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_probe_rep && hit + io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_mshr_req || state === s_probe_rep && hit io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, ~UFix(0, NWAYS)) io.meta_req.bits.rw := state === s_probe_rep io.meta_req.bits.idx := req.address io.meta_req.bits.data.state := co.newStateOnProbeRequest(req, line_state) io.meta_req.bits.data.tag := req.address >> UFix(IDX_BITS) - io.mshr_req.valid := state === s_meta_resp + io.mshr_req.valid := state === s_meta_resp || state === s_mshr_req io.address := req.address io.wb_req.valid := state === s_writeback_req From 724735f13f2acc1e375f7a7b5ba738b06cba38d1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 13 Apr 2012 03:16:48 -0700 Subject: [PATCH 0394/1087] fix writeback bug --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 79fd774b..3a4bb0ab 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -235,7 +235,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { when (reply) { state := s_meta_clear } when (abort) { state := s_wb_req } } - when (state === s_wb_req && io.meta_req.ready) { + when (state === s_wb_req) { when (io.probe_writeback.valid && idx_match) { state := s_refill_req } .elsewhen (io.wb_req.ready) { state := s_wb_resp } } From fb4408b150e2b2b88a234c3ca855638c39bce084 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 15 Apr 2012 22:56:02 -0700 Subject: [PATCH 0395/1087] fix AMO replay/coherence deadlock --- rocket/src/main/scala/nbdcache.scala | 28 +++++++++++++++------------- 1 file changed, 15 
insertions(+), 13 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3a4bb0ab..0ecfc6ad 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -809,6 +809,9 @@ class HellaCache(co: CoherencePolicy) extends Component { r_cpu_req_type := io.cpu.req_type r_cpu_req_tag := io.cpu.req_tag } + when (prober.io.meta_req.valid) { + r_cpu_req_idx := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, mshr.io.data_req.bits.offset)(PGIDX_BITS-1,0) + } when (replay_amo_val) { r_cpu_req_idx := Cat(mshr.io.data_req.bits.idx, mshr.io.data_req.bits.offset) r_cpu_req_cmd := mshr.io.data_req.bits.cmd @@ -816,9 +819,6 @@ class HellaCache(co: CoherencePolicy) extends Component { r_amo_replay_data := mshr.io.data_req.bits.data r_way_oh := mshr.io.data_req.bits.way_oh } - when (prober.io.meta_req.valid) { - r_cpu_req_idx := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, mshr.io.data_req.bits.offset)(PGIDX_BITS-1,0) - } when (flusher.io.meta_req.valid) { r_cpu_req_idx := Cat(flusher.io.meta_req.bits.idx, mshr.io.data_req.bits.offset) r_cpu_req_cmd := M_FLA @@ -869,15 +869,6 @@ class HellaCache(co: CoherencePolicy) extends Component { wb.io.data_resp <> data_resp_mux wb.io.probe_rep_data <> io.mem.probe_rep_data - // probes - prober.io.req <> io.mem.probe_req - prober.io.rep <> io.mem.probe_rep - prober.io.meta_req <> meta_arb.io.in(2) - prober.io.mshr_req <> mshr.io.probe - prober.io.wb_req <> wb.io.probe - prober.io.tag_match_way_oh := tag_match_way_oh - prober.io.line_state := meta_resp_mux.state - // replacement policy val replacer = new RandomReplacementWayGen() replacer.io.way_en := ~UFix(0, NWAYS) @@ -964,7 +955,7 @@ class HellaCache(co: CoherencePolicy) extends Component { // replays val replay = mshr.io.data_req.bits - val stall_replay = r_replay_amo || p_amo || flusher.io.meta_req.valid || prober.io.meta_req.valid || p_store_valid + val stall_replay 
= r_replay_amo || p_amo || flusher.io.meta_req.valid || p_store_valid val replay_val = mshr.io.data_req.valid val replay_fire = replay_val && !stall_replay val replay_rdy = data_arb.io.in(1).ready && !stall_replay @@ -976,6 +967,17 @@ class HellaCache(co: CoherencePolicy) extends Component { mshr.io.data_req.ready := replay_rdy r_replay_amo := replay_amo_val && replay_rdy + // probes + prober.io.req <> io.mem.probe_req + prober.io.rep <> io.mem.probe_rep + prober.io.mshr_req <> mshr.io.probe + prober.io.wb_req <> wb.io.probe + prober.io.tag_match_way_oh := tag_match_way_oh + prober.io.line_state := meta_resp_mux.state + prober.io.meta_req.ready := meta_arb.io.in(2).ready && !replay_amo_val + meta_arb.io.in(2).valid := prober.io.meta_req.valid + meta_arb.io.in(2).bits := prober.io.meta_req.bits + // store write mask generation. // assumes store replays are higher-priority than pending stores. val maskgen = new StoreMaskGen From a39080d0b1cc121a5723db687d2d462e117c994e Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 18 Apr 2012 16:24:41 -0700 Subject: [PATCH 0396/1087] Fixed abort bug: xact_abort.ready was not pinned high --- rocket/src/main/scala/nbdcache.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 0ecfc6ad..3ce359a1 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -950,6 +950,7 @@ class HellaCache(co: CoherencePolicy) extends Component { mshr.io.mem_rep <> io.mem.xact_rep mshr.io.mem_abort.valid := io.mem.xact_abort.valid mshr.io.mem_abort.bits := io.mem.xact_abort.bits + io.mem.xact_abort.ready := Bool(true) mshr.io.meta_req <> meta_arb.io.in(1) replacer.io.pick_new_way := mshr.io.req.valid && mshr.io.req.ready From 55e86b5cf45da1ba5065a69d8855ddc6c6a2ee04 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 18 Apr 2012 16:31:14 -0700 Subject: [PATCH 0397/1087] Fixed coherence bug: probe counting for single tile --- 
rocket/src/main/scala/uncore.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 756c6e7b..29d0befd 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -169,14 +169,16 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := co.messageHasData(io.alloc_req.bits.xact_init) x_needs_read := co.needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) - if(ntiles > 1) p_rep_count := UFix(ntiles-1) val p_req_initial_flags = ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only - p_req_flags := p_req_initial_flags + p_req_flags := p_req_initial_flags(ntiles-1,0) mem_cnt := UFix(0) p_w_mem_cmd_sent := Bool(false) x_w_mem_cmd_sent := Bool(false) io.pop_x_init := UFix(1) << io.alloc_req.bits.tile_id - state := Mux(p_req_initial_flags.orR, s_probe, s_mem) + if(ntiles > 1) { + p_rep_count := UFix(ntiles-1) + state := Mux(p_req_initial_flags(ntiles-1,0).orR, s_probe, s_mem) + } else state := s_mem } } is(s_probe) { From 1ed89f1cab183956507885c2ce8be27b1904265e Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 24 Apr 2012 15:11:59 -0700 Subject: [PATCH 0398/1087] Fixed abort bug: removed uneeded state, added mshr guard on xact_abort.valid and xact_init.ready on same cycle --- rocket/src/main/scala/nbdcache.scala | 1 + rocket/src/main/scala/uncore.scala | 12 +++++------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3ce359a1..55e0acd1 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -226,6 +226,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { } when (state === s_refill_req) { when (flush) { state := s_drain_rpq } + .elsewhen (abort) { state := 
s_refill_req } .elsewhen (io.mem_req.ready) { state := s_refill_resp } } when (state === s_meta_clear && io.meta_req.ready) { diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 29d0befd..0f482113 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -408,7 +408,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH } // Nack conflicting transaction init attempts - val s_idle :: s_abort_drain :: s_abort_send :: s_abort_complete :: Nil = Enum(4){ UFix() } + val s_idle :: s_abort_drain :: s_abort_send :: Nil = Enum(3){ UFix() } val abort_state_arr = Vec(ntiles) { Reg(resetVal = s_idle) } val want_to_abort_arr = Vec(ntiles) { Wire() { Bool()} } for( j <- 0 until ntiles ) { @@ -445,13 +445,10 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH } is(s_abort_send) { // nothing is dequeued for now x_abort.valid := Bool(true) - when(x_abort.ready) { - abort_state_arr(j) := s_abort_complete + when(x_abort.ready) { // raises x_init.ready below + abort_state_arr(j) := s_idle } } - is(s_abort_complete) { // raises x_init.ready below - abort_state_arr(j) := s_idle - } } } @@ -475,6 +472,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data val x_init_data_dep = x_init_data_dep_list(j).io.deq + val x_abort = io.tiles(j).xact_abort init_arb.io.in(j).valid := (abort_state_arr(j) === s_idle) && !want_to_abort_arr(j) && x_init.valid init_arb.io.in(j).bits.xact_init := x_init.bits init_arb.io.in(j).bits.tile_id := UFix(j) @@ -482,7 +480,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH val do_pop = foldR(pop_x_inits)(_||_) x_init_data_dep_list(j).io.enq.valid := do_pop && co.messageHasData(x_init.bits) && (abort_state_arr(j) === s_idle) x_init_data_dep_list(j).io.enq.bits.global_xact_id := 
OHToUFix(pop_x_inits) - x_init.ready := (abort_state_arr(j) === s_abort_complete) || do_pop + x_init.ready := (x_abort.valid && x_abort.ready) || do_pop x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data(j).toBool))(_||_) x_init_data_dep.ready := foldR(trackerList.map(_.io.pop_x_init_dep(j).toBool))(_||_) } From 6d8fc74378bdf527cf7f0b84bdf80e52dbbc6d50 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 16 Apr 2012 21:20:25 -0700 Subject: [PATCH 0399/1087] fix DTLB permissions bug --- rocket/src/main/scala/dtlb.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index cd1917c0..99f7f385 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -139,15 +139,18 @@ class rocketDTLB(entries: Int) extends Component } } - val access_fault_common = - tlb_hit && + val load_fault_common = tlb_hit && ((status_s && !sr_array(tag_hit_addr)) || (status_u && !ur_array(tag_hit_addr)) || bad_va) + val store_fault_common = tlb_hit && + ((status_s && !sw_array(tag_hit_addr)) || + (status_u && !uw_array(tag_hit_addr)) || + bad_va) - io.cpu_resp.xcpt_ld := access_fault_common && (req_load || req_amo) - io.cpu_resp.xcpt_st := access_fault_common && (req_store || req_amo) - io.cpu_resp.xcpt_pf := access_fault_common && req_pf + io.cpu_resp.xcpt_ld := load_fault_common && (req_load || req_amo) + io.cpu_resp.xcpt_st := store_fault_common && (req_store || req_amo) + io.cpu_resp.xcpt_pf := load_fault_common && req_pf io.cpu_req.ready := (state === s_ready) && !tlb_miss; io.cpu_resp.miss := tlb_miss; From a0378c5d2f37cf43c620717121e77af01513318b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 16 Apr 2012 22:28:00 -0700 Subject: [PATCH 0400/1087] remove faulting TLB entry after page fault this vastly reduces the frequency with which the TLB must be flushed --- rocket/src/main/scala/dtlb.scala 
| 1 + rocket/src/main/scala/itlb.scala | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 99f7f385..c7b92662 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -79,6 +79,7 @@ class rocketDTLB(entries: Int) extends Component val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); tag_cam.io.clear := io.invalidate; + tag_cam.io.clear_hit := io.cpu_resp.xcpt_ld || io.cpu_resp.xcpt_st || io.cpu_resp.xcpt_pf tag_cam.io.tag := lookup_tag; tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; tag_cam.io.write_tag := r_refill_tag; diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 2178aa00..016a2efb 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -7,6 +7,7 @@ import scala.math._; class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { val clear = Bool(INPUT); + val clear_hit = Bool(INPUT) val tag = Bits(tag_bits, INPUT); val hit = Bool(OUTPUT); val hit_addr = UFix(addr_bits, OUTPUT); @@ -21,17 +22,20 @@ class rocketCAM(entries: Int, tag_bits: Int) extends Component { val addr_bits = ceil(log(entries)/log(2)).toInt; val io = new ioCAM(entries, addr_bits, tag_bits); val cam_tags = Mem(entries, io.write, io.write_addr, io.write_tag); + val mux = (new Mux1H(entries)) { Bits(width = addr_bits) } val vb_array = Reg(resetVal = Bits(0, entries)); when (io.clear) { vb_array := Bits(0, entries); } + .elsewhen (io.clear_hit) { + vb_array := vb_array & ~mux.io.sel.toBits + } .elsewhen (io.write) { vb_array := vb_array.bitSet(io.write_addr, Bool(true)); } var l_hit = Bool(false) - val mux = (new Mux1H(entries)) { Bits(width = addr_bits) } for (i <- 0 to entries-1) { val my_hit = vb_array(UFix(i)).toBool && (cam_tags(UFix(i)) === io.tag) l_hit = l_hit || my_hit @@ -114,6 +118,7 @@ class rocketITLB(entries: Int) extends Component val lookup_tag = Cat(r_cpu_req_asid, 
r_cpu_req_vpn); tag_cam.io.clear := io.cpu.invalidate; + tag_cam.io.clear_hit := io.cpu.exception tag_cam.io.tag := lookup_tag; tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; tag_cam.io.write_tag := r_refill_tag; From 66f86a2194f11cfa4996933829da15bc7b12c8c9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 16 Apr 2012 22:28:56 -0700 Subject: [PATCH 0401/1087] use pseudo-LRU replacement for TLBs --- rocket/src/main/scala/dtlb.scala | 10 +++++----- rocket/src/main/scala/itlb.scala | 33 +++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index c7b92662..73f6f6fa 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -55,7 +55,6 @@ class rocketDTLB(entries: Int) extends Component val r_cpu_req_asid = Reg() { Bits() } val r_refill_tag = Reg() { Bits() } val r_refill_waddr = Reg() { UFix() } - val repl_count = Reg(resetVal = UFix(0,addr_bits)); when (io.cpu_req.valid && io.cpu_req.ready) { r_cpu_req_vpn := io.cpu_req.bits.vpn; @@ -122,7 +121,8 @@ class rocketDTLB(entries: Int) extends Component // high if there are any unused (invalid) entries in the TLB val has_invalid_entry = !tag_cam.io.valid_bits.andR val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) - val repl_waddr = Mux(has_invalid_entry, invalid_entry, repl_count).toUFix; + val plru = new PseudoLRU(entries) + val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace).toUFix; val lookup = (state === s_ready) && r_cpu_req_val && !io.cpu_req.bits.kill && (req_load || req_store || req_amo || req_pf); val lookup_hit = lookup && tag_hit; @@ -135,9 +135,9 @@ class rocketDTLB(entries: Int) extends Component when (tlb_miss) { r_refill_tag := lookup_tag; r_refill_waddr := repl_waddr; - when (!has_invalid_entry) { - repl_count := repl_count + UFix(1); - } + } + when (tlb_hit) { + plru.access(tag_hit_addr) } val load_fault_common = tlb_hit && 
diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 016a2efb..78f2d886 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -48,6 +48,29 @@ class rocketCAM(entries: Int, tag_bits: Int) extends Component { io.hit_addr := mux.io.out.toUFix; } +class PseudoLRU(n: Int) +{ + val state = Reg() { Bits(width = n) } + def access(way: UFix) = { + var next_state = state + var idx = UFix(1,1) + for (i <- log2up(n)-1 to 0 by -1) { + val bit = way(i) + val mask = (UFix(1) << idx)(n-1,0) + next_state = next_state & ~mask | Mux(bit, UFix(0), mask) + //next_state.bitSet(idx, !bit) + idx = Cat(idx, bit) + } + state := next_state + } + def replace = { + var idx = UFix(1,1) + for (i <- 0 until log2up(n)) + idx = Cat(idx, state(idx)) + idx(log2up(n)-1,0) + } +} + // interface between TLB and PTW class ioTLB_PTW extends Bundle { @@ -100,7 +123,6 @@ class rocketITLB(entries: Int) extends Component val r_cpu_req_asid = Reg() { Bits() }; val r_refill_tag = Reg() { Bits() }; val r_refill_waddr = Reg() { UFix() }; - val repl_count = Reg(resetVal = UFix(0, addr_bits)); when (io.cpu.req_val && io.cpu.req_rdy) { r_cpu_req_vpn := io.cpu.req_vpn; @@ -153,7 +175,8 @@ class rocketITLB(entries: Int) extends Component // high if there are any unused entries in the ITLB val has_invalid_entry = !tag_cam.io.valid_bits.andR val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) - val repl_waddr = Mux(has_invalid_entry, invalid_entry, repl_count).toUFix; + val plru = new PseudoLRU(entries) + val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace).toUFix; val lookup = (state === s_ready) && r_cpu_req_val; val lookup_hit = lookup && tag_hit; @@ -164,9 +187,9 @@ class rocketITLB(entries: Int) extends Component when (tlb_miss) { r_refill_tag := lookup_tag; r_refill_waddr := repl_waddr; - when (!has_invalid_entry) { - repl_count := repl_count + UFix(1); - } + } + when (tlb_hit) { + plru.access(tag_hit_addr) } val 
access_fault = From c13d3e6f88f6a664f7147b7a40ca2593fe279d97 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Apr 2012 00:59:37 -0700 Subject: [PATCH 0402/1087] fix probe tag read-modify-write atomicity violation --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 55e0acd1..bae2746b 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -277,7 +277,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { io.wb_req.bits.way_oh := req.way_oh io.wb_req.bits.tile_xact_id := Bits(id) - io.probe_writeback.ready := (state != s_wb_resp && state != s_meta_clear) || !idx_match + io.probe_writeback.ready := (state != s_wb_resp && state != s_meta_clear && state != s_drain_rpq) || !idx_match io.probe_refill.ready := (state != s_refill_resp && state != s_drain_rpq) || !idx_match io.mem_req.valid := (state === s_refill_req) && !flush From eafdffe1253462bb48708a70c9cf0a95d2f7b811 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 1 May 2012 01:24:36 -0700 Subject: [PATCH 0403/1087] simplify page table walker; speed up emulator --- rocket/src/main/scala/ctrl_util.scala | 13 ++- rocket/src/main/scala/dpath_alu.scala | 8 +- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/ptw.scala | 116 ++++++++------------------ rocket/src/main/scala/util.scala | 34 ++++---- 5 files changed, 64 insertions(+), 109 deletions(-) diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index 5e85ba60..e74b748e 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -20,14 +20,13 @@ class rocketCtrlSboard(entries: Int, nread: Int, nwrite: Int) extends Component val w = Vec(nwrite) { new write_port() } } - val busybits = Vec(entries) { Reg(resetVal = Bool(false)) } + val busybits = Reg(resetVal = Bits(0, 
entries)) + + val wmasks = (0 until nwrite).map(i => Fill(entries, io.w(i).en) & (UFix(1) << io.w(i).addr)) + val wdatas = (0 until nwrite).map(i => Mux(io.w(i).data, wmasks(i), UFix(0))) + var next = busybits & ~wmasks.reduceLeft(_|_) | wdatas.reduceLeft(_|_) + busybits := next for (i <- 0 until nread) io.r(i).data := busybits(io.r(i).addr) - - for (i <- 0 until nwrite) { - when (io.w(i).en) { - busybits(io.w(i).addr) := io.w(i).data - } - } } diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 38ae335a..4a0bb607 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -34,9 +34,9 @@ class rocketDpathALU extends Component val shright = sra || (io.fn === FN_SR) val shin_hi_32 = Mux(sra, Fill(32, io.in1(31)), UFix(0,32)) val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) - val shin_r = Cat(shin_hi, io.in1(31,0)) - val shin = Mux(shright, shin_r, Reverse(shin_r)) - val shout_r = (Cat(sra & shin_r(63), shin).toFix >> shamt)(63,0) + val shin = Cat(shin_hi, io.in1(31,0)) + val shout_r = (Cat(sra & shin(63), shin).toFix >> shamt)(63,0) + val shout_l = (shin << shamt)(63,0) val bitwise_logic = Mux(io.fn === FN_AND, io.in1 & io.in2, @@ -48,7 +48,7 @@ class rocketDpathALU extends Component Mux(io.fn === FN_ADD || io.fn === FN_SUB, sum, Mux(io.fn === FN_SLT || io.fn === FN_SLTU, less, Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, - Mux(io.fn === FN_SL, Reverse(shout_r), + Mux(io.fn === FN_SL, shout_l, bitwise_logic)))) val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 0b716563..d505fd21 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -92,7 +92,7 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), 
refill_count), io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; - val data_mux = (new Mux1H(assoc)){Bits(width = MEM_DATA_BITS)} + val data_mux = (new Mux1H(assoc)){Bits(width = databits)} var any_hit = Bool(false) for (i <- 0 until assoc) { diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 938a942e..feff7896 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -81,17 +81,23 @@ class rocketPTW extends Component { val io = new ioPTW(); - val s_ready :: s_l1_req :: s_l1_wait :: s_l1_fake :: s_l2_req :: s_l2_wait :: s_l2_fake:: s_l3_req :: s_l3_wait :: s_done :: s_error :: Nil = Enum(11) { UFix() }; + val levels = 3 + val bitsPerLevel = VPN_BITS/levels + require(VPN_BITS == levels * bitsPerLevel) + + val count = Reg() { UFix(width = log2up(levels)) } + val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_ready); val r_req_vpn = Reg() { Bits() } val r_req_dest = Reg() { Bits() } - val req_addr = Reg() { UFix() }; + val req_addr = Reg() { Bits() } val r_resp_ppn = Reg() { Bits() }; val r_resp_perm = Reg() { Bits() }; - val vpn_idx = Mux(state === s_l2_wait, r_req_vpn(9,0), r_req_vpn(19,10)); + val vpn_idxs = (1 until levels).map(i => r_req_vpn((levels-i)*bitsPerLevel-1, (levels-i-1)*bitsPerLevel)) + val vpn_idx = (2 until levels).foldRight(vpn_idxs(0))((i,j) => Mux(count === UFix(i-1), vpn_idxs(i-1), j)) val req_val = io.itlb.req_val || io.dtlb.req_val || io.vitlb.req_val // give ITLB requests priority over DTLB requests @@ -102,44 +108,40 @@ class rocketPTW extends Component when ((state === s_ready) && req_itlb_val) { r_req_vpn := io.itlb.req_vpn; r_req_dest := Bits(0) - req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; + req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) } when ((state === s_ready) && req_dtlb_val) { 
r_req_vpn := io.dtlb.req_vpn; r_req_dest := Bits(1) - req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; + req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) } when ((state === s_ready) && req_vitlb_val) { r_req_vpn := io.vitlb.req_vpn; r_req_dest := Bits(2) - req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.vitlb.req_vpn(VPN_BITS-1,VPN_BITS-10), Bits(0,3)).toUFix; + req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.vitlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) } val dmem_resp_val = Reg(io.dmem.resp_val, resetVal = Bool(false)) when (dmem_resp_val) { - req_addr := Cat(io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)).toUFix; + req_addr := Cat(io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)) r_resp_perm := io.dmem.resp_data_subword(9,4); r_resp_ppn := io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS); } - io.dmem.req_val := - (state === s_l1_req) || - (state === s_l2_req) || - (state === s_l3_req); - + io.dmem.req_val := state === s_req io.dmem.req_cmd := M_XRD; io.dmem.req_type := MT_D; io.dmem.req_idx := req_addr(PGIDX_BITS-1,0); io.dmem.req_ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) io.dmem.req_kill := Bool(false) - val resp_val = (state === s_done) || (state === s_l1_fake) || (state === s_l2_fake); - val resp_err = (state === s_error); + val resp_val = state === s_done + val resp_err = state === s_error - val resp_ptd = (io.dmem.resp_data_subword(1,0) === Bits(1,2)); - val resp_pte = (io.dmem.resp_data_subword(1,0) === Bits(2,2)); + val resp_ptd = io.dmem.resp_data_subword(1,0) === Bits(1) + val resp_pte = io.dmem.resp_data_subword(1,0) === Bits(2) io.itlb.req_rdy := (state === s_ready) io.dtlb.req_rdy := (state === s_ready) && !io.itlb.req_val @@ -153,11 +155,9 @@ class rocketPTW extends Component io.itlb.resp_perm := r_resp_perm io.dtlb.resp_perm := r_resp_perm 
io.vitlb.resp_perm:= r_resp_perm - - val resp_ppn = - Mux(state === s_l1_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-7), r_req_vpn(VPN_BITS-11, 0)), - Mux(state === s_l2_fake, Cat(r_resp_ppn(PPN_BITS-1, PPN_BITS-17), r_req_vpn(VPN_BITS-21, 0)), - r_resp_ppn)); + + val resp_ppns = (0 until levels-1).map(i => Cat(r_resp_ppn(PPN_BITS-1, VPN_BITS-bitsPerLevel*(i+1)), r_req_vpn(VPN_BITS-1-bitsPerLevel*(i+1), 0))) + val resp_ppn = (0 until levels-1).foldRight(r_resp_ppn)((i,j) => Mux(count === UFix(i), resp_ppns(i), j)) io.itlb.resp_ppn := resp_ppn; io.dtlb.resp_ppn := resp_ppn; @@ -167,78 +167,34 @@ class rocketPTW extends Component switch (state) { is (s_ready) { when (req_val) { - state := s_l1_req; + state := s_req; } + count := UFix(0) } - // level 1 - is (s_l1_req) { + is (s_req) { when (io.dmem.req_rdy) { - state := s_l1_wait; + state := s_wait; } } - is (s_l1_wait) { + is (s_wait) { when (io.dmem.resp_nack) { - state := s_l1_req - } - when (dmem_resp_val) { - when (resp_ptd) { // page table descriptor - state := s_l2_req; - } - .elsewhen (resp_pte) { // page table entry - state := s_l1_fake; - } - .otherwise { - state := s_error; - } - } - } - is (s_l1_fake) { - state := s_ready; - } - // level 2 - is (s_l2_req) { - when (io.dmem.req_rdy) { - state := s_l2_wait; - } - } - is (s_l2_wait) { - when (io.dmem.resp_nack) { - state := s_l2_req - } - when (dmem_resp_val) { - when (resp_ptd) { // page table descriptor - state := s_l3_req; - } - .elsewhen (resp_pte) { // page table entry - state := s_l2_fake; - } - .otherwise { - state := s_error; - } - } - } - is (s_l2_fake) { - state := s_ready; - } - // level 3 - is (s_l3_req) { - when (io.dmem.req_rdy) { - state := s_l3_wait; - } - } - is (s_l3_wait) { - when (io.dmem.resp_nack) { - state := s_l3_req + state := s_req } when (dmem_resp_val) { when (resp_pte) { // page table entry - state := s_done; + state := s_done } .otherwise { - state := s_error; + count := count + UFix(1) + when (resp_ptd && count < UFix(levels-1)) { 
+ state := s_req + } + .otherwise { + state := s_error + } } } - } + } is (s_done) { state := s_ready; } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index a5137174..87d17c1f 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -69,27 +69,27 @@ object PopCount object Reverse { - def apply(in: Bits) = + def doit(in: Bits, base: Int, length: Int): Bits = { - var out = in(in.getWidth-1) - for (i <- 1 until in.getWidth) - out = Cat(in(in.getWidth-i-1), out) - out + val half = (1 << log2up(length))/2 + if (length == 1) + in(base) + else + Cat(doit(in, base, half), doit(in, base+half, length-half)) } + def apply(in: Bits) = doit(in, 0, in.getWidth) } object OHToUFix { - def apply(in: Bits): UFix = - { - val out = MuxCase( UFix(0), (0 until in.getWidth).map( i => (in(i).toBool, UFix(i)))) - out.toUFix - } - def apply(in: Seq[Bool]): UFix = - { - val out = MuxCase( UFix(0), in.zipWithIndex map {case (b,i) => (b, UFix(i))}) - out.toUFix + def apply(in: Seq[Bits]): UFix = { + if (in.size <= 1) return UFix(0) + if (in.size == 2) return in(1) + val hi = in.slice(in.size/2, in.size) + val lo = in.slice(0, in.size/2) + Cat(hi.reduceLeft(_||_), apply(hi zip lo map { case (x, y) => x || y })) } + def apply(in: Bits): UFix = apply((0 until in.getWidth).map(in(_))) } object UFixToOH @@ -119,7 +119,7 @@ object ShiftRegister object Mux1H { - def buildMux[T <: Data](sel: Bits, in: Vec[T], i: Int, n: Int): T = { + def buildMux[T <: Data](sel: Bits, in: Seq[T], i: Int, n: Int): T = { if (n == 1) in(i) else @@ -131,8 +131,8 @@ object Mux1H } } - def apply [T <: Data](sel: Bits, in: Vec[T]): T = buildMux(sel, in, 0, sel.getWidth) - def apply [T <: Data](sel: Vec[Bool], in: Vec[T]): T = apply(sel.toBits, in) + def apply [T <: Data](sel: Bits, in: Seq[T]): T = buildMux(sel, in, 0, sel.getWidth) + def apply [T <: Data](sel: Seq[Bool], in: Seq[T]): T = buildMux(Cat(Bits(0),sel.reverse:_*), in, 0, sel.size) } class Mux1H [T 
<: Data](n: Int)(gen: => T) extends Component From 5819beed64a3f75b0cdb08b0d45acd51cba2353b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 1 May 2012 01:25:43 -0700 Subject: [PATCH 0404/1087] use parameterized FP units --- rocket/src/main/scala/fpu.scala | 100 ++++++++++++-------------------- 1 file changed, 36 insertions(+), 64 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 54a463df..ec21bfcd 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -195,57 +195,48 @@ class rocketFPIntUnit extends Component val exc = Bits(5, OUTPUT) } - val unrec_s = new hardfloat.recodedFloat32ToFloat32 - val unrec_d = new hardfloat.recodedFloat64ToFloat64 - unrec_s.io.in := io.in1 - unrec_d.io.in := io.in1 + val unrec_s = hardfloat.recodedFloatNToFloatN(io.in1, 23, 9) + val unrec_d = hardfloat.recodedFloatNToFloatN(io.in1, 52, 12) - io.store_data := Mux(io.single, Cat(unrec_s.io.out, unrec_s.io.out), unrec_d.io.out) + io.store_data := Mux(io.single, Cat(unrec_s, unrec_s), unrec_d) - val scmp = new hardfloat.recodedFloat32Compare + val scmp = new hardfloat.recodedFloatNCompare(23, 9) scmp.io.a := io.in1 scmp.io.b := io.in2 val scmp_out = (io.cmd & Cat(scmp.io.a_lt_b, scmp.io.a_eq_b)).orR val scmp_exc = (io.cmd & Cat(scmp.io.a_lt_b_invalid, scmp.io.a_eq_b_invalid)).orR << UFix(4) - val s2i = new hardfloat.recodedFloat32ToAny - s2i.io.in := io.in1 - s2i.io.roundingMode := io.rm - s2i.io.typeOp := ~io.cmd(1,0) - - val dcmp = new hardfloat.recodedFloat64Compare + val dcmp = new hardfloat.recodedFloatNCompare(52, 12) dcmp.io.a := io.in1 dcmp.io.b := io.in2 val dcmp_out = (io.cmd & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR val dcmp_exc = (io.cmd & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UFix(4) - val d2i = new hardfloat.recodedFloat64ToAny - d2i.io.in := io.in1 - d2i.io.roundingMode := io.rm - d2i.io.typeOp := ~io.cmd(1,0) + val s2i = hardfloat.recodedFloatNToAny(io.in1, io.rm, 
~io.cmd(1,0), 23, 9, 64) + val d2i = hardfloat.recodedFloatNToAny(io.in1, io.rm, ~io.cmd(1,0), 52, 12, 64) // output muxing val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) - out_s := Cat(Fill(32, unrec_s.io.out(31)), unrec_s.io.out) + out_s := Cat(Fill(32, unrec_s(31)), unrec_s) exc_s := Bits(0) val (out_d, exc_d) = (Wire() { Bits() }, Wire() { Bits() }) - out_d := unrec_d.io.out + out_d := unrec_d exc_d := Bits(0) when (io.cmd === FCMD_MTFSR || io.cmd === FCMD_MFFSR) { out_s := io.fsr } when (io.cmd === FCMD_CVT_W_FMT || io.cmd === FCMD_CVT_WU_FMT) { - out_s := Cat(Fill(32, s2i.io.out(31)), s2i.io.out(31,0)) - exc_s := s2i.io.exceptionFlags - out_d := Cat(Fill(32, d2i.io.out(31)), d2i.io.out(31,0)) - exc_d := d2i.io.exceptionFlags + out_s := Cat(Fill(32, s2i._1(31)), s2i._1(31,0)) + exc_s := s2i._2 + out_d := Cat(Fill(32, d2i._1(31)), d2i._1(31,0)) + exc_d := d2i._2 } when (io.cmd === FCMD_CVT_L_FMT || io.cmd === FCMD_CVT_LU_FMT) { - out_s := s2i.io.out - exc_s := s2i.io.exceptionFlags - out_d := d2i.io.out - exc_d := d2i.io.exceptionFlags + out_s := s2i._1 + exc_s := s2i._2 + out_d := d2i._1 + exc_d := d2i._2 } when (io.cmd === FCMD_EQ || io.cmd === FCMD_LT || io.cmd === FCMD_LE) { out_s := scmp_out @@ -277,21 +268,8 @@ class rocketFPUFastPipe extends Component val exc_d = Bits(5, OUTPUT) } - // int->fp units - val rec_s = new hardfloat.float32ToRecodedFloat32 - val rec_d = new hardfloat.float64ToRecodedFloat64 - rec_s.io.in := io.fromint - rec_d.io.in := io.fromint - - val i2s = new hardfloat.anyToRecodedFloat32 - i2s.io.in := io.fromint - i2s.io.roundingMode := io.rm - i2s.io.typeOp := ~io.cmd(1,0) - - val i2d = new hardfloat.anyToRecodedFloat64 - i2d.io.in := io.fromint - i2d.io.roundingMode := io.rm - i2d.io.typeOp := ~io.cmd(1,0) + val i2s = hardfloat.anyToRecodedFloatN(io.fromint, io.rm, ~io.cmd(1,0), 23, 9, 64) + val i2d = hardfloat.anyToRecodedFloatN(io.fromint, io.rm, ~io.cmd(1,0), 52, 12, 64) // fp->fp units val sign_s = Mux(io.cmd === 
FCMD_SGNJ, io.in2(32), @@ -303,12 +281,8 @@ class rocketFPUFastPipe extends Component val fsgnj = Cat(Mux(io.single, io.in1(64), sign_d), io.in1(63,33), Mux(io.single, sign_s, io.in1(32)), io.in1(31,0)) - val s2d = new hardfloat.recodedFloat32ToRecodedFloat64 - s2d.io.in := io.in1 - - val d2s = new hardfloat.recodedFloat64ToRecodedFloat32 - d2s.io.in := io.in1 - d2s.io.roundingMode := io.rm + val s2d = hardfloat.recodedFloatNToRecodedFloatM(io.in1, io.rm, 23, 9, 52, 12) + val d2s = hardfloat.recodedFloatNToRecodedFloatM(io.in1, io.rm, 52, 12, 23, 9) val isnan1 = Mux(io.single, io.in1(31,29) === Bits("b111"), io.in1(63,61) === Bits("b111")) val isnan2 = Mux(io.single, io.in2(31,29) === Bits("b111"), io.in2(63,61) === Bits("b111")) @@ -321,10 +295,10 @@ class rocketFPUFastPipe extends Component // output muxing val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) - out_s := Reg(rec_s.io.out) + out_s := Reg(hardfloat.floatNToRecodedFloatN(io.fromint, 23, 9)) exc_s := Bits(0) val (out_d, exc_d) = (Wire() { Bits() }, Wire() { Bits() }) - out_d := Reg(rec_d.io.out) + out_d := Reg(hardfloat.floatNToRecodedFloatN(io.fromint, 52, 12)) exc_d := Bits(0) val r_cmd = Reg(io.cmd) @@ -346,17 +320,17 @@ class rocketFPUFastPipe extends Component exc_d := r_minmax_exc } when (r_cmd === FCMD_CVT_FMT_S || r_cmd === FCMD_CVT_FMT_D) { - out_s := Reg(d2s.io.out) - exc_s := Reg(d2s.io.exceptionFlags) - out_d := Reg(s2d.io.out) - exc_d := Reg(s2d.io.exceptionFlags) + out_s := Reg(d2s._1) + exc_s := Reg(d2s._2) + out_d := Reg(s2d._1) + exc_d := Reg(s2d._2) } when (r_cmd === FCMD_CVT_FMT_W || r_cmd === FCMD_CVT_FMT_WU || r_cmd === FCMD_CVT_FMT_L || r_cmd === FCMD_CVT_FMT_LU) { - out_s := Reg(i2s.io.out) - exc_s := Reg(i2s.io.exceptionFlags) - out_d := Reg(i2d.io.out) - exc_d := Reg(i2d.io.exceptionFlags) + out_s := Reg(i2s._1) + exc_s := Reg(i2s._2) + out_d := Reg(i2d._1) + exc_d := Reg(i2d._2) } io.out_s := out_s @@ -401,7 +375,7 @@ class rocketFPUSFMAPipe(latency: Int) extends 
Component in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, zero)) } - val fma = new hardfloat.mulAddSubRecodedFloat32_1 + val fma = new hardfloat.mulAddSubRecodedFloatN(23, 9) fma.io.op := cmd fma.io.roundingMode := rm fma.io.a := in1 @@ -437,7 +411,7 @@ class rocketFPUDFMAPipe(latency: Int) extends Component in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, zero)) } - val fma = new hardfloat.mulAddSubRecodedFloat64_1 + val fma = new hardfloat.mulAddSubRecodedFloatN(52, 12) fma.io.op := cmd fma.io.roundingMode := rm fma.io.a := in1 @@ -483,12 +457,10 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component load_wb_data := io.dpath.dmem_resp_data load_wb_tag := io.dpath.dmem_resp_tag } - val rec_s = new hardfloat.float32ToRecodedFloat32 - val rec_d = new hardfloat.float64ToRecodedFloat64 - rec_s.io.in := load_wb_data - rec_d.io.in := load_wb_data + val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) + val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) val sp_msbs = Fill(32, UFix(1,1)) - val load_wb_data_recoded = Mux(load_wb_single, Cat(sp_msbs, rec_s.io.out), rec_d.io.out) + val load_wb_data_recoded = Mux(load_wb_single, Cat(sp_msbs, rec_s), rec_d) val fsr_rm = Reg() { Bits(width = 3) } val fsr_exc = Reg() { Bits(width = 5) } From 4cfa6cd9a85ac0974893e1ed3f878aa31f569d49 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 1 May 2012 19:55:16 -0700 Subject: [PATCH 0405/1087] force Top.main's return type to Unit --- rocket/src/main/scala/top.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 5ee443b6..4ebb94d8 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -81,7 +81,7 @@ class Top extends Component } object top_main { - def main(args: Array[String]) = { + def main(args: Array[String]): Unit = { chiselMain(args.drop(1), () => 
Class.forName(args(0)).newInstance.asInstanceOf[Component]) } } From 65ff3971224ed4dee85d15ad45e94c1f97b5be7f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 1 May 2012 20:16:36 -0700 Subject: [PATCH 0406/1087] improved instruction decoding it now makes use of don't-cares by performing logic minimization --- rocket/src/main/scala/consts.scala | 47 ++- rocket/src/main/scala/cpu.scala | 5 - rocket/src/main/scala/ctrl.scala | 452 +++++++++++++++-------------- rocket/src/main/scala/decode.scala | 186 ++++++++++++ rocket/src/main/scala/fpu.scala | 124 ++++---- 5 files changed, 506 insertions(+), 308 deletions(-) create mode 100644 rocket/src/main/scala/decode.scala diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 256c169e..50b20a6d 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -12,6 +12,7 @@ object Constants val MEM_BACKUP_WIDTH = 16 + val BR_X = Bits("b????", 4) val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); val BR_NE = UFix(2, 4); @@ -31,10 +32,7 @@ object Constants val PC_EVEC = UFix(6, 3); val PC_JR = UFix(7, 3); - val REN_Y = UFix(1, 1); - val REN_N = UFix(0, 1); - - val A2_X = UFix(0, 3); + val A2_X = Bits("b???", 3) val A2_BTYPE = UFix(0, 3); val A2_LTYPE = UFix(1, 3); val A2_ITYPE = UFix(2, 3); @@ -42,38 +40,33 @@ object Constants val A2_JTYPE = UFix(5, 3); val A2_RTYPE = UFix(6, 3); - val MUL_X = UFix(0, 2); + val MUL_X = Bits("b??", 2) val MUL_LO = UFix(0, 2); val MUL_H = UFix(1, 2); val MUL_HSU = UFix(2, 2); val MUL_HU = UFix(3, 2); - val DIV_X = UFix(0, 2); + val DIV_X = Bits("b??", 2) val DIV_D = UFix(0, 2); val DIV_DU = UFix(1, 2); val DIV_R = UFix(2, 2); val DIV_RU = UFix(3, 2); - val M_N = UFix(0, 1); - val M_Y = UFix(1, 1); + val X = Bits("b?", 1) + val N = UFix(0, 1); + val Y = UFix(1, 1); - val WEN_N = UFix(0, 1); - val WEN_Y = UFix(1, 1); + val WA_X = X + val WA_RD = N + val WA_RA = Y - val WA_X = UFix(0, 1); - val WA_RD = UFix(0, 1); - val WA_RA = 
UFix(1, 1); - - val WB_X = UFix(0, 3); + val WB_X = Bits("b???", 3) val WB_PC = UFix(0, 3); val WB_ALU = UFix(2, 3); val WB_TSC = UFix(4, 3); val WB_IRT = UFix(5, 3); - val N = UFix(0, 1); - val Y = UFix(1, 1); - - val FN_X = UFix(0, 4); + val FN_X = Bits("b????", 4) val FN_ADD = UFix(0, 4); val FN_SUB = UFix(1, 4); val FN_SLT = UFix(2, 4); @@ -86,14 +79,14 @@ object Constants val FN_SRA = UFix(9, 4); val FN_OP2 = UFix(10, 4); - val DW_X = UFix(0, 1); - val DW_32 = UFix(0, 1); - val DW_64 = UFix(1, 1); - val DW_XPR = UFix(1, 1); + val DW_X = X + val DW_32 = N + val DW_64 = Y + val DW_XPR = Y val RA = UFix(1, 5); - val MT_X = Bits("b000", 3); + val MT_X = Bits("b???", 3); val MT_B = Bits("b000", 3); val MT_H = Bits("b001", 3); val MT_W = Bits("b010", 3); @@ -102,7 +95,7 @@ object Constants val MT_HU = Bits("b101", 3); val MT_WU = Bits("b110", 3); - val M_X = UFix(0, 4); + val M_X = Bits("b????", 4); val M_XRD = Bits("b0000", 4); // int load val M_XWR = Bits("b0001", 4); // int store val M_PFR = Bits("b0010", 4); // prefetch with intent to read @@ -120,12 +113,14 @@ object Constants val M_XA_MINU = Bits("b1110", 4); val M_XA_MAXU = Bits("b1111", 4); + val PCR_X = Bits("b???", 3) val PCR_N = Bits(0,3) val PCR_F = Bits(1,3) // mfpcr val PCR_T = Bits(4,3) // mtpcr val PCR_C = Bits(6,3) // clearpcr val PCR_S = Bits(7,3) // setpcr + val SYNC_X = Bits("b??", 2) val SYNC_N = Bits(0,2); val SYNC_D = Bits(1,2); val SYNC_I = Bits(2,2); @@ -223,7 +218,7 @@ object Constants val VEC_N = UFix(0, 1); val VEC_Y = if (HAVE_VEC) UFix(1, 1) else VEC_N; - val VEC_X = UFix(0, 2) + val VEC_X = Bits("b??", 2).toUFix val VEC_FN_N = UFix(0, 2) val VEC_VL = UFix(1, 2) val VEC_CFG = UFix(2, 2) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 1468b07d..6fb6cf8b 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -130,11 +130,6 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) dpath.io.fpu <> 
fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl } - else - { - ctrl.io.fpu.dec.valid := Bool(false) - ctrl.io.fpu.dec.wen := Bool(false) - } if (HAVE_VEC) { diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 5dfeff12..0e747d6e 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -94,213 +94,239 @@ object rocketCtrlDecode val xpr64 = Y; val decode_default = - // eret - // | syscall - // vec_val mem_val mul_val div_val pcr | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | - List(N, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N) + // eret + // fp_val renx2 | syscall + // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged + // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,BR_X, X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,MUL_X, X,DIV_X, X,WA_X, WB_X, PCR_X,SYNC_X,X,X,X,X) val xdecode = Array( - // eret - // | syscall - // vec_val mem_val mul_val div_val pcr | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | wsync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,BR_NE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BEQ-> List(Y, N,BR_EQ, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BLT-> List(Y, N,BR_LT, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BLTU-> List(Y, N,BR_LTU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BGE-> 
List(Y, N,BR_GE, REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BGEU-> List(Y, N,BR_GEU,REN_Y,REN_Y,A2_BTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + // eret + // fp_val renx2 | syscall + // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged + // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,BR_NE, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BEQ-> List(Y, N,N,BR_EQ, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BLT-> List(Y, N,N,BR_LT, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BLTU-> List(Y, N,N,BR_LTU,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BGE-> List(Y, N,N,BR_GE, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BGEU-> List(Y, N,N,BR_GEU,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - J-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - JAL-> List(Y, N,BR_J, REN_N,REN_N,A2_JTYPE,DW_X, FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RA,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_C-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_J-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_R-> List(Y, N,BR_JR, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - RDNPC-> List(Y, N,BR_N, 
REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + J-> List(Y, N,N,BR_J, N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + JAL-> List(Y, N,N,BR_J, N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RA,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_C-> List(Y, N,N,BR_JR, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_J-> List(Y, N,N,BR_JR, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_R-> List(Y, N,N,BR_JR, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + RDNPC-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - LB-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LH-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LW-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LD-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LBU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LHU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LWU-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SH-> List(Y, N,BR_N, 
REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SW-> List(Y, N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SD-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LB-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LH-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LW-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LD-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LBU-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LHU-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LWU-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SB-> List(Y, N,N,BR_N, Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SH-> List(Y, N,N,BR_N, Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SW-> List(Y, N,N,BR_N, Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SD-> List(xpr64,N,N,BR_N, Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOADD_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOSWAP_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_W, 
N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOAND_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOOR_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMIN_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMINU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAX_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAXU_W-> List(Y, N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOADD_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOAND_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOOR_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAX_D-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,BR_N, 
REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOADD_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOSWAP_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOAND_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOOR_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMIN_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMINU_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAX_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAXU_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOADD_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOSWAP_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOAND_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOOR_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + 
AMOMAX_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LUI-> List(Y, N,BR_N, REN_N,REN_N,A2_LTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTI -> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTIU-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ANDI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - XORI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAI-> List(Y, N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADD-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SUB-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLT-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLT, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SLTU,M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvAND-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_AND, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_OR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvXOR-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_XOR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRA-> List(Y, N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_XPR,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LUI-> List(Y, N,N,BR_N, N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADDI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTI -> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTIU-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ANDI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ORI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + XORI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLI-> 
List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADD-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SUB-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLT-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTU-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvAND-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvOR-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvXOR-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLL-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRL-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRA-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAIW-> List(xpr64,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, 
WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SUBW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SUB, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SL, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SR, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_RTYPE,DW_32,FN_SRA, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADDIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADDW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SUBW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - MUL-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - 
MULH-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_H, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HU, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHSU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, Y,MUL_LO, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MUL-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULH-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULHU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULHSU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIV-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REM-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_R, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMU-> List(Y, N,BR_N, REN_Y,REN_Y,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_D, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_DU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, 
Y,DIV_R, WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMUW-> List(xpr64,N,BR_N, REN_Y,REN_Y,A2_X, DW_32, FN_X, M_N,M_X, MT_X, N,MUL_X, Y,DIV_RU,WEN_Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIV-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_D, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_DU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REM-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_R, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_RU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_D, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVUW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_DU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_R, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMUW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_RU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - SYSCALL-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,N), - SETPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_ITYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), - CLEARPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_ITYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_C,SYNC_N,N,N,Y,Y), - ERET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,Y,N), - FENCE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), - FENCE_I-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,Y), - CFLUSH-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, 
FN_X, M_Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,Y), - MFPCR-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_X, PCR_F,SYNC_N,N,N,Y,Y), - MTPCR-> List(Y, N,BR_N, REN_Y,REN_N,A2_RTYPE,DW_XPR,FN_OP2, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_T,SYNC_N,N,N,Y,Y), - RDTIME-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), - RDCYCLE-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), - RDINSTRET-> List(Y, N,BR_N, REN_N,REN_N,A2_X, DW_XPR,FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N)) + SYSCALL-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,N), + SETPCR-> List(Y, N,N,BR_N, N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), + CLEARPCR-> List(Y, N,N,BR_N, N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_C,SYNC_N,N,N,Y,Y), + ERET-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,Y,N), + FENCE-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), + FENCE_I-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,Y), + CFLUSH-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,Y), + MFPCR-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_F,SYNC_N,N,N,Y,Y), + MTPCR-> List(Y, N,N,BR_N, Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_T,SYNC_N,N,N,Y,Y), + RDTIME-> List(Y, N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), + RDCYCLE-> List(Y, 
N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), + RDINSTRET-> List(Y, N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N)) val fdecode = Array( - // eret - // | syscall - // vec_val mem_val mul_val div_val pcr | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | - MFTX_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - MFTX_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_W_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_W_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_WU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_WU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_L_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_L_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_LU_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_LU_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FEQ_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - 
FEQ_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FLT_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FLT_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FLE_S-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FLE_D-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - MXTF_S-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - MXTF_D-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_W-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_WU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_L-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_LU-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, 
PCR_N,SYNC_N,N,N,N,N), - MFFSR-> List(FPU_Y,N,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - MTFSR-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FLW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FLD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FSW-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FSD-> List(FPU_Y,N,BR_N, REN_N,REN_Y,A2_BTYPE,DW_XPR,FN_ADD, M_Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N)) + // eret + // fp_val renx2 | syscall + // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged + // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJ_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJ_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJX_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJX_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJN_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + 
FSGNJN_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMIN_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMIN_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMAX_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMAX_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FADD_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FADD_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMUL_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMUL_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMADD_D-> List(FPU_Y,Y,N,BR_N, 
N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MFTX_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MFTX_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_W_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_W_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_WU_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_WU_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_L_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_L_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_LU_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_LU_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FEQ_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FEQ_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLT_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLT_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, 
N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLE_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLE_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MXTF_S-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MXTF_D-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_W-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_W-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_WU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_WU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_L-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_L-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_LU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_LU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MFFSR-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MTFSR-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLW-> List(FPU_Y,Y,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FLD-> List(FPU_Y,Y,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, 
N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FSW-> List(FPU_Y,Y,N,BR_N, N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FSD-> List(FPU_Y,Y,N,BR_N, N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N)) val vdecode = Array( - // eret - // | syscall - // vec_val mem_val mul_val div_val pcr | | privileged - // val | brtype renx2 renx1 s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn wen s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | - VVCFGIVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VVCFG-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VSETVL-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VF-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ITYPE,DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - VMSV-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFMVV-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FENCE_V_L-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FENCE_V_G-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), - VLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLW-> List(VEC_Y,Y,BR_N, 
REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLWU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLHU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLBU-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSH-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSB-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSD-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSW-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, 
N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTWU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTHU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTBU-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTH-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTB-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSSTD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_D, N,MUL_X, N,DIV_X, 
WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + // eret + // fp_val renx2 | syscall + // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged + // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + VVCFGIVL-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VVCFG-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VSETVL-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VF-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VMVV-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + VMSV-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFMVV-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FENCE_V_L-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FENCE_V_G-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), + VLD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLW-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLWU-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLH-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLHU-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, 
DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLB-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLBU-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSW-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSH-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSB-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLW-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSW-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTWU-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTH-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTHU-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + 
VLSTB-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTBU-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTH-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTB-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VENQCMD-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQIMM1-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQIMM2-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQCNT-> List(VEC_Y,Y,BR_N, REN_Y,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VXCPTEVAC-> List(VEC_Y,Y,BR_N, REN_N,REN_Y,A2_ZERO, DW_XPR,FN_ADD, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - 
VXCPTKILL-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N), - VXCPTHOLD-> List(VEC_Y,Y,BR_N, REN_N,REN_N,A2_X, DW_X, FN_X, M_N,M_X, MT_X, N,MUL_X, N,DIV_X, WEN_N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N)) + VENQCMD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQIMM1-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQIMM2-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQCNT-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VXCPTEVAC-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VXCPTKILL-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N), + VXCPTHOLD-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N)) } class rocketCtrl extends Component @@ -311,9 +337,9 @@ class rocketCtrl extends Component if (HAVE_FPU) decode_table ++= rocketCtrlDecode.fdecode if (HAVE_VEC) decode_table ++= rocketCtrlDecode.vdecode - val cs = ListLookup(io.dpath.inst, rocketCtrlDecode.decode_default, decode_table) + val cs = DecodeLogic(io.dpath.inst, rocketCtrlDecode.decode_default, decode_table) - val id_int_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs + val id_int_val :: id_fp_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 val id_pcr :: id_sync :: id_eret :: id_syscall 
:: id_privileged :: id_replay_next :: Nil = cs1 @@ -335,12 +361,12 @@ class rocketCtrl extends Component val id_reg_replay = Reg(resetVal = Bool(false)); val id_load_use = Wire(){Bool()}; - val ex_reg_br_type = Reg(){UFix(width = 4)}; + val ex_reg_br_type = Reg(){Bits()} val ex_reg_btb_hit = Reg(){Bool()}; val ex_reg_div_val = Reg(){Bool()}; val ex_reg_mul_val = Reg(){Bool()}; val ex_reg_mem_val = Reg(){Bool()}; - val ex_reg_mem_cmd = Reg(){UFix(width = 4)}; + val ex_reg_mem_cmd = Reg(){Bits()}; val ex_reg_mem_type = Reg(){UFix(width = 3)}; val ex_reg_valid = Reg(resetVal = Bool(false)); val ex_reg_pcr = Reg(resetVal = PCR_N); @@ -445,20 +471,18 @@ class rocketCtrl extends Component val mask_pfcntq_ready = !vec_dec.io.sigs.enq_pfcntq || io.vec_iface.vpfcntq_ready vec_stalld = - vec_dec.io.sigs.valid && ( + id_vec_val && ( !mask_cmdq_ready || !mask_ximm1q_ready || !mask_ximm2q_ready || !mask_cntq_ready || - !mask_pfcmdq_ready || !mask_pfximm1q_ready || !mask_pfximm2q_ready || !mask_pfcntq_ready) || - id_vec_val && vec_dec.io.sigs.vfence && !vec.io.vfence_ready + !mask_pfcmdq_ready || !mask_pfximm1q_ready || !mask_pfximm2q_ready || !mask_pfcntq_ready || + vec_dec.io.sigs.vfence && !vec.io.vfence_ready) vec_replay = vec.io.replay vec_irq = vec.io.irq vec_irq_cause = vec.io.irq_cause } - // executing ERET when traps are enabled causes an illegal instruction exception (as per ISA sim) - val illegal_inst = - !(id_int_val.toBool || io.fpu.dec.valid || id_vec_val.toBool) || - (id_eret.toBool && io.dpath.status(SR_ET).toBool); + // executing ERET when traps are enabled causes an illegal instruction exception + val illegal_inst = !id_int_val.toBool || (id_eret.toBool && io.dpath.status(SR_ET)) val p_irq_timer = (io.dpath.status(SR_IM+IRQ_TIMER).toBool && io.dpath.irq_timer); val p_irq_ipi = (io.dpath.status(SR_IM+IRQ_IPI).toBool && io.dpath.irq_ipi); @@ -504,7 +528,7 @@ class rocketCtrl extends Component ex_reg_valid := id_reg_valid ex_reg_pcr := id_pcr ex_reg_wen := 
id_wen.toBool && id_waddr != UFix(0); - ex_reg_fp_wen := io.fpu.dec.wen; + ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen ex_reg_eret := id_eret.toBool; ex_reg_flush_inst := (id_sync === SYNC_I); ex_reg_xcpt_ma_inst := id_reg_xcpt_ma_inst; @@ -512,7 +536,7 @@ class rocketCtrl extends Component ex_reg_xcpt_illegal := illegal_inst; ex_reg_xcpt_privileged := (id_privileged & ~io.dpath.status(SR_S)).toBool; ex_reg_xcpt_syscall := id_syscall.toBool; - ex_reg_fp_val := io.fpu.dec.valid + ex_reg_fp_val := id_fp_val ex_reg_fp_sboard_set := io.fpu.dec.sboard ex_reg_vec_val := id_vec_val.toBool ex_reg_replay := id_reg_replay @@ -543,8 +567,8 @@ class rocketCtrl extends Component val mem_reg_div_mul_val = Reg(){Bool()}; val mem_reg_eret = Reg(){Bool()}; val mem_reg_mem_val = Reg(){Bool()}; - val mem_reg_mem_cmd = Reg(){UFix(width = 4)}; - val mem_reg_mem_type = Reg(){UFix(width = 3)}; + val mem_reg_mem_cmd = Reg(){Bits()} + val mem_reg_mem_type = Reg(){Bits()} when (reset.toBool || io.dpath.killx) { mem_reg_valid := Bool(false); @@ -792,7 +816,7 @@ class rocketCtrl extends Component ( id_ex_hazard || id_mem_hazard || id_wb_hazard || id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || - id_stall_fpu || + id_fp_val && id_stall_fpu || id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || vec_stalld @@ -813,12 +837,12 @@ class rocketCtrl extends Component io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen io.dpath.ren2 := id_renx2.toBool; io.dpath.ren1 := id_renx1.toBool; - io.dpath.sel_alu2 := id_sel_alu2 + io.dpath.sel_alu2 := id_sel_alu2.toUFix io.dpath.fn_dw := id_fn_dw.toBool; - io.dpath.fn_alu := id_fn_alu; - io.dpath.div_fn := id_div_fn; + io.dpath.fn_alu := id_fn_alu.toUFix + io.dpath.div_fn := id_div_fn.toUFix io.dpath.div_val := id_div_val.toBool && id_waddr != UFix(0); - io.dpath.mul_fn := id_mul_fn; + io.dpath.mul_fn := id_mul_fn.toUFix io.dpath.mul_val := id_mul_val.toBool && id_waddr != 
UFix(0); io.dpath.ex_fp_val:= ex_reg_fp_val; io.dpath.mem_fp_val:= mem_reg_fp_val; @@ -827,13 +851,13 @@ class rocketCtrl extends Component io.dpath.wb_wen := wb_reg_wen; io.dpath.wb_valid := wb_reg_valid && !vec_replay io.dpath.sel_wa := id_sel_wa.toBool; - io.dpath.sel_wb := id_sel_wb; + io.dpath.sel_wb := id_sel_wb.toUFix io.dpath.pcr := wb_reg_pcr.toUFix io.dpath.id_eret := id_eret.toBool; io.dpath.wb_eret := wb_reg_eret; io.dpath.ex_mem_type := ex_reg_mem_type - io.fpu.valid := !io.dpath.killd && io.fpu.dec.valid + io.fpu.valid := !io.dpath.killd && id_fp_val io.fpu.killx := kill_ex io.fpu.killm := kill_mem diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala new file mode 100644 index 00000000..e2472820 --- /dev/null +++ b/rocket/src/main/scala/decode.scala @@ -0,0 +1,186 @@ +package rocket + +import Chisel._ +import Node._ + +object DecodeLogic +{ + def term(b: Literal) = { + if (b.isZ) { + var (bits, mask, swidth) = Literal.parseLit(b.toString) + new Term(BigInt(bits, 2), BigInt(2).pow(b.width)-(BigInt(mask, 2)+1)) + } else { + new Term(b.value) + } + } + def logic(addr: Bits, keys: Seq[Bits], cache: scala.collection.mutable.Map[Term,Bits], terms: Set[Term]) = { + terms.map { t => + if (!cache.contains(t)) + cache += t -> ((if (t.mask == 0) addr else addr & Lit(BigInt(2).pow(addr.width)-(t.mask+1), addr.width){Bits()}) === Lit(t.value, addr.width){Bits()}) + cache(t) + }.foldLeft(Bool(false))(_||_) + } + def apply(addr: Bits, default: List[Bits], mapping: Array[(Bits, List[Bits])]) = { + var map = mapping + var cache = scala.collection.mutable.Map[Term,Bits]() + default map { d => + val dlit = d.litOf + val dterm = term(dlit) + val (keys, values) = map.unzip + val keysterms = keys.map(k => term(k.litOf)) zip values.map(v => term(v.head.litOf)) + + val result = (0 until math.max(dlit.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) => + if (((dterm.mask >> i) & 1) != 0) { + var mint = keysterms.filter { case 
(k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1).toSet + var maxt = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1).toSet + logic(addr, keys, cache, SimplifyDC(mint, maxt, addr.width)).toBits + } else { + val want = 1 - ((dterm.value.toInt >> i) & 1) + val mint = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == want }.map(_._1).toSet + val dc = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1).toSet + val bit = logic(addr, keys, cache, Simplify(mint, dc, addr.width)).toBits + if (want == 1) bit else ~bit + } + }).reverse.reduceRight(Cat(_,_)) + map = map map { case (x,y) => (x, y.tail) } + result + } + } +} + +class Term(val value: BigInt, val mask: BigInt = 0) +{ + var prime = true + + def covers(x: Term) = ((value ^ x.value) &~ mask) == 0 + def intersects(x: Term) = ((value ^ x.value) &~ mask &~ x.mask) == 0 + override def equals(that: Any) = that match { + case x: Term => x.value == value && x.mask == mask + case _ => false + } + override def hashCode = value.toInt + def similar(x: Term) = { + val diff = value - x.value + mask == x.mask && value > x.value && (diff & diff-1) == 0 + } + def merge(x: Term) = { + prime = false + x.prime = false + val bit = value - x.value + new Term(value &~ bit, mask | bit) + } + + override def toString = value.toString + "-" + mask + (if (prime) "p" else "") +} + +object Simplify +{ + def getPrimeImplicants(implicants: Set[Term], bits: Int) = { + var prime = Set[Term]() + implicants.foreach(_.prime = true) + val cols = (0 to bits).map(b => implicants.filter(b == _.mask.bitCount)) + val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set() ++ c.filter(b == _.value.bitCount))) + for (i <- 0 to bits) { + for (j <- 0 until bits-i) + table(i)(j).foreach(a => table(i+1)(j) ++= table(i)(j+1).filter(_.similar(a)).map(_.merge(a))) + prime ++= table(i).map(_.filter(_.prime)).reduceLeft(_++_) + } 
+ prime + } + def getEssentialPrimeImplicants(prime: Set[Term], minterms: Set[Term]): Tuple3[Set[Term],Set[Term],Set[Term]] = { + val useful1 = prime.toSeq + for (i <- 0 until useful1.size) { + val icover = minterms.filter(useful1(i) covers _) + for (j <- 0 until useful1.size) { + val jcover = minterms.filter(useful1(j) covers _) + if (icover.size > jcover.size && jcover.forall(useful1(i) covers _)) + return getEssentialPrimeImplicants(prime - useful1(j), minterms) + } + } + + val essentiallyCovered = minterms.filter(t => prime.count(_ covers t) == 1) + val essential = prime.filter(p => essentiallyCovered.exists(p covers _)) + val nonessential = prime -- essential + val uncovered = minterms.filterNot(t => essential.exists(_ covers t)) + if (essential.isEmpty || uncovered.isEmpty) + (essential, nonessential, uncovered) + else { + val (a, b, c) = getEssentialPrimeImplicants(nonessential, uncovered) + (essential ++ a, b, c) + } + } + def getCost(cover: Set[Term], bits: Int) = cover.map(bits - _.mask.bitCount).sum + def getCover(implicants: Set[Term], minterms: Set[Term], bits: Int) = { + var cover = minterms.map(m => implicants.filter(_.covers(m)).map(i => Set(i))).toList + while (cover.size > 1) + cover = cover(0).map(a => cover(1).map(_ ++ a)).reduceLeft(_++_) :: cover.tail.tail + if (cover.isEmpty) + Set[Term]() + else + cover(0).reduceLeft((a, b) => if (getCost(a, bits) < getCost(b, bits)) a else b) + } + def stringify(s: Set[Term], bits: Int) = s.map(t => (0 until bits).map(i => if ((t.mask & (1 << i)) != 0) "x" else ((t.value >> i) & 1).toString).reduceLeft(_+_).reverse).reduceLeft(_+" + "+_) + + def apply(minterms: Set[Term], dontcares: Set[Term], bits: Int) = { + val prime = getPrimeImplicants(minterms ++ dontcares, bits) + minterms.foreach(t => assert(prime.exists(_.covers(t)))) + val (eprime, prime2, uncovered) = getEssentialPrimeImplicants(prime, minterms) + val cover = eprime ++ getCover(prime2, uncovered, bits) + minterms.foreach(t => 
assert(cover.exists(_.covers(t)))) // sanity check + cover + } +} + +object SimplifyDC +{ + def getImplicitDC(maxterms: Set[Term], term: Term, bits: Int, above: Boolean): Term = { + for (i <- 0 until bits) { + var t: Term = null + if (above && ((term.value | term.mask) & (1L << i)) == 0) + t = new Term(term.value | (1L << i), term.mask) + else if (!above && (term.value & (1L << i)) != 0) + t = new Term(term.value & ~(1L << i), term.mask) + if (t != null && !maxterms.exists(_.intersects(t))) + return t + } + null + } + def getPrimeImplicants(minterms: Set[Term], maxterms: Set[Term], bits: Int) = { + var prime = Set[Term]() + minterms.foreach(_.prime = true) + var mint = minterms.map(t => new Term(t.value, t.mask)) + val cols = (0 to bits).map(b => mint.filter(b == _.mask.bitCount)) + val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set() ++ c.filter(b == _.value.bitCount))) + + for (i <- 0 to bits) { + for (j <- 0 until bits-i) { + table(i)(j).foreach(a => table(i+1)(j) ++= table(i)(j+1).filter(_ similar a).map(_ merge a)) + } + for (j <- 0 until bits-i) { + for (a <- table(i)(j).filter(_.prime)) { + val dc = getImplicitDC(maxterms, a, bits, true) + if (dc != null) + table(i+1)(j) += dc merge a + } + for (a <- table(i)(j+1).filter(_.prime)) { + val dc = getImplicitDC(maxterms, a, bits, false) + if (dc != null) + table(i+1)(j) += a merge dc + } + } + prime ++= table(i).map(_.filter(_.prime)).reduceLeft(_++_) + } + prime + } + + def apply(minterms: Set[Term], maxterms: Set[Term], bits: Int) = { + val prime = getPrimeImplicants(minterms, maxterms, bits) + assert(minterms.forall(t => prime.exists(_ covers t))) + val (eprime, prime2, uncovered) = Simplify.getEssentialPrimeImplicants(prime, minterms) + assert(uncovered.forall(t => prime2.exists(_ covers t))) + val cover = eprime ++ Simplify.getCover(prime2, uncovered, bits) + minterms.foreach(t => assert(cover.exists(_.covers(t)))) // sanity check + maxterms.foreach(t => 
assert(!cover.exists(_.intersects(t)))) // sanity check + cover + } +} diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index ec21bfcd..1da046cc 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -40,6 +40,7 @@ object rocketFPConstants val FCMD_NMADD = Bits("b100111") val FCMD_LOAD = Bits("b111000") val FCMD_STORE = Bits("b111001") + val FCMD_X = Bits("b??????") val FCMD_WIDTH = 6 val FSR_WIDTH = 8 } @@ -48,7 +49,6 @@ import rocketFPConstants._ class rocketFPUCtrlSigs extends Bundle { val cmd = Bits(width = FCMD_WIDTH) - val valid = Bool() val wen = Bool() val sboard = Bool() val ren1 = Bool() @@ -74,71 +74,69 @@ class rocketFPUDecoder extends Component val N = Bool(false) val Y = Bool(true) val X = Bool(false) - val FCMD_X = FCMD_ADD val decoder = ListLookup(io.inst, - List (N,FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X), - Array(FLW -> List(Y,FCMD_LOAD, Y,N,N,N,N,Y,N,N,N,N,N,N,N), - FLD -> List(Y,FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N,N,N,N), - FSW -> List(Y,FCMD_STORE, N,N,N,Y,N,Y,N,N,N,N,Y,N,N), - FSD -> List(Y,FCMD_STORE, N,N,N,Y,N,N,N,N,N,N,Y,N,N), - MXTF_S -> List(Y,FCMD_MXTF, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), - MXTF_D -> List(Y,FCMD_MXTF, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), - FCVT_S_W -> List(Y,FCMD_CVT_FMT_W, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), - FCVT_S_WU-> List(Y,FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), - FCVT_S_L -> List(Y,FCMD_CVT_FMT_L, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), - FCVT_S_LU-> List(Y,FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), - FCVT_D_W -> List(Y,FCMD_CVT_FMT_W, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), - FCVT_D_WU-> List(Y,FCMD_CVT_FMT_WU,Y,N,N,N,N,N,Y,N,Y,N,N,N,N), - FCVT_D_L -> List(Y,FCMD_CVT_FMT_L, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), - FCVT_D_LU-> List(Y,FCMD_CVT_FMT_LU,Y,N,N,N,N,N,Y,N,Y,N,N,N,N), - MFTX_S -> List(Y,FCMD_MFTX, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), - MFTX_D -> List(Y,FCMD_MFTX, N,N,Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_W_S -> List(Y,FCMD_CVT_W_FMT, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), - FCVT_WU_S-> List(Y,FCMD_CVT_WU_FMT,N,N,Y,N,N,Y,N,Y,N,N,N,N,N), 
- FCVT_L_S -> List(Y,FCMD_CVT_L_FMT, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), - FCVT_LU_S-> List(Y,FCMD_CVT_LU_FMT,N,N,Y,N,N,Y,N,Y,N,N,N,N,N), - FCVT_W_D -> List(Y,FCMD_CVT_W_FMT, N,N,Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_WU_D-> List(Y,FCMD_CVT_WU_FMT,N,N,Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_L_D -> List(Y,FCMD_CVT_L_FMT, N,N,Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_LU_D-> List(Y,FCMD_CVT_LU_FMT,N,N,Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_S_D -> List(Y,FCMD_CVT_FMT_D, Y,N,Y,N,N,Y,N,N,Y,N,N,N,N), - FCVT_D_S -> List(Y,FCMD_CVT_FMT_S, Y,N,Y,N,N,N,N,N,Y,N,N,N,N), - FEQ_S -> List(Y,FCMD_EQ, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), - FLT_S -> List(Y,FCMD_LT, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), - FLE_S -> List(Y,FCMD_LE, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), - FEQ_D -> List(Y,FCMD_EQ, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), - FLT_D -> List(Y,FCMD_LT, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), - FLE_D -> List(Y,FCMD_LE, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), - MTFSR -> List(Y,FCMD_MTFSR, N,N,N,N,N,Y,N,Y,N,N,N,Y,Y), - MFFSR -> List(Y,FCMD_MFFSR, N,N,N,N,N,Y,N,Y,N,N,N,Y,N), - FSGNJ_S -> List(Y,FCMD_SGNJ, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), - FSGNJN_S -> List(Y,FCMD_SGNJN, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), - FSGNJX_S -> List(Y,FCMD_SGNJX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), - FSGNJ_D -> List(Y,FCMD_SGNJ, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FSGNJN_D -> List(Y,FCMD_SGNJN, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FSGNJX_D -> List(Y,FCMD_SGNJX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FMIN_S -> List(Y,FCMD_MIN, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), - FMAX_S -> List(Y,FCMD_MAX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), - FMIN_D -> List(Y,FCMD_MIN, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FMAX_D -> List(Y,FCMD_MAX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FADD_S -> List(Y,FCMD_ADD, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), - FSUB_S -> List(Y,FCMD_SUB, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), - FMUL_S -> List(Y,FCMD_MUL, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), - FADD_D -> List(Y,FCMD_ADD, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), - FSUB_D -> List(Y,FCMD_SUB, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), - FMUL_D -> List(Y,FCMD_MUL, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), - FMADD_S -> List(Y,FCMD_MADD, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), - FMSUB_S -> List(Y,FCMD_MSUB, 
Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), - FNMADD_S -> List(Y,FCMD_NMADD, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), - FNMSUB_S -> List(Y,FCMD_NMSUB, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), - FMADD_D -> List(Y,FCMD_MADD, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), - FMSUB_D -> List(Y,FCMD_MSUB, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), - FNMADD_D -> List(Y,FCMD_NMADD, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), - FNMSUB_D -> List(Y,FCMD_NMSUB, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N) + List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X), + Array(FLW -> List(FCMD_LOAD, Y,N,N,N,N,Y,N,N,N,N,N,N,N), + FLD -> List(FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N,N,N,N), + FSW -> List(FCMD_STORE, N,N,N,Y,N,Y,N,N,N,N,Y,N,N), + FSD -> List(FCMD_STORE, N,N,N,Y,N,N,N,N,N,N,Y,N,N), + MXTF_S -> List(FCMD_MXTF, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), + MXTF_D -> List(FCMD_MXTF, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), + FCVT_S_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), + FCVT_S_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), + FCVT_S_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), + FCVT_S_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), + FCVT_D_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), + FCVT_D_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,N,N,Y,N,Y,N,N,N,N), + FCVT_D_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), + FCVT_D_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,N,Y,N,Y,N,N,N,N), + MFTX_S -> List(FCMD_MFTX, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), + MFTX_D -> List(FCMD_MFTX, N,N,Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_W_S -> List(FCMD_CVT_W_FMT, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), + FCVT_WU_S-> List(FCMD_CVT_WU_FMT,N,N,Y,N,N,Y,N,Y,N,N,N,N,N), + FCVT_L_S -> List(FCMD_CVT_L_FMT, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), + FCVT_LU_S-> List(FCMD_CVT_LU_FMT,N,N,Y,N,N,Y,N,Y,N,N,N,N,N), + FCVT_W_D -> List(FCMD_CVT_W_FMT, N,N,Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_WU_D-> List(FCMD_CVT_WU_FMT,N,N,Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_L_D -> List(FCMD_CVT_L_FMT, N,N,Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_LU_D-> List(FCMD_CVT_LU_FMT,N,N,Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_S_D -> List(FCMD_CVT_FMT_D, Y,N,Y,N,N,Y,N,N,Y,N,N,N,N), + FCVT_D_S -> List(FCMD_CVT_FMT_S, 
Y,N,Y,N,N,N,N,N,Y,N,N,N,N), + FEQ_S -> List(FCMD_EQ, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), + FLT_S -> List(FCMD_LT, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), + FLE_S -> List(FCMD_LE, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), + FEQ_D -> List(FCMD_EQ, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), + FLT_D -> List(FCMD_LT, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), + FLE_D -> List(FCMD_LE, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), + MTFSR -> List(FCMD_MTFSR, N,N,N,N,N,Y,N,Y,N,N,N,Y,Y), + MFFSR -> List(FCMD_MFFSR, N,N,N,N,N,Y,N,Y,N,N,N,Y,N), + FSGNJ_S -> List(FCMD_SGNJ, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), + FSGNJN_S -> List(FCMD_SGNJN, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), + FSGNJX_S -> List(FCMD_SGNJX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), + FSGNJ_D -> List(FCMD_SGNJ, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), + FSGNJN_D -> List(FCMD_SGNJN, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), + FSGNJX_D -> List(FCMD_SGNJX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), + FMIN_S -> List(FCMD_MIN, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), + FMAX_S -> List(FCMD_MAX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), + FMIN_D -> List(FCMD_MIN, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), + FMAX_D -> List(FCMD_MAX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), + FADD_S -> List(FCMD_ADD, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), + FSUB_S -> List(FCMD_SUB, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), + FMUL_S -> List(FCMD_MUL, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), + FADD_D -> List(FCMD_ADD, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), + FSUB_D -> List(FCMD_SUB, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), + FMUL_D -> List(FCMD_MUL, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), + FMADD_S -> List(FCMD_MADD, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), + FMSUB_S -> List(FCMD_MSUB, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), + FNMADD_S -> List(FCMD_NMADD, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), + FNMSUB_S -> List(FCMD_NMSUB, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), + FMADD_D -> List(FCMD_MADD, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), + FMSUB_D -> List(FCMD_MSUB, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), + FNMADD_D -> List(FCMD_NMADD, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), + FNMSUB_D -> List(FCMD_NMSUB, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N) )) - val valid :: cmd :: wen :: sboard :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: fma :: store :: rdfsr :: wrfsr :: Nil = decoder + val cmd :: wen :: 
sboard :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: fma :: store :: rdfsr :: wrfsr :: Nil = decoder - io.sigs.valid := valid.toBool io.sigs.cmd := cmd io.sigs.wen := wen.toBool io.sigs.sboard := sboard.toBool From 622a801bb1a31676bf0ad4886427dfc796ac718c Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 1 May 2012 18:23:04 -0700 Subject: [PATCH 0407/1087] Refactored cpu/cache interface to use nested bundles --- rocket/src/main/scala/cpu.scala | 68 ++++++++------ rocket/src/main/scala/ctrl.scala | 28 +++--- rocket/src/main/scala/dpath.scala | 30 +++---- rocket/src/main/scala/nbdcache.scala | 95 +++++++++++++------- rocket/src/main/scala/ptw.scala | 127 ++++++++++++++------------- 5 files changed, 198 insertions(+), 150 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 6fb6cf8b..dda9fc81 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -7,15 +7,15 @@ import hwacha._ class ioRocket extends Bundle() { - val host = new ioHTIF(); - val imem = new ioImem().flip - val vimem = new ioImem().flip - val dmem = new ioDmem().flip + val host = new ioHTIF + val imem = (new ioImem).flip + val vimem = (new ioImem).flip + val dmem = new ioHellaCache } class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) { - val io = new ioRocket(); + val io = new ioRocket val ctrl = new rocketCtrl(); val dpath = new rocketDpath(); @@ -24,7 +24,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) val itlb = new rocketITLB(ITLB_ENTRIES); val vitlb = new rocketITLB(VITLB_ENTRIES) val ptw = new rocketPTW(); - val arb = new rocketDmemArbiter(DCACHE_PORTS) + val arb = new rocketHellaCacheArbiter(DCACHE_PORTS) var vu: vu = null if (HAVE_VEC) @@ -59,7 +59,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) // connect DTLB to ctrl+dpath dtlbarb.io.in(DTLB_CPU).valid := ctrl.io.dtlb_val dtlbarb.io.in(DTLB_CPU).bits.kill := 
ctrl.io.dtlb_kill - dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req_cmd + dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req.bits.cmd dtlbarb.io.in(DTLB_CPU).bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR dtlbarb.io.in(DTLB_CPU).bits.vpn := dpath.io.dtlb.vpn ctrl.io.dtlb_rdy := dtlbarb.io.in(DTLB_CPU).ready @@ -75,7 +75,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) // connect DTLB to ctrl+dpath dtlb.io.cpu_req.valid := ctrl.io.dtlb_val dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill - dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req_cmd + dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd dtlb.io.cpu_req.bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld @@ -87,8 +87,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) dtlb.io.invalidate := dpath.io.ptbr_wen dtlb.io.status := dpath.io.ctrl.status - arb.io.requestor(DMEM_CPU).req_ppn := dtlb.io.cpu_resp.ppn - ctrl.io.dmem.req_rdy := dtlb.io.cpu_req.ready && arb.io.requestor(DMEM_CPU).req_rdy + arb.io.requestor(DMEM_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn + ctrl.io.dmem.req.ready := dtlb.io.cpu_req.ready && arb.io.requestor(DMEM_CPU).req.ready // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) @@ -96,8 +96,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) ptw.io.itlb <> itlb.io.ptw; ptw.io.vitlb <> vitlb.io.ptw ptw.io.ptbr := dpath.io.ptbr; - arb.io.requestor(DMEM_PTW) <> ptw.io.dmem - arb.io.dmem <> io.dmem + arb.io.requestor(DMEM_PTW) <> ptw.io.mem + arb.io.mem <> io.dmem ctrl.io.dpath <> dpath.io.ctrl; dpath.io.host <> io.host; @@ -120,8 +120,18 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) io.imem.itlb_miss := itlb.io.cpu.resp_miss; // connect arbiter to ctrl+dpath+DTLB - 
arb.io.requestor(DMEM_CPU) <> ctrl.io.dmem - arb.io.requestor(DMEM_CPU) <> dpath.io.dmem + arb.io.requestor(DMEM_CPU).resp <> ctrl.io.dmem.resp + arb.io.requestor(DMEM_CPU).xcpt <> ctrl.io.dmem.xcpt + arb.io.requestor(DMEM_CPU).resp <> dpath.io.dmem.resp + //TODO: views on nested bundles? + arb.io.requestor(DMEM_CPU).req.valid := ctrl.io.dmem.req.valid + ctrl.io.dmem.req.ready := arb.io.requestor(DMEM_CPU).req.ready + arb.io.requestor(DMEM_CPU).req.bits.kill := ctrl.io.dmem.req.bits.kill + arb.io.requestor(DMEM_CPU).req.bits.cmd := ctrl.io.dmem.req.bits.cmd + arb.io.requestor(DMEM_CPU).req.bits.typ := ctrl.io.dmem.req.bits.typ + arb.io.requestor(DMEM_CPU).req.bits.idx := dpath.io.dmem.req.bits.idx + arb.io.requestor(DMEM_CPU).req.bits.tag := dpath.io.dmem.req.bits.tag + arb.io.requestor(DMEM_CPU).req.bits.data := dpath.io.dmem.req.bits.data var fpu: rocketFPU = null if (HAVE_FPU) @@ -207,21 +217,21 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) storegen.io.typ := vu.io.dmem_req.bits.typ storegen.io.din := vu.io.dmem_req.bits.data - arb.io.requestor(DMEM_VU).req_val := vu.io.dmem_req.valid - arb.io.requestor(DMEM_VU).req_kill := vu.io.dmem_req.bits.kill - arb.io.requestor(DMEM_VU).req_cmd := vu.io.dmem_req.bits.cmd - arb.io.requestor(DMEM_VU).req_type := vu.io.dmem_req.bits.typ - arb.io.requestor(DMEM_VU).req_idx := vu.io.dmem_req.bits.idx - arb.io.requestor(DMEM_VU).req_ppn := Reg(vu.io.dmem_req.bits.ppn) - arb.io.requestor(DMEM_VU).req_data := Reg(storegen.io.dout) - arb.io.requestor(DMEM_VU).req_tag := vu.io.dmem_req.bits.tag + arb.io.requestor(DMEM_VU).req.valid := vu.io.dmem_req.valid + arb.io.requestor(DMEM_VU).req.bits.kill := vu.io.dmem_req.bits.kill + arb.io.requestor(DMEM_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd + arb.io.requestor(DMEM_VU).req.bits.typ := vu.io.dmem_req.bits.typ + arb.io.requestor(DMEM_VU).req.bits.idx := vu.io.dmem_req.bits.idx + arb.io.requestor(DMEM_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn) + 
arb.io.requestor(DMEM_VU).req.bits.data := Reg(storegen.io.dout) + arb.io.requestor(DMEM_VU).req.bits.tag := vu.io.dmem_req.bits.tag - vu.io.dmem_req.ready := arb.io.requestor(DMEM_VU).req_rdy - vu.io.dmem_resp.valid := Reg(arb.io.requestor(DMEM_VU).resp_val) - vu.io.dmem_resp.bits.nack := arb.io.requestor(DMEM_VU).resp_nack - vu.io.dmem_resp.bits.data := arb.io.requestor(DMEM_VU).resp_data_subword - vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DMEM_VU).resp_tag) - vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DMEM_VU).resp_type) + vu.io.dmem_req.ready := arb.io.requestor(DMEM_VU).req.ready + vu.io.dmem_resp.valid := Reg(arb.io.requestor(DMEM_VU).resp.valid) + vu.io.dmem_resp.bits.nack := arb.io.requestor(DMEM_VU).resp.bits.nack + vu.io.dmem_resp.bits.data := arb.io.requestor(DMEM_VU).resp.bits.data_subword + vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DMEM_VU).resp.bits.tag) + vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DMEM_VU).resp.bits.typ) // share vector integer multiplier with rocket dpath.io.vec_imul_req <> vu.io.cp_imul_req @@ -233,7 +243,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) } else { - arb.io.requestor(DMEM_VU).req_val := Bool(false) + arb.io.requestor(DMEM_VU).req.valid := Bool(false) if (HAVE_FPU) { fpu.io.sfma.valid := Bool(false) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 0e747d6e..dafe0d1f 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -76,7 +76,7 @@ class ioCtrlAll extends Bundle() { val dpath = new ioCtrlDpath(); val imem = new ioImem(List("req_val", "resp_val")).flip - val dmem = new ioDmem(List("req_val", "req_kill", "req_rdy", "req_cmd", "req_type", "resp_miss", "resp_nack", "xcpt_ma_ld", "xcpt_ma_st")).flip + val dmem = new ioHellaCache val dtlb_val = Bool(OUTPUT); val dtlb_kill = Bool(OUTPUT); val dtlb_rdy = Bool(INPUT); @@ -351,7 +351,7 @@ class rocketCtrl extends Component val id_waddr = Mux(id_sel_wa 
=== WA_RA, RA, io.dpath.inst(31,27)); val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) - val wb_reg_dcache_miss = Reg(io.dmem.resp_miss || io.dmem.resp_nack, resetVal = Bool(false)); + val wb_reg_dcache_miss = Reg(io.dmem.resp.bits.miss || io.dmem.resp.bits.nack, resetVal = Bool(false)); val id_reg_valid = Reg(resetVal = Bool(false)); val id_reg_btb_hit = Reg(resetVal = Bool(false)); @@ -681,8 +681,8 @@ class rocketCtrl extends Component } // exception handling - val mem_xcpt_ma_ld = io.dmem.xcpt_ma_ld && !mem_reg_kill - val mem_xcpt_ma_st = io.dmem.xcpt_ma_st && !mem_reg_kill + val mem_xcpt_ma_ld = io.dmem.xcpt.ma.ld && !mem_reg_kill + val mem_xcpt_ma_st = io.dmem.xcpt.ma.st && !mem_reg_kill val mem_xcpt_dtlb_ld = io.xcpt_dtlb_ld && !mem_reg_kill val mem_xcpt_dtlb_st = io.xcpt_dtlb_st && !mem_reg_kill @@ -722,7 +722,7 @@ class rocketCtrl extends Component // replay mem stage PC on a DTLB miss or a long-latency writeback val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val - val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp_nack) + val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack) val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill || fpu_kill_mem @@ -731,7 +731,7 @@ class rocketCtrl extends Component // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i instructions val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || - ex_reg_replay || ex_reg_mem_val && !(io.dmem.req_rdy && io.dtlb_rdy) || + ex_reg_replay || ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || ex_reg_div_val && !io.dpath.div_rdy || ex_reg_mul_val && !io.dpath.mul_rdy val kill_ex = take_pc_wb || replay_ex @@ -817,8 +817,8 @@ 
class rocketCtrl extends Component id_ex_hazard || id_mem_hazard || id_wb_hazard || id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || id_fp_val && id_stall_fpu || - id_mem_val.toBool && !(io.dmem.req_rdy && io.dtlb_rdy) || - ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req_rdy || + id_mem_val.toBool && !(io.dmem.req.ready && io.dtlb_rdy) || + ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req.ready || vec_stalld ); val ctrl_stallf = ctrl_stalld; @@ -861,10 +861,10 @@ class rocketCtrl extends Component io.fpu.killx := kill_ex io.fpu.killm := kill_mem - io.dtlb_val := ex_reg_mem_val - io.dtlb_kill := mem_reg_kill; - io.dmem.req_val := ex_reg_mem_val - io.dmem.req_kill := kill_dcache; - io.dmem.req_cmd := ex_reg_mem_cmd; - io.dmem.req_type := ex_reg_mem_type; + io.dtlb_val := ex_reg_mem_val + io.dtlb_kill := mem_reg_kill + io.dmem.req.valid := ex_reg_mem_val + io.dmem.req.bits.kill := kill_dcache + io.dmem.req.bits.cmd := ex_reg_mem_cmd + io.dmem.req.bits.typ := ex_reg_mem_type } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 180d4427..3a6ee698 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -17,7 +17,7 @@ class ioDpathAll extends Bundle() { val host = new ioHTIF(); val ctrl = new ioCtrlDpath().flip - val dmem = new ioDmem(List("req_idx", "req_tag", "req_data", "resp_val", "resp_miss", "resp_replay", "resp_type", "resp_tag", "resp_data", "resp_data_subword")).flip + val dmem = new ioHellaCache val dtlb = new ioDTLB_CPU_req_bundle().asOutput() val imem = new ioDpathImem(); val ptbr_wen = Bool(OUTPUT); @@ -274,9 +274,9 @@ class rocketDpath extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req_idx := ex_effective_address - io.dmem.req_data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) - io.dmem.req_tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) + 
io.dmem.req.bits.idx := ex_effective_address + io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) + io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) io.dtlb.vpn := ex_effective_address >> UFix(PGIDX_BITS) // processor control regfile read @@ -332,13 +332,13 @@ class rocketDpath extends Component // 32/64 bit load handling (moved to earlier in file) // writeback arbitration - val dmem_resp_xpu = !io.dmem.resp_tag(0).toBool - val dmem_resp_fpu = io.dmem.resp_tag(0).toBool - val dmem_resp_waddr = io.dmem.resp_tag.toUFix >> UFix(1) - dmem_resp_replay := io.dmem.resp_replay && dmem_resp_xpu; + val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool + val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool + val dmem_resp_waddr = io.dmem.resp.bits.tag.toUFix >> UFix(1) + dmem_resp_replay := io.dmem.resp.bits.replay && dmem_resp_xpu; r_dmem_resp_replay := dmem_resp_replay r_dmem_resp_waddr := dmem_resp_waddr - r_dmem_fp_replay := io.dmem.resp_replay && dmem_resp_fpu; + r_dmem_fp_replay := io.dmem.resp.bits.replay && dmem_resp_fpu; val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr, Mux(div.io.resp_val, div.io.resp_tag, @@ -350,9 +350,9 @@ class rocketDpath extends Component mem_reg_wdata))) val mem_ll_wb = dmem_resp_replay || div.io.resp_val || mul_io.resp_val - io.fpu.dmem_resp_val := io.dmem.resp_val && dmem_resp_fpu - io.fpu.dmem_resp_data := io.dmem.resp_data - io.fpu.dmem_resp_type := io.dmem.resp_type + io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu + io.fpu.dmem_resp_data := io.dmem.resp.bits.data + io.fpu.dmem_resp_type := io.dmem.resp.bits.typ io.fpu.dmem_resp_tag := dmem_resp_waddr // writeback stage @@ -362,7 +362,7 @@ class rocketDpath extends Component wb_reg_rs2 := mem_reg_rs2 wb_reg_waddr := mem_ll_waddr wb_reg_wdata := mem_ll_wdata - wb_reg_dmem_wdata := io.dmem.resp_data + wb_reg_dmem_wdata := io.dmem.resp.bits.data wb_reg_vec_waddr := mem_reg_waddr wb_reg_vec_wdata := mem_reg_wdata wb_reg_raddr1 := 
mem_reg_raddr1 @@ -395,7 +395,7 @@ class rocketDpath extends Component wb_wdata := Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), - Mux(wb_src_dmem, io.dmem.resp_data_subword, + Mux(wb_src_dmem, io.dmem.resp.bits.data_subword, wb_reg_wdata)) } else @@ -406,7 +406,7 @@ class rocketDpath extends Component pcr.io.vec_nfregs := UFix(0) wb_wdata := - Mux(wb_src_dmem, io.dmem.resp_data_subword, + Mux(wb_src_dmem, io.dmem.resp.bits.data_subword, wb_reg_wdata) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index bae2746b..c713b6f3 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -716,6 +716,41 @@ class AMOALU extends Component { io.out := Mux(word, Cat(out(31,0), out(31,0)).toUFix, out) } +class HellaCacheReq extends Bundle { + val cmd = Bits(width = 4) + val typ = Bits(width = 3) + val idx = Bits(width = PGIDX_BITS) + val ppn = Bits(width = PPN_BITS) + val data = Bits(width = 64) + val kill = Bool() + val tag = Bits(width = DCACHE_TAG_BITS) +} + +class HellaCacheResp extends Bundle { + val miss = Bool() + val nack = Bool() + val replay = Bool() + val typ = Bits(width = 3) + val data = Bits(width = 64) + val data_subword = Bits(width = 64) + val tag = Bits(width = DCACHE_TAG_BITS) +} + +class AlignmentExceptions extends Bundle { + val ld = Bool() + val st = Bool() +} + +class HellaCacheExceptions extends Bundle { + val ma = new AlignmentExceptions +} + +class ioHellaCache extends Bundle { + val req = (new ioDecoupled){ new HellaCacheReq } + val resp = (new ioPipe){ new HellaCacheResp }.flip + val xcpt = (new HellaCacheExceptions).asInput +} + // interface between D$ and processor/DTLB class ioDmem(view: List[String] = null) extends Bundle(view) { val req_kill = Bool(INPUT); @@ -741,7 +776,7 @@ class ioDmem(view: List[String] = null) extends Bundle(view) { class HellaCache(co: CoherencePolicy) extends Component { val io = new Bundle { - val cpu = new ioDmem() + val cpu = (new 
ioHellaCache).flip val mem = new ioTileLink } @@ -759,8 +794,8 @@ class HellaCache(co: CoherencePolicy) extends Component { val ramindexlsb = log2up(MEM_DATA_BITS/8) val early_nack = Reg { Bool() } - val r_cpu_req_val_ = Reg(io.cpu.req_val && io.cpu.req_rdy, resetVal = Bool(false)) - val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req_kill && !early_nack + val r_cpu_req_val_ = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false)) + val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack val r_cpu_req_idx = Reg() { Bits() } val r_cpu_req_cmd = Reg() { Bits() } val r_cpu_req_type = Reg() { Bits() } @@ -776,9 +811,9 @@ class HellaCache(co: CoherencePolicy) extends Component { val p_store_way_oh = Reg() { Bits() } val r_replay_amo = Reg(resetVal = Bool(false)) - val req_store = (io.cpu.req_cmd === M_XWR) - val req_load = (io.cpu.req_cmd === M_XRD) - val req_amo = io.cpu.req_cmd(3).toBool + val req_store = (io.cpu.req.bits.cmd === M_XWR) + val req_load = (io.cpu.req.bits.cmd === M_XRD) + val req_amo = io.cpu.req.bits.cmd(3).toBool val req_read = req_load || req_amo val req_write = req_store || req_amo val r_req_load = (r_cpu_req_cmd === M_XRD) @@ -804,11 +839,11 @@ class HellaCache(co: CoherencePolicy) extends Component { flusher.io.req.valid := r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && !flushed flusher.io.mshr_req.ready := mshr.io.req.ready - when (io.cpu.req_val) { - r_cpu_req_idx := io.cpu.req_idx - r_cpu_req_cmd := io.cpu.req_cmd - r_cpu_req_type := io.cpu.req_type - r_cpu_req_tag := io.cpu.req_tag + when (io.cpu.req.valid) { + r_cpu_req_idx := io.cpu.req.bits.idx + r_cpu_req_cmd := io.cpu.req.bits.cmd + r_cpu_req_type := io.cpu.req.bits.typ + r_cpu_req_tag := io.cpu.req.bits.tag } when (prober.io.meta_req.valid) { r_cpu_req_idx := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, mshr.io.data_req.bits.offset)(PGIDX_BITS-1,0) @@ -825,15 +860,15 @@ class HellaCache(co: CoherencePolicy) extends Component { 
r_cpu_req_cmd := M_FLA r_way_oh := flusher.io.meta_req.bits.way_en } - val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req_data) + val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req.bits.data) val misaligned = (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && (r_cpu_req_idx(0) != Bits(0))) || (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); - io.cpu.xcpt_ma_ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned - io.cpu.xcpt_ma_st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned + io.cpu.xcpt.ma.ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned + io.cpu.xcpt.ma.st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned // tags val meta = new MetaDataArrayArray(lines) @@ -847,12 +882,12 @@ class HellaCache(co: CoherencePolicy) extends Component { data_arb.io.out <> data.io.req // cpu tag check - meta_arb.io.in(3).valid := io.cpu.req_val - meta_arb.io.in(3).bits.idx := io.cpu.req_idx(indexmsb,indexlsb) + meta_arb.io.in(3).valid := io.cpu.req.valid + meta_arb.io.in(3).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) meta_arb.io.in(3).bits.rw := Bool(false) meta_arb.io.in(3).bits.way_en := ~UFix(0, NWAYS) val early_tag_nack = !meta_arb.io.in(3).ready - val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.address >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req_ppn) + val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.address >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req.bits.ppn) val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) val tag_match_arr = (0 until NWAYS).map( w => co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR @@ -886,10 +921,10 @@ class HellaCache(co: CoherencePolicy) extends Component { data_arb.io.in(0).valid := io.mem.xact_rep.valid && 
co.messageUpdatesDataArray(io.mem.xact_rep.bits) // load hits - data_arb.io.in(4).bits.offset := io.cpu.req_idx(offsetmsb,ramindexlsb) - data_arb.io.in(4).bits.idx := io.cpu.req_idx(indexmsb,indexlsb) + data_arb.io.in(4).bits.offset := io.cpu.req.bits.idx(offsetmsb,ramindexlsb) + data_arb.io.in(4).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) data_arb.io.in(4).bits.rw := Bool(false) - data_arb.io.in(4).valid := io.cpu.req_val && req_read + data_arb.io.in(4).valid := io.cpu.req.valid && req_read data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check val early_load_nack = req_read && !data_arb.io.in(4).ready @@ -900,7 +935,7 @@ class HellaCache(co: CoherencePolicy) extends Component { val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb)) val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) val p_store_match = r_cpu_req_val_ && r_req_read && p_store_idx_match && p_store_offset_match - val drain_store_val = (p_store_valid && (!io.cpu.req_val || req_write || wb.io.data_req.valid || mshr.io.data_req.valid)) || p_store_match + val drain_store_val = (p_store_valid && (!io.cpu.req.valid || req_write || wb.io.data_req.valid || mshr.io.data_req.valid)) || p_store_match data_arb.io.in(2).bits.offset := p_store_idx(offsetmsb,ramindexlsb) data_arb.io.in(2).bits.idx := p_store_idx(indexmsb,indexlsb) data_arb.io.in(2).bits.rw := Bool(true) @@ -1020,15 +1055,15 @@ class HellaCache(co: CoherencePolicy) extends Component { !flushed && r_req_flush val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush - io.cpu.req_rdy := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence - io.cpu.resp_nack := r_cpu_req_val_ && !io.cpu.req_kill && nack - io.cpu.resp_val := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val - io.cpu.resp_replay := 
mshr.io.cpu_resp_val - io.cpu.resp_miss := r_cpu_req_val_ && (!tag_hit || mshr.io.secondary_miss) && r_req_read - io.cpu.resp_tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag) - io.cpu.resp_type := loadgen.io.typ - io.cpu.resp_data := loadgen.io.dout - io.cpu.resp_data_subword := loadgen.io.r_dout_subword + io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence + io.cpu.resp.valid := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val + io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack + io.cpu.resp.bits.replay := mshr.io.cpu_resp_val + io.cpu.resp.bits.miss := r_cpu_req_val_ && (!tag_hit || mshr.io.secondary_miss) && r_req_read + io.cpu.resp.bits.tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag) + io.cpu.resp.bits.typ := loadgen.io.typ + io.cpu.resp.bits.data := loadgen.io.dout + io.cpu.resp.bits.data_subword := loadgen.io.r_dout_subword val xact_init_arb = (new Arbiter(2)) { new TransactionInit } xact_init_arb.io.in(0) <> wb.io.mem_req diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index feff7896..8e5bd4b2 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -5,81 +5,84 @@ import Node._; import Constants._; import scala.math._; -class ioDmemArbiter(n: Int) extends Bundle +class ioHellaCacheArbiter(n: Int) extends Bundle { - val dmem = new ioDmem().flip - val requestor = Vec(n) { new ioDmem() } + val requestor = Vec(n) { new ioHellaCache() }.flip + val mem = new ioHellaCache } -class rocketDmemArbiter(n: Int) extends Component +class rocketHellaCacheArbiter(n: Int) extends Component { - val io = new ioDmemArbiter(n) + val io = new ioHellaCacheArbiter(n) require(DCACHE_TAG_BITS >= log2up(n) + CPU_TAG_BITS) var req_val = Bool(false) - var req_rdy = io.dmem.req_rdy + var req_rdy = io.mem.req.ready for (i <- 0 until n) { - io.requestor(i).req_rdy := 
req_rdy - req_val = req_val || io.requestor(i).req_val - req_rdy = req_rdy && !io.requestor(i).req_val + io.requestor(i).req.ready := req_rdy + req_val = req_val || io.requestor(i).req.valid + req_rdy = req_rdy && !io.requestor(i).req.valid } - var req_cmd = io.requestor(n-1).req_cmd - var req_type = io.requestor(n-1).req_type - var req_idx = io.requestor(n-1).req_idx - var req_ppn = io.requestor(n-1).req_ppn - var req_data = io.requestor(n-1).req_data - var req_tag = io.requestor(n-1).req_tag - var req_kill = io.requestor(n-1).req_kill + var req_cmd = io.requestor(n-1).req.bits.cmd + var req_type = io.requestor(n-1).req.bits.typ + var req_idx = io.requestor(n-1).req.bits.idx + var req_ppn = io.requestor(n-1).req.bits.ppn + var req_data = io.requestor(n-1).req.bits.data + var req_kill = io.requestor(n-1).req.bits.kill + var req_tag = io.requestor(n-1).req.bits.tag for (i <- n-1 to 0 by -1) { - req_cmd = Mux(io.requestor(i).req_val, io.requestor(i).req_cmd, req_cmd) - req_type = Mux(io.requestor(i).req_val, io.requestor(i).req_type, req_type) - req_idx = Mux(io.requestor(i).req_val, io.requestor(i).req_idx, req_idx) - req_ppn = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_ppn, req_ppn) - req_data = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_data, req_data) - req_tag = Mux(io.requestor(i).req_val, Cat(io.requestor(i).req_tag, UFix(i, log2up(n))), req_tag) - req_kill = Mux(Reg(io.requestor(i).req_val), io.requestor(i).req_kill, req_kill) + val r = io.requestor(i).req + req_cmd = Mux(r.valid, r.bits.cmd, req_cmd) + req_type = Mux(r.valid, r.bits.typ, req_type) + req_idx = Mux(r.valid, r.bits.idx, req_idx) + req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn) + req_data = Mux(Reg(r.valid), r.bits.data, req_data) + req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill) + req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2up(n))), req_tag) } - io.dmem.req_val := req_val - io.dmem.req_cmd := req_cmd - io.dmem.req_type := req_type - io.dmem.req_idx := req_idx 
- io.dmem.req_ppn := req_ppn - io.dmem.req_data := req_data - io.dmem.req_tag := req_tag - io.dmem.req_kill := req_kill + io.mem.req.valid := req_val + io.mem.req.bits.cmd := req_cmd + io.mem.req.bits.typ := req_type + io.mem.req.bits.idx := req_idx + io.mem.req.bits.ppn := req_ppn + io.mem.req.bits.data := req_data + io.mem.req.bits.kill := req_kill + io.mem.req.bits.tag := req_tag for (i <- 0 until n) { - val tag_hit = io.dmem.resp_tag(log2up(n)-1,0) === UFix(i) - io.requestor(i).xcpt_ma_ld := io.dmem.xcpt_ma_ld && Reg(io.requestor(i).req_val) - io.requestor(i).xcpt_ma_st := io.dmem.xcpt_ma_st && Reg(io.requestor(i).req_val) - io.requestor(i).resp_nack := io.dmem.resp_nack && Reg(io.requestor(i).req_val) - io.requestor(i).resp_miss := io.dmem.resp_miss && tag_hit - io.requestor(i).resp_val := io.dmem.resp_val && tag_hit - io.requestor(i).resp_replay := io.dmem.resp_replay && tag_hit - io.requestor(i).resp_data := io.dmem.resp_data - io.requestor(i).resp_data_subword := io.dmem.resp_data_subword - io.requestor(i).resp_type := io.dmem.resp_type - io.requestor(i).resp_tag := io.dmem.resp_tag >> UFix(log2up(n)) + val r = io.requestor(i).resp + val x = io.requestor(i).xcpt + val tag_hit = io.mem.resp.bits.tag(log2up(n)-1,0) === UFix(i) + x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid) + x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid) + r.valid := io.mem.resp.valid && tag_hit + r.bits.miss := io.mem.resp.bits.miss && tag_hit + r.bits.nack := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid) + r.bits.replay := io.mem.resp.bits.replay && tag_hit + r.bits.data := io.mem.resp.bits.data + r.bits.data_subword := io.mem.resp.bits.data_subword + r.bits.typ := io.mem.resp.bits.typ + r.bits.tag := io.mem.resp.bits.tag >> UFix(log2up(n)) } } class ioPTW extends Bundle { - val itlb = new ioTLB_PTW().flip - val dtlb = new ioTLB_PTW().flip - val vitlb = new ioTLB_PTW().flip - val dmem = new ioDmem().flip - val ptbr = UFix(PADDR_BITS, INPUT); + val 
itlb = (new ioTLB_PTW).flip + val dtlb = (new ioTLB_PTW).flip + val vitlb = (new ioTLB_PTW).flip + val mem = new ioHellaCache + val ptbr = UFix(PADDR_BITS, INPUT) } class rocketPTW extends Component { - val io = new ioPTW(); + val io = new ioPTW val levels = 3 val bitsPerLevel = VPN_BITS/levels @@ -123,25 +126,25 @@ class rocketPTW extends Component req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.vitlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) } - val dmem_resp_val = Reg(io.dmem.resp_val, resetVal = Bool(false)) + val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false)) when (dmem_resp_val) { - req_addr := Cat(io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)) - r_resp_perm := io.dmem.resp_data_subword(9,4); - r_resp_ppn := io.dmem.resp_data_subword(PADDR_BITS-1, PGIDX_BITS); + req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)) + r_resp_perm := io.mem.resp.bits.data_subword(9,4); + r_resp_ppn := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS); } - io.dmem.req_val := state === s_req - io.dmem.req_cmd := M_XRD; - io.dmem.req_type := MT_D; - io.dmem.req_idx := req_addr(PGIDX_BITS-1,0); - io.dmem.req_ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) - io.dmem.req_kill := Bool(false) + io.mem.req.valid := state === s_req + io.mem.req.bits.cmd := M_XRD + io.mem.req.bits.typ := MT_D + io.mem.req.bits.idx := req_addr(PGIDX_BITS-1,0) + io.mem.req.bits.ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) + io.mem.req.bits.kill := Bool(false) val resp_val = state === s_done val resp_err = state === s_error - val resp_ptd = io.dmem.resp_data_subword(1,0) === Bits(1) - val resp_pte = io.dmem.resp_data_subword(1,0) === Bits(2) + val resp_ptd = io.mem.resp.bits.data_subword(1,0) === Bits(1) + val resp_pte = io.mem.resp.bits.data_subword(1,0) === Bits(2) io.itlb.req_rdy := (state === s_ready) io.dtlb.req_rdy := (state === s_ready) && !io.itlb.req_val @@ -172,12 +175,12 @@ class rocketPTW 
extends Component count := UFix(0) } is (s_req) { - when (io.dmem.req_rdy) { + when (io.mem.req.ready) { state := s_wait; } } is (s_wait) { - when (io.dmem.resp_nack) { + when (io.mem.resp.bits.nack) { state := s_req } when (dmem_resp_val) { From 2d4e5d3813379537b82765c04dcc5432b8410c65 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 2 May 2012 19:27:27 -0700 Subject: [PATCH 0408/1087] fix pseudo-LRU verilog generation bug --- rocket/src/main/scala/itlb.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 78f2d886..f8deb5cd 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -56,7 +56,7 @@ class PseudoLRU(n: Int) var idx = UFix(1,1) for (i <- log2up(n)-1 to 0 by -1) { val bit = way(i) - val mask = (UFix(1) << idx)(n-1,0) + val mask = (UFix(1,n) << idx)(n-1,0) next_state = next_state & ~mask | Mux(bit, UFix(0), mask) //next_state.bitSet(idx, !bit) idx = Cat(idx, bit) From e1f9dc2c1ffc59a86a50a28b7aa28ef283ccf825 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 3 May 2012 02:29:09 -0700 Subject: [PATCH 0409/1087] generalize page table walker also, don't instantiate vitlb when !HAVE_VEC --- rocket/src/main/scala/cpu.scala | 10 ++--- rocket/src/main/scala/ptw.scala | 68 +++++++++++--------------------- rocket/src/main/scala/util.scala | 9 +++-- 3 files changed, 34 insertions(+), 53 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index dda9fc81..d1842e21 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -22,8 +22,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) val dtlb = new rocketDTLB(DTLB_ENTRIES); val itlb = new rocketITLB(ITLB_ENTRIES); - val vitlb = new rocketITLB(VITLB_ENTRIES) - val ptw = new rocketPTW(); + val ptw = new rocketPTW(if (HAVE_VEC) 3 else 2) val arb = new rocketHellaCacheArbiter(DCACHE_PORTS) var 
vu: vu = null @@ -92,9 +91,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) - ptw.io.dtlb <> dtlb.io.ptw; - ptw.io.itlb <> itlb.io.ptw; - ptw.io.vitlb <> vitlb.io.ptw + ptw.io.requestor(0) <> itlb.io.ptw + ptw.io.requestor(1) <> dtlb.io.ptw ptw.io.ptbr := dpath.io.ptbr; arb.io.requestor(DMEM_PTW) <> ptw.io.mem arb.io.mem <> io.dmem @@ -146,6 +144,8 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) dpath.io.vec_ctrl <> ctrl.io.vec_dpath // hooking up vector I$ + val vitlb = new rocketITLB(VITLB_ENTRIES) + ptw.io.requestor(2) <> vitlb.io.ptw vitlb.io.cpu.invalidate := dpath.io.ptbr_wen vitlb.io.cpu.status := dpath.io.ctrl.status vitlb.io.cpu.req_val := vu.io.imem_req.valid diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 8e5bd4b2..38364c85 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -71,18 +71,16 @@ class rocketHellaCacheArbiter(n: Int) extends Component } } -class ioPTW extends Bundle +class ioPTW(n: Int) extends Bundle { - val itlb = (new ioTLB_PTW).flip - val dtlb = (new ioTLB_PTW).flip - val vitlb = (new ioTLB_PTW).flip + val requestor = Vec(n) { new ioTLB_PTW }.flip val mem = new ioHellaCache val ptbr = UFix(PADDR_BITS, INPUT) } -class rocketPTW extends Component +class rocketPTW(n: Int) extends Component { - val io = new ioPTW + val io = new ioPTW(n) val levels = 3 val bitsPerLevel = VPN_BITS/levels @@ -101,29 +99,20 @@ class rocketPTW extends Component val vpn_idxs = (1 until levels).map(i => r_req_vpn((levels-i)*bitsPerLevel-1, (levels-i-1)*bitsPerLevel)) val vpn_idx = (2 until levels).foldRight(vpn_idxs(0))((i,j) => Mux(count === UFix(i-1), vpn_idxs(i-1), j)) - val req_val = io.itlb.req_val || io.dtlb.req_val || io.vitlb.req_val - - // give ITLB requests priority over 
DTLB requests - val req_itlb_val = io.itlb.req_val; - val req_dtlb_val = io.dtlb.req_val && !io.itlb.req_val; - val req_vitlb_val = io.vitlb.req_val && !io.itlb.req_val && !io.dtlb.req_val - - when ((state === s_ready) && req_itlb_val) { - r_req_vpn := io.itlb.req_vpn; - r_req_dest := Bits(0) - req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.itlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) + + val req_rdy = state === s_ready + var req_val = Bool(false) + for (r <- io.requestor) { + r.req_rdy := req_rdy && !req_val + req_val = req_val || r.req_val } + val req_dest = PriorityEncoder(io.requestor.map(_.req_val)) + val req_vpn = io.requestor.slice(0, n-1).foldRight(io.requestor(n-1).req_vpn)((r, v) => Mux(r.req_val, r.req_vpn, v)) - when ((state === s_ready) && req_dtlb_val) { - r_req_vpn := io.dtlb.req_vpn; - r_req_dest := Bits(1) - req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.dtlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) - } - - when ((state === s_ready) && req_vitlb_val) { - r_req_vpn := io.vitlb.req_vpn; - r_req_dest := Bits(2) - req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), io.vitlb.req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) + when (state === s_ready && req_val) { + r_req_vpn := req_vpn + r_req_dest := req_dest + req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) } val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false)) @@ -145,26 +134,17 @@ class rocketPTW extends Component val resp_ptd = io.mem.resp.bits.data_subword(1,0) === Bits(1) val resp_pte = io.mem.resp.bits.data_subword(1,0) === Bits(2) - - io.itlb.req_rdy := (state === s_ready) - io.dtlb.req_rdy := (state === s_ready) && !io.itlb.req_val - io.vitlb.req_rdy := (state === s_ready) && !io.itlb.req_val && !io.dtlb.req_val - io.itlb.resp_val := r_req_dest === Bits(0) && resp_val - io.dtlb.resp_val := r_req_dest === Bits(1) && resp_val - io.vitlb.resp_val := r_req_dest === Bits(2) && resp_val - 
io.itlb.resp_err := r_req_dest === Bits(0) && resp_err - io.dtlb.resp_err := r_req_dest === Bits(1) && resp_err - io.vitlb.resp_err := r_req_dest === Bits(2) && resp_err - io.itlb.resp_perm := r_resp_perm - io.dtlb.resp_perm := r_resp_perm - io.vitlb.resp_perm:= r_resp_perm val resp_ppns = (0 until levels-1).map(i => Cat(r_resp_ppn(PPN_BITS-1, VPN_BITS-bitsPerLevel*(i+1)), r_req_vpn(VPN_BITS-1-bitsPerLevel*(i+1), 0))) val resp_ppn = (0 until levels-1).foldRight(r_resp_ppn)((i,j) => Mux(count === UFix(i), resp_ppns(i), j)) - - io.itlb.resp_ppn := resp_ppn; - io.dtlb.resp_ppn := resp_ppn; - io.vitlb.resp_ppn := resp_ppn; + + for (i <- 0 until io.requestor.size) { + val me = r_req_dest === UFix(i) + io.requestor(i).resp_val := resp_val && me + io.requestor(i).resp_err := resp_err && me + io.requestor(i).resp_perm := r_resp_perm + io.requestor(i).resp_ppn := resp_ppn + } // control state machine switch (state) { diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 87d17c1f..d83ce82e 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -256,13 +256,14 @@ class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { object PriorityEncoder { - def apply(in: Bits): UFix = doApply(in, 0) - def doApply(in: Bits, n: Int = 0): UFix = { - if (n >= in.getWidth-1) + def doit(in: Seq[Bits], n: Int): UFix = { + if (n >= in.size-1) UFix(n) else - Mux(in(n), UFix(n), doApply(in, n+1)) + Mux(in(n), UFix(n), doit(in, n+1)) } + def apply(in: Seq[Bits]): UFix = doit(in, 0) + def apply(in: Bits): UFix = apply((0 until in.getWidth).map(in(_))) } object PriorityEncoderOH From 171c87002e02c7f699e7980a8183c77b98d5ab50 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 3 May 2012 04:21:11 -0700 Subject: [PATCH 0410/1087] reduce HTIF clock divider for now --- rocket/src/main/scala/slowio.scala | 5 +++-- rocket/src/main/scala/top.scala | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git 
a/rocket/src/main/scala/slowio.scala b/rocket/src/main/scala/slowio.scala index 395bdc8d..e54996ef 100644 --- a/rocket/src/main/scala/slowio.scala +++ b/rocket/src/main/scala/slowio.scala @@ -3,7 +3,7 @@ package rocket import Chisel._ import Constants._ -class slowIO[T <: Data](divisor: Int, hold_cycles: Int)(data: => T) extends Component +class slowIO[T <: Data](val divisor: Int, hold_cycles_in: Int = -1)(data: => T) extends Component { val io = new Bundle { val out_fast = new ioDecoupled()(data).flip @@ -15,8 +15,9 @@ class slowIO[T <: Data](divisor: Int, hold_cycles: Int)(data: => T) extends Comp val clk_slow = Bool(OUTPUT) } + val hold_cycles = if (hold_cycles_in == -1) divisor/4 else hold_cycles_in require((divisor & (divisor-1)) == 0) - require(hold_cycles < divisor/2 && hold_cycles >= 2) + require(hold_cycles < divisor/2 && hold_cycles >= 1) val cnt = Reg() { UFix(width = log2up(divisor)) } cnt := cnt + UFix(1) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 4ebb94d8..55bf08f9 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -16,7 +16,7 @@ class ioTop(htif_width: Int) extends Bundle { class Top extends Component { - val clkdiv = 32 + val clkdiv = 8 val htif_width = 8 val io = new ioTop(htif_width) @@ -53,7 +53,7 @@ class Top extends Component hub.io.mem.resp.bits := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.bits, io.mem.resp.bits) // pad out the HTIF using a divided clock - val hio = (new slowIO(clkdiv, 4)) { Bits(width = htif_width) } + val hio = (new slowIO(clkdiv)) { Bits(width = htif_width) } htif.io.host.out <> hio.io.out_fast io.host.out <> hio.io.out_slow htif.io.host.in <> hio.io.in_fast @@ -61,7 +61,7 @@ class Top extends Component io.host_clk := hio.io.clk_slow // pad out the backup memory link with the HTIF divided clk - val mio = (new slowIO(clkdiv, 4)) { Bits(width = MEM_BACKUP_WIDTH) } + val mio = (new slowIO(clkdiv)) { Bits(width = MEM_BACKUP_WIDTH) } 
mem_serdes.io.narrow.req <> mio.io.out_fast io.mem_backup.req <> mio.io.out_slow mem_serdes.io.narrow.resp.valid := mio.io.in_fast.valid From b851f1b34c2291cf8f3ce30b6ec9b09eec045b36 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 3 May 2012 21:11:02 -0700 Subject: [PATCH 0411/1087] support maximum-MTU HTIF packets --- rocket/src/main/scala/htif.scala | 78 ++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 34 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index a8bf52cc..e56ed49b 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -42,52 +42,58 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C val long_request_bits = 576 require(short_request_bits % w == 0) - val rx_count_w = 13 + log2up(8) - log2up(w) // data size field is 12 bits + val rx_count_w = 13 + log2up(64) - log2up(w) // data size field is 12 bits val rx_count = Reg(resetVal = UFix(0,rx_count_w)) val rx_shifter = Reg() { Bits(width = short_request_bits) } - val header = Reg() { Bits() } val rx_shifter_in = Cat(io.host.in.bits, rx_shifter(short_request_bits-1,w)) + val next_cmd = rx_shifter_in(3,0) + val cmd = Reg() { Bits() } + val size = Reg() { Bits() } + val pos = Reg() { Bits() } + val seqno = Reg() { Bits() } + val addr = Reg() { Bits() } when (io.host.in.valid && io.host.in.ready) { rx_shifter := rx_shifter_in rx_count := rx_count + UFix(1) when (rx_count === UFix(short_request_bits/w-1)) { - header := rx_shifter_in + cmd := next_cmd + size := rx_shifter_in(15,4) + pos := rx_shifter_in(15,4+OFFSET_BITS-3) + seqno := rx_shifter_in(23,16) + addr := rx_shifter_in(63,24) } } - val rx_count_words = rx_count >> UFix(log2up(short_request_bits/w)) - val packet_ram_wen = rx_count(log2up(short_request_bits/w)-1,0).andR && - io.host.in.valid && io.host.in.ready - val packet_ram = Vec(long_request_bits/short_request_bits-1) { Reg() { Bits(width = short_request_bits) } } 
- when (packet_ram_wen) { - packet_ram(rx_count_words - UFix(1)) := rx_shifter_in + val rx_word_count = (rx_count >> UFix(log2up(short_request_bits/w))) + val rx_word_done = io.host.in.valid && rx_count(log2up(short_request_bits/w)-1,0).andR + val packet_ram_depth = long_request_bits/short_request_bits-1 + val packet_ram = Vec(packet_ram_depth) { Reg() { Bits(width = short_request_bits) } } + when (rx_word_done && io.host.in.ready) { + packet_ram(rx_word_count(log2up(packet_ram_depth)-1,0) - UFix(1)) := rx_shifter_in } val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } - val cmd = header(3,0) - val size = header(15,4) - val seqno = header(23,16) - val addr = header(63,24).toUFix val pcr_addr = addr(4,0) val pcr_coreid = if (ncores == 1) UFix(0) else addr(20+log2up(ncores),20) val pcr_wdata = packet_ram(0) - val nack = Mux(cmd === cmd_readmem || cmd === cmd_writemem, size != UFix((1 << OFFSET_BITS)/8), + val bad_mem_packet = size(OFFSET_BITS-1-3,0).orR || addr(OFFSET_BITS-1-3,0).orR + val nack = Mux(cmd === cmd_readmem || cmd === cmd_writemem, bad_mem_packet, Mux(cmd === cmd_readcr || cmd === cmd_writecr, size != UFix(1), Bool(true))) - val tx_count = Reg(resetVal = UFix(0, log2up(long_request_bits/w+1))) - val packet_ram_raddr = (tx_count >> UFix(log2up(short_request_bits/w))) + val tx_count = Reg(resetVal = UFix(0, rx_count_w)) + val tx_subword_count = tx_count(log2up(short_request_bits/w)-1,0) + val tx_word_count = tx_count(rx_count_w-1, log2up(short_request_bits/w)) + val packet_ram_raddr = tx_word_count(log2up(packet_ram_depth)-1,0) - UFix(1) when (io.host.out.valid && io.host.out.ready) { tx_count := tx_count + UFix(1) } - val rx_size = Mux(cmd === cmd_writemem || cmd === cmd_writecr, size, UFix(0)) - val rx_done = rx_count >= UFix(short_request_bits/w) && rx_count_words-UFix(1) === rx_size - val tx_size = Mux(!nack && cmd === cmd_readmem, UFix((1 << OFFSET_BITS)/8), - Mux(!nack && cmd === 
cmd_readcr, UFix(1), UFix(0))) - val tx_done = packet_ram_raddr - UFix(1) === tx_size + val rx_done = rx_word_done && Mux(rx_word_count === UFix(0), next_cmd != cmd_writemem && next_cmd != cmd_writecr, rx_word_count === size || rx_word_count(log2up(packet_ram_depth)-1,0) === UFix(0)) + val tx_size = Mux(!nack && (cmd === cmd_readmem || cmd === cmd_readcr), size, UFix(0)) + val tx_done = io.host.out.ready && tx_subword_count.andR && (tx_word_count === tx_size || tx_word_count > UFix(0) && packet_ram_raddr.andR) val mem_acked = Reg(resetVal = Bool(false)) val mem_gxid = Reg() { Bits() } @@ -104,8 +110,9 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C val state = Reg(resetVal = state_rx) when (state === state_rx && rx_done) { - state := Mux(cmd === cmd_readmem || cmd === cmd_writemem, state_mem_req, - Mux(cmd === cmd_readcr || cmd === cmd_writecr, state_pcr, + val rx_cmd = Mux(rx_word_count === UFix(0), next_cmd, cmd) + state := Mux(rx_cmd === cmd_readmem || rx_cmd === cmd_writemem, state_mem_req, + Mux(rx_cmd === cmd_readcr || rx_cmd === cmd_writecr, state_pcr, state_tx)) } @@ -148,11 +155,15 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C mem_acked := Bool(false) } when (state === state_mem_finish && io.mem.xact_finish.ready) { - state := state_tx + state := Mux(cmd === cmd_readmem || pos === UFix(1), state_tx, state_rx) + pos := pos - UFix(1) + addr := addr + UFix(1 << OFFSET_BITS-3) } when (state === state_tx && tx_done) { - rx_count := UFix(0) - tx_count := UFix(0) + when (tx_word_count === tx_size) { + rx_count := UFix(0) + tx_count := UFix(0) + } state := state_rx } @@ -166,7 +177,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C } io.mem.xact_init.valid := state === state_mem_req io.mem.xact_init.bits.x_type := Mux(cmd === cmd_writemem, co.getTransactionInitTypeOnUncachedWrite, co.getTransactionInitTypeOnUncachedRead) - io.mem.xact_init.bits.address := 
addr >> UFix(OFFSET_BITS-3) + io.mem.xact_init.bits.address := addr.toUFix >> UFix(OFFSET_BITS-3) io.mem.xact_init_data.valid:= state === state_mem_wdata io.mem.xact_init_data.bits.data := mem_req_data io.mem.xact_finish.valid := (state === state_mem_finish) && mem_needs_ack @@ -210,13 +221,12 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C val tx_cmd = Mux(nack, cmd_nack, cmd_ack) val tx_cmd_ext = Cat(Bits(0, 4-tx_cmd.getWidth), tx_cmd) - val tx_size_ext = Cat(Bits(0, 12-tx_size.getWidth), tx_size) - val tx_header = Cat(addr, seqno, tx_size_ext, tx_cmd_ext) - val tx_data = Mux(packet_ram_raddr === UFix(0), tx_header, - Mux(packet_ram_raddr === UFix(1) && cmd === cmd_readcr, pcr_mux.io.out, - packet_ram(packet_ram_raddr - UFix(1)))) + val tx_header = Cat(addr, seqno, tx_size, tx_cmd_ext) + val tx_data = Mux(tx_word_count === UFix(0), tx_header, + Mux(cmd === cmd_readcr, pcr_mux.io.out, + packet_ram(packet_ram_raddr))) - io.host.in.ready := state === state_rx && !rx_done - io.host.out.valid := state === state_tx && !tx_done + io.host.in.ready := state === state_rx + io.host.out.valid := state === state_tx io.host.out.bits := tx_data >> Cat(tx_count(log2up(short_request_bits/w)-1,0), Bits(0, log2up(w))) } From 87cbae2c8ace6861ea057e7765e1da6c75437a35 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 7 May 2012 17:28:18 -0700 Subject: [PATCH 0412/1087] Removed defunct ioDmem --- rocket/src/main/scala/cpu.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 28 +++------------------------- 2 files changed, 4 insertions(+), 26 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index d1842e21..0799b8e0 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -118,10 +118,10 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) io.imem.itlb_miss := itlb.io.cpu.resp_miss; // connect arbiter to ctrl+dpath+DTLB + //TODO: views on nested bundles? 
arb.io.requestor(DMEM_CPU).resp <> ctrl.io.dmem.resp arb.io.requestor(DMEM_CPU).xcpt <> ctrl.io.dmem.xcpt arb.io.requestor(DMEM_CPU).resp <> dpath.io.dmem.resp - //TODO: views on nested bundles? arb.io.requestor(DMEM_CPU).req.valid := ctrl.io.dmem.req.valid ctrl.io.dmem.req.ready := arb.io.requestor(DMEM_CPU).req.ready arb.io.requestor(DMEM_CPU).req.bits.kill := ctrl.io.dmem.req.bits.kill diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c713b6f3..c2e93e8e 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -717,13 +717,13 @@ class AMOALU extends Component { } class HellaCacheReq extends Bundle { - val cmd = Bits(width = 4) + val kill = Bool() val typ = Bits(width = 3) val idx = Bits(width = PGIDX_BITS) val ppn = Bits(width = PPN_BITS) val data = Bits(width = 64) - val kill = Bool() val tag = Bits(width = DCACHE_TAG_BITS) + val cmd = Bits(width = 4) } class HellaCacheResp extends Bundle { @@ -745,35 +745,13 @@ class HellaCacheExceptions extends Bundle { val ma = new AlignmentExceptions } +// interface between D$ and processor/DTLB class ioHellaCache extends Bundle { val req = (new ioDecoupled){ new HellaCacheReq } val resp = (new ioPipe){ new HellaCacheResp }.flip val xcpt = (new HellaCacheExceptions).asInput } -// interface between D$ and processor/DTLB -class ioDmem(view: List[String] = null) extends Bundle(view) { - val req_kill = Bool(INPUT); - val req_val = Bool(INPUT); - val req_rdy = Bool(OUTPUT); - val req_cmd = Bits(4, INPUT); - val req_type = Bits(3, INPUT); - val req_idx = Bits(PGIDX_BITS, INPUT); - val req_ppn = Bits(PPN_BITS, INPUT); - val req_data = Bits(64, INPUT); - val req_tag = Bits(DCACHE_TAG_BITS, INPUT); - val xcpt_ma_ld = Bool(OUTPUT); // misaligned load - val xcpt_ma_st = Bool(OUTPUT); // misaligned store - val resp_miss = Bool(OUTPUT); - val resp_nack = Bool(OUTPUT); - val resp_val = Bool(OUTPUT); - val resp_replay = Bool(OUTPUT); - val resp_type = 
Bits(3, OUTPUT); - val resp_data = Bits(64, OUTPUT); - val resp_data_subword = Bits(64, OUTPUT); - val resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT); -} - class HellaCache(co: CoherencePolicy) extends Component { val io = new Bundle { val cpu = (new ioHellaCache).flip From e0e1cd5d32f15c72f8e4f8f3a80fab3e455abb89 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 8 May 2012 22:58:00 -0700 Subject: [PATCH 0413/1087] add IPIs and an IPI test IPIs are routed through the HTIF, which seems weird, but that makes it so cores can bring each other out of reset with IPIs. --- rocket/src/main/scala/dpath_util.scala | 8 +++++--- rocket/src/main/scala/htif.scala | 22 +++++++++++++++++----- rocket/src/main/scala/instructions.scala | 2 +- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 30a2959e..572bad9d 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -115,7 +115,7 @@ class rocketDpathPCR extends Component val reg_status_et = Reg(resetVal = Bool(false)); val r_irq_timer = Reg(resetVal = Bool(false)); - val r_irq_ipi = Reg(resetVal = Bool(false)); + val r_irq_ipi = Reg(resetVal = Bool(true)) val rdata = Wire() { Bits() }; @@ -174,6 +174,8 @@ class rocketDpathPCR extends Component io.irq_timer := r_irq_timer; io.irq_ipi := r_irq_ipi; + io.host.ipi.valid := Bool(false) + io.host.ipi.bits := wdata when (wen) { when (waddr === PCR_STATUS) { @@ -194,8 +196,8 @@ class rocketDpathPCR extends Component when (waddr === PCR_COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } when (waddr === PCR_FROMHOST) { reg_fromhost := wdata; reg_tohost := Bits(0) } when (waddr === PCR_TOHOST) { reg_tohost := wdata; reg_fromhost := Bits(0) } - when (waddr === PCR_SEND_IPI) { r_irq_ipi := Bool(true); } - when (waddr === PCR_CLR_IPI) { r_irq_ipi := Bool(false); } + when (waddr === PCR_SEND_IPI) { io.host.ipi.valid := Bool(true) } 
+ when (waddr === PCR_CLR_IPI) { r_irq_ipi := wdata(0) } when (waddr === PCR_K0) { reg_k0 := wdata; } when (waddr === PCR_K1) { reg_k1 := wdata; } when (waddr === PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index e56ed49b..4b29f09c 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -28,6 +28,7 @@ class ioHTIF extends Bundle val debug = new ioDebug val pcr_req = (new ioDecoupled) { new PCRReq }.flip val pcr_rep = (new ioPipe) { Bits(width = 64) } + val ipi = (new ioDecoupled) { Bits(width = log2up(NTILES)) } } class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends Component @@ -194,18 +195,29 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } for (i <- 0 until ncores) { val my_reset = Reg(resetVal = Bool(true)) + val my_ipi = Reg(resetVal = Bool(false)) val rdata = Reg() { Bits() } val cpu = io.cpu(i) val me = pcr_coreid === UFix(i) - cpu.pcr_req.valid := state === state_pcr && me - cpu.pcr_req.bits.rw := cmd === cmd_writecr - cpu.pcr_req.bits.addr := pcr_addr - cpu.pcr_req.bits.data := pcr_wdata + cpu.pcr_req.valid := my_ipi || state === state_pcr && me + cpu.pcr_req.bits.rw := my_ipi || cmd === cmd_writecr + cpu.pcr_req.bits.addr := Mux(my_ipi, PCR_CLR_IPI, pcr_addr) + cpu.pcr_req.bits.data := my_ipi | pcr_wdata cpu.reset := my_reset + for (j <- 0 until ncores) { + when (io.cpu(j).ipi.valid && io.cpu(j).ipi.bits === UFix(i)) { + my_ipi := Bool(true) + my_reset := Bool(false) + } + } + when (my_ipi) { + my_ipi := !cpu.pcr_req.ready + } + when (state === state_pcr && me && cmd === cmd_writecr) { - pcr_done := cpu.pcr_req.ready + pcr_done := cpu.pcr_req.ready && !my_ipi when (pcr_addr === PCR_RESET) { my_reset := pcr_wdata(0) } diff --git a/rocket/src/main/scala/instructions.scala 
b/rocket/src/main/scala/instructions.scala index 3bd73b80..42aad6ef 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -255,5 +255,5 @@ object Instructions val VXCPTEVAC = Bits("b00000_?????_00000_0001000110_1111011",32) val VXCPTHOLD = Bits("b00000_00000_00000_0001001110_1111011",32) - val NOP = ADDI & Bits("b00000000000000000000001111111111", 32); + val NOP = Bits("b00000_00000_000000000000_000_0010011",32); } From a2f6d01c1b56fc714c44baf3376d07fbdbc7e11f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 9 May 2012 03:09:22 -0700 Subject: [PATCH 0414/1087] add programmable coreid register --- rocket/src/main/scala/consts.scala | 1 - rocket/src/main/scala/dpath_util.scala | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 50b20a6d..44bfae10 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -162,7 +162,6 @@ object Constants val IRQ_IPI = 5 val IRQ_TIMER = 7 - val COREID = 0; val PADDR_BITS = 40; val VADDR_BITS = 43; val PGIDX_BITS = 13; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 572bad9d..a301702c 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -97,6 +97,7 @@ class rocketDpathPCR extends Component val reg_cause = Reg() { Bits() }; val reg_tohost = Reg(resetVal = Bits(0, 64)); val reg_fromhost = Reg(resetVal = Bits(0, 64)); + val reg_coreid = Reg() { Bits() } val reg_k0 = Reg() { Bits() }; val reg_k1 = Reg() { Bits() }; val reg_ptbr = Reg() { UFix() }; @@ -194,6 +195,7 @@ class rocketDpathPCR extends Component when (waddr === PCR_EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toUFix; } when (waddr === PCR_COUNT) { reg_count := wdata(31,0).toUFix; } when (waddr === PCR_COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } + when (waddr === 
PCR_COREID) { reg_coreid := wdata(15,0) } when (waddr === PCR_FROMHOST) { reg_fromhost := wdata; reg_tohost := Bits(0) } when (waddr === PCR_TOHOST) { reg_tohost := wdata; reg_fromhost := Bits(0) } when (waddr === PCR_SEND_IPI) { io.host.ipi.valid := Bool(true) } @@ -212,7 +214,7 @@ class rocketDpathPCR extends Component is (PCR_COUNT) { rdata := Cat(Fill(32, reg_count(31)), reg_count); } is (PCR_COMPARE) { rdata := Cat(Fill(32, reg_compare(31)), reg_compare); } is (PCR_CAUSE) { rdata := Cat(reg_cause(5), Bits(0,58), reg_cause(4,0)); } - is (PCR_COREID) { rdata := Bits(COREID,64); } + is (PCR_COREID) { rdata := reg_coreid } is (PCR_IMPL) { rdata := Bits(2) } is (PCR_FROMHOST) { rdata := reg_fromhost; } is (PCR_TOHOST) { rdata := reg_tohost; } From d0bc995c88f7564c3e614a8cb2a382536df48287 Mon Sep 17 00:00:00 2001 From: Gage W Eads Date: Mon, 14 May 2012 22:25:12 -0700 Subject: [PATCH 0415/1087] Fixed IRQ_IPI -> IRQ_TIMER typo --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index dafe0d1f..7aeca367 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -493,7 +493,7 @@ class rocketCtrl extends Component vec_irq); val id_cause = Mux(p_irq_ipi, UFix(CAUSE_INTERRUPT+IRQ_IPI,6), - Mux(p_irq_timer, UFix(CAUSE_INTERRUPT+IRQ_IPI,6), + Mux(p_irq_timer, UFix(CAUSE_INTERRUPT+IRQ_TIMER,6), vec_irq_cause)) when (reset.toBool || io.dpath.killd) { From c9602a0d2ef21b3f70545e885a6ae9cea72acc22 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 15 May 2012 10:26:16 -0700 Subject: [PATCH 0416/1087] fix vector control decode bug --- rocket/src/main/scala/ctrl_vec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index a52be58a..7e4d2eec 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ 
-117,7 +117,7 @@ class rocketCtrlVecDecoder extends Component // wen | | | | | | | | | | | | | xcpthold // val vcmd vimm vimm2 | fn | | | | | | | | | | | | | | // | | | | | | | | | | | | | | | | | | | | - List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_X, N,N,N,N,N,N,N,N,N,N,N,N,N,N),Array( + List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,N,N),Array( VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFGVL,N,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,N), VVCFG-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, N,VEC_CFG, N,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,N), VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), From faee45bf4ca43fd2e29b30897920ab615c4a5a8f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 21 May 2012 07:25:35 -0700 Subject: [PATCH 0417/1087] fix setpcr/clearpcr not writing rd --- rocket/src/main/scala/ctrl.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 7aeca367..461f62ae 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -197,8 +197,8 @@ object rocketCtrlDecode REMUW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_RU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), SYSCALL-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,N), - SETPCR-> List(Y, N,N,BR_N, N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), - CLEARPCR-> List(Y, N,N,BR_N, N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_C,SYNC_N,N,N,Y,Y), + SETPCR-> List(Y, N,N,BR_N, N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), + CLEARPCR-> List(Y, N,N,BR_N, N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_C,SYNC_N,N,N,Y,Y), ERET-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,Y,N), 
FENCE-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), FENCE_I-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,Y), From 181b20d69c599c0914b378b9d515960ac08a62cf Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Fri, 18 May 2012 12:43:32 -0700 Subject: [PATCH 0418/1087] working vec unit with pvfb --- rocket/src/main/scala/consts.scala | 2 ++ rocket/src/main/scala/dpath_vec.scala | 7 ++++++- rocket/src/main/scala/top.scala | 26 ++++++++++++++++++++++++-- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 44bfae10..0cac1b92 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -10,6 +10,8 @@ object Constants val HAVE_FPU = true val HAVE_VEC = true + val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK + val MEM_BACKUP_WIDTH = 16 val BR_X = Bits("b????", 4) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 52f72618..3c2c60c7 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -46,7 +46,9 @@ class rocketDpathVec extends Component val nfregs = Mux(nfregs_stage(5), Bits(32), nfregs_stage) + UFix(0,7) val nregs = nxregs + nfregs - val uts_per_bank = MuxLookup( + //val uts_per_bank = UFix(4,9) + + val nreg_mod_bank = MuxLookup( nregs, UFix(4,9), Array( UFix(0,7) -> UFix(256,9), UFix(1,7) -> UFix(256,9), @@ -103,6 +105,9 @@ class rocketDpathVec extends Component UFix(52,7) -> UFix(5,9) )) + val uts_per_bank = Mux(nreg_mod_bank > UFix(MAX_THREADS,9), UFix(MAX_THREADS, 9), nreg_mod_bank) + + val reg_hwvl = Reg(resetVal = UFix(32, 12)) val reg_appvl0 = Reg(resetVal = Bool(true)) val hwvl_vcfg = (uts_per_bank * io.vecbankcnt)(11,0) diff --git a/rocket/src/main/scala/top.scala 
b/rocket/src/main/scala/top.scala index 55bf08f9..55f16e62 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -81,7 +81,29 @@ class Top extends Component } object top_main { - def main(args: Array[String]): Unit = { - chiselMain(args.drop(1), () => Class.forName(args(0)).newInstance.asInstanceOf[Component]) + def main(args: Array[String]): Unit = { + val design_args = args.slice(5, 10) + var i = 0 + while (i < design_args.length) { + val arg = design_args(i) + arg match { + case "--NUM_PVFB" => { + hwacha.Constants.NUM_PVFB = design_args(i+1).toInt + i += 1 + } + case "--WIDTH_PVFB" => { + hwacha.Constants.WIDTH_PVFB = design_args(i+1).toInt + hwacha.Constants.DEPTH_PVFB = design_args(i+1).toInt + i += 1 + } + case "--CG" => { + hwacha.Constants.coarseGrained = true + } + case any => println("UNKNOWN: " + arg) + } + println(arg) + i += 1 + } + chiselMain(args.slice(1,5), () => Class.forName(args(0)).newInstance.asInstanceOf[Component]) } } From 7408c9ab691ef124615d8cb8faff2cde342eaeb7 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Thu, 24 May 2012 10:33:15 -0700 Subject: [PATCH 0419/1087] removing wires --- rocket/src/main/scala/arbiter.scala | 4 ++-- rocket/src/main/scala/coherence.scala | 10 ++++----- rocket/src/main/scala/ctrl.scala | 6 +++--- rocket/src/main/scala/dpath.scala | 6 +++--- rocket/src/main/scala/dpath_util.scala | 6 +++--- rocket/src/main/scala/fpu.scala | 8 +++---- rocket/src/main/scala/icache.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 2 +- rocket/src/main/scala/uncore.scala | 30 +++++++++++++------------- rocket/src/main/scala/util.scala | 8 +++---- 10 files changed, 42 insertions(+), 42 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index ac741a9d..a96753dd 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -26,12 +26,12 @@ class rocketMemArbiter(n: Int) extends Component { xi_rdy = xi_rdy && 
!io.requestor(i).xact_init.valid } - var xi_bits = Wire() { new TransactionInit } + var xi_bits = new TransactionInit xi_bits := io.requestor(n-1).xact_init.bits xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2up(n))) for (i <- n-2 to 0 by -1) { - var my_xi_bits = Wire() { new TransactionInit } + var my_xi_bits = new TransactionInit my_xi_bits := io.requestor(i).xact_init.bits my_xi_bits.tile_xact_id := Cat(io.requestor(i).xact_init.bits.tile_xact_id, UFix(i, log2up(n))) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index b4b23b4b..04843fec 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -96,7 +96,7 @@ abstract class IncoherentPolicy extends CoherencePolicy { // UNIMPLEMENTED def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = state def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { - val reply = Wire() { new ProbeReply() } + val reply = new ProbeReply() reply.p_type := UFix(0) reply.global_xact_id := UFix(0) reply @@ -209,7 +209,7 @@ class MICoherence extends CoherencePolicyWithUncached { def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { - val reply = Wire() { new ProbeReply() } + val reply = new ProbeReply() val with_data = MuxLookup(incoming.p_type, probeRepInvalidateData, Array( probeReqInvalidate -> probeRepInvalidateData, probeReqCopy -> probeRepCopyData @@ -336,7 +336,7 @@ class MEICoherence extends CoherencePolicyWithUncached { def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { - val reply = Wire() { new ProbeReply() } + val reply = new ProbeReply() val with_data = MuxLookup(incoming.p_type, probeRepInvalidateData, Array( probeReqInvalidate -> 
probeRepInvalidateData, probeReqDowngrade -> probeRepDowngradeData, @@ -475,7 +475,7 @@ class MSICoherence extends CoherencePolicyWithUncached { def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { - val reply = Wire() { new ProbeReply() } + val reply = new ProbeReply() val with_data = MuxLookup(incoming.p_type, probeRepInvalidateData, Array( probeReqInvalidate -> probeRepInvalidateData, probeReqDowngrade -> probeRepDowngradeData, @@ -614,7 +614,7 @@ class MESICoherence extends CoherencePolicyWithUncached { def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { - val reply = Wire() { new ProbeReply() } + val reply = new ProbeReply() val with_data = MuxLookup(incoming.p_type, probeRepInvalidateData, Array( probeReqInvalidate -> probeRepInvalidateData, probeReqDowngrade -> probeRepDowngradeData, diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 461f62ae..dce2992a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -359,7 +359,7 @@ class rocketCtrl extends Component val id_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); val id_reg_icmiss = Reg(resetVal = Bool(false)); val id_reg_replay = Reg(resetVal = Bool(false)); - val id_load_use = Wire(){Bool()}; + val id_load_use = Bool(); val ex_reg_br_type = Reg(){Bits()} val ex_reg_btb_hit = Reg(){Bool()}; @@ -418,8 +418,8 @@ class rocketCtrl extends Component val wb_reg_fp_val = Reg(resetVal = Bool(false)); val wb_reg_fp_sboard_set = Reg(resetVal = Bool(false)); - val take_pc = Wire(){Bool()} - val take_pc_wb = Wire(){Bool()} + val take_pc = Bool() + val take_pc_wb = Bool() when (!io.dpath.stalld) { when (io.dpath.killf) { diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 3a6ee698..79e00bae 100644 
--- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -70,7 +70,7 @@ class rocketDpath extends Component val ex_reg_ctrl_div_val = Reg(resetVal = Bool(false)); val ex_reg_ctrl_div_fn = Reg() { UFix() }; val ex_reg_ctrl_sel_wb = Reg() { UFix() }; - val ex_wdata = Wire() { Bits() }; + val ex_wdata = Bits(); // memory definitions val mem_reg_pc = Reg() { UFix() }; @@ -93,9 +93,9 @@ class rocketDpath extends Component val wb_reg_raddr1 = Reg() { UFix() }; val wb_reg_raddr2 = Reg() { UFix() }; val wb_reg_ll_wb = Reg(resetVal = Bool(false)); - val wb_wdata = Wire() { Bits() }; + val wb_wdata = Bits(); - val dmem_resp_replay = Wire() { Bool() } + val dmem_resp_replay = Bool() val r_dmem_resp_replay = Reg(resetVal = Bool(false)); val r_dmem_fp_replay = Reg(resetVal = Bool(false)); val r_dmem_resp_waddr = Reg() { UFix() }; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index a301702c..0178dcf2 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -25,8 +25,8 @@ class rocketDpathBTB(entries: Int) extends Component val repl_way = LFSR16(io.wen)(log2up(entries)-1,0) // TODO: pseudo-LRU var hit_reduction = Bool(false) - val hit = Wire() { Bool() } - val update = Wire() { Bool() } + val hit = Bool() + val update = Bool() var update_reduction = Bool(false) val mux = (new Mux1H(entries)) { Bits(width = VADDR_BITS) } @@ -118,7 +118,7 @@ class rocketDpathPCR extends Component val r_irq_timer = Reg(resetVal = Bool(false)); val r_irq_ipi = Reg(resetVal = Bool(true)) - val rdata = Wire() { Bits() }; + val rdata = Bits(); val raddr = Mux(io.r.en, io.r.addr, io.host.pcr_req.bits.addr) io.host.pcr_rep.valid := io.host.pcr_req.valid && !io.r.en && !io.host.pcr_req.bits.rw diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 1da046cc..245514cd 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -214,10 
+214,10 @@ class rocketFPIntUnit extends Component val d2i = hardfloat.recodedFloatNToAny(io.in1, io.rm, ~io.cmd(1,0), 52, 12, 64) // output muxing - val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) + val (out_s, exc_s) = (Bits(), Bits()) out_s := Cat(Fill(32, unrec_s(31)), unrec_s) exc_s := Bits(0) - val (out_d, exc_d) = (Wire() { Bits() }, Wire() { Bits() }) + val (out_d, exc_d) = (Bits(), Bits()) out_d := unrec_d exc_d := Bits(0) @@ -292,10 +292,10 @@ class rocketFPUFastPipe extends Component val minmax = Mux(isnan2 || !isnan1 && (min === lt), io.in1, io.in2) // output muxing - val (out_s, exc_s) = (Wire() { Bits() }, Wire() { Bits() }) + val (out_s, exc_s) = (Bits(), Bits()) out_s := Reg(hardfloat.floatNToRecodedFloatN(io.fromint, 23, 9)) exc_s := Bits(0) - val (out_d, exc_d) = (Wire() { Bits() }, Wire() { Bits() }) + val (out_d, exc_d) = (Bits(), Bits()) out_d := Reg(hardfloat.floatNToRecodedFloatN(io.fromint, 52, 12)) exc_d := Bits(0) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index d505fd21..1f90f5f4 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -56,8 +56,8 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten val r_cpu_req_ppn = Reg { Bits() } val r_cpu_req_val = Reg(resetVal = Bool(false)); - val rdy = Wire() { Bool() } - val tag_hit = Wire() { Bool() } + val rdy = Bool() + val tag_hit = Bool() when (io.cpu.req_val && rdy) { r_cpu_req_val := Bool(true) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c2e93e8e..39c75107 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -803,7 +803,7 @@ class HellaCache(co: CoherencePolicy) extends Component { val r_req_read = r_req_load || r_req_amo val r_req_write = r_req_store || r_req_amo val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch - val nack_hit = Wire() { Bool() } + val nack_hit = 
Bool() val wb = new WritebackUnit(co) val prober = new ProbeUnit(co) diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 0f482113..ab56d0f4 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -281,20 +281,20 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH { val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(ntiles, _, co)) - val busy_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val addr_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=PADDR_BITS-OFFSET_BITS)} } - val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_XACT_ID_BITS)} } - val x_type_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=X_INIT_TYPE_MAX_BITS)} } - val sh_count_arr = Vec(NGLOBAL_XACTS){ Wire(){Bits(width=TILE_ID_BITS)} } - val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } + val busy_arr = Vec(NGLOBAL_XACTS){ Bool() } + val addr_arr = Vec(NGLOBAL_XACTS){ Bits(width=PADDR_BITS-OFFSET_BITS) } + val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_ID_BITS) } + val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_XACT_ID_BITS) } + val x_type_arr = Vec(NGLOBAL_XACTS){ Bits(width=X_INIT_TYPE_MAX_BITS) } + val sh_count_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_ID_BITS) } + val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Bool() } - val do_free_arr = Vec(NGLOBAL_XACTS){ Wire(){Bool()} } - val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Wire(){Bool()} } } - val p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Wire(){Bool()} } } - val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } - val p_data_tile_id_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bits(width = TILE_ID_BITS)} } - val p_data_valid_arr = Vec(NGLOBAL_XACTS){ Wire(){ Bool()} } + val do_free_arr = Vec(NGLOBAL_XACTS){ Bool() } + val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Bool()} } + val 
p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Bool()} } + val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Bool() } + val p_data_tile_id_arr = Vec(NGLOBAL_XACTS){ Bits(width = TILE_ID_BITS) } + val p_data_valid_arr = Vec(NGLOBAL_XACTS){ Bool() } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io @@ -410,13 +410,13 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH // Nack conflicting transaction init attempts val s_idle :: s_abort_drain :: s_abort_send :: Nil = Enum(3){ UFix() } val abort_state_arr = Vec(ntiles) { Reg(resetVal = s_idle) } - val want_to_abort_arr = Vec(ntiles) { Wire() { Bool()} } + val want_to_abort_arr = Vec(ntiles) { Bool() } for( j <- 0 until ntiles ) { val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data val x_abort = io.tiles(j).xact_abort val abort_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) - val conflicts = Vec(NGLOBAL_XACTS) { Wire() { Bool() } } + val conflicts = Vec(NGLOBAL_XACTS) { Bool() } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io conflicts(i) := t.busy && x_init.valid && co.isCoherenceConflict(t.addr, x_init.bits.address) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index d83ce82e..b01008ad 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -208,7 +208,7 @@ class RRArbiter[T <: Data](n: Int)(data: => T) extends Component { last_grant := choose } - val dvec = Vec(n) { Wire() { data } } + val dvec = Vec(n) { data } (0 until n).map(i => dvec(i) := io.in(i).bits ) io.out.valid := foldR(io.in.map(_.valid))(_||_) @@ -226,8 +226,8 @@ class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { val io = new ioLockingArbiter(n)(data) val locked = Vec(n) { Reg(resetVal = Bool(false)) } val any_lock_held = (locked.toBits & io.lock.toBits).orR - val valid_arr = Vec(n) { Wire() { Bool() } } - val bits_arr = Vec(n) { Wire() { data } } + val valid_arr = 
Vec(n) { Bool() } + val bits_arr = Vec(n) { data } for(i <- 0 until n) { valid_arr(i) := io.in(i).valid bits_arr(i) := io.in(i).bits @@ -270,7 +270,7 @@ object PriorityEncoderOH { def apply(in: Bits): UFix = doApply(in, 0) def doApply(in: Bits, n: Int = 0): UFix = { - val out = Vec(in.getWidth) { Wire() { Bool() } } + val out = Vec(in.getWidth) { Bool() } var none_hot = Bool(true) for (i <- 0 until in.getWidth) { out(i) := none_hot && in(i) From 7f6319047e4e6540e0d6eac4ab1abbed1ba1df37 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 6 Jun 2012 02:47:22 -0700 Subject: [PATCH 0420/1087] update to new scala/chisel/Mem --- rocket/src/main/scala/dpath_util.scala | 4 +-- rocket/src/main/scala/dtlb.scala | 3 +- rocket/src/main/scala/fpu.scala | 7 ++--- rocket/src/main/scala/icache.scala | 16 +++++----- rocket/src/main/scala/itlb.scala | 14 +++++---- rocket/src/main/scala/nbdcache.scala | 41 ++++++++++++++++---------- rocket/src/main/scala/queues.scala | 10 +++++-- rocket/src/main/scala/top.scala | 24 ++++++++------- 8 files changed, 69 insertions(+), 50 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 0178dcf2..2ca0f958 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -252,9 +252,7 @@ class rocketDpathRegfile extends Component override val io = new ioRegfile(); val regfile = Mem(32){ Bits(width=64) } - regfile.setReadLatency(0); - regfile.setTarget('inst); - regfile.write(io.w0.addr, io.w0.data, io.w0.en); + when (io.w0.en) { regfile(io.w0.addr) := io.w0.data } io.r0.data := Mux((io.r0.addr === UFix(0, 5)) || !io.r0.en, Bits(0, 64), regfile(io.r0.addr)); io.r1.data := Mux((io.r1.addr === UFix(0, 5)) || !io.r1.en, Bits(0, 64), regfile(io.r1.addr)); } diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 73f6f6fa..f4208d24 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -74,7 
+74,8 @@ class rocketDTLB(entries: Int) extends Component val bad_va = r_cpu_req_vpn(VPN_BITS) != r_cpu_req_vpn(VPN_BITS-1); val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); - val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); + val tag_ram = Mem(entries) { io.ptw.resp_ppn.clone } + when (io.ptw.resp_val) { tag_ram(r_refill_waddr) := io.ptw.resp_ppn } val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); tag_cam.io.clear := io.invalidate; diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 245514cd..c7ea88fa 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -464,9 +464,8 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val fsr_exc = Reg() { Bits(width = 5) } // regfile - val regfile = Mem(32, load_wb, load_wb_tag, load_wb_data_recoded); - regfile.setReadLatency(0); - regfile.setTarget('inst); + val regfile = Mem(32) { Bits(width = 65) } + when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded } val ex_rs1 = regfile.read(ex_reg_inst(26,22)) val ex_rs2 = regfile.read(ex_reg_inst(21,17)) @@ -591,7 +590,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component Mux(wsrc === UFix(2), fastpipe.io.exc_d, fastpipe.io.exc_s))) val waddr = winfo(0).toUFix >> UFix(2) - regfile.write(waddr(4,0), wdata, wen(0)) + when (wen(0)) { regfile(waddr(4,0)) := wdata } when (wb_reg_valid && wb_ctrl.toint || wen(0)) { fsr_exc := fsr_exc | diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 1f90f5f4..92377f47 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -97,10 +97,10 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten for (i <- 0 until assoc) { val repl_me = (repl_way === UFix(i)) - val tag_array = Mem(sets){ r_cpu_miss_tag } - tag_array.setReadLatency(1); - tag_array.setTarget('inst); - val tag_rdata = 
tag_array.rw(tag_addr, r_cpu_miss_tag, tag_we && repl_me); + val tag_array = Mem(sets){ Bits(width = tagbits) } + val tag_rdata = Reg() { Bits(width = tagbits) } + when (tag_we && repl_me) { tag_array(tag_addr) := r_cpu_miss_tag } + .otherwise { tag_rdata := tag_array(tag_addr) } // valid bit array val vb_array = Reg(resetVal = Bits(0, sets)); @@ -115,10 +115,10 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten val hit = valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) // data array - val data_array = Mem(sets*REFILL_CYCLES){ io.mem.xact_rep.bits.data } - data_array.setReadLatency(1); - data_array.setTarget('inst); - val data_out = data_array.rw(data_addr, io.mem.xact_rep.bits.data, io.mem.xact_rep.valid && repl_me) + val data_array = Mem(sets*REFILL_CYCLES){ io.mem.xact_rep.bits.data.clone } + val data_out = Reg(){ io.mem.xact_rep.bits.data.clone } + when (io.mem.xact_rep.valid && repl_me) { data_array(data_addr) := io.mem.xact_rep.bits.data } + .otherwise { data_out := data_array(data_addr) } data_mux.io.sel(i) := hit data_mux.io.in(i) := (data_out >> word_shift)(databits-1,0); diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index f8deb5cd..51e82e61 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -21,23 +21,24 @@ class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { class rocketCAM(entries: Int, tag_bits: Int) extends Component { val addr_bits = ceil(log(entries)/log(2)).toInt; val io = new ioCAM(entries, addr_bits, tag_bits); - val cam_tags = Mem(entries, io.write, io.write_addr, io.write_tag); + val cam_tags = Vec(entries) { Reg() { Bits(width = tag_bits) } } val mux = (new Mux1H(entries)) { Bits(width = addr_bits) } val vb_array = Reg(resetVal = Bits(0, entries)); + when (io.write) { + vb_array := vb_array.bitSet(io.write_addr, Bool(true)); + cam_tags(io.write_addr) := io.write_tag + } when (io.clear) { vb_array := Bits(0, 
entries); } .elsewhen (io.clear_hit) { vb_array := vb_array & ~mux.io.sel.toBits } - .elsewhen (io.write) { - vb_array := vb_array.bitSet(io.write_addr, Bool(true)); - } var l_hit = Bool(false) for (i <- 0 to entries-1) { - val my_hit = vb_array(UFix(i)).toBool && (cam_tags(UFix(i)) === io.tag) + val my_hit = vb_array(UFix(i)).toBool && (cam_tags(i) === io.tag) l_hit = l_hit || my_hit mux.io.in(i) := Bits(i) mux.io.sel(i) := my_hit @@ -136,7 +137,8 @@ class rocketITLB(entries: Int) extends Component val bad_va = r_cpu_req_vpn(VPN_BITS) != r_cpu_req_vpn(VPN_BITS-1); val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); - val tag_ram = Mem(entries, io.ptw.resp_val, r_refill_waddr.toUFix, io.ptw.resp_ppn); + val tag_ram = Mem(entries) { io.ptw.resp_ppn.clone } + when (io.ptw.resp_val) { tag_ram(r_refill_waddr) := io.ptw.resp_ppn } val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); tag_cam.io.clear := io.cpu.invalidate; diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 39c75107..d0601fde 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -321,9 +321,8 @@ class MSHRFile(co: CoherencePolicy) extends Component { val sdq_rdy = !sdq_val.andR val (req_read, req_write) = cpuCmdToRW(io.req.bits.cmd) val sdq_enq = io.req.valid && io.req.ready && req_write - val sdq = Mem(NSDQ, sdq_enq, sdq_alloc_id, io.req.bits.data) - sdq.setReadLatency(1); - sdq.setTarget('inst) + val sdq = Mem(NSDQ) { io.req.bits.data.clone } + when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } val tag_mux = (new Mux1H(NMSHR)){ Bits(width = TAG_BITS) } val wb_probe_mux = (new Mux1H(NMSHR)) { new WritebackReq } @@ -406,7 +405,9 @@ class MSHRFile(co: CoherencePolicy) extends Component { val sdq_free = replay.valid && replay.ready && replay_write sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, NSDQ)) | PriorityEncoderOH(~sdq_val(NSDQ-1,0)) & Fill(NSDQ, sdq_enq && io.req.bits.tag_miss) - 
io.data_req.bits.data := sdq.read(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) + val sdq_rdata = Reg() { io.req.bits.data.clone } + sdq_rdata := sdq(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) + io.data_req.bits.data := sdq_rdata io.cpu_resp_val := Reg(replay.valid && replay.ready && replay_read, resetVal = Bool(false)) io.cpu_resp_tag := Reg(replay.bits.cpu_tag) @@ -602,18 +603,23 @@ class MetaDataArray(lines: Int) extends Component { } val permissions_array = Mem(lines){ UFix(width = 2) } - permissions_array.write(io.state_req.bits.idx, io.state_req.bits.data.state, io.state_req.valid && io.state_req.bits.rw) - permissions_array.write(io.req.bits.idx, io.req.bits.data.state, io.req.valid && io.req.bits.rw) val raddr = Reg() { Bits() } - when (io.req.valid && !io.req.bits.rw) { raddr := io.req.bits.idx } - val permissions_rdata1 = permissions_array.read(raddr) + when (io.state_req.valid && io.state_req.bits.rw) { + permissions_array(io.state_req.bits.idx) := io.state_req.bits.data.state + } + when (io.req.valid) { + when (io.req.bits.rw) { permissions_array(io.req.bits.idx) := io.req.bits.data.state } + .otherwise { raddr := io.req.bits.idx } + } val tag_array = Mem(lines){ Bits(width=TAG_BITS) } - tag_array.setReadLatency(1); - tag_array.setTarget('inst) - val tag_rdata = tag_array.rw(io.req.bits.idx, io.req.bits.data.tag, io.req.valid && io.req.bits.rw, cs = io.req.valid) + val tag_rdata = Reg() { Bits() } + when (io.req.valid) { + when (io.req.bits.rw) { tag_array(io.req.bits.idx) := io.req.bits.data.tag } + .otherwise { tag_rdata := tag_array(io.req.bits.idx) } + } - io.resp.state := permissions_rdata1.toUFix + io.resp.state := permissions_array(raddr) io.resp.tag := tag_rdata io.req.ready := Bool(true) } @@ -652,12 +658,15 @@ class DataArray(lines: Int) extends Component { } val wmask = FillInterleaved(8, io.req.bits.wmask) + val addr = Cat(io.req.bits.idx, 
io.req.bits.offset) + val rdata = Reg() { Bits() } val array = Mem(lines*REFILL_CYCLES){ Bits(width=MEM_DATA_BITS) } - array.setReadLatency(1); - array.setTarget('inst) - val addr = Cat(io.req.bits.idx, io.req.bits.offset) - val rdata = array.rw(addr, io.req.bits.data, io.req.valid && io.req.bits.rw, wmask, cs = io.req.valid) + when (io.req.valid) { + when (io.req.bits.rw) { array.write(addr, io.req.bits.data, wmask) } + .otherwise { rdata := array(addr) } + } + io.resp := rdata io.req.ready := Bool(true) } diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 0d92ddfc..44af8ff8 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -50,9 +50,12 @@ class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = } } + val ram = Vec(entries) { Reg() { data } } + when (do_enq) { ram(enq_ptr) := io.enq.bits } + io.deq.valid := maybe_full || enq_ptr != deq_ptr io.enq.ready := !maybe_full || enq_ptr != deq_ptr || (if (pipe) io.deq.ready else Bool(false)) - io.deq.bits <> Mem(entries, do_enq, enq_ptr, io.enq.bits).read(deq_ptr) + io.deq.bits <> ram(deq_ptr) } object Queue @@ -76,8 +79,11 @@ class pipereg[T <: Data]()(data: => T) extends Component // bits := io.enq.bits //} + val reg = Reg() { io.enq.bits.clone } + when (io.enq.valid) { reg := io.enq.bits } + io.deq.valid := Reg(io.enq.valid, resetVal = Bool(false)) - io.deq.bits <> Mem(1, io.enq.valid, UFix(0), io.enq.bits).read(UFix(0)) + io.deq.bits <> reg } object Pipe diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 55f16e62..03429459 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -3,6 +3,7 @@ package rocket import Chisel._ import Node._; import Constants._; +import collection.mutable._ class ioTop(htif_width: Int) extends Bundle { val debug = new ioDebug(); @@ -82,28 +83,31 @@ class Top extends Component object top_main { def main(args: Array[String]): 
Unit = { - val design_args = args.slice(5, 10) - var i = 0 - while (i < design_args.length) { - val arg = design_args(i) + val top = args(0) + val chiselArgs = ArrayBuffer[String]() + + var i = 1 + while (i < args.length) { + val arg = args(i) arg match { case "--NUM_PVFB" => { - hwacha.Constants.NUM_PVFB = design_args(i+1).toInt + hwacha.Constants.NUM_PVFB = args(i+1).toInt i += 1 } case "--WIDTH_PVFB" => { - hwacha.Constants.WIDTH_PVFB = design_args(i+1).toInt - hwacha.Constants.DEPTH_PVFB = design_args(i+1).toInt + hwacha.Constants.WIDTH_PVFB = args(i+1).toInt + hwacha.Constants.DEPTH_PVFB = args(i+1).toInt i += 1 } case "--CG" => { hwacha.Constants.coarseGrained = true } - case any => println("UNKNOWN: " + arg) + case any => chiselArgs += arg } - println(arg) i += 1 } - chiselMain(args.slice(1,5), () => Class.forName(args(0)).newInstance.asInstanceOf[Component]) + println(chiselArgs) + + chiselMain(chiselArgs.toArray, () => Class.forName(top).newInstance.asInstanceOf[Component]) } } From 943b6d0616481b0aaaf63ab0a045352eb32734d2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 6 Jun 2012 02:48:48 -0700 Subject: [PATCH 0421/1087] remove debug println --- rocket/src/main/scala/top.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 03429459..13f99e0a 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -106,7 +106,6 @@ object top_main { } i += 1 } - println(chiselArgs) chiselMain(chiselArgs.toArray, () => Class.forName(top).newInstance.asInstanceOf[Component]) } From c975c21e447d502aa6d3395d0a54ca2180210ab5 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Tue, 5 Jun 2012 13:30:39 -0700 Subject: [PATCH 0422/1087] views removed --- rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/htif.scala | 4 ++-- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/itlb.scala | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index dce2992a..fbb05994 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -75,7 +75,7 @@ class ioCtrlDpath extends Bundle() class ioCtrlAll extends Bundle() { val dpath = new ioCtrlDpath(); - val imem = new ioImem(List("req_val", "resp_val")).flip + val imem = new ioImem().flip val dmem = new ioHellaCache val dtlb_val = Bool(OUTPUT); val dtlb_kill = Bool(OUTPUT); diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 4b29f09c..726dc553 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -4,12 +4,12 @@ import Chisel._ import Node._; import Constants._; -class ioDebug(view: List[String] = null) extends Bundle(view) +class ioDebug extends Bundle { val error_mode = Bool(OUTPUT); } -class ioHost(w: Int, view: List[String] = null) extends Bundle(view) +class ioHost(w: Int) extends Bundle { val in = new ioDecoupled()(Bits(width = w)).flip val out = new ioDecoupled()(Bits(width = w)) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 92377f47..1fedc7da 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -6,7 +6,7 @@ import Constants._; import scala.math._; // interface between I$ and pipeline/ITLB (32 bits wide) -class ioImem(view: List[String] = null) extends Bundle (view) +class ioImem extends Bundle { val invalidate = Bool(INPUT); val itlb_miss = Bool(INPUT); diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index 51e82e61..a14717a3 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -87,7 +87,7 @@ class ioTLB_PTW extends Bundle } // interface between ITLB and fetch stage of pipeline -class ioITLB_CPU(view: List[String] = null) extends Bundle(view) +class ioITLB_CPU extends Bundle { // status bits (from PCR), to check current permission and whether VM 
is enabled val status = Bits(32, INPUT); From 04304fe788cf6162dc57834d248a7a4dd8c47fb7 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Wed, 6 Jun 2012 12:47:17 -0700 Subject: [PATCH 0423/1087] moving util out into Chisel standard library --- rocket/src/main/scala/arbiter.scala | 12 +- rocket/src/main/scala/consts.scala | 8 +- rocket/src/main/scala/cpu.scala | 2 +- rocket/src/main/scala/ctrl_util.scala | 4 +- rocket/src/main/scala/divider.scala | 2 +- rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/dtlb.scala | 2 +- rocket/src/main/scala/htif.scala | 26 +-- rocket/src/main/scala/icache.scala | 12 +- rocket/src/main/scala/icache_prefetch.scala | 4 +- rocket/src/main/scala/itlb.scala | 6 +- rocket/src/main/scala/memserdes.scala | 12 +- rocket/src/main/scala/multiplier.scala | 6 +- rocket/src/main/scala/nbdcache.scala | 30 ++-- rocket/src/main/scala/ptw.scala | 10 +- rocket/src/main/scala/queues.scala | 4 +- rocket/src/main/scala/slowio.scala | 4 +- rocket/src/main/scala/uncore.scala | 10 +- rocket/src/main/scala/util.scala | 190 -------------------- 19 files changed, 78 insertions(+), 268 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index a96753dd..c4fce66c 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -28,12 +28,12 @@ class rocketMemArbiter(n: Int) extends Component { var xi_bits = new TransactionInit xi_bits := io.requestor(n-1).xact_init.bits - xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2up(n))) + xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2Up(n))) for (i <- n-2 to 0 by -1) { var my_xi_bits = new TransactionInit my_xi_bits := io.requestor(i).xact_init.bits - my_xi_bits.tile_xact_id := Cat(io.requestor(i).xact_init.bits.tile_xact_id, UFix(i, log2up(n))) + my_xi_bits.tile_xact_id := Cat(io.requestor(i).xact_init.bits.tile_xact_id, UFix(i, log2Up(n))) xi_bits = 
Mux(io.requestor(i).xact_init.valid, my_xi_bits, xi_bits) } @@ -60,17 +60,17 @@ class rocketMemArbiter(n: Int) extends Component { for (i <- 0 until n) { val tag = io.mem.xact_rep.bits.tile_xact_id - io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid && tag(log2up(n)-1,0) === UFix(i) + io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid && tag(log2Up(n)-1,0) === UFix(i) io.requestor(i).xact_rep.bits := io.mem.xact_rep.bits - io.requestor(i).xact_rep.bits.tile_xact_id := tag >> UFix(log2up(n)) + io.requestor(i).xact_rep.bits.tile_xact_id := tag >> UFix(log2Up(n)) } for (i <- 0 until n) { val tag = io.mem.xact_abort.bits.tile_xact_id - io.requestor(i).xact_abort.valid := io.mem.xact_abort.valid && tag(log2up(n)-1,0) === UFix(i) + io.requestor(i).xact_abort.valid := io.mem.xact_abort.valid && tag(log2Up(n)-1,0) === UFix(i) io.requestor(i).xact_abort.bits := io.mem.xact_abort.bits - io.requestor(i).xact_abort.bits.tile_xact_id := tag >> UFix(log2up(n)) + io.requestor(i).xact_abort.bits.tile_xact_id := tag >> UFix(log2Up(n)) } io.mem.xact_abort.ready := Bool(true) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 0cac1b92..32cefbe7 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -176,7 +176,7 @@ object Constants val DCACHE_PORTS = 3 val CPU_DATA_BITS = 64; val CPU_TAG_BITS = 9; - val DCACHE_TAG_BITS = log2up(DCACHE_PORTS) + CPU_TAG_BITS + val DCACHE_TAG_BITS = log2Up(DCACHE_PORTS) + CPU_TAG_BITS val OFFSET_BITS = 6; // log2(cache line size in bytes) val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses val NRPQ = 16; // number of secondary misses @@ -192,9 +192,9 @@ object Constants val ENABLE_CLEAN_EXCLUSIVE = true val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8 - val TILE_ID_BITS = log2up(NTILES)+1 - val TILE_XACT_ID_BITS = log2up(NMSHR)+3 - val GLOBAL_XACT_ID_BITS = log2up(NTILES*NMSHR)+1 + val TILE_ID_BITS = log2Up(NTILES)+1 + val TILE_XACT_ID_BITS = log2Up(NMSHR)+3 
+ val GLOBAL_XACT_ID_BITS = log2Up(NTILES*NMSHR)+1 val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS val X_INIT_TYPE_MAX_BITS = 2 diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 0799b8e0..756476f6 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -31,7 +31,7 @@ class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) vu = new vu() // cpu, vector prefetch, and vector use the DTLB val dtlbarb = new RRArbiter(3)({new ioDTLB_CPU_req_bundle()}) - val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2up(3))) + val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2Up(3))) when( dtlb.io.cpu_req.ready && dtlbarb.io.out.valid ) { dtlbchosen := dtlbarb.io.chosen } // tlb respones come out a cycle later diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index e74b748e..77d855bb 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -6,12 +6,12 @@ import Node._; class rocketCtrlSboard(entries: Int, nread: Int, nwrite: Int) extends Component { class read_port extends Bundle { - val addr = UFix(log2up(entries), INPUT) + val addr = UFix(log2Up(entries), INPUT) val data = Bool(OUTPUT) } class write_port extends Bundle { val en = Bool(INPUT) - val addr = UFix(log2up(entries), INPUT) + val addr = UFix(log2Up(entries), INPUT) val data = Bool(INPUT) } diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 26fc42b8..b016ee04 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -66,7 +66,7 @@ class rocketDivider(width: Int) extends Component { val rhs_in = Cat(rhs_hi, io.req.bits.in1(width/2-1,0)) when ((state === s_ready) && io.req.valid) { - count := UFix(0, log2up(width+1)); + count := UFix(0, log2Up(width+1)); half := (dw === DW_32); neg_quo := Bool(false); neg_rem := Bool(false); diff --git a/rocket/src/main/scala/dpath_util.scala 
b/rocket/src/main/scala/dpath_util.scala index 2ca0f958..d93149fa 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -22,7 +22,7 @@ class rocketDpathBTB(entries: Int) extends Component { val io = new ioDpathBTB(); - val repl_way = LFSR16(io.wen)(log2up(entries)-1,0) // TODO: pseudo-LRU + val repl_way = LFSR16(io.wen)(log2Up(entries)-1,0) // TODO: pseudo-LRU var hit_reduction = Bool(false) val hit = Bool() diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index f4208d24..6246e8ad 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -17,7 +17,7 @@ class ioDTLB_CPU_req_bundle extends Bundle val asid = Bits(width=ASID_BITS) val vpn = Bits(width=VPN_BITS+1) } -class ioDTLB_CPU_req extends hwacha.ioDecoupled()( { new ioDTLB_CPU_req_bundle() } ) +class ioDTLB_CPU_req extends ioDecoupled()( { new ioDTLB_CPU_req_bundle() } ) class ioDTLB_CPU_resp extends Bundle { diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 726dc553..c7df3b69 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -28,7 +28,7 @@ class ioHTIF extends Bundle val debug = new ioDebug val pcr_req = (new ioDecoupled) { new PCRReq }.flip val pcr_rep = (new ioPipe) { Bits(width = 64) } - val ipi = (new ioDecoupled) { Bits(width = log2up(NTILES)) } + val ipi = (new ioDecoupled) { Bits(width = log2Up(NTILES)) } } class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends Component @@ -43,7 +43,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C val long_request_bits = 576 require(short_request_bits % w == 0) - val rx_count_w = 13 + log2up(64) - log2up(w) // data size field is 12 bits + val rx_count_w = 13 + log2Up(64) - log2Up(w) // data size field is 12 bits val rx_count = Reg(resetVal = UFix(0,rx_count_w)) val rx_shifter = Reg() { Bits(width = short_request_bits) } val 
rx_shifter_in = Cat(io.host.in.bits, rx_shifter(short_request_bits-1,w)) @@ -65,18 +65,18 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C } } - val rx_word_count = (rx_count >> UFix(log2up(short_request_bits/w))) - val rx_word_done = io.host.in.valid && rx_count(log2up(short_request_bits/w)-1,0).andR + val rx_word_count = (rx_count >> UFix(log2Up(short_request_bits/w))) + val rx_word_done = io.host.in.valid && rx_count(log2Up(short_request_bits/w)-1,0).andR val packet_ram_depth = long_request_bits/short_request_bits-1 val packet_ram = Vec(packet_ram_depth) { Reg() { Bits(width = short_request_bits) } } when (rx_word_done && io.host.in.ready) { - packet_ram(rx_word_count(log2up(packet_ram_depth)-1,0) - UFix(1)) := rx_shifter_in + packet_ram(rx_word_count(log2Up(packet_ram_depth)-1,0) - UFix(1)) := rx_shifter_in } val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } val pcr_addr = addr(4,0) - val pcr_coreid = if (ncores == 1) UFix(0) else addr(20+log2up(ncores),20) + val pcr_coreid = if (ncores == 1) UFix(0) else addr(20+log2Up(ncores),20) val pcr_wdata = packet_ram(0) val bad_mem_packet = size(OFFSET_BITS-1-3,0).orR || addr(OFFSET_BITS-1-3,0).orR @@ -85,14 +85,14 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C Bool(true))) val tx_count = Reg(resetVal = UFix(0, rx_count_w)) - val tx_subword_count = tx_count(log2up(short_request_bits/w)-1,0) - val tx_word_count = tx_count(rx_count_w-1, log2up(short_request_bits/w)) - val packet_ram_raddr = tx_word_count(log2up(packet_ram_depth)-1,0) - UFix(1) + val tx_subword_count = tx_count(log2Up(short_request_bits/w)-1,0) + val tx_word_count = tx_count(rx_count_w-1, log2Up(short_request_bits/w)) + val packet_ram_raddr = tx_word_count(log2Up(packet_ram_depth)-1,0) - UFix(1) when (io.host.out.valid && io.host.out.ready) { tx_count := tx_count + UFix(1) } - val rx_done = rx_word_done && Mux(rx_word_count 
=== UFix(0), next_cmd != cmd_writemem && next_cmd != cmd_writecr, rx_word_count === size || rx_word_count(log2up(packet_ram_depth)-1,0) === UFix(0)) + val rx_done = rx_word_done && Mux(rx_word_count === UFix(0), next_cmd != cmd_writemem && next_cmd != cmd_writecr, rx_word_count === size || rx_word_count(log2Up(packet_ram_depth)-1,0) === UFix(0)) val tx_size = Mux(!nack && (cmd === cmd_readmem || cmd === cmd_readcr), size, UFix(0)) val tx_done = io.host.out.ready && tx_subword_count.andR && (tx_word_count === tx_size || tx_word_count > UFix(0) && packet_ram_raddr.andR) @@ -122,7 +122,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C state := state_tx } - val mem_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val mem_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) when (state === state_mem_req && io.mem.xact_init.ready) { state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) } @@ -170,7 +170,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C var mem_req_data: Bits = null for (i <- 0 until MEM_DATA_BITS/short_request_bits) { - val idx = Cat(mem_cnt, UFix(i, log2up(MEM_DATA_BITS/short_request_bits))) + val idx = Cat(mem_cnt, UFix(i, log2Up(MEM_DATA_BITS/short_request_bits))) when (state === state_mem_rdata && io.mem.xact_rep.valid) { packet_ram(idx) := io.mem.xact_rep.bits.data((i+1)*short_request_bits-1, i*short_request_bits) } @@ -240,5 +240,5 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C io.host.in.ready := state === state_rx io.host.out.valid := state === state_tx - io.host.out.bits := tx_data >> Cat(tx_count(log2up(short_request_bits/w)-1,0), Bits(0, log2up(w))) + io.host.out.bits := tx_data >> Cat(tx_count(log2Up(short_request_bits/w)-1,0), Bits(0, log2Up(w))) } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 1fedc7da..3f6523d6 100644 --- a/rocket/src/main/scala/icache.scala +++ 
b/rocket/src/main/scala/icache.scala @@ -33,7 +33,7 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten val lines = sets * assoc; val addrbits = PADDR_BITS; - val indexbits = log2up(sets); + val indexbits = log2Up(sets); val offsetbits = OFFSET_BITS; val tagmsb = addrbits - 1; val taglsb = indexbits+offsetbits; @@ -42,11 +42,11 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten val indexlsb = offsetbits; val offsetmsb = indexlsb-1; val databits = 32; - val offsetlsb = log2up(databits/8); - val rf_cnt_bits = log2up(REFILL_CYCLES); + val offsetlsb = log2Up(databits/8); + val rf_cnt_bits = log2Up(REFILL_CYCLES); require(PGIDX_BITS >= taglsb); // virtually-indexed, physically-tagged constraint - require(ispow2(sets) && ispow2(assoc)); + require(isPow2(sets) && isPow2(assoc)); val s_reset :: s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_reset); @@ -82,8 +82,8 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten } val refill_done = io.mem.xact_rep.valid && refill_count.andR - val repl_way = if (assoc == 1) UFix(0) else LFSR16(state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss && !tag_hit)(log2up(assoc)-1,0) - val word_shift = Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2up(databits))).toUFix + val repl_way = if (assoc == 1) UFix(0) else LFSR16(state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss && !tag_hit)(log2Up(assoc)-1,0) + val word_shift = Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2Up(databits))).toUFix val tag_we = refill_done val tag_addr = Mux((state === s_refill), r_cpu_req_idx(indexmsb,indexlsb), diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala index 956dd08f..45ed9fc2 100644 --- a/rocket/src/main/scala/icache_prefetch.scala +++ b/rocket/src/main/scala/icache_prefetch.scala @@ -43,7 +43,7 @@ class 
rocketIPrefetcher(co: CoherencePolicyWithUncached) extends Component finish_arb.io.in(1) <> finish_q.io.deq io.mem.xact_finish <> finish_arb.io.out - val fill_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val fill_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) when (ip_mem_resp_val) { fill_cnt := fill_cnt + UFix(1) } val fill_done = fill_cnt.andR && ip_mem_resp_val @@ -51,7 +51,7 @@ class rocketIPrefetcher(co: CoherencePolicyWithUncached) extends Component finish_q.io.enq.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id val forward = Reg(resetVal = Bool(false)) - val forward_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val forward_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) when (forward && pdq.io.deq.valid) { forward_cnt := forward_cnt + UFix(1) } val forward_done = forward_cnt.andR && pdq.io.deq.valid forward := demand_miss && hit || forward && !forward_done diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index a14717a3..d11fc359 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -55,7 +55,7 @@ class PseudoLRU(n: Int) def access(way: UFix) = { var next_state = state var idx = UFix(1,1) - for (i <- log2up(n)-1 to 0 by -1) { + for (i <- log2Up(n)-1 to 0 by -1) { val bit = way(i) val mask = (UFix(1,n) << idx)(n-1,0) next_state = next_state & ~mask | Mux(bit, UFix(0), mask) @@ -66,9 +66,9 @@ class PseudoLRU(n: Int) } def replace = { var idx = UFix(1,1) - for (i <- 0 until log2up(n)) + for (i <- 0 until log2Up(n)) idx = Cat(idx, state(idx)) - idx(log2up(n)-1,0) + idx(log2Up(n)-1,0) } } diff --git a/rocket/src/main/scala/memserdes.scala b/rocket/src/main/scala/memserdes.scala index 27bbaa95..8dc6a8ae 100644 --- a/rocket/src/main/scala/memserdes.scala +++ b/rocket/src/main/scala/memserdes.scala @@ -26,8 +26,8 @@ class MemSerdes extends Component val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(5) { UFix() } val state = 
Reg(resetVal = s_idle) - val send_cnt = Reg(resetVal = UFix(0, log2up((max(abits, dbits)+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) - val data_send_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val send_cnt = Reg(resetVal = UFix(0, log2Up((max(abits, dbits)+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) + val data_send_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/MEM_BACKUP_WIDTH) val ddone = io.narrow.req.ready && send_cnt === UFix((dbits-1)/MEM_BACKUP_WIDTH) @@ -67,8 +67,8 @@ class MemSerdes extends Component send_cnt := UFix(0) } - val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) - val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val recv_cnt = Reg(resetVal = UFix(0, log2Up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) + val data_recv_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) val resp_val = Reg(resetVal = Bool(false)) resp_val := Bool(false) @@ -98,8 +98,8 @@ class MemDessert extends Component // test rig side val rbits = io.wide.resp.bits.getWidth require(dbits >= abits && rbits >= dbits) - val recv_cnt = Reg(resetVal = UFix(0, log2up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) - val data_recv_cnt = Reg(resetVal = UFix(0, log2up(REFILL_CYCLES))) + val recv_cnt = Reg(resetVal = UFix(0, log2Up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) + val data_recv_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) val adone = io.narrow.req.valid && recv_cnt === UFix((abits-1)/MEM_BACKUP_WIDTH) val ddone = io.narrow.req.valid && recv_cnt === UFix((dbits-1)/MEM_BACKUP_WIDTH) val rdone = io.narrow.resp.valid && recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index a29ab658..a0e69332 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -26,7 +26,7 @@ class rocketVUMultiplier(nwbq: Int) extends 
Component { } val valid = Reg(resetVal = Bits(0, IMUL_STAGES)) - val wbq_cnt = Reg(resetVal = Bits(0, log2up(nwbq+1))) + val wbq_cnt = Reg(resetVal = Bits(0, log2Up(nwbq+1))) val tag = Vec(IMUL_STAGES) { Reg() { Bits() } } val fire = io.cpu.req.valid && io.cpu.req.ready @@ -80,7 +80,7 @@ class rocketMultiplier extends Component { val r_lhs = Reg { Bits() } val r_prod= Reg { Bits(width = w*2) } val r_lsb = Reg { Bits() } - val r_cnt = Reg { UFix(width = log2up(cycles+1)) } + val r_cnt = Reg { UFix(width = log2Up(cycles+1)) } val dw = io.req.bits.fn(io.req.bits.fn.width-1) val fn = io.req.bits.fn(io.req.bits.fn.width-2,0) @@ -99,7 +99,7 @@ class rocketMultiplier extends Component { when (io.req.valid && io.req.ready) { r_val := Bool(true) - r_cnt := UFix(0, log2up(cycles+1)) + r_cnt := UFix(0, log2Up(cycles+1)) r_dw := dw r_fn := fn r_tag := io.req_tag diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index d0601fde..7c12a2cd 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -6,7 +6,7 @@ import Constants._ class ioReplacementWayGen extends Bundle { val pick_new_way = Bool(dir = INPUT) val way_en = Bits(width = NWAYS, dir = INPUT) - val way_id = UFix(width = log2up(NWAYS), dir = OUTPUT) + val way_id = UFix(width = log2Up(NWAYS), dir = OUTPUT) } class RandomReplacementWayGen extends Component { @@ -15,7 +15,7 @@ class RandomReplacementWayGen extends Component { io.way_id := UFix(0) if(NWAYS > 1) { - val rand_way_id = LFSR16(io.pick_new_way)(log2up(NWAYS)-1,0) + val rand_way_id = LFSR16(io.pick_new_way)(log2Up(NWAYS)-1,0) when (rand_way_id < UFix(NWAYS)) { io.way_id := rand_way_id } } } @@ -58,7 +58,7 @@ class StoreDataGen extends Component { class LoadDataGen extends Component { val io = new Bundle { val typ = Bits(3, INPUT) - val addr = Bits(log2up(MEM_DATA_BITS/8), INPUT) + val addr = Bits(log2Up(MEM_DATA_BITS/8), INPUT) val din = Bits(MEM_DATA_BITS, INPUT) val dout = Bits(64, 
OUTPUT) val r_dout = Bits(64, OUTPUT) @@ -112,7 +112,7 @@ class RPQEntry extends Bundle { val offset = Bits(width = OFFSET_BITS) val cmd = Bits(width = 4) val typ = Bits(width = 3) - val sdq_id = UFix(width = log2up(NSDQ)) + val sdq_id = UFix(width = log2Up(NSDQ)) val cpu_tag = Bits(width = DCACHE_TAG_BITS) } @@ -133,7 +133,7 @@ class DataReq extends Bundle { class DataArrayReq extends Bundle { val way_en = Bits(width = NWAYS) val idx = Bits(width = IDX_BITS) - val offset = Bits(width = log2up(REFILL_CYCLES)) + val offset = Bits(width = log2Up(REFILL_CYCLES)) val rw = Bool() val wmask = Bits(width = MEM_DATA_BITS/8) val data = Bits(width = MEM_DATA_BITS) @@ -165,11 +165,11 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { val req_sec_val = Bool(INPUT) val req_sec_rdy = Bool(OUTPUT) val req_bits = new MSHRReq().asInput - val req_sdq_id = UFix(log2up(NSDQ), INPUT) + val req_sdq_id = UFix(log2Up(NSDQ), INPUT) val idx_match = Bool(OUTPUT) val idx = Bits(IDX_BITS, OUTPUT) - val refill_count = Bits(log2up(REFILL_CYCLES), OUTPUT) + val refill_count = Bits(log2Up(REFILL_CYCLES), OUTPUT) val tag = Bits(TAG_BITS, OUTPUT) val way_oh = Bits(NWAYS, OUTPUT) @@ -190,7 +190,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { val xacx_type = Reg { UFix() } val line_state = Reg { UFix() } - val refill_count = Reg { UFix(width = log2up(REFILL_CYCLES)) } + val refill_count = Reg { UFix(width = log2Up(REFILL_CYCLES)) } val req = Reg { new MSHRReq() } val req_cmd = io.req_bits.cmd @@ -298,7 +298,7 @@ class MSHRFile(co: CoherencePolicy) extends Component { val secondary_miss = Bool(OUTPUT) val mem_resp_idx = Bits(IDX_BITS, OUTPUT) - val mem_resp_offset = Bits(log2up(REFILL_CYCLES), OUTPUT) + val mem_resp_offset = Bits(log2Up(REFILL_CYCLES), OUTPUT) val mem_resp_way_oh = Bits(NWAYS, OUTPUT) val fence_rdy = Bool(OUTPUT) @@ -429,7 +429,7 @@ class WritebackUnit(co: CoherencePolicy) extends Component { val is_probe = Reg() { Bool() } val data_req_fired = 
Reg(resetVal = Bool(false)) val cmd_sent = Reg() { Bool() } - val cnt = Reg() { UFix(width = log2up(REFILL_CYCLES+1)) } + val cnt = Reg() { UFix(width = log2Up(REFILL_CYCLES+1)) } val req = Reg() { new WritebackReq() } val dout_rdy = Mux(is_probe, io.probe_rep_data.ready, io.mem_req_data.ready) @@ -555,9 +555,9 @@ class FlushUnit(lines: Int, co: CoherencePolicy) extends Component { val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: Nil = Enum(4) { UFix() } val state = Reg(resetVal = s_reset) - val idx_cnt = Reg(resetVal = UFix(0, log2up(lines))) + val idx_cnt = Reg(resetVal = UFix(0, log2Up(lines))) val next_idx_cnt = idx_cnt + UFix(1) - val way_cnt = if (NWAYS == 1) UFix(0) else Reg(resetVal = UFix(0, log2up(NWAYS))) + val way_cnt = if (NWAYS == 1) UFix(0) else Reg(resetVal = UFix(0, log2Up(NWAYS))) val next_way_cnt = way_cnt + UFix(1) switch (state) { @@ -777,8 +777,8 @@ class HellaCache(co: CoherencePolicy) extends Component { val indexmsb = taglsb-1 val indexlsb = offsetbits val offsetmsb = indexlsb-1 - val offsetlsb = log2up(CPU_DATA_BITS/8) - val ramindexlsb = log2up(MEM_DATA_BITS/8) + val offsetlsb = log2Up(CPU_DATA_BITS/8) + val ramindexlsb = log2Up(MEM_DATA_BITS/8) val early_nack = Reg { Bool() } val r_cpu_req_val_ = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false)) @@ -1008,7 +1008,7 @@ class HellaCache(co: CoherencePolicy) extends Component { val store_offset = Mux(!replay_fire, p_store_idx(offsetmsb,0), replay.offset) maskgen.io.typ := Mux(!replay_fire, p_store_type, replay.typ) maskgen.io.addr := store_offset(offsetlsb-1,0) - val store_wmask_wide = maskgen.io.wmask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2up(CPU_DATA_BITS/8))).toUFix + val store_wmask_wide = maskgen.io.wmask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2Up(CPU_DATA_BITS/8))).toUFix val store_data = Mux(!replay_fire, p_store_data, replay.data) val store_data_wide = Fill(MEM_DATA_BITS/CPU_DATA_BITS, store_data) data_arb.io.in(1).bits.data 
:= store_data_wide diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 38364c85..f1d05e06 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -14,7 +14,7 @@ class ioHellaCacheArbiter(n: Int) extends Bundle class rocketHellaCacheArbiter(n: Int) extends Component { val io = new ioHellaCacheArbiter(n) - require(DCACHE_TAG_BITS >= log2up(n) + CPU_TAG_BITS) + require(DCACHE_TAG_BITS >= log2Up(n) + CPU_TAG_BITS) var req_val = Bool(false) var req_rdy = io.mem.req.ready @@ -41,7 +41,7 @@ class rocketHellaCacheArbiter(n: Int) extends Component req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn) req_data = Mux(Reg(r.valid), r.bits.data, req_data) req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill) - req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2up(n))), req_tag) + req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2Up(n))), req_tag) } io.mem.req.valid := req_val @@ -57,7 +57,7 @@ class rocketHellaCacheArbiter(n: Int) extends Component { val r = io.requestor(i).resp val x = io.requestor(i).xcpt - val tag_hit = io.mem.resp.bits.tag(log2up(n)-1,0) === UFix(i) + val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i) x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid) x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid) r.valid := io.mem.resp.valid && tag_hit @@ -67,7 +67,7 @@ class rocketHellaCacheArbiter(n: Int) extends Component r.bits.data := io.mem.resp.bits.data r.bits.data_subword := io.mem.resp.bits.data_subword r.bits.typ := io.mem.resp.bits.typ - r.bits.tag := io.mem.resp.bits.tag >> UFix(log2up(n)) + r.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) } } @@ -86,7 +86,7 @@ class rocketPTW(n: Int) extends Component val bitsPerLevel = VPN_BITS/levels require(VPN_BITS == levels * bitsPerLevel) - val count = Reg() { UFix(width = log2up(levels)) } + val count = Reg() { UFix(width = log2Up(levels)) } val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(5) { UFix() 
}; val state = Reg(resetVal = s_ready); diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 44af8ff8..12bf1d22 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -22,8 +22,8 @@ class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = if (entries > 1) { - enq_ptr = Reg(resetVal = UFix(0, log2up(entries))) - deq_ptr = Reg(resetVal = UFix(0, log2up(entries))) + enq_ptr = Reg(resetVal = UFix(0, log2Up(entries))) + deq_ptr = Reg(resetVal = UFix(0, log2Up(entries))) val pow2 = Bool((entries & (entries-1)) == 0) when (do_deq) { diff --git a/rocket/src/main/scala/slowio.scala b/rocket/src/main/scala/slowio.scala index e54996ef..0c513849 100644 --- a/rocket/src/main/scala/slowio.scala +++ b/rocket/src/main/scala/slowio.scala @@ -19,7 +19,7 @@ class slowIO[T <: Data](val divisor: Int, hold_cycles_in: Int = -1)(data: => T) require((divisor & (divisor-1)) == 0) require(hold_cycles < divisor/2 && hold_cycles >= 1) - val cnt = Reg() { UFix(width = log2up(divisor)) } + val cnt = Reg() { UFix(width = log2Up(divisor)) } cnt := cnt + UFix(1) val out_en = cnt === UFix(divisor/2+hold_cycles-1) // rising edge + hold time val in_en = cnt === UFix(divisor/2-1) // rising edge @@ -46,5 +46,5 @@ class slowIO[T <: Data](val divisor: Int, hold_cycles_in: Int = -1)(data: => T) io.in_slow.ready := in_slow_rdy io.out_slow.valid := out_slow_val io.out_slow.bits := out_slow_bits - io.clk_slow := cnt(log2up(divisor)-1).toBool + io.clk_slow := cnt(log2Up(divisor)-1).toBool } diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index ab56d0f4..9791ea0f 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -120,7 +120,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val x_type_ = Reg{ Bits() } val init_tile_id_ = Reg{ Bits() } val tile_xact_id_ = Reg{ Bits() } - val p_rep_count = if (ntiles == 
1) UFix(0) else Reg(resetVal = UFix(0, width = log2up(ntiles))) + val p_rep_count = if (ntiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2Up(ntiles))) val p_req_flags = Reg(resetVal = Bits(0, width = ntiles)) val p_rep_tile_id_ = Reg{ Bits() } val x_needs_read = Reg(resetVal = Bool(false)) @@ -128,9 +128,9 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val p_rep_data_needs_write = Reg(resetVal = Bool(false)) val x_w_mem_cmd_sent = Reg(resetVal = Bool(false)) val p_w_mem_cmd_sent = Reg(resetVal = Bool(false)) - val mem_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + val mem_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES))) val mem_cnt_next = mem_cnt + UFix(1) - val mem_cnt_max = ~UFix(0, width = log2up(REFILL_CYCLES)) + val mem_cnt_max = ~UFix(0, width = log2Up(REFILL_CYCLES)) io.busy := state != s_idle io.addr := addr_ @@ -415,7 +415,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH val x_init = io.tiles(j).xact_init val x_init_data = io.tiles(j).xact_init_data val x_abort = io.tiles(j).xact_abort - val abort_cnt = Reg(resetVal = UFix(0, width = log2up(REFILL_CYCLES))) + val abort_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES))) val conflicts = Vec(NGLOBAL_XACTS) { Bool() } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io @@ -438,7 +438,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH is(s_abort_drain) { // raises x_init_data.ready below when(x_init_data.valid) { abort_cnt := abort_cnt + UFix(1) - when(abort_cnt === ~UFix(0, width = log2up(REFILL_CYCLES))) { + when(abort_cnt === ~UFix(0, width = log2Up(REFILL_CYCLES))) { abort_state_arr(j) := s_abort_send } } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index b01008ad..0eb6b190 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -4,23 +4,6 @@ import Chisel._ import Node._ 
import scala.math._ - -object foldR -{ - def apply[T <: Bits](x: Seq[T])(f: (T, T) => T): T = - if (x.length == 1) x(0) else f(x(0), foldR(x.slice(1, x.length))(f)) -} - -object log2up -{ - def apply(in: Int) = ceil(log(in)/log(2)).toInt -} - -object ispow2 -{ - def apply(in: Int) = in > 0 && ((in & (in-1)) == 0) -} - object FillInterleaved { def apply(n: Int, in: Bits) = @@ -32,109 +15,6 @@ object FillInterleaved } } -// http://aggregate.ee.engr.uky.edu/MAGIC/#Population%20Count%20%28Ones%20Count%29 -// http://bits.stephan-brumme.com/countBits.html -object PopCount -{ - def apply(in: Bits) = - { - require(in.width <= 32) - val w = log2up(in.width+1) - var x = in - if(in.width == 2) { - x = x - ((x >> UFix(1)) & Bits("h_5555_5555")) - } else if(in.width <= 4) { - x = x - ((x >> UFix(1)) & Bits("h_5555_5555")) - x = (((x >> UFix(2)) & Bits("h_3333_3333")) + (x & Bits("h_3333_3333"))) - } else if(in.width <= 8) { - x = x - ((x >> UFix(1)) & Bits("h_5555_5555")) - x = (((x >> UFix(2)) & Bits("h_3333_3333")) + (x & Bits("h_3333_3333"))) - x = ((x >> UFix(4)) + x) - } else { - // count bits of each 2-bit chunk - x = x - ((x >> UFix(1)) & Bits("h_5555_5555")) - // count bits of each 4-bit chunk - x = (((x >> UFix(2)) & Bits("h_3333_3333")) + (x & Bits("h_3333_3333"))) - // count bits of each 8-bit chunk - x = ((x >> UFix(4)) + x) - // mask junk in upper bits - x = x & Bits("h_0f0f_0f0f") - // add all four 8-bit chunks - x = x + (x >> UFix(8)) - x = x + (x >> UFix(16)) - } - x(w-1,0) - } -} - -object Reverse -{ - def doit(in: Bits, base: Int, length: Int): Bits = - { - val half = (1 << log2up(length))/2 - if (length == 1) - in(base) - else - Cat(doit(in, base, half), doit(in, base+half, length-half)) - } - def apply(in: Bits) = doit(in, 0, in.getWidth) -} - -object OHToUFix -{ - def apply(in: Seq[Bits]): UFix = { - if (in.size <= 1) return UFix(0) - if (in.size == 2) return in(1) - val hi = in.slice(in.size/2, in.size) - val lo = in.slice(0, in.size/2) - 
Cat(hi.reduceLeft(_||_), apply(hi zip lo map { case (x, y) => x || y })) - } - def apply(in: Bits): UFix = apply((0 until in.getWidth).map(in(_))) -} - -object UFixToOH -{ - def apply(in: UFix, width: Int): Bits = - { - (UFix(1) << in(log2up(width)-1,0)) - } -} - -object LFSR16 -{ - def apply(increment: Bool = Bool(true)) = - { - val width = 16 - val lfsr = Reg(resetVal = UFix(1, width)) - when (increment) { lfsr := Cat(lfsr(0)^lfsr(2)^lfsr(3)^lfsr(5), lfsr(width-1,1)).toUFix } - lfsr - } -} - -object ShiftRegister -{ - def apply [T <: Data](n: Int, in: T): T = - if (n > 0) Reg(apply(n-1, in)) else in -} - -object Mux1H -{ - def buildMux[T <: Data](sel: Bits, in: Seq[T], i: Int, n: Int): T = { - if (n == 1) - in(i) - else - { - val half_n = (1 << log2up(n))/2 - val left = buildMux(sel, in, i, half_n) - val right = buildMux(sel, in, i + half_n, n - half_n) - Mux(sel(i+n-1,i+half_n).orR, right, left) - } - } - - def apply [T <: Data](sel: Bits, in: Seq[T]): T = buildMux(sel, in, 0, sel.getWidth) - def apply [T <: Data](sel: Seq[Bool], in: Seq[T]): T = buildMux(Cat(Bits(0),sel.reverse:_*), in, 0, sel.size) -} - class Mux1H [T <: Data](n: Int)(gen: => T) extends Component { val io = new Bundle { @@ -146,76 +26,6 @@ class Mux1H [T <: Data](n: Int)(gen: => T) extends Component io.out := Mux1H(io.sel, io.in) } - -class ioDecoupled[+T <: Data]()(data: => T) extends Bundle -{ - val ready = Bool(INPUT) - val valid = Bool(OUTPUT) - val bits = data.asOutput -} - -class ioPipe[+T <: Data]()(data: => T) extends Bundle -{ - val valid = Bool(OUTPUT) - val bits = data.asOutput -} - -class ioArbiter[T <: Data](n: Int)(data: => T) extends Bundle { - val in = Vec(n) { (new ioDecoupled()) { data } }.flip - val out = (new ioDecoupled()) { data } - val chosen = Bits(log2up(n), OUTPUT) -} - -object ArbiterCtrl -{ - def apply(request: Seq[Bool]) = { - Bool(true) +: (1 until request.length).map(i => !foldR(request.slice(0, i))(_||_)) - } -} - -class Arbiter[T <: Data](n: Int)(data: => T) 
extends Component { - val io = new ioArbiter(n)(data) - - val grant = ArbiterCtrl(io.in.map(_.valid)) - (0 until n).map(i => io.in(i).ready := grant(i) && io.out.ready) - - var dout = io.in(n-1).bits - var choose = Bits(n-1) - for (i <- n-2 to 0 by -1) { - dout = Mux(io.in(i).valid, io.in(i).bits, dout) - choose = Mux(io.in(i).valid, Bits(i), choose) - } - - io.out.valid := foldR(io.in.map(_.valid))(_||_) - io.out.bits <> dout - io.chosen := choose -} - -class RRArbiter[T <: Data](n: Int)(data: => T) extends Component { - val io = new ioArbiter(n)(data) - - val last_grant = Reg(resetVal = Bits(0, log2up(n))) - val g = ArbiterCtrl((0 until n).map(i => io.in(i).valid && UFix(i) > last_grant) ++ io.in.map(_.valid)) - val grant = (0 until n).map(i => g(i) && UFix(i) > last_grant || g(i+n)) - (0 until n).map(i => io.in(i).ready := grant(i) && io.out.ready) - - var choose = Bits(n-1) - for (i <- n-2 to 0 by -1) - choose = Mux(io.in(i).valid, Bits(i), choose) - for (i <- n-1 to 1 by -1) - choose = Mux(io.in(i).valid && UFix(i) > last_grant, Bits(i), choose) - when (io.out.valid && io.out.ready) { - last_grant := choose - } - - val dvec = Vec(n) { data } - (0 until n).map(i => dvec(i) := io.in(i).bits ) - - io.out.valid := foldR(io.in.map(_.valid))(_||_) - io.out.bits := dvec(choose) - io.chosen := choose -} - class ioLockingArbiter[T <: Data](n: Int)(data: => T) extends Bundle { val in = Vec(n) { (new ioDecoupled()) { data } }.flip val lock = Vec(n) { Bool() }.asInput From a99cebb4830084cd21bdef2375dca29f01acff97 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Wed, 6 Jun 2012 18:22:56 -0700 Subject: [PATCH 0424/1087] ioDecoupled -> FIFOIO, ioPipe -> PipeIO --- rocket/src/main/scala/arbiter.scala | 8 +-- rocket/src/main/scala/dtlb.scala | 2 +- rocket/src/main/scala/htif.scala | 10 ++-- rocket/src/main/scala/memserdes.scala | 4 +- rocket/src/main/scala/nbdcache.scala | 80 +++++++++++++-------------- rocket/src/main/scala/queues.scala | 12 ++-- rocket/src/main/scala/slowio.scala 
| 8 +-- rocket/src/main/scala/uncore.scala | 44 +++++++-------- rocket/src/main/scala/util.scala | 4 +- 9 files changed, 86 insertions(+), 86 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index c4fce66c..1a7595c3 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -5,10 +5,10 @@ import Node._; import Constants._; class ioUncachedRequestor extends Bundle { - val xact_init = (new ioDecoupled) { new TransactionInit } - val xact_abort = (new ioDecoupled) { new TransactionAbort }.flip - val xact_rep = (new ioPipe) { new TransactionReply }.flip - val xact_finish = (new ioDecoupled) { new TransactionFinish } + val xact_init = (new FIFOIO) { new TransactionInit } + val xact_abort = (new FIFOIO) { new TransactionAbort }.flip + val xact_rep = (new PipeIO) { new TransactionReply }.flip + val xact_finish = (new FIFOIO) { new TransactionFinish } } class rocketMemArbiter(n: Int) extends Component { diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 6246e8ad..7d1b6b18 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -17,7 +17,7 @@ class ioDTLB_CPU_req_bundle extends Bundle val asid = Bits(width=ASID_BITS) val vpn = Bits(width=VPN_BITS+1) } -class ioDTLB_CPU_req extends ioDecoupled()( { new ioDTLB_CPU_req_bundle() } ) +class ioDTLB_CPU_req extends FIFOIO()( { new ioDTLB_CPU_req_bundle() } ) class ioDTLB_CPU_resp extends Bundle { diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index c7df3b69..08c1c5b7 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -11,8 +11,8 @@ class ioDebug extends Bundle class ioHost(w: Int) extends Bundle { - val in = new ioDecoupled()(Bits(width = w)).flip - val out = new ioDecoupled()(Bits(width = w)) + val in = new FIFOIO()(Bits(width = w)).flip + val out = new FIFOIO()(Bits(width = w)) } class PCRReq extends Bundle 
@@ -26,9 +26,9 @@ class ioHTIF extends Bundle { val reset = Bool(INPUT) val debug = new ioDebug - val pcr_req = (new ioDecoupled) { new PCRReq }.flip - val pcr_rep = (new ioPipe) { Bits(width = 64) } - val ipi = (new ioDecoupled) { Bits(width = log2Up(NTILES)) } + val pcr_req = (new FIFOIO) { new PCRReq }.flip + val pcr_rep = (new PipeIO) { Bits(width = 64) } + val ipi = (new FIFOIO) { Bits(width = log2Up(NTILES)) } } class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends Component diff --git a/rocket/src/main/scala/memserdes.scala b/rocket/src/main/scala/memserdes.scala index 8dc6a8ae..e20f1ec3 100644 --- a/rocket/src/main/scala/memserdes.scala +++ b/rocket/src/main/scala/memserdes.scala @@ -7,8 +7,8 @@ import scala.math._ class ioMemSerialized extends Bundle { - val req = (new ioDecoupled) { Bits(width = MEM_BACKUP_WIDTH) } - val resp = (new ioPipe) { Bits(width = MEM_BACKUP_WIDTH) }.flip + val req = (new FIFOIO) { Bits(width = MEM_BACKUP_WIDTH) } + val resp = (new PipeIO) { Bits(width = MEM_BACKUP_WIDTH) }.flip } class MemSerdes extends Component diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 7c12a2cd..0fdca0fb 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -173,15 +173,15 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { val tag = Bits(TAG_BITS, OUTPUT) val way_oh = Bits(NWAYS, OUTPUT) - val mem_req = (new ioDecoupled) { new TransactionInit } - val meta_req = (new ioDecoupled) { new MetaArrayReq() } - val replay = (new ioDecoupled) { new Replay() } - val mem_abort = (new ioPipe) { new TransactionAbort }.flip - val mem_rep = (new ioPipe) { new TransactionReply }.flip - val mem_finish = (new ioDecoupled) { new TransactionFinish } - val wb_req = (new ioDecoupled) { new WritebackReq } - val probe_writeback = (new ioDecoupled) { Bool() }.flip - val probe_refill = (new ioDecoupled) { Bool() }.flip + val mem_req = (new FIFOIO) { new 
TransactionInit } + val meta_req = (new FIFOIO) { new MetaArrayReq() } + val replay = (new FIFOIO) { new Replay() } + val mem_abort = (new PipeIO) { new TransactionAbort }.flip + val mem_rep = (new PipeIO) { new TransactionReply }.flip + val mem_finish = (new FIFOIO) { new TransactionFinish } + val wb_req = (new FIFOIO) { new WritebackReq } + val probe_writeback = (new FIFOIO) { Bool() }.flip + val probe_refill = (new FIFOIO) { Bool() }.flip } val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_drain_rpq :: Nil = Enum(7) { UFix() } @@ -294,7 +294,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { class MSHRFile(co: CoherencePolicy) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new MSHRReq }.flip + val req = (new FIFOIO) { new MSHRReq }.flip val secondary_miss = Bool(OUTPUT) val mem_resp_idx = Bits(IDX_BITS, OUTPUT) @@ -303,14 +303,14 @@ class MSHRFile(co: CoherencePolicy) extends Component { val fence_rdy = Bool(OUTPUT) - val mem_req = (new ioDecoupled) { new TransactionInit } - val meta_req = (new ioDecoupled) { new MetaArrayReq() } - val data_req = (new ioDecoupled) { new DataReq() } - val mem_abort = (new ioPipe) { new TransactionAbort }.flip - val mem_rep = (new ioPipe) { new TransactionReply }.flip - val mem_finish = (new ioDecoupled) { new TransactionFinish } - val wb_req = (new ioDecoupled) { new WritebackReq } - val probe = (new ioDecoupled) { Bool() }.flip + val mem_req = (new FIFOIO) { new TransactionInit } + val meta_req = (new FIFOIO) { new MetaArrayReq() } + val data_req = (new FIFOIO) { new DataReq() } + val mem_abort = (new PipeIO) { new TransactionAbort }.flip + val mem_rep = (new PipeIO) { new TransactionReply }.flip + val mem_finish = (new FIFOIO) { new TransactionFinish } + val wb_req = (new FIFOIO) { new WritebackReq } + val probe = (new FIFOIO) { Bool() }.flip val cpu_resp_val = Bool(OUTPUT) val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) @@ -416,13 +416,13 @@ 
class MSHRFile(co: CoherencePolicy) extends Component { class WritebackUnit(co: CoherencePolicy) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new WritebackReq() }.flip - val probe = (new ioDecoupled) { new WritebackReq() }.flip - val data_req = (new ioDecoupled) { new DataArrayReq() } + val req = (new FIFOIO) { new WritebackReq() }.flip + val probe = (new FIFOIO) { new WritebackReq() }.flip + val data_req = (new FIFOIO) { new DataArrayReq() } val data_resp = Bits(MEM_DATA_BITS, INPUT) - val mem_req = (new ioDecoupled) { new TransactionInit } - val mem_req_data = (new ioDecoupled) { new TransactionInitData } - val probe_rep_data = (new ioDecoupled) { new ProbeReplyData } + val mem_req = (new FIFOIO) { new TransactionInit } + val mem_req_data = (new FIFOIO) { new TransactionInitData } + val probe_rep_data = (new FIFOIO) { new ProbeReplyData } } val valid = Reg(resetVal = Bool(false)) @@ -485,11 +485,11 @@ class WritebackUnit(co: CoherencePolicy) extends Component { class ProbeUnit(co: CoherencePolicy) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new ProbeRequest }.flip - val rep = (new ioDecoupled) { new ProbeReply } - val meta_req = (new ioDecoupled) { new MetaArrayReq } - val mshr_req = (new ioDecoupled) { Bool() } - val wb_req = (new ioDecoupled) { new WritebackReq } + val req = (new FIFOIO) { new ProbeRequest }.flip + val rep = (new FIFOIO) { new ProbeReply } + val meta_req = (new FIFOIO) { new MetaArrayReq } + val mshr_req = (new FIFOIO) { Bool() } + val wb_req = (new FIFOIO) { new WritebackReq } val tag_match_way_oh = Bits(NWAYS, INPUT) val line_state = UFix(2, INPUT) val address = Bits(PADDR_BITS-OFFSET_BITS, OUTPUT) @@ -548,9 +548,9 @@ class ProbeUnit(co: CoherencePolicy) extends Component { class FlushUnit(lines: Int, co: CoherencePolicy) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { Bool() }.flip - val meta_req = (new ioDecoupled) { new MetaArrayReq() } - val mshr_req = 
(new ioDecoupled) { Bool() } + val req = (new FIFOIO) { Bool() }.flip + val meta_req = (new FIFOIO) { new MetaArrayReq() } + val mshr_req = (new FIFOIO) { Bool() } } val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: Nil = Enum(4) { UFix() } @@ -597,9 +597,9 @@ class FlushUnit(lines: Int, co: CoherencePolicy) extends Component { class MetaDataArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new MetaArrayReq() }.flip + val req = (new FIFOIO) { new MetaArrayReq() }.flip val resp = (new MetaData).asOutput() - val state_req = (new ioDecoupled) { new MetaArrayReq() }.flip + val state_req = (new FIFOIO) { new MetaArrayReq() }.flip } val permissions_array = Mem(lines){ UFix(width = 2) } @@ -626,9 +626,9 @@ class MetaDataArray(lines: Int) extends Component { class MetaDataArrayArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new MetaArrayReq() }.flip + val req = (new FIFOIO) { new MetaArrayReq() }.flip val resp = Vec(NWAYS){ (new MetaData).asOutput } - val state_req = (new ioDecoupled) { new MetaArrayReq() }.flip + val state_req = (new FIFOIO) { new MetaArrayReq() }.flip val way_en = Bits(width = NWAYS, dir = OUTPUT) } @@ -653,7 +653,7 @@ class MetaDataArrayArray(lines: Int) extends Component { class DataArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new DataArrayReq() }.flip + val req = (new FIFOIO) { new DataArrayReq() }.flip val resp = Bits(width = MEM_DATA_BITS, dir = OUTPUT) } @@ -673,7 +673,7 @@ class DataArray(lines: Int) extends Component { class DataArrayArray(lines: Int) extends Component { val io = new Bundle { - val req = (new ioDecoupled) { new DataArrayReq() }.flip + val req = (new FIFOIO) { new DataArrayReq() }.flip val resp = Vec(NWAYS){ Bits(width = MEM_DATA_BITS, dir = OUTPUT) } val way_en = Bits(width = NWAYS, dir = OUTPUT) } @@ -756,8 +756,8 @@ class HellaCacheExceptions extends Bundle { // interface between D$ and 
processor/DTLB class ioHellaCache extends Bundle { - val req = (new ioDecoupled){ new HellaCacheReq } - val resp = (new ioPipe){ new HellaCacheResp }.flip + val req = (new FIFOIO){ new HellaCacheReq } + val resp = (new PipeIO){ new HellaCacheResp }.flip val xcpt = (new HellaCacheExceptions).asInput } diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 12bf1d22..82cb2474 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -6,8 +6,8 @@ import Node._; class ioQueue[T <: Data](flushable: Boolean)(data: => T) extends Bundle { val flush = if (flushable) Bool(INPUT) else null - val enq = new ioDecoupled()(data).flip - val deq = new ioDecoupled()(data) + val enq = new FIFOIO()(data).flip + val deq = new FIFOIO()(data) } class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = false)(data: => T) extends Component @@ -60,7 +60,7 @@ class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = object Queue { - def apply[T <: Data](enq: ioDecoupled[T], entries: Int = 2, pipe: Boolean = false) = { + def apply[T <: Data](enq: FIFOIO[T], entries: Int = 2, pipe: Boolean = false) = { val q = (new queue(entries, pipe)) { enq.bits.clone } q.io.enq <> enq q.io.deq @@ -70,8 +70,8 @@ object Queue class pipereg[T <: Data]()(data: => T) extends Component { val io = new Bundle { - val enq = new ioPipe()(data).flip - val deq = new ioPipe()(data) + val enq = new PipeIO()(data).flip + val deq = new PipeIO()(data) } //val bits = Reg() { io.enq.bits.clone } @@ -88,7 +88,7 @@ class pipereg[T <: Data]()(data: => T) extends Component object Pipe { - def apply[T <: Data](enq: ioPipe[T], latency: Int = 1): ioPipe[T] = { + def apply[T <: Data](enq: PipeIO[T], latency: Int = 1): PipeIO[T] = { val q = (new pipereg) { enq.bits.clone } q.io.enq <> enq q.io.deq diff --git a/rocket/src/main/scala/slowio.scala b/rocket/src/main/scala/slowio.scala index 0c513849..6cf5a3d9 100644 --- 
a/rocket/src/main/scala/slowio.scala +++ b/rocket/src/main/scala/slowio.scala @@ -6,11 +6,11 @@ import Constants._ class slowIO[T <: Data](val divisor: Int, hold_cycles_in: Int = -1)(data: => T) extends Component { val io = new Bundle { - val out_fast = new ioDecoupled()(data).flip - val out_slow = new ioDecoupled()(data) + val out_fast = new FIFOIO()(data).flip + val out_slow = new FIFOIO()(data) - val in_fast = new ioDecoupled()(data) - val in_slow = new ioDecoupled()(data).flip + val in_fast = new FIFOIO()(data) + val in_slow = new FIFOIO()(data).flip val clk_slow = Bool(OUTPUT) } diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 9791ea0f..6239981f 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -21,9 +21,9 @@ class MemResp () extends MemData class ioMem() extends Bundle { - val req_cmd = (new ioDecoupled) { new MemReqCmd() } - val req_data = (new ioDecoupled) { new MemData() } - val resp = (new ioPipe) { new MemResp() }.flip + val req_cmd = (new FIFOIO) { new MemReqCmd() } + val req_data = (new FIFOIO) { new MemData() } + val resp = (new PipeIO) { new MemResp() }.flip } class TrackerProbeData extends Bundle { @@ -40,34 +40,34 @@ class TrackerDependency extends Bundle { } class ioTileLink extends Bundle { - val xact_init = (new ioDecoupled) { new TransactionInit } - val xact_init_data = (new ioDecoupled) { new TransactionInitData } - val xact_abort = (new ioDecoupled) { new TransactionAbort }.flip - val probe_req = (new ioDecoupled) { new ProbeRequest }.flip - val probe_rep = (new ioDecoupled) { new ProbeReply } - val probe_rep_data = (new ioDecoupled) { new ProbeReplyData } - val xact_rep = (new ioPipe) { new TransactionReply }.flip - val xact_finish = (new ioDecoupled) { new TransactionFinish } + val xact_init = (new FIFOIO) { new TransactionInit } + val xact_init_data = (new FIFOIO) { new TransactionInitData } + val xact_abort = (new FIFOIO) { new TransactionAbort }.flip + 
val probe_req = (new FIFOIO) { new ProbeRequest }.flip + val probe_rep = (new FIFOIO) { new ProbeReply } + val probe_rep_data = (new FIFOIO) { new ProbeReplyData } + val xact_rep = (new PipeIO) { new TransactionReply }.flip + val xact_finish = (new FIFOIO) { new TransactionFinish } } class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val io = new Bundle { - val alloc_req = (new ioDecoupled) { new TrackerAllocReq }.flip - val p_data = (new ioPipe) { new TrackerProbeData }.flip + val alloc_req = (new FIFOIO) { new TrackerAllocReq }.flip + val p_data = (new PipeIO) { new TrackerProbeData }.flip val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) val p_rep_cnt_dec = Bits(ntiles, INPUT) val p_req_cnt_inc = Bits(ntiles, INPUT) - val p_rep_data = (new ioPipe) { new ProbeReplyData }.flip - val x_init_data = (new ioPipe) { new TransactionInitData }.flip + val p_rep_data = (new PipeIO) { new ProbeReplyData }.flip + val x_init_data = (new PipeIO) { new TransactionInitData }.flip val sent_x_rep_ack = Bool(INPUT) - val p_rep_data_dep = (new ioPipe) { new TrackerDependency }.flip - val x_init_data_dep = (new ioPipe) { new TrackerDependency }.flip + val p_rep_data_dep = (new PipeIO) { new TrackerDependency }.flip + val x_init_data_dep = (new PipeIO) { new TrackerDependency }.flip - val mem_req_cmd = (new ioDecoupled) { new MemReqCmd } - val mem_req_data = (new ioDecoupled) { new MemData } + val mem_req_cmd = (new FIFOIO) { new MemReqCmd } + val mem_req_data = (new FIFOIO) { new MemData } val mem_req_lock = Bool(OUTPUT) - val probe_req = (new ioDecoupled) { new ProbeRequest } + val probe_req = (new FIFOIO) { new ProbeRequest } val busy = Bool(OUTPUT) val addr = Bits(PADDR_BITS - OFFSET_BITS, OUTPUT) val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) @@ -85,7 +85,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val send_x_rep_ack = Bool(OUTPUT) } - def doMemReqWrite(req_cmd: ioDecoupled[MemReqCmd], req_data: 
ioDecoupled[MemData], lock: Bool, data: ioPipe[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { + def doMemReqWrite(req_cmd: FIFOIO[MemReqCmd], req_data: FIFOIO[MemData], lock: Bool, data: PipeIO[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { req_cmd.valid := !cmd_sent && data.valid && at_front_of_dep_queue req_cmd.bits.rw := Bool(true) req_data.valid := data.valid && at_front_of_dep_queue @@ -106,7 +106,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { } } - def doMemReqRead(req_cmd: ioDecoupled[MemReqCmd], trigger: Bool) { + def doMemReqRead(req_cmd: FIFOIO[MemReqCmd], trigger: Bool) { req_cmd.valid := Bool(true) req_cmd.bits.rw := Bool(false) when(req_cmd.ready) { diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 0eb6b190..5b1727db 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -27,9 +27,9 @@ class Mux1H [T <: Data](n: Int)(gen: => T) extends Component } class ioLockingArbiter[T <: Data](n: Int)(data: => T) extends Bundle { - val in = Vec(n) { (new ioDecoupled()) { data } }.flip + val in = Vec(n) { (new FIFOIO()) { data } }.flip val lock = Vec(n) { Bool() }.asInput - val out = (new ioDecoupled()) { data } + val out = (new FIFOIO()) { data } } class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { From 4e5f87426643d8e1a7906bd117d9ed9f6d2c0a64 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 8 Jun 2012 00:13:14 -0700 Subject: [PATCH 0425/1087] update to new chisel/hwacha --- rocket/src/main/scala/fpu.scala | 6 +++--- rocket/src/main/scala/icache.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index c7ea88fa..992d4792 100644 --- 
a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -467,9 +467,9 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val regfile = Mem(32) { Bits(width = 65) } when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded } - val ex_rs1 = regfile.read(ex_reg_inst(26,22)) - val ex_rs2 = regfile.read(ex_reg_inst(21,17)) - val ex_rs3 = regfile.read(ex_reg_inst(16,12)) + val ex_rs1 = regfile(ex_reg_inst(26,22)) + val ex_rs2 = regfile(ex_reg_inst(21,17)) + val ex_rs3 = regfile(ex_reg_inst(16,12)) val ex_rm = Mux(ex_reg_inst(11,9) === Bits(7), fsr_rm, ex_reg_inst(11,9)) val mem_reg_valid = Reg(ex_reg_valid && !io.ctrl.killx, resetVal = Bool(false)) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 3f6523d6..3f627f77 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -97,7 +97,7 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten for (i <- 0 until assoc) { val repl_me = (repl_way === UFix(i)) - val tag_array = Mem(sets){ Bits(width = tagbits) } + val tag_array = Mem(sets, seqRead = true){ Bits(width = tagbits) } val tag_rdata = Reg() { Bits(width = tagbits) } when (tag_we && repl_me) { tag_array(tag_addr) := r_cpu_miss_tag } .otherwise { tag_rdata := tag_array(tag_addr) } @@ -115,7 +115,7 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten val hit = valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) // data array - val data_array = Mem(sets*REFILL_CYCLES){ io.mem.xact_rep.bits.data.clone } + val data_array = Mem(sets*REFILL_CYCLES, seqRead = true){ io.mem.xact_rep.bits.data.clone } val data_out = Reg(){ io.mem.xact_rep.bits.data.clone } when (io.mem.xact_rep.valid && repl_me) { data_array(data_addr) := io.mem.xact_rep.bits.data } .otherwise { data_out := data_array(data_addr) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 
0fdca0fb..1a0977c8 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -612,7 +612,7 @@ class MetaDataArray(lines: Int) extends Component { .otherwise { raddr := io.req.bits.idx } } - val tag_array = Mem(lines){ Bits(width=TAG_BITS) } + val tag_array = Mem(lines, seqRead = true){ Bits(width=TAG_BITS) } val tag_rdata = Reg() { Bits() } when (io.req.valid) { when (io.req.bits.rw) { tag_array(io.req.bits.idx) := io.req.bits.data.tag } @@ -661,7 +661,7 @@ class DataArray(lines: Int) extends Component { val addr = Cat(io.req.bits.idx, io.req.bits.offset) val rdata = Reg() { Bits() } - val array = Mem(lines*REFILL_CYCLES){ Bits(width=MEM_DATA_BITS) } + val array = Mem(lines*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } when (io.req.valid) { when (io.req.bits.rw) { array.write(addr, io.req.bits.data, wmask) } .otherwise { rdata := array(addr) } From 39d198ecdcb812edf82ce380dd734b9ec726f934 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 26 Jun 2012 19:12:11 -0700 Subject: [PATCH 0426/1087] fix htif handling of large memory reads --- rocket/src/main/scala/htif.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 08c1c5b7..8c8a6acf 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -165,7 +165,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C rx_count := UFix(0) tx_count := UFix(0) } - state := state_rx + state := Mux(cmd === cmd_readmem && pos != UFix(0), state_mem_req, state_rx) } var mem_req_data: Bits = null From 5035374f363ea989cddf7deb76406bde749b3099 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 8 Jul 2012 17:59:41 -0700 Subject: [PATCH 0427/1087] update to new chisel --- rocket/src/main/scala/fpu.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/fpu.scala 
b/rocket/src/main/scala/fpu.scala index 992d4792..4ac99d0b 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -74,7 +74,7 @@ class rocketFPUDecoder extends Component val N = Bool(false) val Y = Bool(true) val X = Bool(false) - val decoder = ListLookup(io.inst, + val decoder = DecodeLogic(io.inst, List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X), Array(FLW -> List(FCMD_LOAD, Y,N,N,N,N,Y,N,N,N,N,N,N,N), FLD -> List(FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N,N,N,N), From f645fb4dd704f4046e9cdba2e7e8202bdb658527 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 10 Jul 2012 05:23:29 -0700 Subject: [PATCH 0428/1087] add L2$ It still has performance bugs but no correctness bugs AFAIK. --- rocket/src/main/scala/fpu.scala | 2 +- rocket/src/main/scala/llc.scala | 392 +++++++++++++++++++++++++++++ rocket/src/main/scala/queues.scala | 33 ++- rocket/src/main/scala/top.scala | 18 +- 4 files changed, 427 insertions(+), 18 deletions(-) create mode 100644 rocket/src/main/scala/llc.scala diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 4ac99d0b..d0a249bc 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -457,7 +457,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component } val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) - val sp_msbs = Fill(32, UFix(1,1)) + val sp_msbs = Fix(-1, 32) val load_wb_data_recoded = Mux(load_wb_single, Cat(sp_msbs, rec_s), rec_d) val fsr_rm = Reg() { Bits(width = 3) } diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala new file mode 100644 index 00000000..71dfa536 --- /dev/null +++ b/rocket/src/main/scala/llc.scala @@ -0,0 +1,392 @@ +package rocket + +import Chisel._ +import Node._ +import Constants._ + +class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) extends Component +{ + val io = new Bundle { + val addr 
= UFix(log2Up(n), INPUT) + val en = Bool(INPUT) + val rw = Bool(INPUT) + val wdata = gen.asInput + val wmask = gen.asInput + val rdata = gen.asOutput + } + require(readLatency >= 0 && readLatency <= 2) + val data = gen + val colMux = if (2*data.width <= leaf.data.width && n > leaf.n) 1 << math.floor(math.log(leaf.data.width/data.width)/math.log(2)).toInt else 1 + val nWide = if (data.width > leaf.data.width) 1+(data.width-1)/leaf.data.width else 1 + val nDeep = if (n > colMux*leaf.n) 1+(n-1)/(colMux*leaf.n) else 1 + if (nDeep > 1 || colMux > 1) + require(isPow2(n) && isPow2(leaf.n)) + + val idx = io.addr(log2Up(n/nDeep/colMux)-1, 0) + val rdataDeep = Vec(nDeep) { Bits() } + val rdataSel = Vec(nDeep) { Bool() } + val cond = Vec(nDeep) { Bool() } + val ren = Vec(nDeep) { Bool() } + val reg_ren = Vec(nDeep) { Reg() { Bool() } } + val reg2_ren = Vec(nDeep) { Reg() { Bool() } } + val reg_raddr = Vec(nDeep) { Reg() { UFix() } } + val reg2_raddr = Vec(nDeep) { Reg() { UFix() } } + val renOut = Vec(nDeep) { Bool() } + val raddrOut = Vec(nDeep) { UFix() } + val rdata = Vec(nDeep) { Vec(nWide) { Bits() } } + val wdata = io.wdata.toBits + val wmask = io.wmask.toBits + for (i <- 0 until nDeep) { + cond(i) := (if (nDeep == 1) io.en else io.en && UFix(i) === io.addr(log2Up(n)-1, log2Up(n/nDeep))) + ren(i) := cond(i) && !io.rw + reg_ren(i) := ren(i) + reg2_ren(i) := reg_ren(i) + when (ren(i)) { reg_raddr(i) := io.addr } + when (reg_ren(i)) { reg2_raddr(i) := reg_raddr(i) } + renOut(i) := (if (readLatency > 1) reg2_ren(i) else if (readLatency > 0) reg_ren(i) else ren(i)) + raddrOut(i) := (if (readLatency > 1) reg2_raddr(i) else if (readLatency > 0) reg_raddr(i) else io.addr) + + for (j <- 0 until nWide) { + val mem = leaf.clone + var dout: Bits = null + val dout1 = if (readLatency > 0) Reg() { Bits() } else null + + var wmask0 = Fill(colMux, wmask(math.min(wmask.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) + if (colMux > 1) + wmask0 = wmask0 & FillInterleaved(gen.width, 
UFixToOH(io.addr(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)), log2Up(colMux))) + val wdata0 = Fill(colMux, wdata(math.min(wdata.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) + when (cond(i)) { + when (io.rw) { mem.write(idx, wdata0, wmask0) } + .otherwise { if (readLatency > 0) dout1 := mem(idx) } + } + + if (readLatency == 0) { + dout = mem(idx) + } else if (readLatency == 1) { + dout = dout1 + } else { + val dout2 = Reg() { Bits() } + when (reg_ren(i)) { dout2 := dout1 } + dout = dout2 + } + + rdata(i)(j) := dout + } + val rdataWide = rdata(i).reduceLeft((x, y) => Cat(y, x)) + + var colMuxOut = rdataWide + if (colMux > 1) { + val colMuxIn = Vec((0 until colMux).map(k => rdataWide(gen.width*(k+1)-1, gen.width*k))) { Bits() } + colMuxOut = colMuxIn(raddrOut(i)(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux))) + } + + rdataDeep(i) := colMuxOut + rdataSel(i) := renOut(i) + } + + io.rdata := Mux1H(rdataSel, rdataDeep) +} + +class LLCDataReq(ways: Int) extends MemReqCmd +{ + val way = UFix(width = log2Up(ways)) + val isWriteback = Bool() + + override def clone = new LLCDataReq(ways).asInstanceOf[this.type] +} + +class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component +{ + val io = new Bundle { + val cpu = (new FIFOIO) { new MemReqCmd }.flip + val repl_way = UFix(log2Up(ways), INPUT) + val repl_dirty = Bool(INPUT) + val repl_tag = UFix(PADDR_BITS - OFFSET_BITS - log2Up(sets), INPUT) + val data = (new FIFOIO) { new LLCDataReq(ways) } + val tag = (new PipeIO) { new Bundle { + val addr = UFix(width = PADDR_BITS - OFFSET_BITS) + val way = UFix(width = log2Up(ways)) + } } + val mem = new ioMem + val mem_resp_set = UFix(log2Up(sets), OUTPUT) + val mem_resp_way = UFix(log2Up(ways), OUTPUT) + } + + class MSHR extends Bundle { + val addr = UFix(width = PADDR_BITS - OFFSET_BITS) + val way = UFix(width = log2Up(ways)) + val tag = io.cpu.bits.tag.clone + val refilled = Bool() + val refillCount = UFix(width = log2Up(REFILL_CYCLES)) + val requested = Bool() + 
val old_dirty = Bool() + val old_tag = UFix(width = PADDR_BITS - OFFSET_BITS - log2Up(sets)) + + override def clone = new MSHR().asInstanceOf[this.type] + } + + val valid = Vec(outstanding) { Reg(resetVal = Bool(false)) } + val validBits = valid.toBits + val freeId = PriorityEncoder(~validBits) + val mshr = Vec(outstanding) { Reg() { new MSHR } } + when (io.cpu.valid && io.cpu.ready) { + valid(freeId) := Bool(true) + mshr(freeId).addr := io.cpu.bits.addr + mshr(freeId).tag := io.cpu.bits.tag + mshr(freeId).way := io.repl_way + mshr(freeId).old_dirty := io.repl_dirty + mshr(freeId).old_tag := io.repl_tag + mshr(freeId).requested := Bool(false) + mshr(freeId).refillCount := UFix(0) + mshr(freeId).refilled := Bool(false) + } + + val requests = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && !mshr(i).old_dirty && !mshr(i).requested):_*) + val request = requests.orR + val requestId = PriorityEncoder(requests) + when (io.mem.req_cmd.valid && io.mem.req_cmd.ready) { mshr(requestId).requested := Bool(true) } + + val refillId = io.mem.resp.bits.tag(log2Up(outstanding)-1, 0) + val refillCount = mshr(refillId).refillCount + when (io.mem.resp.valid) { + mshr(refillId).refillCount := refillCount + UFix(1) + when (refillCount === UFix(REFILL_CYCLES-1)) { mshr(refillId).refilled := Bool(true) } + } + + val replays = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).refilled):_*) + val replay = replays.orR + val replayId = PriorityEncoder(replays) + when (replay && io.data.ready) { valid(replayId) := Bool(false) } + + val writebacks = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).old_dirty):_*) + val writeback = writebacks.orR + val writebackId = PriorityEncoder(writebacks) + when (writeback && io.data.ready && !replay) { mshr(writebackId).old_dirty := Bool(false) } + + val conflicts = Cat(Bits(0), (0 until outstanding).map(i => valid(i) && io.cpu.bits.addr(log2Up(sets)-1, 0) === mshr(i).addr(log2Up(sets)-1, 0)):_*) + 
io.cpu.ready := !conflicts.orR && !validBits.andR + + io.data.valid := replay || writeback + io.data.bits.rw := Bool(false) + io.data.bits.tag := mshr(replayId).tag + io.data.bits.isWriteback := Bool(true) + io.data.bits.addr := Cat(mshr(writebackId).old_tag, mshr(writebackId).addr(log2Up(sets)-1, 0)).toUFix + io.data.bits.way := mshr(writebackId).way + when (replay) { + io.data.bits.isWriteback := Bool(false) + io.data.bits.addr := mshr(replayId).addr + io.data.bits.way := mshr(replayId).way + } + io.tag.valid := replay && io.data.ready + io.tag.bits.addr := io.data.bits.addr + io.tag.bits.way := io.data.bits.way + + io.mem.req_cmd.valid := request + io.mem.req_cmd.bits.rw := Bool(false) + io.mem.req_cmd.bits.addr := mshr(requestId).addr + io.mem.req_cmd.bits.tag := requestId + io.mem_resp_set := mshr(refillId).addr + io.mem_resp_way := mshr(refillId).way +} + +class LLCWriteback(requestors: Int) extends Component +{ + val io = new Bundle { + val req = Vec(requestors) { (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) }.flip } + val data = Vec(requestors) { (new FIFOIO) { new MemData }.flip } + val mem = new ioMem + } + + val valid = Reg(resetVal = Bool(false)) + val who = Reg() { UFix() } + val addr = Reg() { UFix() } + val cmd_sent = Reg() { Bool() } + val data_sent = Reg() { Bool() } + val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) + + var anyReq = Bool(false) + for (i <- 0 until requestors) { + io.req(i).ready := !valid && !anyReq + io.data(i).ready := valid && who === UFix(i) && io.mem.req_data.ready + anyReq = anyReq || io.req(i).valid + } + + val nextWho = PriorityEncoder(io.req.map(_.valid)) + when (!valid && io.req.map(_.valid).reduceLeft(_||_)) { + valid := Bool(true) + cmd_sent := Bool(false) + data_sent := Bool(false) + who := nextWho + addr := io.req(nextWho).bits + } + + when (io.mem.req_data.valid && io.mem.req_data.ready) { + count := count + UFix(1) + when (count === UFix(REFILL_CYCLES-1)) { + data_sent := Bool(true) + when 
(cmd_sent) { valid := Bool(false) } + } + } + when (io.mem.req_cmd.valid && io.mem.req_cmd.ready) { cmd_sent := Bool(true) } + when (valid && cmd_sent && data_sent) { valid := Bool(false) } + + io.mem.req_cmd.valid := valid && !cmd_sent + io.mem.req_cmd.bits.addr := addr + io.mem.req_cmd.bits.rw := Bool(true) + + io.mem.req_data.valid := valid && !data_sent && io.data(who).valid + io.mem.req_data.bits := io.data(who).bits +} + +class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component +{ + val io = new Bundle { + val req = (new FIFOIO) { new LLCDataReq(ways) }.flip + val req_data = (new FIFOIO) { new MemData }.flip + val writeback = (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) } + val writeback_data = (new FIFOIO) { new MemData } + val resp = (new PipeIO) { new MemResp } + val mem_resp = (new PipeIO) { new MemResp }.flip + val mem_resp_set = UFix(log2Up(sets), INPUT) + val mem_resp_way = UFix(log2Up(ways), INPUT) + } + + val data = new BigMem(sets*ways*REFILL_CYCLES, 2, leaf)(Bits(width = MEM_DATA_BITS)) + class QEntry extends MemResp { + val isWriteback = Bool() + override def clone = new QEntry().asInstanceOf[this.type] + } + val q = (new queue(4)) { new QEntry } + val qReady = q.io.count <= UFix(q.entries - 3) + val valid = Reg(resetVal = Bool(false)) + val req = Reg() { io.req.bits.clone } + val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) + val refillCount = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) + + when (data.io.en && !io.mem_resp.valid) { + count := count + UFix(1) + when (valid && count === UFix(REFILL_CYCLES-1)) { valid := Bool(false) } + } + when (io.req.valid && io.req.ready) { valid := Bool(true); req := io.req.bits } + when (io.mem_resp.valid) { refillCount := refillCount + UFix(1) } + + data.io.en := io.req.valid && io.req.ready && Mux(io.req.bits.rw, io.req_data.valid, qReady) + data.io.addr := Cat(io.req.bits.way, io.req.bits.addr(log2Up(sets)-1, 0), count).toUFix + data.io.rw := io.req.bits.rw + 
data.io.wdata := io.req_data.bits.data + data.io.wmask := Fix(-1, io.req_data.bits.data.width) + when (valid) { + data.io.en := Mux(req.rw, io.req_data.valid, qReady) + data.io.addr := Cat(req.way, req.addr(log2Up(sets)-1, 0), count).toUFix + data.io.rw := req.rw + } + when (io.mem_resp.valid) { + data.io.en := Bool(true) + data.io.addr := Cat(io.mem_resp_way, io.mem_resp_set, refillCount).toUFix + data.io.rw := Bool(true) + data.io.wdata := io.mem_resp.bits.data + } + + q.io.enq.valid := Reg(Reg(data.io.en && !data.io.rw, resetVal = Bool(false)), resetVal = Bool(false)) + q.io.enq.bits.tag := Reg(Reg(Mux(valid, req.tag, io.req.bits.tag))) + q.io.enq.bits.data := data.io.rdata + q.io.enq.bits.isWriteback := Reg(Reg(Mux(valid, req.isWriteback, io.req.bits.isWriteback))) + + io.req.ready := !valid && Mux(io.req.bits.isWriteback, io.writeback.ready, Bool(true)) + io.req_data.ready := !io.mem_resp.valid && Mux(valid, req.rw, io.req.valid && io.req.bits.rw) + + io.writeback.valid := io.req.valid && io.req.ready && io.req.bits.isWriteback + io.writeback.bits := io.req.bits.addr + + q.io.deq.ready := Mux(q.io.deq.bits.isWriteback, io.writeback_data.ready, Bool(true)) + io.resp.valid := q.io.deq.valid && !q.io.deq.bits.isWriteback + io.resp.bits := q.io.deq.bits + io.writeback_data.valid := q.io.deq.valid && q.io.deq.bits.isWriteback + io.writeback_data.bits := q.io.deq.bits +} + +class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits]) extends Component +{ + val io = new Bundle { + val cpu = new ioMem().flip + val mem = new ioMem + } + + val tagWidth = PADDR_BITS - OFFSET_BITS - log2Up(sets) + val metaWidth = tagWidth + 2 // valid + dirty + + val memCmdArb = (new Arbiter(2)) { new MemReqCmd } + val dataArb = (new Arbiter(2)) { new LLCDataReq(ways) } + val mshr = new LLCMSHRFile(sets, ways, outstanding) + val tags = new BigMem(sets, 2, tagLeaf)(Bits(width = metaWidth*ways)) + val data = new LLCData(sets, ways, dataLeaf) + val 
writeback = new LLCWriteback(2) + + val initCount = Reg(resetVal = UFix(0, log2Up(sets+1))) + val initialize = !initCount(log2Up(sets)) + when (initialize) { initCount := initCount + UFix(1) } + + val stall_s1 = Bool() + val replay_s1 = Reg(resetVal = Bool(false)) + val s1_valid = Reg(io.cpu.req_cmd.valid && !stall_s1 || replay_s1, resetVal = Bool(false)) + replay_s1 := s1_valid && stall_s1 + val s1 = Reg() { new MemReqCmd } + when (io.cpu.req_cmd.valid && io.cpu.req_cmd.ready) { s1 := io.cpu.req_cmd.bits } + + tags.io.en := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || mshr.io.tag.valid + tags.io.addr := Mux(initialize, initCount, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr))(log2Up(sets)-1,0)) + tags.io.rw := initialize || mshr.io.tag.valid + tags.io.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(Bool(false), Bool(true), mshr.io.tag.bits.addr(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) + tags.io.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(mshr.io.tag.bits.way))) + + val stall_s2 = Bool() + val s2_valid = Reg(resetVal = Bool(false)) + s2_valid := s1_valid && !replay_s1 && !stall_s1 || stall_s2 + val s2 = Reg() { new MemReqCmd } + when (s1_valid && !stall_s1 && !replay_s1) { s2 := s1 } + val s2_tags = Vec(ways) { Bits(width = metaWidth) } + for (i <- 0 until ways) s2_tags(i) := tags.io.rdata(metaWidth*(i+1)-1, metaWidth*i) + val s2_hits = s2_tags.map(t => t(tagWidth) && s2.addr(s2.addr.width-1, s2.addr.width-tagWidth) === t(tagWidth-1, 0)) + val s2_hit = s2_hits.reduceLeft(_||_) + stall_s1 := initialize || mshr.io.tag.valid || s2_valid && !s2_hit || stall_s2 + val repl_way = LFSR16(s2_valid)(log2Up(ways)-1, 0) + val repl_tag = s2_tags(repl_way).toUFix + + mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw + mshr.io.cpu.bits := s2 + mshr.io.repl_way := repl_way + mshr.io.repl_dirty := repl_tag(tagWidth).toBool + mshr.io.repl_tag := 
repl_tag + mshr.io.mem.resp := io.mem.resp + + data.io.req <> dataArb.io.out + data.io.mem_resp := io.mem.resp + data.io.mem_resp_set := mshr.io.mem_resp_set + data.io.mem_resp_way := mshr.io.mem_resp_way + data.io.req_data.bits := io.cpu.req_data.bits + + writeback.io.req(0) <> data.io.writeback + writeback.io.data(0) <> data.io.writeback_data + writeback.io.req(1).valid := s2_valid && !s2_hit && s2.rw + writeback.io.req(1).bits := s2.addr + writeback.io.data(1).valid := io.cpu.req_data.valid + writeback.io.data(1).bits := io.cpu.req_data.bits + data.io.req_data.valid := io.cpu.req_data.valid && !writeback.io.data(1).ready + + memCmdArb.io.in(0) <> mshr.io.mem.req_cmd + memCmdArb.io.in(1) <> writeback.io.mem.req_cmd + + dataArb.io.in(0) <> mshr.io.data + dataArb.io.in(1).valid := s2_valid && s2_hit + dataArb.io.in(1).bits := s2 + dataArb.io.in(1).bits.way := OHToUFix(s2_hits) + dataArb.io.in(1).bits.isWriteback := Bool(false) + + stall_s2 := s2_valid && !Mux(s2_hit, dataArb.io.in(1).ready, Mux(s2.rw, writeback.io.req(1).ready, mshr.io.cpu.ready)) + + io.cpu.resp <> data.io.resp + io.cpu.req_cmd.ready := !stall_s1 && !replay_s1 + io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready + io.mem.req_cmd <> memCmdArb.io.out + io.mem.req_data <> writeback.io.mem.req_data +} diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 82cb2474..d78df502 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -3,35 +3,39 @@ package rocket import Chisel._ import Node._; -class ioQueue[T <: Data](flushable: Boolean)(data: => T) extends Bundle +class ioQueue[T <: Data](entries: Int, flushable: Boolean)(data: => T) extends Bundle { val flush = if (flushable) Bool(INPUT) else null val enq = new FIFOIO()(data).flip val deq = new FIFOIO()(data) + val count = UFix(log2Up(entries+1), OUTPUT) } -class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = false)(data: => T) 
extends Component +class queue[T <: Data](val entries: Int, pipe: Boolean = false, flushable: Boolean = false)(data: => T) extends Component { - val io = new ioQueue(flushable)(data) + val io = new ioQueue(entries, flushable)(data) val do_enq = io.enq.ready && io.enq.valid val do_deq = io.deq.ready && io.deq.valid var enq_ptr = UFix(0) var deq_ptr = UFix(0) + val pow2 = (entries & (entries-1)) == 0 if (entries > 1) { enq_ptr = Reg(resetVal = UFix(0, log2Up(entries))) deq_ptr = Reg(resetVal = UFix(0, log2Up(entries))) - val pow2 = Bool((entries & (entries-1)) == 0) - when (do_deq) { - deq_ptr := Mux(!pow2 && deq_ptr === UFix(entries-1), UFix(0), deq_ptr + UFix(1)) - } - when (do_enq) { - enq_ptr := Mux(!pow2 && enq_ptr === UFix(entries-1), UFix(0), enq_ptr + UFix(1)) + var enq_next = enq_ptr + UFix(1) + var deq_next = deq_ptr + UFix(1) + if (!pow2) { + enq_next = Mux(enq_ptr === UFix(entries-1), UFix(0), enq_next) + deq_next = Mux(deq_ptr === UFix(entries-1), UFix(0), deq_next) } + + when (do_deq) { deq_ptr := deq_next } + when (do_enq) { enq_ptr := enq_next } if (flushable) { when (io.flush) { deq_ptr := UFix(0) @@ -53,9 +57,16 @@ class queue[T <: Data](entries: Int, pipe: Boolean = false, flushable: Boolean = val ram = Vec(entries) { Reg() { data } } when (do_enq) { ram(enq_ptr) := io.enq.bits } - io.deq.valid := maybe_full || enq_ptr != deq_ptr - io.enq.ready := !maybe_full || enq_ptr != deq_ptr || (if (pipe) io.deq.ready else Bool(false)) + val ptr_match = enq_ptr === deq_ptr + io.deq.valid := maybe_full || !ptr_match + io.enq.ready := !maybe_full || !ptr_match || (if (pipe) io.deq.ready else Bool(false)) io.deq.bits <> ram(deq_ptr) + + val ptr_diff = enq_ptr - deq_ptr + if (pow2) + io.count := Cat(maybe_full && ptr_match, ptr_diff).toUFix + else + io.count := Mux(ptr_match, Mux(maybe_full, UFix(entries), UFix(0)), Mux(deq_ptr > enq_ptr, UFix(entries) + ptr_diff, ptr_diff)) } object Queue diff --git a/rocket/src/main/scala/top.scala 
b/rocket/src/main/scala/top.scala index 13f99e0a..eb526ea2 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -30,28 +30,34 @@ class Top extends Component } val htif = new rocketHTIF(htif_width, NTILES, co) val hub = new CoherenceHubBroadcast(NTILES+1, co) + val llc_leaf = Mem(2048, seqRead = true) { Bits(width = 64) } + val llc = new DRAMSideLLC(2048, 8, 4, llc_leaf, llc_leaf) hub.io.tiles(NTILES) <> htif.io.mem + llc.io.cpu.req_cmd <> Queue(hub.io.mem.req_cmd) + llc.io.cpu.req_data <> Queue(hub.io.mem.req_data, REFILL_CYCLES) + hub.io.mem.resp <> llc.io.cpu.resp + // mux between main and backup memory ports val mem_serdes = new MemSerdes - val mem_cmdq = (new queue(1)) { new MemReqCmd } - mem_cmdq.io.enq <> hub.io.mem.req_cmd + val mem_cmdq = (new queue(2)) { new MemReqCmd } + mem_cmdq.io.enq <> llc.io.mem.req_cmd mem_cmdq.io.deq.ready := Mux(io.mem_backup_en, mem_serdes.io.wide.req_cmd.ready, io.mem.req_cmd.ready) io.mem.req_cmd.valid := mem_cmdq.io.deq.valid && !io.mem_backup_en io.mem.req_cmd.bits := mem_cmdq.io.deq.bits mem_serdes.io.wide.req_cmd.valid := mem_cmdq.io.deq.valid && io.mem_backup_en mem_serdes.io.wide.req_cmd.bits := mem_cmdq.io.deq.bits - val mem_dataq = (new queue(2)) { new MemData } - mem_dataq.io.enq <> hub.io.mem.req_data + val mem_dataq = (new queue(REFILL_CYCLES)) { new MemData } + mem_dataq.io.enq <> llc.io.mem.req_data mem_dataq.io.deq.ready := Mux(io.mem_backup_en, mem_serdes.io.wide.req_data.ready, io.mem.req_data.ready) io.mem.req_data.valid := mem_dataq.io.deq.valid && !io.mem_backup_en io.mem.req_data.bits := mem_dataq.io.deq.bits mem_serdes.io.wide.req_data.valid := mem_dataq.io.deq.valid && io.mem_backup_en mem_serdes.io.wide.req_data.bits := mem_dataq.io.deq.bits - hub.io.mem.resp.valid := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.valid, io.mem.resp.valid) - hub.io.mem.resp.bits := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.bits, io.mem.resp.bits) + llc.io.mem.resp.valid := 
Mux(io.mem_backup_en, mem_serdes.io.wide.resp.valid, io.mem.resp.valid) + llc.io.mem.resp.bits := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.bits, io.mem.resp.bits) // pad out the HTIF using a divided clock val hio = (new slowIO(clkdiv)) { Bits(width = htif_width) } From 429fcbed8e387eadfca0caefc8e9999d51fec65f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 11 Jul 2012 17:56:39 -0700 Subject: [PATCH 0429/1087] fix some LLC bugs --- rocket/src/main/scala/llc.scala | 39 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala index 71dfa536..e6f39c4a 100644 --- a/rocket/src/main/scala/llc.scala +++ b/rocket/src/main/scala/llc.scala @@ -103,7 +103,7 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component val repl_dirty = Bool(INPUT) val repl_tag = UFix(PADDR_BITS - OFFSET_BITS - log2Up(sets), INPUT) val data = (new FIFOIO) { new LLCDataReq(ways) } - val tag = (new PipeIO) { new Bundle { + val tag = (new FIFOIO) { new Bundle { val addr = UFix(width = PADDR_BITS - OFFSET_BITS) val way = UFix(width = log2Up(ways)) } } @@ -156,7 +156,7 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component val replays = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).refilled):_*) val replay = replays.orR val replayId = PriorityEncoder(replays) - when (replay && io.data.ready) { valid(replayId) := Bool(false) } + when (replay && io.data.ready && io.tag.ready) { valid(replayId) := Bool(false) } val writebacks = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).old_dirty):_*) val writeback = writebacks.orR @@ -166,7 +166,7 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component val conflicts = Cat(Bits(0), (0 until outstanding).map(i => valid(i) && io.cpu.bits.addr(log2Up(sets)-1, 0) === mshr(i).addr(log2Up(sets)-1, 0)):_*) io.cpu.ready := !conflicts.orR && 
!validBits.andR - io.data.valid := replay || writeback + io.data.valid := replay && io.tag.ready || writeback io.data.bits.rw := Bool(false) io.data.bits.tag := mshr(replayId).tag io.data.bits.isWriteback := Bool(true) @@ -318,7 +318,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da val memCmdArb = (new Arbiter(2)) { new MemReqCmd } val dataArb = (new Arbiter(2)) { new LLCDataReq(ways) } val mshr = new LLCMSHRFile(sets, ways, outstanding) - val tags = new BigMem(sets, 2, tagLeaf)(Bits(width = metaWidth*ways)) + val tags = new BigMem(sets, 1, tagLeaf)(Bits(width = metaWidth*ways)) val data = new LLCData(sets, ways, dataLeaf) val writeback = new LLCWriteback(2) @@ -333,31 +333,38 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da val s1 = Reg() { new MemReqCmd } when (io.cpu.req_cmd.valid && io.cpu.req_cmd.ready) { s1 := io.cpu.req_cmd.bits } - tags.io.en := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || mshr.io.tag.valid - tags.io.addr := Mux(initialize, initCount, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr))(log2Up(sets)-1,0)) - tags.io.rw := initialize || mshr.io.tag.valid - tags.io.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(Bool(false), Bool(true), mshr.io.tag.bits.addr(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) - tags.io.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(mshr.io.tag.bits.way))) - val stall_s2 = Bool() val s2_valid = Reg(resetVal = Bool(false)) s2_valid := s1_valid && !replay_s1 && !stall_s1 || stall_s2 val s2 = Reg() { new MemReqCmd } - when (s1_valid && !stall_s1 && !replay_s1) { s2 := s1 } - val s2_tags = Vec(ways) { Bits(width = metaWidth) } - for (i <- 0 until ways) s2_tags(i) := tags.io.rdata(metaWidth*(i+1)-1, metaWidth*i) + val s2_tags = Vec(ways) { Reg() { Bits(width = metaWidth) } } + when (s1_valid && !stall_s1 && !replay_s1) { + s2 := s1 
+ for (i <- 0 until ways) + s2_tags(i) := tags.io.rdata(metaWidth*(i+1)-1, metaWidth*i) + } val s2_hits = s2_tags.map(t => t(tagWidth) && s2.addr(s2.addr.width-1, s2.addr.width-tagWidth) === t(tagWidth-1, 0)) + val s2_hit_way = OHToUFix(s2_hits) val s2_hit = s2_hits.reduceLeft(_||_) - stall_s1 := initialize || mshr.io.tag.valid || s2_valid && !s2_hit || stall_s2 + val s2_hit_dirty = s2_tags(s2_hit_way)(tagWidth+1) val repl_way = LFSR16(s2_valid)(log2Up(ways)-1, 0) val repl_tag = s2_tags(repl_way).toUFix + val setDirty = s2_valid && s2.rw && s2_hit && !s2_hit_dirty + stall_s1 := initialize || mshr.io.tag.valid || setDirty || s2_valid && !s2_hit || stall_s2 + + tags.io.en := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || setDirty || mshr.io.tag.valid + tags.io.addr := Mux(initialize, initCount, Mux(setDirty, s2.addr, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)))(log2Up(sets)-1,0)) + tags.io.rw := initialize || setDirty || mshr.io.tag.valid + tags.io.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) + tags.io.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(Mux(setDirty, s2_hit_way, mshr.io.tag.bits.way)))) mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw mshr.io.cpu.bits := s2 mshr.io.repl_way := repl_way - mshr.io.repl_dirty := repl_tag(tagWidth).toBool + mshr.io.repl_dirty := repl_tag(tagWidth+1, tagWidth).andR mshr.io.repl_tag := repl_tag mshr.io.mem.resp := io.mem.resp + mshr.io.tag.ready := !setDirty data.io.req <> dataArb.io.out data.io.mem_resp := io.mem.resp @@ -379,7 +386,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da dataArb.io.in(0) <> mshr.io.data dataArb.io.in(1).valid := s2_valid && s2_hit dataArb.io.in(1).bits := s2 - dataArb.io.in(1).bits.way := OHToUFix(s2_hits) + 
dataArb.io.in(1).bits.way := s2_hit_way dataArb.io.in(1).bits.isWriteback := Bool(false) stall_s2 := s2_valid && !Mux(s2_hit, dataArb.io.in(1).ready, Mux(s2.rw, writeback.io.req(1).ready, mshr.io.cpu.ready)) From bac82762d354316fedaf4e4c23902a4c739ac7c6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 12 Jul 2012 14:50:12 -0700 Subject: [PATCH 0430/1087] use only one (wide) tag ram for set assoc. caches --- rocket/src/main/scala/icache.scala | 36 +++++++++--------- rocket/src/main/scala/nbdcache.scala | 57 ++++++++++------------------ 2 files changed, 39 insertions(+), 54 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 3f627f77..5318ce7a 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -92,32 +92,32 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; + val tag_array = Mem(sets, seqRead = true) { Bits(width = tagbits*assoc) } + val tag_rdata = Reg() { Bits() } + when (tag_we) { + tag_array.write(tag_addr, Fill(assoc, r_cpu_miss_tag), FillInterleaved(tagbits, if (assoc > 1) UFixToOH(repl_way) else Bits(1))) + }.otherwise { + tag_rdata := tag_array(tag_addr) + } + + val vb_array = Reg(resetVal = Bits(0, lines)) + when (io.cpu.invalidate) { + vb_array := Bits(0) + }.elsewhen (tag_we) { + vb_array := vb_array.bitSet(Cat(r_cpu_req_idx(indexmsb,indexlsb), if (assoc > 1) repl_way else null), !invalidated) + } + val data_mux = (new Mux1H(assoc)){Bits(width = databits)} var any_hit = Bool(false) for (i <- 0 until assoc) { - val repl_me = (repl_way === UFix(i)) - val tag_array = Mem(sets, seqRead = true){ Bits(width = tagbits) } - val tag_rdata = Reg() { Bits(width = tagbits) } - when (tag_we && repl_me) { tag_array(tag_addr) := r_cpu_miss_tag } - .otherwise { tag_rdata := 
tag_array(tag_addr) } - - // valid bit array - val vb_array = Reg(resetVal = Bits(0, sets)); - when (io.cpu.invalidate) { - vb_array := Bits(0) - } - .elsewhen (tag_we && repl_me) { - vb_array := vb_array.bitSet(r_cpu_req_idx(indexmsb,indexlsb).toUFix, !invalidated) - } - - val valid = vb_array(r_cpu_req_idx(indexmsb,indexlsb)).toBool; - val hit = valid && (tag_rdata === r_cpu_hit_addr(tagmsb,taglsb)) + val valid = vb_array(Cat(r_cpu_req_idx(indexmsb,indexlsb), if (assoc > 1) UFix(i, log2Up(assoc)) else null)) + val hit = valid && tag_rdata(tagbits*(i+1)-1, tagbits*i) === r_cpu_hit_addr(tagmsb,taglsb) // data array val data_array = Mem(sets*REFILL_CYCLES, seqRead = true){ io.mem.xact_rep.bits.data.clone } val data_out = Reg(){ io.mem.xact_rep.bits.data.clone } - when (io.mem.xact_rep.valid && repl_me) { data_array(data_addr) := io.mem.xact_rep.bits.data } + when (io.mem.xact_rep.valid && repl_way === UFix(i)) { data_array(data_addr) := io.mem.xact_rep.bits.data } .otherwise { data_out := data_array(data_addr) } data_mux.io.sel(i) := hit diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 1a0977c8..fdc7376a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -595,35 +595,6 @@ class FlushUnit(lines: Int, co: CoherencePolicy) extends Component { io.meta_req.bits.data.tag := UFix(0) } -class MetaDataArray(lines: Int) extends Component { - val io = new Bundle { - val req = (new FIFOIO) { new MetaArrayReq() }.flip - val resp = (new MetaData).asOutput() - val state_req = (new FIFOIO) { new MetaArrayReq() }.flip - } - - val permissions_array = Mem(lines){ UFix(width = 2) } - val raddr = Reg() { Bits() } - when (io.state_req.valid && io.state_req.bits.rw) { - permissions_array(io.state_req.bits.idx) := io.state_req.bits.data.state - } - when (io.req.valid) { - when (io.req.bits.rw) { permissions_array(io.req.bits.idx) := io.req.bits.data.state } - .otherwise { raddr := io.req.bits.idx } - 
} - - val tag_array = Mem(lines, seqRead = true){ Bits(width=TAG_BITS) } - val tag_rdata = Reg() { Bits() } - when (io.req.valid) { - when (io.req.bits.rw) { tag_array(io.req.bits.idx) := io.req.bits.data.tag } - .otherwise { tag_rdata := tag_array(io.req.bits.idx) } - } - - io.resp.state := permissions_array(raddr) - io.resp.tag := tag_rdata - io.req.ready := Bool(true) -} - class MetaDataArrayArray(lines: Int) extends Component { val io = new Bundle { val req = (new FIFOIO) { new MetaArrayReq() }.flip @@ -632,18 +603,32 @@ class MetaDataArrayArray(lines: Int) extends Component { val way_en = Bits(width = NWAYS, dir = OUTPUT) } + val permBits = io.req.bits.data.state.width + val perms = Mem(lines) { UFix(width = permBits*NWAYS) } + val tags = Mem(lines*NWAYS, seqRead = true) { Bits(width = TAG_BITS*NWAYS) } + val tag = Reg() { Bits() } + val raddr = Reg() { Bits() } val way_en_ = Reg { Bits(width=NWAYS) } - when (io.req.valid && io.req.ready) { + + when (io.state_req.valid && io.state_req.bits.rw) { + perms.write(io.state_req.bits.idx, Fill(NWAYS, io.state_req.bits.data.state), FillInterleaved(permBits, io.state_req.bits.way_en)) + } + when (io.req.valid) { + when (io.req.bits.rw) { + perms.write(io.req.bits.idx, Fill(NWAYS, io.req.bits.data.state), FillInterleaved(permBits, io.req.bits.way_en)) + tags.write(io.req.bits.idx, Fill(NWAYS, io.req.bits.data.tag), FillInterleaved(TAG_BITS, io.req.bits.way_en)) + } + .otherwise { + raddr := io.req.bits.idx + tag := tags(io.req.bits.idx) + } way_en_ := io.req.bits.way_en } + val perm = perms(raddr) for(w <- 0 until NWAYS) { - val way = new MetaDataArray(lines) - way.io.req.bits <> io.req.bits - way.io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool - way.io.state_req.bits <> io.state_req.bits - way.io.state_req.valid := io.state_req.valid && io.state_req.bits.way_en(w).toBool - way.io.resp <> io.resp(w) + io.resp(w).state := perm(permBits*(w+1)-1, permBits*w) + io.resp(w).tag := tag(TAG_BITS*(w+1)-1, 
TAG_BITS*w) } io.way_en := way_en_ From fd951598379724bbfc769d75ef29bc4bbf8978ae Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Thu, 12 Jul 2012 18:12:49 -0700 Subject: [PATCH 0431/1087] INPUT/OUTPUT orderring swapped --- rocket/src/main/scala/ctrl.scala | 34 ++++++------- rocket/src/main/scala/ctrl_util.scala | 4 +- rocket/src/main/scala/ctrl_vec.scala | 16 +++---- rocket/src/main/scala/dpath.scala | 8 ++-- rocket/src/main/scala/dpath_alu.scala | 12 ++--- rocket/src/main/scala/dpath_util.scala | 38 +++++++-------- rocket/src/main/scala/dpath_vec.scala | 34 ++++++------- rocket/src/main/scala/dtlb.scala | 4 +- rocket/src/main/scala/fpu.scala | 66 +++++++++++++------------- rocket/src/main/scala/icache.scala | 6 +-- rocket/src/main/scala/itlb.scala | 24 +++++----- rocket/src/main/scala/llc.scala | 14 +++--- rocket/src/main/scala/multiplier.scala | 8 ++-- rocket/src/main/scala/nbdcache.scala | 62 ++++++++++++------------ rocket/src/main/scala/ptw.scala | 2 +- rocket/src/main/scala/queues.scala | 2 +- rocket/src/main/scala/uncore.scala | 30 ++++++------ 17 files changed, 182 insertions(+), 182 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index fbb05994..2de93962 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -10,7 +10,7 @@ import hwacha._ class ioCtrlDpath extends Bundle() { // outputs to datapath - val sel_pc = UFix(3, OUTPUT); + val sel_pc = UFix(OUTPUT, 3); val wen_btb = Bool(OUTPUT); val clr_btb = Bool(OUTPUT); val stallf = Bool(OUTPUT); @@ -21,16 +21,16 @@ class ioCtrlDpath extends Bundle() val killm = Bool(OUTPUT); val ren2 = Bool(OUTPUT); val ren1 = Bool(OUTPUT); - val sel_alu2 = UFix(3, OUTPUT); + val sel_alu2 = UFix(OUTPUT, 3); val fn_dw = Bool(OUTPUT); - val fn_alu = UFix(4, OUTPUT); + val fn_alu = UFix(OUTPUT, 4); val mul_val = Bool(OUTPUT); - val mul_fn = UFix(2, OUTPUT); + val mul_fn = UFix(OUTPUT, 2); val div_val = Bool(OUTPUT); - val div_fn = UFix(2, OUTPUT); + val 
div_fn = UFix(OUTPUT, 2); val sel_wa = Bool(OUTPUT); - val sel_wb = UFix(3, OUTPUT); - val pcr = UFix(3, OUTPUT) + val sel_wb = UFix(OUTPUT, 3); + val pcr = UFix(OUTPUT, 3) val id_eret = Bool(OUTPUT); val wb_eret = Bool(OUTPUT); val mem_load = Bool(OUTPUT); @@ -41,16 +41,16 @@ class ioCtrlDpath extends Bundle() val wb_wen = Bool(OUTPUT); val wb_valid = Bool(OUTPUT) val flush_inst = Bool(OUTPUT); - val ex_mem_type = UFix(3,OUTPUT) + val ex_mem_type = UFix(OUTPUT, 3) // exception handling val exception = Bool(OUTPUT); - val cause = UFix(6,OUTPUT); + val cause = UFix(OUTPUT, 6); val badvaddr_wen = Bool(OUTPUT); // high for a load/store access fault val vec_irq_aux_wen = Bool(OUTPUT) // inputs from datapath val xcpt_ma_inst = Bool(INPUT); // high on a misaligned/illegal virtual PC val btb_hit = Bool(INPUT); - val inst = Bits(32, INPUT); + val inst = Bits(INPUT, 32); val br_eq = Bool(INPUT); val br_lt = Bool(INPUT); val br_ltu = Bool(INPUT); @@ -59,15 +59,15 @@ class ioCtrlDpath extends Bundle() val mul_rdy = Bool(INPUT); val mul_result_val = Bool(INPUT); val mem_wb = Bool(INPUT); - val ex_waddr = UFix(5,INPUT); // write addr from execute stage - val mem_waddr = UFix(5,INPUT); // write addr from memory stage - val wb_waddr = UFix(5,INPUT); // write addr from writeback stage - val status = Bits(32, INPUT); + val ex_waddr = UFix(INPUT, 5); // write addr from execute stage + val mem_waddr = UFix(INPUT, 5); // write addr from memory stage + val wb_waddr = UFix(INPUT, 5); // write addr from writeback stage + val status = Bits(INPUT, 32); val sboard_clr = Bool(INPUT); - val sboard_clra = UFix(5, INPUT); + val sboard_clra = UFix(INPUT, 5); val fp_sboard_clr = Bool(INPUT); - val fp_sboard_clra = UFix(5, INPUT); - val fp_sboard_wb_waddr = UFix(5, INPUT); + val fp_sboard_clra = UFix(INPUT, 5); + val fp_sboard_wb_waddr = UFix(INPUT, 5); val irq_timer = Bool(INPUT); val irq_ipi = Bool(INPUT); } diff --git a/rocket/src/main/scala/ctrl_util.scala 
b/rocket/src/main/scala/ctrl_util.scala index 77d855bb..416033a6 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -6,12 +6,12 @@ import Node._; class rocketCtrlSboard(entries: Int, nread: Int, nwrite: Int) extends Component { class read_port extends Bundle { - val addr = UFix(log2Up(entries), INPUT) + val addr = UFix(INPUT, log2Up(entries)) val data = Bool(OUTPUT) } class write_port extends Bundle { val en = Bool(INPUT) - val addr = UFix(log2Up(entries), INPUT) + val addr = UFix(INPUT, log2Up(entries)) val data = Bool(INPUT) } diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 7e4d2eec..4fe89d5d 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -7,14 +7,14 @@ import Instructions._ class ioCtrlDpathVec extends Bundle { - val inst = Bits(32, INPUT) + val inst = Bits(INPUT, 32) val appvl0 = Bool(INPUT) val pfq = Bool(INPUT) val wen = Bool(OUTPUT) - val fn = Bits(2, OUTPUT) - val sel_vcmd = Bits(3, OUTPUT) - val sel_vimm = Bits(1, OUTPUT) - val sel_vimm2 = Bits(1, OUTPUT) + val fn = Bits(OUTPUT, 2) + val sel_vcmd = Bits(OUTPUT, 3) + val sel_vimm = Bits(OUTPUT, 1) + val sel_vimm2 = Bits(OUTPUT, 1) } class ioCtrlVecInterface extends Bundle @@ -43,7 +43,7 @@ class ioCtrlVecInterface extends Bundle val vfence_ready = Bool(INPUT) val irq = Bool(INPUT) - val irq_cause = UFix(5, INPUT) + val irq_cause = UFix(INPUT, 5) val exception = Bool(OUTPUT) @@ -64,7 +64,7 @@ class ioCtrlVec extends Bundle val replay = Bool(OUTPUT) val vfence_ready = Bool(OUTPUT) val irq = Bool(OUTPUT) - val irq_cause = UFix(5, OUTPUT) + val irq_cause = UFix(OUTPUT, 5) } class rocketCtrlVecSigs extends Bundle @@ -95,7 +95,7 @@ class rocketCtrlVecDecoder extends Component { val io = new Bundle { - val inst = Bits(32, INPUT) + val inst = Bits(INPUT, 32) val sigs = new rocketCtrlVecSigs().asOutput } diff --git a/rocket/src/main/scala/dpath.scala 
b/rocket/src/main/scala/dpath.scala index 79e00bae..df1830c3 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -9,8 +9,8 @@ import hwacha._ class ioDpathImem extends Bundle() { - val req_addr = UFix(VADDR_BITS+1, OUTPUT); - val resp_data = Bits(32, INPUT); + val req_addr = UFix(OUTPUT, VADDR_BITS+1); + val resp_data = Bits(INPUT, 32); } class ioDpathAll extends Bundle() @@ -21,12 +21,12 @@ class ioDpathAll extends Bundle() val dtlb = new ioDTLB_CPU_req_bundle().asOutput() val imem = new ioDpathImem(); val ptbr_wen = Bool(OUTPUT); - val ptbr = UFix(PADDR_BITS, OUTPUT); + val ptbr = UFix(OUTPUT, PADDR_BITS); val fpu = new ioDpathFPU(); val vec_ctrl = new ioCtrlDpathVec().flip val vec_iface = new ioDpathVecInterface() val vec_imul_req = new io_imul_req - val vec_imul_resp = Bits(hwacha.Constants.SZ_XLEN, INPUT) + val vec_imul_resp = Bits(INPUT, hwacha.Constants.SZ_XLEN) } class rocketDpath extends Component diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 4a0bb607..25fc2956 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -7,12 +7,12 @@ import Constants._ import Instructions._ class ioALU extends Bundle(){ - val dw = UFix(1, INPUT); - val fn = UFix(4, INPUT); - val in2 = UFix(64, INPUT); - val in1 = UFix(64, INPUT); - val out = UFix(64, OUTPUT); - val adder_out = UFix(64, OUTPUT); + val dw = UFix(INPUT, 1); + val fn = UFix(INPUT, 4); + val in2 = UFix(INPUT, 64); + val in1 = UFix(INPUT, 64); + val out = UFix(OUTPUT, 64); + val adder_out = UFix(OUTPUT, 64); } class rocketDpathALU extends Component diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index d93149fa..ac1a0b55 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -7,14 +7,14 @@ import scala.math._; class ioDpathBTB extends Bundle() { - val current_pc = UFix(VADDR_BITS, INPUT); + val 
current_pc = UFix(INPUT, VADDR_BITS); val hit = Bool(OUTPUT); - val target = UFix(VADDR_BITS, OUTPUT); + val target = UFix(OUTPUT, VADDR_BITS); val wen = Bool(INPUT); val clr = Bool(INPUT); val invalidate = Bool(INPUT); - val correct_pc = UFix(VADDR_BITS, INPUT); - val correct_target = UFix(VADDR_BITS, INPUT); + val correct_pc = UFix(INPUT, VADDR_BITS); + val correct_target = UFix(INPUT, VADDR_BITS); } // fully-associative branch target buffer @@ -63,26 +63,26 @@ class ioDpathPCR extends Bundle() val r = new ioReadPort(); val w = new ioWritePort(); - val status = Bits(32, OUTPUT); - val ptbr = UFix(PADDR_BITS, OUTPUT); - val evec = UFix(VADDR_BITS, OUTPUT); + val status = Bits(OUTPUT, 32); + val ptbr = UFix(OUTPUT, PADDR_BITS); + val evec = UFix(OUTPUT, VADDR_BITS); val exception = Bool(INPUT); - val cause = UFix(6, INPUT); + val cause = UFix(INPUT, 6); val badvaddr_wen = Bool(INPUT); - val vec_irq_aux = Bits(64, INPUT) + val vec_irq_aux = Bits(INPUT, 64) val vec_irq_aux_wen = Bool(INPUT) - val pc = UFix(VADDR_BITS+1, INPUT); + val pc = UFix(INPUT, VADDR_BITS+1); val eret = Bool(INPUT); val ei = Bool(INPUT); val di = Bool(INPUT); val ptbr_wen = Bool(OUTPUT); val irq_timer = Bool(OUTPUT); val irq_ipi = Bool(OUTPUT); - val vecbank = Bits(8, OUTPUT) - val vecbankcnt = UFix(4, OUTPUT) - val vec_appvl = UFix(12, INPUT) - val vec_nxregs = UFix(6, INPUT) - val vec_nfregs = UFix(6, INPUT) + val vecbank = Bits(OUTPUT, 8) + val vecbankcnt = UFix(OUTPUT, 4) + val vec_appvl = UFix(INPUT, 12) + val vec_nxregs = UFix(INPUT, 6) + val vec_nfregs = UFix(INPUT, 6) } class rocketDpathPCR extends Component @@ -228,16 +228,16 @@ class rocketDpathPCR extends Component class ioReadPort extends Bundle() { - val addr = UFix(5, INPUT); + val addr = UFix(INPUT, 5); val en = Bool(INPUT); - val data = Bits(64, OUTPUT); + val data = Bits(OUTPUT, 64); } class ioWritePort extends Bundle() { - val addr = UFix(5, INPUT); + val addr = UFix(INPUT, 5); val en = Bool(INPUT); - val data = Bits(64, 
INPUT); + val data = Bits(INPUT, 64); } class ioRegfile extends Bundle() diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 3c2c60c7..6f5f228f 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -8,13 +8,13 @@ import hwacha.Constants._ class ioDpathVecInterface extends Bundle { - val vcmdq_bits = Bits(SZ_VCMD, OUTPUT) - val vximm1q_bits = Bits(SZ_VIMM, OUTPUT) - val vximm2q_bits = Bits(SZ_VSTRIDE, OUTPUT) - val vcntq_bits = Bits(SZ_VLEN, OUTPUT) + val vcmdq_bits = Bits(OUTPUT, SZ_VCMD) + val vximm1q_bits = Bits(OUTPUT, SZ_VIMM) + val vximm2q_bits = Bits(OUTPUT, SZ_VSTRIDE) + val vcntq_bits = Bits(OUTPUT, SZ_VLEN) val vcntq_last = Bool(OUTPUT) - val evac_addr = Bits(64, OUTPUT) - val irq_aux = Bits(64, INPUT) + val evac_addr = Bits(OUTPUT, 64) + val irq_aux = Bits(INPUT, 64) } class ioDpathVec extends Bundle @@ -22,18 +22,18 @@ class ioDpathVec extends Bundle val ctrl = new ioCtrlDpathVec().flip val iface = new ioDpathVecInterface() val valid = Bool(INPUT) - val inst = Bits(32, INPUT) - val waddr = UFix(5, INPUT) - val raddr1 = UFix(5, INPUT) - val vecbank = Bits(8, INPUT) - val vecbankcnt = UFix(4, INPUT) - val wdata = Bits(64, INPUT) - val rs2 = Bits(64, INPUT) + val inst = Bits(INPUT, 32) + val waddr = UFix(INPUT, 5) + val raddr1 = UFix(INPUT, 5) + val vecbank = Bits(INPUT, 8) + val vecbankcnt = UFix(INPUT, 4) + val wdata = Bits(INPUT, 64) + val rs2 = Bits(INPUT, 64) val wen = Bool(OUTPUT) - val irq_aux = Bits(64, OUTPUT) - val appvl = UFix(12, OUTPUT) - val nxregs = UFix(6, OUTPUT) - val nfregs = UFix(6, OUTPUT) + val irq_aux = Bits(OUTPUT, 64) + val appvl = UFix(OUTPUT, 12) + val nxregs = UFix(OUTPUT, 6) + val nfregs = UFix(OUTPUT, 6) } class rocketDpathVec extends Component diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 7d1b6b18..99018351 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -23,7 
+23,7 @@ class ioDTLB_CPU_resp extends Bundle { // lookup responses val miss = Bool(OUTPUT) - val ppn = Bits(PPN_BITS, OUTPUT) + val ppn = Bits(OUTPUT, PPN_BITS) val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) val xcpt_pf = Bool(OUTPUT) @@ -32,7 +32,7 @@ class ioDTLB_CPU_resp extends Bundle class ioDTLB extends Bundle { // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(32, INPUT) + val status = Bits(INPUT, 32) // invalidate all TLB entries val invalidate = Bool(INPUT) val cpu_req = new ioDTLB_CPU_req().flip diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index d0a249bc..9f70d637 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -67,7 +67,7 @@ class rocketFPUCtrlSigs extends Bundle class rocketFPUDecoder extends Component { val io = new Bundle { - val inst = Bits(32, INPUT) + val inst = Bits(INPUT, 32) val sigs = new rocketFPUCtrlSigs().asOutput } @@ -154,16 +154,16 @@ class rocketFPUDecoder extends Component } class ioDpathFPU extends Bundle { - val inst = Bits(32, OUTPUT) - val fromint_data = Bits(64, OUTPUT) + val inst = Bits(OUTPUT, 32) + val fromint_data = Bits(OUTPUT, 64) - val store_data = Bits(64, INPUT) - val toint_data = Bits(64, INPUT) + val store_data = Bits(INPUT, 64) + val toint_data = Bits(INPUT, 64) val dmem_resp_val = Bool(OUTPUT) - val dmem_resp_type = Bits(3, OUTPUT) - val dmem_resp_tag = UFix(5, OUTPUT) - val dmem_resp_data = Bits(64, OUTPUT) + val dmem_resp_type = Bits(OUTPUT, 3) + val dmem_resp_tag = UFix(OUTPUT, 5) + val dmem_resp_data = Bits(OUTPUT, 64) } class ioCtrlFPU extends Bundle { @@ -174,23 +174,23 @@ class ioCtrlFPU extends Bundle { val killm = Bool(OUTPUT) val dec = new rocketFPUCtrlSigs().asInput val sboard_clr = Bool(INPUT) - val sboard_clra = UFix(5, INPUT) + val sboard_clra = UFix(INPUT, 5) } class rocketFPIntUnit extends Component { val io = new Bundle { val single = Bool(INPUT) - val cmd = 
Bits(FCMD_WIDTH, INPUT) - val rm = Bits(3, INPUT) - val fsr = Bits(FSR_WIDTH, INPUT) - val in1 = Bits(65, INPUT) - val in2 = Bits(65, INPUT) + val cmd = Bits(INPUT, FCMD_WIDTH) + val rm = Bits(INPUT, 3) + val fsr = Bits(INPUT, FSR_WIDTH) + val in1 = Bits(INPUT, 65) + val in2 = Bits(INPUT, 65) val lt_s = Bool(OUTPUT) val lt_d = Bool(OUTPUT) - val store_data = Bits(64, OUTPUT) - val toint_data = Bits(64, OUTPUT) - val exc = Bits(5, OUTPUT) + val store_data = Bits(OUTPUT, 64) + val toint_data = Bits(OUTPUT, 64) + val exc = Bits(OUTPUT, 5) } val unrec_s = hardfloat.recodedFloatNToFloatN(io.in1, 23, 9) @@ -253,17 +253,17 @@ class rocketFPUFastPipe extends Component { val io = new Bundle { val single = Bool(INPUT) - val cmd = Bits(FCMD_WIDTH, INPUT) - val rm = Bits(3, INPUT) - val fromint = Bits(64, INPUT) - val in1 = Bits(65, INPUT) - val in2 = Bits(65, INPUT) + val cmd = Bits(INPUT, FCMD_WIDTH) + val rm = Bits(INPUT, 3) + val fromint = Bits(INPUT, 64) + val in1 = Bits(INPUT, 65) + val in2 = Bits(INPUT, 65) val lt_s = Bool(INPUT) val lt_d = Bool(INPUT) - val out_s = Bits(33, OUTPUT) - val exc_s = Bits(5, OUTPUT) - val out_d = Bits(65, OUTPUT) - val exc_d = Bits(5, OUTPUT) + val out_s = Bits(OUTPUT, 33) + val exc_s = Bits(OUTPUT, 5) + val out_d = Bits(OUTPUT, 65) + val exc_d = Bits(OUTPUT, 5) } val i2s = hardfloat.anyToRecodedFloatN(io.fromint, io.rm, ~io.cmd(1,0), 23, 9, 64) @@ -339,13 +339,13 @@ class rocketFPUFastPipe extends Component class ioFMA(width: Int) extends Bundle { val valid = Bool(INPUT) - val cmd = Bits(FCMD_WIDTH, INPUT) - val rm = Bits(3, INPUT) - val in1 = Bits(width, INPUT) - val in2 = Bits(width, INPUT) - val in3 = Bits(width, INPUT) - val out = Bits(width, OUTPUT) - val exc = Bits(5, OUTPUT) + val cmd = Bits(INPUT, FCMD_WIDTH) + val rm = Bits(INPUT, 3) + val in1 = Bits(INPUT, width) + val in2 = Bits(INPUT, width) + val in3 = Bits(INPUT, width) + val out = Bits(OUTPUT, width) + val exc = Bits(OUTPUT, 5) } class rocketFPUSFMAPipe(latency: Int) extends 
Component diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 5318ce7a..fec70226 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -11,9 +11,9 @@ class ioImem extends Bundle val invalidate = Bool(INPUT); val itlb_miss = Bool(INPUT); val req_val = Bool(INPUT); - val req_idx = Bits(PGIDX_BITS, INPUT); - val req_ppn = Bits(PPN_BITS, INPUT); - val resp_data = Bits(32, OUTPUT); + val req_idx = Bits(INPUT, PGIDX_BITS); + val req_ppn = Bits(INPUT, PPN_BITS); + val resp_data = Bits(OUTPUT, 32); val resp_val = Bool(OUTPUT); } diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index d11fc359..dac04fa2 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -8,14 +8,14 @@ import scala.math._; class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { val clear = Bool(INPUT); val clear_hit = Bool(INPUT) - val tag = Bits(tag_bits, INPUT); + val tag = Bits(INPUT, tag_bits); val hit = Bool(OUTPUT); - val hit_addr = UFix(addr_bits, OUTPUT); - val valid_bits = Bits(entries, OUTPUT); + val hit_addr = UFix(OUTPUT, addr_bits); + val valid_bits = Bits(OUTPUT, entries); val write = Bool(INPUT); - val write_tag = Bits(tag_bits, INPUT); - val write_addr = UFix(addr_bits, INPUT); + val write_tag = Bits(INPUT, tag_bits); + val write_addr = UFix(INPUT, addr_bits); } class rocketCAM(entries: Int, tag_bits: Int) extends Component { @@ -78,30 +78,30 @@ class ioTLB_PTW extends Bundle // requests val req_val = Bool(OUTPUT); val req_rdy = Bool(INPUT); - val req_vpn = Bits(VPN_BITS, OUTPUT); + val req_vpn = Bits(OUTPUT, VPN_BITS); // responses val resp_val = Bool(INPUT); val resp_err = Bool(INPUT); - val resp_ppn = Bits(PPN_BITS, INPUT); - val resp_perm = Bits(PERM_BITS, INPUT); + val resp_ppn = Bits(INPUT, PPN_BITS); + val resp_perm = Bits(INPUT, PERM_BITS); } // interface between ITLB and fetch stage of pipeline class ioITLB_CPU extends 
Bundle { // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(32, INPUT); + val status = Bits(INPUT, 32); // invalidate all TLB entries val invalidate = Bool(INPUT); // lookup requests val req_val = Bool(INPUT); val req_rdy = Bool(OUTPUT); - val req_asid = Bits(ASID_BITS, INPUT); - val req_vpn = UFix(VPN_BITS+1, INPUT); + val req_asid = Bits(INPUT, ASID_BITS); + val req_vpn = UFix(INPUT, VPN_BITS+1); // lookup responses val resp_miss = Bool(OUTPUT); // val resp_val = Bool(OUTPUT); - val resp_ppn = UFix(PPN_BITS, OUTPUT); + val resp_ppn = UFix(OUTPUT, PPN_BITS); val exception = Bool(OUTPUT); } diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala index e6f39c4a..5f5007d1 100644 --- a/rocket/src/main/scala/llc.scala +++ b/rocket/src/main/scala/llc.scala @@ -7,7 +7,7 @@ import Constants._ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) extends Component { val io = new Bundle { - val addr = UFix(log2Up(n), INPUT) + val addr = UFix(INPUT, log2Up(n)) val en = Bool(INPUT) val rw = Bool(INPUT) val wdata = gen.asInput @@ -99,17 +99,17 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component { val io = new Bundle { val cpu = (new FIFOIO) { new MemReqCmd }.flip - val repl_way = UFix(log2Up(ways), INPUT) + val repl_way = UFix(INPUT, log2Up(ways)) val repl_dirty = Bool(INPUT) - val repl_tag = UFix(PADDR_BITS - OFFSET_BITS - log2Up(sets), INPUT) + val repl_tag = UFix(INPUT, PADDR_BITS - OFFSET_BITS - log2Up(sets)) val data = (new FIFOIO) { new LLCDataReq(ways) } val tag = (new FIFOIO) { new Bundle { val addr = UFix(width = PADDR_BITS - OFFSET_BITS) val way = UFix(width = log2Up(ways)) } } val mem = new ioMem - val mem_resp_set = UFix(log2Up(sets), OUTPUT) - val mem_resp_way = UFix(log2Up(ways), OUTPUT) + val mem_resp_set = UFix(OUTPUT, log2Up(sets)) + val mem_resp_way = UFix(OUTPUT, log2Up(ways)) } class MSHR extends Bundle { @@ -247,8 +247,8 @@ class 
LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component val writeback_data = (new FIFOIO) { new MemData } val resp = (new PipeIO) { new MemResp } val mem_resp = (new PipeIO) { new MemResp }.flip - val mem_resp_set = UFix(log2Up(sets), INPUT) - val mem_resp_way = UFix(log2Up(ways), INPUT) + val mem_resp_set = UFix(INPUT, log2Up(sets)) + val mem_resp_way = UFix(INPUT, log2Up(ways)) } val data = new BigMem(sets*ways*REFILL_CYCLES, 2, leaf)(Bits(width = MEM_DATA_BITS)) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index a0e69332..9d3145b5 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -8,12 +8,12 @@ import hwacha.Constants._ class ioMultiplier extends Bundle { val req = new io_imul_req().flip - val req_tag = Bits(5, INPUT) + val req_tag = Bits(INPUT, 5) val req_kill = Bool(INPUT) val resp_val = Bool(OUTPUT) val resp_rdy = Bool(INPUT) - val resp_tag = Bits(5, OUTPUT) - val resp_bits = Bits(SZ_XLEN, OUTPUT) + val resp_tag = Bits(OUTPUT, 5) + val resp_bits = Bits(OUTPUT, SZ_XLEN) } class rocketVUMultiplier(nwbq: Int) extends Component { @@ -21,7 +21,7 @@ class rocketVUMultiplier(nwbq: Int) extends Component { val cpu = new ioMultiplier val vu = new Bundle { val req = new io_imul_req - val resp = Bits(SZ_XLEN, INPUT) + val resp = Bits(INPUT, SZ_XLEN) } } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index fdc7376a..c99cc0e0 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -22,9 +22,9 @@ class RandomReplacementWayGen extends Component { class StoreMaskGen extends Component { val io = new Bundle { - val typ = Bits(3, INPUT) - val addr = Bits(3, INPUT) - val wmask = Bits(8, OUTPUT) + val typ = Bits(INPUT, 3) + val addr = Bits(INPUT, 3) + val wmask = Bits(OUTPUT, 8) } val word = (io.typ === MT_W) || (io.typ === MT_WU) @@ -39,9 +39,9 @@ class StoreMaskGen extends Component { class 
StoreDataGen extends Component { val io = new Bundle { - val typ = Bits(3, INPUT) - val din = Bits(64, INPUT) - val dout = Bits(64, OUTPUT) + val typ = Bits(INPUT, 3) + val din = Bits(INPUT, 64) + val dout = Bits(OUTPUT, 64) } val word = (io.typ === MT_W) || (io.typ === MT_WU) @@ -57,12 +57,12 @@ class StoreDataGen extends Component { // this currently requires that CPU_DATA_BITS == 64 class LoadDataGen extends Component { val io = new Bundle { - val typ = Bits(3, INPUT) - val addr = Bits(log2Up(MEM_DATA_BITS/8), INPUT) - val din = Bits(MEM_DATA_BITS, INPUT) - val dout = Bits(64, OUTPUT) - val r_dout = Bits(64, OUTPUT) - val r_dout_subword = Bits(64, OUTPUT) + val typ = Bits(INPUT, 3) + val addr = Bits(INPUT, log2Up(MEM_DATA_BITS/8)) + val din = Bits(INPUT, MEM_DATA_BITS) + val dout = Bits(OUTPUT, 64) + val r_dout = Bits(OUTPUT, 64) + val r_dout_subword = Bits(OUTPUT, 64) } val sext = (io.typ === MT_B) || (io.typ === MT_H) || @@ -165,13 +165,13 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { val req_sec_val = Bool(INPUT) val req_sec_rdy = Bool(OUTPUT) val req_bits = new MSHRReq().asInput - val req_sdq_id = UFix(log2Up(NSDQ), INPUT) + val req_sdq_id = UFix(INPUT, log2Up(NSDQ)) val idx_match = Bool(OUTPUT) - val idx = Bits(IDX_BITS, OUTPUT) - val refill_count = Bits(log2Up(REFILL_CYCLES), OUTPUT) - val tag = Bits(TAG_BITS, OUTPUT) - val way_oh = Bits(NWAYS, OUTPUT) + val idx = Bits(OUTPUT, IDX_BITS) + val refill_count = Bits(OUTPUT, log2Up(REFILL_CYCLES)) + val tag = Bits(OUTPUT, TAG_BITS) + val way_oh = Bits(OUTPUT, NWAYS) val mem_req = (new FIFOIO) { new TransactionInit } val meta_req = (new FIFOIO) { new MetaArrayReq() } @@ -297,9 +297,9 @@ class MSHRFile(co: CoherencePolicy) extends Component { val req = (new FIFOIO) { new MSHRReq }.flip val secondary_miss = Bool(OUTPUT) - val mem_resp_idx = Bits(IDX_BITS, OUTPUT) - val mem_resp_offset = Bits(log2Up(REFILL_CYCLES), OUTPUT) - val mem_resp_way_oh = Bits(NWAYS, OUTPUT) + val mem_resp_idx = 
Bits(OUTPUT, IDX_BITS) + val mem_resp_offset = Bits(OUTPUT, log2Up(REFILL_CYCLES)) + val mem_resp_way_oh = Bits(OUTPUT, NWAYS) val fence_rdy = Bool(OUTPUT) @@ -313,7 +313,7 @@ class MSHRFile(co: CoherencePolicy) extends Component { val probe = (new FIFOIO) { Bool() }.flip val cpu_resp_val = Bool(OUTPUT) - val cpu_resp_tag = Bits(DCACHE_TAG_BITS, OUTPUT) + val cpu_resp_tag = Bits(OUTPUT, DCACHE_TAG_BITS) } val sdq_val = Reg(resetVal = Bits(0, NSDQ)) @@ -419,7 +419,7 @@ class WritebackUnit(co: CoherencePolicy) extends Component { val req = (new FIFOIO) { new WritebackReq() }.flip val probe = (new FIFOIO) { new WritebackReq() }.flip val data_req = (new FIFOIO) { new DataArrayReq() } - val data_resp = Bits(MEM_DATA_BITS, INPUT) + val data_resp = Bits(INPUT, MEM_DATA_BITS) val mem_req = (new FIFOIO) { new TransactionInit } val mem_req_data = (new FIFOIO) { new TransactionInitData } val probe_rep_data = (new FIFOIO) { new ProbeReplyData } @@ -490,9 +490,9 @@ class ProbeUnit(co: CoherencePolicy) extends Component { val meta_req = (new FIFOIO) { new MetaArrayReq } val mshr_req = (new FIFOIO) { Bool() } val wb_req = (new FIFOIO) { new WritebackReq } - val tag_match_way_oh = Bits(NWAYS, INPUT) - val line_state = UFix(2, INPUT) - val address = Bits(PADDR_BITS-OFFSET_BITS, OUTPUT) + val tag_match_way_oh = Bits(INPUT, NWAYS) + val line_state = UFix(INPUT, 2) + val address = Bits(OUTPUT, PADDR_BITS-OFFSET_BITS) } val s_invalid :: s_meta_req :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: Nil = Enum(7) { UFix() } @@ -659,7 +659,7 @@ class DataArray(lines: Int) extends Component { class DataArrayArray(lines: Int) extends Component { val io = new Bundle { val req = (new FIFOIO) { new DataArrayReq() }.flip - val resp = Vec(NWAYS){ Bits(width = MEM_DATA_BITS, dir = OUTPUT) } + val resp = Vec(NWAYS){ Bits(dir = OUTPUT, width = MEM_DATA_BITS) } val way_en = Bits(width = NWAYS, dir = OUTPUT) } @@ -681,11 +681,11 @@ class DataArrayArray(lines: Int) 
extends Component { class AMOALU extends Component { val io = new Bundle { - val cmd = Bits(4, INPUT) - val typ = Bits(3, INPUT) - val lhs = UFix(64, INPUT) - val rhs = UFix(64, INPUT) - val out = UFix(64, OUTPUT) + val cmd = Bits(INPUT, 4) + val typ = Bits(INPUT, 3) + val lhs = UFix(INPUT, 64) + val rhs = UFix(INPUT, 64) + val out = UFix(OUTPUT, 64) } val sgned = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MAX) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index f1d05e06..d0c540c5 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -75,7 +75,7 @@ class ioPTW(n: Int) extends Bundle { val requestor = Vec(n) { new ioTLB_PTW }.flip val mem = new ioHellaCache - val ptbr = UFix(PADDR_BITS, INPUT) + val ptbr = UFix(INPUT, PADDR_BITS) } class rocketPTW(n: Int) extends Component diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index d78df502..752f986b 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -8,7 +8,7 @@ class ioQueue[T <: Data](entries: Int, flushable: Boolean)(data: => T) extends B val flush = if (flushable) Bool(INPUT) else null val enq = new FIFOIO()(data).flip val deq = new FIFOIO()(data) - val count = UFix(log2Up(entries+1), OUTPUT) + val count = UFix(OUTPUT, log2Up(entries+1)) } class queue[T <: Data](val entries: Int, pipe: Boolean = false, flushable: Boolean = false)(data: => T) extends Component diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 6239981f..7e34492c 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -56,8 +56,8 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val p_data = (new PipeIO) { new TrackerProbeData }.flip val can_alloc = Bool(INPUT) val xact_finish = Bool(INPUT) - val p_rep_cnt_dec = Bits(ntiles, INPUT) - val p_req_cnt_inc = Bits(ntiles, INPUT) + val p_rep_cnt_dec = 
Bits(INPUT, ntiles) + val p_req_cnt_inc = Bits(INPUT, ntiles) val p_rep_data = (new PipeIO) { new ProbeReplyData }.flip val x_init_data = (new PipeIO) { new TransactionInitData }.flip val sent_x_rep_ack = Bool(INPUT) @@ -69,19 +69,19 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val mem_req_lock = Bool(OUTPUT) val probe_req = (new FIFOIO) { new ProbeRequest } val busy = Bool(OUTPUT) - val addr = Bits(PADDR_BITS - OFFSET_BITS, OUTPUT) - val init_tile_id = Bits(TILE_ID_BITS, OUTPUT) - val p_rep_tile_id = Bits(TILE_ID_BITS, OUTPUT) - val tile_xact_id = Bits(TILE_XACT_ID_BITS, OUTPUT) - val sharer_count = Bits(TILE_ID_BITS+1, OUTPUT) - val x_type = Bits(X_INIT_TYPE_MAX_BITS, OUTPUT) - val push_p_req = Bits(ntiles, OUTPUT) - val pop_p_rep = Bits(ntiles, OUTPUT) - val pop_p_rep_data = Bits(ntiles, OUTPUT) - val pop_p_rep_dep = Bits(ntiles, OUTPUT) - val pop_x_init = Bits(ntiles, OUTPUT) - val pop_x_init_data = Bits(ntiles, OUTPUT) - val pop_x_init_dep = Bits(ntiles, OUTPUT) + val addr = Bits(OUTPUT, PADDR_BITS - OFFSET_BITS) + val init_tile_id = Bits(OUTPUT, TILE_ID_BITS) + val p_rep_tile_id = Bits(OUTPUT, TILE_ID_BITS) + val tile_xact_id = Bits(OUTPUT, TILE_XACT_ID_BITS) + val sharer_count = Bits(OUTPUT, TILE_ID_BITS+1) + val x_type = Bits(OUTPUT, X_INIT_TYPE_MAX_BITS) + val push_p_req = Bits(OUTPUT, ntiles) + val pop_p_rep = Bits(OUTPUT, ntiles) + val pop_p_rep_data = Bits(OUTPUT, ntiles) + val pop_p_rep_dep = Bits(OUTPUT, ntiles) + val pop_x_init = Bits(OUTPUT, ntiles) + val pop_x_init_data = Bits(OUTPUT, ntiles) + val pop_x_init_dep = Bits(OUTPUT, ntiles) val send_x_rep_ack = Bool(OUTPUT) } From e496cd7584a97c349326b8d825e4bd001be98b08 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 13 Jul 2012 21:48:05 -0700 Subject: [PATCH 0432/1087] use Mem to implement queues to speed things up --- rocket/src/main/scala/queues.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/queues.scala 
b/rocket/src/main/scala/queues.scala index 752f986b..7c6d2b65 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -54,7 +54,7 @@ class queue[T <: Data](val entries: Int, pipe: Boolean = false, flushable: Boole } } - val ram = Vec(entries) { Reg() { data } } + val ram = Mem(entries) { data } when (do_enq) { ram(enq_ptr) := io.enq.bits } val ptr_match = enq_ptr === deq_ptr From f633a557222b1e9ff3c7f2da79b5b4e40fa1fc64 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 16 Jul 2012 22:19:03 -0700 Subject: [PATCH 0433/1087] fix dcache tag array size --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c99cc0e0..c9a172a2 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -605,7 +605,7 @@ class MetaDataArrayArray(lines: Int) extends Component { val permBits = io.req.bits.data.state.width val perms = Mem(lines) { UFix(width = permBits*NWAYS) } - val tags = Mem(lines*NWAYS, seqRead = true) { Bits(width = TAG_BITS*NWAYS) } + val tags = Mem(lines, seqRead = true) { Bits(width = TAG_BITS*NWAYS) } val tag = Reg() { Bits() } val raddr = Reg() { Bits() } val way_en_ = Reg { Bits(width=NWAYS) } From 4e44ed7400e4138f1c9f4cb2a167e11c6cb3cb6b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 17 Jul 2012 22:52:53 -0700 Subject: [PATCH 0434/1087] allow back pressure on IPI requests --- rocket/src/main/scala/ctrl.scala | 3 ++- rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/dpath_util.scala | 6 ++++-- rocket/src/main/scala/htif.scala | 4 ++-- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2de93962..24304498 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -70,6 +70,7 @@ class ioCtrlDpath extends Bundle() val 
fp_sboard_wb_waddr = UFix(INPUT, 5); val irq_timer = Bool(INPUT); val irq_ipi = Bool(INPUT); + val pcr_replay = Bool(INPUT) } class ioCtrlAll extends Bundle() @@ -743,7 +744,7 @@ class rocketCtrl extends Component wb_reg_exception := mem_exception && !take_pc_wb; wb_reg_cause := mem_cause; - val replay_wb = wb_reg_replay || vec_replay + val replay_wb = wb_reg_replay || vec_replay || io.dpath.pcr_replay val wb_badvaddr_wen = wb_reg_exception && ((wb_reg_cause === UFix(10)) || (wb_reg_cause === UFix(11))) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index df1830c3..1cdd38ee 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -288,6 +288,7 @@ class rocketDpath extends Component io.ctrl.irq_timer := pcr.io.irq_timer; io.ctrl.irq_ipi := pcr.io.irq_ipi; io.ctrl.status := pcr.io.status; + io.ctrl.pcr_replay := pcr.io.replay io.ptbr := pcr.io.ptbr; io.ptbr_wen := pcr.io.ptbr_wen; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index ac1a0b55..5d126f2e 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -78,6 +78,7 @@ class ioDpathPCR extends Bundle() val ptbr_wen = Bool(OUTPUT); val irq_timer = Bool(OUTPUT); val irq_ipi = Bool(OUTPUT); + val replay = Bool(OUTPUT) val vecbank = Bits(OUTPUT, 8) val vecbankcnt = UFix(OUTPUT, 4) val vec_appvl = UFix(INPUT, 12) @@ -175,8 +176,9 @@ class rocketDpathPCR extends Component io.irq_timer := r_irq_timer; io.irq_ipi := r_irq_ipi; - io.host.ipi.valid := Bool(false) - io.host.ipi.bits := wdata + io.host.ipi.valid := io.w.en && io.w.addr === PCR_SEND_IPI + io.host.ipi.bits := io.w.data + io.replay := io.host.ipi.valid && !io.host.ipi.ready when (wen) { when (waddr === PCR_STATUS) { diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 8c8a6acf..7a71700c 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ 
-209,9 +209,9 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C for (j <- 0 until ncores) { when (io.cpu(j).ipi.valid && io.cpu(j).ipi.bits === UFix(i)) { my_ipi := Bool(true) - my_reset := Bool(false) } } + cpu.ipi.ready := Bool(true) when (my_ipi) { my_ipi := !cpu.pcr_req.ready } @@ -228,7 +228,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C } pcr_mux.io.sel(i) := me - pcr_mux.io.in(i) := Mux(pcr_addr === PCR_RESET, my_reset, rdata) + pcr_mux.io.in(i) := Mux(pcr_addr === PCR_RESET, Cat(Bits(0, 63), my_reset), rdata) } val tx_cmd = Mux(nack, cmd_nack, cmd_ack) From f42c6afed25ad055198c907308d21d0b47607149 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 17 Jul 2012 22:55:00 -0700 Subject: [PATCH 0435/1087] decouple all interfaces between tile and top also, add an "incoherent" bit to tilelink to indicate no probes needed --- rocket/src/main/scala/arbiter.scala | 3 ++- rocket/src/main/scala/cpu.scala | 2 +- rocket/src/main/scala/htif.scala | 10 ++++------ rocket/src/main/scala/llc.scala | 10 +++++----- rocket/src/main/scala/queues.scala | 4 +++- rocket/src/main/scala/tile.scala | 20 ++++++++++---------- rocket/src/main/scala/top.scala | 15 ++++++++++++--- rocket/src/main/scala/uncore.scala | 17 +++++++++++++---- 8 files changed, 50 insertions(+), 31 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 1a7595c3..93a72be0 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -7,7 +7,7 @@ import Constants._; class ioUncachedRequestor extends Bundle { val xact_init = (new FIFOIO) { new TransactionInit } val xact_abort = (new FIFOIO) { new TransactionAbort }.flip - val xact_rep = (new PipeIO) { new TransactionReply }.flip + val xact_rep = (new FIFOIO) { new TransactionReply }.flip val xact_finish = (new FIFOIO) { new TransactionFinish } } @@ -74,4 +74,5 @@ class rocketMemArbiter(n: Int) extends 
Component { } io.mem.xact_abort.ready := Bool(true) + io.mem.xact_rep.ready := Bool(true) } diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 756476f6..c0a2f1c7 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -13,7 +13,7 @@ class ioRocket extends Bundle() val dmem = new ioHellaCache } -class rocketProc(resetSignal: Bool = null) extends Component(resetSignal) +class rocketProc extends Component { val io = new ioRocket diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 7a71700c..06718f7e 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -105,6 +105,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C mem_gxid := io.mem.xact_rep.bits.global_xact_id mem_needs_ack := io.mem.xact_rep.bits.require_ack } + io.mem.xact_rep.ready := Bool(true) when (io.mem.xact_abort.valid) { mem_nacked := Bool(true) } val state_rx :: state_pcr :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(8) { UFix() } @@ -183,13 +184,10 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C io.mem.xact_init_data.bits.data := mem_req_data io.mem.xact_finish.valid := (state === state_mem_finish) && mem_needs_ack io.mem.xact_finish.bits.global_xact_id := mem_gxid - - val probe_q = (new queue(1)) { new ProbeReply } - probe_q.io.enq.valid := io.mem.probe_req.valid - io.mem.probe_req.ready := probe_q.io.enq.ready - probe_q.io.enq.bits := co.newProbeReply(io.mem.probe_req.bits, co.newStateOnFlush()) - io.mem.probe_rep <> probe_q.io.deq + io.mem.probe_req.ready := Bool(false) + io.mem.probe_rep.valid := Bool(false) io.mem.probe_rep_data.valid := Bool(false) + io.mem.incoherent := Bool(true) pcr_done := Bool(false) val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } diff --git a/rocket/src/main/scala/llc.scala 
b/rocket/src/main/scala/llc.scala index 5f5007d1..1c9d49b8 100644 --- a/rocket/src/main/scala/llc.scala +++ b/rocket/src/main/scala/llc.scala @@ -107,7 +107,7 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component val addr = UFix(width = PADDR_BITS - OFFSET_BITS) val way = UFix(width = log2Up(ways)) } } - val mem = new ioMem + val mem = new ioMemPipe val mem_resp_set = UFix(OUTPUT, log2Up(sets)) val mem_resp_way = UFix(OUTPUT, log2Up(ways)) } @@ -194,7 +194,7 @@ class LLCWriteback(requestors: Int) extends Component val io = new Bundle { val req = Vec(requestors) { (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) }.flip } val data = Vec(requestors) { (new FIFOIO) { new MemData }.flip } - val mem = new ioMem + val mem = new ioMemPipe } val valid = Reg(resetVal = Bool(false)) @@ -245,7 +245,7 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component val req_data = (new FIFOIO) { new MemData }.flip val writeback = (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) } val writeback_data = (new FIFOIO) { new MemData } - val resp = (new PipeIO) { new MemResp } + val resp = (new FIFOIO) { new MemResp } val mem_resp = (new PipeIO) { new MemResp }.flip val mem_resp_set = UFix(INPUT, log2Up(sets)) val mem_resp_way = UFix(INPUT, log2Up(ways)) @@ -298,7 +298,7 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component io.writeback.valid := io.req.valid && io.req.ready && io.req.bits.isWriteback io.writeback.bits := io.req.bits.addr - q.io.deq.ready := Mux(q.io.deq.bits.isWriteback, io.writeback_data.ready, Bool(true)) + q.io.deq.ready := Mux(q.io.deq.bits.isWriteback, io.writeback_data.ready, io.resp.ready) io.resp.valid := q.io.deq.valid && !q.io.deq.bits.isWriteback io.resp.bits := q.io.deq.bits io.writeback_data.valid := q.io.deq.valid && q.io.deq.bits.isWriteback @@ -309,7 +309,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da { val io = new Bundle { val cpu = new ioMem().flip 
- val mem = new ioMem + val mem = new ioMemPipe } val tagWidth = PADDR_BITS - OFFSET_BITS - log2Up(sets) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 7c6d2b65..3cbc9282 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -73,7 +73,9 @@ object Queue { def apply[T <: Data](enq: FIFOIO[T], entries: Int = 2, pipe: Boolean = false) = { val q = (new queue(entries, pipe)) { enq.bits.clone } - q.io.enq <> enq + q.io.enq.valid := enq.valid // not using <> so that override is allowed + q.io.enq.bits := enq.bits + enq.ready := q.io.enq.ready q.io.deq } } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index ef368a85..a5178fb4 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -4,14 +4,14 @@ import Chisel._ import Node._ import Constants._ -class Tile(co: CoherencePolicyWithUncached) extends Component +class Tile(co: CoherencePolicyWithUncached, resetSignal: Bool = null) extends Component(resetSignal) { val io = new Bundle { val tilelink = new ioTileLink val host = new ioHTIF } - val cpu = new rocketProc(resetSignal = io.host.reset) + val cpu = new rocketProc val icache = new rocketICache(128, 4, co) // 128 sets x 4 ways (32KB) val dcache = new HellaCache(co) @@ -19,14 +19,14 @@ class Tile(co: CoherencePolicyWithUncached) extends Component arbiter.io.requestor(0) <> dcache.io.mem arbiter.io.requestor(1) <> icache.io.mem - io.tilelink.xact_init <> Queue(arbiter.io.mem.xact_init) - io.tilelink.xact_init_data <> Queue(dcache.io.mem.xact_init_data) - arbiter.io.mem.xact_abort <> Queue(io.tilelink.xact_abort) - arbiter.io.mem.xact_rep <> Pipe(io.tilelink.xact_rep) - io.tilelink.xact_finish <> Queue(arbiter.io.mem.xact_finish) - dcache.io.mem.probe_req <> Queue(io.tilelink.probe_req) - io.tilelink.probe_rep <> Queue(dcache.io.mem.probe_rep, 1) - io.tilelink.probe_rep_data <> Queue(dcache.io.mem.probe_rep_data) + 
io.tilelink.xact_init <> arbiter.io.mem.xact_init + io.tilelink.xact_init_data <> dcache.io.mem.xact_init_data + arbiter.io.mem.xact_abort <> io.tilelink.xact_abort + arbiter.io.mem.xact_rep <> io.tilelink.xact_rep + io.tilelink.xact_finish <> arbiter.io.mem.xact_finish + dcache.io.mem.probe_req <> io.tilelink.probe_req + io.tilelink.probe_rep <> dcache.io.mem.probe_rep + io.tilelink.probe_rep_data <> dcache.io.mem.probe_rep_data if (HAVE_VEC) { diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index eb526ea2..1c304838 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -12,7 +12,7 @@ class ioTop(htif_width: Int) extends Bundle { val mem_backup = new ioMemSerialized val mem_backup_en = Bool(INPUT) val mem_backup_clk = Bool(OUTPUT) - val mem = new ioMem + val mem = new ioMemPipe } class Top extends Component @@ -79,9 +79,18 @@ class Top extends Component var error_mode = Bool(false) for (i <- 0 until NTILES) { - val tile = new Tile(co) + val tile = new Tile(co, resetSignal = htif.io.cpu(i).reset) + val h = hub.io.tiles(i) tile.io.host <> htif.io.cpu(i) - hub.io.tiles(i) <> tile.io.tilelink + h.xact_init <> Queue(tile.io.tilelink.xact_init) + h.xact_init_data <> Queue(tile.io.tilelink.xact_init_data) + tile.io.tilelink.xact_abort <> Queue(h.xact_abort) + tile.io.tilelink.xact_rep <> Queue(h.xact_rep, 1, pipe = true) + h.xact_finish <> Queue(tile.io.tilelink.xact_finish) + tile.io.tilelink.probe_req <> Queue(h.probe_req) + h.probe_rep <> Queue(tile.io.tilelink.probe_rep, 1) + h.probe_rep_data <> Queue(tile.io.tilelink.probe_rep_data) + h.incoherent := htif.io.cpu(i).reset error_mode = error_mode || tile.io.host.debug.error_mode } io.debug.error_mode := error_mode diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 7e34492c..ead75252 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -20,6 +20,13 @@ class MemResp () extends 
MemData } class ioMem() extends Bundle +{ + val req_cmd = (new FIFOIO) { new MemReqCmd() } + val req_data = (new FIFOIO) { new MemData() } + val resp = (new FIFOIO) { new MemResp() }.flip +} + +class ioMemPipe() extends Bundle { val req_cmd = (new FIFOIO) { new MemReqCmd() } val req_data = (new FIFOIO) { new MemData() } @@ -46,8 +53,9 @@ class ioTileLink extends Bundle { val probe_req = (new FIFOIO) { new ProbeRequest }.flip val probe_rep = (new FIFOIO) { new ProbeReply } val probe_rep_data = (new FIFOIO) { new ProbeReplyData } - val xact_rep = (new PipeIO) { new TransactionReply }.flip + val xact_rep = (new FIFOIO) { new TransactionReply }.flip val xact_finish = (new FIFOIO) { new TransactionFinish } + val incoherent = Bool(OUTPUT) } class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { @@ -58,6 +66,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val xact_finish = Bool(INPUT) val p_rep_cnt_dec = Bits(INPUT, ntiles) val p_req_cnt_inc = Bits(INPUT, ntiles) + val tile_incoherent = Bits(INPUT, ntiles) val p_rep_data = (new PipeIO) { new ProbeReplyData }.flip val x_init_data = (new PipeIO) { new TransactionInitData }.flip val sent_x_rep_ack = Bool(INPUT) @@ -169,7 +178,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := co.messageHasData(io.alloc_req.bits.xact_init) x_needs_read := co.needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) - val p_req_initial_flags = ~( UFix(1) << io.alloc_req.bits.tile_id ) //TODO: Broadcast only + val p_req_initial_flags = ~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id)) //TODO: Broadcast only p_req_flags := p_req_initial_flags(ntiles-1,0) mem_cnt := UFix(0) p_w_mem_cmd_sent := Bool(false) @@ -310,6 +319,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH t.p_data.valid := p_data_valid_arr(i) 
t.p_rep_cnt_dec := p_rep_cnt_dec_arr(i).toBits t.p_req_cnt_inc := p_req_cnt_inc_arr(i).toBits + t.tile_incoherent := (Vec(io.tiles.map(_.incoherent)) { Bool() }).toBits t.sent_x_rep_ack := sent_x_rep_ack_arr(i) do_free_arr(i) := Bool(false) sent_x_rep_ack_arr(i) := Bool(false) @@ -360,8 +370,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH } } } - // If there were a ready signal due to e.g. intervening network use: - //io.mem.resp.ready := io.tiles(init_tile_id_arr.read(mem_idx)).xact_rep.ready + io.mem.resp.ready := io.tiles(init_tile_id_arr(mem_idx)).xact_rep.ready // Create an arbiter for the one memory port // We have to arbitrate between the different trackers' memory requests From 0a97d6ab4d19c6dc789030796a80c12568745042 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Wed, 18 Jul 2012 13:03:06 -0700 Subject: [PATCH 0436/1087] type casting --- rocket/src/main/scala/dpath.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 1cdd38ee..84746631 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -344,7 +344,7 @@ class rocketDpath extends Component val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr, Mux(div.io.resp_val, div.io.resp_tag, Mux(mul_io.resp_val, mul_io.resp_tag, - mem_reg_waddr))) + mem_reg_waddr))).toUFix val mem_ll_wdata = Mux(div.io.resp_val, div.io.resp_bits, Mux(mul_io.resp_val, mul_io.resp_bits, Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, From c892950bf13f218225f1626f2ff7aee0e8563a06 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 22 Jul 2012 17:48:17 -0700 Subject: [PATCH 0437/1087] hoist out uncore as its own component --- rocket/src/main/scala/top.scala | 70 ++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 1c304838..c97fb265 100644 
--- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -15,23 +15,32 @@ class ioTop(htif_width: Int) extends Bundle { val mem = new ioMemPipe } -class Top extends Component +class ioUncore(htif_width: Int, ntiles: Int) extends Bundle { + val debug = new ioDebug() + val host = new ioHost(htif_width) + val host_clk = Bool(OUTPUT) + val mem_backup = new ioMemSerialized + val mem_backup_en = Bool(INPUT) + val mem_backup_clk = Bool(OUTPUT) + val mem = new ioMemPipe + val tiles = Vec(ntiles) { new ioTileLink() }.flip + val htif = Vec(ntiles) { new ioHTIF() }.flip +} + +class Uncore(htif_width: Int, ntiles: Int, co: CoherencePolicyWithUncached) extends Component { val clkdiv = 8 - val htif_width = 8 - val io = new ioTop(htif_width) + val io = new ioUncore(htif_width, ntiles) - val co = if(ENABLE_SHARING) { - if(ENABLE_CLEAN_EXCLUSIVE) new MESICoherence - else new MSICoherence - } else { - if(ENABLE_CLEAN_EXCLUSIVE) new MEICoherence - else new MICoherence - } val htif = new rocketHTIF(htif_width, NTILES, co) val hub = new CoherenceHubBroadcast(NTILES+1, co) val llc_leaf = Mem(2048, seqRead = true) { Bits(width = 64) } val llc = new DRAMSideLLC(2048, 8, 4, llc_leaf, llc_leaf) + + for (i <- 0 until NTILES) { + hub.io.tiles(i) <> io.tiles(i) + htif.io.cpu(i) <> io.htif(i) + } hub.io.tiles(NTILES) <> htif.io.mem llc.io.cpu.req_cmd <> Queue(hub.io.mem.req_cmd) @@ -76,21 +85,38 @@ class Top extends Component mem_serdes.io.narrow.resp.bits := mio.io.in_fast.bits io.mem_backup.resp <> mio.io.in_slow io.mem_backup_clk := mio.io.clk_slow +} + +class Top extends Component +{ + val htif_width = 8 + val co = if(ENABLE_SHARING) { + if(ENABLE_CLEAN_EXCLUSIVE) new MESICoherence + else new MSICoherence + } else { + if(ENABLE_CLEAN_EXCLUSIVE) new MEICoherence + else new MICoherence + } + val io = new ioTop(htif_width) + + val uncore = new Uncore(htif_width, NTILES, co) + uncore.io <> io var error_mode = Bool(false) for (i <- 0 until NTILES) { - val tile = new 
Tile(co, resetSignal = htif.io.cpu(i).reset) - val h = hub.io.tiles(i) - tile.io.host <> htif.io.cpu(i) - h.xact_init <> Queue(tile.io.tilelink.xact_init) - h.xact_init_data <> Queue(tile.io.tilelink.xact_init_data) - tile.io.tilelink.xact_abort <> Queue(h.xact_abort) - tile.io.tilelink.xact_rep <> Queue(h.xact_rep, 1, pipe = true) - h.xact_finish <> Queue(tile.io.tilelink.xact_finish) - tile.io.tilelink.probe_req <> Queue(h.probe_req) - h.probe_rep <> Queue(tile.io.tilelink.probe_rep, 1) - h.probe_rep_data <> Queue(tile.io.tilelink.probe_rep_data) - h.incoherent := htif.io.cpu(i).reset + val hl = uncore.io.htif(i) + val tl = uncore.io.tiles(i) + val tile = new Tile(co, resetSignal = hl.reset) + tile.io.host <> hl + tl.xact_init <> Queue(tile.io.tilelink.xact_init) + tl.xact_init_data <> Queue(tile.io.tilelink.xact_init_data) + tile.io.tilelink.xact_abort <> Queue(tl.xact_abort) + tile.io.tilelink.xact_rep <> Queue(tl.xact_rep, 1, pipe = true) + tl.xact_finish <> Queue(tile.io.tilelink.xact_finish) + tile.io.tilelink.probe_req <> Queue(tl.probe_req) + tl.probe_rep <> Queue(tile.io.tilelink.probe_rep, 1) + tl.probe_rep_data <> Queue(tile.io.tilelink.probe_rep_data) + tl.incoherent := hl.reset error_mode = error_mode || tile.io.host.debug.error_mode } io.debug.error_mode := error_mode From 379f02135912b4b5fe7e48dc128bd8f3853eb6f1 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 22 Jul 2012 18:26:02 -0700 Subject: [PATCH 0438/1087] change ioHTIF interface between the tile/uncore boundary to cope with asynchrony --- rocket/src/main/scala/htif.scala | 4 +++- rocket/src/main/scala/top.scala | 9 +++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 06718f7e..31400589 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -27,7 +27,7 @@ class ioHTIF extends Bundle val reset = Bool(INPUT) val debug = new ioDebug val pcr_req = (new FIFOIO) { new 
PCRReq }.flip - val pcr_rep = (new PipeIO) { Bits(width = 64) } + val pcr_rep = (new FIFOIO) { Bits(width = 64) } val ipi = (new FIFOIO) { Bits(width = log2Up(NTILES)) } } @@ -220,6 +220,8 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C my_reset := pcr_wdata(0) } } + + cpu.pcr_rep.ready := Bool(true) when (cpu.pcr_rep.valid) { pcr_done := Bool(true) rdata := cpu.pcr_rep.bits diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index c97fb265..d255ed5a 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -107,7 +107,13 @@ class Top extends Component val hl = uncore.io.htif(i) val tl = uncore.io.tiles(i) val tile = new Tile(co, resetSignal = hl.reset) - tile.io.host <> hl + + tile.io.host.reset := Reg(Reg(hl.reset)) + tile.io.host.pcr_req <> Queue(hl.pcr_req) + hl.pcr_rep <> Queue(tile.io.host.pcr_rep) + hl.ipi <> Queue(tile.io.host.ipi) + error_mode = error_mode || Reg(tile.io.host.debug.error_mode) + tl.xact_init <> Queue(tile.io.tilelink.xact_init) tl.xact_init_data <> Queue(tile.io.tilelink.xact_init_data) tile.io.tilelink.xact_abort <> Queue(tl.xact_abort) @@ -117,7 +123,6 @@ class Top extends Component tl.probe_rep <> Queue(tile.io.tilelink.probe_rep, 1) tl.probe_rep_data <> Queue(tile.io.tilelink.probe_rep_data) tl.incoherent := hl.reset - error_mode = error_mode || tile.io.host.debug.error_mode } io.debug.error_mode := error_mode } From 938effc053a583df2c8d94e345812b3d6d26c044 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 22 Jul 2012 21:05:52 -0700 Subject: [PATCH 0439/1087] don't dequeue probe queue during reset --- rocket/src/main/scala/nbdcache.scala | 5 +++-- rocket/src/main/scala/uncore.scala | 9 +++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c9a172a2..c63e3696 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ 
b/rocket/src/main/scala/nbdcache.scala @@ -495,8 +495,8 @@ class ProbeUnit(co: CoherencePolicy) extends Component { val address = Bits(OUTPUT, PADDR_BITS-OFFSET_BITS) } - val s_invalid :: s_meta_req :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: Nil = Enum(7) { UFix() } - val state = Reg(resetVal = s_invalid) + val s_reset :: s_invalid :: s_meta_req :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: Nil = Enum(8) { UFix() } + val state = Reg(resetVal = s_reset) val line_state = Reg() { UFix() } val way_oh = Reg() { Bits() } val req = Reg() { new ProbeRequest() } @@ -526,6 +526,7 @@ class ProbeUnit(co: CoherencePolicy) extends Component { state := s_meta_req req := io.req.bits } + when (state === s_reset) { state := s_invalid } io.req.ready := state === s_invalid io.rep.valid := state === s_probe_rep && io.meta_req.ready diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index ead75252..6ea359ce 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -140,6 +140,8 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val mem_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES))) val mem_cnt_next = mem_cnt + UFix(1) val mem_cnt_max = ~UFix(0, width = log2Up(REFILL_CYCLES)) + val p_req_initial_flags = Bits(width = ntiles) + p_req_initial_flags := ~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id(log2Up(ntiles)-1,0))) //TODO: Broadcast only io.busy := state != s_idle io.addr := addr_ @@ -178,15 +180,14 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id x_init_data_needs_write := co.messageHasData(io.alloc_req.bits.xact_init) x_needs_read := co.needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) - val p_req_initial_flags = ~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id)) //TODO: 
Broadcast only - p_req_flags := p_req_initial_flags(ntiles-1,0) + p_req_flags := p_req_initial_flags mem_cnt := UFix(0) p_w_mem_cmd_sent := Bool(false) x_w_mem_cmd_sent := Bool(false) io.pop_x_init := UFix(1) << io.alloc_req.bits.tile_id if(ntiles > 1) { - p_rep_count := UFix(ntiles-1) - state := Mux(p_req_initial_flags(ntiles-1,0).orR, s_probe, s_mem) + p_rep_count := PopCount(p_req_initial_flags) + state := Mux(p_req_initial_flags.orR, s_probe, s_mem) } else state := s_mem } } From a21c355114c5204dc136fe93df55aea7d6e771d0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 23 Jul 2012 17:15:16 -0700 Subject: [PATCH 0440/1087] fix htif split request/response --- rocket/src/main/scala/htif.scala | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 31400589..6bbf72e5 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -108,21 +108,16 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C io.mem.xact_rep.ready := Bool(true) when (io.mem.xact_abort.valid) { mem_nacked := Bool(true) } - val state_rx :: state_pcr :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(8) { UFix() } + val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(9) { UFix() } val state = Reg(resetVal = state_rx) when (state === state_rx && rx_done) { val rx_cmd = Mux(rx_word_count === UFix(0), next_cmd, cmd) state := Mux(rx_cmd === cmd_readmem || rx_cmd === cmd_writemem, state_mem_req, - Mux(rx_cmd === cmd_readcr || rx_cmd === cmd_writecr, state_pcr, + Mux(rx_cmd === cmd_readcr || rx_cmd === cmd_writecr, state_pcr_req, state_tx)) } - val pcr_done = Reg() { Bool() } - when (state === state_pcr && pcr_done) { - state := state_tx - } - val 
mem_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) when (state === state_mem_req && io.mem.xact_init.ready) { state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) @@ -189,7 +184,6 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C io.mem.probe_rep_data.valid := Bool(false) io.mem.incoherent := Bool(true) - pcr_done := Bool(false) val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } for (i <- 0 until ncores) { val my_reset = Reg(resetVal = Bool(true)) @@ -198,7 +192,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C val cpu = io.cpu(i) val me = pcr_coreid === UFix(i) - cpu.pcr_req.valid := my_ipi || state === state_pcr && me + cpu.pcr_req.valid := my_ipi || state === state_pcr_req && me cpu.pcr_req.bits.rw := my_ipi || cmd === cmd_writecr cpu.pcr_req.bits.addr := Mux(my_ipi, PCR_CLR_IPI, pcr_addr) cpu.pcr_req.bits.data := my_ipi | pcr_wdata @@ -214,17 +208,21 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C my_ipi := !cpu.pcr_req.ready } - when (state === state_pcr && me && cmd === cmd_writecr) { - pcr_done := cpu.pcr_req.ready && !my_ipi - when (pcr_addr === PCR_RESET) { - my_reset := pcr_wdata(0) + when (state === state_pcr_req && me && !my_ipi && cpu.pcr_req.ready) { + when (cmd === cmd_writecr) { + state := state_tx + when (pcr_addr === PCR_RESET) { + my_reset := pcr_wdata(0) + } + }.otherwise { + state := state_pcr_resp } } cpu.pcr_rep.ready := Bool(true) when (cpu.pcr_rep.valid) { - pcr_done := Bool(true) rdata := cpu.pcr_rep.bits + state := state_tx } pcr_mux.io.sel(i) := me From f4e3e72ad1df151f156b32c6ac817465a8e6e5d7 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 23 Jul 2012 17:30:04 -0700 Subject: [PATCH 0441/1087] hoist HTIF_WIDTH out to consts --- rocket/src/main/scala/consts.scala | 1 + rocket/src/main/scala/top.scala | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 32cefbe7..ce243922 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -12,6 +12,7 @@ object Constants val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK + val HTIF_WIDTH = 8 val MEM_BACKUP_WIDTH = 16 val BR_X = Bits("b????", 4) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index d255ed5a..754f94c7 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -89,7 +89,6 @@ class Uncore(htif_width: Int, ntiles: Int, co: CoherencePolicyWithUncached) exte class Top extends Component { - val htif_width = 8 val co = if(ENABLE_SHARING) { if(ENABLE_CLEAN_EXCLUSIVE) new MESICoherence else new MSICoherence @@ -97,9 +96,9 @@ class Top extends Component if(ENABLE_CLEAN_EXCLUSIVE) new MEICoherence else new MICoherence } - val io = new ioTop(htif_width) + val io = new ioTop(HTIF_WIDTH) - val uncore = new Uncore(htif_width, NTILES, co) + val uncore = new Uncore(HTIF_WIDTH, NTILES, co) uncore.io <> io var error_mode = Bool(false) From 6541cf22a453c2b4c2ac97179241e7103ada908e Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 23 Jul 2012 20:56:55 -0700 Subject: [PATCH 0442/1087] fix bug in coherence hub, respect xact_rep.ready --- rocket/src/main/scala/uncore.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 6ea359ce..2035453c 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -367,7 +367,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH rep.bits.global_xact_id := ack_idx when (UFix(j) === init_tile_id_arr(ack_idx)) { rep.valid := send_x_rep_ack_arr.toBits.orR - sent_x_rep_ack_arr(ack_idx) := Bool(true) + sent_x_rep_ack_arr(ack_idx) := rep.ready } } } From 
309193dd076286c1c8f2b8a80f4cf5dfa18d84b9 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 24 Jul 2012 14:10:29 -0700 Subject: [PATCH 0443/1087] change llc size --- rocket/src/main/scala/top.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 754f94c7..0e63e48d 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -34,8 +34,9 @@ class Uncore(htif_width: Int, ntiles: Int, co: CoherencePolicyWithUncached) exte val htif = new rocketHTIF(htif_width, NTILES, co) val hub = new CoherenceHubBroadcast(NTILES+1, co) - val llc_leaf = Mem(2048, seqRead = true) { Bits(width = 64) } - val llc = new DRAMSideLLC(2048, 8, 4, llc_leaf, llc_leaf) + val llc_tag_leaf = Mem(1024, seqRead = true) { Bits(width = 72) } + val llc_data_leaf = Mem(4096, seqRead = true) { Bits(width = 64) } + val llc = new DRAMSideLLC(1024, 8, 4, llc_tag_leaf, llc_data_leaf) for (i <- 0 until NTILES) { hub.io.tiles(i) <> io.tiles(i) From 177dbdadd93e08ee3bd83b8ce4d4f92786413073 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 Jul 2012 00:18:02 -0700 Subject: [PATCH 0444/1087] merge HTIF port and backup memory port --- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/top.scala | 34 +++++++++++++++++------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index ce243922..98ab864f 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -13,7 +13,7 @@ object Constants val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK val HTIF_WIDTH = 8 - val MEM_BACKUP_WIDTH = 16 + val MEM_BACKUP_WIDTH = HTIF_WIDTH val BR_X = Bits("b????", 4) val BR_N = UFix(0, 4); diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 0e63e48d..7c1dbf81 100644 --- 
a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -70,22 +70,26 @@ class Uncore(htif_width: Int, ntiles: Int, co: CoherencePolicyWithUncached) exte llc.io.mem.resp.bits := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.bits, io.mem.resp.bits) // pad out the HTIF using a divided clock - val hio = (new slowIO(clkdiv)) { Bits(width = htif_width) } - htif.io.host.out <> hio.io.out_fast - io.host.out <> hio.io.out_slow - htif.io.host.in <> hio.io.in_fast - io.host.in <> hio.io.in_slow - io.host_clk := hio.io.clk_slow + val hio = (new slowIO(clkdiv)) { Bits(width = htif_width+1) } + hio.io.out_fast.valid := htif.io.host.out.valid || mem_serdes.io.narrow.req.valid + hio.io.out_fast.bits := Cat(htif.io.host.out.valid, Mux(htif.io.host.out.valid, htif.io.host.out.bits, mem_serdes.io.narrow.req.bits)) + htif.io.host.out.ready := hio.io.out_fast.ready + mem_serdes.io.narrow.req.ready := hio.io.out_fast.ready && !htif.io.host.out.valid + io.host.out.valid := hio.io.out_slow.valid && hio.io.out_slow.bits(htif_width) + io.host.out.bits := hio.io.out_slow.bits + io.mem_backup.req.valid := hio.io.out_slow.valid && !hio.io.out_slow.bits(htif_width) + hio.io.out_slow.ready := Mux(hio.io.out_slow.bits(htif_width), io.host.out.ready, io.mem_backup.req.ready) - // pad out the backup memory link with the HTIF divided clk - val mio = (new slowIO(clkdiv)) { Bits(width = MEM_BACKUP_WIDTH) } - mem_serdes.io.narrow.req <> mio.io.out_fast - io.mem_backup.req <> mio.io.out_slow - mem_serdes.io.narrow.resp.valid := mio.io.in_fast.valid - mio.io.in_fast.ready := Bool(true) - mem_serdes.io.narrow.resp.bits := mio.io.in_fast.bits - io.mem_backup.resp <> mio.io.in_slow - io.mem_backup_clk := mio.io.clk_slow + val mem_backup_resp_valid = io.mem_backup_en && io.mem_backup.resp.valid + hio.io.in_slow.valid := mem_backup_resp_valid || io.host.in.valid + hio.io.in_slow.bits := Cat(mem_backup_resp_valid, io.host.in.bits) + io.host.in.ready := hio.io.in_slow.ready + 
mem_serdes.io.narrow.resp.valid := hio.io.in_fast.valid && hio.io.in_fast.bits(htif_width) + mem_serdes.io.narrow.resp.bits := hio.io.in_fast.bits + htif.io.host.in.valid := hio.io.in_fast.valid && !hio.io.in_fast.bits(htif_width) + htif.io.host.in.bits := hio.io.in_fast.bits + hio.io.in_fast.ready := Mux(hio.io.in_fast.bits(htif_width), Bool(true), htif.io.host.in.ready) + io.host_clk := hio.io.clk_slow } class Top extends Component From 3a2b305ddfd4b248764d3232fdc1636cbb45be73 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 25 Jul 2012 17:25:50 -0700 Subject: [PATCH 0445/1087] change htif width to 16 --- rocket/src/main/scala/consts.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 98ab864f..51071105 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -12,7 +12,7 @@ object Constants val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK - val HTIF_WIDTH = 8 + val HTIF_WIDTH = 16 val MEM_BACKUP_WIDTH = HTIF_WIDTH val BR_X = Bits("b????", 4) From a5bea4364f94d37a8f2e98e4bc557aab86ff4d04 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 26 Jul 2012 00:03:55 -0700 Subject: [PATCH 0446/1087] memory system bug fixes --- rocket/src/main/scala/htif.scala | 11 ++++++---- rocket/src/main/scala/llc.scala | 4 ++-- rocket/src/main/scala/uncore.scala | 33 ++++++++++++++++-------------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 6bbf72e5..3ee4d533 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -107,6 +107,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C } io.mem.xact_rep.ready := Bool(true) when (io.mem.xact_abort.valid) { mem_nacked := Bool(true) } + io.mem.xact_abort.ready := Bool(true) val state_rx :: 
state_pcr_req :: state_pcr_resp :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(9) { UFix() } val state = Reg(resetVal = state_rx) @@ -119,7 +120,8 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C } val mem_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - when (state === state_mem_req && io.mem.xact_init.ready) { + val x_init = new queue(1)(new TransactionInit) + when (state === state_mem_req && x_init.io.enq.ready) { state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) } when (state === state_mem_wdata && io.mem.xact_init_data.ready) { @@ -172,9 +174,10 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C } mem_req_data = Cat(packet_ram(idx), mem_req_data) } - io.mem.xact_init.valid := state === state_mem_req - io.mem.xact_init.bits.x_type := Mux(cmd === cmd_writemem, co.getTransactionInitTypeOnUncachedWrite, co.getTransactionInitTypeOnUncachedRead) - io.mem.xact_init.bits.address := addr.toUFix >> UFix(OFFSET_BITS-3) + x_init.io.enq.valid := state === state_mem_req + x_init.io.enq.bits.x_type := Mux(cmd === cmd_writemem, co.getTransactionInitTypeOnUncachedWrite, co.getTransactionInitTypeOnUncachedRead) + x_init.io.enq.bits.address := addr.toUFix >> UFix(OFFSET_BITS-3) + io.mem.xact_init <> x_init.io.deq io.mem.xact_init_data.valid:= state === state_mem_wdata io.mem.xact_init_data.bits.data := mem_req_data io.mem.xact_finish.valid := (state === state_mem_finish) && mem_needs_ack diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala index 1c9d49b8..d95aeff7 100644 --- a/rocket/src/main/scala/llc.scala +++ b/rocket/src/main/scala/llc.scala @@ -378,7 +378,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da writeback.io.req(1).bits := s2.addr writeback.io.data(1).valid := io.cpu.req_data.valid writeback.io.data(1).bits := io.cpu.req_data.bits - 
data.io.req_data.valid := io.cpu.req_data.valid && !writeback.io.data(1).ready + data.io.req_data.valid := io.cpu.req_data.valid && writeback.io.req(1).ready memCmdArb.io.in(0) <> mshr.io.mem.req_cmd memCmdArb.io.in(1) <> writeback.io.mem.req_cmd @@ -393,7 +393,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da io.cpu.resp <> data.io.resp io.cpu.req_cmd.ready := !stall_s1 && !replay_s1 - io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready + io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready && writeback.io.req(1).ready io.mem.req_cmd <> memCmdArb.io.out io.mem.req_data <> writeback.io.mem.req_data } diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 2035453c..5c58f151 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -95,21 +95,25 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { } def doMemReqWrite(req_cmd: FIFOIO[MemReqCmd], req_data: FIFOIO[MemData], lock: Bool, data: PipeIO[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { - req_cmd.valid := !cmd_sent && data.valid && at_front_of_dep_queue req_cmd.bits.rw := Bool(true) - req_data.valid := data.valid && at_front_of_dep_queue req_data.bits := data.bits - lock := data.valid && at_front_of_dep_queue when(req_cmd.ready && req_cmd.valid) { cmd_sent := Bool(true) } - when(req_data.ready && at_front_of_dep_queue) { - pop_data := UFix(1) << tile_id - when (data.valid) { - mem_cnt := mem_cnt_next - when(mem_cnt_next === UFix(0)) { - pop_dep := UFix(1) << tile_id - trigger := Bool(false) + when (at_front_of_dep_queue) { + req_cmd.valid := !cmd_sent && req_data.ready + lock := Bool(true) + when (req_cmd.ready || cmd_sent) { + req_data.valid := data.valid + when(req_data.ready) { + pop_data := UFix(1) << tile_id + when (data.valid) { + mem_cnt := 
mem_cnt_next + when(mem_cnt === UFix(REFILL_CYCLES-1)) { + pop_dep := UFix(1) << tile_id + trigger := Bool(false) + } + } } } } @@ -141,7 +145,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { val mem_cnt_next = mem_cnt + UFix(1) val mem_cnt_max = ~UFix(0, width = log2Up(REFILL_CYCLES)) val p_req_initial_flags = Bits(width = ntiles) - p_req_initial_flags := ~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id(log2Up(ntiles)-1,0))) //TODO: Broadcast only + p_req_initial_flags := (if (ntiles == 1) Bits(0) else ~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id(log2Up(ntiles)-1,0)))) //TODO: Broadcast only io.busy := state != s_idle io.addr := addr_ @@ -376,16 +380,15 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH // Create an arbiter for the one memory port // We have to arbitrate between the different trackers' memory requests // and once we have picked a request, get the right write data - val mem_req_cmd_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemReqCmd() } + val mem_req_cmd_arb = (new Arbiter(NGLOBAL_XACTS)) { new MemReqCmd() } val mem_req_data_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemData() } for( i <- 0 until NGLOBAL_XACTS ) { mem_req_cmd_arb.io.in(i) <> trackerList(i).io.mem_req_cmd - mem_req_cmd_arb.io.lock(i) <> trackerList(i).io.mem_req_lock mem_req_data_arb.io.in(i) <> trackerList(i).io.mem_req_data mem_req_data_arb.io.lock(i) <> trackerList(i).io.mem_req_lock } - io.mem.req_cmd <> mem_req_cmd_arb.io.out - io.mem.req_data <> mem_req_data_arb.io.out + io.mem.req_cmd <> Queue(mem_req_cmd_arb.io.out) + io.mem.req_data <> Queue(mem_req_data_arb.io.out) // Handle probe replies, which may or may not have data for( j <- 0 until ntiles ) { From 9c50621a19eba2cb44d6bffe11117b83463158ff Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 26 Jul 2012 03:26:52 -0700 Subject: [PATCH 0447/1087] remove chip-specific uncore gunk --- rocket/src/main/scala/top.scala 
| 109 +++++--------------------------- 1 file changed, 17 insertions(+), 92 deletions(-) diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 7c1dbf81..bf52ca78 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -5,95 +5,14 @@ import Node._; import Constants._; import collection.mutable._ -class ioTop(htif_width: Int) extends Bundle { - val debug = new ioDebug(); - val host = new ioHost(htif_width); - val host_clk = Bool(OUTPUT) - val mem_backup = new ioMemSerialized - val mem_backup_en = Bool(INPUT) - val mem_backup_clk = Bool(OUTPUT) - val mem = new ioMemPipe -} - -class ioUncore(htif_width: Int, ntiles: Int) extends Bundle { - val debug = new ioDebug() - val host = new ioHost(htif_width) - val host_clk = Bool(OUTPUT) - val mem_backup = new ioMemSerialized - val mem_backup_en = Bool(INPUT) - val mem_backup_clk = Bool(OUTPUT) - val mem = new ioMemPipe - val tiles = Vec(ntiles) { new ioTileLink() }.flip - val htif = Vec(ntiles) { new ioHTIF() }.flip -} - -class Uncore(htif_width: Int, ntiles: Int, co: CoherencePolicyWithUncached) extends Component -{ - val clkdiv = 8 - val io = new ioUncore(htif_width, ntiles) - - val htif = new rocketHTIF(htif_width, NTILES, co) - val hub = new CoherenceHubBroadcast(NTILES+1, co) - val llc_tag_leaf = Mem(1024, seqRead = true) { Bits(width = 72) } - val llc_data_leaf = Mem(4096, seqRead = true) { Bits(width = 64) } - val llc = new DRAMSideLLC(1024, 8, 4, llc_tag_leaf, llc_data_leaf) - - for (i <- 0 until NTILES) { - hub.io.tiles(i) <> io.tiles(i) - htif.io.cpu(i) <> io.htif(i) - } - hub.io.tiles(NTILES) <> htif.io.mem - - llc.io.cpu.req_cmd <> Queue(hub.io.mem.req_cmd) - llc.io.cpu.req_data <> Queue(hub.io.mem.req_data, REFILL_CYCLES) - hub.io.mem.resp <> llc.io.cpu.resp - - // mux between main and backup memory ports - val mem_serdes = new MemSerdes - val mem_cmdq = (new queue(2)) { new MemReqCmd } - mem_cmdq.io.enq <> llc.io.mem.req_cmd - mem_cmdq.io.deq.ready := 
Mux(io.mem_backup_en, mem_serdes.io.wide.req_cmd.ready, io.mem.req_cmd.ready) - io.mem.req_cmd.valid := mem_cmdq.io.deq.valid && !io.mem_backup_en - io.mem.req_cmd.bits := mem_cmdq.io.deq.bits - mem_serdes.io.wide.req_cmd.valid := mem_cmdq.io.deq.valid && io.mem_backup_en - mem_serdes.io.wide.req_cmd.bits := mem_cmdq.io.deq.bits - - val mem_dataq = (new queue(REFILL_CYCLES)) { new MemData } - mem_dataq.io.enq <> llc.io.mem.req_data - mem_dataq.io.deq.ready := Mux(io.mem_backup_en, mem_serdes.io.wide.req_data.ready, io.mem.req_data.ready) - io.mem.req_data.valid := mem_dataq.io.deq.valid && !io.mem_backup_en - io.mem.req_data.bits := mem_dataq.io.deq.bits - mem_serdes.io.wide.req_data.valid := mem_dataq.io.deq.valid && io.mem_backup_en - mem_serdes.io.wide.req_data.bits := mem_dataq.io.deq.bits - - llc.io.mem.resp.valid := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.valid, io.mem.resp.valid) - llc.io.mem.resp.bits := Mux(io.mem_backup_en, mem_serdes.io.wide.resp.bits, io.mem.resp.bits) - - // pad out the HTIF using a divided clock - val hio = (new slowIO(clkdiv)) { Bits(width = htif_width+1) } - hio.io.out_fast.valid := htif.io.host.out.valid || mem_serdes.io.narrow.req.valid - hio.io.out_fast.bits := Cat(htif.io.host.out.valid, Mux(htif.io.host.out.valid, htif.io.host.out.bits, mem_serdes.io.narrow.req.bits)) - htif.io.host.out.ready := hio.io.out_fast.ready - mem_serdes.io.narrow.req.ready := hio.io.out_fast.ready && !htif.io.host.out.valid - io.host.out.valid := hio.io.out_slow.valid && hio.io.out_slow.bits(htif_width) - io.host.out.bits := hio.io.out_slow.bits - io.mem_backup.req.valid := hio.io.out_slow.valid && !hio.io.out_slow.bits(htif_width) - hio.io.out_slow.ready := Mux(hio.io.out_slow.bits(htif_width), io.host.out.ready, io.mem_backup.req.ready) - - val mem_backup_resp_valid = io.mem_backup_en && io.mem_backup.resp.valid - hio.io.in_slow.valid := mem_backup_resp_valid || io.host.in.valid - hio.io.in_slow.bits := Cat(mem_backup_resp_valid, 
io.host.in.bits) - io.host.in.ready := hio.io.in_slow.ready - mem_serdes.io.narrow.resp.valid := hio.io.in_fast.valid && hio.io.in_fast.bits(htif_width) - mem_serdes.io.narrow.resp.bits := hio.io.in_fast.bits - htif.io.host.in.valid := hio.io.in_fast.valid && !hio.io.in_fast.bits(htif_width) - htif.io.host.in.bits := hio.io.in_fast.bits - hio.io.in_fast.ready := Mux(hio.io.in_fast.bits(htif_width), Bool(true), htif.io.host.in.ready) - io.host_clk := hio.io.clk_slow -} - class Top extends Component { + val io = new Bundle { + val debug = new ioDebug + val host = new ioHost(HTIF_WIDTH) + val mem = new ioMemPipe + } + val co = if(ENABLE_SHARING) { if(ENABLE_CLEAN_EXCLUSIVE) new MESICoherence else new MSICoherence @@ -101,15 +20,21 @@ class Top extends Component if(ENABLE_CLEAN_EXCLUSIVE) new MEICoherence else new MICoherence } - val io = new ioTop(HTIF_WIDTH) - val uncore = new Uncore(HTIF_WIDTH, NTILES, co) - uncore.io <> io + val htif = new rocketHTIF(HTIF_WIDTH, NTILES, co) + val hub = new CoherenceHubBroadcast(NTILES+1, co) + hub.io.tiles(NTILES) <> htif.io.mem + io.host <> htif.io.host + + io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) + io.mem.req_data <> Queue(hub.io.mem.req_data, REFILL_CYCLES) + hub.io.mem.resp <> Pipe(io.mem.resp) + Assert(hub.io.mem.resp.ready, "hub.io.mem.resp.ready") var error_mode = Bool(false) for (i <- 0 until NTILES) { - val hl = uncore.io.htif(i) - val tl = uncore.io.tiles(i) + val hl = htif.io.cpu(i) + val tl = hub.io.tiles(i) val tile = new Tile(co, resetSignal = hl.reset) tile.io.host.reset := Reg(Reg(hl.reset)) From 7778802395bff105e4f11d13ffc7bd60d8029884 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 26 Jul 2012 14:51:41 -0700 Subject: [PATCH 0448/1087] reduce number of outstanding transactions --- rocket/src/main/scala/consts.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 51071105..e4af3ec6 100644 --- 
a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -195,8 +195,8 @@ object Constants val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8 val TILE_ID_BITS = log2Up(NTILES)+1 val TILE_XACT_ID_BITS = log2Up(NMSHR)+3 - val GLOBAL_XACT_ID_BITS = log2Up(NTILES*NMSHR)+1 - val NGLOBAL_XACTS = 1 << GLOBAL_XACT_ID_BITS + val NGLOBAL_XACTS = 8 + val GLOBAL_XACT_ID_BITS = log2Up(NGLOBAL_XACTS) val X_INIT_TYPE_MAX_BITS = 2 val X_REP_TYPE_MAX_BITS = 3 From 130fa95ed6591d4b61fa1223ebf6ba614d480f22 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 26 Jul 2012 03:28:07 -0700 Subject: [PATCH 0449/1087] expand HTIF's PCR register space --- rocket/src/main/scala/dpath_util.scala | 3 +-- rocket/src/main/scala/htif.scala | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 5d126f2e..ebd39d8f 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -121,7 +121,7 @@ class rocketDpathPCR extends Component val rdata = Bits(); - val raddr = Mux(io.r.en, io.r.addr, io.host.pcr_req.bits.addr) + val raddr = Mux(io.r.en, io.r.addr, io.host.pcr_req.bits.addr(4,0)) io.host.pcr_rep.valid := io.host.pcr_req.valid && !io.r.en && !io.host.pcr_req.bits.rw io.host.pcr_rep.bits := rdata @@ -200,7 +200,6 @@ class rocketDpathPCR extends Component when (waddr === PCR_COREID) { reg_coreid := wdata(15,0) } when (waddr === PCR_FROMHOST) { reg_fromhost := wdata; reg_tohost := Bits(0) } when (waddr === PCR_TOHOST) { reg_tohost := wdata; reg_fromhost := Bits(0) } - when (waddr === PCR_SEND_IPI) { io.host.ipi.valid := Bool(true) } when (waddr === PCR_CLR_IPI) { r_irq_ipi := wdata(0) } when (waddr === PCR_K0) { reg_k0 := wdata; } when (waddr === PCR_K1) { reg_k1 := wdata; } diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 3ee4d533..8a9af58b 100644 --- a/rocket/src/main/scala/htif.scala +++ 
b/rocket/src/main/scala/htif.scala @@ -18,7 +18,7 @@ class ioHost(w: Int) extends Bundle class PCRReq extends Bundle { val rw = Bool() - val addr = Bits(width = 5) + val addr = Bits(width = 6) val data = Bits(width = 64) } @@ -75,7 +75,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } - val pcr_addr = addr(4,0) + val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.width-1, 0) val pcr_coreid = if (ncores == 1) UFix(0) else addr(20+log2Up(ncores),20) val pcr_wdata = packet_ram(0) From 32a16d183f11fbd2b7bda6b2724cbc7754c77d59 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Tue, 24 Jul 2012 19:05:49 -0700 Subject: [PATCH 0450/1087] consts file doesn't depend on WIDTH_PVFB if HAVE_PVFB == false --- rocket/src/main/scala/consts.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index e4af3ec6..54786a23 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -5,12 +5,16 @@ import scala.math._ object Constants { - val NTILES = 1 + val NTILES = 2 val HAVE_RVC = false val HAVE_FPU = true val HAVE_VEC = true - val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK + val MAX_THREADS = + if(hwacha.Constants.HAVE_PVFB) + hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK + else + 256 val HTIF_WIDTH = 16 val MEM_BACKUP_WIDTH = HTIF_WIDTH From db91c4cf6c79d5013a4aed9536931214b8cdcff9 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Fri, 27 Jul 2012 18:12:23 -0700 Subject: [PATCH 0451/1087] hwacha --- rocket/src/main/scala/consts.scala | 5 +---- rocket/src/main/scala/dpath_vec.scala | 3 +-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 
54786a23..78f979d8 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -5,16 +5,13 @@ import scala.math._ object Constants { - val NTILES = 2 + val NTILES = 1 val HAVE_RVC = false val HAVE_FPU = true val HAVE_VEC = true val MAX_THREADS = - if(hwacha.Constants.HAVE_PVFB) hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK - else - 256 val HTIF_WIDTH = 16 val MEM_BACKUP_WIDTH = HTIF_WIDTH diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 6f5f228f..8a4faabb 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -105,8 +105,7 @@ class rocketDpathVec extends Component UFix(52,7) -> UFix(5,9) )) - val uts_per_bank = Mux(nreg_mod_bank > UFix(MAX_THREADS,9), UFix(MAX_THREADS, 9), nreg_mod_bank) - + val uts_per_bank = Mux(Bool(hwacha.Constants.HAVE_PVFB) & nreg_mod_bank > UFix(MAX_THREADS,9), UFix(MAX_THREADS, 9), nreg_mod_bank) val reg_hwvl = Reg(resetVal = UFix(32, 12)) val reg_appvl0 = Reg(resetVal = Bool(true)) From 0a1cd1175c8a43dab325d77d376a6e4bd6a23988 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Fri, 27 Jul 2012 18:44:17 -0700 Subject: [PATCH 0452/1087] add reset pin to llc --- rocket/src/main/scala/llc.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala index d95aeff7..739fd791 100644 --- a/rocket/src/main/scala/llc.scala +++ b/rocket/src/main/scala/llc.scala @@ -305,7 +305,7 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component io.writeback_data.bits := q.io.deq.bits } -class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits]) extends Component +class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits], resetSignal: Bool = null) extends Component(resetSignal) { val io = new Bundle { val cpu = new ioMem().flip From 
2af84f994ae8caa6b6b99c1aa796972d736014cf Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 28 Jul 2012 21:14:33 -0700 Subject: [PATCH 0453/1087] remove reset pin on llc --- rocket/src/main/scala/llc.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala index 739fd791..d95aeff7 100644 --- a/rocket/src/main/scala/llc.scala +++ b/rocket/src/main/scala/llc.scala @@ -305,7 +305,7 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component io.writeback_data.bits := q.io.deq.bits } -class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits], resetSignal: Bool = null) extends Component(resetSignal) +class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits]) extends Component { val io = new Bundle { val cpu = new ioMem().flip From 2ec76390e3877a714db52c89f71227c673ffd737 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 30 Jul 2012 16:06:30 -0700 Subject: [PATCH 0454/1087] improve PriorityEncoderOH and add Counter util --- rocket/src/main/scala/util.scala | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 5b1727db..fa37277e 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -78,14 +78,26 @@ object PriorityEncoder object PriorityEncoderOH { - def apply(in: Bits): UFix = doApply(in, 0) - def doApply(in: Bits, n: Int = 0): UFix = { - val out = Vec(in.getWidth) { Bool() } + def apply(in: Bits): Bits = Vec(apply((0 until in.getWidth).map(in(_)))){Bits()}.toBits + def apply(in: Seq[Bits]): Seq[Bool] = { var none_hot = Bool(true) - for (i <- 0 until in.getWidth) { - out(i) := none_hot && in(i) + val out = collection.mutable.ArrayBuffer[Bool]() + for (i <- 0 until in.size) { + out += none_hot && in(i) none_hot = none_hot && !in(i) } - out.toBits + 
out + } +} + +object Counter +{ + def apply(cond: Bool, n: Int) = { + val c = Reg(resetVal = UFix(0, log2Up(n))) + val wrap = c === UFix(n-1) + when (cond) { + c := Mux(Bool(!isPow2(n)) && wrap, UFix(0), c + UFix(1)) + } + (c, wrap && cond) } } From be4fa936ddaff34b8952d8185a563c4d863259d7 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 30 Jul 2012 18:28:54 -0700 Subject: [PATCH 0455/1087] fix PriorityEncoderOH bug --- rocket/src/main/scala/util.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index fa37277e..5cd67ea2 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -78,7 +78,7 @@ object PriorityEncoder object PriorityEncoderOH { - def apply(in: Bits): Bits = Vec(apply((0 until in.getWidth).map(in(_)))){Bits()}.toBits + def apply(in: Bits): Bits = Vec(apply((0 until in.getWidth).map(in(_)))){Bool()}.toBits def apply(in: Seq[Bits]): Seq[Bool] = { var none_hot = Bool(true) val out = collection.mutable.ArrayBuffer[Bool]() From 80c243469e511ed48b81017187d4c74743347766 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 30 Jul 2012 18:45:13 -0700 Subject: [PATCH 0456/1087] add flow queues and skid buffers hopefully they work --- rocket/src/main/scala/queues.scala | 64 +++++++++++++++++++----------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 3cbc9282..ddd0216f 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -11,31 +11,21 @@ class ioQueue[T <: Data](entries: Int, flushable: Boolean)(data: => T) extends B val count = UFix(OUTPUT, log2Up(entries+1)) } -class queue[T <: Data](val entries: Int, pipe: Boolean = false, flushable: Boolean = false)(data: => T) extends Component +class queue[T <: Data](val entries: Int, pipe: Boolean = false, flow: Boolean = false, flushable: Boolean = false)(data: 
=> T) extends Component { val io = new ioQueue(entries, flushable)(data) - val do_enq = io.enq.ready && io.enq.valid - val do_deq = io.deq.ready && io.deq.valid + val do_flow = Bool() + val do_enq = io.enq.ready && io.enq.valid && !do_flow + val do_deq = io.deq.ready && io.deq.valid && !do_flow var enq_ptr = UFix(0) var deq_ptr = UFix(0) - val pow2 = (entries & (entries-1)) == 0 if (entries > 1) { - enq_ptr = Reg(resetVal = UFix(0, log2Up(entries))) - deq_ptr = Reg(resetVal = UFix(0, log2Up(entries))) - - var enq_next = enq_ptr + UFix(1) - var deq_next = deq_ptr + UFix(1) - if (!pow2) { - enq_next = Mux(enq_ptr === UFix(entries-1), UFix(0), enq_next) - deq_next = Mux(deq_ptr === UFix(entries-1), UFix(0), deq_next) - } - - when (do_deq) { deq_ptr := deq_next } - when (do_enq) { enq_ptr := enq_next } + enq_ptr = Counter(do_enq, entries)._1 + deq_ptr = Counter(do_deq, entries)._1 if (flushable) { when (io.flush) { deq_ptr := UFix(0) @@ -58,12 +48,16 @@ class queue[T <: Data](val entries: Int, pipe: Boolean = false, flushable: Boole when (do_enq) { ram(enq_ptr) := io.enq.bits } val ptr_match = enq_ptr === deq_ptr - io.deq.valid := maybe_full || !ptr_match - io.enq.ready := !maybe_full || !ptr_match || (if (pipe) io.deq.ready else Bool(false)) - io.deq.bits <> ram(deq_ptr) + val empty = ptr_match && !maybe_full + val full = ptr_match && maybe_full + val maybe_flow = Bool(flow) && empty + do_flow := maybe_flow && io.deq.ready + io.deq.valid := !empty || Bool(flow) && io.enq.valid + io.enq.ready := !full || Bool(pipe) && io.deq.ready + io.deq.bits := Mux(maybe_flow, io.enq.bits, ram(deq_ptr)) val ptr_diff = enq_ptr - deq_ptr - if (pow2) + if (isPow2(entries)) io.count := Cat(maybe_full && ptr_match, ptr_diff).toUFix else io.count := Mux(ptr_match, Mux(maybe_full, UFix(entries), UFix(0)), Mux(deq_ptr > enq_ptr, UFix(entries) + ptr_diff, ptr_diff)) @@ -92,11 +86,11 @@ class pipereg[T <: Data]()(data: => T) extends Component // bits := io.enq.bits //} - val reg = Reg() { 
io.enq.bits.clone } - when (io.enq.valid) { reg := io.enq.bits } + val r = Reg() { io.enq.bits.clone } + when (io.enq.valid) { r := io.enq.bits } io.deq.valid := Reg(io.enq.valid, resetVal = Bool(false)) - io.deq.bits <> reg + io.deq.bits <> r } object Pipe @@ -112,3 +106,27 @@ object Pipe q.io.deq } } + +class SkidBuffer[T <: Data]()(data: => T) extends Component +{ + val io = new Bundle { + val enq = new FIFOIO()(data).flip + val deq = new FIFOIO()(data) + } + + val fq = new queue(1, flow = true)(data) + val pq = new queue(1, pipe = true)(data) + + fq.io.enq <> io.enq + pq.io.enq <> fq.io.deq + io.deq <> pq.io.deq +} + +object SkidBuffer +{ + def apply[T <: Data](enq: FIFOIO[T]): FIFOIO[T] = { + val s = new SkidBuffer()(enq.bits.clone) + s.io.enq <> enq + s.io.deq + } +} From 3a8f3e0de57bfcdd1b20d044c8e20136c57e0dbe Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 30 Jul 2012 20:12:11 -0700 Subject: [PATCH 0457/1087] further pipeline the LLC --- rocket/src/main/scala/llc.scala | 84 ++++++++++++++++++++++-------- rocket/src/main/scala/queues.scala | 9 ++-- 2 files changed, 68 insertions(+), 25 deletions(-) diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala index d95aeff7..89fc5e2f 100644 --- a/rocket/src/main/scala/llc.scala +++ b/rocket/src/main/scala/llc.scala @@ -14,7 +14,6 @@ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) ex val wmask = gen.asInput val rdata = gen.asOutput } - require(readLatency >= 0 && readLatency <= 2) val data = gen val colMux = if (2*data.width <= leaf.data.width && n > leaf.n) 1 << math.floor(math.log(leaf.data.width/data.width)/math.log(2)).toInt else 1 val nWide = if (data.width > leaf.data.width) 1+(data.width-1)/leaf.data.width else 1 @@ -28,9 +27,6 @@ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) ex val cond = Vec(nDeep) { Bool() } val ren = Vec(nDeep) { Bool() } val reg_ren = Vec(nDeep) { Reg() { Bool() } } - val reg2_ren = 
Vec(nDeep) { Reg() { Bool() } } - val reg_raddr = Vec(nDeep) { Reg() { UFix() } } - val reg2_raddr = Vec(nDeep) { Reg() { UFix() } } val renOut = Vec(nDeep) { Bool() } val raddrOut = Vec(nDeep) { UFix() } val rdata = Vec(nDeep) { Vec(nWide) { Bits() } } @@ -40,11 +36,14 @@ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) ex cond(i) := (if (nDeep == 1) io.en else io.en && UFix(i) === io.addr(log2Up(n)-1, log2Up(n/nDeep))) ren(i) := cond(i) && !io.rw reg_ren(i) := ren(i) - reg2_ren(i) := reg_ren(i) - when (ren(i)) { reg_raddr(i) := io.addr } - when (reg_ren(i)) { reg2_raddr(i) := reg_raddr(i) } - renOut(i) := (if (readLatency > 1) reg2_ren(i) else if (readLatency > 0) reg_ren(i) else ren(i)) - raddrOut(i) := (if (readLatency > 1) reg2_raddr(i) else if (readLatency > 0) reg_raddr(i) else io.addr) + + renOut(i) := ren(i) + raddrOut(i) := io.addr + if (readLatency > 0) { + val r = Pipe(ren(i), io.addr, readLatency) + renOut(i) := r.valid + raddrOut(i) := r.bits + } for (j <- 0 until nWide) { val mem = leaf.clone @@ -64,11 +63,8 @@ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) ex dout = mem(idx) } else if (readLatency == 1) { dout = dout1 - } else { - val dout2 = Reg() { Bits() } - when (reg_ren(i)) { dout2 := dout1 } - dout = dout2 - } + } else + dout = Pipe(reg_ren(i), dout1, readLatency-1).bits rdata(i)(j) := dout } @@ -238,7 +234,7 @@ class LLCWriteback(requestors: Int) extends Component io.mem.req_data.bits := io.data(who).bits } -class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component +class LLCData(latency: Int, sets: Int, ways: Int, leaf: Mem[Bits]) extends Component { val io = new Bundle { val req = (new FIFOIO) { new LLCDataReq(ways) }.flip @@ -251,13 +247,13 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component val mem_resp_way = UFix(INPUT, log2Up(ways)) } - val data = new BigMem(sets*ways*REFILL_CYCLES, 2, leaf)(Bits(width = MEM_DATA_BITS)) + val data = new 
BigMem(sets*ways*REFILL_CYCLES, latency, leaf)(Bits(width = MEM_DATA_BITS)) class QEntry extends MemResp { val isWriteback = Bool() override def clone = new QEntry().asInstanceOf[this.type] } - val q = (new queue(4)) { new QEntry } - val qReady = q.io.count <= UFix(q.entries - 3) + val q = (new queue(latency+2)) { new QEntry } + val qReady = q.io.count <= UFix(q.entries-latency-1) val valid = Reg(resetVal = Bool(false)) val req = Reg() { io.req.bits.clone } val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) @@ -287,10 +283,11 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component data.io.wdata := io.mem_resp.bits.data } - q.io.enq.valid := Reg(Reg(data.io.en && !data.io.rw, resetVal = Bool(false)), resetVal = Bool(false)) - q.io.enq.bits.tag := Reg(Reg(Mux(valid, req.tag, io.req.bits.tag))) + val tagPipe = Pipe(data.io.en && !data.io.rw, Mux(valid, req.tag, io.req.bits.tag), latency) + q.io.enq.valid := tagPipe.valid + q.io.enq.bits.tag := tagPipe.bits + q.io.enq.bits.isWriteback := Pipe(Mux(valid, req.isWriteback, io.req.bits.isWriteback), Bool(false), latency).valid q.io.enq.bits.data := data.io.rdata - q.io.enq.bits.isWriteback := Reg(Reg(Mux(valid, req.isWriteback, io.req.bits.isWriteback))) io.req.ready := !valid && Mux(io.req.bits.isWriteback, io.writeback.ready, Bool(true)) io.req_data.ready := !io.mem_resp.valid && Mux(valid, req.rw, io.req.valid && io.req.bits.rw) @@ -305,6 +302,49 @@ class LLCData(sets: Int, ways: Int, leaf: Mem[Bits]) extends Component io.writeback_data.bits := q.io.deq.bits } +class MemReqArb(n: Int) extends Component // UNTESTED +{ + val io = new Bundle { + val cpu = Vec(n) { new ioMem().flip } + val mem = new ioMem + } + + val lock = Reg(resetVal = Bool(false)) + val locker = Reg() { UFix() } + + val arb = new RRArbiter(n)(new MemReqCmd) + val respWho = io.mem.resp.bits.tag(log2Up(n)-1,0) + val respTag = io.mem.resp.bits.tag >> UFix(log2Up(n)) + for (i <- 0 until n) { + val me = UFix(i, log2Up(n)) + 
arb.io.in(i).valid := io.cpu(i).req_cmd.valid + arb.io.in(i).bits := io.cpu(i).req_cmd.bits + arb.io.in(i).bits.tag := Cat(io.cpu(i).req_cmd.bits.tag, me) + io.cpu(i).req_cmd.ready := arb.io.in(i).ready + io.cpu(i).req_data.ready := Bool(false) + + val getLock = io.cpu(i).req_cmd.fire() && io.cpu(i).req_cmd.bits.rw && !lock + val haveLock = lock && locker === me + when (getLock) { + lock := Bool(true) + locker := UFix(i) + } + when (getLock || haveLock) { + io.cpu(i).req_data.ready := io.mem.req_data.ready + io.mem.req_data.valid := Bool(true) + io.mem.req_data.bits := io.cpu(i).req_data.bits + } + + io.cpu(i).resp.valid := io.mem.resp.valid && respWho === me + io.cpu(i).resp.bits := io.mem.resp.bits + io.cpu(i).resp.bits.tag := respTag + } + io.mem.resp.ready := io.cpu(respWho).resp.ready + + val unlock = Counter(io.mem.req_data.fire(), REFILL_CYCLES)._2 + when (unlock) { lock := Bool(false) } +} + class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits]) extends Component { val io = new Bundle { @@ -319,7 +359,7 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da val dataArb = (new Arbiter(2)) { new LLCDataReq(ways) } val mshr = new LLCMSHRFile(sets, ways, outstanding) val tags = new BigMem(sets, 1, tagLeaf)(Bits(width = metaWidth*ways)) - val data = new LLCData(sets, ways, dataLeaf) + val data = new LLCData(3, sets, ways, dataLeaf) val writeback = new LLCWriteback(2) val initCount = Reg(resetVal = UFix(0, log2Up(sets+1))) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index ddd0216f..543f2d18 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -95,9 +95,10 @@ class pipereg[T <: Data]()(data: => T) extends Component object Pipe { - def apply[T <: Data](enq: PipeIO[T], latency: Int = 1): PipeIO[T] = { - val q = (new pipereg) { enq.bits.clone } - q.io.enq <> enq + def apply[T <: Data](enqValid: Bool, enqBits: T, 
latency: Int): PipeIO[T] = { + val q = (new pipereg) { enqBits.clone } + q.io.enq.valid := enqValid + q.io.enq.bits := enqBits q.io.deq if (latency > 1) @@ -105,6 +106,8 @@ object Pipe else q.io.deq } + def apply[T <: Data](enqValid: Bool, enqBits: T): PipeIO[T] = apply(enqValid, enqBits, 1) + def apply[T <: Data](enq: PipeIO[T], latency: Int = 1): PipeIO[T] = apply(enq.valid, enq.bits, latency) } class SkidBuffer[T <: Data]()(data: => T) extends Component From def913096e4f8699420797e2591b52b72fbcad7b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 31 Jul 2012 17:44:53 -0700 Subject: [PATCH 0458/1087] pipeline LLC further --- rocket/src/main/scala/llc.scala | 104 +++++++++++++---------------- rocket/src/main/scala/queues.scala | 28 ++++---- 2 files changed, 61 insertions(+), 71 deletions(-) diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala index 89fc5e2f..546da774 100644 --- a/rocket/src/main/scala/llc.scala +++ b/rocket/src/main/scala/llc.scala @@ -4,14 +4,17 @@ import Chisel._ import Node._ import Constants._ -class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) extends Component +class BigMem[T <: Data](n: Int, preLatency: Int, postLatency: Int, leaf: Mem[Bits])(gen: => T) extends Component { - val io = new Bundle { + class Inputs extends Bundle { val addr = UFix(INPUT, log2Up(n)) - val en = Bool(INPUT) val rw = Bool(INPUT) val wdata = gen.asInput val wmask = gen.asInput + override def clone = new Inputs().asInstanceOf[this.type] + } + val io = new Bundle { + val in = new PipeIO()(new Inputs).flip val rdata = gen.asOutput } val data = gen @@ -21,63 +24,52 @@ class BigMem[T <: Data](n: Int, readLatency: Int, leaf: Mem[Bits])(gen: => T) ex if (nDeep > 1 || colMux > 1) require(isPow2(n) && isPow2(leaf.n)) - val idx = io.addr(log2Up(n/nDeep/colMux)-1, 0) val rdataDeep = Vec(nDeep) { Bits() } val rdataSel = Vec(nDeep) { Bool() } - val cond = Vec(nDeep) { Bool() } - val ren = Vec(nDeep) { Bool() } - val 
reg_ren = Vec(nDeep) { Reg() { Bool() } } - val renOut = Vec(nDeep) { Bool() } - val raddrOut = Vec(nDeep) { UFix() } - val rdata = Vec(nDeep) { Vec(nWide) { Bits() } } - val wdata = io.wdata.toBits - val wmask = io.wmask.toBits for (i <- 0 until nDeep) { - cond(i) := (if (nDeep == 1) io.en else io.en && UFix(i) === io.addr(log2Up(n)-1, log2Up(n/nDeep))) - ren(i) := cond(i) && !io.rw - reg_ren(i) := ren(i) + val in = Pipe(io.in.valid && (if (nDeep == 1) Bool(true) else UFix(i) === io.in.bits.addr(log2Up(n)-1, log2Up(n/nDeep))), io.in.bits, preLatency) + val idx = in.bits.addr(log2Up(n/nDeep/colMux)-1, 0) + val wdata = in.bits.wdata.toBits + val wmask = in.bits.wmask.toBits + val ren = in.valid && !in.bits.rw + val reg_ren = Reg(ren) + val rdata = Vec(nWide) { Bits() } - renOut(i) := ren(i) - raddrOut(i) := io.addr - if (readLatency > 0) { - val r = Pipe(ren(i), io.addr, readLatency) - renOut(i) := r.valid - raddrOut(i) := r.bits - } + val r = Pipe(ren, in.bits.addr, postLatency) for (j <- 0 until nWide) { val mem = leaf.clone var dout: Bits = null - val dout1 = if (readLatency > 0) Reg() { Bits() } else null + val dout1 = if (postLatency > 0) Reg() { Bits() } else null var wmask0 = Fill(colMux, wmask(math.min(wmask.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) if (colMux > 1) - wmask0 = wmask0 & FillInterleaved(gen.width, UFixToOH(io.addr(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)), log2Up(colMux))) + wmask0 = wmask0 & FillInterleaved(gen.width, UFixToOH(in.bits.addr(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)), log2Up(colMux))) val wdata0 = Fill(colMux, wdata(math.min(wdata.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) - when (cond(i)) { - when (io.rw) { mem.write(idx, wdata0, wmask0) } - .otherwise { if (readLatency > 0) dout1 := mem(idx) } + when (in.valid) { + when (in.bits.rw) { mem.write(idx, wdata0, wmask0) } + .otherwise { if (postLatency > 0) dout1 := mem(idx) } } - if (readLatency == 0) { + if (postLatency == 0) { dout = mem(idx) - } else 
if (readLatency == 1) { + } else if (postLatency == 1) { dout = dout1 } else - dout = Pipe(reg_ren(i), dout1, readLatency-1).bits + dout = Pipe(reg_ren, dout1, postLatency-1).bits - rdata(i)(j) := dout + rdata(j) := dout } - val rdataWide = rdata(i).reduceLeft((x, y) => Cat(y, x)) + val rdataWide = rdata.reduceLeft((x, y) => Cat(y, x)) var colMuxOut = rdataWide if (colMux > 1) { val colMuxIn = Vec((0 until colMux).map(k => rdataWide(gen.width*(k+1)-1, gen.width*k))) { Bits() } - colMuxOut = colMuxIn(raddrOut(i)(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux))) + colMuxOut = colMuxIn(r.bits(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux))) } rdataDeep(i) := colMuxOut - rdataSel(i) := renOut(i) + rdataSel(i) := r.valid } io.rdata := Mux1H(rdataSel, rdataDeep) @@ -247,7 +239,7 @@ class LLCData(latency: Int, sets: Int, ways: Int, leaf: Mem[Bits]) extends Compo val mem_resp_way = UFix(INPUT, log2Up(ways)) } - val data = new BigMem(sets*ways*REFILL_CYCLES, latency, leaf)(Bits(width = MEM_DATA_BITS)) + val data = new BigMem(sets*ways*REFILL_CYCLES, 1, latency-1, leaf)(Bits(width = MEM_DATA_BITS)) class QEntry extends MemResp { val isWriteback = Bool() override def clone = new QEntry().asInstanceOf[this.type] @@ -259,31 +251,31 @@ class LLCData(latency: Int, sets: Int, ways: Int, leaf: Mem[Bits]) extends Compo val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) val refillCount = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - when (data.io.en && !io.mem_resp.valid) { + when (data.io.in.valid && !io.mem_resp.valid) { count := count + UFix(1) when (valid && count === UFix(REFILL_CYCLES-1)) { valid := Bool(false) } } when (io.req.valid && io.req.ready) { valid := Bool(true); req := io.req.bits } when (io.mem_resp.valid) { refillCount := refillCount + UFix(1) } - data.io.en := io.req.valid && io.req.ready && Mux(io.req.bits.rw, io.req_data.valid, qReady) - data.io.addr := Cat(io.req.bits.way, io.req.bits.addr(log2Up(sets)-1, 0), count).toUFix - data.io.rw := io.req.bits.rw - 
data.io.wdata := io.req_data.bits.data - data.io.wmask := Fix(-1, io.req_data.bits.data.width) + data.io.in.valid := io.req.valid && io.req.ready && Mux(io.req.bits.rw, io.req_data.valid, qReady) + data.io.in.bits.addr := Cat(io.req.bits.way, io.req.bits.addr(log2Up(sets)-1, 0), count).toUFix + data.io.in.bits.rw := io.req.bits.rw + data.io.in.bits.wdata := io.req_data.bits.data + data.io.in.bits.wmask := Fix(-1, io.req_data.bits.data.width) when (valid) { - data.io.en := Mux(req.rw, io.req_data.valid, qReady) - data.io.addr := Cat(req.way, req.addr(log2Up(sets)-1, 0), count).toUFix - data.io.rw := req.rw + data.io.in.valid := Mux(req.rw, io.req_data.valid, qReady) + data.io.in.bits.addr := Cat(req.way, req.addr(log2Up(sets)-1, 0), count).toUFix + data.io.in.bits.rw := req.rw } when (io.mem_resp.valid) { - data.io.en := Bool(true) - data.io.addr := Cat(io.mem_resp_way, io.mem_resp_set, refillCount).toUFix - data.io.rw := Bool(true) - data.io.wdata := io.mem_resp.bits.data + data.io.in.valid := Bool(true) + data.io.in.bits.addr := Cat(io.mem_resp_way, io.mem_resp_set, refillCount).toUFix + data.io.in.bits.rw := Bool(true) + data.io.in.bits.wdata := io.mem_resp.bits.data } - val tagPipe = Pipe(data.io.en && !data.io.rw, Mux(valid, req.tag, io.req.bits.tag), latency) + val tagPipe = Pipe(data.io.in.valid && !data.io.in.bits.rw, Mux(valid, req.tag, io.req.bits.tag), latency) q.io.enq.valid := tagPipe.valid q.io.enq.bits.tag := tagPipe.bits q.io.enq.bits.isWriteback := Pipe(Mux(valid, req.isWriteback, io.req.bits.isWriteback), Bool(false), latency).valid @@ -358,8 +350,8 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da val memCmdArb = (new Arbiter(2)) { new MemReqCmd } val dataArb = (new Arbiter(2)) { new LLCDataReq(ways) } val mshr = new LLCMSHRFile(sets, ways, outstanding) - val tags = new BigMem(sets, 1, tagLeaf)(Bits(width = metaWidth*ways)) - val data = new LLCData(3, sets, ways, dataLeaf) + val tags = new BigMem(sets, 0, 1, 
tagLeaf)(Bits(width = metaWidth*ways)) + val data = new LLCData(4, sets, ways, dataLeaf) val writeback = new LLCWriteback(2) val initCount = Reg(resetVal = UFix(0, log2Up(sets+1))) @@ -392,11 +384,11 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da val setDirty = s2_valid && s2.rw && s2_hit && !s2_hit_dirty stall_s1 := initialize || mshr.io.tag.valid || setDirty || s2_valid && !s2_hit || stall_s2 - tags.io.en := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || setDirty || mshr.io.tag.valid - tags.io.addr := Mux(initialize, initCount, Mux(setDirty, s2.addr, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)))(log2Up(sets)-1,0)) - tags.io.rw := initialize || setDirty || mshr.io.tag.valid - tags.io.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) - tags.io.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(Mux(setDirty, s2_hit_way, mshr.io.tag.bits.way)))) + tags.io.in.valid := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || setDirty || mshr.io.tag.valid + tags.io.in.bits.addr := Mux(initialize, initCount, Mux(setDirty, s2.addr, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)))(log2Up(sets)-1,0)) + tags.io.in.bits.rw := initialize || setDirty || mshr.io.tag.valid + tags.io.in.bits.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) + tags.io.in.bits.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(Mux(setDirty, s2_hit_way, mshr.io.tag.bits.way)))) mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw mshr.io.cpu.bits := s2 diff --git a/rocket/src/main/scala/queues.scala 
b/rocket/src/main/scala/queues.scala index 543f2d18..2fba1b1e 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -74,37 +74,35 @@ object Queue } } -class pipereg[T <: Data]()(data: => T) extends Component +class pipereg[T <: Data](latency: Int = 1)(data: => T) extends Component { val io = new Bundle { val enq = new PipeIO()(data).flip val deq = new PipeIO()(data) } - //val bits = Reg() { io.enq.bits.clone } - //when (io.enq.valid) { - // bits := io.enq.bits - //} + var bits: T = io.enq.bits + var valid: Bool = io.enq.valid - val r = Reg() { io.enq.bits.clone } - when (io.enq.valid) { r := io.enq.bits } + for (i <- 0 until latency) { + val reg_bits = Reg() { io.enq.bits.clone } + val reg_valid = Reg(valid, resetVal = Bool(false)) + when (valid) { reg_bits := bits } + valid = reg_valid + bits = reg_bits + } - io.deq.valid := Reg(io.enq.valid, resetVal = Bool(false)) - io.deq.bits <> r + io.deq.valid := valid + io.deq.bits := bits } object Pipe { def apply[T <: Data](enqValid: Bool, enqBits: T, latency: Int): PipeIO[T] = { - val q = (new pipereg) { enqBits.clone } + val q = (new pipereg(latency)) { enqBits.clone } q.io.enq.valid := enqValid q.io.enq.bits := enqBits q.io.deq - - if (latency > 1) - Pipe(q.io.deq, latency-1) - else - q.io.deq } def apply[T <: Data](enqValid: Bool, enqBits: T): PipeIO[T] = apply(enqValid, enqBits, 1) def apply[T <: Data](enq: PipeIO[T], latency: Int = 1): PipeIO[T] = apply(enq.valid, enq.bits, latency) From e3726c4db04fe7d573a1137f78f9d534daad533d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 3 Aug 2012 18:59:37 -0700 Subject: [PATCH 0459/1087] fix control bug in LLC structural hazard on tag ram caused deadlock --- rocket/src/main/scala/llc.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala index 546da774..eece03ed 100644 --- a/rocket/src/main/scala/llc.scala +++ b/rocket/src/main/scala/llc.scala @@ 
-154,13 +154,14 @@ class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component val conflicts = Cat(Bits(0), (0 until outstanding).map(i => valid(i) && io.cpu.bits.addr(log2Up(sets)-1, 0) === mshr(i).addr(log2Up(sets)-1, 0)):_*) io.cpu.ready := !conflicts.orR && !validBits.andR - io.data.valid := replay && io.tag.ready || writeback + io.data.valid := writeback io.data.bits.rw := Bool(false) io.data.bits.tag := mshr(replayId).tag io.data.bits.isWriteback := Bool(true) io.data.bits.addr := Cat(mshr(writebackId).old_tag, mshr(writebackId).addr(log2Up(sets)-1, 0)).toUFix io.data.bits.way := mshr(writebackId).way when (replay) { + io.data.valid := io.tag.ready io.data.bits.isWriteback := Bool(false) io.data.bits.addr := mshr(replayId).addr io.data.bits.way := mshr(replayId).way From 6510f020c7ab6e13d26a8de90ac7850c8437be71 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 3 Aug 2012 19:00:03 -0700 Subject: [PATCH 0460/1087] fix deadlock in coherence hub --- rocket/src/main/scala/uncore.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 5c58f151..a1726242 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -101,8 +101,8 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { cmd_sent := Bool(true) } when (at_front_of_dep_queue) { - req_cmd.valid := !cmd_sent && req_data.ready - lock := Bool(true) + req_cmd.valid := !cmd_sent && req_data.ready && data.valid + lock := data.valid || cmd_sent when (req_cmd.ready || cmd_sent) { req_data.valid := data.valid when(req_data.ready) { From b94e6915ab3e68559f8000033908a1d831da4cf0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 3 Aug 2012 19:00:34 -0700 Subject: [PATCH 0461/1087] refactor IPIs; use new tohost/fromhost protocol --- rocket/src/main/scala/dpath_util.scala | 21 ++++++++----- rocket/src/main/scala/htif.scala | 42 
++++++++++++++------------ rocket/src/main/scala/top.scala | 3 +- 3 files changed, 37 insertions(+), 29 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index ebd39d8f..f216e52a 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -122,13 +122,13 @@ class rocketDpathPCR extends Component val rdata = Bits(); val raddr = Mux(io.r.en, io.r.addr, io.host.pcr_req.bits.addr(4,0)) - io.host.pcr_rep.valid := io.host.pcr_req.valid && !io.r.en && !io.host.pcr_req.bits.rw + io.host.pcr_rep.valid := io.host.pcr_req.fire() io.host.pcr_rep.bits := rdata - val wen = io.w.en || io.host.pcr_req.valid && io.host.pcr_req.bits.rw + val wen = io.w.en || !io.r.en && io.host.pcr_req.valid && io.host.pcr_req.bits.rw val waddr = Mux(io.w.en, io.w.addr, io.host.pcr_req.bits.addr) val wdata = Mux(io.w.en, io.w.data, io.host.pcr_req.bits.data) - io.host.pcr_req.ready := Mux(io.host.pcr_req.bits.rw, !io.w.en, !io.r.en) + io.host.pcr_req.ready := !io.w.en && !io.r.en io.ptbr_wen := reg_status_vm.toBool && wen && (waddr === PCR_PTBR); io.status := Cat(reg_status_im, Bits(0,7), reg_status_vm, reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, reg_status_ec, reg_status_ev, reg_status_ef, reg_status_et); @@ -176,9 +176,11 @@ class rocketDpathPCR extends Component io.irq_timer := r_irq_timer; io.irq_ipi := r_irq_ipi; - io.host.ipi.valid := io.w.en && io.w.addr === PCR_SEND_IPI - io.host.ipi.bits := io.w.data - io.replay := io.host.ipi.valid && !io.host.ipi.ready + io.host.ipi_req.valid := io.w.en && io.w.addr === PCR_SEND_IPI + io.host.ipi_req.bits := io.w.data + io.replay := io.host.ipi_req.valid && !io.host.ipi_req.ready + + when (io.host.pcr_req.fire() && !io.host.pcr_req.bits.rw && io.host.pcr_req.bits.addr === PCR_TOHOST) { reg_tohost := UFix(0) } when (wen) { when (waddr === PCR_STATUS) { @@ -198,8 +200,8 @@ class rocketDpathPCR extends Component when (waddr === PCR_COUNT) { 
reg_count := wdata(31,0).toUFix; } when (waddr === PCR_COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } when (waddr === PCR_COREID) { reg_coreid := wdata(15,0) } - when (waddr === PCR_FROMHOST) { reg_fromhost := wdata; reg_tohost := Bits(0) } - when (waddr === PCR_TOHOST) { reg_tohost := wdata; reg_fromhost := Bits(0) } + when (waddr === PCR_FROMHOST) { when (reg_fromhost === UFix(0) || io.w.en) { reg_fromhost := wdata } } + when (waddr === PCR_TOHOST) { when (reg_tohost === UFix(0)) { reg_tohost := wdata } } when (waddr === PCR_CLR_IPI) { r_irq_ipi := wdata(0) } when (waddr === PCR_K0) { reg_k0 := wdata; } when (waddr === PCR_K1) { reg_k1 := wdata; } @@ -207,6 +209,9 @@ class rocketDpathPCR extends Component when (waddr === PCR_VECBANK) { reg_vecbank:= wdata(7,0) } } + io.host.ipi_rep.ready := Bool(true) + when (io.host.ipi_rep.valid) { r_irq_ipi := Bool(true) } + rdata := io.status // raddr === PCR_STATUS switch (raddr) { is (PCR_EPC) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_epc(VADDR_BITS)), reg_epc); } diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 8a9af58b..2f6af9e4 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -28,7 +28,8 @@ class ioHTIF extends Bundle val debug = new ioDebug val pcr_req = (new FIFOIO) { new PCRReq }.flip val pcr_rep = (new FIFOIO) { Bits(width = 64) } - val ipi = (new FIFOIO) { Bits(width = log2Up(NTILES)) } + val ipi_req = (new FIFOIO) { Bits(width = log2Up(NTILES)) } + val ipi_rep = (new FIFOIO) { Bool() }.flip } class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends Component @@ -93,7 +94,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C } val rx_done = rx_word_done && Mux(rx_word_count === UFix(0), next_cmd != cmd_writemem && next_cmd != cmd_writecr, rx_word_count === size || rx_word_count(log2Up(packet_ram_depth)-1,0) === UFix(0)) - val tx_size = Mux(!nack && (cmd === 
cmd_readmem || cmd === cmd_readcr), size, UFix(0)) + val tx_size = Mux(!nack && (cmd === cmd_readmem || cmd === cmd_readcr || cmd === cmd_writecr), size, UFix(0)) val tx_done = io.host.out.ready && tx_subword_count.andR && (tx_word_count === tx_size || tx_word_count > UFix(0) && packet_ram_raddr.andR) val mem_acked = Reg(resetVal = Bool(false)) @@ -195,31 +196,32 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C val cpu = io.cpu(i) val me = pcr_coreid === UFix(i) - cpu.pcr_req.valid := my_ipi || state === state_pcr_req && me - cpu.pcr_req.bits.rw := my_ipi || cmd === cmd_writecr - cpu.pcr_req.bits.addr := Mux(my_ipi, PCR_CLR_IPI, pcr_addr) - cpu.pcr_req.bits.data := my_ipi | pcr_wdata + cpu.pcr_req.valid := state === state_pcr_req && me && pcr_addr != PCR_RESET + cpu.pcr_req.bits.rw := cmd === cmd_writecr + cpu.pcr_req.bits.addr := pcr_addr + cpu.pcr_req.bits.data := pcr_wdata cpu.reset := my_reset + when (cpu.ipi_rep.ready) { + my_ipi := Bool(false) + } + cpu.ipi_rep.valid := my_ipi + cpu.ipi_req.ready := Bool(true) for (j <- 0 until ncores) { - when (io.cpu(j).ipi.valid && io.cpu(j).ipi.bits === UFix(i)) { + when (io.cpu(j).ipi_req.valid && io.cpu(j).ipi_req.bits === UFix(i)) { my_ipi := Bool(true) } } - cpu.ipi.ready := Bool(true) - when (my_ipi) { - my_ipi := !cpu.pcr_req.ready - } - when (state === state_pcr_req && me && !my_ipi && cpu.pcr_req.ready) { + when (cpu.pcr_req.valid && cpu.pcr_req.ready) { + state := state_pcr_resp + } + when (state === state_pcr_req && me && pcr_addr === PCR_RESET) { when (cmd === cmd_writecr) { - state := state_tx - when (pcr_addr === PCR_RESET) { - my_reset := pcr_wdata(0) - } - }.otherwise { - state := state_pcr_resp + my_reset := pcr_wdata(0) } + rdata := my_reset.toBits + state := state_tx } cpu.pcr_rep.ready := Bool(true) @@ -229,14 +231,14 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C } pcr_mux.io.sel(i) := me - pcr_mux.io.in(i) := Mux(pcr_addr === 
PCR_RESET, Cat(Bits(0, 63), my_reset), rdata) + pcr_mux.io.in(i) := rdata } val tx_cmd = Mux(nack, cmd_nack, cmd_ack) val tx_cmd_ext = Cat(Bits(0, 4-tx_cmd.getWidth), tx_cmd) val tx_header = Cat(addr, seqno, tx_size, tx_cmd_ext) val tx_data = Mux(tx_word_count === UFix(0), tx_header, - Mux(cmd === cmd_readcr, pcr_mux.io.out, + Mux(cmd === cmd_readcr || cmd === cmd_writecr, pcr_mux.io.out, packet_ram(packet_ram_raddr))) io.host.in.ready := state === state_rx diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index bf52ca78..160411f6 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -40,7 +40,8 @@ class Top extends Component tile.io.host.reset := Reg(Reg(hl.reset)) tile.io.host.pcr_req <> Queue(hl.pcr_req) hl.pcr_rep <> Queue(tile.io.host.pcr_rep) - hl.ipi <> Queue(tile.io.host.ipi) + hl.ipi_req <> Queue(tile.io.host.ipi_req) + tile.io.host.ipi_rep <> Queue(hl.ipi_rep) error_mode = error_mode || Reg(tile.io.host.debug.error_mode) tl.xact_init <> Queue(tile.io.tilelink.xact_init) From e9c35b4923835959e330463d92278f7bf0d2a760 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 6 Aug 2012 17:05:05 -0700 Subject: [PATCH 0462/1087] ameliorate DTLB kill->rdy critical path --- rocket/src/main/scala/dtlb.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 99018351..6ce054dc 100644 --- a/rocket/src/main/scala/dtlb.scala +++ b/rocket/src/main/scala/dtlb.scala @@ -125,11 +125,11 @@ class rocketDTLB(entries: Int) extends Component val plru = new PseudoLRU(entries) val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace).toUFix; - val lookup = (state === s_ready) && r_cpu_req_val && !io.cpu_req.bits.kill && (req_load || req_store || req_amo || req_pf); + val lookup = (state === s_ready) && status_vm && r_cpu_req_val && (req_load || req_store || req_amo || req_pf); val lookup_hit = lookup && tag_hit; 
val lookup_miss = lookup && !tag_hit; - val tlb_hit = status_vm && lookup_hit; - val tlb_miss = status_vm && lookup_miss; + val tlb_hit = !io.cpu_req.bits.kill && lookup_hit; + val tlb_miss = !io.cpu_req.bits.kill && lookup_miss; // currently replace TLB entries in LIFO order // TODO: implement LRU replacement policy @@ -154,7 +154,7 @@ class rocketDTLB(entries: Int) extends Component io.cpu_resp.xcpt_st := store_fault_common && (req_store || req_amo) io.cpu_resp.xcpt_pf := load_fault_common && req_pf - io.cpu_req.ready := (state === s_ready) && !tlb_miss; + io.cpu_req.ready := (state === s_ready) && !lookup_miss; io.cpu_resp.miss := tlb_miss; io.cpu_resp.ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)); From 897a4e349ba1c6f3f7ddcfd4b9be9928c88af7a1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 6 Aug 2012 17:10:04 -0700 Subject: [PATCH 0463/1087] fix some LLC control bugs --- rocket/src/main/scala/llc.scala | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala index eece03ed..64d48ce8 100644 --- a/rocket/src/main/scala/llc.scala +++ b/rocket/src/main/scala/llc.scala @@ -383,15 +383,20 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da val repl_way = LFSR16(s2_valid)(log2Up(ways)-1, 0) val repl_tag = s2_tags(repl_way).toUFix val setDirty = s2_valid && s2.rw && s2_hit && !s2_hit_dirty - stall_s1 := initialize || mshr.io.tag.valid || setDirty || s2_valid && !s2_hit || stall_s2 + stall_s1 := initialize || stall_s2 - tags.io.in.valid := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || setDirty || mshr.io.tag.valid - tags.io.in.bits.addr := Mux(initialize, initCount, Mux(setDirty, s2.addr, Mux(mshr.io.tag.valid, mshr.io.tag.bits.addr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)))(log2Up(sets)-1,0)) - tags.io.in.bits.rw := initialize || setDirty || mshr.io.tag.valid - 
tags.io.in.bits.wdata := Mux(initialize, UFix(0), Fill(ways, Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)))) - tags.io.in.bits.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(Mux(setDirty, s2_hit_way, mshr.io.tag.bits.way)))) + val tag_we = setDirty || mshr.io.tag.valid + val tag_waddr = Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(log2Up(sets)-1,0) + val tag_wdata = Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)) + val tag_wway = Mux(setDirty, s2_hit_way, mshr.io.tag.bits.way) + tags.io.in.valid := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || tag_we + tags.io.in.bits.addr := Mux(initialize, initCount, Mux(tag_we, tag_waddr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)(log2Up(sets)-1,0))) + tags.io.in.bits.rw := initialize || tag_we + tags.io.in.bits.wdata := Mux(initialize, UFix(0), Fill(ways, tag_wdata)) + tags.io.in.bits.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(tag_wway))) + when (tag_we && Mux(stall_s2, s2.addr, s1.addr)(log2Up(sets)-1,0) === tag_waddr) { s2_tags(tag_wway) := tag_wdata } - mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw + mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw && dataArb.io.in(1).ready && writeback.io.req(0).ready // stall_s2 mshr.io.cpu.bits := s2 mshr.io.repl_way := repl_way mshr.io.repl_dirty := repl_tag(tagWidth+1, tagWidth).andR @@ -404,29 +409,29 @@ class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], da data.io.mem_resp_set := mshr.io.mem_resp_set data.io.mem_resp_way := mshr.io.mem_resp_way data.io.req_data.bits := io.cpu.req_data.bits + data.io.req_data.valid := io.cpu.req_data.valid writeback.io.req(0) <> data.io.writeback writeback.io.data(0) <> data.io.writeback_data - writeback.io.req(1).valid := s2_valid && !s2_hit 
&& s2.rw + writeback.io.req(1).valid := s2_valid && !s2_hit && s2.rw && dataArb.io.in(1).ready && mshr.io.cpu.ready // stall_s2 writeback.io.req(1).bits := s2.addr writeback.io.data(1).valid := io.cpu.req_data.valid writeback.io.data(1).bits := io.cpu.req_data.bits - data.io.req_data.valid := io.cpu.req_data.valid && writeback.io.req(1).ready memCmdArb.io.in(0) <> mshr.io.mem.req_cmd memCmdArb.io.in(1) <> writeback.io.mem.req_cmd dataArb.io.in(0) <> mshr.io.data - dataArb.io.in(1).valid := s2_valid && s2_hit + dataArb.io.in(1).valid := s2_valid && s2_hit && writeback.io.req(0).ready && mshr.io.cpu.ready // stall_s2 dataArb.io.in(1).bits := s2 dataArb.io.in(1).bits.way := s2_hit_way dataArb.io.in(1).bits.isWriteback := Bool(false) - stall_s2 := s2_valid && !Mux(s2_hit, dataArb.io.in(1).ready, Mux(s2.rw, writeback.io.req(1).ready, mshr.io.cpu.ready)) + stall_s2 := s2_valid && !(dataArb.io.in(1).ready && writeback.io.req(0).ready && mshr.io.cpu.ready) io.cpu.resp <> data.io.resp io.cpu.req_cmd.ready := !stall_s1 && !replay_s1 - io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready && writeback.io.req(1).ready + io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready io.mem.req_cmd <> memCmdArb.io.out io.mem.req_data <> writeback.io.mem.req_data } From 0f2077166448a793aa4ce7135b5f5f4d1319f03f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Aug 2012 22:11:32 -0700 Subject: [PATCH 0464/1087] rename queue to Queue fixes build with case-insensitive file system --- rocket/src/main/scala/htif.scala | 2 +- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/icache_prefetch.scala | 96 --------------------- rocket/src/main/scala/llc.scala | 2 +- rocket/src/main/scala/memserdes.scala | 2 +- rocket/src/main/scala/multiplier.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 4 +- rocket/src/main/scala/queues.scala | 8 +- rocket/src/main/scala/slowio.scala | 4 +- rocket/src/main/scala/uncore.scala | 4 +- 10 
files changed, 15 insertions(+), 111 deletions(-) delete mode 100644 rocket/src/main/scala/icache_prefetch.scala diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 2f6af9e4..058ed016 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -121,7 +121,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C } val mem_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - val x_init = new queue(1)(new TransactionInit) + val x_init = new Queue(1)(new TransactionInit) when (state === state_mem_req && x_init.io.enq.ready) { state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index fec70226..fbbc2dfa 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -127,7 +127,7 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten } tag_hit := any_hit - val finish_q = (new queue(1)) { new TransactionFinish } + val finish_q = (new Queue(1)) { new TransactionFinish } finish_q.io.enq.valid := refill_done && io.mem.xact_rep.bits.require_ack finish_q.io.enq.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id diff --git a/rocket/src/main/scala/icache_prefetch.scala b/rocket/src/main/scala/icache_prefetch.scala deleted file mode 100644 index 45ed9fc2..00000000 --- a/rocket/src/main/scala/icache_prefetch.scala +++ /dev/null @@ -1,96 +0,0 @@ -package rocket - -import Chisel._; -import Node._; -import Constants._; -import scala.math._; - -class ioIPrefetcher extends Bundle() { - val icache = new ioTileLink().flip - val mem = new ioTileLink - val invalidate = Bool(INPUT) -} - -class rocketIPrefetcher(co: CoherencePolicyWithUncached) extends Component -{ - val io = new ioIPrefetcher(); - val pdq = (new queue(REFILL_CYCLES, flushable = true)) { Bits(width = MEM_DATA_BITS) }; - - val s_invalid :: s_valid :: s_refilling :: 
s_req_wait :: s_resp_wait :: s_bad_resp_wait :: Nil = Enum(6) { UFix() }; - val state = Reg(resetVal = s_invalid); - - val ip_mem_resp_abort = io.mem.xact_abort.valid && io.mem.xact_abort.bits.tile_xact_id(0) - val demand_miss = io.icache.xact_init.valid && io.icache.xact_init.ready - val prefetch_addr = Reg() { UFix(width = io.icache.xact_init.bits.address.width) }; - val addr_match = (prefetch_addr === io.icache.xact_init.bits.address); - val hit = (state != s_invalid) && (state != s_req_wait) && addr_match && !ip_mem_resp_abort - val prefetch_miss = io.icache.xact_init.valid && !hit - when (demand_miss) { prefetch_addr := io.icache.xact_init.bits.address + UFix(1); } - - io.icache.xact_init.ready := io.mem.xact_init.ready - val ip_mem_resp_val = io.mem.xact_rep.valid && io.mem.xact_rep.bits.tile_xact_id(0) - val ip_mem_req_rdy = io.mem.xact_init.ready && !prefetch_miss - - val finish_q = (new queue(1)) { new TransactionFinish } - io.mem.xact_abort.ready := Bool(true) - io.mem.xact_init.valid := prefetch_miss || (state === s_req_wait) && finish_q.io.enq.ready - io.mem.xact_init.bits.x_type := co.getTransactionInitTypeOnUncachedRead - io.mem.xact_init.bits.tile_xact_id := Mux(prefetch_miss, UFix(0), UFix(1)) - io.mem.xact_init.bits.address := Mux(prefetch_miss, io.icache.xact_init.bits.address, prefetch_addr); - - val finish_arb = (new Arbiter(2)) { new TransactionFinish } - finish_arb.io.in(0) <> io.icache.xact_finish - finish_arb.io.in(1) <> finish_q.io.deq - io.mem.xact_finish <> finish_arb.io.out - - val fill_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - when (ip_mem_resp_val) { fill_cnt := fill_cnt + UFix(1) } - val fill_done = fill_cnt.andR && ip_mem_resp_val - - finish_q.io.enq.valid := fill_done && io.mem.xact_rep.bits.require_ack - finish_q.io.enq.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id - - val forward = Reg(resetVal = Bool(false)) - val forward_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - when (forward && 
pdq.io.deq.valid) { forward_cnt := forward_cnt + UFix(1) } - val forward_done = forward_cnt.andR && pdq.io.deq.valid - forward := demand_miss && hit || forward && !forward_done - - io.icache.xact_abort.valid := io.mem.xact_abort.valid && !io.mem.xact_abort.bits.tile_xact_id(0) || - forward && ip_mem_resp_abort - io.icache.xact_rep.valid := io.mem.xact_rep.valid && !io.mem.xact_rep.bits.tile_xact_id(0) || (forward && pdq.io.deq.valid) - io.icache.xact_rep.bits.data := Mux(forward, pdq.io.deq.bits, io.mem.xact_rep.bits.data) - io.icache.xact_rep.bits.require_ack := !forward && io.mem.xact_rep.bits.require_ack - io.icache.xact_rep.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id - - pdq.io.flush := Reg(demand_miss && !hit || (state === s_bad_resp_wait), resetVal = Bool(false)) - pdq.io.enq.bits := io.mem.xact_rep.bits.data - pdq.io.enq.valid := ip_mem_resp_val - pdq.io.deq.ready := forward - - switch (state) { - is (s_invalid) { - when (demand_miss) { state := s_req_wait; } - } - is (s_valid) { - when (demand_miss || forward && forward_done) { state := s_req_wait } - .elsewhen (io.invalidate && !forward) { state := s_invalid } - } - is (s_refilling) { - when (demand_miss && !addr_match && fill_done) { state := s_req_wait } - .elsewhen (fill_done) { state := Mux(io.invalidate, s_invalid, s_valid) } - .elsewhen (demand_miss && !addr_match || io.invalidate) { state := s_bad_resp_wait } - } - is (s_req_wait) { - when (ip_mem_req_rdy && finish_q.io.enq.ready) { state := s_resp_wait } - } - is (s_resp_wait) { - when (ip_mem_resp_abort) { state := s_invalid } - .elsewhen (demand_miss && !addr_match || io.invalidate) { state := s_bad_resp_wait } - .elsewhen (ip_mem_resp_val) { state := s_refilling } - } - is (s_bad_resp_wait) { - when (fill_done || ip_mem_resp_abort) { state := s_req_wait } - } - } -} diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala index 64d48ce8..a2a5d361 100644 --- a/rocket/src/main/scala/llc.scala +++ 
b/rocket/src/main/scala/llc.scala @@ -245,7 +245,7 @@ class LLCData(latency: Int, sets: Int, ways: Int, leaf: Mem[Bits]) extends Compo val isWriteback = Bool() override def clone = new QEntry().asInstanceOf[this.type] } - val q = (new queue(latency+2)) { new QEntry } + val q = (new Queue(latency+2)) { new QEntry } val qReady = q.io.count <= UFix(q.entries-latency-1) val valid = Reg(resetVal = Bool(false)) val req = Reg() { io.req.bits.clone } diff --git a/rocket/src/main/scala/memserdes.scala b/rocket/src/main/scala/memserdes.scala index e20f1ec3..cdd109c2 100644 --- a/rocket/src/main/scala/memserdes.scala +++ b/rocket/src/main/scala/memserdes.scala @@ -149,7 +149,7 @@ class MemDessert extends Component // test rig side io.wide.req_data.valid := state === s_data io.wide.req_data.bits.data := in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (dbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH) - val dataq = (new queue(REFILL_CYCLES)) { new MemResp } + val dataq = (new Queue(REFILL_CYCLES)) { new MemResp } dataq.io.enq <> io.wide.resp dataq.io.deq.ready := recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 9d3145b5..f56f3d56 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -48,7 +48,7 @@ class rocketVUMultiplier(nwbq: Int) extends Component { inflight_cnt = inflight_cnt + wbq_cnt val wbq_rdy = inflight_cnt < UFix(nwbq) - val wbq = (new queue(nwbq)) { Bits(width = io.cpu.resp_bits.width + io.cpu.resp_tag.width) } + val wbq = (new Queue(nwbq)) { Bits(width = io.cpu.resp_bits.width + io.cpu.resp_tag.width) } wbq.io.enq.valid := valid(0) wbq.io.enq.bits := Cat(io.vu.resp, tag(0)) wbq.io.deq.ready := io.cpu.resp_rdy diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c63e3696..f92e6e6c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ 
b/rocket/src/main/scala/nbdcache.scala @@ -198,7 +198,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { val idx_match = req.idx === io.req_bits.idx val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) - val rpq = (new queue(NRPQ)) { new RPQEntry } + val rpq = (new Queue(NRPQ)) { new RPQEntry } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq rpq.io.enq.bits := io.req_bits rpq.io.enq.bits.sdq_id := io.req_sdq_id @@ -209,7 +209,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { val refill_done = reply && refill_count.andR val wb_done = reply && (state === s_wb_resp) - val finish_q = (new queue(2 /* wb + refill */)) { new TransactionFinish } + val finish_q = (new Queue(2 /* wb + refill */)) { new TransactionFinish } finish_q.io.enq.valid := wb_done || refill_done finish_q.io.enq.bits.global_xact_id := io.mem_rep.bits.global_xact_id diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 2fba1b1e..d9790e26 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -11,7 +11,7 @@ class ioQueue[T <: Data](entries: Int, flushable: Boolean)(data: => T) extends B val count = UFix(OUTPUT, log2Up(entries+1)) } -class queue[T <: Data](val entries: Int, pipe: Boolean = false, flow: Boolean = false, flushable: Boolean = false)(data: => T) extends Component +class Queue[T <: Data](val entries: Int, pipe: Boolean = false, flow: Boolean = false, flushable: Boolean = false)(data: => T) extends Component { val io = new ioQueue(entries, flushable)(data) @@ -66,7 +66,7 @@ class queue[T <: Data](val entries: Int, pipe: Boolean = false, flow: Boolean = object Queue { def apply[T <: Data](enq: FIFOIO[T], entries: Int = 2, pipe: Boolean = false) = { - val q = (new 
queue(entries, pipe)) { enq.bits.clone } + val q = (new Queue(entries, pipe)) { enq.bits.clone } q.io.enq.valid := enq.valid // not using <> so that override is allowed q.io.enq.bits := enq.bits enq.ready := q.io.enq.ready @@ -115,8 +115,8 @@ class SkidBuffer[T <: Data]()(data: => T) extends Component val deq = new FIFOIO()(data) } - val fq = new queue(1, flow = true)(data) - val pq = new queue(1, pipe = true)(data) + val fq = new Queue(1, flow = true)(data) + val pq = new Queue(1, pipe = true)(data) fq.io.enq <> io.enq pq.io.enq <> fq.io.deq diff --git a/rocket/src/main/scala/slowio.scala b/rocket/src/main/scala/slowio.scala index 6cf5a3d9..068e90c5 100644 --- a/rocket/src/main/scala/slowio.scala +++ b/rocket/src/main/scala/slowio.scala @@ -28,12 +28,12 @@ class slowIO[T <: Data](val divisor: Int, hold_cycles_in: Int = -1)(data: => T) val out_slow_val = Reg(resetVal = Bool(false)) val out_slow_bits = Reg() { data } - val fromhost_q = new queue(1)(data) + val fromhost_q = new Queue(1)(data) fromhost_q.io.enq.valid := in_en && (io.in_slow.valid && in_slow_rdy || reset) fromhost_q.io.enq.bits := io.in_slow.bits fromhost_q.io.deq <> io.in_fast - val tohost_q = new queue(1)(data) + val tohost_q = new Queue(1)(data) tohost_q.io.enq <> io.out_fast tohost_q.io.deq.ready := in_en && io.out_slow.ready && out_slow_val diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index a1726242..7ffef155 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -336,8 +336,8 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH } } - val p_rep_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth must >= NPRIMARY - val x_init_data_dep_list = List.fill(ntiles)((new queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth should >= NPRIMARY + val p_rep_data_dep_list = List.fill(ntiles)((new Queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth must >= 
NPRIMARY + val x_init_data_dep_list = List.fill(ntiles)((new Queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth should >= NPRIMARY // Free finished transactions for( j <- 0 until ntiles ) { From 743e032f064741967f4c936393eab282660a1bfb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 22 Aug 2012 13:38:07 -0700 Subject: [PATCH 0465/1087] generalize interface to DecodeLogic --- rocket/src/main/scala/consts.scala | 4 ++-- rocket/src/main/scala/decode.scala | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 78f979d8..7a2f716c 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -57,8 +57,8 @@ object Constants val DIV_RU = UFix(3, 2); val X = Bits("b?", 1) - val N = UFix(0, 1); - val Y = UFix(1, 1); + val N = Bits(0, 1); + val Y = Bits(1, 1); val WA_X = X val WA_RD = N diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index e2472820..355dd1a1 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -13,32 +13,32 @@ object DecodeLogic new Term(b.value) } } - def logic(addr: Bits, keys: Seq[Bits], cache: scala.collection.mutable.Map[Term,Bits], terms: Set[Term]) = { + def logic(addr: Bits, cache: scala.collection.mutable.Map[Term,Bits], terms: Set[Term]) = { terms.map { t => if (!cache.contains(t)) cache += t -> ((if (t.mask == 0) addr else addr & Lit(BigInt(2).pow(addr.width)-(t.mask+1), addr.width){Bits()}) === Lit(t.value, addr.width){Bits()}) cache(t) }.foldLeft(Bool(false))(_||_) } - def apply(addr: Bits, default: List[Bits], mapping: Array[(Bits, List[Bits])]) = { + def apply(addr: Bits, default: Iterable[Bits], mapping: Iterable[(Bits, Iterable[Bits])]) = { var map = mapping var cache = scala.collection.mutable.Map[Term,Bits]() default map { d => val dlit = d.litOf val dterm = term(dlit) val (keys, values) = map.unzip - val keysterms = 
keys.map(k => term(k.litOf)) zip values.map(v => term(v.head.litOf)) + val keysterms = keys.toList.map(k => term(k.litOf)) zip values.toList.map(v => term(v.head.litOf)) val result = (0 until math.max(dlit.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) => if (((dterm.mask >> i) & 1) != 0) { var mint = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1).toSet var maxt = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1).toSet - logic(addr, keys, cache, SimplifyDC(mint, maxt, addr.width)).toBits + logic(addr, cache, SimplifyDC(mint, maxt, addr.width)).toBits } else { val want = 1 - ((dterm.value.toInt >> i) & 1) val mint = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == want }.map(_._1).toSet val dc = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1).toSet - val bit = logic(addr, keys, cache, Simplify(mint, dc, addr.width)).toBits + val bit = logic(addr, cache, Simplify(mint, dc, addr.width)).toBits if (want == 1) bit else ~bit } }).reverse.reduceRight(Cat(_,_)) From d4a001b867965d63c0c2ed85dee53bbdbcb3516c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 22 Aug 2012 13:38:25 -0700 Subject: [PATCH 0466/1087] add PriorityMux; use to implement PriorityEncoder --- rocket/src/main/scala/util.scala | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 5cd67ea2..4257de4f 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -64,15 +64,20 @@ class LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { io.out.bits := Mux(any_lock_held, bits_arr(lock_idx), dout) } +object PriorityMux +{ + def apply[T <: Data](sel: Seq[Bits], in: Seq[T]): T = { + if (in.size == 1) + in.head + else + Mux(sel.head, in.head, apply(sel.tail, in.tail)) + } + def apply[T <: 
Data](sel: Bits, in: Seq[T]): T = apply((0 until in.size).map(sel(_)), in) +} + object PriorityEncoder { - def doit(in: Seq[Bits], n: Int): UFix = { - if (n >= in.size-1) - UFix(n) - else - Mux(in(n), UFix(n), doit(in, n+1)) - } - def apply(in: Seq[Bits]): UFix = doit(in, 0) + def apply(in: Seq[Bits]): UFix = PriorityMux(in, (0 until in.size).map(UFix(_))) def apply(in: Bits): UFix = apply((0 until in.getWidth).map(in(_))) } From 667b4ee8580bdabb2ecf1f1072ddc200a77dca32 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 22 Aug 2012 13:39:19 -0700 Subject: [PATCH 0467/1087] remove Queue flush port (override reset instead) --- rocket/src/main/scala/queues.scala | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index d9790e26..419c2b4c 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -3,17 +3,16 @@ package rocket import Chisel._ import Node._; -class ioQueue[T <: Data](entries: Int, flushable: Boolean)(data: => T) extends Bundle +class ioQueue[T <: Data](entries: Int)(data: => T) extends Bundle { - val flush = if (flushable) Bool(INPUT) else null val enq = new FIFOIO()(data).flip val deq = new FIFOIO()(data) val count = UFix(OUTPUT, log2Up(entries+1)) } -class Queue[T <: Data](val entries: Int, pipe: Boolean = false, flow: Boolean = false, flushable: Boolean = false)(data: => T) extends Component +class Queue[T <: Data](val entries: Int, pipe: Boolean = false, flow: Boolean = false, resetSignal: Bool = null)(data: => T) extends Component(resetSignal) { - val io = new ioQueue(entries, flushable)(data) + val io = new ioQueue(entries)(data) val do_flow = Bool() val do_enq = io.enq.ready && io.enq.valid && !do_flow @@ -26,23 +25,12 @@ class Queue[T <: Data](val entries: Int, pipe: Boolean = false, flow: Boolean = { enq_ptr = Counter(do_enq, entries)._1 deq_ptr = Counter(do_deq, entries)._1 - if (flushable) { - 
when (io.flush) { - deq_ptr := UFix(0) - enq_ptr := UFix(0) - } - } } val maybe_full = Reg(resetVal = Bool(false)) when (do_enq != do_deq) { maybe_full := do_enq } - if (flushable) { - when (io.flush) { - maybe_full := Bool(false) - } - } val ram = Mem(entries) { data } when (do_enq) { ram(enq_ptr) := io.enq.bits } @@ -74,7 +62,7 @@ object Queue } } -class pipereg[T <: Data](latency: Int = 1)(data: => T) extends Component +class Pipe[T <: Data](latency: Int = 1)(data: => T) extends Component { val io = new Bundle { val enq = new PipeIO()(data).flip @@ -99,7 +87,7 @@ class pipereg[T <: Data](latency: Int = 1)(data: => T) extends Component object Pipe { def apply[T <: Data](enqValid: Bool, enqBits: T, latency: Int): PipeIO[T] = { - val q = (new pipereg(latency)) { enqBits.clone } + val q = (new Pipe(latency)) { enqBits.clone } q.io.enq.valid := enqValid q.io.enq.bits := enqBits q.io.deq @@ -108,7 +96,7 @@ object Pipe def apply[T <: Data](enq: PipeIO[T], latency: Int = 1): PipeIO[T] = apply(enq.valid, enq.bits, latency) } -class SkidBuffer[T <: Data]()(data: => T) extends Component +class SkidBuffer[T <: Data](resetSignal: Bool = null)(data: => T) extends Component(resetSignal) { val io = new Bundle { val enq = new FIFOIO()(data).flip From d9cb96c0aefa78f59107335ccc9571ef26a29749 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Thu, 27 Sep 2012 22:53:34 -0700 Subject: [PATCH 0468/1087] factored out common stuff to ChiselUtil --- rocket/src/main/scala/queues.scala | 93 ------------------------------ rocket/src/main/scala/top.scala | 2 +- rocket/src/main/scala/util.scala | 92 ----------------------------- 3 files changed, 1 insertion(+), 186 deletions(-) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index 419c2b4c..f414ff6e 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -3,99 +3,6 @@ package rocket import Chisel._ import Node._; -class ioQueue[T <: Data](entries: Int)(data: => T) extends 
Bundle -{ - val enq = new FIFOIO()(data).flip - val deq = new FIFOIO()(data) - val count = UFix(OUTPUT, log2Up(entries+1)) -} - -class Queue[T <: Data](val entries: Int, pipe: Boolean = false, flow: Boolean = false, resetSignal: Bool = null)(data: => T) extends Component(resetSignal) -{ - val io = new ioQueue(entries)(data) - - val do_flow = Bool() - val do_enq = io.enq.ready && io.enq.valid && !do_flow - val do_deq = io.deq.ready && io.deq.valid && !do_flow - - var enq_ptr = UFix(0) - var deq_ptr = UFix(0) - - if (entries > 1) - { - enq_ptr = Counter(do_enq, entries)._1 - deq_ptr = Counter(do_deq, entries)._1 - } - - val maybe_full = Reg(resetVal = Bool(false)) - when (do_enq != do_deq) { - maybe_full := do_enq - } - - val ram = Mem(entries) { data } - when (do_enq) { ram(enq_ptr) := io.enq.bits } - - val ptr_match = enq_ptr === deq_ptr - val empty = ptr_match && !maybe_full - val full = ptr_match && maybe_full - val maybe_flow = Bool(flow) && empty - do_flow := maybe_flow && io.deq.ready - io.deq.valid := !empty || Bool(flow) && io.enq.valid - io.enq.ready := !full || Bool(pipe) && io.deq.ready - io.deq.bits := Mux(maybe_flow, io.enq.bits, ram(deq_ptr)) - - val ptr_diff = enq_ptr - deq_ptr - if (isPow2(entries)) - io.count := Cat(maybe_full && ptr_match, ptr_diff).toUFix - else - io.count := Mux(ptr_match, Mux(maybe_full, UFix(entries), UFix(0)), Mux(deq_ptr > enq_ptr, UFix(entries) + ptr_diff, ptr_diff)) -} - -object Queue -{ - def apply[T <: Data](enq: FIFOIO[T], entries: Int = 2, pipe: Boolean = false) = { - val q = (new Queue(entries, pipe)) { enq.bits.clone } - q.io.enq.valid := enq.valid // not using <> so that override is allowed - q.io.enq.bits := enq.bits - enq.ready := q.io.enq.ready - q.io.deq - } -} - -class Pipe[T <: Data](latency: Int = 1)(data: => T) extends Component -{ - val io = new Bundle { - val enq = new PipeIO()(data).flip - val deq = new PipeIO()(data) - } - - var bits: T = io.enq.bits - var valid: Bool = io.enq.valid - - for (i <- 0 until 
latency) { - val reg_bits = Reg() { io.enq.bits.clone } - val reg_valid = Reg(valid, resetVal = Bool(false)) - when (valid) { reg_bits := bits } - valid = reg_valid - bits = reg_bits - } - - io.deq.valid := valid - io.deq.bits := bits -} - -object Pipe -{ - def apply[T <: Data](enqValid: Bool, enqBits: T, latency: Int): PipeIO[T] = { - val q = (new Pipe(latency)) { enqBits.clone } - q.io.enq.valid := enqValid - q.io.enq.bits := enqBits - q.io.deq - } - def apply[T <: Data](enqValid: Bool, enqBits: T): PipeIO[T] = apply(enqValid, enqBits, 1) - def apply[T <: Data](enq: PipeIO[T], latency: Int = 1): PipeIO[T] = apply(enq.valid, enq.bits, latency) -} - class SkidBuffer[T <: Data](resetSignal: Bool = null)(data: => T) extends Component(resetSignal) { val io = new Bundle { diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 160411f6..c8437567 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -3,7 +3,7 @@ package rocket import Chisel._ import Node._; import Constants._; -import collection.mutable._ +import collection.mutable.ArrayBuffer class Top extends Component { diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 4257de4f..19856386 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -4,17 +4,6 @@ import Chisel._ import Node._ import scala.math._ -object FillInterleaved -{ - def apply(n: Int, in: Bits) = - { - var out = Fill(n, in(0)) - for (i <- 1 until in.getWidth) - out = Cat(Fill(n, in(i)), out) - out - } -} - class Mux1H [T <: Data](n: Int)(gen: => T) extends Component { val io = new Bundle { @@ -25,84 +14,3 @@ class Mux1H [T <: Data](n: Int)(gen: => T) extends Component io.out := Mux1H(io.sel, io.in) } - -class ioLockingArbiter[T <: Data](n: Int)(data: => T) extends Bundle { - val in = Vec(n) { (new FIFOIO()) { data } }.flip - val lock = Vec(n) { Bool() }.asInput - val out = (new FIFOIO()) { data } -} - -class 
LockingArbiter[T <: Data](n: Int)(data: => T) extends Component { - val io = new ioLockingArbiter(n)(data) - val locked = Vec(n) { Reg(resetVal = Bool(false)) } - val any_lock_held = (locked.toBits & io.lock.toBits).orR - val valid_arr = Vec(n) { Bool() } - val bits_arr = Vec(n) { data } - for(i <- 0 until n) { - valid_arr(i) := io.in(i).valid - bits_arr(i) := io.in(i).bits - } - - io.in(0).ready := Mux(any_lock_held, io.out.ready && locked(0), io.out.ready) - locked(0) := Mux(any_lock_held, locked(0), io.in(0).ready && io.lock(0)) - for (i <- 1 until n) { - io.in(i).ready := Mux(any_lock_held, io.out.ready && locked(i), - !io.in(i-1).valid && io.in(i-1).ready) - locked(i) := Mux(any_lock_held, locked(i), io.in(i).ready && io.lock(i)) - } - - var dout = io.in(n-1).bits - for (i <- 1 until n) - dout = Mux(io.in(n-1-i).valid, io.in(n-1-i).bits, dout) - - var vout = io.in(0).valid - for (i <- 1 until n) - vout = vout || io.in(i).valid - - val lock_idx = PriorityEncoder(locked.toBits) - io.out.valid := Mux(any_lock_held, valid_arr(lock_idx), vout) - io.out.bits := Mux(any_lock_held, bits_arr(lock_idx), dout) -} - -object PriorityMux -{ - def apply[T <: Data](sel: Seq[Bits], in: Seq[T]): T = { - if (in.size == 1) - in.head - else - Mux(sel.head, in.head, apply(sel.tail, in.tail)) - } - def apply[T <: Data](sel: Bits, in: Seq[T]): T = apply((0 until in.size).map(sel(_)), in) -} - -object PriorityEncoder -{ - def apply(in: Seq[Bits]): UFix = PriorityMux(in, (0 until in.size).map(UFix(_))) - def apply(in: Bits): UFix = apply((0 until in.getWidth).map(in(_))) -} - -object PriorityEncoderOH -{ - def apply(in: Bits): Bits = Vec(apply((0 until in.getWidth).map(in(_)))){Bool()}.toBits - def apply(in: Seq[Bits]): Seq[Bool] = { - var none_hot = Bool(true) - val out = collection.mutable.ArrayBuffer[Bool]() - for (i <- 0 until in.size) { - out += none_hot && in(i) - none_hot = none_hot && !in(i) - } - out - } -} - -object Counter -{ - def apply(cond: Bool, n: Int) = { - val c = 
Reg(resetVal = UFix(0, log2Up(n))) - val wrap = c === UFix(n-1) - when (cond) { - c := Mux(Bool(!isPow2(n)) && wrap, UFix(0), c + UFix(1)) - } - (c, wrap && cond) - } -} From b9a9664de5be7b3dbd6bd8ae281838c0102955ef Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 27 Sep 2012 16:46:36 -0700 Subject: [PATCH 0469/1087] uncore and rocket changes for new xact types --- rocket/src/main/scala/coherence.scala | 194 +++++++++++++++++++------- rocket/src/main/scala/consts.scala | 3 + rocket/src/main/scala/htif.scala | 4 +- rocket/src/main/scala/icache.scala | 3 +- rocket/src/main/scala/nbdcache.scala | 18 +-- rocket/src/main/scala/uncore.scala | 15 +- 6 files changed, 165 insertions(+), 72 deletions(-) diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala index 04843fec..2517b08a 100644 --- a/rocket/src/main/scala/coherence.scala +++ b/rocket/src/main/scala/coherence.scala @@ -3,10 +3,39 @@ package rocket import Chisel._ import Constants._ -class TransactionInit extends Bundle { +object TransactionInit +{ + def apply(x_type: Bits, addr: UFix, tile_xact_id: UFix) = { + val init = new TransactionInit + init.x_type := x_type + init.addr := addr + init.tile_xact_id := tile_xact_id + init + } + def apply(x_type: Bits, addr: UFix, tile_xact_id: UFix, write_mask: Bits) = { + val init = new TransactionInit + init.x_type := x_type + init.addr := addr + init.tile_xact_id := tile_xact_id + init.write_mask := write_mask + init + } + def apply(x_type: Bits, addr: UFix, tile_xact_id: UFix, subword_addr: UFix, atomic_opcode: UFix) = { + val init = new TransactionInit + init.x_type := x_type + init.addr := addr + init.tile_xact_id := tile_xact_id + init.subword_addr := subword_addr + init.atomic_opcode := atomic_opcode + init + } +} +class TransactionInit extends PhysicalAddress { val x_type = Bits(width = X_INIT_TYPE_MAX_BITS) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) - val address = UFix(width = PADDR_BITS - OFFSET_BITS) + val write_mask 
= Bits(width = X_INIT_WRITE_MASK_BITS) + val subword_addr = Bits(width = X_INIT_SUBWORD_ADDR_BITS) + val atomic_opcode = Bits(width = X_INIT_ATOMIC_OP_BITS) } class TransactionInitData extends MemData @@ -15,10 +44,9 @@ class TransactionAbort extends Bundle { val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) } -class ProbeRequest extends Bundle { +class ProbeRequest extends PhysicalAddress { val p_type = Bits(width = P_REQ_TYPE_MAX_BITS) val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) - val address = Bits(width = PADDR_BITS - OFFSET_BITS) } class ProbeReply extends Bundle { @@ -76,6 +104,7 @@ abstract class CoherencePolicy { def messageHasData (init: TransactionInit): Bool def messageHasData (reply: TransactionReply): Bool def messageUpdatesDataArray (reply: TransactionReply): Bool + def messageIsUncached(init: TransactionInit): Bool def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool def getTransactionReplyType(x_type: UFix, count: UFix): Bits @@ -86,8 +115,11 @@ abstract class CoherencePolicy { } trait UncachedTransactions { - def getTransactionInitTypeOnUncachedRead(): UFix - def getTransactionInitTypeOnUncachedWrite(): UFix + def getUncachedReadTransactionInit(addr: UFix, id: UFix): TransactionInit + def getUncachedWriteTransactionInit(addr: UFix, id: UFix): TransactionInit + def getUncachedReadWordTransactionInit(addr: UFix, id: UFix): TransactionInit + def getUncachedWriteWordTransactionInit(addr: UFix, id: UFix, write_mask: Bits): TransactionInit + def getUncachedAtomicTransactionInit(addr: UFix, id: UFix, subword_addr: UFix, atomic_op: UFix): TransactionInit } abstract class CoherencePolicyWithUncached extends CoherencePolicy with UncachedTransactions @@ -115,6 +147,8 @@ class ThreeStateIncoherence extends IncoherentPolicy { val xactInitReadClean :: xactInitReadDirty :: xactInitWriteback :: Nil = Enum(3){ UFix() } val xactReplyData :: xactReplyAck :: Nil = Enum(2){ UFix() } val probeRepInvalidateAck :: Nil = Enum(1){ UFix() } + val 
uncachedTypeList = List() + val hasDataTypeList = List(xactInitWriteback) def isHit ( cmd: Bits, state: UFix): Bool = (state === tileClean || state === tileDirty) def isValid (state: UFix): Bool = state != tileInvalid @@ -149,9 +183,10 @@ class ThreeStateIncoherence extends IncoherentPolicy { def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteback //TODO def getTransactionInitTypeOnWriteback(): Bits = xactInitWriteback - def messageHasData (init: TransactionInit): Bool = (init.x_type === xactInitWriteback) + def messageHasData (init: TransactionInit): Bool = hasDataTypeList.map(t => init.x_type === t).reduceLeft(_||_) def messageHasData (reply: TransactionReply) = (reply.x_type === xactReplyData) def messageUpdatesDataArray (reply: TransactionReply) = (reply.x_type === xactReplyData) + def messageIsUncached(init: TransactionInit): Bool = uncachedTypeList.map(t => init.x_type === t).reduceLeft(_||_) } class MICoherence extends CoherencePolicyWithUncached { @@ -159,10 +194,12 @@ class MICoherence extends CoherencePolicyWithUncached { val tileInvalid :: tileValid :: Nil = Enum(2){ UFix() } val globalInvalid :: globalValid :: Nil = Enum(2){ UFix() } - val xactInitReadExclusive :: xactInitReadUncached :: xactInitWriteUncached :: Nil = Enum(3){ UFix() } - val xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: Nil = Enum(3){ UFix() } + val xactInitReadExclusive :: xactInitReadUncached :: xactInitWriteUncached :: xactInitReadWordUncached :: xactInitWriteWordUncached :: xactInitAtomicUncached :: Nil = Enum(6){ UFix() } + val xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadWordUncached :: xactReplyWriteWordUncached :: xactReplyAtomicUncached :: Nil = Enum(6){ UFix() } val probeReqInvalidate :: probeReqCopy :: Nil = Enum(2){ UFix() } val probeRepInvalidateData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepCopyAck :: Nil = Enum(4){ UFix() } + val uncachedTypeList = 
List(xactInitReadUncached, xactInitWriteUncached, xactReplyReadWordUncached, xactInitWriteWordUncached, xactInitAtomicUncached) + val hasDataTypeList = List(xactInitWriteUncached, xactInitWriteWordUncached, xactInitAtomicUncached) def isHit (cmd: Bits, state: UFix): Bool = state != tileInvalid def isValid (state: UFix): Bool = state != tileInvalid @@ -191,7 +228,10 @@ class MICoherence extends CoherencePolicyWithUncached { MuxLookup(incoming.x_type, tileInvalid, Array( xactReplyReadExclusive -> tileValid, xactReplyReadUncached -> tileInvalid, - xactReplyWriteUncached -> tileInvalid + xactReplyWriteUncached -> tileInvalid, + xactReplyReadWordUncached -> tileInvalid, + xactReplyWriteWordUncached -> tileInvalid, + xactReplyAtomicUncached -> tileInvalid )) } def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { @@ -201,8 +241,12 @@ class MICoherence extends CoherencePolicyWithUncached { )) } - def getTransactionInitTypeOnUncachedRead() = xactInitReadUncached - def getTransactionInitTypeOnUncachedWrite() = xactInitWriteUncached + def getUncachedReadTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadUncached, addr, id) + def getUncachedWriteTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitWriteUncached, addr, id) + def getUncachedReadWordTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadWordUncached, addr, id) + def getUncachedWriteWordTransactionInit(addr: UFix, id: UFix, write_mask: Bits) = TransactionInit(xactInitWriteWordUncached, addr, id, write_mask) + def getUncachedAtomicTransactionInit(addr: UFix, id: UFix, subword_addr: UFix, atomic_op: UFix) = TransactionInit(xactInitAtomicUncached, addr, id, subword_addr, atomic_op) + def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = xactInitReadExclusive def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = xactInitReadExclusive def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits 
= xactInitWriteUncached @@ -227,15 +271,14 @@ class MICoherence extends CoherencePolicyWithUncached { (reply.p_type === probeRepInvalidateData || reply.p_type === probeRepCopyData) } - def messageHasData (init: TransactionInit): Bool = { - (init.x_type === xactInitWriteUncached) - } + def messageHasData (init: TransactionInit): Bool = hasDataTypeList.map(t => init.x_type === t).reduceLeft(_||_) def messageHasData (reply: TransactionReply): Bool = { - (reply.x_type != xactReplyWriteUncached) + (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyWriteWordUncached) } def messageUpdatesDataArray (reply: TransactionReply): Bool = { (reply.x_type === xactReplyReadExclusive) } + def messageIsUncached(init: TransactionInit): Bool = uncachedTypeList.map(t => init.x_type === t).reduceLeft(_||_) def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) @@ -243,7 +286,10 @@ class MICoherence extends CoherencePolicyWithUncached { MuxLookup(x_type, xactReplyReadUncached, Array( xactInitReadExclusive -> xactReplyReadExclusive, xactInitReadUncached -> xactReplyReadUncached, - xactInitWriteUncached -> xactReplyWriteUncached + xactInitWriteUncached -> xactReplyWriteUncached, + xactInitReadWordUncached -> xactReplyReadWordUncached, + xactInitWriteWordUncached -> xactReplyWriteWordUncached, + xactInitAtomicUncached -> xactReplyAtomicUncached )) } @@ -251,7 +297,10 @@ class MICoherence extends CoherencePolicyWithUncached { MuxLookup(x_type, probeReqCopy, Array( xactInitReadExclusive -> probeReqInvalidate, xactInitReadUncached -> probeReqCopy, - xactInitWriteUncached -> probeReqInvalidate + xactInitWriteUncached -> probeReqInvalidate, + xactInitReadWordUncached -> probeReqCopy, + xactInitWriteWordUncached -> probeReqInvalidate, + xactInitAtomicUncached -> probeReqInvalidate )) } @@ -271,17 +320,19 @@ class MEICoherence extends CoherencePolicyWithUncached { val tileInvalid :: tileExclusiveClean :: tileExclusiveDirty :: Nil = Enum(3){ UFix() } val 
globalInvalid :: globalExclusiveClean :: Nil = Enum(2){ UFix() } - val xactInitReadExclusiveClean :: xactInitReadExclusiveDirty :: xactInitReadUncached :: xactInitWriteUncached :: Nil = Enum(4){ UFix() } - val xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: Nil = Enum(4){ UFix() } + val xactInitReadExclusiveClean :: xactInitReadExclusiveDirty :: xactInitReadUncached :: xactInitWriteUncached :: xactInitReadWordUncached :: xactInitWriteWordUncached :: xactInitAtomicUncached :: Nil = Enum(7){ UFix() } + val xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: xactReplyReadWordUncached :: xactReplyWriteWordUncached :: xactReplyAtomicUncached :: Nil = Enum(7){ UFix() } val probeReqInvalidate :: probeReqDowngrade :: probeReqCopy :: Nil = Enum(3){ UFix() } val probeRepInvalidateData :: probeRepDowngradeData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepDowngradeAck :: probeRepCopyAck :: Nil = Enum(6){ UFix() } + val uncachedTypeList = List(xactInitReadUncached, xactInitWriteUncached, xactReplyReadWordUncached, xactInitWriteWordUncached, xactInitAtomicUncached) + val hasDataTypeList = List(xactInitWriteUncached, xactInitWriteWordUncached, xactInitAtomicUncached) def isHit (cmd: Bits, state: UFix): Bool = state != tileInvalid def isValid (state: UFix): Bool = state != tileInvalid def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = { val (read, write) = cpuCmdToRW(cmd) - (read && (outstanding.x_type === xactInitReadUncached || outstanding.x_type === xactInitWriteUncached)) || + (read && messageIsUncached(outstanding)) || (write && (outstanding.x_type != xactInitReadExclusiveDirty)) } def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { @@ -311,7 +362,10 @@ class MEICoherence extends CoherencePolicyWithUncached { xactReplyReadExclusive -> Mux(outstanding.x_type === xactInitReadExclusiveDirty, 
tileExclusiveDirty, tileExclusiveClean), xactReplyReadExclusiveAck -> tileExclusiveDirty, xactReplyReadUncached -> tileInvalid, - xactReplyWriteUncached -> tileInvalid + xactReplyWriteUncached -> tileInvalid, + xactReplyReadWordUncached -> tileInvalid, + xactReplyWriteWordUncached -> tileInvalid, + xactReplyAtomicUncached -> tileInvalid )) } def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { @@ -322,8 +376,12 @@ class MEICoherence extends CoherencePolicyWithUncached { )) } - def getTransactionInitTypeOnUncachedRead() = xactInitReadUncached - def getTransactionInitTypeOnUncachedWrite() = xactInitWriteUncached + def getUncachedReadTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadUncached, addr, id) + def getUncachedWriteTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitWriteUncached, addr, id) + def getUncachedReadWordTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadWordUncached, addr, id) + def getUncachedWriteWordTransactionInit(addr: UFix, id: UFix, write_mask: Bits) = TransactionInit(xactInitWriteWordUncached, addr, id, write_mask) + def getUncachedAtomicTransactionInit(addr: UFix, id: UFix, subword_addr: UFix, atomic_op: UFix) = TransactionInit(xactInitAtomicUncached, addr, id, subword_addr, atomic_op) + def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) Mux(write, xactInitReadExclusiveDirty, xactInitReadExclusiveClean) @@ -357,15 +415,14 @@ class MEICoherence extends CoherencePolicyWithUncached { reply.p_type === probeRepDowngradeData || reply.p_type === probeRepCopyData) } - def messageHasData (init: TransactionInit): Bool = { - (init.x_type === xactInitWriteUncached) - } + def messageHasData (init: TransactionInit): Bool = hasDataTypeList.map(t => init.x_type === t).reduceLeft(_||_) def messageHasData (reply: TransactionReply): Bool = { - (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck) + 
(reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck && reply.x_type != xactReplyWriteWordUncached) } def messageUpdatesDataArray (reply: TransactionReply): Bool = { (reply.x_type === xactReplyReadExclusive) } + def messageIsUncached(init: TransactionInit): Bool = uncachedTypeList.map(t => init.x_type === t).reduceLeft(_||_) def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) @@ -374,7 +431,10 @@ class MEICoherence extends CoherencePolicyWithUncached { xactInitReadExclusiveClean -> xactReplyReadExclusive, xactInitReadExclusiveDirty -> xactReplyReadExclusive, xactInitReadUncached -> xactReplyReadUncached, - xactInitWriteUncached -> xactReplyWriteUncached + xactInitWriteUncached -> xactReplyWriteUncached, + xactInitReadWordUncached -> xactReplyReadWordUncached, + xactInitWriteWordUncached -> xactReplyWriteWordUncached, + xactInitAtomicUncached -> xactReplyAtomicUncached )) } @@ -383,7 +443,10 @@ class MEICoherence extends CoherencePolicyWithUncached { xactInitReadExclusiveClean -> probeReqInvalidate, xactInitReadExclusiveDirty -> probeReqInvalidate, xactInitReadUncached -> probeReqCopy, - xactInitWriteUncached -> probeReqInvalidate + xactInitWriteUncached -> probeReqInvalidate, + xactInitReadWordUncached -> probeReqCopy, + xactInitWriteWordUncached -> probeReqInvalidate, + xactInitAtomicUncached -> probeReqInvalidate )) } @@ -403,10 +466,12 @@ class MSICoherence extends CoherencePolicyWithUncached { val tileInvalid :: tileShared :: tileExclusiveDirty :: Nil = Enum(3){ UFix() } val globalInvalid :: globalShared :: globalExclusive :: Nil = Enum(3){ UFix() } - val xactInitReadShared :: xactInitReadExclusive :: xactInitReadUncached :: xactInitWriteUncached :: Nil = Enum(4){ UFix() } - val xactReplyReadShared :: xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: Nil = Enum(5){ UFix() } + val xactInitReadShared :: xactInitReadExclusive :: xactInitReadUncached :: 
xactInitWriteUncached :: xactInitReadWordUncached :: xactInitWriteWordUncached :: xactInitAtomicUncached :: Nil = Enum(7){ UFix() } + val xactReplyReadShared :: xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: xactReplyReadWordUncached :: xactReplyWriteWordUncached :: xactReplyAtomicUncached :: Nil = Enum(8){ UFix() } val probeReqInvalidate :: probeReqDowngrade :: probeReqCopy :: Nil = Enum(3){ UFix() } val probeRepInvalidateData :: probeRepDowngradeData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepDowngradeAck :: probeRepCopyAck :: Nil = Enum(6){ UFix() } + val uncachedTypeList = List(xactInitReadUncached, xactInitWriteUncached, xactReplyReadWordUncached, xactInitWriteWordUncached, xactInitAtomicUncached) + val hasDataTypeList = List(xactInitWriteUncached, xactInitWriteWordUncached, xactInitAtomicUncached) def isHit (cmd: Bits, state: UFix): Bool = { val (read, write) = cpuCmdToRW(cmd) @@ -419,7 +484,7 @@ class MSICoherence extends CoherencePolicyWithUncached { def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = { val (read, write) = cpuCmdToRW(cmd) - (read && (outstanding.x_type === xactInitReadUncached || outstanding.x_type === xactInitWriteUncached)) || + (read && messageIsUncached(outstanding)) || (write && (outstanding.x_type != xactInitReadExclusive)) } def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { @@ -450,7 +515,10 @@ class MSICoherence extends CoherencePolicyWithUncached { xactReplyReadExclusive -> tileExclusiveDirty, xactReplyReadExclusiveAck -> tileExclusiveDirty, xactReplyReadUncached -> tileInvalid, - xactReplyWriteUncached -> tileInvalid + xactReplyWriteUncached -> tileInvalid, + xactReplyReadWordUncached -> tileInvalid, + xactReplyWriteWordUncached -> tileInvalid, + xactReplyAtomicUncached -> tileInvalid )) } def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { @@ -461,8 +529,12 @@ class MSICoherence extends 
CoherencePolicyWithUncached { )) } - def getTransactionInitTypeOnUncachedRead() = xactInitReadUncached - def getTransactionInitTypeOnUncachedWrite() = xactInitWriteUncached + def getUncachedReadTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadUncached, addr, id) + def getUncachedWriteTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitWriteUncached, addr, id) + def getUncachedReadWordTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadWordUncached, addr, id) + def getUncachedWriteWordTransactionInit(addr: UFix, id: UFix, write_mask: Bits) = TransactionInit(xactInitWriteWordUncached, addr, id, write_mask) + def getUncachedAtomicTransactionInit(addr: UFix, id: UFix, subword_addr: UFix, atomic_op: UFix) = TransactionInit(xactInitAtomicUncached, addr, id, subword_addr, atomic_op) + def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) Mux(write || cmd === M_PFW, xactInitReadExclusive, xactInitReadShared) @@ -496,15 +568,14 @@ class MSICoherence extends CoherencePolicyWithUncached { reply.p_type === probeRepDowngradeData || reply.p_type === probeRepCopyData) } - def messageHasData (init: TransactionInit): Bool = { - (init.x_type === xactInitWriteUncached) - } + def messageHasData (init: TransactionInit): Bool = hasDataTypeList.map(t => init.x_type === t).reduceLeft(_||_) def messageHasData (reply: TransactionReply): Bool = { - (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck) + (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck && reply.x_type != xactReplyWriteWordUncached) } def messageUpdatesDataArray (reply: TransactionReply): Bool = { (reply.x_type === xactReplyReadShared || reply.x_type === xactReplyReadExclusive) } + def messageIsUncached(init: TransactionInit): Bool = uncachedTypeList.map(t => init.x_type === t).reduceLeft(_||_) def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 
=== addr2) @@ -513,7 +584,10 @@ class MSICoherence extends CoherencePolicyWithUncached { xactInitReadShared -> Mux(count > UFix(0), xactReplyReadShared, xactReplyReadExclusive), xactInitReadExclusive -> xactReplyReadExclusive, xactInitReadUncached -> xactReplyReadUncached, - xactInitWriteUncached -> xactReplyWriteUncached + xactInitWriteUncached -> xactReplyWriteUncached, + xactInitReadWordUncached -> xactReplyReadWordUncached, + xactInitWriteWordUncached -> xactReplyWriteWordUncached, + xactInitAtomicUncached -> xactReplyAtomicUncached )) } @@ -542,10 +616,12 @@ class MESICoherence extends CoherencePolicyWithUncached { val tileInvalid :: tileShared :: tileExclusiveClean :: tileExclusiveDirty :: Nil = Enum(4){ UFix() } val globalInvalid :: globalShared :: globalExclusiveClean :: Nil = Enum(3){ UFix() } - val xactInitReadShared :: xactInitReadExclusive :: xactInitReadUncached :: xactInitWriteUncached :: Nil = Enum(4){ UFix() } - val xactReplyReadShared :: xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: Nil = Enum(5){ UFix() } + val xactInitReadShared :: xactInitReadExclusive :: xactInitReadUncached :: xactInitWriteUncached :: xactInitReadWordUncached :: xactInitWriteWordUncached :: xactInitAtomicUncached :: Nil = Enum(7){ UFix() } + val xactReplyReadShared :: xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: xactReplyReadWordUncached :: xactReplyWriteWordUncached :: xactReplyAtomicUncached :: Nil = Enum(8){ UFix() } val probeReqInvalidate :: probeReqDowngrade :: probeReqCopy :: Nil = Enum(3){ UFix() } val probeRepInvalidateData :: probeRepDowngradeData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepDowngradeAck :: probeRepCopyAck :: Nil = Enum(6){ UFix() } + val uncachedTypeList = List(xactInitReadUncached, xactInitWriteUncached, xactReplyReadWordUncached, xactInitWriteWordUncached, xactInitAtomicUncached) + val hasDataTypeList = 
List(xactInitWriteUncached, xactInitWriteWordUncached, xactInitAtomicUncached) def isHit (cmd: Bits, state: UFix): Bool = { val (read, write) = cpuCmdToRW(cmd) @@ -558,7 +634,7 @@ class MESICoherence extends CoherencePolicyWithUncached { def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = { val (read, write) = cpuCmdToRW(cmd) - (read && (outstanding.x_type === xactInitReadUncached || outstanding.x_type === xactInitWriteUncached)) || + (read && messageIsUncached(outstanding)) || (write && (outstanding.x_type != xactInitReadExclusive)) } def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { @@ -589,7 +665,10 @@ class MESICoherence extends CoherencePolicyWithUncached { xactReplyReadExclusive -> Mux(outstanding.x_type === xactInitReadExclusive, tileExclusiveDirty, tileExclusiveClean), xactReplyReadExclusiveAck -> tileExclusiveDirty, xactReplyReadUncached -> tileInvalid, - xactReplyWriteUncached -> tileInvalid + xactReplyWriteUncached -> tileInvalid, + xactReplyReadWordUncached -> tileInvalid, + xactReplyWriteWordUncached -> tileInvalid, + xactReplyAtomicUncached -> tileInvalid )) } def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { @@ -600,8 +679,12 @@ class MESICoherence extends CoherencePolicyWithUncached { )) } - def getTransactionInitTypeOnUncachedRead() = xactInitReadUncached - def getTransactionInitTypeOnUncachedWrite() = xactInitWriteUncached + def getUncachedReadTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadUncached, addr, id) + def getUncachedWriteTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitWriteUncached, addr, id) + def getUncachedReadWordTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadWordUncached, addr, id) + def getUncachedWriteWordTransactionInit(addr: UFix, id: UFix, write_mask: Bits) = TransactionInit(xactInitWriteWordUncached, addr, id, write_mask) + def getUncachedAtomicTransactionInit(addr: UFix, id: UFix, subword_addr: 
UFix, atomic_op: UFix) = TransactionInit(xactInitAtomicUncached, addr, id, subword_addr, atomic_op) + def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { val (read, write) = cpuCmdToRW(cmd) Mux(write || cmd === M_PFW, xactInitReadExclusive, xactInitReadShared) @@ -635,15 +718,14 @@ class MESICoherence extends CoherencePolicyWithUncached { reply.p_type === probeRepDowngradeData || reply.p_type === probeRepCopyData) } - def messageHasData (init: TransactionInit): Bool = { - (init.x_type === xactInitWriteUncached) - } + def messageHasData (init: TransactionInit): Bool = hasDataTypeList.map(t => init.x_type === t).reduceLeft(_||_) def messageHasData (reply: TransactionReply): Bool = { - (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck) + (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck && reply.x_type != xactReplyWriteWordUncached) } def messageUpdatesDataArray (reply: TransactionReply): Bool = { (reply.x_type === xactReplyReadShared || reply.x_type === xactReplyReadExclusive) } + def messageIsUncached(init: TransactionInit): Bool = uncachedTypeList.map(t => init.x_type === t).reduceLeft(_||_) def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) @@ -652,7 +734,10 @@ class MESICoherence extends CoherencePolicyWithUncached { xactInitReadShared -> Mux(count > UFix(0), xactReplyReadShared, xactReplyReadExclusive), xactInitReadExclusive -> xactReplyReadExclusive, xactInitReadUncached -> xactReplyReadUncached, - xactInitWriteUncached -> xactReplyWriteUncached + xactInitWriteUncached -> xactReplyWriteUncached, + xactInitReadWordUncached -> xactReplyReadWordUncached, + xactInitWriteWordUncached -> xactReplyWriteWordUncached, + xactInitAtomicUncached -> xactReplyAtomicUncached )) } @@ -661,7 +746,10 @@ class MESICoherence extends CoherencePolicyWithUncached { xactInitReadShared -> probeReqDowngrade, xactInitReadExclusive -> probeReqInvalidate, 
xactInitReadUncached -> probeReqCopy, - xactInitWriteUncached -> probeReqInvalidate + xactInitWriteUncached -> probeReqInvalidate, + xactInitReadWordUncached -> probeReqCopy, + xactInitWriteWordUncached -> probeReqInvalidate, + xactInitAtomicUncached -> probeReqInvalidate )) } diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 7a2f716c..ac5de59d 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -200,6 +200,9 @@ object Constants val GLOBAL_XACT_ID_BITS = log2Up(NGLOBAL_XACTS) val X_INIT_TYPE_MAX_BITS = 2 + val X_INIT_WRITE_MASK_BITS = OFFSET_BITS + val X_INIT_SUBWORD_ADDR_BITS = log2Up(OFFSET_BITS) + val X_INIT_ATOMIC_OP_BITS = 4 val X_REP_TYPE_MAX_BITS = 3 val P_REQ_TYPE_MAX_BITS = 2 val P_REP_TYPE_MAX_BITS = 3 diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 058ed016..38955296 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -176,8 +176,8 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C mem_req_data = Cat(packet_ram(idx), mem_req_data) } x_init.io.enq.valid := state === state_mem_req - x_init.io.enq.bits.x_type := Mux(cmd === cmd_writemem, co.getTransactionInitTypeOnUncachedWrite, co.getTransactionInitTypeOnUncachedRead) - x_init.io.enq.bits.address := addr.toUFix >> UFix(OFFSET_BITS-3) + val init_addr = addr.toUFix >> UFix(OFFSET_BITS-3) + x_init.io.enq.bits := Mux(cmd === cmd_writemem, co.getUncachedWriteTransactionInit(init_addr, UFix(0)), co.getUncachedReadTransactionInit(init_addr, UFix(0))) io.mem.xact_init <> x_init.io.deq io.mem.xact_init_data.valid:= state === state_mem_wdata io.mem.xact_init_data.bits.data := mem_req_data diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index fbbc2dfa..17d47ca1 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -136,8 +136,7 @@ class rocketICache(sets: 
Int, assoc: Int, co: CoherencePolicyWithUncached) exten rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); io.cpu.resp_data := data_mux.io.out io.mem.xact_init.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.xact_init.bits.x_type := co.getTransactionInitTypeOnUncachedRead - io.mem.xact_init.bits.address := r_cpu_miss_addr(tagmsb,indexlsb).toUFix + io.mem.xact_init.bits := co.getUncachedReadTransactionInit(r_cpu_miss_addr(tagmsb,indexlsb).toUFix, UFix(0)) io.mem.xact_finish <> finish_q.io.deq // control state machine diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index f92e6e6c..40b9e6ab 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -282,7 +282,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { io.mem_req.valid := (state === s_refill_req) && !flush io.mem_req.bits.x_type := xacx_type - io.mem_req.bits.address := Cat(req.tag, req.idx).toUFix + io.mem_req.bits.addr := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := Bits(id) io.mem_finish <> finish_q.io.deq @@ -475,7 +475,7 @@ class WritebackUnit(co: CoherencePolicy) extends Component { io.mem_req.valid := valid && !cmd_sent io.mem_req.bits.x_type := co.getTransactionInitTypeOnWriteback() - io.mem_req.bits.address := Cat(req.tag, req.idx).toUFix + io.mem_req.bits.addr := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := req.tile_xact_id io.mem_req_data.valid := data_req_fired && !is_probe io.mem_req_data.bits.data := io.data_resp @@ -492,7 +492,7 @@ class ProbeUnit(co: CoherencePolicy) extends Component { val wb_req = (new FIFOIO) { new WritebackReq } val tag_match_way_oh = Bits(INPUT, NWAYS) val line_state = UFix(INPUT, 2) - val address = Bits(OUTPUT, PADDR_BITS-OFFSET_BITS) + val addr = Bits(OUTPUT, PADDR_BITS-OFFSET_BITS) } val s_reset :: s_invalid :: s_meta_req :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: 
s_writeback_resp :: Nil = Enum(8) { UFix() } @@ -535,16 +535,16 @@ class ProbeUnit(co: CoherencePolicy) extends Component { io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_mshr_req || state === s_probe_rep && hit io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, ~UFix(0, NWAYS)) io.meta_req.bits.rw := state === s_probe_rep - io.meta_req.bits.idx := req.address + io.meta_req.bits.idx := req.addr io.meta_req.bits.data.state := co.newStateOnProbeRequest(req, line_state) - io.meta_req.bits.data.tag := req.address >> UFix(IDX_BITS) + io.meta_req.bits.data.tag := req.addr >> UFix(IDX_BITS) io.mshr_req.valid := state === s_meta_resp || state === s_mshr_req - io.address := req.address + io.addr := req.addr io.wb_req.valid := state === s_writeback_req io.wb_req.bits.way_oh := way_oh - io.wb_req.bits.idx := req.address - io.wb_req.bits.tag := req.address >> UFix(IDX_BITS) + io.wb_req.bits.idx := req.addr + io.wb_req.bits.tag := req.addr >> UFix(IDX_BITS) } class FlushUnit(lines: Int, co: CoherencePolicy) extends Component { @@ -860,7 +860,7 @@ class HellaCache(co: CoherencePolicy) extends Component { meta_arb.io.in(3).bits.rw := Bool(false) meta_arb.io.in(3).bits.way_en := ~UFix(0, NWAYS) val early_tag_nack = !meta_arb.io.in(3).ready - val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.address >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req.bits.ppn) + val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req.bits.ppn) val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) val tag_match_arr = (0 until NWAYS).map( w => co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala index 7ffef155..84b74366 100644 --- a/rocket/src/main/scala/uncore.scala +++ b/rocket/src/main/scala/uncore.scala @@ -3,14 +3,17 @@ package rocket 
import Chisel._ import Constants._ +class PhysicalAddress extends Bundle { + val addr = UFix(width = PADDR_BITS - OFFSET_BITS) +} + class MemData extends Bundle { val data = Bits(width = MEM_DATA_BITS) } -class MemReqCmd() extends Bundle +class MemReqCmd() extends PhysicalAddress { val rw = Bool() - val addr = UFix(width = PADDR_BITS - OFFSET_BITS) val tag = Bits(width = MEM_TAG_BITS) } @@ -165,7 +168,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { io.probe_req.valid := Bool(false) io.probe_req.bits.p_type := co.getProbeRequestType(x_type_, UFix(0)) io.probe_req.bits.global_xact_id := UFix(id) - io.probe_req.bits.address := addr_ + io.probe_req.bits.addr := addr_ io.push_p_req := Bits(0, width = ntiles) io.pop_p_rep := Bits(0, width = ntiles) io.pop_p_rep_data := Bits(0, width = ntiles) @@ -178,7 +181,7 @@ class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { switch (state) { is(s_idle) { when( io.alloc_req.valid && io.can_alloc ) { - addr_ := io.alloc_req.bits.xact_init.address + addr_ := io.alloc_req.bits.xact_init.addr x_type_ := io.alloc_req.bits.xact_init.x_type init_tile_id_ := io.alloc_req.bits.tile_id tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id @@ -272,7 +275,7 @@ class CoherenceHubNull(co: ThreeStateIncoherence) extends CoherenceHub(1, co) io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) io.mem.req_cmd.bits.rw := is_write io.mem.req_cmd.bits.tag := x_init.bits.tile_xact_id - io.mem.req_cmd.bits.addr := x_init.bits.address + io.mem.req_cmd.bits.addr := x_init.bits.addr io.mem.req_data <> io.tiles(0).xact_init_data val x_rep = io.tiles(0).xact_rep @@ -432,7 +435,7 @@ class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceH val conflicts = Vec(NGLOBAL_XACTS) { Bool() } for( i <- 0 until NGLOBAL_XACTS) { val t = trackerList(i).io - conflicts(i) := t.busy && x_init.valid && co.isCoherenceConflict(t.addr, x_init.bits.address) + 
conflicts(i) := t.busy && x_init.valid && co.isCoherenceConflict(t.addr, x_init.bits.addr) } x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && co.messageHasData(x_init.bits))) From e909093f3788cd61b4e44691563eda4eeb88f364 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Mon, 1 Oct 2012 16:08:41 -0700 Subject: [PATCH 0470/1087] factoring out uncore into separate uncore repo --- rocket/src/main/scala/arbiter.scala | 1 + rocket/src/main/scala/coherence.scala | 765 -------------------------- rocket/src/main/scala/htif.scala | 1 + rocket/src/main/scala/icache.scala | 1 + rocket/src/main/scala/llc.scala | 437 --------------- rocket/src/main/scala/memserdes.scala | 1 + rocket/src/main/scala/nbdcache.scala | 1 + rocket/src/main/scala/tile.scala | 1 + rocket/src/main/scala/top.scala | 1 + rocket/src/main/scala/uncore.scala | 519 ----------------- 10 files changed, 7 insertions(+), 1721 deletions(-) delete mode 100644 rocket/src/main/scala/coherence.scala delete mode 100644 rocket/src/main/scala/llc.scala delete mode 100644 rocket/src/main/scala/uncore.scala diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 93a72be0..95432dc8 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -3,6 +3,7 @@ package rocket import Chisel._; import Node._; import Constants._; +import uncore._ class ioUncachedRequestor extends Bundle { val xact_init = (new FIFOIO) { new TransactionInit } diff --git a/rocket/src/main/scala/coherence.scala b/rocket/src/main/scala/coherence.scala deleted file mode 100644 index 2517b08a..00000000 --- a/rocket/src/main/scala/coherence.scala +++ /dev/null @@ -1,765 +0,0 @@ -package rocket - -import Chisel._ -import Constants._ - -object TransactionInit -{ - def apply(x_type: Bits, addr: UFix, tile_xact_id: UFix) = { - val init = new TransactionInit - 
init.x_type := x_type - init.addr := addr - init.tile_xact_id := tile_xact_id - init - } - def apply(x_type: Bits, addr: UFix, tile_xact_id: UFix, write_mask: Bits) = { - val init = new TransactionInit - init.x_type := x_type - init.addr := addr - init.tile_xact_id := tile_xact_id - init.write_mask := write_mask - init - } - def apply(x_type: Bits, addr: UFix, tile_xact_id: UFix, subword_addr: UFix, atomic_opcode: UFix) = { - val init = new TransactionInit - init.x_type := x_type - init.addr := addr - init.tile_xact_id := tile_xact_id - init.subword_addr := subword_addr - init.atomic_opcode := atomic_opcode - init - } -} -class TransactionInit extends PhysicalAddress { - val x_type = Bits(width = X_INIT_TYPE_MAX_BITS) - val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) - val write_mask = Bits(width = X_INIT_WRITE_MASK_BITS) - val subword_addr = Bits(width = X_INIT_SUBWORD_ADDR_BITS) - val atomic_opcode = Bits(width = X_INIT_ATOMIC_OP_BITS) -} - -class TransactionInitData extends MemData - -class TransactionAbort extends Bundle { - val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) -} - -class ProbeRequest extends PhysicalAddress { - val p_type = Bits(width = P_REQ_TYPE_MAX_BITS) - val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) -} - -class ProbeReply extends Bundle { - val p_type = Bits(width = P_REP_TYPE_MAX_BITS) - val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) -} - -class ProbeReplyData extends MemData - -class TransactionReply extends MemData { - val x_type = Bits(width = X_REP_TYPE_MAX_BITS) - val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) - val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) - val require_ack = Bool() -} - -class TransactionFinish extends Bundle { - val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) -} - -object cpuCmdToRW { - def apply(cmd: Bits): (Bool, Bool) = { - val store = (cmd === M_XWR) - val load = (cmd === M_XRD) - val amo = cmd(3).toBool - val read = load || amo || (cmd === M_PFR) || (cmd === M_PFW) - val 
write = store || amo - (read, write) - } -} - -abstract class CoherencePolicy { - def isHit (cmd: Bits, state: UFix): Bool - def isValid (state: UFix): Bool - - def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool - def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool - def needsWriteback (state: UFix): Bool - - def newStateOnHit(cmd: Bits, state: UFix): UFix - def newStateOnCacheControl(cmd: Bits): UFix - def newStateOnWriteback(): UFix - def newStateOnFlush(): UFix - def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix - def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits - - def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix - def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix - def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits - def getTransactionInitTypeOnWriteback(): Bits - - def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply - - def messageHasData (reply: ProbeReply): Bool - def messageHasData (init: TransactionInit): Bool - def messageHasData (reply: TransactionReply): Bool - def messageUpdatesDataArray (reply: TransactionReply): Bool - def messageIsUncached(init: TransactionInit): Bool - - def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool - def getTransactionReplyType(x_type: UFix, count: UFix): Bits - def getProbeRequestType(x_type: UFix, global_state: UFix): UFix - def needsMemRead(x_type: UFix, global_state: UFix): Bool - def needsMemWrite(x_type: UFix, global_state: UFix): Bool - def needsAckReply(x_type: UFix, global_state: UFix): Bool -} - -trait UncachedTransactions { - def getUncachedReadTransactionInit(addr: UFix, id: UFix): TransactionInit - def getUncachedWriteTransactionInit(addr: UFix, id: UFix): TransactionInit - def getUncachedReadWordTransactionInit(addr: UFix, id: UFix): TransactionInit - def getUncachedWriteWordTransactionInit(addr: UFix, 
id: UFix, write_mask: Bits): TransactionInit - def getUncachedAtomicTransactionInit(addr: UFix, id: UFix, subword_addr: UFix, atomic_op: UFix): TransactionInit -} - -abstract class CoherencePolicyWithUncached extends CoherencePolicy with UncachedTransactions - -abstract class IncoherentPolicy extends CoherencePolicy { - // UNIMPLEMENTED - def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = state - def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { - val reply = new ProbeReply() - reply.p_type := UFix(0) - reply.global_xact_id := UFix(0) - reply - } - def messageHasData (reply: ProbeReply) = Bool(false) - def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = Bool(false) - def getTransactionReplyType(x_type: UFix, count: UFix): Bits = Bits(0) - def getProbeRequestType(x_type: UFix, global_state: UFix): UFix = UFix(0) - def needsMemRead(x_type: UFix, global_state: UFix): Bool = Bool(false) - def needsMemWrite(x_type: UFix, global_state: UFix): Bool = Bool(false) - def needsAckReply(x_type: UFix, global_state: UFix): Bool = Bool(false) -} - -class ThreeStateIncoherence extends IncoherentPolicy { - val tileInvalid :: tileClean :: tileDirty :: Nil = Enum(3){ UFix() } - val xactInitReadClean :: xactInitReadDirty :: xactInitWriteback :: Nil = Enum(3){ UFix() } - val xactReplyData :: xactReplyAck :: Nil = Enum(2){ UFix() } - val probeRepInvalidateAck :: Nil = Enum(1){ UFix() } - val uncachedTypeList = List() - val hasDataTypeList = List(xactInitWriteback) - - def isHit ( cmd: Bits, state: UFix): Bool = (state === tileClean || state === tileDirty) - def isValid (state: UFix): Bool = state != tileInvalid - - def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit) = Bool(false) - def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = state === tileDirty - def needsWriteback (state: UFix): Bool = state === tileDirty - - def newState(cmd: Bits, state: UFix): UFix = { - val (read, write) = 
cpuCmdToRW(cmd) - Mux(write, tileDirty, Mux(read, Mux(state === tileDirty, tileDirty, tileClean), state)) - } - def newStateOnHit(cmd: Bits, state: UFix): UFix = newState(cmd, state) - def newStateOnCacheControl(cmd: Bits) = tileInvalid //TODO - def newStateOnWriteback() = tileInvalid - def newStateOnFlush() = tileInvalid - def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit) = { - MuxLookup(incoming.x_type, tileInvalid, Array( - xactReplyData -> Mux(outstanding.x_type === xactInitReadDirty, tileDirty, tileClean), - xactReplyAck -> tileInvalid - )) - } - - def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write || cmd === M_PFW, xactInitReadDirty, xactInitReadClean) - } - def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, xactInitReadDirty, outstanding.x_type) - } - def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteback //TODO - def getTransactionInitTypeOnWriteback(): Bits = xactInitWriteback - - def messageHasData (init: TransactionInit): Bool = hasDataTypeList.map(t => init.x_type === t).reduceLeft(_||_) - def messageHasData (reply: TransactionReply) = (reply.x_type === xactReplyData) - def messageUpdatesDataArray (reply: TransactionReply) = (reply.x_type === xactReplyData) - def messageIsUncached(init: TransactionInit): Bool = uncachedTypeList.map(t => init.x_type === t).reduceLeft(_||_) -} - -class MICoherence extends CoherencePolicyWithUncached { - - val tileInvalid :: tileValid :: Nil = Enum(2){ UFix() } - val globalInvalid :: globalValid :: Nil = Enum(2){ UFix() } - - val xactInitReadExclusive :: xactInitReadUncached :: xactInitWriteUncached :: xactInitReadWordUncached :: xactInitWriteWordUncached :: xactInitAtomicUncached :: Nil = Enum(6){ UFix() } - val xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached 
:: xactReplyReadWordUncached :: xactReplyWriteWordUncached :: xactReplyAtomicUncached :: Nil = Enum(6){ UFix() } - val probeReqInvalidate :: probeReqCopy :: Nil = Enum(2){ UFix() } - val probeRepInvalidateData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepCopyAck :: Nil = Enum(4){ UFix() } - val uncachedTypeList = List(xactInitReadUncached, xactInitWriteUncached, xactReplyReadWordUncached, xactInitWriteWordUncached, xactInitAtomicUncached) - val hasDataTypeList = List(xactInitWriteUncached, xactInitWriteWordUncached, xactInitAtomicUncached) - - def isHit (cmd: Bits, state: UFix): Bool = state != tileInvalid - def isValid (state: UFix): Bool = state != tileInvalid - - def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = (outstanding.x_type != xactInitReadExclusive) - def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { - MuxLookup(cmd, (state === tileValid), Array( - M_INV -> (state === tileValid), - M_CLN -> (state === tileValid) - )) - } - def needsWriteback (state: UFix): Bool = { - needsTransactionOnCacheControl(M_INV, state) - } - - def newStateOnHit(cmd: Bits, state: UFix): UFix = state - def newStateOnCacheControl(cmd: Bits) = { - MuxLookup(cmd, tileInvalid, Array( - M_INV -> tileInvalid, - M_CLN -> tileValid - )) - } - def newStateOnWriteback() = newStateOnCacheControl(M_INV) - def newStateOnFlush() = newStateOnCacheControl(M_INV) - def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix = { - MuxLookup(incoming.x_type, tileInvalid, Array( - xactReplyReadExclusive -> tileValid, - xactReplyReadUncached -> tileInvalid, - xactReplyWriteUncached -> tileInvalid, - xactReplyReadWordUncached -> tileInvalid, - xactReplyWriteWordUncached -> tileInvalid, - xactReplyAtomicUncached -> tileInvalid - )) - } - def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { - MuxLookup(incoming.p_type, state, Array( - probeReqInvalidate -> tileInvalid, - probeReqCopy 
-> state - )) - } - - def getUncachedReadTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadUncached, addr, id) - def getUncachedWriteTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitWriteUncached, addr, id) - def getUncachedReadWordTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadWordUncached, addr, id) - def getUncachedWriteWordTransactionInit(addr: UFix, id: UFix, write_mask: Bits) = TransactionInit(xactInitWriteWordUncached, addr, id, write_mask) - def getUncachedAtomicTransactionInit(addr: UFix, id: UFix, subword_addr: UFix, atomic_op: UFix) = TransactionInit(xactInitAtomicUncached, addr, id, subword_addr, atomic_op) - - def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = xactInitReadExclusive - def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = xactInitReadExclusive - def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteUncached - def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) - - def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { - val reply = new ProbeReply() - val with_data = MuxLookup(incoming.p_type, probeRepInvalidateData, Array( - probeReqInvalidate -> probeRepInvalidateData, - probeReqCopy -> probeRepCopyData - )) - val without_data = MuxLookup(incoming.p_type, probeRepInvalidateAck, Array( - probeReqInvalidate -> probeRepInvalidateAck, - probeReqCopy -> probeRepCopyAck - )) - reply.p_type := Mux(needsWriteback(state), with_data, without_data) - reply.global_xact_id := incoming.global_xact_id - reply - } - - def messageHasData (reply: ProbeReply): Bool = { - (reply.p_type === probeRepInvalidateData || - reply.p_type === probeRepCopyData) - } - def messageHasData (init: TransactionInit): Bool = hasDataTypeList.map(t => init.x_type === t).reduceLeft(_||_) - def messageHasData (reply: TransactionReply): Bool = { - (reply.x_type != 
xactReplyWriteUncached && reply.x_type != xactReplyWriteWordUncached) - } - def messageUpdatesDataArray (reply: TransactionReply): Bool = { - (reply.x_type === xactReplyReadExclusive) - } - def messageIsUncached(init: TransactionInit): Bool = uncachedTypeList.map(t => init.x_type === t).reduceLeft(_||_) - - def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) - - def getTransactionReplyType(x_type: UFix, count: UFix): Bits = { - MuxLookup(x_type, xactReplyReadUncached, Array( - xactInitReadExclusive -> xactReplyReadExclusive, - xactInitReadUncached -> xactReplyReadUncached, - xactInitWriteUncached -> xactReplyWriteUncached, - xactInitReadWordUncached -> xactReplyReadWordUncached, - xactInitWriteWordUncached -> xactReplyWriteWordUncached, - xactInitAtomicUncached -> xactReplyAtomicUncached - )) - } - - def getProbeRequestType(x_type: UFix, global_state: UFix): UFix = { - MuxLookup(x_type, probeReqCopy, Array( - xactInitReadExclusive -> probeReqInvalidate, - xactInitReadUncached -> probeReqCopy, - xactInitWriteUncached -> probeReqInvalidate, - xactInitReadWordUncached -> probeReqCopy, - xactInitWriteWordUncached -> probeReqInvalidate, - xactInitAtomicUncached -> probeReqInvalidate - )) - } - - def needsMemRead(x_type: UFix, global_state: UFix): Bool = { - (x_type != xactInitWriteUncached) - } - def needsMemWrite(x_type: UFix, global_state: UFix): Bool = { - (x_type === xactInitWriteUncached) - } - def needsAckReply(x_type: UFix, global_state: UFix): Bool = { - (x_type === xactInitWriteUncached) - } -} - -class MEICoherence extends CoherencePolicyWithUncached { - - val tileInvalid :: tileExclusiveClean :: tileExclusiveDirty :: Nil = Enum(3){ UFix() } - val globalInvalid :: globalExclusiveClean :: Nil = Enum(2){ UFix() } - - val xactInitReadExclusiveClean :: xactInitReadExclusiveDirty :: xactInitReadUncached :: xactInitWriteUncached :: xactInitReadWordUncached :: xactInitWriteWordUncached :: xactInitAtomicUncached :: Nil = Enum(7){ UFix() } - val 
xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: xactReplyReadWordUncached :: xactReplyWriteWordUncached :: xactReplyAtomicUncached :: Nil = Enum(7){ UFix() } - val probeReqInvalidate :: probeReqDowngrade :: probeReqCopy :: Nil = Enum(3){ UFix() } - val probeRepInvalidateData :: probeRepDowngradeData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepDowngradeAck :: probeRepCopyAck :: Nil = Enum(6){ UFix() } - val uncachedTypeList = List(xactInitReadUncached, xactInitWriteUncached, xactReplyReadWordUncached, xactInitWriteWordUncached, xactInitAtomicUncached) - val hasDataTypeList = List(xactInitWriteUncached, xactInitWriteWordUncached, xactInitAtomicUncached) - - def isHit (cmd: Bits, state: UFix): Bool = state != tileInvalid - def isValid (state: UFix): Bool = state != tileInvalid - - def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = { - val (read, write) = cpuCmdToRW(cmd) - (read && messageIsUncached(outstanding)) || - (write && (outstanding.x_type != xactInitReadExclusiveDirty)) - } - def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { - MuxLookup(cmd, (state === tileExclusiveDirty), Array( - M_INV -> (state === tileExclusiveDirty), - M_CLN -> (state === tileExclusiveDirty) - )) - } - def needsWriteback (state: UFix): Bool = { - needsTransactionOnCacheControl(M_INV, state) - } - - def newStateOnHit(cmd: Bits, state: UFix): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, tileExclusiveDirty, state) - } - def newStateOnCacheControl(cmd: Bits) = { - MuxLookup(cmd, tileInvalid, Array( - M_INV -> tileInvalid, - M_CLN -> tileExclusiveClean - )) - } - def newStateOnWriteback() = newStateOnCacheControl(M_INV) - def newStateOnFlush() = newStateOnCacheControl(M_INV) - def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix = { - MuxLookup(incoming.x_type, tileInvalid, Array( - xactReplyReadExclusive -> 
Mux(outstanding.x_type === xactInitReadExclusiveDirty, tileExclusiveDirty, tileExclusiveClean), - xactReplyReadExclusiveAck -> tileExclusiveDirty, - xactReplyReadUncached -> tileInvalid, - xactReplyWriteUncached -> tileInvalid, - xactReplyReadWordUncached -> tileInvalid, - xactReplyWriteWordUncached -> tileInvalid, - xactReplyAtomicUncached -> tileInvalid - )) - } - def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { - MuxLookup(incoming.p_type, state, Array( - probeReqInvalidate -> tileInvalid, - probeReqDowngrade -> tileExclusiveClean, - probeReqCopy -> state - )) - } - - def getUncachedReadTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadUncached, addr, id) - def getUncachedWriteTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitWriteUncached, addr, id) - def getUncachedReadWordTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadWordUncached, addr, id) - def getUncachedWriteWordTransactionInit(addr: UFix, id: UFix, write_mask: Bits) = TransactionInit(xactInitWriteWordUncached, addr, id, write_mask) - def getUncachedAtomicTransactionInit(addr: UFix, id: UFix, subword_addr: UFix, atomic_op: UFix) = TransactionInit(xactInitAtomicUncached, addr, id, subword_addr, atomic_op) - - def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, xactInitReadExclusiveDirty, xactInitReadExclusiveClean) - } - def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, xactInitReadExclusiveDirty, outstanding.x_type) - } - def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteUncached - def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) - - def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { - val reply = new ProbeReply() - val with_data = MuxLookup(incoming.p_type, 
probeRepInvalidateData, Array( - probeReqInvalidate -> probeRepInvalidateData, - probeReqDowngrade -> probeRepDowngradeData, - probeReqCopy -> probeRepCopyData - )) - val without_data = MuxLookup(incoming.p_type, probeRepInvalidateAck, Array( - probeReqInvalidate -> probeRepInvalidateAck, - probeReqDowngrade -> probeRepDowngradeAck, - probeReqCopy -> probeRepCopyAck - )) - reply.p_type := Mux(needsWriteback(state), with_data, without_data) - reply.global_xact_id := incoming.global_xact_id - reply - } - - def messageHasData (reply: ProbeReply): Bool = { - (reply.p_type === probeRepInvalidateData || - reply.p_type === probeRepDowngradeData || - reply.p_type === probeRepCopyData) - } - def messageHasData (init: TransactionInit): Bool = hasDataTypeList.map(t => init.x_type === t).reduceLeft(_||_) - def messageHasData (reply: TransactionReply): Bool = { - (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck && reply.x_type != xactReplyWriteWordUncached) - } - def messageUpdatesDataArray (reply: TransactionReply): Bool = { - (reply.x_type === xactReplyReadExclusive) - } - def messageIsUncached(init: TransactionInit): Bool = uncachedTypeList.map(t => init.x_type === t).reduceLeft(_||_) - - def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) - - def getTransactionReplyType(x_type: UFix, count: UFix): Bits = { - MuxLookup(x_type, xactReplyReadUncached, Array( - xactInitReadExclusiveClean -> xactReplyReadExclusive, - xactInitReadExclusiveDirty -> xactReplyReadExclusive, - xactInitReadUncached -> xactReplyReadUncached, - xactInitWriteUncached -> xactReplyWriteUncached, - xactInitReadWordUncached -> xactReplyReadWordUncached, - xactInitWriteWordUncached -> xactReplyWriteWordUncached, - xactInitAtomicUncached -> xactReplyAtomicUncached - )) - } - - def getProbeRequestType(x_type: UFix, global_state: UFix): UFix = { - MuxLookup(x_type, probeReqCopy, Array( - xactInitReadExclusiveClean -> probeReqInvalidate, - 
xactInitReadExclusiveDirty -> probeReqInvalidate, - xactInitReadUncached -> probeReqCopy, - xactInitWriteUncached -> probeReqInvalidate, - xactInitReadWordUncached -> probeReqCopy, - xactInitWriteWordUncached -> probeReqInvalidate, - xactInitAtomicUncached -> probeReqInvalidate - )) - } - - def needsMemRead(x_type: UFix, global_state: UFix): Bool = { - (x_type != xactInitWriteUncached) - } - def needsMemWrite(x_type: UFix, global_state: UFix): Bool = { - (x_type === xactInitWriteUncached) - } - def needsAckReply(x_type: UFix, global_state: UFix): Bool = { - (x_type === xactInitWriteUncached) - } -} - -class MSICoherence extends CoherencePolicyWithUncached { - - val tileInvalid :: tileShared :: tileExclusiveDirty :: Nil = Enum(3){ UFix() } - val globalInvalid :: globalShared :: globalExclusive :: Nil = Enum(3){ UFix() } - - val xactInitReadShared :: xactInitReadExclusive :: xactInitReadUncached :: xactInitWriteUncached :: xactInitReadWordUncached :: xactInitWriteWordUncached :: xactInitAtomicUncached :: Nil = Enum(7){ UFix() } - val xactReplyReadShared :: xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: xactReplyReadWordUncached :: xactReplyWriteWordUncached :: xactReplyAtomicUncached :: Nil = Enum(8){ UFix() } - val probeReqInvalidate :: probeReqDowngrade :: probeReqCopy :: Nil = Enum(3){ UFix() } - val probeRepInvalidateData :: probeRepDowngradeData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepDowngradeAck :: probeRepCopyAck :: Nil = Enum(6){ UFix() } - val uncachedTypeList = List(xactInitReadUncached, xactInitWriteUncached, xactReplyReadWordUncached, xactInitWriteWordUncached, xactInitAtomicUncached) - val hasDataTypeList = List(xactInitWriteUncached, xactInitWriteWordUncached, xactInitAtomicUncached) - - def isHit (cmd: Bits, state: UFix): Bool = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, (state === tileExclusiveDirty), - (state === tileShared || state === tileExclusiveDirty)) - } - 
def isValid (state: UFix): Bool = { - state != tileInvalid - } - - def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = { - val (read, write) = cpuCmdToRW(cmd) - (read && messageIsUncached(outstanding)) || - (write && (outstanding.x_type != xactInitReadExclusive)) - } - def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { - MuxLookup(cmd, (state === tileExclusiveDirty), Array( - M_INV -> (state === tileExclusiveDirty), - M_CLN -> (state === tileExclusiveDirty) - )) - } - def needsWriteback (state: UFix): Bool = { - needsTransactionOnCacheControl(M_INV, state) - } - - def newStateOnHit(cmd: Bits, state: UFix): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, tileExclusiveDirty, state) - } - def newStateOnCacheControl(cmd: Bits) = { - MuxLookup(cmd, tileInvalid, Array( - M_INV -> tileInvalid, - M_CLN -> tileShared - )) - } - def newStateOnWriteback() = newStateOnCacheControl(M_INV) - def newStateOnFlush() = newStateOnCacheControl(M_INV) - def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix = { - MuxLookup(incoming.x_type, tileInvalid, Array( - xactReplyReadShared -> tileShared, - xactReplyReadExclusive -> tileExclusiveDirty, - xactReplyReadExclusiveAck -> tileExclusiveDirty, - xactReplyReadUncached -> tileInvalid, - xactReplyWriteUncached -> tileInvalid, - xactReplyReadWordUncached -> tileInvalid, - xactReplyWriteWordUncached -> tileInvalid, - xactReplyAtomicUncached -> tileInvalid - )) - } - def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { - MuxLookup(incoming.p_type, state, Array( - probeReqInvalidate -> tileInvalid, - probeReqDowngrade -> tileShared, - probeReqCopy -> state - )) - } - - def getUncachedReadTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadUncached, addr, id) - def getUncachedWriteTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitWriteUncached, addr, id) - def 
getUncachedReadWordTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadWordUncached, addr, id) - def getUncachedWriteWordTransactionInit(addr: UFix, id: UFix, write_mask: Bits) = TransactionInit(xactInitWriteWordUncached, addr, id, write_mask) - def getUncachedAtomicTransactionInit(addr: UFix, id: UFix, subword_addr: UFix, atomic_op: UFix) = TransactionInit(xactInitAtomicUncached, addr, id, subword_addr, atomic_op) - - def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write || cmd === M_PFW, xactInitReadExclusive, xactInitReadShared) - } - def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, xactInitReadExclusive, outstanding.x_type) - } - def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteUncached - def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) - - def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { - val reply = new ProbeReply() - val with_data = MuxLookup(incoming.p_type, probeRepInvalidateData, Array( - probeReqInvalidate -> probeRepInvalidateData, - probeReqDowngrade -> probeRepDowngradeData, - probeReqCopy -> probeRepCopyData - )) - val without_data = MuxLookup(incoming.p_type, probeRepInvalidateAck, Array( - probeReqInvalidate -> probeRepInvalidateAck, - probeReqDowngrade -> probeRepDowngradeAck, - probeReqCopy -> probeRepCopyAck - )) - reply.p_type := Mux(needsWriteback(state), with_data, without_data) - reply.global_xact_id := incoming.global_xact_id - reply - } - - def messageHasData (reply: ProbeReply): Bool = { - (reply.p_type === probeRepInvalidateData || - reply.p_type === probeRepDowngradeData || - reply.p_type === probeRepCopyData) - } - def messageHasData (init: TransactionInit): Bool = hasDataTypeList.map(t => init.x_type === t).reduceLeft(_||_) - def messageHasData (reply: 
TransactionReply): Bool = { - (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck && reply.x_type != xactReplyWriteWordUncached) - } - def messageUpdatesDataArray (reply: TransactionReply): Bool = { - (reply.x_type === xactReplyReadShared || reply.x_type === xactReplyReadExclusive) - } - def messageIsUncached(init: TransactionInit): Bool = uncachedTypeList.map(t => init.x_type === t).reduceLeft(_||_) - - def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) - - def getTransactionReplyType(x_type: UFix, count: UFix): Bits = { - MuxLookup(x_type, xactReplyReadUncached, Array( - xactInitReadShared -> Mux(count > UFix(0), xactReplyReadShared, xactReplyReadExclusive), - xactInitReadExclusive -> xactReplyReadExclusive, - xactInitReadUncached -> xactReplyReadUncached, - xactInitWriteUncached -> xactReplyWriteUncached, - xactInitReadWordUncached -> xactReplyReadWordUncached, - xactInitWriteWordUncached -> xactReplyWriteWordUncached, - xactInitAtomicUncached -> xactReplyAtomicUncached - )) - } - - def getProbeRequestType(x_type: UFix, global_state: UFix): UFix = { - MuxLookup(x_type, probeReqCopy, Array( - xactInitReadShared -> probeReqDowngrade, - xactInitReadExclusive -> probeReqInvalidate, - xactInitReadUncached -> probeReqCopy, - xactInitWriteUncached -> probeReqInvalidate - )) - } - - def needsMemRead(x_type: UFix, global_state: UFix): Bool = { - (x_type != xactInitWriteUncached) - } - def needsMemWrite(x_type: UFix, global_state: UFix): Bool = { - (x_type === xactInitWriteUncached) - } - def needsAckReply(x_type: UFix, global_state: UFix): Bool = { - (x_type === xactInitWriteUncached) - } -} - -class MESICoherence extends CoherencePolicyWithUncached { - - val tileInvalid :: tileShared :: tileExclusiveClean :: tileExclusiveDirty :: Nil = Enum(4){ UFix() } - val globalInvalid :: globalShared :: globalExclusiveClean :: Nil = Enum(3){ UFix() } - - val xactInitReadShared :: xactInitReadExclusive :: xactInitReadUncached 
:: xactInitWriteUncached :: xactInitReadWordUncached :: xactInitWriteWordUncached :: xactInitAtomicUncached :: Nil = Enum(7){ UFix() } - val xactReplyReadShared :: xactReplyReadExclusive :: xactReplyReadUncached :: xactReplyWriteUncached :: xactReplyReadExclusiveAck :: xactReplyReadWordUncached :: xactReplyWriteWordUncached :: xactReplyAtomicUncached :: Nil = Enum(8){ UFix() } - val probeReqInvalidate :: probeReqDowngrade :: probeReqCopy :: Nil = Enum(3){ UFix() } - val probeRepInvalidateData :: probeRepDowngradeData :: probeRepCopyData :: probeRepInvalidateAck :: probeRepDowngradeAck :: probeRepCopyAck :: Nil = Enum(6){ UFix() } - val uncachedTypeList = List(xactInitReadUncached, xactInitWriteUncached, xactReplyReadWordUncached, xactInitWriteWordUncached, xactInitAtomicUncached) - val hasDataTypeList = List(xactInitWriteUncached, xactInitWriteWordUncached, xactInitAtomicUncached) - - def isHit (cmd: Bits, state: UFix): Bool = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, (state === tileExclusiveClean || state === tileExclusiveDirty), - (state === tileShared || state === tileExclusiveClean || state === tileExclusiveDirty)) - } - def isValid (state: UFix): Bool = { - state != tileInvalid - } - - def needsTransactionOnSecondaryMiss(cmd: Bits, outstanding: TransactionInit): Bool = { - val (read, write) = cpuCmdToRW(cmd) - (read && messageIsUncached(outstanding)) || - (write && (outstanding.x_type != xactInitReadExclusive)) - } - def needsTransactionOnCacheControl(cmd: Bits, state: UFix): Bool = { - MuxLookup(cmd, (state === tileExclusiveDirty), Array( - M_INV -> (state === tileExclusiveDirty), - M_CLN -> (state === tileExclusiveDirty) - )) - } - def needsWriteback (state: UFix): Bool = { - needsTransactionOnCacheControl(M_INV, state) - } - - def newStateOnHit(cmd: Bits, state: UFix): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, tileExclusiveDirty, state) - } - def newStateOnCacheControl(cmd: Bits) = { - MuxLookup(cmd, tileInvalid, Array( - M_INV 
-> tileInvalid, - M_CLN -> tileShared - )) - } - def newStateOnWriteback() = newStateOnCacheControl(M_INV) - def newStateOnFlush() = newStateOnCacheControl(M_INV) - def newStateOnTransactionReply(incoming: TransactionReply, outstanding: TransactionInit): UFix = { - MuxLookup(incoming.x_type, tileInvalid, Array( - xactReplyReadShared -> tileShared, - xactReplyReadExclusive -> Mux(outstanding.x_type === xactInitReadExclusive, tileExclusiveDirty, tileExclusiveClean), - xactReplyReadExclusiveAck -> tileExclusiveDirty, - xactReplyReadUncached -> tileInvalid, - xactReplyWriteUncached -> tileInvalid, - xactReplyReadWordUncached -> tileInvalid, - xactReplyWriteWordUncached -> tileInvalid, - xactReplyAtomicUncached -> tileInvalid - )) - } - def newStateOnProbeRequest(incoming: ProbeRequest, state: UFix): Bits = { - MuxLookup(incoming.p_type, state, Array( - probeReqInvalidate -> tileInvalid, - probeReqDowngrade -> tileShared, - probeReqCopy -> state - )) - } - - def getUncachedReadTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadUncached, addr, id) - def getUncachedWriteTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitWriteUncached, addr, id) - def getUncachedReadWordTransactionInit(addr: UFix, id: UFix) = TransactionInit(xactInitReadWordUncached, addr, id) - def getUncachedWriteWordTransactionInit(addr: UFix, id: UFix, write_mask: Bits) = TransactionInit(xactInitWriteWordUncached, addr, id, write_mask) - def getUncachedAtomicTransactionInit(addr: UFix, id: UFix, subword_addr: UFix, atomic_op: UFix) = TransactionInit(xactInitAtomicUncached, addr, id, subword_addr, atomic_op) - - def getTransactionInitTypeOnPrimaryMiss(cmd: Bits, state: UFix): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write || cmd === M_PFW, xactInitReadExclusive, xactInitReadShared) - } - def getTransactionInitTypeOnSecondaryMiss(cmd: Bits, state: UFix, outstanding: TransactionInit): UFix = { - val (read, write) = cpuCmdToRW(cmd) - Mux(write, 
xactInitReadExclusive, outstanding.x_type) - } - def getTransactionInitTypeOnCacheControl(cmd: Bits): Bits = xactInitWriteUncached - def getTransactionInitTypeOnWriteback(): Bits = getTransactionInitTypeOnCacheControl(M_INV) - - def newProbeReply (incoming: ProbeRequest, state: UFix): ProbeReply = { - val reply = new ProbeReply() - val with_data = MuxLookup(incoming.p_type, probeRepInvalidateData, Array( - probeReqInvalidate -> probeRepInvalidateData, - probeReqDowngrade -> probeRepDowngradeData, - probeReqCopy -> probeRepCopyData - )) - val without_data = MuxLookup(incoming.p_type, probeRepInvalidateAck, Array( - probeReqInvalidate -> probeRepInvalidateAck, - probeReqDowngrade -> probeRepDowngradeAck, - probeReqCopy -> probeRepCopyAck - )) - reply.p_type := Mux(needsWriteback(state), with_data, without_data) - reply.global_xact_id := incoming.global_xact_id - reply - } - - def messageHasData (reply: ProbeReply): Bool = { - (reply.p_type === probeRepInvalidateData || - reply.p_type === probeRepDowngradeData || - reply.p_type === probeRepCopyData) - } - def messageHasData (init: TransactionInit): Bool = hasDataTypeList.map(t => init.x_type === t).reduceLeft(_||_) - def messageHasData (reply: TransactionReply): Bool = { - (reply.x_type != xactReplyWriteUncached && reply.x_type != xactReplyReadExclusiveAck && reply.x_type != xactReplyWriteWordUncached) - } - def messageUpdatesDataArray (reply: TransactionReply): Bool = { - (reply.x_type === xactReplyReadShared || reply.x_type === xactReplyReadExclusive) - } - def messageIsUncached(init: TransactionInit): Bool = uncachedTypeList.map(t => init.x_type === t).reduceLeft(_||_) - - def isCoherenceConflict(addr1: Bits, addr2: Bits): Bool = (addr1 === addr2) - - def getTransactionReplyType(x_type: UFix, count: UFix): Bits = { - MuxLookup(x_type, xactReplyReadUncached, Array( - xactInitReadShared -> Mux(count > UFix(0), xactReplyReadShared, xactReplyReadExclusive), - xactInitReadExclusive -> xactReplyReadExclusive, - 
xactInitReadUncached -> xactReplyReadUncached, - xactInitWriteUncached -> xactReplyWriteUncached, - xactInitReadWordUncached -> xactReplyReadWordUncached, - xactInitWriteWordUncached -> xactReplyWriteWordUncached, - xactInitAtomicUncached -> xactReplyAtomicUncached - )) - } - - def getProbeRequestType(x_type: UFix, global_state: UFix): UFix = { - MuxLookup(x_type, probeReqCopy, Array( - xactInitReadShared -> probeReqDowngrade, - xactInitReadExclusive -> probeReqInvalidate, - xactInitReadUncached -> probeReqCopy, - xactInitWriteUncached -> probeReqInvalidate, - xactInitReadWordUncached -> probeReqCopy, - xactInitWriteWordUncached -> probeReqInvalidate, - xactInitAtomicUncached -> probeReqInvalidate - )) - } - - def needsMemRead(x_type: UFix, global_state: UFix): Bool = { - (x_type != xactInitWriteUncached) - } - def needsMemWrite(x_type: UFix, global_state: UFix): Bool = { - (x_type === xactInitWriteUncached) - } - def needsAckReply(x_type: UFix, global_state: UFix): Bool = { - (x_type === xactInitWriteUncached) - } -} diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 38955296..335ef3cb 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -3,6 +3,7 @@ package rocket import Chisel._ import Node._; import Constants._; +import uncore._ class ioDebug extends Bundle { diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 17d47ca1..9b79e2ce 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -4,6 +4,7 @@ import Chisel._; import Node._; import Constants._; import scala.math._; +import uncore._ // interface between I$ and pipeline/ITLB (32 bits wide) class ioImem extends Bundle diff --git a/rocket/src/main/scala/llc.scala b/rocket/src/main/scala/llc.scala deleted file mode 100644 index a2a5d361..00000000 --- a/rocket/src/main/scala/llc.scala +++ /dev/null @@ -1,437 +0,0 @@ -package rocket - -import Chisel._ -import Node._ 
-import Constants._ - -class BigMem[T <: Data](n: Int, preLatency: Int, postLatency: Int, leaf: Mem[Bits])(gen: => T) extends Component -{ - class Inputs extends Bundle { - val addr = UFix(INPUT, log2Up(n)) - val rw = Bool(INPUT) - val wdata = gen.asInput - val wmask = gen.asInput - override def clone = new Inputs().asInstanceOf[this.type] - } - val io = new Bundle { - val in = new PipeIO()(new Inputs).flip - val rdata = gen.asOutput - } - val data = gen - val colMux = if (2*data.width <= leaf.data.width && n > leaf.n) 1 << math.floor(math.log(leaf.data.width/data.width)/math.log(2)).toInt else 1 - val nWide = if (data.width > leaf.data.width) 1+(data.width-1)/leaf.data.width else 1 - val nDeep = if (n > colMux*leaf.n) 1+(n-1)/(colMux*leaf.n) else 1 - if (nDeep > 1 || colMux > 1) - require(isPow2(n) && isPow2(leaf.n)) - - val rdataDeep = Vec(nDeep) { Bits() } - val rdataSel = Vec(nDeep) { Bool() } - for (i <- 0 until nDeep) { - val in = Pipe(io.in.valid && (if (nDeep == 1) Bool(true) else UFix(i) === io.in.bits.addr(log2Up(n)-1, log2Up(n/nDeep))), io.in.bits, preLatency) - val idx = in.bits.addr(log2Up(n/nDeep/colMux)-1, 0) - val wdata = in.bits.wdata.toBits - val wmask = in.bits.wmask.toBits - val ren = in.valid && !in.bits.rw - val reg_ren = Reg(ren) - val rdata = Vec(nWide) { Bits() } - - val r = Pipe(ren, in.bits.addr, postLatency) - - for (j <- 0 until nWide) { - val mem = leaf.clone - var dout: Bits = null - val dout1 = if (postLatency > 0) Reg() { Bits() } else null - - var wmask0 = Fill(colMux, wmask(math.min(wmask.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) - if (colMux > 1) - wmask0 = wmask0 & FillInterleaved(gen.width, UFixToOH(in.bits.addr(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux)), log2Up(colMux))) - val wdata0 = Fill(colMux, wdata(math.min(wdata.getWidth, leaf.data.width*(j+1))-1, leaf.data.width*j)) - when (in.valid) { - when (in.bits.rw) { mem.write(idx, wdata0, wmask0) } - .otherwise { if (postLatency > 0) dout1 := mem(idx) } - } - - if 
(postLatency == 0) { - dout = mem(idx) - } else if (postLatency == 1) { - dout = dout1 - } else - dout = Pipe(reg_ren, dout1, postLatency-1).bits - - rdata(j) := dout - } - val rdataWide = rdata.reduceLeft((x, y) => Cat(y, x)) - - var colMuxOut = rdataWide - if (colMux > 1) { - val colMuxIn = Vec((0 until colMux).map(k => rdataWide(gen.width*(k+1)-1, gen.width*k))) { Bits() } - colMuxOut = colMuxIn(r.bits(log2Up(n/nDeep)-1, log2Up(n/nDeep/colMux))) - } - - rdataDeep(i) := colMuxOut - rdataSel(i) := r.valid - } - - io.rdata := Mux1H(rdataSel, rdataDeep) -} - -class LLCDataReq(ways: Int) extends MemReqCmd -{ - val way = UFix(width = log2Up(ways)) - val isWriteback = Bool() - - override def clone = new LLCDataReq(ways).asInstanceOf[this.type] -} - -class LLCMSHRFile(sets: Int, ways: Int, outstanding: Int) extends Component -{ - val io = new Bundle { - val cpu = (new FIFOIO) { new MemReqCmd }.flip - val repl_way = UFix(INPUT, log2Up(ways)) - val repl_dirty = Bool(INPUT) - val repl_tag = UFix(INPUT, PADDR_BITS - OFFSET_BITS - log2Up(sets)) - val data = (new FIFOIO) { new LLCDataReq(ways) } - val tag = (new FIFOIO) { new Bundle { - val addr = UFix(width = PADDR_BITS - OFFSET_BITS) - val way = UFix(width = log2Up(ways)) - } } - val mem = new ioMemPipe - val mem_resp_set = UFix(OUTPUT, log2Up(sets)) - val mem_resp_way = UFix(OUTPUT, log2Up(ways)) - } - - class MSHR extends Bundle { - val addr = UFix(width = PADDR_BITS - OFFSET_BITS) - val way = UFix(width = log2Up(ways)) - val tag = io.cpu.bits.tag.clone - val refilled = Bool() - val refillCount = UFix(width = log2Up(REFILL_CYCLES)) - val requested = Bool() - val old_dirty = Bool() - val old_tag = UFix(width = PADDR_BITS - OFFSET_BITS - log2Up(sets)) - - override def clone = new MSHR().asInstanceOf[this.type] - } - - val valid = Vec(outstanding) { Reg(resetVal = Bool(false)) } - val validBits = valid.toBits - val freeId = PriorityEncoder(~validBits) - val mshr = Vec(outstanding) { Reg() { new MSHR } } - when (io.cpu.valid 
&& io.cpu.ready) { - valid(freeId) := Bool(true) - mshr(freeId).addr := io.cpu.bits.addr - mshr(freeId).tag := io.cpu.bits.tag - mshr(freeId).way := io.repl_way - mshr(freeId).old_dirty := io.repl_dirty - mshr(freeId).old_tag := io.repl_tag - mshr(freeId).requested := Bool(false) - mshr(freeId).refillCount := UFix(0) - mshr(freeId).refilled := Bool(false) - } - - val requests = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && !mshr(i).old_dirty && !mshr(i).requested):_*) - val request = requests.orR - val requestId = PriorityEncoder(requests) - when (io.mem.req_cmd.valid && io.mem.req_cmd.ready) { mshr(requestId).requested := Bool(true) } - - val refillId = io.mem.resp.bits.tag(log2Up(outstanding)-1, 0) - val refillCount = mshr(refillId).refillCount - when (io.mem.resp.valid) { - mshr(refillId).refillCount := refillCount + UFix(1) - when (refillCount === UFix(REFILL_CYCLES-1)) { mshr(refillId).refilled := Bool(true) } - } - - val replays = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).refilled):_*) - val replay = replays.orR - val replayId = PriorityEncoder(replays) - when (replay && io.data.ready && io.tag.ready) { valid(replayId) := Bool(false) } - - val writebacks = Cat(Bits(0), (outstanding-1 to 0 by -1).map(i => valid(i) && mshr(i).old_dirty):_*) - val writeback = writebacks.orR - val writebackId = PriorityEncoder(writebacks) - when (writeback && io.data.ready && !replay) { mshr(writebackId).old_dirty := Bool(false) } - - val conflicts = Cat(Bits(0), (0 until outstanding).map(i => valid(i) && io.cpu.bits.addr(log2Up(sets)-1, 0) === mshr(i).addr(log2Up(sets)-1, 0)):_*) - io.cpu.ready := !conflicts.orR && !validBits.andR - - io.data.valid := writeback - io.data.bits.rw := Bool(false) - io.data.bits.tag := mshr(replayId).tag - io.data.bits.isWriteback := Bool(true) - io.data.bits.addr := Cat(mshr(writebackId).old_tag, mshr(writebackId).addr(log2Up(sets)-1, 0)).toUFix - io.data.bits.way := mshr(writebackId).way - when (replay) { 
- io.data.valid := io.tag.ready - io.data.bits.isWriteback := Bool(false) - io.data.bits.addr := mshr(replayId).addr - io.data.bits.way := mshr(replayId).way - } - io.tag.valid := replay && io.data.ready - io.tag.bits.addr := io.data.bits.addr - io.tag.bits.way := io.data.bits.way - - io.mem.req_cmd.valid := request - io.mem.req_cmd.bits.rw := Bool(false) - io.mem.req_cmd.bits.addr := mshr(requestId).addr - io.mem.req_cmd.bits.tag := requestId - io.mem_resp_set := mshr(refillId).addr - io.mem_resp_way := mshr(refillId).way -} - -class LLCWriteback(requestors: Int) extends Component -{ - val io = new Bundle { - val req = Vec(requestors) { (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) }.flip } - val data = Vec(requestors) { (new FIFOIO) { new MemData }.flip } - val mem = new ioMemPipe - } - - val valid = Reg(resetVal = Bool(false)) - val who = Reg() { UFix() } - val addr = Reg() { UFix() } - val cmd_sent = Reg() { Bool() } - val data_sent = Reg() { Bool() } - val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - - var anyReq = Bool(false) - for (i <- 0 until requestors) { - io.req(i).ready := !valid && !anyReq - io.data(i).ready := valid && who === UFix(i) && io.mem.req_data.ready - anyReq = anyReq || io.req(i).valid - } - - val nextWho = PriorityEncoder(io.req.map(_.valid)) - when (!valid && io.req.map(_.valid).reduceLeft(_||_)) { - valid := Bool(true) - cmd_sent := Bool(false) - data_sent := Bool(false) - who := nextWho - addr := io.req(nextWho).bits - } - - when (io.mem.req_data.valid && io.mem.req_data.ready) { - count := count + UFix(1) - when (count === UFix(REFILL_CYCLES-1)) { - data_sent := Bool(true) - when (cmd_sent) { valid := Bool(false) } - } - } - when (io.mem.req_cmd.valid && io.mem.req_cmd.ready) { cmd_sent := Bool(true) } - when (valid && cmd_sent && data_sent) { valid := Bool(false) } - - io.mem.req_cmd.valid := valid && !cmd_sent - io.mem.req_cmd.bits.addr := addr - io.mem.req_cmd.bits.rw := Bool(true) - - io.mem.req_data.valid := 
valid && !data_sent && io.data(who).valid - io.mem.req_data.bits := io.data(who).bits -} - -class LLCData(latency: Int, sets: Int, ways: Int, leaf: Mem[Bits]) extends Component -{ - val io = new Bundle { - val req = (new FIFOIO) { new LLCDataReq(ways) }.flip - val req_data = (new FIFOIO) { new MemData }.flip - val writeback = (new FIFOIO) { UFix(width = PADDR_BITS - OFFSET_BITS) } - val writeback_data = (new FIFOIO) { new MemData } - val resp = (new FIFOIO) { new MemResp } - val mem_resp = (new PipeIO) { new MemResp }.flip - val mem_resp_set = UFix(INPUT, log2Up(sets)) - val mem_resp_way = UFix(INPUT, log2Up(ways)) - } - - val data = new BigMem(sets*ways*REFILL_CYCLES, 1, latency-1, leaf)(Bits(width = MEM_DATA_BITS)) - class QEntry extends MemResp { - val isWriteback = Bool() - override def clone = new QEntry().asInstanceOf[this.type] - } - val q = (new Queue(latency+2)) { new QEntry } - val qReady = q.io.count <= UFix(q.entries-latency-1) - val valid = Reg(resetVal = Bool(false)) - val req = Reg() { io.req.bits.clone } - val count = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - val refillCount = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - - when (data.io.in.valid && !io.mem_resp.valid) { - count := count + UFix(1) - when (valid && count === UFix(REFILL_CYCLES-1)) { valid := Bool(false) } - } - when (io.req.valid && io.req.ready) { valid := Bool(true); req := io.req.bits } - when (io.mem_resp.valid) { refillCount := refillCount + UFix(1) } - - data.io.in.valid := io.req.valid && io.req.ready && Mux(io.req.bits.rw, io.req_data.valid, qReady) - data.io.in.bits.addr := Cat(io.req.bits.way, io.req.bits.addr(log2Up(sets)-1, 0), count).toUFix - data.io.in.bits.rw := io.req.bits.rw - data.io.in.bits.wdata := io.req_data.bits.data - data.io.in.bits.wmask := Fix(-1, io.req_data.bits.data.width) - when (valid) { - data.io.in.valid := Mux(req.rw, io.req_data.valid, qReady) - data.io.in.bits.addr := Cat(req.way, req.addr(log2Up(sets)-1, 0), count).toUFix - 
data.io.in.bits.rw := req.rw - } - when (io.mem_resp.valid) { - data.io.in.valid := Bool(true) - data.io.in.bits.addr := Cat(io.mem_resp_way, io.mem_resp_set, refillCount).toUFix - data.io.in.bits.rw := Bool(true) - data.io.in.bits.wdata := io.mem_resp.bits.data - } - - val tagPipe = Pipe(data.io.in.valid && !data.io.in.bits.rw, Mux(valid, req.tag, io.req.bits.tag), latency) - q.io.enq.valid := tagPipe.valid - q.io.enq.bits.tag := tagPipe.bits - q.io.enq.bits.isWriteback := Pipe(Mux(valid, req.isWriteback, io.req.bits.isWriteback), Bool(false), latency).valid - q.io.enq.bits.data := data.io.rdata - - io.req.ready := !valid && Mux(io.req.bits.isWriteback, io.writeback.ready, Bool(true)) - io.req_data.ready := !io.mem_resp.valid && Mux(valid, req.rw, io.req.valid && io.req.bits.rw) - - io.writeback.valid := io.req.valid && io.req.ready && io.req.bits.isWriteback - io.writeback.bits := io.req.bits.addr - - q.io.deq.ready := Mux(q.io.deq.bits.isWriteback, io.writeback_data.ready, io.resp.ready) - io.resp.valid := q.io.deq.valid && !q.io.deq.bits.isWriteback - io.resp.bits := q.io.deq.bits - io.writeback_data.valid := q.io.deq.valid && q.io.deq.bits.isWriteback - io.writeback_data.bits := q.io.deq.bits -} - -class MemReqArb(n: Int) extends Component // UNTESTED -{ - val io = new Bundle { - val cpu = Vec(n) { new ioMem().flip } - val mem = new ioMem - } - - val lock = Reg(resetVal = Bool(false)) - val locker = Reg() { UFix() } - - val arb = new RRArbiter(n)(new MemReqCmd) - val respWho = io.mem.resp.bits.tag(log2Up(n)-1,0) - val respTag = io.mem.resp.bits.tag >> UFix(log2Up(n)) - for (i <- 0 until n) { - val me = UFix(i, log2Up(n)) - arb.io.in(i).valid := io.cpu(i).req_cmd.valid - arb.io.in(i).bits := io.cpu(i).req_cmd.bits - arb.io.in(i).bits.tag := Cat(io.cpu(i).req_cmd.bits.tag, me) - io.cpu(i).req_cmd.ready := arb.io.in(i).ready - io.cpu(i).req_data.ready := Bool(false) - - val getLock = io.cpu(i).req_cmd.fire() && io.cpu(i).req_cmd.bits.rw && !lock - val haveLock = 
lock && locker === me - when (getLock) { - lock := Bool(true) - locker := UFix(i) - } - when (getLock || haveLock) { - io.cpu(i).req_data.ready := io.mem.req_data.ready - io.mem.req_data.valid := Bool(true) - io.mem.req_data.bits := io.cpu(i).req_data.bits - } - - io.cpu(i).resp.valid := io.mem.resp.valid && respWho === me - io.cpu(i).resp.bits := io.mem.resp.bits - io.cpu(i).resp.bits.tag := respTag - } - io.mem.resp.ready := io.cpu(respWho).resp.ready - - val unlock = Counter(io.mem.req_data.fire(), REFILL_CYCLES)._2 - when (unlock) { lock := Bool(false) } -} - -class DRAMSideLLC(sets: Int, ways: Int, outstanding: Int, tagLeaf: Mem[Bits], dataLeaf: Mem[Bits]) extends Component -{ - val io = new Bundle { - val cpu = new ioMem().flip - val mem = new ioMemPipe - } - - val tagWidth = PADDR_BITS - OFFSET_BITS - log2Up(sets) - val metaWidth = tagWidth + 2 // valid + dirty - - val memCmdArb = (new Arbiter(2)) { new MemReqCmd } - val dataArb = (new Arbiter(2)) { new LLCDataReq(ways) } - val mshr = new LLCMSHRFile(sets, ways, outstanding) - val tags = new BigMem(sets, 0, 1, tagLeaf)(Bits(width = metaWidth*ways)) - val data = new LLCData(4, sets, ways, dataLeaf) - val writeback = new LLCWriteback(2) - - val initCount = Reg(resetVal = UFix(0, log2Up(sets+1))) - val initialize = !initCount(log2Up(sets)) - when (initialize) { initCount := initCount + UFix(1) } - - val stall_s1 = Bool() - val replay_s1 = Reg(resetVal = Bool(false)) - val s1_valid = Reg(io.cpu.req_cmd.valid && !stall_s1 || replay_s1, resetVal = Bool(false)) - replay_s1 := s1_valid && stall_s1 - val s1 = Reg() { new MemReqCmd } - when (io.cpu.req_cmd.valid && io.cpu.req_cmd.ready) { s1 := io.cpu.req_cmd.bits } - - val stall_s2 = Bool() - val s2_valid = Reg(resetVal = Bool(false)) - s2_valid := s1_valid && !replay_s1 && !stall_s1 || stall_s2 - val s2 = Reg() { new MemReqCmd } - val s2_tags = Vec(ways) { Reg() { Bits(width = metaWidth) } } - when (s1_valid && !stall_s1 && !replay_s1) { - s2 := s1 - for (i <- 0 
until ways) - s2_tags(i) := tags.io.rdata(metaWidth*(i+1)-1, metaWidth*i) - } - val s2_hits = s2_tags.map(t => t(tagWidth) && s2.addr(s2.addr.width-1, s2.addr.width-tagWidth) === t(tagWidth-1, 0)) - val s2_hit_way = OHToUFix(s2_hits) - val s2_hit = s2_hits.reduceLeft(_||_) - val s2_hit_dirty = s2_tags(s2_hit_way)(tagWidth+1) - val repl_way = LFSR16(s2_valid)(log2Up(ways)-1, 0) - val repl_tag = s2_tags(repl_way).toUFix - val setDirty = s2_valid && s2.rw && s2_hit && !s2_hit_dirty - stall_s1 := initialize || stall_s2 - - val tag_we = setDirty || mshr.io.tag.valid - val tag_waddr = Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(log2Up(sets)-1,0) - val tag_wdata = Cat(setDirty, Bool(true), Mux(setDirty, s2.addr, mshr.io.tag.bits.addr)(mshr.io.tag.bits.addr.width-1, mshr.io.tag.bits.addr.width-tagWidth)) - val tag_wway = Mux(setDirty, s2_hit_way, mshr.io.tag.bits.way) - tags.io.in.valid := (io.cpu.req_cmd.valid || replay_s1) && !stall_s1 || initialize || tag_we - tags.io.in.bits.addr := Mux(initialize, initCount, Mux(tag_we, tag_waddr, Mux(replay_s1, s1.addr, io.cpu.req_cmd.bits.addr)(log2Up(sets)-1,0))) - tags.io.in.bits.rw := initialize || tag_we - tags.io.in.bits.wdata := Mux(initialize, UFix(0), Fill(ways, tag_wdata)) - tags.io.in.bits.wmask := FillInterleaved(metaWidth, Mux(initialize, Fix(-1, ways), UFixToOH(tag_wway))) - when (tag_we && Mux(stall_s2, s2.addr, s1.addr)(log2Up(sets)-1,0) === tag_waddr) { s2_tags(tag_wway) := tag_wdata } - - mshr.io.cpu.valid := s2_valid && !s2_hit && !s2.rw && dataArb.io.in(1).ready && writeback.io.req(0).ready // stall_s2 - mshr.io.cpu.bits := s2 - mshr.io.repl_way := repl_way - mshr.io.repl_dirty := repl_tag(tagWidth+1, tagWidth).andR - mshr.io.repl_tag := repl_tag - mshr.io.mem.resp := io.mem.resp - mshr.io.tag.ready := !setDirty - - data.io.req <> dataArb.io.out - data.io.mem_resp := io.mem.resp - data.io.mem_resp_set := mshr.io.mem_resp_set - data.io.mem_resp_way := mshr.io.mem_resp_way - data.io.req_data.bits := 
io.cpu.req_data.bits - data.io.req_data.valid := io.cpu.req_data.valid - - writeback.io.req(0) <> data.io.writeback - writeback.io.data(0) <> data.io.writeback_data - writeback.io.req(1).valid := s2_valid && !s2_hit && s2.rw && dataArb.io.in(1).ready && mshr.io.cpu.ready // stall_s2 - writeback.io.req(1).bits := s2.addr - writeback.io.data(1).valid := io.cpu.req_data.valid - writeback.io.data(1).bits := io.cpu.req_data.bits - - memCmdArb.io.in(0) <> mshr.io.mem.req_cmd - memCmdArb.io.in(1) <> writeback.io.mem.req_cmd - - dataArb.io.in(0) <> mshr.io.data - dataArb.io.in(1).valid := s2_valid && s2_hit && writeback.io.req(0).ready && mshr.io.cpu.ready // stall_s2 - dataArb.io.in(1).bits := s2 - dataArb.io.in(1).bits.way := s2_hit_way - dataArb.io.in(1).bits.isWriteback := Bool(false) - - stall_s2 := s2_valid && !(dataArb.io.in(1).ready && writeback.io.req(0).ready && mshr.io.cpu.ready) - - io.cpu.resp <> data.io.resp - io.cpu.req_cmd.ready := !stall_s1 && !replay_s1 - io.cpu.req_data.ready := writeback.io.data(1).ready || data.io.req_data.ready - io.mem.req_cmd <> memCmdArb.io.out - io.mem.req_data <> writeback.io.mem.req_data -} diff --git a/rocket/src/main/scala/memserdes.scala b/rocket/src/main/scala/memserdes.scala index cdd109c2..712dec16 100644 --- a/rocket/src/main/scala/memserdes.scala +++ b/rocket/src/main/scala/memserdes.scala @@ -4,6 +4,7 @@ import Chisel._ import Node._ import Constants._ import scala.math._ +import uncore._ class ioMemSerialized extends Bundle { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 40b9e6ab..951d6adc 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -2,6 +2,7 @@ package rocket import Chisel._ import Constants._ +import uncore._ class ioReplacementWayGen extends Bundle { val pick_new_way = Bool(dir = INPUT) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index a5178fb4..5e621808 100644 --- 
a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -3,6 +3,7 @@ package rocket import Chisel._ import Node._ import Constants._ +import uncore._ class Tile(co: CoherencePolicyWithUncached, resetSignal: Bool = null) extends Component(resetSignal) { diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index c8437567..4390f8d7 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -2,6 +2,7 @@ package rocket import Chisel._ import Node._; +import uncore._ import Constants._; import collection.mutable.ArrayBuffer diff --git a/rocket/src/main/scala/uncore.scala b/rocket/src/main/scala/uncore.scala deleted file mode 100644 index 84b74366..00000000 --- a/rocket/src/main/scala/uncore.scala +++ /dev/null @@ -1,519 +0,0 @@ -package rocket - -import Chisel._ -import Constants._ - -class PhysicalAddress extends Bundle { - val addr = UFix(width = PADDR_BITS - OFFSET_BITS) -} - -class MemData extends Bundle { - val data = Bits(width = MEM_DATA_BITS) -} - -class MemReqCmd() extends PhysicalAddress -{ - val rw = Bool() - val tag = Bits(width = MEM_TAG_BITS) -} - -class MemResp () extends MemData -{ - val tag = Bits(width = MEM_TAG_BITS) -} - -class ioMem() extends Bundle -{ - val req_cmd = (new FIFOIO) { new MemReqCmd() } - val req_data = (new FIFOIO) { new MemData() } - val resp = (new FIFOIO) { new MemResp() }.flip -} - -class ioMemPipe() extends Bundle -{ - val req_cmd = (new FIFOIO) { new MemReqCmd() } - val req_data = (new FIFOIO) { new MemData() } - val resp = (new PipeIO) { new MemResp() }.flip -} - -class TrackerProbeData extends Bundle { - val tile_id = Bits(width = TILE_ID_BITS) -} - -class TrackerAllocReq extends Bundle { - val xact_init = new TransactionInit() - val tile_id = Bits(width = TILE_ID_BITS) -} - -class TrackerDependency extends Bundle { - val global_xact_id = Bits(width = GLOBAL_XACT_ID_BITS) -} - -class ioTileLink extends Bundle { - val xact_init = (new FIFOIO) { new 
TransactionInit } - val xact_init_data = (new FIFOIO) { new TransactionInitData } - val xact_abort = (new FIFOIO) { new TransactionAbort }.flip - val probe_req = (new FIFOIO) { new ProbeRequest }.flip - val probe_rep = (new FIFOIO) { new ProbeReply } - val probe_rep_data = (new FIFOIO) { new ProbeReplyData } - val xact_rep = (new FIFOIO) { new TransactionReply }.flip - val xact_finish = (new FIFOIO) { new TransactionFinish } - val incoherent = Bool(OUTPUT) -} - -class XactTracker(ntiles: Int, id: Int, co: CoherencePolicy) extends Component { - val io = new Bundle { - val alloc_req = (new FIFOIO) { new TrackerAllocReq }.flip - val p_data = (new PipeIO) { new TrackerProbeData }.flip - val can_alloc = Bool(INPUT) - val xact_finish = Bool(INPUT) - val p_rep_cnt_dec = Bits(INPUT, ntiles) - val p_req_cnt_inc = Bits(INPUT, ntiles) - val tile_incoherent = Bits(INPUT, ntiles) - val p_rep_data = (new PipeIO) { new ProbeReplyData }.flip - val x_init_data = (new PipeIO) { new TransactionInitData }.flip - val sent_x_rep_ack = Bool(INPUT) - val p_rep_data_dep = (new PipeIO) { new TrackerDependency }.flip - val x_init_data_dep = (new PipeIO) { new TrackerDependency }.flip - - val mem_req_cmd = (new FIFOIO) { new MemReqCmd } - val mem_req_data = (new FIFOIO) { new MemData } - val mem_req_lock = Bool(OUTPUT) - val probe_req = (new FIFOIO) { new ProbeRequest } - val busy = Bool(OUTPUT) - val addr = Bits(OUTPUT, PADDR_BITS - OFFSET_BITS) - val init_tile_id = Bits(OUTPUT, TILE_ID_BITS) - val p_rep_tile_id = Bits(OUTPUT, TILE_ID_BITS) - val tile_xact_id = Bits(OUTPUT, TILE_XACT_ID_BITS) - val sharer_count = Bits(OUTPUT, TILE_ID_BITS+1) - val x_type = Bits(OUTPUT, X_INIT_TYPE_MAX_BITS) - val push_p_req = Bits(OUTPUT, ntiles) - val pop_p_rep = Bits(OUTPUT, ntiles) - val pop_p_rep_data = Bits(OUTPUT, ntiles) - val pop_p_rep_dep = Bits(OUTPUT, ntiles) - val pop_x_init = Bits(OUTPUT, ntiles) - val pop_x_init_data = Bits(OUTPUT, ntiles) - val pop_x_init_dep = Bits(OUTPUT, ntiles) - val 
send_x_rep_ack = Bool(OUTPUT) - } - - def doMemReqWrite(req_cmd: FIFOIO[MemReqCmd], req_data: FIFOIO[MemData], lock: Bool, data: PipeIO[MemData], trigger: Bool, cmd_sent: Bool, pop_data: Bits, pop_dep: Bits, at_front_of_dep_queue: Bool, tile_id: UFix) { - req_cmd.bits.rw := Bool(true) - req_data.bits := data.bits - when(req_cmd.ready && req_cmd.valid) { - cmd_sent := Bool(true) - } - when (at_front_of_dep_queue) { - req_cmd.valid := !cmd_sent && req_data.ready && data.valid - lock := data.valid || cmd_sent - when (req_cmd.ready || cmd_sent) { - req_data.valid := data.valid - when(req_data.ready) { - pop_data := UFix(1) << tile_id - when (data.valid) { - mem_cnt := mem_cnt_next - when(mem_cnt === UFix(REFILL_CYCLES-1)) { - pop_dep := UFix(1) << tile_id - trigger := Bool(false) - } - } - } - } - } - } - - def doMemReqRead(req_cmd: FIFOIO[MemReqCmd], trigger: Bool) { - req_cmd.valid := Bool(true) - req_cmd.bits.rw := Bool(false) - when(req_cmd.ready) { - trigger := Bool(false) - } - } - - val s_idle :: s_ack :: s_mem :: s_probe :: s_busy :: Nil = Enum(5){ UFix() } - val state = Reg(resetVal = s_idle) - val addr_ = Reg{ UFix() } - val x_type_ = Reg{ Bits() } - val init_tile_id_ = Reg{ Bits() } - val tile_xact_id_ = Reg{ Bits() } - val p_rep_count = if (ntiles == 1) UFix(0) else Reg(resetVal = UFix(0, width = log2Up(ntiles))) - val p_req_flags = Reg(resetVal = Bits(0, width = ntiles)) - val p_rep_tile_id_ = Reg{ Bits() } - val x_needs_read = Reg(resetVal = Bool(false)) - val x_init_data_needs_write = Reg(resetVal = Bool(false)) - val p_rep_data_needs_write = Reg(resetVal = Bool(false)) - val x_w_mem_cmd_sent = Reg(resetVal = Bool(false)) - val p_w_mem_cmd_sent = Reg(resetVal = Bool(false)) - val mem_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES))) - val mem_cnt_next = mem_cnt + UFix(1) - val mem_cnt_max = ~UFix(0, width = log2Up(REFILL_CYCLES)) - val p_req_initial_flags = Bits(width = ntiles) - p_req_initial_flags := (if (ntiles == 1) Bits(0) else 
~(io.tile_incoherent | UFixToOH(io.alloc_req.bits.tile_id(log2Up(ntiles)-1,0)))) //TODO: Broadcast only - - io.busy := state != s_idle - io.addr := addr_ - io.init_tile_id := init_tile_id_ - io.p_rep_tile_id := p_rep_tile_id_ - io.tile_xact_id := tile_xact_id_ - io.sharer_count := UFix(ntiles) // TODO: Broadcast only - io.x_type := x_type_ - - io.mem_req_cmd.valid := Bool(false) - io.mem_req_cmd.bits.rw := Bool(false) - io.mem_req_cmd.bits.addr := addr_ - io.mem_req_cmd.bits.tag := UFix(id) - io.mem_req_data.valid := Bool(false) - io.mem_req_data.bits.data := UFix(0) - io.mem_req_lock := Bool(false) - io.probe_req.valid := Bool(false) - io.probe_req.bits.p_type := co.getProbeRequestType(x_type_, UFix(0)) - io.probe_req.bits.global_xact_id := UFix(id) - io.probe_req.bits.addr := addr_ - io.push_p_req := Bits(0, width = ntiles) - io.pop_p_rep := Bits(0, width = ntiles) - io.pop_p_rep_data := Bits(0, width = ntiles) - io.pop_p_rep_dep := Bits(0, width = ntiles) - io.pop_x_init := Bits(0, width = ntiles) - io.pop_x_init_data := Bits(0, width = ntiles) - io.pop_x_init_dep := Bits(0, width = ntiles) - io.send_x_rep_ack := Bool(false) - - switch (state) { - is(s_idle) { - when( io.alloc_req.valid && io.can_alloc ) { - addr_ := io.alloc_req.bits.xact_init.addr - x_type_ := io.alloc_req.bits.xact_init.x_type - init_tile_id_ := io.alloc_req.bits.tile_id - tile_xact_id_ := io.alloc_req.bits.xact_init.tile_xact_id - x_init_data_needs_write := co.messageHasData(io.alloc_req.bits.xact_init) - x_needs_read := co.needsMemRead(io.alloc_req.bits.xact_init.x_type, UFix(0)) - p_req_flags := p_req_initial_flags - mem_cnt := UFix(0) - p_w_mem_cmd_sent := Bool(false) - x_w_mem_cmd_sent := Bool(false) - io.pop_x_init := UFix(1) << io.alloc_req.bits.tile_id - if(ntiles > 1) { - p_rep_count := PopCount(p_req_initial_flags) - state := Mux(p_req_initial_flags.orR, s_probe, s_mem) - } else state := s_mem - } - } - is(s_probe) { - when(p_req_flags.orR) { - io.push_p_req := p_req_flags - 
io.probe_req.valid := Bool(true) - } - when(io.p_req_cnt_inc.orR) { - p_req_flags := p_req_flags & ~io.p_req_cnt_inc // unflag sent reqs - } - when(io.p_rep_cnt_dec.orR) { - val dec = PopCount(io.p_rep_cnt_dec) - io.pop_p_rep := io.p_rep_cnt_dec - if(ntiles > 1) p_rep_count := p_rep_count - dec - when(p_rep_count === dec) { - state := s_mem - } - } - when(io.p_data.valid) { - p_rep_data_needs_write := Bool(true) - p_rep_tile_id_ := io.p_data.bits.tile_id - } - } - is(s_mem) { - when (p_rep_data_needs_write) { - doMemReqWrite(io.mem_req_cmd, - io.mem_req_data, - io.mem_req_lock, - io.p_rep_data, - p_rep_data_needs_write, - p_w_mem_cmd_sent, - io.pop_p_rep_data, - io.pop_p_rep_dep, - io.p_rep_data_dep.valid && (io.p_rep_data_dep.bits.global_xact_id === UFix(id)), - p_rep_tile_id_) - } . elsewhen(x_init_data_needs_write) { - doMemReqWrite(io.mem_req_cmd, - io.mem_req_data, - io.mem_req_lock, - io.x_init_data, - x_init_data_needs_write, - x_w_mem_cmd_sent, - io.pop_x_init_data, - io.pop_x_init_dep, - io.x_init_data_dep.valid && (io.x_init_data_dep.bits.global_xact_id === UFix(id)), - init_tile_id_) - } . elsewhen (x_needs_read) { - doMemReqRead(io.mem_req_cmd, x_needs_read) - } . 
otherwise { - state := Mux(co.needsAckReply(x_type_, UFix(0)), s_ack, s_busy) - } - } - is(s_ack) { - io.send_x_rep_ack := Bool(true) - when(io.sent_x_rep_ack) { state := s_busy } - } - is(s_busy) { // Nothing left to do but wait for transaction to complete - when (io.xact_finish) { - state := s_idle - } - } - } -} - -abstract class CoherenceHub(ntiles: Int, co: CoherencePolicy) extends Component { - val io = new Bundle { - val tiles = Vec(ntiles) { new ioTileLink() }.flip - val mem = new ioMem - } -} - -class CoherenceHubNull(co: ThreeStateIncoherence) extends CoherenceHub(1, co) -{ - val x_init = io.tiles(0).xact_init - val is_write = x_init.bits.x_type === co.xactInitWriteback - x_init.ready := io.mem.req_cmd.ready && !(is_write && io.mem.resp.valid) //stall write req/resp to handle previous read resp - io.mem.req_cmd.valid := x_init.valid && !(is_write && io.mem.resp.valid) - io.mem.req_cmd.bits.rw := is_write - io.mem.req_cmd.bits.tag := x_init.bits.tile_xact_id - io.mem.req_cmd.bits.addr := x_init.bits.addr - io.mem.req_data <> io.tiles(0).xact_init_data - - val x_rep = io.tiles(0).xact_rep - x_rep.bits.x_type := Mux(io.mem.resp.valid, co.xactReplyData, co.xactReplyAck) - x_rep.bits.tile_xact_id := Mux(io.mem.resp.valid, io.mem.resp.bits.tag, x_init.bits.tile_xact_id) - x_rep.bits.global_xact_id := UFix(0) // don't care - x_rep.bits.data := io.mem.resp.bits.data - x_rep.bits.require_ack := Bool(true) - x_rep.valid := io.mem.resp.valid || x_init.valid && is_write && io.mem.req_cmd.ready - - io.tiles(0).xact_abort.valid := Bool(false) - io.tiles(0).xact_finish.ready := Bool(true) - io.tiles(0).probe_req.valid := Bool(false) - io.tiles(0).probe_rep.ready := Bool(true) - io.tiles(0).probe_rep_data.ready := Bool(true) -} - - -class CoherenceHubBroadcast(ntiles: Int, co: CoherencePolicy) extends CoherenceHub(ntiles, co) -{ - val trackerList = (0 until NGLOBAL_XACTS).map(new XactTracker(ntiles, _, co)) - - val busy_arr = Vec(NGLOBAL_XACTS){ Bool() } - val addr_arr = 
Vec(NGLOBAL_XACTS){ Bits(width=PADDR_BITS-OFFSET_BITS) } - val init_tile_id_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_ID_BITS) } - val tile_xact_id_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_XACT_ID_BITS) } - val x_type_arr = Vec(NGLOBAL_XACTS){ Bits(width=X_INIT_TYPE_MAX_BITS) } - val sh_count_arr = Vec(NGLOBAL_XACTS){ Bits(width=TILE_ID_BITS) } - val send_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Bool() } - - val do_free_arr = Vec(NGLOBAL_XACTS){ Bool() } - val p_rep_cnt_dec_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Bool()} } - val p_req_cnt_inc_arr = VecBuf(NGLOBAL_XACTS){ Vec(ntiles){ Bool()} } - val sent_x_rep_ack_arr = Vec(NGLOBAL_XACTS){ Bool() } - val p_data_tile_id_arr = Vec(NGLOBAL_XACTS){ Bits(width = TILE_ID_BITS) } - val p_data_valid_arr = Vec(NGLOBAL_XACTS){ Bool() } - - for( i <- 0 until NGLOBAL_XACTS) { - val t = trackerList(i).io - busy_arr(i) := t.busy - addr_arr(i) := t.addr - init_tile_id_arr(i) := t.init_tile_id - tile_xact_id_arr(i) := t.tile_xact_id - x_type_arr(i) := t.x_type - sh_count_arr(i) := t.sharer_count - send_x_rep_ack_arr(i) := t.send_x_rep_ack - t.xact_finish := do_free_arr(i) - t.p_data.bits.tile_id := p_data_tile_id_arr(i) - t.p_data.valid := p_data_valid_arr(i) - t.p_rep_cnt_dec := p_rep_cnt_dec_arr(i).toBits - t.p_req_cnt_inc := p_req_cnt_inc_arr(i).toBits - t.tile_incoherent := (Vec(io.tiles.map(_.incoherent)) { Bool() }).toBits - t.sent_x_rep_ack := sent_x_rep_ack_arr(i) - do_free_arr(i) := Bool(false) - sent_x_rep_ack_arr(i) := Bool(false) - p_data_tile_id_arr(i) := Bits(0, width = TILE_ID_BITS) - p_data_valid_arr(i) := Bool(false) - for( j <- 0 until ntiles) { - p_rep_cnt_dec_arr(i)(j) := Bool(false) - p_req_cnt_inc_arr(i)(j) := Bool(false) - } - } - - val p_rep_data_dep_list = List.fill(ntiles)((new Queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth must >= NPRIMARY - val x_init_data_dep_list = List.fill(ntiles)((new Queue(NGLOBAL_XACTS)){new TrackerDependency}) // depth should >= NPRIMARY - - // Free finished transactions - 
for( j <- 0 until ntiles ) { - val finish = io.tiles(j).xact_finish - when (finish.valid) { - do_free_arr(finish.bits.global_xact_id) := Bool(true) - } - finish.ready := Bool(true) - } - - // Reply to initial requestor - // Forward memory responses from mem to tile or arbitrate to ack - val mem_idx = io.mem.resp.bits.tag - val ack_idx = PriorityEncoder(send_x_rep_ack_arr.toBits) - for( j <- 0 until ntiles ) { - val rep = io.tiles(j).xact_rep - rep.bits.x_type := UFix(0) - rep.bits.tile_xact_id := UFix(0) - rep.bits.global_xact_id := UFix(0) - rep.bits.data := io.mem.resp.bits.data - rep.bits.require_ack := Bool(true) - rep.valid := Bool(false) - when(io.mem.resp.valid && (UFix(j) === init_tile_id_arr(mem_idx))) { - rep.bits.x_type := co.getTransactionReplyType(x_type_arr(mem_idx), sh_count_arr(mem_idx)) - rep.bits.tile_xact_id := tile_xact_id_arr(mem_idx) - rep.bits.global_xact_id := mem_idx - rep.valid := Bool(true) - } . otherwise { - rep.bits.x_type := co.getTransactionReplyType(x_type_arr(ack_idx), sh_count_arr(ack_idx)) - rep.bits.tile_xact_id := tile_xact_id_arr(ack_idx) - rep.bits.global_xact_id := ack_idx - when (UFix(j) === init_tile_id_arr(ack_idx)) { - rep.valid := send_x_rep_ack_arr.toBits.orR - sent_x_rep_ack_arr(ack_idx) := rep.ready - } - } - } - io.mem.resp.ready := io.tiles(init_tile_id_arr(mem_idx)).xact_rep.ready - - // Create an arbiter for the one memory port - // We have to arbitrate between the different trackers' memory requests - // and once we have picked a request, get the right write data - val mem_req_cmd_arb = (new Arbiter(NGLOBAL_XACTS)) { new MemReqCmd() } - val mem_req_data_arb = (new LockingArbiter(NGLOBAL_XACTS)) { new MemData() } - for( i <- 0 until NGLOBAL_XACTS ) { - mem_req_cmd_arb.io.in(i) <> trackerList(i).io.mem_req_cmd - mem_req_data_arb.io.in(i) <> trackerList(i).io.mem_req_data - mem_req_data_arb.io.lock(i) <> trackerList(i).io.mem_req_lock - } - io.mem.req_cmd <> Queue(mem_req_cmd_arb.io.out) - io.mem.req_data <> 
Queue(mem_req_data_arb.io.out) - - // Handle probe replies, which may or may not have data - for( j <- 0 until ntiles ) { - val p_rep = io.tiles(j).probe_rep - val p_rep_data = io.tiles(j).probe_rep_data - val idx = p_rep.bits.global_xact_id - val pop_p_reps = trackerList.map(_.io.pop_p_rep(j).toBool) - val do_pop = foldR(pop_p_reps)(_ || _) - p_rep.ready := Bool(true) - p_rep_data_dep_list(j).io.enq.valid := p_rep.valid && co.messageHasData(p_rep.bits) - p_rep_data_dep_list(j).io.enq.bits.global_xact_id := p_rep.bits.global_xact_id - p_rep_data.ready := foldR(trackerList.map(_.io.pop_p_rep_data(j)))(_ || _) - when (p_rep.valid && co.messageHasData(p_rep.bits)) { - p_data_valid_arr(idx) := Bool(true) - p_data_tile_id_arr(idx) := UFix(j) - } - p_rep_data_dep_list(j).io.deq.ready := foldR(trackerList.map(_.io.pop_p_rep_dep(j).toBool))(_||_) - } - for( i <- 0 until NGLOBAL_XACTS ) { - trackerList(i).io.p_rep_data.valid := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.valid - trackerList(i).io.p_rep_data.bits := io.tiles(trackerList(i).io.p_rep_tile_id).probe_rep_data.bits - - trackerList(i).io.p_rep_data_dep.valid := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.valid, (0 until ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.valid)) - trackerList(i).io.p_rep_data_dep.bits := MuxLookup(trackerList(i).io.p_rep_tile_id, p_rep_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> p_rep_data_dep_list(j).io.deq.bits)) - - for( j <- 0 until ntiles) { - val p_rep = io.tiles(j).probe_rep - p_rep_cnt_dec_arr(i)(j) := p_rep.valid && (p_rep.bits.global_xact_id === UFix(i)) - } - } - - // Nack conflicting transaction init attempts - val s_idle :: s_abort_drain :: s_abort_send :: Nil = Enum(3){ UFix() } - val abort_state_arr = Vec(ntiles) { Reg(resetVal = s_idle) } - val want_to_abort_arr = Vec(ntiles) { Bool() } - for( j <- 0 until ntiles ) { - val x_init = io.tiles(j).xact_init - val x_init_data = 
io.tiles(j).xact_init_data - val x_abort = io.tiles(j).xact_abort - val abort_cnt = Reg(resetVal = UFix(0, width = log2Up(REFILL_CYCLES))) - val conflicts = Vec(NGLOBAL_XACTS) { Bool() } - for( i <- 0 until NGLOBAL_XACTS) { - val t = trackerList(i).io - conflicts(i) := t.busy && x_init.valid && co.isCoherenceConflict(t.addr, x_init.bits.addr) - } - x_abort.bits.tile_xact_id := x_init.bits.tile_xact_id - want_to_abort_arr(j) := x_init.valid && (conflicts.toBits.orR || busy_arr.toBits.andR || (!x_init_data_dep_list(j).io.enq.ready && co.messageHasData(x_init.bits))) - - x_abort.valid := Bool(false) - switch(abort_state_arr(j)) { - is(s_idle) { - when(want_to_abort_arr(j)) { - when(co.messageHasData(x_init.bits)) { - abort_state_arr(j) := s_abort_drain - } . otherwise { - abort_state_arr(j) := s_abort_send - } - } - } - is(s_abort_drain) { // raises x_init_data.ready below - when(x_init_data.valid) { - abort_cnt := abort_cnt + UFix(1) - when(abort_cnt === ~UFix(0, width = log2Up(REFILL_CYCLES))) { - abort_state_arr(j) := s_abort_send - } - } - } - is(s_abort_send) { // nothing is dequeued for now - x_abort.valid := Bool(true) - when(x_abort.ready) { // raises x_init.ready below - abort_state_arr(j) := s_idle - } - } - } - } - - // Handle transaction initiation requests - // Only one allocation per cycle - // Init requests may or may not have data - val alloc_arb = (new Arbiter(NGLOBAL_XACTS)) { Bool() } - val init_arb = (new Arbiter(ntiles)) { new TrackerAllocReq() } - for( i <- 0 until NGLOBAL_XACTS ) { - alloc_arb.io.in(i).valid := !trackerList(i).io.busy - trackerList(i).io.can_alloc := alloc_arb.io.in(i).ready - trackerList(i).io.alloc_req.bits := init_arb.io.out.bits - trackerList(i).io.alloc_req.valid := init_arb.io.out.valid - - trackerList(i).io.x_init_data.bits := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.bits - trackerList(i).io.x_init_data.valid := io.tiles(trackerList(i).io.init_tile_id).xact_init_data.valid - 
trackerList(i).io.x_init_data_dep.bits := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.bits, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.bits)) - trackerList(i).io.x_init_data_dep.valid := MuxLookup(trackerList(i).io.init_tile_id, x_init_data_dep_list(0).io.deq.valid, (0 until ntiles).map( j => UFix(j) -> x_init_data_dep_list(j).io.deq.valid)) - } - for( j <- 0 until ntiles ) { - val x_init = io.tiles(j).xact_init - val x_init_data = io.tiles(j).xact_init_data - val x_init_data_dep = x_init_data_dep_list(j).io.deq - val x_abort = io.tiles(j).xact_abort - init_arb.io.in(j).valid := (abort_state_arr(j) === s_idle) && !want_to_abort_arr(j) && x_init.valid - init_arb.io.in(j).bits.xact_init := x_init.bits - init_arb.io.in(j).bits.tile_id := UFix(j) - val pop_x_inits = trackerList.map(_.io.pop_x_init(j).toBool) - val do_pop = foldR(pop_x_inits)(_||_) - x_init_data_dep_list(j).io.enq.valid := do_pop && co.messageHasData(x_init.bits) && (abort_state_arr(j) === s_idle) - x_init_data_dep_list(j).io.enq.bits.global_xact_id := OHToUFix(pop_x_inits) - x_init.ready := (x_abort.valid && x_abort.ready) || do_pop - x_init_data.ready := (abort_state_arr(j) === s_abort_drain) || foldR(trackerList.map(_.io.pop_x_init_data(j).toBool))(_||_) - x_init_data_dep.ready := foldR(trackerList.map(_.io.pop_x_init_dep(j).toBool))(_||_) - } - - alloc_arb.io.out.ready := init_arb.io.out.valid - - // Handle probe request generation - // Must arbitrate for each request port - val p_req_arb_arr = List.fill(ntiles)((new Arbiter(NGLOBAL_XACTS)) { new ProbeRequest() }) - for( j <- 0 until ntiles ) { - for( i <- 0 until NGLOBAL_XACTS ) { - val t = trackerList(i).io - p_req_arb_arr(j).io.in(i).bits := t.probe_req.bits - p_req_arb_arr(j).io.in(i).valid := t.probe_req.valid && t.push_p_req(j) - p_req_cnt_inc_arr(i)(j) := p_req_arb_arr(j).io.in(i).ready - } - p_req_arb_arr(j).io.out <> io.tiles(j).probe_req - } - -} From 
ed8cc4a1cf1bfc5bc8c746726672df8330c8eb2f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 4 Oct 2012 09:05:14 -0700 Subject: [PATCH 0471/1087] eliminate D$ probe->WB critical path --- rocket/src/main/scala/nbdcache.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 951d6adc..a3402dcb 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -238,7 +238,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { when (abort) { state := s_wb_req } } when (state === s_wb_req) { - when (io.probe_writeback.valid && idx_match) { state := s_refill_req } + when (io.probe_writeback.valid && io.probe_writeback.bits && idx_match) { state := s_refill_req } .elsewhen (io.wb_req.ready) { state := s_wb_resp } } @@ -366,7 +366,8 @@ class MSHRFile(co: CoherencePolicy) extends Component { mshr.io.wb_req <> wb_req_arb.io.in(i) mshr.io.replay <> replay_arb.io.in(i) mshr.io.probe_refill.valid := io.probe.valid && tag_match - mshr.io.probe_writeback.valid := io.probe.valid && wb_probe_match + mshr.io.probe_writeback.valid := io.probe.valid + mshr.io.probe_writeback.bits := wb_probe_match mshr.io.mem_abort <> io.mem_abort mshr.io.mem_rep <> io.mem_rep From b5ff43609275176480311c90b60d4ba7a3dfbcc2 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 5 Oct 2012 15:50:42 -0700 Subject: [PATCH 0472/1087] decode constant object split into multiple objects --- rocket/src/main/scala/ctrl.scala | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 24304498..bf148c9a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -90,7 +90,7 @@ class ioCtrlAll extends Bundle() val vec_iface = new ioCtrlVecInterface() } -object rocketCtrlDecode +abstract trait rocketCtrlDecodeConstants { val xpr64 = 
Y; @@ -102,7 +102,12 @@ object rocketCtrlDecode // | | | | | | | | | | | | | | | | | | | | | | | | | List(N, X,X,BR_X, X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,MUL_X, X,DIV_X, X,WA_X, WB_X, PCR_X,SYNC_X,X,X,X,X) - val xdecode = Array( + val table: Array[(Bits, List[Bits])] +} + +object rocketCtrlXDecode extends rocketCtrlDecodeConstants +{ + val table = Array( // eret // fp_val renx2 | syscall // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged @@ -209,8 +214,11 @@ object rocketCtrlDecode RDTIME-> List(Y, N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), RDCYCLE-> List(Y, N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), RDINSTRET-> List(Y, N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N)) - - val fdecode = Array( +} + +object rocketCtrlFDecode extends rocketCtrlDecodeConstants +{ + val table = Array( // eret // fp_val renx2 | syscall // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged @@ -274,8 +282,11 @@ object rocketCtrlDecode FLD-> List(FPU_Y,Y,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), FSW-> List(FPU_Y,Y,N,BR_N, N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), FSD-> List(FPU_Y,Y,N,BR_N, N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N)) +} - val vdecode = Array( +object rocketCtrlVDecode extends rocketCtrlDecodeConstants +{ + val table = Array( // eret // fp_val renx2 | syscall // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged @@ -334,11 +345,11 @@ class rocketCtrl extends Component { val io = new ioCtrlAll(); - var decode_table = rocketCtrlDecode.xdecode - if (HAVE_FPU) decode_table ++= rocketCtrlDecode.fdecode - if (HAVE_VEC) decode_table ++= rocketCtrlDecode.vdecode + var decode_table = 
rocketCtrlXDecode.table + if (HAVE_FPU) decode_table ++= rocketCtrlFDecode.table + if (HAVE_VEC) decode_table ++= rocketCtrlVDecode.table - val cs = DecodeLogic(io.dpath.inst, rocketCtrlDecode.decode_default, decode_table) + val cs = DecodeLogic(io.dpath.inst, rocketCtrlXDecode.decode_default, decode_table) val id_int_val :: id_fp_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 From dfdfddebe8ba743a858cb1c2468910e7c561ca44 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 7 Oct 2012 20:15:54 -0700 Subject: [PATCH 0473/1087] constants as traits --- rocket/src/main/scala/arbiter.scala | 5 +- rocket/src/main/scala/consts.scala | 78 +++++++++++++++--------- rocket/src/main/scala/ctrl.scala | 3 +- rocket/src/main/scala/ctrl_util.scala | 2 +- rocket/src/main/scala/dpath.scala | 3 +- rocket/src/main/scala/dpath_alu.scala | 3 +- rocket/src/main/scala/dpath_util.scala | 8 +-- rocket/src/main/scala/dtlb.scala | 8 +-- rocket/src/main/scala/htif.scala | 4 +- rocket/src/main/scala/icache.scala | 8 +-- rocket/src/main/scala/instructions.scala | 3 +- rocket/src/main/scala/itlb.scala | 8 +-- rocket/src/main/scala/memserdes.scala | 2 +- rocket/src/main/scala/package.scala | 29 +++++++++ rocket/src/main/scala/ptw.scala | 8 +-- rocket/src/main/scala/queues.scala | 3 +- rocket/src/main/scala/top.scala | 26 +++++--- rocket/src/main/scala/util.scala | 2 - 18 files changed, 128 insertions(+), 75 deletions(-) create mode 100644 rocket/src/main/scala/package.scala diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 95432dc8..333397af 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -1,8 +1,7 @@ package rocket -import Chisel._; -import Node._; -import Constants._; +import Chisel._ +import Node._ 
import uncore._ class ioUncachedRequestor extends Bundle { diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index ac5de59d..1e3d0578 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -1,21 +1,40 @@ package rocket +package constants import Chisel._ import scala.math._ -object Constants -{ - val NTILES = 1 - val HAVE_RVC = false - val HAVE_FPU = true - val HAVE_VEC = true +abstract trait MulticoreConstants { + val NTILES: Int = 1 + val TILE_ID_BITS = log2Up(NTILES)+1 +} - val MAX_THREADS = - hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK +abstract trait CoherenceConfigConstants { + val ENABLE_SHARING: Boolean + val ENABLE_CLEAN_EXCLUSIVE: Boolean +} +trait UncoreConstants { + val NGLOBAL_XACTS = 8 + val GLOBAL_XACT_ID_BITS = log2Up(NGLOBAL_XACTS) +} + +trait HTIFConstants { val HTIF_WIDTH = 16 val MEM_BACKUP_WIDTH = HTIF_WIDTH +} +abstract trait TileConfigConstants extends UncoreConstants with MulticoreConstants { + val HAVE_RVC: Boolean + val HAVE_FPU: Boolean + val HAVE_VEC: Boolean + def FPU_N = UFix(0, 1) + def FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N + def VEC_N = UFix(0, 1); + def VEC_Y = if (HAVE_VEC) UFix(1, 1) else VEC_N +} + +trait ScalarOpConstants { val BR_X = Bits("b????", 4) val BR_N = UFix(0, 4); val BR_EQ = UFix(1, 4); @@ -89,7 +108,9 @@ object Constants val DW_XPR = Y val RA = UFix(1, 5); +} +trait MemoryOpConstants { val MT_X = Bits("b???", 3); val MT_B = Bits("b000", 3); val MT_H = Bits("b001", 3); @@ -116,7 +137,9 @@ object Constants val M_XA_MAX = Bits("b1101", 4); val M_XA_MINU = Bits("b1110", 4); val M_XA_MAXU = Bits("b1111", 4); +} +trait PCRConstants { val PCR_X = Bits("b???", 3) val PCR_N = Bits(0,3) val PCR_F = Bits(1,3) // mfpcr @@ -161,11 +184,15 @@ object Constants val SR_VM = 8 // VM enable val SR_IM = 16 // interrupt mask val SR_IM_WIDTH = 8 +} +trait InterruptConstants { val CAUSE_INTERRUPT = 32 val IRQ_IPI = 5 val 
IRQ_TIMER = 7 - +} + +trait AddressConstants { val PADDR_BITS = 40; val VADDR_BITS = 43; val PGIDX_BITS = 13; @@ -173,8 +200,9 @@ object Constants val VPN_BITS = VADDR_BITS-PGIDX_BITS; val ASID_BITS = 7; val PERM_BITS = 6; +} - // rocketNBDCache parameters +abstract trait RocketDcacheConstants extends TileConfigConstants with AddressConstants { val DCACHE_PORTS = 3 val CPU_DATA_BITS = 64; val CPU_TAG_BITS = 9; @@ -188,17 +216,10 @@ object Constants val TAG_BITS = PADDR_BITS - OFFSET_BITS - IDX_BITS; val NWAYS = 4 require(IDX_BITS+OFFSET_BITS <= PGIDX_BITS); +} - // coherence parameters - val ENABLE_SHARING = true - val ENABLE_CLEAN_EXCLUSIVE = true - - val COHERENCE_DATA_BITS = (1 << OFFSET_BITS)*8 - val TILE_ID_BITS = log2Up(NTILES)+1 +trait TileLinkSizeConstants extends RocketDcacheConstants { val TILE_XACT_ID_BITS = log2Up(NMSHR)+3 - val NGLOBAL_XACTS = 8 - val GLOBAL_XACT_ID_BITS = log2Up(NGLOBAL_XACTS) - val X_INIT_TYPE_MAX_BITS = 2 val X_INIT_WRITE_MASK_BITS = OFFSET_BITS val X_INIT_SUBWORD_ADDR_BITS = log2Up(OFFSET_BITS) @@ -206,24 +227,21 @@ object Constants val X_REP_TYPE_MAX_BITS = 3 val P_REQ_TYPE_MAX_BITS = 2 val P_REP_TYPE_MAX_BITS = 3 +} - // external memory interface +trait MemoryInterfaceConstants extends UncoreConstants with TileLinkSizeConstants { val MEM_TAG_BITS = max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS) val MEM_DATA_BITS = 128 val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS - +} + +trait TLBConstants { val DTLB_ENTRIES = 16 val ITLB_ENTRIES = 8; val VITLB_ENTRIES = 4 - - val START_ADDR = 0x2000; - - val FPU_N = UFix(0, 1); - val FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N; - - val VEC_N = UFix(0, 1); - val VEC_Y = if (HAVE_VEC) UFix(1, 1) else VEC_N; +} +trait VectorOpConstants { val VEC_X = Bits("b??", 2).toUFix val VEC_FN_N = UFix(0, 2) val VEC_VL = UFix(1, 2) @@ -246,7 +264,9 @@ object Constants val VIMM2_RS2 = UFix(0, 1) val VIMM2_ALU = UFix(1, 1) val VIMM2_X = UFix(0, 1) +} +trait ArbiterConstants { val DTLB_CPU = 0 val DTLB_VEC 
= 1 val DTLB_VPF = 2 diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index bf148c9a..3ea5105f 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -1,8 +1,7 @@ package rocket import Chisel._ -import Node._; - +import Node._ import Constants._ import Instructions._ import hwacha._ diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala index 416033a6..34b6d40b 100644 --- a/rocket/src/main/scala/ctrl_util.scala +++ b/rocket/src/main/scala/ctrl_util.scala @@ -1,7 +1,7 @@ package rocket import Chisel._ -import Node._; +import Node._ class rocketCtrlSboard(entries: Int, nread: Int, nwrite: Int) extends Component { diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 84746631..a4d259c8 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -1,8 +1,7 @@ package rocket import Chisel._ -import Node._; - +import Node._ import Constants._ import Instructions._ import hwacha._ diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 25fc2956..17404f56 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -1,8 +1,7 @@ package rocket import Chisel._ -import Node._; - +import Node._ import Constants._ import Instructions._ diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index f216e52a..e648a1e2 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -1,9 +1,9 @@ package rocket -import Chisel._; -import Node._; -import Constants._; -import scala.math._; +import Chisel._ +import Node._ +import Constants._ +import scala.math._ class ioDpathBTB extends Bundle() { diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala index 6ce054dc..07c80159 100644 --- a/rocket/src/main/scala/dtlb.scala +++ 
b/rocket/src/main/scala/dtlb.scala @@ -1,10 +1,10 @@ package rocket -import Chisel._; -import Node._; -import Constants._; -import scala.math._; +import Chisel._ +import Node._ +import Constants._ import hwacha._ +import scala.math._ // ioDTLB_CPU also located in hwacha/src/vuVXU-Interface.scala // should keep them in sync diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 335ef3cb..33a56c56 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -1,8 +1,8 @@ package rocket import Chisel._ -import Node._; -import Constants._; +import Node._ +import Constants._ import uncore._ class ioDebug extends Bundle diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 9b79e2ce..eb4c4b80 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -1,10 +1,10 @@ package rocket -import Chisel._; -import Node._; -import Constants._; -import scala.math._; +import Chisel._ +import Node._ +import Constants._ import uncore._ +import scala.math._ // interface between I$ and pipeline/ITLB (32 bits wide) class ioImem extends Bundle diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 42aad6ef..377f2058 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -1,7 +1,8 @@ package rocket import Chisel._ -import Node._; +import Node._ +import Constants._ object Instructions { diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala index dac04fa2..dda612a9 100644 --- a/rocket/src/main/scala/itlb.scala +++ b/rocket/src/main/scala/itlb.scala @@ -1,9 +1,9 @@ package rocket -import Chisel._; -import Node._; -import Constants._; -import scala.math._; +import Chisel._ +import Node._ +import Constants._ +import scala.math._ class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { val clear = Bool(INPUT); diff --git 
a/rocket/src/main/scala/memserdes.scala b/rocket/src/main/scala/memserdes.scala index 712dec16..737b835b 100644 --- a/rocket/src/main/scala/memserdes.scala +++ b/rocket/src/main/scala/memserdes.scala @@ -3,8 +3,8 @@ package rocket import Chisel._ import Node._ import Constants._ -import scala.math._ import uncore._ +import scala.math._ class ioMemSerialized extends Bundle { diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala new file mode 100644 index 00000000..108c5860 --- /dev/null +++ b/rocket/src/main/scala/package.scala @@ -0,0 +1,29 @@ +package rocket +import rocket.constants._ + +import Chisel._ +import scala.math._ + +//TODO: When compiler bug SI-5604 is fixed in 2.10, change object Constants to +// package object rocket and remove import Constants._'s from other files +object Constants extends HTIFConstants with + MemoryOpConstants with + PCRConstants with + InterruptConstants with + AddressConstants with + ArbiterConstants with + VectorOpConstants with + TLBConstants with + ScalarOpConstants with + MemoryInterfaceConstants +{ + val HAVE_RVC = false + val HAVE_FPU = true + val HAVE_VEC = true + + val MAX_THREADS = + hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK + + val START_ADDR = 0x2000 + +} diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index d0c540c5..92550d77 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -1,9 +1,9 @@ package rocket -import Chisel._; -import Node._; -import Constants._; -import scala.math._; +import Chisel._ +import Node._ +import Constants._ +import scala.math._ class ioHellaCacheArbiter(n: Int) extends Bundle { diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index f414ff6e..7028c880 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -1,7 +1,8 @@ package rocket import Chisel._ -import Node._; +import Node._ +import 
Constants._ class SkidBuffer[T <: Data](resetSignal: Bool = null)(data: => T) extends Component(resetSignal) { diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 4390f8d7..42413045 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -1,19 +1,21 @@ package rocket import Chisel._ -import Node._; +import Node._ +import Constants._ import uncore._ -import Constants._; import collection.mutable.ArrayBuffer -class Top extends Component -{ - val io = new Bundle { - val debug = new ioDebug - val host = new ioHost(HTIF_WIDTH) - val mem = new ioMemPipe - } +object DummyTopLevelConstants extends rocket.constants.CoherenceConfigConstants { +// val NTILES = 1 + val ENABLE_SHARING = true + val ENABLE_CLEAN_EXCLUSIVE = true +} +import DummyTopLevelConstants._ + +class Top extends Component +{ val co = if(ENABLE_SHARING) { if(ENABLE_CLEAN_EXCLUSIVE) new MESICoherence else new MSICoherence @@ -22,6 +24,12 @@ class Top extends Component else new MICoherence } + val io = new Bundle { + val debug = new ioDebug + val host = new ioHost(HTIF_WIDTH) + val mem = new ioMemPipe + } + val htif = new rocketHTIF(HTIF_WIDTH, NTILES, co) val hub = new CoherenceHubBroadcast(NTILES+1, co) hub.io.tiles(NTILES) <> htif.io.mem diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 19856386..c60ae47d 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -1,8 +1,6 @@ package rocket import Chisel._ -import Node._ -import scala.math._ class Mux1H [T <: Data](n: Int)(gen: => T) extends Component { From 9025d0610c19168422287c993a2137bd6ef77eb3 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 7 Oct 2012 22:37:29 -0700 Subject: [PATCH 0474/1087] first pass at configuration object passed as implicit parameter --- rocket/src/main/scala/cpu.scala | 12 +++--- rocket/src/main/scala/dpath.scala | 6 +-- rocket/src/main/scala/dpath_util.scala | 4 +- rocket/src/main/scala/htif.scala 
| 18 ++++----- rocket/src/main/scala/icache.scala | 4 +- rocket/src/main/scala/nbdcache.scala | 54 +++++++++++++------------- rocket/src/main/scala/tile.scala | 8 ++-- rocket/src/main/scala/top.scala | 6 ++- 8 files changed, 57 insertions(+), 55 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index c0a2f1c7..412d7e11 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -1,19 +1,19 @@ package rocket -import Chisel._; -import Node._; -import Constants._; +import Chisel._ +import Node._ +import Constants._ import hwacha._ -class ioRocket extends Bundle() +class ioRocket()(implicit conf: Configuration) extends Bundle { - val host = new ioHTIF + val host = new ioHTIF() val imem = (new ioImem).flip val vimem = (new ioImem).flip val dmem = new ioHellaCache } -class rocketProc extends Component +class rocketProc()(implicit conf: Configuration) extends Component { val io = new ioRocket diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a4d259c8..fc199520 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -12,9 +12,9 @@ class ioDpathImem extends Bundle() val resp_data = Bits(INPUT, 32); } -class ioDpathAll extends Bundle() +class ioDpathAll()(implicit conf: Configuration) extends Bundle() { - val host = new ioHTIF(); + val host = new ioHTIF() val ctrl = new ioCtrlDpath().flip val dmem = new ioHellaCache val dtlb = new ioDTLB_CPU_req_bundle().asOutput() @@ -28,7 +28,7 @@ class ioDpathAll extends Bundle() val vec_imul_resp = Bits(INPUT, hwacha.Constants.SZ_XLEN) } -class rocketDpath extends Component +class rocketDpath()(implicit conf: Configuration) extends Component { val io = new ioDpathAll(); diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index e648a1e2..66e6dec9 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -57,7 +57,7 @@ 
class rocketDpathBTB(entries: Int) extends Component io.target := mux.io.out.toUFix } -class ioDpathPCR extends Bundle() +class ioDpathPCR()(implicit conf: Configuration) extends Bundle() { val host = new ioHTIF() val r = new ioReadPort(); @@ -86,7 +86,7 @@ class ioDpathPCR extends Bundle() val vec_nfregs = UFix(INPUT, 6) } -class rocketDpathPCR extends Component +class rocketDpathPCR()(implicit conf: Configuration) extends Component { val io = new ioDpathPCR(); diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 33a56c56..761bb59d 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -23,21 +23,21 @@ class PCRReq extends Bundle val data = Bits(width = 64) } -class ioHTIF extends Bundle +class ioHTIF()(implicit conf: Configuration) extends Bundle { val reset = Bool(INPUT) val debug = new ioDebug val pcr_req = (new FIFOIO) { new PCRReq }.flip val pcr_rep = (new FIFOIO) { Bits(width = 64) } - val ipi_req = (new FIFOIO) { Bits(width = log2Up(NTILES)) } + val ipi_req = (new FIFOIO) { Bits(width = log2Up(conf.ntiles)) } val ipi_rep = (new FIFOIO) { Bool() }.flip } -class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends Component +class rocketHTIF(w: Int)(implicit conf: Configuration) extends Component { val io = new Bundle { val host = new ioHost(w) - val cpu = Vec(ncores) { new ioHTIF().flip } + val cpu = Vec(conf.ntiles) { new ioHTIF().flip } val mem = new ioTileLink } @@ -78,7 +78,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.width-1, 0) - val pcr_coreid = if (ncores == 1) UFix(0) else addr(20+log2Up(ncores),20) + val pcr_coreid = if (conf.ntiles == 1) UFix(0) else addr(20+log2Up(conf.ntiles),20) val pcr_wdata = packet_ram(0) val bad_mem_packet = size(OFFSET_BITS-1-3,0).orR || 
addr(OFFSET_BITS-1-3,0).orR @@ -178,7 +178,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C } x_init.io.enq.valid := state === state_mem_req val init_addr = addr.toUFix >> UFix(OFFSET_BITS-3) - x_init.io.enq.bits := Mux(cmd === cmd_writemem, co.getUncachedWriteTransactionInit(init_addr, UFix(0)), co.getUncachedReadTransactionInit(init_addr, UFix(0))) + x_init.io.enq.bits := Mux(cmd === cmd_writemem, conf.co.getUncachedWriteTransactionInit(init_addr, UFix(0)), conf.co.getUncachedReadTransactionInit(init_addr, UFix(0))) io.mem.xact_init <> x_init.io.deq io.mem.xact_init_data.valid:= state === state_mem_wdata io.mem.xact_init_data.bits.data := mem_req_data @@ -189,8 +189,8 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C io.mem.probe_rep_data.valid := Bool(false) io.mem.incoherent := Bool(true) - val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } - for (i <- 0 until ncores) { + val pcr_mux = (new Mux1H(conf.ntiles)) { Bits(width = 64) } + for (i <- 0 until conf.ntiles) { val my_reset = Reg(resetVal = Bool(true)) val my_ipi = Reg(resetVal = Bool(false)) val rdata = Reg() { Bits() } @@ -208,7 +208,7 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C } cpu.ipi_rep.valid := my_ipi cpu.ipi_req.ready := Bool(true) - for (j <- 0 until ncores) { + for (j <- 0 until conf.ntiles) { when (io.cpu(j).ipi_req.valid && io.cpu(j).ipi_req.bits === UFix(i)) { my_ipi := Bool(true) } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index eb4c4b80..c0fa675e 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -28,7 +28,7 @@ class ioRocketICache extends Bundle() // 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines // parameters : // lines = # cache lines -class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) extends Component +class rocketICache(sets: Int, assoc: 
Int)(implicit conf: Configuration) extends Component { val io = new ioRocketICache(); @@ -137,7 +137,7 @@ class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) exten rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); io.cpu.resp_data := data_mux.io.out io.mem.xact_init.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.xact_init.bits := co.getUncachedReadTransactionInit(r_cpu_miss_addr(tagmsb,indexlsb).toUFix, UFix(0)) + io.mem.xact_init.bits := conf.co.getUncachedReadTransactionInit(r_cpu_miss_addr(tagmsb,indexlsb).toUFix, UFix(0)) io.mem.xact_finish <> finish_q.io.deq // control state machine diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 951d6adc..a2f0839a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -159,7 +159,7 @@ class MetaArrayReq extends Bundle { val data = new MetaData() } -class MSHR(id: Int, co: CoherencePolicy) extends Component { +class MSHR(id: Int)(implicit conf: Configuration) extends Component { val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) @@ -197,7 +197,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { val req_cmd = io.req_bits.cmd val req_use_rpq = (req_cmd != M_PFR) && (req_cmd != M_PFW) && (req_cmd != M_FLA) val idx_match = req.idx === io.req_bits.idx - val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) + val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) val rpq = (new Queue(NRPQ)) { new RPQEntry } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && 
sec_rdy) && req_use_rpq @@ -221,7 +221,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { when (refill_done) { state := s_drain_rpq } when (reply) { refill_count := refill_count + UFix(1) - line_state := co.newStateOnTransactionReply(io.mem_rep.bits, io.mem_req.bits) + line_state := conf.co.newStateOnTransactionReply(io.mem_rep.bits, io.mem_req.bits) } when (abort) { state := s_refill_req } } @@ -243,13 +243,13 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req - xacx_type := co.getTransactionInitTypeOnSecondaryMiss(req_cmd, co.newStateOnFlush(), io.mem_req.bits) + xacx_type := conf.co.getTransactionInitTypeOnSecondaryMiss(req_cmd, conf.co.newStateOnFlush(), io.mem_req.bits) } when ((state === s_invalid) && io.req_pri_val) { flush := req_cmd === M_FLA - line_state := co.newStateOnFlush() + line_state := conf.co.newStateOnFlush() refill_count := UFix(0) - xacx_type := co.getTransactionInitTypeOnPrimaryMiss(req_cmd, co.newStateOnFlush()) + xacx_type := conf.co.getTransactionInitTypeOnPrimaryMiss(req_cmd, conf.co.newStateOnFlush()) req := io.req_bits when (io.req_bits.tag_miss) { @@ -268,7 +268,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { io.meta_req.valid := (state === s_drain_rpq) && !rpq.io.deq.valid && !finish_q.io.deq.valid || (state === s_meta_clear) io.meta_req.bits.rw := Bool(true) io.meta_req.bits.idx := req.idx - io.meta_req.bits.data.state := Mux(state === s_meta_clear, co.newStateOnFlush(), line_state) + io.meta_req.bits.data.state := Mux(state === s_meta_clear, conf.co.newStateOnFlush(), line_state) io.meta_req.bits.data.tag := req.tag io.meta_req.bits.way_en := req.way_oh @@ -293,7 +293,7 @@ class MSHR(id: Int, co: CoherencePolicy) extends Component { io.replay.bits.way_oh := req.way_oh } -class MSHRFile(co: CoherencePolicy) extends Component { +class MSHRFile()(implicit conf: Configuration) extends Component { val io = new 
Bundle { val req = (new FIFOIO) { new MSHRReq }.flip val secondary_miss = Bool(OUTPUT) @@ -346,7 +346,7 @@ class MSHRFile(co: CoherencePolicy) extends Component { var refill_probe_rdy = Bool(true) for (i <- 0 to NMSHR-1) { - val mshr = new MSHR(i, co) + val mshr = new MSHR(i) tag_mux.io.sel(i) := mshr.io.idx_match tag_mux.io.in(i) := mshr.io.tag @@ -415,7 +415,7 @@ class MSHRFile(co: CoherencePolicy) extends Component { } -class WritebackUnit(co: CoherencePolicy) extends Component { +class WritebackUnit()(implicit conf: Configuration) extends Component { val io = new Bundle { val req = (new FIFOIO) { new WritebackReq() }.flip val probe = (new FIFOIO) { new WritebackReq() }.flip @@ -475,7 +475,7 @@ class WritebackUnit(co: CoherencePolicy) extends Component { io.data_req.bits.data := Bits(0) io.mem_req.valid := valid && !cmd_sent - io.mem_req.bits.x_type := co.getTransactionInitTypeOnWriteback() + io.mem_req.bits.x_type := conf.co.getTransactionInitTypeOnWriteback() io.mem_req.bits.addr := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := req.tile_xact_id io.mem_req_data.valid := data_req_fired && !is_probe @@ -484,7 +484,7 @@ class WritebackUnit(co: CoherencePolicy) extends Component { io.probe_rep_data.bits.data := io.data_resp } -class ProbeUnit(co: CoherencePolicy) extends Component { +class ProbeUnit()(implicit conf: Configuration) extends Component { val io = new Bundle { val req = (new FIFOIO) { new ProbeRequest }.flip val rep = (new FIFOIO) { new ProbeReply } @@ -510,7 +510,7 @@ class ProbeUnit(co: CoherencePolicy) extends Component { state := s_writeback_resp } when ((state === s_probe_rep) && io.meta_req.ready && io.rep.ready) { - state := Mux(hit && co.needsWriteback(line_state), s_writeback_req, s_invalid) + state := Mux(hit && conf.co.needsWriteback(line_state), s_writeback_req, s_invalid) } when ((state === s_mshr_req) && io.mshr_req.ready) { state := s_meta_req @@ -531,13 +531,13 @@ class ProbeUnit(co: CoherencePolicy) extends Component { 
io.req.ready := state === s_invalid io.rep.valid := state === s_probe_rep && io.meta_req.ready - io.rep.bits := co.newProbeReply(req, Mux(hit, line_state, co.newStateOnFlush())) + io.rep.bits := conf.co.newProbeReply(req, Mux(hit, line_state, conf.co.newStateOnFlush)) io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_mshr_req || state === s_probe_rep && hit io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, ~UFix(0, NWAYS)) io.meta_req.bits.rw := state === s_probe_rep io.meta_req.bits.idx := req.addr - io.meta_req.bits.data.state := co.newStateOnProbeRequest(req, line_state) + io.meta_req.bits.data.state := conf.co.newStateOnProbeRequest(req, line_state) io.meta_req.bits.data.tag := req.addr >> UFix(IDX_BITS) io.mshr_req.valid := state === s_meta_resp || state === s_mshr_req io.addr := req.addr @@ -548,7 +548,7 @@ class ProbeUnit(co: CoherencePolicy) extends Component { io.wb_req.bits.tag := req.addr >> UFix(IDX_BITS) } -class FlushUnit(lines: Int, co: CoherencePolicy) extends Component { +class FlushUnit(lines: Int)(implicit conf: Configuration) extends Component { val io = new Bundle { val req = (new FIFOIO) { Bool() }.flip val meta_req = (new FIFOIO) { new MetaArrayReq() } @@ -593,7 +593,7 @@ class FlushUnit(lines: Int, co: CoherencePolicy) extends Component { io.meta_req.bits.way_en := UFixToOH(way_cnt, NWAYS) io.meta_req.bits.idx := idx_cnt io.meta_req.bits.rw := (state === s_reset) - io.meta_req.bits.data.state := co.newStateOnFlush() + io.meta_req.bits.data.state := conf.co.newStateOnFlush() io.meta_req.bits.data.tag := UFix(0) } @@ -748,7 +748,7 @@ class ioHellaCache extends Bundle { val xcpt = (new HellaCacheExceptions).asInput } -class HellaCache(co: CoherencePolicy) extends Component { +class HellaCache()(implicit conf: Configuration) extends Component { val io = new Bundle { val cpu = (new ioHellaCache).flip val mem = new ioTileLink @@ -801,10 +801,10 @@ class HellaCache(co: CoherencePolicy) extends Component { 
val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch val nack_hit = Bool() - val wb = new WritebackUnit(co) - val prober = new ProbeUnit(co) - val mshr = new MSHRFile(co) - val flusher = new FlushUnit(lines, co) + val wb = new WritebackUnit + val prober = new ProbeUnit + val mshr = new MSHRFile + val flusher = new FlushUnit(lines) val replay_amo_val = mshr.io.data_req.valid && mshr.io.data_req.bits.cmd(3).toBool // reset and flush unit @@ -863,10 +863,10 @@ class HellaCache(co: CoherencePolicy) extends Component { val early_tag_nack = !meta_arb.io.in(3).ready val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req.bits.ppn) val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) - val tag_match_arr = (0 until NWAYS).map( w => co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_match_arr = (0 until NWAYS).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR val tag_match_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec - val tag_hit_arr = (0 until NWAYS).map( w => co.isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_hit_arr = (0 until NWAYS).map( w => conf.co.isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_hit = Cat(Bits(0),tag_hit_arr:_*).orR val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), tag_match_way_oh, meta.io.way_en) val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), tag_match_way_oh, data.io.way_en) @@ -892,7 +892,7 @@ class HellaCache(co: CoherencePolicy) extends Component { data_arb.io.in(0).bits.wmask := ~UFix(0, MEM_DATA_BITS/8) data_arb.io.in(0).bits.data := io.mem.xact_rep.bits.data data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh - data_arb.io.in(0).valid := io.mem.xact_rep.valid && 
co.messageUpdatesDataArray(io.mem.xact_rep.bits) + data_arb.io.in(0).valid := io.mem.xact_rep.valid && conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) // load hits data_arb.io.in(4).bits.offset := io.cpu.req.bits.idx(offsetmsb,ramindexlsb) @@ -922,7 +922,7 @@ class HellaCache(co: CoherencePolicy) extends Component { p_store_valid := p_store_valid && !drain_store || (r_cpu_req_val && tag_hit && r_req_store && mshr.io.req.ready && !nack_hit) || p_amo // tag update after a store to an exclusive clean line. - val new_hit_state = co.newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) + val new_hit_state = conf.co.newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) val set_hit_state = r_cpu_req_val && tag_hit && meta_resp_mux.state != new_hit_state meta.io.state_req.bits.rw := Bool(true) meta.io.state_req.bits.idx := Reg(r_cpu_req_idx(indexmsb,indexlsb)) @@ -946,7 +946,7 @@ class HellaCache(co: CoherencePolicy) extends Component { // miss handling mshr.io.req.valid := r_cpu_req_val && r_req_readwrite && !nack_hit || flusher.io.mshr_req.valid mshr.io.req.bits.tag_miss := !tag_hit || flusher.io.mshr_req.valid - mshr.io.req.bits.old_dirty := co.needsWriteback(meta_wb_mux.state) && (!tag_match || flusher.io.mshr_req.valid) // don't wb upgrades + mshr.io.req.bits.old_dirty := conf.co.needsWriteback(meta_wb_mux.state) && (!tag_match || flusher.io.mshr_req.valid) // don't wb upgrades mshr.io.req.bits.old_tag := meta_wb_mux.tag mshr.io.req.bits.tag := cpu_req_tag mshr.io.req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 5e621808..30e961d6 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -5,7 +5,7 @@ import Node._ import Constants._ import uncore._ -class Tile(co: CoherencePolicyWithUncached, resetSignal: Bool = null) extends Component(resetSignal) +class Tile(resetSignal: Bool = null)(implicit conf: Configuration) extends Component(resetSignal) { val io 
= new Bundle { val tilelink = new ioTileLink @@ -13,8 +13,8 @@ class Tile(co: CoherencePolicyWithUncached, resetSignal: Bool = null) extends Co } val cpu = new rocketProc - val icache = new rocketICache(128, 4, co) // 128 sets x 4 ways (32KB) - val dcache = new HellaCache(co) + val icache = new rocketICache(128, 4) // 128 sets x 4 ways (32KB) + val dcache = new HellaCache val arbiter = new rocketMemArbiter(2 + (if (HAVE_VEC) 1 else 0)) arbiter.io.requestor(0) <> dcache.io.mem @@ -31,7 +31,7 @@ class Tile(co: CoherencePolicyWithUncached, resetSignal: Bool = null) extends Co if (HAVE_VEC) { - val vicache = new rocketICache(128, 1, co) // 128 sets x 1 ways (8KB) + val vicache = new rocketICache(128, 1) // 128 sets x 1 ways (8KB) arbiter.io.requestor(2) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 42413045..35c41e79 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -13,6 +13,7 @@ object DummyTopLevelConstants extends rocket.constants.CoherenceConfigConstants } import DummyTopLevelConstants._ +case class Configuration(ntiles: Int, co: CoherencePolicyWithUncached) class Top extends Component { @@ -23,6 +24,7 @@ class Top extends Component if(ENABLE_CLEAN_EXCLUSIVE) new MEICoherence else new MICoherence } + implicit val conf = Configuration(NTILES, co) val io = new Bundle { val debug = new ioDebug @@ -30,7 +32,7 @@ class Top extends Component val mem = new ioMemPipe } - val htif = new rocketHTIF(HTIF_WIDTH, NTILES, co) + val htif = new rocketHTIF(HTIF_WIDTH) val hub = new CoherenceHubBroadcast(NTILES+1, co) hub.io.tiles(NTILES) <> htif.io.mem io.host <> htif.io.host @@ -44,7 +46,7 @@ class Top extends Component for (i <- 0 until NTILES) { val hl = htif.io.cpu(i) val tl = hub.io.tiles(i) - val tile = new Tile(co, resetSignal = hl.reset) + val tile = new Tile(resetSignal = hl.reset) tile.io.host.reset := Reg(Reg(hl.reset)) tile.io.host.pcr_req <> 
Queue(hl.pcr_req) From fe21142972611d31f63ec783a7f80954ef05d92f Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Tue, 9 Oct 2012 13:02:58 -0700 Subject: [PATCH 0475/1087] fixed memdessert unpacking --- rocket/src/main/scala/memserdes.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/memserdes.scala b/rocket/src/main/scala/memserdes.scala index 712dec16..e8d99514 100644 --- a/rocket/src/main/scala/memserdes.scala +++ b/rocket/src/main/scala/memserdes.scala @@ -144,8 +144,8 @@ class MemDessert extends Component // test rig side val req_cmd = in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (abits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH) io.wide.req_cmd.valid := state === s_cmd io.wide.req_cmd.bits.tag := req_cmd - io.wide.req_cmd.bits.addr := req_cmd.toUFix >> UFix(io.wide.req_cmd.bits.tag.width) - io.wide.req_cmd.bits.rw := req_cmd(io.wide.req_cmd.bits.tag.width + io.wide.req_cmd.bits.addr.width) + io.wide.req_cmd.bits.addr := req_cmd.toUFix >> UFix(io.wide.req_cmd.bits.tag.width + io.wide.req_cmd.bits.rw.width) + io.wide.req_cmd.bits.rw := req_cmd(io.wide.req_cmd.bits.tag.width) io.wide.req_data.valid := state === s_data io.wide.req_data.bits.data := in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (dbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH) From 1864e41361f6b877ebca8c69009014366f7afcc3 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Wed, 10 Oct 2012 15:25:24 -0700 Subject: [PATCH 0476/1087] memserdes + slowio out of rocket and into uncore --- rocket/src/main/scala/memserdes.scala | 159 -------------------------- rocket/src/main/scala/slowio.scala | 50 -------- 2 files changed, 209 deletions(-) delete mode 100644 rocket/src/main/scala/memserdes.scala delete mode 100644 rocket/src/main/scala/slowio.scala diff --git a/rocket/src/main/scala/memserdes.scala b/rocket/src/main/scala/memserdes.scala deleted file mode 100644 index e8d99514..00000000 --- 
a/rocket/src/main/scala/memserdes.scala +++ /dev/null @@ -1,159 +0,0 @@ -package rocket - -import Chisel._ -import Node._ -import Constants._ -import scala.math._ -import uncore._ - -class ioMemSerialized extends Bundle -{ - val req = (new FIFOIO) { Bits(width = MEM_BACKUP_WIDTH) } - val resp = (new PipeIO) { Bits(width = MEM_BACKUP_WIDTH) }.flip -} - -class MemSerdes extends Component -{ - val io = new Bundle { - val wide = new ioMem().flip - val narrow = new ioMemSerialized - } - val abits = io.wide.req_cmd.bits.toBits.getWidth - val dbits = io.wide.req_data.bits.toBits.getWidth - val rbits = io.wide.resp.bits.getWidth - - val out_buf = Reg() { Bits() } - val in_buf = Reg() { Bits() } - - val s_idle :: s_read_addr :: s_write_addr :: s_write_idle :: s_write_data :: Nil = Enum(5) { UFix() } - val state = Reg(resetVal = s_idle) - val send_cnt = Reg(resetVal = UFix(0, log2Up((max(abits, dbits)+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) - val data_send_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - val adone = io.narrow.req.ready && send_cnt === UFix((abits-1)/MEM_BACKUP_WIDTH) - val ddone = io.narrow.req.ready && send_cnt === UFix((dbits-1)/MEM_BACKUP_WIDTH) - - when (io.narrow.req.valid && io.narrow.req.ready) { - send_cnt := send_cnt + UFix(1) - out_buf := out_buf >> UFix(MEM_BACKUP_WIDTH) - } - when (io.wide.req_cmd.valid && io.wide.req_cmd.ready) { - out_buf := io.wide.req_cmd.bits.toBits - } - when (io.wide.req_data.valid && io.wide.req_data.ready) { - out_buf := io.wide.req_data.bits.toBits - } - - io.wide.req_cmd.ready := state === s_idle - io.wide.req_data.ready := state === s_write_idle - io.narrow.req.valid := state === s_read_addr || state === s_write_addr || state === s_write_data - io.narrow.req.bits := out_buf - - when (state === s_idle && io.wide.req_cmd.valid) { - state := Mux(io.wide.req_cmd.bits.rw, s_write_addr, s_read_addr) - } - when (state === s_read_addr && adone) { - state := s_idle - send_cnt := UFix(0) - } - when (state === s_write_addr 
&& adone) { - state := s_write_idle - send_cnt := UFix(0) - } - when (state === s_write_idle && io.wide.req_data.valid) { - state := s_write_data - } - when (state === s_write_data && ddone) { - data_send_cnt := data_send_cnt + UFix(1) - state := Mux(data_send_cnt === UFix(REFILL_CYCLES-1), s_idle, s_write_idle) - send_cnt := UFix(0) - } - - val recv_cnt = Reg(resetVal = UFix(0, log2Up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) - val data_recv_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - val resp_val = Reg(resetVal = Bool(false)) - - resp_val := Bool(false) - when (io.narrow.resp.valid) { - recv_cnt := recv_cnt + UFix(1) - when (recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH)) { - recv_cnt := UFix(0) - data_recv_cnt := data_recv_cnt + UFix(1) - resp_val := Bool(true) - } - in_buf := Cat(io.narrow.resp.bits, in_buf((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH*MEM_BACKUP_WIDTH-1,MEM_BACKUP_WIDTH)) - } - - io.wide.resp.valid := resp_val - io.wide.resp.bits.tag := in_buf(io.wide.resp.bits.tag.width-1,0) - io.wide.resp.bits.data := in_buf >> UFix(io.wide.resp.bits.tag.width) -} - -class MemDessert extends Component // test rig side -{ - val io = new Bundle { - val narrow = new ioMemSerialized().flip - val wide = new ioMem - } - val abits = io.wide.req_cmd.bits.toBits.getWidth - val dbits = io.wide.req_data.bits.toBits.getWidth - val rbits = io.wide.resp.bits.getWidth - - require(dbits >= abits && rbits >= dbits) - val recv_cnt = Reg(resetVal = UFix(0, log2Up((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH))) - val data_recv_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - val adone = io.narrow.req.valid && recv_cnt === UFix((abits-1)/MEM_BACKUP_WIDTH) - val ddone = io.narrow.req.valid && recv_cnt === UFix((dbits-1)/MEM_BACKUP_WIDTH) - val rdone = io.narrow.resp.valid && recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH) - - val s_cmd_recv :: s_cmd :: s_data_recv :: s_data :: s_reply :: Nil = Enum(5) { UFix() } - val state = Reg(resetVal = s_cmd_recv) - - val 
in_buf = Reg() { Bits() } - when (io.narrow.req.valid && io.narrow.req.ready || io.narrow.resp.valid) { - recv_cnt := recv_cnt + UFix(1) - in_buf := Cat(io.narrow.req.bits, in_buf((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH*MEM_BACKUP_WIDTH-1,MEM_BACKUP_WIDTH)) - } - io.narrow.req.ready := state === s_cmd_recv || state === s_data_recv - - when (state === s_cmd_recv && adone) { - state := s_cmd - recv_cnt := UFix(0) - } - when (state === s_cmd && io.wide.req_cmd.ready) { - state := Mux(io.wide.req_cmd.bits.rw, s_data_recv, s_reply) - } - when (state === s_data_recv && ddone) { - state := s_data - recv_cnt := UFix(0) - } - when (state === s_data && io.wide.req_data.ready) { - state := s_data_recv - when (data_recv_cnt === UFix(REFILL_CYCLES-1)) { - state := s_cmd_recv - } - data_recv_cnt := data_recv_cnt + UFix(1) - } - when (rdone) { // state === s_reply - when (data_recv_cnt === UFix(REFILL_CYCLES-1)) { - state := s_cmd_recv - } - recv_cnt := UFix(0) - data_recv_cnt := data_recv_cnt + UFix(1) - } - - val req_cmd = in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (abits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH) - io.wide.req_cmd.valid := state === s_cmd - io.wide.req_cmd.bits.tag := req_cmd - io.wide.req_cmd.bits.addr := req_cmd.toUFix >> UFix(io.wide.req_cmd.bits.tag.width + io.wide.req_cmd.bits.rw.width) - io.wide.req_cmd.bits.rw := req_cmd(io.wide.req_cmd.bits.tag.width) - - io.wide.req_data.valid := state === s_data - io.wide.req_data.bits.data := in_buf >> UFix(((rbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH - (dbits+MEM_BACKUP_WIDTH-1)/MEM_BACKUP_WIDTH)*MEM_BACKUP_WIDTH) - - val dataq = (new Queue(REFILL_CYCLES)) { new MemResp } - dataq.io.enq <> io.wide.resp - dataq.io.deq.ready := recv_cnt === UFix((rbits-1)/MEM_BACKUP_WIDTH) - - io.narrow.resp.valid := dataq.io.deq.valid - io.narrow.resp.bits := dataq.io.deq.bits.toBits >> (recv_cnt * UFix(MEM_BACKUP_WIDTH)) -} diff --git a/rocket/src/main/scala/slowio.scala 
b/rocket/src/main/scala/slowio.scala deleted file mode 100644 index 068e90c5..00000000 --- a/rocket/src/main/scala/slowio.scala +++ /dev/null @@ -1,50 +0,0 @@ -package rocket - -import Chisel._ -import Constants._ - -class slowIO[T <: Data](val divisor: Int, hold_cycles_in: Int = -1)(data: => T) extends Component -{ - val io = new Bundle { - val out_fast = new FIFOIO()(data).flip - val out_slow = new FIFOIO()(data) - - val in_fast = new FIFOIO()(data) - val in_slow = new FIFOIO()(data).flip - - val clk_slow = Bool(OUTPUT) - } - - val hold_cycles = if (hold_cycles_in == -1) divisor/4 else hold_cycles_in - require((divisor & (divisor-1)) == 0) - require(hold_cycles < divisor/2 && hold_cycles >= 1) - - val cnt = Reg() { UFix(width = log2Up(divisor)) } - cnt := cnt + UFix(1) - val out_en = cnt === UFix(divisor/2+hold_cycles-1) // rising edge + hold time - val in_en = cnt === UFix(divisor/2-1) // rising edge - - val in_slow_rdy = Reg(resetVal = Bool(false)) - val out_slow_val = Reg(resetVal = Bool(false)) - val out_slow_bits = Reg() { data } - - val fromhost_q = new Queue(1)(data) - fromhost_q.io.enq.valid := in_en && (io.in_slow.valid && in_slow_rdy || reset) - fromhost_q.io.enq.bits := io.in_slow.bits - fromhost_q.io.deq <> io.in_fast - - val tohost_q = new Queue(1)(data) - tohost_q.io.enq <> io.out_fast - tohost_q.io.deq.ready := in_en && io.out_slow.ready && out_slow_val - - when (out_en) { - in_slow_rdy := fromhost_q.io.enq.ready - out_slow_val := tohost_q.io.deq.valid - out_slow_bits := Mux(reset, fromhost_q.io.deq.bits, tohost_q.io.deq.bits) - } - - io.in_slow.ready := in_slow_rdy - io.out_slow.valid := out_slow_val - io.out_slow.bits := out_slow_bits - io.clk_slow := cnt(log2Up(divisor)-1).toBool -} From 5d2a470215ed1d5c23efa30055be5c1555fd2c7f Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 8 Oct 2012 13:06:45 -0700 Subject: [PATCH 0477/1087] all rocket-specific arbiters in one file and refactored traits slightly --- rocket/src/main/scala/arbiter.scala | 
67 +++++++++++++++++++++ rocket/src/main/scala/consts.scala | 90 +++++++++++++++++------------ rocket/src/main/scala/cpu.scala | 62 ++++++++++---------- rocket/src/main/scala/package.scala | 11 ++-- rocket/src/main/scala/ptw.scala | 66 --------------------- rocket/src/main/scala/tile.scala | 8 +-- rocket/src/main/scala/top.scala | 4 +- 7 files changed, 162 insertions(+), 146 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 333397af..9e27f6ed 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -2,8 +2,75 @@ package rocket import Chisel._ import Node._ +import Constants._ import uncore._ +class ioHellaCacheArbiter(n: Int) extends Bundle +{ + val requestor = Vec(n) { new ioHellaCache() }.flip + val mem = new ioHellaCache +} + +class rocketHellaCacheArbiter(n: Int) extends Component +{ + val io = new ioHellaCacheArbiter(n) + require(DCACHE_TAG_BITS >= log2Up(n) + CPU_TAG_BITS) + + var req_val = Bool(false) + var req_rdy = io.mem.req.ready + for (i <- 0 until n) + { + io.requestor(i).req.ready := req_rdy + req_val = req_val || io.requestor(i).req.valid + req_rdy = req_rdy && !io.requestor(i).req.valid + } + + var req_cmd = io.requestor(n-1).req.bits.cmd + var req_type = io.requestor(n-1).req.bits.typ + var req_idx = io.requestor(n-1).req.bits.idx + var req_ppn = io.requestor(n-1).req.bits.ppn + var req_data = io.requestor(n-1).req.bits.data + var req_kill = io.requestor(n-1).req.bits.kill + var req_tag = io.requestor(n-1).req.bits.tag + for (i <- n-1 to 0 by -1) + { + val r = io.requestor(i).req + req_cmd = Mux(r.valid, r.bits.cmd, req_cmd) + req_type = Mux(r.valid, r.bits.typ, req_type) + req_idx = Mux(r.valid, r.bits.idx, req_idx) + req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn) + req_data = Mux(Reg(r.valid), r.bits.data, req_data) + req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill) + req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2Up(n))), req_tag) + } + + 
io.mem.req.valid := req_val + io.mem.req.bits.cmd := req_cmd + io.mem.req.bits.typ := req_type + io.mem.req.bits.idx := req_idx + io.mem.req.bits.ppn := req_ppn + io.mem.req.bits.data := req_data + io.mem.req.bits.kill := req_kill + io.mem.req.bits.tag := req_tag + + for (i <- 0 until n) + { + val r = io.requestor(i).resp + val x = io.requestor(i).xcpt + val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i) + x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid) + x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid) + r.valid := io.mem.resp.valid && tag_hit + r.bits.miss := io.mem.resp.bits.miss && tag_hit + r.bits.nack := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid) + r.bits.replay := io.mem.resp.bits.replay && tag_hit + r.bits.data := io.mem.resp.bits.data + r.bits.data_subword := io.mem.resp.bits.data_subword + r.bits.typ := io.mem.resp.bits.typ + r.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) + } +} + class ioUncachedRequestor extends Bundle { val xact_init = (new FIFOIO) { new TransactionInit } val xact_abort = (new FIFOIO) { new TransactionAbort }.flip diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 1e3d0578..9ec91845 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -5,7 +5,7 @@ import Chisel._ import scala.math._ abstract trait MulticoreConstants { - val NTILES: Int = 1 + val NTILES: Int val TILE_ID_BITS = log2Up(NTILES)+1 } @@ -19,19 +19,46 @@ trait UncoreConstants { val GLOBAL_XACT_ID_BITS = log2Up(NGLOBAL_XACTS) } -trait HTIFConstants { - val HTIF_WIDTH = 16 - val MEM_BACKUP_WIDTH = HTIF_WIDTH +trait TileLinkTypeConstants { + val X_INIT_TYPE_MAX_BITS = 2 + val X_REP_TYPE_MAX_BITS = 3 + val P_REQ_TYPE_MAX_BITS = 2 + val P_REP_TYPE_MAX_BITS = 3 } -abstract trait TileConfigConstants extends UncoreConstants with MulticoreConstants { - val HAVE_RVC: Boolean - val HAVE_FPU: Boolean - val HAVE_VEC: Boolean - def FPU_N = UFix(0, 1) - 
def FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N - def VEC_N = UFix(0, 1); - def VEC_Y = if (HAVE_VEC) UFix(1, 1) else VEC_N +trait TileLinkSizeConstants extends + RocketDcacheConstants with + TileLinkTypeConstants +{ + val TILE_XACT_ID_BITS = log2Up(NMSHR)+3 + val X_INIT_WRITE_MASK_BITS = OFFSET_BITS + val X_INIT_SUBWORD_ADDR_BITS = log2Up(OFFSET_BITS) + val X_INIT_ATOMIC_OP_BITS = 4 +} + +trait HTIFConstants { + val HTIF_WIDTH = 16 +} + +trait MemoryInterfaceConstants extends + HTIFConstants with + UncoreConstants with + TileLinkSizeConstants +{ + val MEM_TAG_BITS = max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS) + val MEM_DATA_BITS = 128 + val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS + val MEM_BACKUP_WIDTH = HTIF_WIDTH +} + +abstract trait TileConfigConstants { + def HAVE_RVC: Boolean + def HAVE_FPU: Boolean + def HAVE_VEC: Boolean + val FPU_N = UFix(0, 1) + val FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N + val VEC_N = UFix(0, 1); + val VEC_Y = if (HAVE_VEC) UFix(1, 1) else VEC_N } trait ScalarOpConstants { @@ -202,39 +229,21 @@ trait AddressConstants { val PERM_BITS = 6; } -abstract trait RocketDcacheConstants extends TileConfigConstants with AddressConstants { - val DCACHE_PORTS = 3 +abstract trait RocketDcacheConstants extends ArbiterConstants with AddressConstants { val CPU_DATA_BITS = 64; val CPU_TAG_BITS = 9; val DCACHE_TAG_BITS = log2Up(DCACHE_PORTS) + CPU_TAG_BITS - val OFFSET_BITS = 6; // log2(cache line size in bytes) + val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses val NRPQ = 16; // number of secondary misses val NSDQ = 17; // number of secondary stores/AMOs - val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) + val OFFSET_BITS = 6; // log2(cache line size in bytes) val IDX_BITS = 7; val TAG_BITS = PADDR_BITS - OFFSET_BITS - IDX_BITS; val NWAYS = 4 require(IDX_BITS+OFFSET_BITS <= PGIDX_BITS); } -trait TileLinkSizeConstants extends RocketDcacheConstants { - val 
TILE_XACT_ID_BITS = log2Up(NMSHR)+3 - val X_INIT_TYPE_MAX_BITS = 2 - val X_INIT_WRITE_MASK_BITS = OFFSET_BITS - val X_INIT_SUBWORD_ADDR_BITS = log2Up(OFFSET_BITS) - val X_INIT_ATOMIC_OP_BITS = 4 - val X_REP_TYPE_MAX_BITS = 3 - val P_REQ_TYPE_MAX_BITS = 2 - val P_REP_TYPE_MAX_BITS = 3 -} - -trait MemoryInterfaceConstants extends UncoreConstants with TileLinkSizeConstants { - val MEM_TAG_BITS = max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS) - val MEM_DATA_BITS = 128 - val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS -} - trait TLBConstants { val DTLB_ENTRIES = 16 val ITLB_ENTRIES = 8; @@ -266,12 +275,19 @@ trait VectorOpConstants { val VIMM2_X = UFix(0, 1) } -trait ArbiterConstants { +abstract trait ArbiterConstants extends TileConfigConstants { + val DTLB_PORTS = 3 val DTLB_CPU = 0 val DTLB_VEC = 1 val DTLB_VPF = 2 - val DMEM_CPU = 0 - val DMEM_PTW = 1 - val DMEM_VU = 2 + val DCACHE_PORTS = 3 + val DCACHE_CPU = 0 + val DCACHE_PTW = 1 + val DCACHE_VU = 2 + + val DMEM_PORTS = if (HAVE_VEC) 3 else 2 + val DMEM_DCACHE = 0 + val DMEM_ICACHE = 1 + val DMEM_VICACHE = 2 } diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 412d7e11..8f7f49fc 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -30,8 +30,8 @@ class rocketProc()(implicit conf: Configuration) extends Component { vu = new vu() // cpu, vector prefetch, and vector use the DTLB - val dtlbarb = new RRArbiter(3)({new ioDTLB_CPU_req_bundle()}) - val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2Up(3))) + val dtlbarb = new RRArbiter(DTLB_PORTS)({new ioDTLB_CPU_req_bundle()}) + val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2Up(DTLB_PORTS))) when( dtlb.io.cpu_req.ready && dtlbarb.io.out.valid ) { dtlbchosen := dtlbarb.io.chosen } // tlb respones come out a cycle later @@ -86,15 +86,15 @@ class rocketProc()(implicit conf: Configuration) extends Component dtlb.io.invalidate := dpath.io.ptbr_wen dtlb.io.status := dpath.io.ctrl.status - 
arb.io.requestor(DMEM_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn - ctrl.io.dmem.req.ready := dtlb.io.cpu_req.ready && arb.io.requestor(DMEM_CPU).req.ready + arb.io.requestor(DCACHE_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn + ctrl.io.dmem.req.ready := dtlb.io.cpu_req.ready && arb.io.requestor(DCACHE_CPU).req.ready // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) ptw.io.requestor(0) <> itlb.io.ptw ptw.io.requestor(1) <> dtlb.io.ptw ptw.io.ptbr := dpath.io.ptbr; - arb.io.requestor(DMEM_PTW) <> ptw.io.mem + arb.io.requestor(DCACHE_PTW) <> ptw.io.mem arb.io.mem <> io.dmem ctrl.io.dpath <> dpath.io.ctrl; @@ -119,17 +119,17 @@ class rocketProc()(implicit conf: Configuration) extends Component // connect arbiter to ctrl+dpath+DTLB //TODO: views on nested bundles? - arb.io.requestor(DMEM_CPU).resp <> ctrl.io.dmem.resp - arb.io.requestor(DMEM_CPU).xcpt <> ctrl.io.dmem.xcpt - arb.io.requestor(DMEM_CPU).resp <> dpath.io.dmem.resp - arb.io.requestor(DMEM_CPU).req.valid := ctrl.io.dmem.req.valid - ctrl.io.dmem.req.ready := arb.io.requestor(DMEM_CPU).req.ready - arb.io.requestor(DMEM_CPU).req.bits.kill := ctrl.io.dmem.req.bits.kill - arb.io.requestor(DMEM_CPU).req.bits.cmd := ctrl.io.dmem.req.bits.cmd - arb.io.requestor(DMEM_CPU).req.bits.typ := ctrl.io.dmem.req.bits.typ - arb.io.requestor(DMEM_CPU).req.bits.idx := dpath.io.dmem.req.bits.idx - arb.io.requestor(DMEM_CPU).req.bits.tag := dpath.io.dmem.req.bits.tag - arb.io.requestor(DMEM_CPU).req.bits.data := dpath.io.dmem.req.bits.data + arb.io.requestor(DCACHE_CPU).resp <> ctrl.io.dmem.resp + arb.io.requestor(DCACHE_CPU).xcpt <> ctrl.io.dmem.xcpt + arb.io.requestor(DCACHE_CPU).resp <> dpath.io.dmem.resp + arb.io.requestor(DCACHE_CPU).req.valid := ctrl.io.dmem.req.valid + ctrl.io.dmem.req.ready := arb.io.requestor(DCACHE_CPU).req.ready + arb.io.requestor(DCACHE_CPU).req.bits.kill := ctrl.io.dmem.req.bits.kill + 
arb.io.requestor(DCACHE_CPU).req.bits.cmd := ctrl.io.dmem.req.bits.cmd + arb.io.requestor(DCACHE_CPU).req.bits.typ := ctrl.io.dmem.req.bits.typ + arb.io.requestor(DCACHE_CPU).req.bits.idx := dpath.io.dmem.req.bits.idx + arb.io.requestor(DCACHE_CPU).req.bits.tag := dpath.io.dmem.req.bits.tag + arb.io.requestor(DCACHE_CPU).req.bits.data := dpath.io.dmem.req.bits.data var fpu: rocketFPU = null if (HAVE_FPU) @@ -217,21 +217,21 @@ class rocketProc()(implicit conf: Configuration) extends Component storegen.io.typ := vu.io.dmem_req.bits.typ storegen.io.din := vu.io.dmem_req.bits.data - arb.io.requestor(DMEM_VU).req.valid := vu.io.dmem_req.valid - arb.io.requestor(DMEM_VU).req.bits.kill := vu.io.dmem_req.bits.kill - arb.io.requestor(DMEM_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd - arb.io.requestor(DMEM_VU).req.bits.typ := vu.io.dmem_req.bits.typ - arb.io.requestor(DMEM_VU).req.bits.idx := vu.io.dmem_req.bits.idx - arb.io.requestor(DMEM_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn) - arb.io.requestor(DMEM_VU).req.bits.data := Reg(storegen.io.dout) - arb.io.requestor(DMEM_VU).req.bits.tag := vu.io.dmem_req.bits.tag + arb.io.requestor(DCACHE_VU).req.valid := vu.io.dmem_req.valid + arb.io.requestor(DCACHE_VU).req.bits.kill := vu.io.dmem_req.bits.kill + arb.io.requestor(DCACHE_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd + arb.io.requestor(DCACHE_VU).req.bits.typ := vu.io.dmem_req.bits.typ + arb.io.requestor(DCACHE_VU).req.bits.idx := vu.io.dmem_req.bits.idx + arb.io.requestor(DCACHE_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn) + arb.io.requestor(DCACHE_VU).req.bits.data := Reg(storegen.io.dout) + arb.io.requestor(DCACHE_VU).req.bits.tag := vu.io.dmem_req.bits.tag - vu.io.dmem_req.ready := arb.io.requestor(DMEM_VU).req.ready - vu.io.dmem_resp.valid := Reg(arb.io.requestor(DMEM_VU).resp.valid) - vu.io.dmem_resp.bits.nack := arb.io.requestor(DMEM_VU).resp.bits.nack - vu.io.dmem_resp.bits.data := arb.io.requestor(DMEM_VU).resp.bits.data_subword - vu.io.dmem_resp.bits.tag 
:= Reg(arb.io.requestor(DMEM_VU).resp.bits.tag) - vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DMEM_VU).resp.bits.typ) + vu.io.dmem_req.ready := arb.io.requestor(DCACHE_VU).req.ready + vu.io.dmem_resp.valid := Reg(arb.io.requestor(DCACHE_VU).resp.valid) + vu.io.dmem_resp.bits.nack := arb.io.requestor(DCACHE_VU).resp.bits.nack + vu.io.dmem_resp.bits.data := arb.io.requestor(DCACHE_VU).resp.bits.data_subword + vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DCACHE_VU).resp.bits.tag) + vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DCACHE_VU).resp.bits.typ) // share vector integer multiplier with rocket dpath.io.vec_imul_req <> vu.io.cp_imul_req @@ -243,7 +243,7 @@ class rocketProc()(implicit conf: Configuration) extends Component } else { - arb.io.requestor(DMEM_VU).req.valid := Bool(false) + arb.io.requestor(DCACHE_VU).req.valid := Bool(false) if (HAVE_FPU) { fpu.io.sfma.valid := Bool(false) diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index 108c5860..aaa06dcd 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -6,20 +6,19 @@ import scala.math._ //TODO: When compiler bug SI-5604 is fixed in 2.10, change object Constants to // package object rocket and remove import Constants._'s from other files -object Constants extends HTIFConstants with +object Constants extends + ScalarOpConstants with MemoryOpConstants with PCRConstants with InterruptConstants with AddressConstants with - ArbiterConstants with VectorOpConstants with TLBConstants with - ScalarOpConstants with MemoryInterfaceConstants { - val HAVE_RVC = false - val HAVE_FPU = true - val HAVE_VEC = true + def HAVE_RVC = false + def HAVE_FPU = true + def HAVE_VEC = true val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 92550d77..11d8f0b8 100644 --- a/rocket/src/main/scala/ptw.scala +++ 
b/rocket/src/main/scala/ptw.scala @@ -5,72 +5,6 @@ import Node._ import Constants._ import scala.math._ -class ioHellaCacheArbiter(n: Int) extends Bundle -{ - val requestor = Vec(n) { new ioHellaCache() }.flip - val mem = new ioHellaCache -} - -class rocketHellaCacheArbiter(n: Int) extends Component -{ - val io = new ioHellaCacheArbiter(n) - require(DCACHE_TAG_BITS >= log2Up(n) + CPU_TAG_BITS) - - var req_val = Bool(false) - var req_rdy = io.mem.req.ready - for (i <- 0 until n) - { - io.requestor(i).req.ready := req_rdy - req_val = req_val || io.requestor(i).req.valid - req_rdy = req_rdy && !io.requestor(i).req.valid - } - - var req_cmd = io.requestor(n-1).req.bits.cmd - var req_type = io.requestor(n-1).req.bits.typ - var req_idx = io.requestor(n-1).req.bits.idx - var req_ppn = io.requestor(n-1).req.bits.ppn - var req_data = io.requestor(n-1).req.bits.data - var req_kill = io.requestor(n-1).req.bits.kill - var req_tag = io.requestor(n-1).req.bits.tag - for (i <- n-1 to 0 by -1) - { - val r = io.requestor(i).req - req_cmd = Mux(r.valid, r.bits.cmd, req_cmd) - req_type = Mux(r.valid, r.bits.typ, req_type) - req_idx = Mux(r.valid, r.bits.idx, req_idx) - req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn) - req_data = Mux(Reg(r.valid), r.bits.data, req_data) - req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill) - req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2Up(n))), req_tag) - } - - io.mem.req.valid := req_val - io.mem.req.bits.cmd := req_cmd - io.mem.req.bits.typ := req_type - io.mem.req.bits.idx := req_idx - io.mem.req.bits.ppn := req_ppn - io.mem.req.bits.data := req_data - io.mem.req.bits.kill := req_kill - io.mem.req.bits.tag := req_tag - - for (i <- 0 until n) - { - val r = io.requestor(i).resp - val x = io.requestor(i).xcpt - val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i) - x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid) - x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid) - r.valid := io.mem.resp.valid && tag_hit - 
r.bits.miss := io.mem.resp.bits.miss && tag_hit - r.bits.nack := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid) - r.bits.replay := io.mem.resp.bits.replay && tag_hit - r.bits.data := io.mem.resp.bits.data - r.bits.data_subword := io.mem.resp.bits.data_subword - r.bits.typ := io.mem.resp.bits.typ - r.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) - } -} - class ioPTW(n: Int) extends Bundle { val requestor = Vec(n) { new ioTLB_PTW }.flip diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 30e961d6..69aef22d 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -16,9 +16,9 @@ class Tile(resetSignal: Bool = null)(implicit conf: Configuration) extends Compo val icache = new rocketICache(128, 4) // 128 sets x 4 ways (32KB) val dcache = new HellaCache - val arbiter = new rocketMemArbiter(2 + (if (HAVE_VEC) 1 else 0)) - arbiter.io.requestor(0) <> dcache.io.mem - arbiter.io.requestor(1) <> icache.io.mem + val arbiter = new rocketMemArbiter(DMEM_PORTS) + arbiter.io.requestor(DMEM_DCACHE) <> dcache.io.mem + arbiter.io.requestor(DMEM_ICACHE) <> icache.io.mem io.tilelink.xact_init <> arbiter.io.mem.xact_init io.tilelink.xact_init_data <> dcache.io.mem.xact_init_data @@ -32,7 +32,7 @@ class Tile(resetSignal: Bool = null)(implicit conf: Configuration) extends Compo if (HAVE_VEC) { val vicache = new rocketICache(128, 1) // 128 sets x 1 ways (8KB) - arbiter.io.requestor(2) <> vicache.io.mem + arbiter.io.requestor(DMEM_VICACHE) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu } diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 35c41e79..7b29021c 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -6,8 +6,8 @@ import Constants._ import uncore._ import collection.mutable.ArrayBuffer -object DummyTopLevelConstants extends rocket.constants.CoherenceConfigConstants { -// val NTILES = 1 +object DummyTopLevelConstants extends 
rocket.constants.CoherenceConfigConstants with rocket.constants.MulticoreConstants { + val NTILES = 1 val ENABLE_SHARING = true val ENABLE_CLEAN_EXCLUSIVE = true } From a7a4e6569040683e2b0d9703bb13953ed91280b3 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 15 Oct 2012 16:04:25 -0700 Subject: [PATCH 0478/1087] Initial verison of reading config from files --- rocket/src/main/scala/config.scala | 56 +++++++++++++++++++++++++++++ rocket/src/main/scala/rocket.config | 1 + 2 files changed, 57 insertions(+) create mode 100644 rocket/src/main/scala/config.scala create mode 100644 rocket/src/main/scala/rocket.config diff --git a/rocket/src/main/scala/config.scala b/rocket/src/main/scala/config.scala new file mode 100644 index 00000000..13d7344a --- /dev/null +++ b/rocket/src/main/scala/config.scala @@ -0,0 +1,56 @@ +package rocket +package config + +import java.io.File +import java.io.FileInputStream +import java.util.Properties +import scala.util.{Properties => SProperties} + +class Config(props: Properties) { + private val msg = "Configuration is missing requested parameter " + def getInt(name: String): Int = Option(props.getProperty(name).toInt).getOrElse(sys.error(msg+name)) + def getString(name: String): String = Option(props.getProperty(name)).getOrElse(sys.error(msg+name)) + def getBoolean(name: String): Boolean = Option(props.getProperty(name).toBoolean).getOrElse(sys.error(msg+name)) + def apply(name: String): Int = getInt(name) +} + +object Config { + + lazy val internal_config = getConfig() + + def apply(name: String) = internal_config(name) + + private def getConfig(): Config = { + + val filePath0 = + SProperties + .envOrNone("ROCKET_CONFIG") + .orElse(SProperties.propOrNone("rocket.config")) + if (filePath0.isEmpty) + Console.err.println(""" + | WARNING: Could not find configuration file to load. 
+ | Options are: + | (1) Set environmental variable ROCKET_CONFIG to the config file path + | (2) Set system property rocket.config to the config file path + | Using default values for config. + """.stripMargin) + + val filePath = + filePath0.flatMap(fp => { + val f = new File(fp) + if (!f.isFile) { + Console.err.println(""" + | WARNING: File '%s' is not a valid file path + | Using default values for config + """.format(fp).stripMargin) + None + } else Some(fp) + }) + + val props = new Properties() + filePath.map(fp => props.load(new FileInputStream(fp))) + new Config(props) + } + +} + diff --git a/rocket/src/main/scala/rocket.config b/rocket/src/main/scala/rocket.config new file mode 100644 index 00000000..65b60ff0 --- /dev/null +++ b/rocket/src/main/scala/rocket.config @@ -0,0 +1 @@ +NWAYS 4 From 8970b635b216d6407ea5a11d5672b7ba5f783b83 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 15 Oct 2012 16:29:49 -0700 Subject: [PATCH 0479/1087] improvements to implicit RocketConfiguration parameter --- rocket/src/main/scala/cpu.scala | 8 ++++---- rocket/src/main/scala/dpath.scala | 6 +++--- rocket/src/main/scala/dpath_util.scala | 8 ++++---- rocket/src/main/scala/htif.scala | 4 ++-- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 12 ++++++------ rocket/src/main/scala/tile.scala | 2 +- rocket/src/main/scala/top.scala | 4 ++-- 8 files changed, 23 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 8f7f49fc..b9adcd8d 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -5,7 +5,7 @@ import Node._ import Constants._ import hwacha._ -class ioRocket()(implicit conf: Configuration) extends Bundle +class ioRocket(implicit conf: RocketConfiguration) extends Bundle { val host = new ioHTIF() val imem = (new ioImem).flip @@ -13,12 +13,12 @@ class ioRocket()(implicit conf: Configuration) extends Bundle val dmem = new ioHellaCache } -class 
rocketProc()(implicit conf: Configuration) extends Component +class rocketProc(implicit conf: RocketConfiguration) extends Component { val io = new ioRocket - val ctrl = new rocketCtrl(); - val dpath = new rocketDpath(); + val ctrl = new rocketCtrl + val dpath = new rocketDpath val dtlb = new rocketDTLB(DTLB_ENTRIES); val itlb = new rocketITLB(ITLB_ENTRIES); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index fc199520..64e45086 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -12,9 +12,9 @@ class ioDpathImem extends Bundle() val resp_data = Bits(INPUT, 32); } -class ioDpathAll()(implicit conf: Configuration) extends Bundle() +class ioDpathAll(implicit conf: RocketConfiguration) extends Bundle { - val host = new ioHTIF() + val host = new ioHTIF val ctrl = new ioCtrlDpath().flip val dmem = new ioHellaCache val dtlb = new ioDTLB_CPU_req_bundle().asOutput() @@ -28,7 +28,7 @@ class ioDpathAll()(implicit conf: Configuration) extends Bundle() val vec_imul_resp = Bits(INPUT, hwacha.Constants.SZ_XLEN) } -class rocketDpath()(implicit conf: Configuration) extends Component +class rocketDpath(implicit conf: RocketConfiguration) extends Component { val io = new ioDpathAll(); diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 66e6dec9..e0e2b84a 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -57,9 +57,9 @@ class rocketDpathBTB(entries: Int) extends Component io.target := mux.io.out.toUFix } -class ioDpathPCR()(implicit conf: Configuration) extends Bundle() +class ioDpathPCR(implicit conf: RocketConfiguration) extends Bundle { - val host = new ioHTIF() + val host = new ioHTIF val r = new ioReadPort(); val w = new ioWritePort(); @@ -86,9 +86,9 @@ class ioDpathPCR()(implicit conf: Configuration) extends Bundle() val vec_nfregs = UFix(INPUT, 6) } -class rocketDpathPCR()(implicit conf: Configuration) extends 
Component +class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component { - val io = new ioDpathPCR(); + val io = new ioDpathPCR val reg_epc = Reg() { UFix() }; val reg_badvaddr = Reg() { UFix() }; diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 761bb59d..5f2a3e35 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -23,7 +23,7 @@ class PCRReq extends Bundle val data = Bits(width = 64) } -class ioHTIF()(implicit conf: Configuration) extends Bundle +class ioHTIF(implicit conf: RocketConfiguration) extends Bundle { val reset = Bool(INPUT) val debug = new ioDebug @@ -33,7 +33,7 @@ class ioHTIF()(implicit conf: Configuration) extends Bundle val ipi_rep = (new FIFOIO) { Bool() }.flip } -class rocketHTIF(w: Int)(implicit conf: Configuration) extends Component +class rocketHTIF(w: Int)(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { val host = new ioHost(w) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index c0fa675e..6b3894ca 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -28,7 +28,7 @@ class ioRocketICache extends Bundle() // 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines // parameters : // lines = # cache lines -class rocketICache(sets: Int, assoc: Int)(implicit conf: Configuration) extends Component +class rocketICache(sets: Int, assoc: Int)(implicit conf: RocketConfiguration) extends Component { val io = new ioRocketICache(); diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index a2f0839a..6c68c302 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -159,7 +159,7 @@ class MetaArrayReq extends Bundle { val data = new MetaData() } -class MSHR(id: Int)(implicit conf: Configuration) extends Component { +class MSHR(id: Int)(implicit conf: RocketConfiguration) extends 
Component { val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) @@ -293,7 +293,7 @@ class MSHR(id: Int)(implicit conf: Configuration) extends Component { io.replay.bits.way_oh := req.way_oh } -class MSHRFile()(implicit conf: Configuration) extends Component { +class MSHRFile(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { val req = (new FIFOIO) { new MSHRReq }.flip val secondary_miss = Bool(OUTPUT) @@ -415,7 +415,7 @@ class MSHRFile()(implicit conf: Configuration) extends Component { } -class WritebackUnit()(implicit conf: Configuration) extends Component { +class WritebackUnit(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { val req = (new FIFOIO) { new WritebackReq() }.flip val probe = (new FIFOIO) { new WritebackReq() }.flip @@ -484,7 +484,7 @@ class WritebackUnit()(implicit conf: Configuration) extends Component { io.probe_rep_data.bits.data := io.data_resp } -class ProbeUnit()(implicit conf: Configuration) extends Component { +class ProbeUnit(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { val req = (new FIFOIO) { new ProbeRequest }.flip val rep = (new FIFOIO) { new ProbeReply } @@ -548,7 +548,7 @@ class ProbeUnit()(implicit conf: Configuration) extends Component { io.wb_req.bits.tag := req.addr >> UFix(IDX_BITS) } -class FlushUnit(lines: Int)(implicit conf: Configuration) extends Component { +class FlushUnit(lines: Int)(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { val req = (new FIFOIO) { Bool() }.flip val meta_req = (new FIFOIO) { new MetaArrayReq() } @@ -748,7 +748,7 @@ class ioHellaCache extends Bundle { val xcpt = (new HellaCacheExceptions).asInput } -class HellaCache()(implicit conf: Configuration) extends Component { +class HellaCache(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { val cpu = (new ioHellaCache).flip val mem = new ioTileLink diff --git 
a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 69aef22d..0a37db2a 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -5,7 +5,7 @@ import Node._ import Constants._ import uncore._ -class Tile(resetSignal: Bool = null)(implicit conf: Configuration) extends Component(resetSignal) +class Tile(resetSignal: Bool = null)(implicit conf: RocketConfiguration) extends Component(resetSignal) { val io = new Bundle { val tilelink = new ioTileLink diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 7b29021c..df6f084d 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -13,7 +13,7 @@ object DummyTopLevelConstants extends rocket.constants.CoherenceConfigConstants } import DummyTopLevelConstants._ -case class Configuration(ntiles: Int, co: CoherencePolicyWithUncached) +case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached) class Top extends Component { @@ -24,7 +24,7 @@ class Top extends Component if(ENABLE_CLEAN_EXCLUSIVE) new MEICoherence else new MICoherence } - implicit val conf = Configuration(NTILES, co) + implicit val conf = RocketConfiguration(NTILES, co) val io = new Bundle { val debug = new ioDebug From 27ddff1adbaaeb114e0c25e3d0c45bfa66a6055e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 6 Oct 2012 17:32:01 -0700 Subject: [PATCH 0480/1087] simplify and improve multiplier --- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/multiplier.scala | 55 ++++++++------------------ 2 files changed, 17 insertions(+), 40 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 84746631..f052f6f1 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -252,7 +252,7 @@ class rocketDpath extends Component io.ctrl.div_result_val := div.io.resp_val // multiplier - var mul_io = new rocketMultiplier().io + var mul_io = new 
rocketMultiplier(unroll = 6).io if (HAVE_VEC) { val vu_mul = new rocketVUMultiplier(nwbq = 1) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index f56f3d56..f5e3445b 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -61,17 +61,12 @@ class rocketVUMultiplier(nwbq: Int) extends Component { io.vu.req <> io.cpu.req } -class rocketMultiplier extends Component { +class rocketMultiplier(unroll: Int = 1) extends Component { val io = new ioMultiplier - // w must be even (booth). - // we need an extra bit to handle signed vs. unsigned, - // so we need to add a second to keep w even. - val w = 64 + 2 - val unroll = 3 - require(w % 2 == 0 && (w/2) % unroll == 0) - - val cycles = w/unroll/2 + val w0 = io.req.bits.in0.getWidth + val w = (w0+1+unroll-1)/unroll*unroll + val cycles = w/unroll val r_val = Reg(resetVal = Bool(false)); val r_dw = Reg { Bits() } @@ -85,15 +80,15 @@ class rocketMultiplier extends Component { val dw = io.req.bits.fn(io.req.bits.fn.width-1) val fn = io.req.bits.fn(io.req.bits.fn.width-2,0) - val lhs_msb = Mux(dw === DW_64, io.req.bits.in0(63), io.req.bits.in0(31)).toBool + val lhs_msb = Mux(dw === DW_64, io.req.bits.in0(w0-1), io.req.bits.in0(w0/2-1)).toBool val lhs_sign = ((fn === MUL_H) || (fn === MUL_HSU)) && lhs_msb - val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(63,32), Fill(32, lhs_sign)) - val lhs_in = Cat(lhs_sign, lhs_hi, io.req.bits.in0(31,0)) + val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(w0-1,w0/2), Fill(w0/2, lhs_sign)) + val lhs_in = Cat(lhs_sign, lhs_hi, io.req.bits.in0(w0/2-1,0)) - val rhs_msb = Mux(dw === DW_64, io.req.bits.in1(63), io.req.bits.in1(31)).toBool + val rhs_msb = Mux(dw === DW_64, io.req.bits.in1(w0-1), io.req.bits.in1(w0/2-1)).toBool val rhs_sign = (fn === MUL_H) && rhs_msb - val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(63,32), Fill(32, rhs_sign)) - val rhs_in = Cat(rhs_sign, rhs_sign, rhs_hi, io.req.bits.in1(31,0)) + val 
rhs_hi = Mux(dw === DW_64, io.req.bits.in1(w0-1,w0/2), Fill(w0/2, rhs_sign)) + val rhs_in = Cat(Fill(w-w0, rhs_sign), rhs_hi, io.req.bits.in1(w0/2-1,0)) val do_kill = io.req_kill && r_cnt === UFix(0) // can only kill on 1st cycle @@ -111,34 +106,16 @@ class rocketMultiplier extends Component { r_val := Bool(false) } - val lhs_sext = Cat(r_lhs(w-2), r_lhs(w-2), r_lhs).toUFix - val lhs_twice = Cat(r_lhs(w-2), r_lhs, Bits(0,1)).toUFix - - var prod = r_prod - var lsb = r_lsb - - for (i <- 0 until unroll) { - val addend = Mux(prod(0) != lsb, lhs_sext, - Mux(prod(0) != prod(1), lhs_twice, - UFix(0))); - val sub = prod(1) - val adder_lhs = Cat(prod(w*2-1), prod(w*2-1,w)).toUFix - val adder_rhs = Mux(sub, ~addend, addend) - val adder_out = (adder_lhs + adder_rhs + sub.toUFix)(w,0) - - lsb = prod(1) - prod = Cat(adder_out(w), adder_out, prod(w-1,2)) - } - + val sum = r_prod(2*w-1,w).toFix + r_prod(unroll-1,0).toFix * r_lhs.toFix + Mux(r_lsb, r_lhs.toFix, Fix(0)) when (r_val && (r_cnt != UFix(cycles))) { - r_lsb := lsb - r_prod := prod + r_lsb := r_prod(unroll-1) + r_prod := Cat(sum, r_prod(w-1,unroll)).toFix r_cnt := r_cnt + UFix(1) } - val mul_output64 = Mux(r_fn === MUL_LO, r_prod(63,0), r_prod(127,64)) - val mul_output32 = Mux(r_fn === MUL_LO, r_prod(31,0), r_prod(63,32)) - val mul_output32_ext = Cat(Fill(32, mul_output32(31)), mul_output32) + val mul_output64 = Mux(r_fn === MUL_LO, r_prod(w0-1,0), r_prod(2*w0-1,w0)) + val mul_output32 = Mux(r_fn === MUL_LO, r_prod(w0/2-1,0), r_prod(w0-1,w0/2)) + val mul_output32_ext = Cat(Fill(32, mul_output32(w0/2-1)), mul_output32) val mul_output = Mux(r_dw === DW_64, mul_output64, mul_output32_ext) From fcd69dba988798e9e2f7c2057bc2671c1f13cf5f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 9 Oct 2012 18:29:50 -0700 Subject: [PATCH 0481/1087] add optional early-out to mul/div --- rocket/src/main/scala/divider.scala | 92 ++++++++++++++------------ rocket/src/main/scala/multiplier.scala | 26 +++++--- 2 files changed, 64 
insertions(+), 54 deletions(-) diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index b016ee04..41268264 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -4,13 +4,13 @@ import Chisel._ import Node._ import Constants._ -class rocketDivider(width: Int) extends Component { +class rocketDivider(w: Int, earlyOut: Boolean = false) extends Component { val io = new ioMultiplier val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_ready); - val count = Reg() { UFix() }; + val count = Reg() { UFix(width = log2Up(w+1)) } val divby0 = Reg() { Bool() }; val neg_quo = Reg() { Bool() }; val neg_rem = Reg() { Bool() }; @@ -18,16 +18,14 @@ class rocketDivider(width: Int) extends Component { val rem = Reg() { Bool() }; val half = Reg() { Bool() }; - val divisor = Reg() { UFix() }; - val remainder = Reg() { UFix() }; - val subtractor = remainder(2*width, width).toUFix - divisor; + val divisor = Reg() { Bits() } + val remainder = Reg() { Bits(width = 2*w+1) } + val subtractor = remainder(2*w,w) - divisor val dw = io.req.bits.fn(io.req.bits.fn.width-1) val fn = io.req.bits.fn(io.req.bits.fn.width-2,0) val tc = (fn === DIV_D) || (fn === DIV_R); - val do_kill = io.req_kill && Reg(io.req.ready) // kill on 1st cycle only - switch (state) { is (s_ready) { when (io.req.valid) { @@ -35,13 +33,13 @@ class rocketDivider(width: Int) extends Component { } } is (s_neg_inputs) { - state := Mux(do_kill, s_ready, s_busy) + state := Mux(io.req_kill, s_ready, s_busy) } is (s_busy) { - when (do_kill) { + when (io.req_kill && Reg(io.req.ready)) { state := s_ready } - .elsewhen (count === UFix(width)) { + .elsewhen (count === UFix(w)) { state := Mux(neg_quo || neg_rem, s_neg_outputs, s_done) } } @@ -57,63 +55,69 @@ class rocketDivider(width: Int) extends Component { // state machine - val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(width-1), 
io.req.bits.in0(width/2-1)).toBool - val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(width-1,width/2), Fill(width/2, lhs_sign)) - val lhs_in = Cat(lhs_hi, io.req.bits.in0(width/2-1,0)) + val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(w-1), io.req.bits.in0(w/2-1)) + val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(w-1,w/2), Fill(w/2, lhs_sign)) + val lhs_in = Cat(lhs_hi, io.req.bits.in0(w/2-1,0)) - val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(width-1), io.req.bits.in1(width/2-1)).toBool - val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(width-1,width/2), Fill(width/2, rhs_sign)) - val rhs_in = Cat(rhs_hi, io.req.bits.in1(width/2-1,0)) + val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w-1), io.req.bits.in1(w/2-1)) + val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(w-1,w/2), Fill(w/2, rhs_sign)) + val rhs_in = Cat(rhs_hi, io.req.bits.in1(w/2-1,0)) - when ((state === s_ready) && io.req.valid) { - count := UFix(0, log2Up(width+1)); + when (io.req.fire()) { + count := UFix(0) half := (dw === DW_32); neg_quo := Bool(false); neg_rem := Bool(false); rem := (fn === DIV_R) || (fn === DIV_RU); reg_tag := io.req_tag; divby0 := Bool(true); - divisor := rhs_in.toUFix; - remainder := Cat(UFix(0,width+1), lhs_in).toUFix; + divisor := rhs_in + remainder := lhs_in } when (state === s_neg_inputs) { - neg_rem := remainder(width-1).toBool; - neg_quo := (remainder(width-1) != divisor(width-1)); - when (remainder(width-1).toBool) { - remainder := Cat(remainder(2*width, width), -remainder(width-1,0)).toUFix; + neg_rem := remainder(w-1) + neg_quo := (remainder(w-1) != divisor(w-1)) + when (remainder(w-1)) { + remainder := Cat(remainder(2*w, w), -remainder(w-1,0)) } - when (divisor(width-1).toBool) { - divisor := subtractor(width-1,0); + when (divisor(w-1)) { + divisor := subtractor(w-1,0) } } when (state === s_neg_outputs) { when (neg_rem && neg_quo && !divby0) { - remainder := Cat(-remainder(2*width, width+1), remainder(width), -remainder(width-1,0)).toUFix; + remainder 
:= Cat(-remainder(2*w, w+1), remainder(w), -remainder(w-1,0)) } .elsewhen (neg_quo && !divby0) { - remainder := Cat(remainder(2*width, width), -remainder(width-1,0)).toUFix; + remainder := Cat(remainder(2*w, w), -remainder(w-1,0)) } .elsewhen (neg_rem) { - remainder := Cat(-remainder(2*width, width+1), remainder(width,0)).toUFix; - } - - when (divisor(width-1).toBool) { - divisor := subtractor(width-1,0); + remainder := Cat(-remainder(2*w, w+1), remainder(w,0)) } } when (state === s_busy) { - count := count + UFix(1); - divby0 := divby0 && !subtractor(width).toBool; - remainder := Mux(subtractor(width).toBool, - Cat(remainder(2*width-1, width), remainder(width-1,0), ~subtractor(width)), - Cat(subtractor(width-1, 0), remainder(width-1,0), ~subtractor(width))).toUFix; + count := count + UFix(1) + + val msb = subtractor(w) + divby0 := divby0 && !msb + remainder := Cat(Mux(msb, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !msb) + + val divisorMSB = Log2(divisor, w) + val dividendMSB = Log2(remainder(w-1,0), w) + val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB + val eOut = count === UFix(0) && eOutPos > dividendMSB && (divisorMSB != UFix(0) || divisor(0)) + when (Bool(earlyOut) && eOut) { + val eOutDist = eOutPos - dividendMSB + val shift = Mux(eOutDist >= UFix(w-1), UFix(w-1), eOutDist(log2Up(w)-1,0)) + remainder := remainder << shift + count := shift + } } - val result = Mux(rem, remainder(2*width, width+1), remainder(width-1,0)); + val result = Mux(rem, remainder(2*w, w+1), remainder(w-1,0)) - io.resp_bits := Mux(half, Cat(Fill(width/2, result(width/2-1)), result(width/2-1,0)), result); - io.resp_tag := reg_tag; - io.resp_val := (state === s_done); - - io.req.ready := (state === s_ready); + io.resp_bits := Mux(half, Cat(Fill(w/2, result(w/2-1)), result(w/2-1,0)), result) + io.resp_tag := reg_tag + io.resp_val := state === s_done + io.req.ready := state === s_ready } diff --git a/rocket/src/main/scala/multiplier.scala 
b/rocket/src/main/scala/multiplier.scala index f5e3445b..c52ef782 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -61,7 +61,7 @@ class rocketVUMultiplier(nwbq: Int) extends Component { io.vu.req <> io.cpu.req } -class rocketMultiplier(unroll: Int = 1) extends Component { +class rocketMultiplier(unroll: Int = 1, earlyOut: Boolean = false) extends Component { val io = new ioMultiplier val w0 = io.req.bits.in0.getWidth @@ -89,8 +89,6 @@ class rocketMultiplier(unroll: Int = 1) extends Component { val rhs_sign = (fn === MUL_H) && rhs_msb val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(w0-1,w0/2), Fill(w0/2, rhs_sign)) val rhs_in = Cat(Fill(w-w0, rhs_sign), rhs_hi, io.req.bits.in1(w0/2-1,0)) - - val do_kill = io.req_kill && r_cnt === UFix(0) // can only kill on 1st cycle when (io.req.valid && io.req.ready) { r_val := Bool(true) @@ -102,25 +100,33 @@ class rocketMultiplier(unroll: Int = 1) extends Component { r_prod:= rhs_in r_lsb := Bool(false) } - .elsewhen (io.resp_val && io.resp_rdy || do_kill) { // can only kill on first cycle + .elsewhen (io.resp_val && io.resp_rdy || io.req_kill && r_cnt === UFix(0)) { // can only kill on first cycle r_val := Bool(false) } + val eOutDist = (UFix(cycles)-r_cnt)*UFix(unroll) + val outShift = Mux(r_fn === MUL_LO, UFix(0), Mux(r_dw === DW_64, UFix(64), UFix(32))) + val shiftDist = Mux(r_cnt === UFix(cycles), outShift, eOutDist) + val eOutMask = (UFix(1) << eOutDist) - UFix(1) + val eOut = r_cnt != UFix(0) && Bool(earlyOut) && !((r_prod(w-1,0) ^ r_lsb.toFix) & eOutMask).orR + val shift = r_prod.toFix >> shiftDist + val sum = r_prod(2*w-1,w).toFix + r_prod(unroll-1,0).toFix * r_lhs.toFix + Mux(r_lsb, r_lhs.toFix, Fix(0)) when (r_val && (r_cnt != UFix(cycles))) { r_lsb := r_prod(unroll-1) r_prod := Cat(sum, r_prod(w-1,unroll)).toFix r_cnt := r_cnt + UFix(1) + when (eOut) { + r_prod := shift + r_cnt := UFix(cycles) + } } - val mul_output64 = Mux(r_fn === MUL_LO, r_prod(w0-1,0), 
r_prod(2*w0-1,w0)) - val mul_output32 = Mux(r_fn === MUL_LO, r_prod(w0/2-1,0), r_prod(w0-1,w0/2)) - val mul_output32_ext = Cat(Fill(32, mul_output32(w0/2-1)), mul_output32) - - val mul_output = Mux(r_dw === DW_64, mul_output64, mul_output32_ext) + val out32 = Cat(Fill(w0/2, shift(w0/2-1)), shift(w0/2-1,0)) + val out64 = shift(w0-1,0) io.req.ready := !r_val - io.resp_bits := mul_output; + io.resp_bits := Mux(r_dw === DW_64, out64, out32) io.resp_tag := r_tag; io.resp_val := r_val && (r_cnt === UFix(cycles)) } From 661f8e635b0d67a851cc953d0d93b71af959b32b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 9 Oct 2012 21:35:03 -0700 Subject: [PATCH 0482/1087] merge I$, ITLB, BTB into Frontend --- rocket/src/main/scala/consts.scala | 48 +- rocket/src/main/scala/cpu.scala | 59 +-- rocket/src/main/scala/ctrl.scala | 667 ++++++++++++------------- rocket/src/main/scala/divider.scala | 84 ++-- rocket/src/main/scala/dpath.scala | 127 ++--- rocket/src/main/scala/dpath_alu.scala | 32 +- rocket/src/main/scala/dpath_util.scala | 13 +- rocket/src/main/scala/dtlb.scala | 183 ------- rocket/src/main/scala/icache.scala | 333 +++++++----- rocket/src/main/scala/itlb.scala | 229 --------- rocket/src/main/scala/multiplier.scala | 2 +- rocket/src/main/scala/ptw.scala | 37 +- rocket/src/main/scala/queues.scala | 16 +- rocket/src/main/scala/tile.scala | 4 +- rocket/src/main/scala/tlb.scala | 255 ++++++++++ 15 files changed, 946 insertions(+), 1143 deletions(-) delete mode 100644 rocket/src/main/scala/dtlb.scala delete mode 100644 rocket/src/main/scala/itlb.scala create mode 100644 rocket/src/main/scala/tlb.scala diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index ac5de59d..731f24ba 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -16,25 +16,20 @@ object Constants val HTIF_WIDTH = 16 val MEM_BACKUP_WIDTH = HTIF_WIDTH - val BR_X = Bits("b????", 4) - val BR_N = UFix(0, 4); - val BR_EQ = UFix(1, 4); - val 
BR_NE = UFix(2, 4); - val BR_LT = UFix(3, 4); - val BR_LTU = UFix(4, 4); - val BR_GE = UFix(5, 4); - val BR_GEU = UFix(6, 4); - val BR_J = UFix(7, 4); - val BR_JR = UFix(8, 4); + val BR_X = Bits("b???", 3) + val BR_EQ = UFix(0, 3) + val BR_NE = UFix(1, 3) + val BR_J = UFix(2, 3) + val BR_N = UFix(3, 3) + val BR_LT = UFix(4, 3) + val BR_GE = UFix(5, 3) + val BR_LTU = UFix(6, 3) + val BR_GEU = UFix(7, 3) - val PC_4 = UFix(0, 3); - val PC_BTB = UFix(1, 3); - val PC_EX4 = UFix(2, 3); - val PC_BR = UFix(3, 3); - val PC_PCR = UFix(4, 3); - val PC_WB = UFix(5, 3); - val PC_EVEC = UFix(6, 3); - val PC_JR = UFix(7, 3); + val PC_EX4 = UFix(0, 2) + val PC_EX = UFix(1, 2) + val PC_WB = UFix(2, 2) + val PC_PCR = UFix(3, 2) val A2_X = Bits("b???", 3) val A2_BTYPE = UFix(0, 3); @@ -70,19 +65,6 @@ object Constants val WB_TSC = UFix(4, 3); val WB_IRT = UFix(5, 3); - val FN_X = Bits("b????", 4) - val FN_ADD = UFix(0, 4); - val FN_SUB = UFix(1, 4); - val FN_SLT = UFix(2, 4); - val FN_SLTU = UFix(3, 4); - val FN_AND = UFix(4, 4); - val FN_OR = UFix(5, 4); - val FN_XOR = UFix(6, 4); - val FN_SL = UFix(7, 4); - val FN_SR = UFix(8, 4); - val FN_SRA = UFix(9, 4); - val FN_OP2 = UFix(10, 4); - val DW_X = X val DW_32 = N val DW_64 = Y @@ -175,6 +157,7 @@ object Constants val PERM_BITS = 6; // rocketNBDCache parameters + val INST_BITS = 32 val DCACHE_PORTS = 3 val CPU_DATA_BITS = 64; val CPU_TAG_BITS = 9; @@ -212,8 +195,9 @@ object Constants val MEM_DATA_BITS = 128 val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS + val BTB_ENTRIES = 8 + val ITLB_ENTRIES = 8 val DTLB_ENTRIES = 16 - val ITLB_ENTRIES = 8; val VITLB_ENTRIES = 4 val START_ADDR = 0x2000; diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index c0a2f1c7..ec0178c1 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -8,8 +8,8 @@ import hwacha._ class ioRocket extends Bundle() { val host = new ioHTIF - val imem = (new ioImem).flip - val vimem = (new ioImem).flip + val 
imem = new IOCPUFrontend + val vimem = new IOCPUFrontend val dmem = new ioHellaCache } @@ -20,8 +20,7 @@ class rocketProc extends Component val ctrl = new rocketCtrl(); val dpath = new rocketDpath(); - val dtlb = new rocketDTLB(DTLB_ENTRIES); - val itlb = new rocketITLB(ITLB_ENTRIES); + val dtlb = new rocketTLB(DTLB_ENTRIES); val ptw = new rocketPTW(if (HAVE_VEC) 3 else 2) val arb = new rocketHellaCacheArbiter(DCACHE_PORTS) @@ -59,7 +58,7 @@ class rocketProc extends Component dtlbarb.io.in(DTLB_CPU).valid := ctrl.io.dtlb_val dtlbarb.io.in(DTLB_CPU).bits.kill := ctrl.io.dtlb_kill dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req.bits.cmd - dtlbarb.io.in(DTLB_CPU).bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR + dtlbarb.io.in(DTLB_CPU).bits.asid := UFix(0) dtlbarb.io.in(DTLB_CPU).bits.vpn := dpath.io.dtlb.vpn ctrl.io.dtlb_rdy := dtlbarb.io.in(DTLB_CPU).ready @@ -75,7 +74,7 @@ class rocketProc extends Component dtlb.io.cpu_req.valid := ctrl.io.dtlb_val dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd - dtlb.io.cpu_req.bits.asid := Bits(0,ASID_BITS); // FIXME: connect to PCR + dtlb.io.cpu_req.bits.asid := UFix(0) dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st @@ -91,7 +90,7 @@ class rocketProc extends Component // connect page table walker to TLBs, page table base register (from PCR) // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) - ptw.io.requestor(0) <> itlb.io.ptw + ptw.io.requestor(0) <> io.imem.ptw ptw.io.requestor(1) <> dtlb.io.ptw ptw.io.ptbr := dpath.io.ptbr; arb.io.requestor(DMEM_PTW) <> ptw.io.mem @@ -102,20 +101,9 @@ class rocketProc extends Component // FIXME: try to make this more compact - // connect ITLB to I$, ctrl, dpath - itlb.io.cpu.invalidate := dpath.io.ptbr_wen; - itlb.io.cpu.status := dpath.io.ctrl.status; - itlb.io.cpu.req_val := 
ctrl.io.imem.req_val; - itlb.io.cpu.req_asid := Bits(0,ASID_BITS); // FIXME: connect to PCR - itlb.io.cpu.req_vpn := dpath.io.imem.req_addr(VADDR_BITS,PGIDX_BITS); - io.imem.req_idx := dpath.io.imem.req_addr(PGIDX_BITS-1,0); - io.imem.req_ppn := itlb.io.cpu.resp_ppn; - io.imem.req_val := ctrl.io.imem.req_val; - io.imem.invalidate := ctrl.io.dpath.flush_inst; - ctrl.io.imem.resp_val := io.imem.resp_val; - dpath.io.imem.resp_data := io.imem.resp_data; - ctrl.io.xcpt_itlb := itlb.io.cpu.exception; - io.imem.itlb_miss := itlb.io.cpu.resp_miss; + // connect I$ + ctrl.io.imem <> io.imem + dpath.io.imem <> io.imem // connect arbiter to ctrl+dpath+DTLB //TODO: views on nested bundles? @@ -144,22 +132,19 @@ class rocketProc extends Component dpath.io.vec_ctrl <> ctrl.io.vec_dpath // hooking up vector I$ - val vitlb = new rocketITLB(VITLB_ENTRIES) - ptw.io.requestor(2) <> vitlb.io.ptw - vitlb.io.cpu.invalidate := dpath.io.ptbr_wen - vitlb.io.cpu.status := dpath.io.ctrl.status - vitlb.io.cpu.req_val := vu.io.imem_req.valid - vitlb.io.cpu.req_asid := Bits(0,ASID_BITS) // FIXME: connect to PCR - vitlb.io.cpu.req_vpn := vu.io.imem_req.bits(VADDR_BITS,PGIDX_BITS).toUFix - io.vimem.req_idx := vu.io.imem_req.bits(PGIDX_BITS-1,0) - io.vimem.req_ppn := vitlb.io.cpu.resp_ppn - io.vimem.req_val := vu.io.imem_req.valid - io.vimem.invalidate := ctrl.io.dpath.flush_inst - vu.io.imem_req.ready := Bool(true) - vu.io.imem_resp.valid := io.vimem.resp_val - vu.io.imem_resp.bits := io.vimem.resp_data - vu.io.vitlb_exception := vitlb.io.cpu.exception - io.vimem.itlb_miss := vitlb.io.cpu.resp_miss + ptw.io.requestor(2) <> io.vimem.ptw + io.vimem.req.bits.status := dpath.io.ctrl.status + io.vimem.req.bits.pc := vu.io.imem_req.bits.toUFix + io.vimem.req.valid := vu.io.imem_req.valid + io.vimem.req.bits.invalidate := ctrl.io.dpath.flush_inst + io.vimem.req.bits.invalidateTLB := dpath.io.ptbr_wen + vu.io.imem_req.ready := Bool(true) + vu.io.imem_resp.valid := io.vimem.resp.valid + 
vu.io.imem_resp.bits := io.vimem.resp.bits.data + vu.io.vitlb_exception := io.vimem.resp.bits.xcpt_if + io.vimem.resp.ready := Bool(true) + io.vimem.req.bits.mispredict := Bool(false) + io.vimem.req.bits.taken := Bool(false) // hooking up vector command queues vu.io.vec_cmdq.valid := ctrl.io.vec_iface.vcmdq_valid diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 24304498..f88aca6b 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -6,16 +6,13 @@ import Node._; import Constants._ import Instructions._ import hwacha._ +import ALU._ class ioCtrlDpath extends Bundle() { // outputs to datapath val sel_pc = UFix(OUTPUT, 3); - val wen_btb = Bool(OUTPUT); - val clr_btb = Bool(OUTPUT); - val stallf = Bool(OUTPUT); val stalld = Bool(OUTPUT); - val killf = Bool(OUTPUT); val killd = Bool(OUTPUT); val killx = Bool(OUTPUT); val killm = Bool(OUTPUT); @@ -26,8 +23,10 @@ class ioCtrlDpath extends Bundle() val fn_alu = UFix(OUTPUT, 4); val mul_val = Bool(OUTPUT); val mul_fn = UFix(OUTPUT, 2); + val mul_kill = Bool(OUTPUT) val div_val = Bool(OUTPUT); val div_fn = UFix(OUTPUT, 2); + val div_kill = Bool(OUTPUT) val sel_wa = Bool(OUTPUT); val sel_wb = UFix(OUTPUT, 3); val pcr = UFix(OUTPUT, 3) @@ -37,6 +36,7 @@ class ioCtrlDpath extends Bundle() val ex_fp_val= Bool(OUTPUT); val mem_fp_val= Bool(OUTPUT); val ex_wen = Bool(OUTPUT); + val ex_jalr = Bool(OUTPUT) val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); val wb_valid = Bool(OUTPUT) @@ -48,8 +48,6 @@ class ioCtrlDpath extends Bundle() val badvaddr_wen = Bool(OUTPUT); // high for a load/store access fault val vec_irq_aux_wen = Bool(OUTPUT) // inputs from datapath - val xcpt_ma_inst = Bool(INPUT); // high on a misaligned/illegal virtual PC - val btb_hit = Bool(INPUT); val inst = Bits(INPUT, 32); val br_eq = Bool(INPUT); val br_lt = Bool(INPUT); @@ -76,7 +74,7 @@ class ioCtrlDpath extends Bundle() class ioCtrlAll extends Bundle() { val dpath = new 
ioCtrlDpath(); - val imem = new ioImem().flip + val imem = new IOCPUFrontend val dmem = new ioHellaCache val dtlb_val = Bool(OUTPUT); val dtlb_kill = Bool(OUTPUT); @@ -84,7 +82,6 @@ class ioCtrlAll extends Bundle() val dtlb_miss = Bool(INPUT); val xcpt_dtlb_ld = Bool(INPUT); val xcpt_dtlb_st = Bool(INPUT); - val xcpt_itlb = Bool(INPUT); val fpu = new ioCtrlFPU(); val vec_dpath = new ioCtrlDpathVec() val vec_iface = new ioCtrlVecInterface() @@ -95,239 +92,239 @@ object rocketCtrlDecode val xpr64 = Y; val decode_default = - // eret - // fp_val renx2 | syscall - // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged - // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,BR_X, X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,MUL_X, X,DIV_X, X,WA_X, WB_X, PCR_X,SYNC_X,X,X,X,X) - - val xdecode = Array( - // eret - // fp_val renx2 | syscall - // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged - // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,BR_NE, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BEQ-> List(Y, N,N,BR_EQ, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BLT-> List(Y, N,N,BR_LT, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BLTU-> List(Y, N,N,BR_LTU,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BGE-> List(Y, N,N,BR_GE, Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BGEU-> List(Y, N,N,BR_GEU,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - - J-> List(Y, N,N,BR_J, N,N,A2_JTYPE,DW_X, FN_ADD, 
N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - JAL-> List(Y, N,N,BR_J, N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RA,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_C-> List(Y, N,N,BR_JR, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_J-> List(Y, N,N,BR_JR, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_R-> List(Y, N,N,BR_JR, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - RDNPC-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - - LB-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LH-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LW-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LD-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LBU-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LHU-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LWU-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SB-> List(Y, N,N,BR_N, Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SH-> List(Y, N,N,BR_N, Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SW-> List(Y, N,N,BR_N, Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SD-> List(xpr64,N,N,BR_N, 
Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - - AMOADD_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOSWAP_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOAND_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOOR_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMIN_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMINU_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAX_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAXU_W-> List(Y, N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOADD_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOAND_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOOR_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAX_D-> 
List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,N,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - - LUI-> List(Y, N,N,BR_N, N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTI -> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTIU-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ANDI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ORI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - XORI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAI-> List(Y, N,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADD-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SUB-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLT-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTU-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - 
riscvAND-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvOR-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvXOR-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLL-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRL-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRA-> List(Y, N,N,BR_N, Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - - ADDIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAIW-> List(xpr64,N,N,BR_N, N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SUBW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAW-> List(xpr64,N,N,BR_N, Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - - MUL-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,DIV_X, Y,WA_RD,WB_X, 
PCR_N,SYNC_N,N,N,N,N), - MULH-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHSU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - - DIV-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_D, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_DU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REM-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_R, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMU-> List(Y, N,N,BR_N, Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_RU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_D, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVUW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_DU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_R, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMUW-> List(xpr64,N,N,BR_N, Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,MUL_X, Y,DIV_RU,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - - SYSCALL-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,N), - SETPCR-> List(Y, N,N,BR_N, N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), - CLEARPCR-> List(Y, N,N,BR_N, N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_C,SYNC_N,N,N,Y,Y), - ERET-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,Y,N), - FENCE-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, 
FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), - FENCE_I-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,Y), - CFLUSH-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,Y), - MFPCR-> List(Y, N,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_F,SYNC_N,N,N,Y,Y), - MTPCR-> List(Y, N,N,BR_N, Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_T,SYNC_N,N,N,Y,Y), - RDTIME-> List(Y, N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), - RDCYCLE-> List(Y, N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), - RDINSTRET-> List(Y, N,N,BR_N, N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N)) - - val fdecode = Array( - // eret - // fp_val renx2 | syscall - // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged - // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJ_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJ_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJX_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJX_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJN_S-> 
List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJN_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMIN_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMIN_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMAX_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMAX_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FADD_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FADD_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMUL_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMUL_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, 
FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFTX_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFTX_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_W_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_W_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_WU_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_WU_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_L_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_L_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_LU_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_LU_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FEQ_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FEQ_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLT_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, 
N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLT_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLE_S-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLE_D-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MXTF_S-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MXTF_D-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_W-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_W-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_WU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_WU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_L-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_L-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_LU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_LU-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFFSR-> List(FPU_Y,Y,N,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MTFSR-> List(FPU_Y,Y,N,BR_N, N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLW-> List(FPU_Y,Y,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,DIV_X, 
N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FLD-> List(FPU_Y,Y,N,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FSW-> List(FPU_Y,Y,N,BR_N, N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FSD-> List(FPU_Y,Y,N,BR_N, N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N)) - - val vdecode = Array( - // eret - // fp_val renx2 | syscall - // | vec_val | renx1 mem_val mul_val div_val wen pcr | | privileged - // val | | brtype | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | div_fn | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - VVCFGIVL-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VVCFG-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VSETVL-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VF-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VMVV-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - VMSV-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFMVV-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FENCE_V_L-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FENCE_V_G-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), - VLD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLW-> 
List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLWU-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLH-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLHU-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLB-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLBU-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSW-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSH-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSB-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLW-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSD-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSW-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, 
N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTWU-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTH-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTHU-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTB-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTBU-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTH-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTB-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSSTD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - - VENQCMD-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQIMM1-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQIMM2-> 
List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQCNT-> List(VEC_Y,N,Y,BR_N, Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VXCPTEVAC-> List(VEC_Y,N,Y,BR_N, N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VXCPTKILL-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N), - VXCPTHOLD-> List(VEC_Y,N,Y,BR_N, N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,DIV_X, N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N)) + // jalr eret + // fp_val | renx2 div_val | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,BR_X, X,X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,MUL_X, X,X,WA_X, WB_X, PCR_X,SYNC_X,X,X,X,X) + + val xdecode = Array( + // jalr eret + // fp_val | renx2 div_val | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, 
PCR_N,SYNC_N,N,N,N,N), + + J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RA,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + RDNPC-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + + LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LW-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LBU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LHU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SB-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SH-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SW-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, 
Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + + AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,MUL_X, 
N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + + LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + XORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvAND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvOR-> List(Y, N,N,BR_N, 
N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + riscvXOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + + ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + + MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), 
+ MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + + DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REM-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + + SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,N), + SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), + CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_C,SYNC_N,N,N,Y,Y), + ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,Y,N), + FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), + FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,Y), + CFLUSH-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,Y), + MFPCR-> List(Y, 
N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_F,SYNC_N,N,N,Y,Y), + MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_T,SYNC_N,N,N,Y,Y), + RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), + RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), + RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N)) + + val fdecode = Array( + // jalr eret + // fp_val | renx2 div_val | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSGNJN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMIN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMIN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, 
N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMAX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMAX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMUL_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMUL_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FNMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MFTX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, 
FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MFTX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_W_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_W_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_WU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_WU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_L_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_L_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_LU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_LU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FEQ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FEQ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLT_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLT_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLE_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLE_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MXTF_S-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MXTF_D-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, 
DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_S_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FCVT_D_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MFFSR-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MTFSR-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FLW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FLD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FSW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + FSD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N)) + + val vdecode = Array( + // jalr eret + // fp_val | renx2 div_val | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | 
replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + VVCFGIVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VVCFG-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VSETVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), + VF-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + VMSV-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + FENCE_V_L-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + FENCE_V_G-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), + VLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLWU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLHU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLBU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, 
N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTWU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTHU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VLSTBU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), 
+ VSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VSSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + VFSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + + VENQCMD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQIMM1-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQIMM2-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VENQCNT-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VXCPTEVAC-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), + VXCPTKILL-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N), + VXCPTHOLD-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N)) } class rocketCtrl extends Component @@ -340,32 +337,22 @@ class rocketCtrl extends Component val cs = DecodeLogic(io.dpath.inst, rocketCtrlDecode.decode_default, decode_table) - val id_int_val :: id_fp_val :: id_vec_val :: id_br_type :: id_renx2 :: id_renx1 :: 
id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs - val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_div_fn :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 + val id_int_val :: id_fp_val :: id_vec_val :: id_br_type :: id_jalr :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs + val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 val id_pcr :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 - val if_reg_xcpt_ma_inst = Reg(io.dpath.xcpt_ma_inst, resetVal = Bool(false)); - val id_raddr3 = io.dpath.inst(16,12); val id_raddr2 = io.dpath.inst(21,17); val id_raddr1 = io.dpath.inst(26,22); val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27)); - - val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) - val wb_reg_dcache_miss = Reg(io.dmem.resp.bits.miss || io.dmem.resp.bits.nack, resetVal = Bool(false)); - - val id_reg_valid = Reg(resetVal = Bool(false)); - val id_reg_btb_hit = Reg(resetVal = Bool(false)); - val id_reg_xcpt_itlb = Reg(resetVal = Bool(false)); - val id_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); - val id_reg_icmiss = Reg(resetVal = Bool(false)); - val id_reg_replay = Reg(resetVal = Bool(false)); - val id_load_use = Bool(); + val id_load_use = Bool(); val ex_reg_br_type = Reg(){Bits()} + val ex_reg_jalr = Reg(){Bool()} val ex_reg_btb_hit = Reg(){Bool()}; val ex_reg_div_val = Reg(){Bool()}; val ex_reg_mul_val = Reg(){Bool()}; + val ex_reg_mul_fn = Reg(){UFix()}; val ex_reg_mem_val = Reg(){Bool()}; val ex_reg_mem_cmd = Reg(){Bits()}; val ex_reg_mem_type = Reg(){UFix(width = 3)}; @@ -385,7 +372,7 @@ class rocketCtrl extends Component val ex_reg_fp_val = Reg(resetVal = Bool(false)); val ex_reg_fp_sboard_set = Reg(resetVal = Bool(false)); val ex_reg_vec_val = Reg(resetVal = Bool(false)); - val ex_reg_replay = Reg(resetVal = Bool(false)); + val ex_reg_replay_next = 
Reg(resetVal = Bool(false)); val ex_reg_load_use = Reg(resetVal = Bool(false)); val mem_reg_valid = Reg(resetVal = Bool(false)); @@ -404,6 +391,7 @@ class rocketCtrl extends Component val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); val mem_reg_fp_val = Reg(resetVal = Bool(false)); val mem_reg_replay = Reg(resetVal = Bool(false)); + val mem_reg_replay_next = Reg(resetVal = Bool(false)); val mem_reg_kill = Reg(resetVal = Bool(false)); val mem_reg_fp_sboard_set = Reg(resetVal = Bool(false)); @@ -415,30 +403,16 @@ class rocketCtrl extends Component val wb_reg_eret = Reg(resetVal = Bool(false)); val wb_reg_exception = Reg(resetVal = Bool(false)); val wb_reg_replay = Reg(resetVal = Bool(false)); + val wb_reg_replay_next = Reg(resetVal = Bool(false)); val wb_reg_cause = Reg(){UFix()}; val wb_reg_fp_val = Reg(resetVal = Bool(false)); val wb_reg_fp_sboard_set = Reg(resetVal = Bool(false)); + val wb_reg_dcache_miss = Reg(io.dmem.resp.bits.miss || io.dmem.resp.bits.nack, resetVal = Bool(false)); + val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) val take_pc = Bool() val take_pc_wb = Bool() - - when (!io.dpath.stalld) { - when (io.dpath.killf) { - id_reg_valid := Bool(false) - id_reg_btb_hit := Bool(false); - id_reg_xcpt_ma_inst := Bool(false); - id_reg_xcpt_itlb := Bool(false); - id_reg_replay := !take_pc; // replay on I$ miss - } - .otherwise{ - id_reg_valid := Bool(true) - id_reg_btb_hit := io.dpath.btb_hit; - id_reg_xcpt_ma_inst := if_reg_xcpt_ma_inst; - id_reg_xcpt_itlb := io.xcpt_itlb; - id_reg_replay := id_replay_next - } - id_reg_icmiss := !io.imem.resp_val; - } + val ctrl_killm = Bool() var vec_replay = Bool(false) var vec_stalld = Bool(false) @@ -499,6 +473,7 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killd) { ex_reg_br_type := BR_N; + ex_reg_jalr := Bool(false) ex_reg_btb_hit := Bool(false); ex_reg_div_val := Bool(false); ex_reg_mul_val := Bool(false); @@ -517,55 +492,50 @@ class rocketCtrl extends Component ex_reg_fp_val := 
Bool(false); ex_reg_fp_sboard_set := Bool(false); ex_reg_vec_val := Bool(false); - ex_reg_replay := Bool(false); + ex_reg_replay_next := Bool(false); ex_reg_load_use := Bool(false); } .otherwise { ex_reg_br_type := id_br_type; - ex_reg_btb_hit := id_reg_btb_hit; + ex_reg_jalr := id_jalr + ex_reg_btb_hit := io.imem.resp.bits.taken ex_reg_div_val := id_div_val.toBool && id_waddr != UFix(0); ex_reg_mul_val := id_mul_val.toBool && id_waddr != UFix(0); + ex_reg_mul_fn := id_mul_fn.toUFix ex_reg_mem_val := id_mem_val.toBool; - ex_reg_valid := id_reg_valid + ex_reg_valid := Bool(true) ex_reg_pcr := id_pcr ex_reg_wen := id_wen.toBool && id_waddr != UFix(0); ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen ex_reg_eret := id_eret.toBool; ex_reg_flush_inst := (id_sync === SYNC_I); - ex_reg_xcpt_ma_inst := id_reg_xcpt_ma_inst; - ex_reg_xcpt_itlb := id_reg_xcpt_itlb; + ex_reg_xcpt_ma_inst := io.imem.resp.bits.xcpt_ma + ex_reg_xcpt_itlb := io.imem.resp.bits.xcpt_if ex_reg_xcpt_illegal := illegal_inst; ex_reg_xcpt_privileged := (id_privileged & ~io.dpath.status(SR_S)).toBool; ex_reg_xcpt_syscall := id_syscall.toBool; ex_reg_fp_val := id_fp_val ex_reg_fp_sboard_set := io.fpu.dec.sboard ex_reg_vec_val := id_vec_val.toBool - ex_reg_replay := id_reg_replay + ex_reg_replay_next := id_replay_next ex_reg_load_use := id_load_use; } + ex_reg_xcpt_interrupt := !take_pc && id_interrupt ex_reg_mem_cmd := id_mem_cmd ex_reg_mem_type := id_mem_type.toUFix - ex_reg_xcpt_interrupt := id_reg_valid && id_interrupt && !take_pc ex_reg_cause := id_cause - val beq = io.dpath.br_eq; - val bne = ~io.dpath.br_eq; - val blt = io.dpath.br_lt; - val bltu = io.dpath.br_ltu; - val bge = ~io.dpath.br_lt; - val bgeu = ~io.dpath.br_ltu; - - val br_taken = !(wb_reg_dcache_miss && ex_reg_load_use) && - ((ex_reg_br_type === BR_EQ) && beq || - (ex_reg_br_type === BR_NE) && bne || - (ex_reg_br_type === BR_LT) && blt || - (ex_reg_br_type === BR_LTU) && bltu || - (ex_reg_br_type === BR_GE) && bge || - (ex_reg_br_type === 
BR_GEU) && bgeu || - (ex_reg_br_type === BR_J)) // treat J/JAL like taken branches - val jr_taken = !(wb_reg_dcache_miss && ex_reg_load_use) && ex_reg_br_type === BR_JR + val br_taken = + Mux(ex_reg_br_type === BR_EQ, io.dpath.br_eq, + Mux(ex_reg_br_type === BR_NE, ~io.dpath.br_eq, + Mux(ex_reg_br_type === BR_LT, io.dpath.br_lt, + Mux(ex_reg_br_type === BR_GE, ~io.dpath.br_lt, + Mux(ex_reg_br_type === BR_LTU, io.dpath.br_ltu, + Mux(ex_reg_br_type === BR_GEU, ~io.dpath.br_ltu, + ex_reg_br_type === BR_J)))))) - val mem_reg_div_mul_val = Reg(){Bool()}; + val mem_reg_div_val = Reg(){Bool()} + val mem_reg_mul_val = Reg(){Bool()} val mem_reg_eret = Reg(){Bool()}; val mem_reg_mem_val = Reg(){Bool()}; val mem_reg_mem_cmd = Reg(){Bits()} @@ -574,7 +544,8 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killx) { mem_reg_valid := Bool(false); mem_reg_pcr := PCR_N - mem_reg_div_mul_val := Bool(false); + mem_reg_div_val := Bool(false) + mem_reg_mul_val := Bool(false) mem_reg_wen := Bool(false); mem_reg_fp_wen := Bool(false); mem_reg_eret := Bool(false); @@ -589,11 +560,13 @@ class rocketCtrl extends Component mem_reg_xcpt_syscall := Bool(false); mem_reg_fp_val := Bool(false); mem_reg_fp_sboard_set := Bool(false) + mem_reg_replay_next := Bool(false) } .otherwise { mem_reg_valid := ex_reg_valid mem_reg_pcr := ex_reg_pcr - mem_reg_div_mul_val := ex_reg_div_val || ex_reg_mul_val; + mem_reg_div_val := ex_reg_div_val && io.dpath.div_rdy + mem_reg_mul_val := ex_reg_mul_val && io.dpath.mul_rdy mem_reg_wen := ex_reg_wen; mem_reg_fp_wen := ex_reg_fp_wen; mem_reg_eret := ex_reg_eret; @@ -608,13 +581,14 @@ class rocketCtrl extends Component mem_reg_xcpt_syscall := ex_reg_xcpt_syscall; mem_reg_fp_val := ex_reg_fp_val mem_reg_fp_sboard_set := ex_reg_fp_sboard_set + mem_reg_replay_next := ex_reg_replay_next } mem_reg_mem_cmd := ex_reg_mem_cmd; mem_reg_mem_type := ex_reg_mem_type; mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb mem_reg_cause := ex_reg_cause - 
when (io.dpath.killm) { + when (ctrl_killm) { wb_reg_valid := Bool(false) wb_reg_pcr := PCR_N wb_reg_wen := Bool(false); @@ -624,17 +598,19 @@ class rocketCtrl extends Component wb_reg_div_mul_val := Bool(false); wb_reg_fp_val := Bool(false) wb_reg_fp_sboard_set := Bool(false) + wb_reg_replay_next := Bool(false) } .otherwise { wb_reg_valid := mem_reg_valid wb_reg_pcr := mem_reg_pcr wb_reg_wen := mem_reg_wen; wb_reg_fp_wen := mem_reg_fp_wen; - wb_reg_eret := mem_reg_eret; + wb_reg_eret := mem_reg_eret && !mem_reg_replay wb_reg_flush_inst := mem_reg_flush_inst; - wb_reg_div_mul_val := mem_reg_div_mul_val; + wb_reg_div_mul_val := mem_reg_div_val || mem_reg_mul_val wb_reg_fp_val := mem_reg_fp_val wb_reg_fp_sboard_set := mem_reg_fp_sboard_set + wb_reg_replay_next := mem_reg_replay_next } val sboard = new rocketCtrlSboard(32, 3, 2); @@ -717,7 +693,7 @@ class rocketCtrl extends Component UFix(0,5)))))))))))); // instruction address misaligned // control transfer from ex/mem - val take_pc_ex = ex_reg_btb_hit != br_taken || jr_taken + val take_pc_ex = ex_reg_btb_hit != br_taken || ex_reg_jalr take_pc_wb := wb_reg_replay || vec_replay || wb_reg_exception || wb_reg_eret take_pc := take_pc_ex || take_pc_wb; @@ -726,22 +702,23 @@ class rocketCtrl extends Component val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack) val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem - val kill_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill || fpu_kill_mem - val kill_dcache = io.dtlb_miss || mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill + val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill + ctrl_killm := killm_common || dmem_kill_mem || fpu_kill_mem // replay execute stage PC when the D$ is blocked, when the D$ misses, // for privileged instructions, and for fence.i 
instructions val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || - ex_reg_replay || ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || + ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || ex_reg_div_val && !io.dpath.div_rdy || - ex_reg_mul_val && !io.dpath.mul_rdy + ex_reg_mul_val && !io.dpath.mul_rdy || + mem_reg_replay_next val kill_ex = take_pc_wb || replay_ex mem_reg_replay := replay_ex && !take_pc_wb; mem_reg_kill := kill_ex; wb_reg_replay := replay_mem && !take_pc_wb - wb_reg_exception := mem_exception && !take_pc_wb; + wb_reg_exception := mem_exception && !take_pc_wb && !wb_reg_replay_next wb_reg_cause := mem_cause; val replay_wb = wb_reg_replay || vec_replay || io.dpath.pcr_replay @@ -755,19 +732,16 @@ class rocketCtrl extends Component io.dpath.vec_irq_aux_wen := wb_reg_exception && wb_reg_cause >= UFix(24) && wb_reg_cause < UFix(32) io.dpath.sel_pc := - Mux(wb_reg_exception, PC_PCR, // exception - Mux(replay_wb, PC_WB, // replay - Mux(wb_reg_eret, PC_PCR, // eret instruction - Mux(ex_reg_btb_hit && !br_taken, PC_EX4, // mispredicted not taken branch - Mux(!ex_reg_btb_hit && br_taken, PC_BR, // mispredicted taken branch - Mux(jr_taken, PC_JR, // taken JALR - Mux(io.dpath.btb_hit, PC_BTB, // predicted PC from BTB - PC_4))))))); // PC+4 + Mux(wb_reg_exception, PC_PCR, // exception + Mux(wb_reg_eret, PC_PCR, // eret instruction + Mux(replay_wb, PC_WB, // replay + Mux(ex_reg_jalr, PC_EX, // JALR + Mux(!ex_reg_btb_hit, PC_EX, // mispredicted taken branch + PC_EX4))))) // mispredicted not taken branch - io.dpath.wen_btb := !ex_reg_btb_hit && br_taken - io.dpath.clr_btb := ex_reg_btb_hit && !br_taken || id_reg_icmiss; - - io.imem.req_val := !reset.toBool && (take_pc_wb || !mem_reg_replay && !ex_reg_replay && (take_pc_ex || !id_reg_replay)) + io.imem.req.bits.mispredict := !take_pc_wb && !ex_reg_jalr && ex_reg_btb_hit != br_taken + io.imem.req.bits.taken := !ex_reg_btb_hit + io.imem.req.valid := take_pc // stall for RAW/WAW 
hazards on loads, AMOs, and mul/div in execute stage. val data_hazard_ex = ex_reg_wen && @@ -795,7 +769,7 @@ class rocketCtrl extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) || + val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_val || mem_reg_mul_val || mem_reg_fp_val) || fp_data_hazard_mem && mem_reg_fp_val id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) @@ -812,28 +786,24 @@ class rocketCtrl extends Component val id_wb_hazard = data_hazard_wb && (wb_reg_dcache_miss || wb_reg_div_mul_val) || fp_data_hazard_wb && (wb_reg_dcache_miss || wb_reg_fp_val) + val killd_common = take_pc || id_interrupt || ex_reg_replay_next + val ctrl_killd = killd_common || !io.imem.resp.valid + val ctrl_stalld = - !take_pc && - ( - id_ex_hazard || id_mem_hazard || id_wb_hazard || - id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || - id_fp_val && id_stall_fpu || - id_mem_val.toBool && !(io.dmem.req.ready && io.dtlb_rdy) || - ((id_sync === SYNC_D) || (id_sync === SYNC_I)) && !io.dmem.req.ready || - vec_stalld - ); - val ctrl_stallf = ctrl_stalld; - - val ctrl_killd = take_pc || ctrl_stalld; - val ctrl_killf = take_pc || !io.imem.resp_val; + id_ex_hazard || id_mem_hazard || id_wb_hazard || + id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || + id_fp_val && id_stall_fpu || + id_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || + (id_sync === SYNC_D || id_sync === SYNC_I) && !io.dmem.req.ready || + vec_stalld io.dpath.flush_inst := wb_reg_flush_inst; - io.dpath.stallf := ctrl_stallf; - io.dpath.stalld := ctrl_stalld; - io.dpath.killf := ctrl_killf; - io.dpath.killd := ctrl_killd; + io.dpath.stalld := !ctrl_killd && ctrl_stalld; + io.dpath.killd := ctrl_killd || ctrl_stalld 
io.dpath.killx := kill_ex; - io.dpath.killm := kill_mem; + io.dpath.killm := killm_common + io.imem.resp.ready := killd_common || !ctrl_stalld + io.imem.req.bits.invalidate := wb_reg_flush_inst io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen io.dpath.ren2 := id_renx2.toBool; @@ -841,12 +811,15 @@ class rocketCtrl extends Component io.dpath.sel_alu2 := id_sel_alu2.toUFix io.dpath.fn_dw := id_fn_dw.toBool; io.dpath.fn_alu := id_fn_alu.toUFix - io.dpath.div_fn := id_div_fn.toUFix - io.dpath.div_val := id_div_val.toBool && id_waddr != UFix(0); - io.dpath.mul_fn := id_mul_fn.toUFix - io.dpath.mul_val := id_mul_val.toBool && id_waddr != UFix(0); + io.dpath.div_fn := ex_reg_mul_fn + io.dpath.div_val := ex_reg_div_val + io.dpath.div_kill := mem_reg_div_val && killm_common + io.dpath.mul_fn := ex_reg_mul_fn + io.dpath.mul_val := ex_reg_mul_val + io.dpath.mul_kill := mem_reg_mul_val && killm_common io.dpath.ex_fp_val:= ex_reg_fp_val; io.dpath.mem_fp_val:= mem_reg_fp_val; + io.dpath.ex_jalr := ex_reg_jalr io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; io.dpath.wb_wen := wb_reg_wen; @@ -860,12 +833,12 @@ class rocketCtrl extends Component io.fpu.valid := !io.dpath.killd && id_fp_val io.fpu.killx := kill_ex - io.fpu.killm := kill_mem + io.fpu.killm := killm_common io.dtlb_val := ex_reg_mem_val io.dtlb_kill := mem_reg_kill io.dmem.req.valid := ex_reg_mem_val - io.dmem.req.bits.kill := kill_dcache + io.dmem.req.bits.kill := killm_common || io.dtlb_miss io.dmem.req.bits.cmd := ex_reg_mem_cmd io.dmem.req.bits.typ := ex_reg_mem_type } diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 41268264..c3c15522 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -4,8 +4,10 @@ import Chisel._ import Node._ import Constants._ -class rocketDivider(w: Int, earlyOut: Boolean = false) extends Component { +class rocketDivider(earlyOut: Boolean = false) extends Component { val io = new 
ioMultiplier + val w0 = io.req.bits.in0.getWidth + val w = w0+1 // sign bit val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_ready); @@ -26,57 +28,16 @@ class rocketDivider(w: Int, earlyOut: Boolean = false) extends Component { val fn = io.req.bits.fn(io.req.bits.fn.width-2,0) val tc = (fn === DIV_D) || (fn === DIV_R); - switch (state) { - is (s_ready) { - when (io.req.valid) { - state := Mux(tc, s_neg_inputs, s_busy) - } - } - is (s_neg_inputs) { - state := Mux(io.req_kill, s_ready, s_busy) - } - is (s_busy) { - when (io.req_kill && Reg(io.req.ready)) { - state := s_ready - } - .elsewhen (count === UFix(w)) { - state := Mux(neg_quo || neg_rem, s_neg_outputs, s_done) - } - } - is (s_neg_outputs) { - state := s_done - } - is (s_done) { - when (io.resp_rdy) { - state := s_ready - } - } - } - - // state machine + val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(w0-1), io.req.bits.in0(w0/2-1)) + val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(w0-1,w0/2), Fill(w0/2, lhs_sign)) + val lhs_in = Cat(lhs_sign, lhs_hi, io.req.bits.in0(w0/2-1,0)) - val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(w-1), io.req.bits.in0(w/2-1)) - val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(w-1,w/2), Fill(w/2, lhs_sign)) - val lhs_in = Cat(lhs_hi, io.req.bits.in0(w/2-1,0)) - - val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w-1), io.req.bits.in1(w/2-1)) - val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(w-1,w/2), Fill(w/2, rhs_sign)) - val rhs_in = Cat(rhs_hi, io.req.bits.in1(w/2-1,0)) + val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w0-1), io.req.bits.in1(w0/2-1)) + val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(w0-1,w0/2), Fill(w0/2, rhs_sign)) + val rhs_in = Cat(rhs_sign, rhs_hi, io.req.bits.in1(w0/2-1,0)) - when (io.req.fire()) { - count := UFix(0) - half := (dw === DW_32); - neg_quo := Bool(false); - neg_rem := Bool(false); - rem := (fn === DIV_R) || (fn === DIV_RU); - reg_tag := io.req_tag; 
- divby0 := Bool(true); - divisor := rhs_in - remainder := lhs_in - } when (state === s_neg_inputs) { - neg_rem := remainder(w-1) - neg_quo := (remainder(w-1) != divisor(w-1)) + state := s_busy when (remainder(w-1)) { remainder := Cat(remainder(2*w, w), -remainder(w-1,0)) } @@ -85,6 +46,7 @@ class rocketDivider(w: Int, earlyOut: Boolean = false) extends Component { } } when (state === s_neg_outputs) { + state := s_done when (neg_rem && neg_quo && !divby0) { remainder := Cat(-remainder(2*w, w+1), remainder(w), -remainder(w-1,0)) } @@ -96,6 +58,9 @@ class rocketDivider(w: Int, earlyOut: Boolean = false) extends Component { } } when (state === s_busy) { + when (count === UFix(w)) { + state := Mux(neg_quo || neg_rem, s_neg_outputs, s_done) + } count := count + UFix(1) val msb = subtractor(w) @@ -112,11 +77,26 @@ class rocketDivider(w: Int, earlyOut: Boolean = false) extends Component { remainder := remainder << shift count := shift } - } + } + when (state === s_done && io.resp_rdy || io.req_kill) { + state := s_ready + } + when (io.req.fire()) { + state := Mux(lhs_sign || rhs_sign, s_neg_inputs, s_busy) + count := UFix(0) + half := (dw === DW_32); + neg_quo := lhs_sign != rhs_sign + neg_rem := lhs_sign + rem := (fn === DIV_R) || (fn === DIV_RU); + reg_tag := io.req_tag; + divby0 := Bool(true); + divisor := rhs_in + remainder := lhs_in + } - val result = Mux(rem, remainder(2*w, w+1), remainder(w-1,0)) + val result = Mux(rem, remainder(w+w0, w+1), remainder(w0-1,0)) - io.resp_bits := Mux(half, Cat(Fill(w/2, result(w/2-1)), result(w/2-1,0)), result) + io.resp_bits := Mux(half, Cat(Fill(w0/2, result(w0/2-1)), result(w0/2-1,0)), result) io.resp_tag := reg_tag io.resp_val := state === s_done io.req.ready := state === s_ready diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f052f6f1..24f70519 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -7,19 +7,13 @@ import Constants._ import Instructions._ 
import hwacha._ -class ioDpathImem extends Bundle() -{ - val req_addr = UFix(OUTPUT, VADDR_BITS+1); - val resp_data = Bits(INPUT, 32); -} - class ioDpathAll extends Bundle() { val host = new ioHTIF(); val ctrl = new ioCtrlDpath().flip val dmem = new ioHellaCache val dtlb = new ioDTLB_CPU_req_bundle().asOutput() - val imem = new ioDpathImem(); + val imem = new IOCPUFrontend val ptbr_wen = Bool(OUTPUT); val ptbr = UFix(OUTPUT, PADDR_BITS); val fpu = new ioDpathFPU(); @@ -32,27 +26,16 @@ class ioDpathAll extends Bundle() class rocketDpath extends Component { val io = new ioDpathAll(); - - val btb = new rocketDpathBTB(4); // # of entries in BTB - - val if_btb_target = btb.io.target; val pcr = new rocketDpathPCR(); val ex_pcr = pcr.io.r.data; - val alu = new rocketDpathALU(); + val alu = new ALU val ex_alu_out = alu.io.out; val ex_alu_adder_out = alu.io.adder_out; val rfile = new rocketDpathRegfile(); - // instruction fetch definitions - val if_reg_pc = Reg(resetVal = UFix(START_ADDR,VADDR_BITS+1)); - - // instruction decode definitions - val id_reg_inst = Reg(resetVal = NOP); - val id_reg_pc = Reg() { UFix(width = VADDR_BITS+1) }; - // execute definitions val ex_reg_pc = Reg() { UFix() }; val ex_reg_inst = Reg() { Bits() }; @@ -62,13 +45,8 @@ class rocketDpath extends Component val ex_reg_rs2 = Reg() { Bits() }; val ex_reg_rs1 = Reg() { Bits() }; val ex_reg_waddr = Reg() { UFix() }; - val ex_reg_ctrl_eret = Reg(resetVal = Bool(false)); val ex_reg_ctrl_fn_dw = Reg() { UFix() }; val ex_reg_ctrl_fn_alu = Reg() { UFix() }; - val ex_reg_ctrl_mul_val = Reg(resetVal = Bool(false)); - val ex_reg_ctrl_mul_fn = Reg() { UFix() }; - val ex_reg_ctrl_div_val = Reg(resetVal = Bool(false)); - val ex_reg_ctrl_div_fn = Reg() { UFix() }; val ex_reg_ctrl_sel_wb = Reg() { UFix() }; val ex_wdata = Bits(); @@ -99,9 +77,6 @@ class rocketDpath extends Component val r_dmem_resp_replay = Reg(resetVal = Bool(false)); val r_dmem_fp_replay = Reg(resetVal = Bool(false)); val r_dmem_resp_waddr = 
Reg() { UFix() }; - - // instruction fetch stage - val if_pc_plus4 = if_reg_pc + UFix(4); val ex_pc_plus4 = ex_reg_pc + UFix(4); val ex_branch_target = ex_reg_pc + Cat(ex_reg_op2(VADDR_BITS-1,0), Bits(0,1)).toUFix @@ -109,41 +84,24 @@ class rocketDpath extends Component val ex_ea_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0)) val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix - val if_next_pc = - Mux(io.ctrl.sel_pc === PC_BTB, Cat(if_btb_target(VADDR_BITS-1), if_btb_target), - Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, - Mux(io.ctrl.sel_pc === PC_BR, ex_branch_target, - Mux(io.ctrl.sel_pc === PC_JR, ex_effective_address, - Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec), - Mux(io.ctrl.sel_pc === PC_WB, wb_reg_pc, - if_pc_plus4)))))) // PC_4 - - when (!io.ctrl.stallf) { - if_reg_pc := if_next_pc.toUFix; - } - - io.ctrl.xcpt_ma_inst := if_next_pc(1,0) != Bits(0) - - io.imem.req_addr := - Mux(io.ctrl.stallf, if_reg_pc, - if_next_pc.toUFix); - - btb.io.current_pc := if_reg_pc; - btb.io.hit <> io.ctrl.btb_hit; - btb.io.wen <> io.ctrl.wen_btb; - btb.io.clr <> io.ctrl.clr_btb; - btb.io.correct_pc := ex_reg_pc; - btb.io.correct_target := ex_branch_target - btb.io.invalidate := io.ctrl.flush_inst + // hook up I$ + io.imem.req.bits.invalidateTLB := pcr.io.ptbr_wen + io.imem.req.bits.currentpc := ex_reg_pc + io.imem.req.bits.status := pcr.io.status + io.imem.req.bits.pc := + Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, + Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), + Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec).toUFix, + wb_reg_pc))) // PC_WB // instruction decode stage - when (!io.ctrl.stalld) { - id_reg_pc := if_reg_pc; - id_reg_inst := Mux(io.ctrl.killf, NOP, io.imem.resp_data) - } + val id_inst = io.imem.resp.bits.data + val id_pc = io.imem.resp.bits.pc 
+ debug(id_inst) + debug(id_pc) - val id_raddr1 = id_reg_inst(26,22).toUFix; - val id_raddr2 = id_reg_inst(21,17).toUFix; + val id_raddr1 = id_inst(26,22).toUFix; + val id_raddr2 = id_inst(21,17).toUFix; // regfile read rfile.io.r0.en <> io.ctrl.ren2; @@ -156,7 +114,7 @@ class rocketDpath extends Component // destination register selection val id_waddr = - Mux(io.ctrl.sel_wa === WA_RD, id_reg_inst(31,27).toUFix, + Mux(io.ctrl.sel_wa === WA_RD, id_inst(31,27).toUFix, RA); // WA_RA // bypass muxes @@ -185,26 +143,26 @@ class rocketDpath extends Component val id_imm_l = io.ctrl.sel_alu2 === A2_LTYPE val id_imm_zero = io.ctrl.sel_alu2 === A2_ZERO || io.ctrl.sel_alu2 === A2_RTYPE val id_imm_ibz = io.ctrl.sel_alu2 === A2_ITYPE || io.ctrl.sel_alu2 === A2_BTYPE || id_imm_zero - val id_imm_sign = Mux(id_imm_bj, id_reg_inst(31), - Mux(id_imm_l, id_reg_inst(26), + val id_imm_sign = Mux(id_imm_bj, id_inst(31), + Mux(id_imm_l, id_inst(26), Mux(id_imm_zero, Bits(0,1), - id_reg_inst(21)))) // IMM_ITYPE + id_inst(21)))) // IMM_ITYPE val id_imm_small = Mux(id_imm_zero, Bits(0,12), - Cat(Mux(id_imm_bj, id_reg_inst(31,27), id_reg_inst(21,17)), id_reg_inst(16,10))) + Cat(Mux(id_imm_bj, id_inst(31,27), id_inst(21,17)), id_inst(16,10))) val id_imm = Cat(Fill(32, id_imm_sign), - Mux(id_imm_l, Cat(id_reg_inst(26,7), Bits(0,12)), + Mux(id_imm_l, Cat(id_inst(26,7), Bits(0,12)), Mux(id_imm_ibz, Cat(Fill(20, id_imm_sign), id_imm_small), - Cat(Fill(7, id_imm_sign), id_reg_inst(31,7))))) // A2_JTYPE + Cat(Fill(7, id_imm_sign), id_inst(31,7))))) // A2_JTYPE val id_op2_dmem_bypass = id_rs2_dmem_bypass && io.ctrl.sel_alu2 === A2_RTYPE val id_op2 = Mux(io.ctrl.sel_alu2 === A2_RTYPE, id_rs2, id_imm) - io.ctrl.inst := id_reg_inst - io.fpu.inst := id_reg_inst + io.ctrl.inst := id_inst + io.fpu.inst := id_inst // execute stage - ex_reg_pc := id_reg_pc; - ex_reg_inst := id_reg_inst + ex_reg_pc := id_pc + ex_reg_inst := id_inst ex_reg_raddr1 := id_raddr1 ex_reg_raddr2 := id_raddr2; ex_reg_op2 := id_op2; 
@@ -213,21 +171,8 @@ class rocketDpath extends Component ex_reg_waddr := id_waddr; ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUFix; ex_reg_ctrl_fn_alu := io.ctrl.fn_alu; - ex_reg_ctrl_mul_fn := io.ctrl.mul_fn; - ex_reg_ctrl_div_fn := io.ctrl.div_fn; ex_reg_ctrl_sel_wb := io.ctrl.sel_wb; - when(io.ctrl.killd) { - ex_reg_ctrl_div_val := Bool(false); - ex_reg_ctrl_mul_val := Bool(false); - ex_reg_ctrl_eret := Bool(false); - } - .otherwise { - ex_reg_ctrl_div_val := io.ctrl.div_val; - ex_reg_ctrl_mul_val := io.ctrl.mul_val; - ex_reg_ctrl_eret := io.ctrl.id_eret; - } - val ex_rs1 = Mux(Reg(id_rs1_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs1) val ex_rs2 = Mux(Reg(id_rs2_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs2) val ex_op2 = Mux(Reg(id_op2_dmem_bypass), wb_reg_dmem_wdata, ex_reg_op2) @@ -240,19 +185,19 @@ class rocketDpath extends Component io.fpu.fromint_data := ex_rs1 // divider - val div = new rocketDivider(64) - div.io.req.valid := ex_reg_ctrl_div_val - div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_div_fn) + val div = new rocketDivider(earlyOut = true) + div.io.req.valid := io.ctrl.div_val + div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, io.ctrl.div_fn) div.io.req.bits.in0 := ex_rs1 div.io.req.bits.in1 := ex_rs2 div.io.req_tag := ex_reg_waddr - div.io.req_kill := io.ctrl.killm + div.io.req_kill := io.ctrl.div_kill div.io.resp_rdy := !dmem_resp_replay io.ctrl.div_rdy := div.io.req.ready io.ctrl.div_result_val := div.io.resp_val // multiplier - var mul_io = new rocketMultiplier(unroll = 6).io + var mul_io = new rocketMultiplier(unroll = 4, earlyOut = true).io if (HAVE_VEC) { val vu_mul = new rocketVUMultiplier(nwbq = 1) @@ -260,12 +205,12 @@ class rocketDpath extends Component vu_mul.io.vu.resp <> io.vec_imul_resp mul_io = vu_mul.io.cpu } - mul_io.req.valid := ex_reg_ctrl_mul_val; - mul_io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, ex_reg_ctrl_mul_fn) + mul_io.req.valid := io.ctrl.mul_val + mul_io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, io.ctrl.mul_fn) mul_io.req.bits.in0 
:= ex_rs1 mul_io.req.bits.in1 := ex_rs2 mul_io.req_tag := ex_reg_waddr - mul_io.req_kill := io.ctrl.killm + mul_io.req_kill := io.ctrl.mul_kill mul_io.resp_rdy := !dmem_resp_replay && !div.io.resp_val io.ctrl.mul_rdy := mul_io.req.ready io.ctrl.mul_result_val := mul_io.resp_val diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 25fc2956..fc4b7a03 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -15,27 +15,45 @@ class ioALU extends Bundle(){ val adder_out = UFix(OUTPUT, 64); } -class rocketDpathALU extends Component +object ALU { + val FN_X = Bits("b????") + val FN_ADD = UFix(0) + val FN_SL = UFix(1) + val FN_XOR = UFix(4) + val FN_OR = UFix(6) + val FN_AND = UFix(7) + val FN_SR = UFix(5) + val FN_SUB = UFix(8) + val FN_SLT = UFix(10) + val FN_SLTU = UFix(11) + val FN_SRA = UFix(13) + val FN_OP2 = UFix(15) + + def isSub(cmd: Bits) = cmd(3) + def isSLTU(cmd: Bits) = cmd(0) +} + +class ALU extends Component +{ + import ALU._ val io = new ioALU(); // ADD, SUB - val sub = (io.fn === FN_SUB) || (io.fn === FN_SLT) || (io.fn === FN_SLTU) + val sub = isSub(io.fn) val adder_rhs = Mux(sub, ~io.in2, io.in2) val sum = (io.in1 + adder_rhs + sub.toUFix)(63,0) // SLT, SLTU val less = Mux(io.in1(63) === io.in2(63), sum(63), - Mux(io.fn === FN_SLT, io.in1(63), io.in2(63))) + Mux(isSLTU(io.fn), io.in2(63), io.in1(63))) // SLL, SRL, SRA - val sra = (io.fn === FN_SRA) val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0)).toUFix - val shright = sra || (io.fn === FN_SR) - val shin_hi_32 = Mux(sra, Fill(32, io.in1(31)), UFix(0,32)) + val shin_hi_32 = Mux(isSub(io.fn), Fill(32, io.in1(31)), UFix(0,32)) val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) val shin = Cat(shin_hi, io.in1(31,0)) - val shout_r = (Cat(sra & shin(63), shin).toFix >> shamt)(63,0) + val shout_r = (Cat(isSub(io.fn) & shin(63), shin).toFix >> shamt)(63,0) val shout_l = (shin << shamt)(63,0) val bitwise_logic 
= diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index f216e52a..61cec1fc 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -36,13 +36,14 @@ class rocketDpathBTB(entries: Int) extends Component val valid = Reg(resetVal = Bool(false)) val my_hit = valid && tag === io.current_pc val my_update = valid && tag === io.correct_pc - val my_clr = io.clr && my_update || io.invalidate - val my_wen = io.wen && (my_update || !update && UFix(i) === repl_way) - valid := !my_clr && (valid || my_wen) - when (my_wen) { - tag := io.correct_pc - target := io.correct_target + when (io.wen && (my_update || !update && UFix(i) === repl_way)) { + valid := Bool(false) + when (!io.clr) { + valid := Bool(true) + tag := io.correct_pc + target := io.correct_target + } } hit_reduction = hit_reduction || my_hit diff --git a/rocket/src/main/scala/dtlb.scala b/rocket/src/main/scala/dtlb.scala deleted file mode 100644 index 6ce054dc..00000000 --- a/rocket/src/main/scala/dtlb.scala +++ /dev/null @@ -1,183 +0,0 @@ -package rocket - -import Chisel._; -import Node._; -import Constants._; -import scala.math._; -import hwacha._ - -// ioDTLB_CPU also located in hwacha/src/vuVXU-Interface.scala -// should keep them in sync - -class ioDTLB_CPU_req_bundle extends Bundle -{ - // lookup requests - val kill = Bool() - val cmd = Bits(width=4) // load/store/amo - val asid = Bits(width=ASID_BITS) - val vpn = Bits(width=VPN_BITS+1) -} -class ioDTLB_CPU_req extends FIFOIO()( { new ioDTLB_CPU_req_bundle() } ) - -class ioDTLB_CPU_resp extends Bundle -{ - // lookup responses - val miss = Bool(OUTPUT) - val ppn = Bits(OUTPUT, PPN_BITS) - val xcpt_ld = Bool(OUTPUT) - val xcpt_st = Bool(OUTPUT) - val xcpt_pf = Bool(OUTPUT) -} - -class ioDTLB extends Bundle -{ - // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(INPUT, 32) - // invalidate all TLB entries - val invalidate = 
Bool(INPUT) - val cpu_req = new ioDTLB_CPU_req().flip - val cpu_resp = new ioDTLB_CPU_resp() - val ptw = new ioTLB_PTW() -} - -class rocketDTLB(entries: Int) extends Component -{ - val io = new ioDTLB(); - - val addr_bits = ceil(log10(entries)/log10(2)).toInt; - - val s_ready :: s_request :: s_wait :: Nil = Enum(3) { UFix() }; - val state = Reg(resetVal = s_ready); - - val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_vpn = Reg() { Bits() } - val r_cpu_req_cmd = Reg() { Bits() } - val r_cpu_req_asid = Reg() { Bits() } - val r_refill_tag = Reg() { Bits() } - val r_refill_waddr = Reg() { UFix() } - - when (io.cpu_req.valid && io.cpu_req.ready) { - r_cpu_req_vpn := io.cpu_req.bits.vpn; - r_cpu_req_cmd := io.cpu_req.bits.cmd; - r_cpu_req_asid := io.cpu_req.bits.asid; - r_cpu_req_val := Bool(true); - } - .otherwise { - r_cpu_req_val := Bool(false); - } - - val req_load = (r_cpu_req_cmd === M_XRD); - val req_store = (r_cpu_req_cmd === M_XWR); - val req_amo = r_cpu_req_cmd(3).toBool; - val req_pf = (r_cpu_req_cmd === M_PFR) || (r_cpu_req_cmd === M_PFW) - - val bad_va = r_cpu_req_vpn(VPN_BITS) != r_cpu_req_vpn(VPN_BITS-1); - - val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); - val tag_ram = Mem(entries) { io.ptw.resp_ppn.clone } - when (io.ptw.resp_val) { tag_ram(r_refill_waddr) := io.ptw.resp_ppn } - - val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); - tag_cam.io.clear := io.invalidate; - tag_cam.io.clear_hit := io.cpu_resp.xcpt_ld || io.cpu_resp.xcpt_st || io.cpu_resp.xcpt_pf - tag_cam.io.tag := lookup_tag; - tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; - tag_cam.io.write_tag := r_refill_tag; - tag_cam.io.write_addr := r_refill_waddr; - val tag_hit = tag_cam.io.hit || bad_va; - val tag_hit_addr = tag_cam.io.hit_addr; - - // extract fields from status register - val status_s = io.status(SR_S).toBool; // user/supervisor mode - val status_u = !status_s; - val status_vm = io.status(SR_VM).toBool // virtual memory enable - - // extract 
fields from PT permission bits - val ptw_perm_ur = io.ptw.resp_perm(2); - val ptw_perm_uw = io.ptw.resp_perm(1); - val ptw_perm_sr = io.ptw.resp_perm(5); - val ptw_perm_sw = io.ptw.resp_perm(4); - - // permission bit arrays - val ur_array = Reg(resetVal = Bits(0, entries)); // user read permission - val uw_array = Reg(resetVal = Bits(0, entries)); // user write permission - val sr_array = Reg(resetVal = Bits(0, entries)); // supervisor read permission - val sw_array = Reg(resetVal = Bits(0, entries)); // supervisor write permission - when (io.ptw.resp_val) { - ur_array := ur_array.bitSet(r_refill_waddr, ptw_perm_ur); - uw_array := uw_array.bitSet(r_refill_waddr, ptw_perm_uw); - sr_array := sr_array.bitSet(r_refill_waddr, ptw_perm_sr); - sw_array := sw_array.bitSet(r_refill_waddr, ptw_perm_sw); - } - - // when the page table lookup reports an error, set all permission - // bits to 0 so the next access will cause an exception - when (io.ptw.resp_err) { - ur_array := ur_array.bitSet(r_refill_waddr, Bool(false)); - uw_array := uw_array.bitSet(r_refill_waddr, Bool(false)); - sr_array := sr_array.bitSet(r_refill_waddr, Bool(false)); - sw_array := sw_array.bitSet(r_refill_waddr, Bool(false)); - } - - // high if there are any unused (invalid) entries in the TLB - val has_invalid_entry = !tag_cam.io.valid_bits.andR - val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) - val plru = new PseudoLRU(entries) - val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace).toUFix; - - val lookup = (state === s_ready) && status_vm && r_cpu_req_val && (req_load || req_store || req_amo || req_pf); - val lookup_hit = lookup && tag_hit; - val lookup_miss = lookup && !tag_hit; - val tlb_hit = !io.cpu_req.bits.kill && lookup_hit; - val tlb_miss = !io.cpu_req.bits.kill && lookup_miss; - - // currently replace TLB entries in LIFO order - // TODO: implement LRU replacement policy - when (tlb_miss) { - r_refill_tag := lookup_tag; - r_refill_waddr := repl_waddr; - } - when 
(tlb_hit) { - plru.access(tag_hit_addr) - } - - val load_fault_common = tlb_hit && - ((status_s && !sr_array(tag_hit_addr)) || - (status_u && !ur_array(tag_hit_addr)) || - bad_va) - val store_fault_common = tlb_hit && - ((status_s && !sw_array(tag_hit_addr)) || - (status_u && !uw_array(tag_hit_addr)) || - bad_va) - - io.cpu_resp.xcpt_ld := load_fault_common && (req_load || req_amo) - io.cpu_resp.xcpt_st := store_fault_common && (req_store || req_amo) - io.cpu_resp.xcpt_pf := load_fault_common && req_pf - - io.cpu_req.ready := (state === s_ready) && !lookup_miss; - io.cpu_resp.miss := tlb_miss; - io.cpu_resp.ppn := - Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)); - - io.ptw.req_val := (state === s_request); - io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); - - // control state machine - switch (state) { - is (s_ready) { - when (tlb_miss) { - state := s_request; - } - } - is (s_request) { - when (io.ptw.req_rdy) { - state := s_wait; - } - } - is (s_wait) { - when (io.ptw.resp_val || io.ptw.resp_err) { - state := s_ready; - } - } - } -} diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 9b79e2ce..3967b477 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -6,172 +6,249 @@ import Constants._; import scala.math._; import uncore._ -// interface between I$ and pipeline/ITLB (32 bits wide) -class ioImem extends Bundle +case class ICacheConfig(co: CoherencePolicyWithUncached, sets: Int, assoc: Int, parity: Boolean = false) { - val invalidate = Bool(INPUT); - val itlb_miss = Bool(INPUT); - val req_val = Bool(INPUT); - val req_idx = Bits(INPUT, PGIDX_BITS); - val req_ppn = Bits(INPUT, PPN_BITS); - val resp_data = Bits(OUTPUT, 32); - val resp_val = Bool(OUTPUT); + val w = 1 + val ibytes = INST_BITS/8 + + val dm = assoc == 1 + val lines = sets * assoc + val databits = MEM_DATA_BITS + val datawidth = databits + (if (parity) 1 else 0) + val idxbits = log2Up(sets) + val offbits = 
OFFSET_BITS + val untagbits = idxbits + offbits + val tagbits = PADDR_BITS - untagbits + val tagwidth = tagbits + (if (parity) 1 else 0) + + require(isPow2(sets) && isPow2(assoc)) + require(isPow2(w) && isPow2(ibytes)) + require(PGIDX_BITS >= untagbits) } -class ioRocketICache extends Bundle() -{ - val cpu = new ioImem(); - val mem = new ioUncachedRequestor +class FrontendReq extends Bundle { + val pc = UFix(width = VADDR_BITS+1) + val status = Bits(width = 32) + val invalidate = Bool() + val invalidateTLB = Bool() + val mispredict = Bool() + val taken = Bool() + val currentpc = UFix(width = VADDR_BITS+1) } -// basic direct mapped instruction cache -// 32 bit wide cpu port, 128 bit wide memory port, 64 byte cachelines -// parameters : -// lines = # cache lines -class rocketICache(sets: Int, assoc: Int, co: CoherencePolicyWithUncached) extends Component +class FrontendResp extends Bundle { + val pc = UFix(width = VADDR_BITS+1) // ID stage PC + val data = Bits(width = INST_BITS) + val taken = Bool() + val xcpt_ma = Bool() + val xcpt_if = Bool() +} + +class IOCPUFrontend extends Bundle { + val req = new PipeIO()(new FrontendReq) + val resp = new FIFOIO()(new FrontendResp).flip + val ptw = new IOTLBPTW().flip +} + +class Frontend(c: ICacheConfig) extends Component { - val io = new ioRocketICache(); - - val lines = sets * assoc; - val addrbits = PADDR_BITS; - val indexbits = log2Up(sets); - val offsetbits = OFFSET_BITS; - val tagmsb = addrbits - 1; - val taglsb = indexbits+offsetbits; - val tagbits = addrbits-taglsb; - val indexmsb = taglsb-1; - val indexlsb = offsetbits; - val offsetmsb = indexlsb-1; - val databits = 32; - val offsetlsb = log2Up(databits/8); - val rf_cnt_bits = log2Up(REFILL_CYCLES); - - require(PGIDX_BITS >= taglsb); // virtually-indexed, physically-tagged constraint - require(isPow2(sets) && isPow2(assoc)); + val io = new Bundle { + val cpu = new IOCPUFrontend().flip + val mem = new ioUncachedRequestor + } - val s_reset :: s_ready :: s_request :: 
s_refill_wait :: s_refill :: Nil = Enum(5) { UFix() }; - val state = Reg(resetVal = s_reset); + val btb = new rocketDpathBTB(BTB_ENTRIES) + val icache = new ICache(c) + val tlb = new TLB(ITLB_ENTRIES) + + val s1_pc = Reg() { UFix() } + val s2_valid = Reg(resetVal = Bool(true)) + val s2_pc = Reg(resetVal = UFix(START_ADDR)) + val s2_btb_hit = Reg(resetVal = Bool(false)) + val s2_xcpt_if = Reg(resetVal = Bool(false)) + + val btbTarget = Cat(btb.io.target(VADDR_BITS-1), btb.io.target) + val pcp4_0 = s1_pc + UFix(c.ibytes) + val pcp4 = Cat(s1_pc(VADDR_BITS-1) & pcp4_0(VADDR_BITS-1), pcp4_0(VADDR_BITS-1,0)) + val icmiss = s2_valid && !icache.io.resp.valid + val npc = Mux(icmiss, s2_pc, Mux(btb.io.hit, btbTarget, pcp4)).toUFix + + val stall = !io.cpu.resp.ready + when (!stall) { + s1_pc := npc + s2_valid := !icmiss + s2_pc := s1_pc + s2_btb_hit := btb.io.hit + s2_xcpt_if := tlb.io.resp.xcpt_if + } + when (io.cpu.req.valid) { + s1_pc := io.cpu.req.bits.pc + s2_valid := Bool(false) + } + + btb.io.current_pc := s1_pc + btb.io.wen := io.cpu.req.bits.mispredict + btb.io.clr := !io.cpu.req.bits.taken + btb.io.correct_pc := io.cpu.req.bits.currentpc + btb.io.correct_target := io.cpu.req.bits.pc + btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.req.bits.invalidateTLB + + tlb.io.ptw <> io.cpu.ptw + tlb.io.req.valid := !stall && !icmiss + tlb.io.req.bits.vpn := s1_pc >> UFix(PGIDX_BITS) + tlb.io.req.bits.status := io.cpu.req.bits.status + tlb.io.req.bits.asid := UFix(0) + tlb.io.req.bits.invalidate := io.cpu.req.bits.invalidateTLB + tlb.io.req.bits.instruction := Bool(true) + + icache.io.mem <> io.mem + icache.io.req.valid := !stall + icache.io.req.bits.idx := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) + icache.io.req.bits.invalidate := io.cpu.req.bits.invalidate + icache.io.req.bits.ppn := tlb.io.resp.ppn + icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss + icache.io.resp.ready := io.cpu.resp.ready + + io.cpu.resp.valid := s2_valid && (s2_xcpt_if || 
icache.io.resp.valid) + io.cpu.resp.bits.pc := s2_pc + io.cpu.resp.bits.data := icache.io.resp.bits.data + io.cpu.resp.bits.taken := s2_btb_hit + io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(c.ibytes)-1,0) != UFix(0) + io.cpu.resp.bits.xcpt_if := s2_xcpt_if +} + +class ICache(c: ICacheConfig) extends Component +{ + val io = new Bundle { + val req = new PipeIO()(new Bundle { + val idx = UFix(width = PGIDX_BITS) + val invalidate = Bool() + val ppn = UFix(width = PPN_BITS) // delayed one cycle + val kill = Bool() // delayed one cycle + }).flip + val resp = new FIFOIO()(new Bundle { + val data = Bits(width = INST_BITS) + val datablock = Bits(width = c.databits) + }) + val mem = new ioUncachedRequestor + } + + val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(4) { UFix() } + val state = Reg(resetVal = s_ready) val invalidated = Reg() { Bool() } - - val r_cpu_req_idx = Reg { Bits() } - val r_cpu_req_ppn = Reg { Bits() } - val r_cpu_req_val = Reg(resetVal = Bool(false)); - + val stall = !io.resp.ready val rdy = Bool() - val tag_hit = Bool() - - when (io.cpu.req_val && rdy) { - r_cpu_req_val := Bool(true) - r_cpu_req_idx := io.cpu.req_idx - } - .otherwise { - r_cpu_req_val := Bool(false) - } - when (state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss) { - r_cpu_req_ppn := io.cpu.req_ppn + + val s2_valid = Reg(resetVal = Bool(false)) + val s2_addr = Reg { UFix(width = PADDR_BITS) } + + val s1_valid = Reg(resetVal = Bool(false)) + val s1_pgoff = Reg() { UFix(width = PGIDX_BITS) } + + val s0_valid = io.req.valid && rdy || s1_valid && stall && !io.req.bits.kill + val s0_pgoff = Mux(io.req.valid, io.req.bits.idx, s1_pgoff) + + s1_valid := s0_valid + when (io.req.valid && rdy) { + s1_pgoff := s0_pgoff } - val r_cpu_hit_addr = Cat(io.cpu.req_ppn, r_cpu_req_idx) - val r_cpu_hit_tag = r_cpu_hit_addr(tagmsb,taglsb) - val r_cpu_miss_addr = Cat(r_cpu_req_ppn, r_cpu_req_idx) - val r_cpu_miss_tag = r_cpu_miss_addr(tagmsb,taglsb) - - // refill counter - val refill_count 
= Reg(resetVal = UFix(0, rf_cnt_bits)); - when (io.mem.xact_rep.valid) { - refill_count := refill_count + UFix(1); + s2_valid := s1_valid && rdy && !io.req.bits.kill || stall + when (s1_valid && rdy && !stall) { + s2_addr := Cat(io.req.bits.ppn, s1_pgoff).toUFix } - val refill_done = io.mem.xact_rep.valid && refill_count.andR - val repl_way = if (assoc == 1) UFix(0) else LFSR16(state === s_ready && r_cpu_req_val && !io.cpu.itlb_miss && !tag_hit)(log2Up(assoc)-1,0) - val word_shift = Cat(r_cpu_req_idx(offsetmsb-rf_cnt_bits,offsetlsb), UFix(0, log2Up(databits))).toUFix - val tag_we = refill_done - val tag_addr = - Mux((state === s_refill), r_cpu_req_idx(indexmsb,indexlsb), - io.cpu.req_idx(indexmsb,indexlsb)).toUFix; - val data_addr = - Mux((state === s_refill_wait) || (state === s_refill), Cat(r_cpu_req_idx(indexmsb,offsetbits), refill_count), - io.cpu.req_idx(indexmsb, offsetbits-rf_cnt_bits)).toUFix; + val s2_tag = s2_addr(c.tagbits+c.untagbits-1,c.untagbits) + val s2_idx = s2_addr(c.untagbits-1,c.offbits) + val s2_offset = s2_addr(c.offbits-1,0) + val s2_any_tag_hit = Bool() + val s2_hit = s2_valid && s2_any_tag_hit + val s2_miss = s2_valid && !s2_any_tag_hit + rdy := state === s_ready && !s2_miss - val tag_array = Mem(sets, seqRead = true) { Bits(width = tagbits*assoc) } + val (rf_cnt, refill_done) = Counter(io.mem.xact_rep.valid, REFILL_CYCLES) + val repl_way = if (c.dm) UFix(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) + + val tag_array = Mem(c.sets, seqRead = true) { Bits(width = c.tagwidth*c.assoc) } val tag_rdata = Reg() { Bits() } - when (tag_we) { - tag_array.write(tag_addr, Fill(assoc, r_cpu_miss_tag), FillInterleaved(tagbits, if (assoc > 1) UFixToOH(repl_way) else Bits(1))) - }.otherwise { - tag_rdata := tag_array(tag_addr) + when (refill_done) { + val wmask = FillInterleaved(c.tagwidth, if (c.dm) Bits(1) else UFixToOH(repl_way)) + val tag = Cat(if (c.parity) s2_tag.xorR else null, s2_tag) + tag_array.write(s2_idx, Fill(c.assoc, tag), wmask) + 
}.elsewhen (s0_valid) { + tag_rdata := tag_array(s0_pgoff(c.untagbits-1,c.offbits)) } - val vb_array = Reg(resetVal = Bits(0, lines)) - when (io.cpu.invalidate) { + val vb_array = Reg(resetVal = Bits(0, c.lines)) + when (refill_done && !invalidated) { + vb_array := vb_array.bitSet(Cat(repl_way, s2_idx), Bool(true)) + } + when (io.req.bits.invalidate) { vb_array := Bits(0) - }.elsewhen (tag_we) { - vb_array := vb_array.bitSet(Cat(r_cpu_req_idx(indexmsb,indexlsb), if (assoc > 1) repl_way else null), !invalidated) + invalidated := Bool(true) } + val s2_disparity = Vec(c.assoc) { Bool() } + for (i <- 0 until c.assoc) + when (s2_valid && s2_disparity(i)) { vb_array := vb_array.bitSet(Cat(UFix(i), s2_idx), Bool(false)) } - val data_mux = (new Mux1H(assoc)){Bits(width = databits)} - var any_hit = Bool(false) - for (i <- 0 until assoc) - { - val valid = vb_array(Cat(r_cpu_req_idx(indexmsb,indexlsb), if (assoc > 1) UFix(i, log2Up(assoc)) else null)) - val hit = valid && tag_rdata(tagbits*(i+1)-1, tagbits*i) === r_cpu_hit_addr(tagmsb,taglsb) - - // data array - val data_array = Mem(sets*REFILL_CYCLES, seqRead = true){ io.mem.xact_rep.bits.data.clone } - val data_out = Reg(){ io.mem.xact_rep.bits.data.clone } - when (io.mem.xact_rep.valid && repl_way === UFix(i)) { data_array(data_addr) := io.mem.xact_rep.bits.data } - .otherwise { data_out := data_array(data_addr) } - - data_mux.io.sel(i) := hit - data_mux.io.in(i) := (data_out >> word_shift)(databits-1,0); - - any_hit = any_hit || hit + val s2_tag_hit = Vec(c.assoc) { Bool() } + val s2_data_disparity = Vec(c.assoc) { Bool() } + for (i <- 0 until c.assoc) { + val s1_vb = vb_array(Cat(UFix(i), s1_pgoff(c.untagbits-1,c.offbits))).toBool + val s2_vb = Reg() { Bool() } + val s2_tag_out = Reg() { Bits() } + when (s1_valid && rdy && !stall) { + s2_vb := s1_vb + s2_tag_out := tag_rdata(c.tagwidth*(i+1)-1, c.tagwidth*i) + } + s2_tag_hit(i) := s2_vb && s2_tag_out(c.tagbits-1,0) === s2_tag + s2_disparity(i) := Bool(c.parity) && s2_vb 
&& (s2_tag_out.xorR || s2_data_disparity(i)) } - tag_hit := any_hit + s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_) + + val s2_dout = Vec(c.assoc) { Reg() { Bits(width = c.databits) } } + for (i <- 0 until c.assoc) { + val data_array = Mem(c.sets*REFILL_CYCLES, seqRead = true){ Bits(width = c.datawidth) } + val s1_dout = Reg(){ Bits() } + when (io.mem.xact_rep.valid && repl_way === UFix(i)) { + val d = io.mem.xact_rep.bits.data + val wdata = if (c.parity) Cat(d.xorR, d) else d + data_array(Cat(s2_idx,rf_cnt)) := wdata + }.elsewhen (s0_valid) { + s1_dout := data_array(s0_pgoff(c.untagbits-1,c.offbits-rf_cnt.getWidth)) + } + when (s1_valid && rdy && !stall) { s2_dout(i) := s1_dout } + s2_data_disparity(i) := s2_dout(i).xorR + } + val s2_dout_word = s2_dout.map(x => (x >> Cat(s2_offset(log2Up(c.databits/8)-1,log2Up(c.ibytes)), Bits(0,log2Up(c.ibytes*8))))(c.ibytes*8-1,0)) + io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) + io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) val finish_q = (new Queue(1)) { new TransactionFinish } finish_q.io.enq.valid := refill_done && io.mem.xact_rep.bits.require_ack finish_q.io.enq.bits.global_xact_id := io.mem.xact_rep.bits.global_xact_id // output signals - io.cpu.resp_val := !io.cpu.itlb_miss && (state === s_ready) && r_cpu_req_val && tag_hit; - rdy := !io.cpu.itlb_miss && (state === s_ready) && (!r_cpu_req_val || tag_hit); - io.cpu.resp_data := data_mux.io.out + io.resp.valid := s2_hit io.mem.xact_init.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.xact_init.bits := co.getUncachedReadTransactionInit(r_cpu_miss_addr(tagmsb,indexlsb).toUFix, UFix(0)) + io.mem.xact_init.bits := c.co.getUncachedReadTransactionInit(s2_addr >> UFix(c.offbits), UFix(0)) io.mem.xact_finish <> finish_q.io.deq // control state machine - when (io.cpu.invalidate) { - invalidated := Bool(true) - } switch (state) { - is (s_reset) { - state := s_ready; - } is (s_ready) { - when (r_cpu_req_val && !tag_hit 
&& !io.cpu.itlb_miss) { - state := s_request; - } + when (s2_miss) { state := s_request } invalidated := Bool(false) } - is (s_request) - { - when (io.mem.xact_init.ready && finish_q.io.enq.ready) { - state := s_refill_wait; - } + is (s_request) { + when (io.mem.xact_init.ready && finish_q.io.enq.ready) { state := s_refill_wait } } is (s_refill_wait) { - when (io.mem.xact_abort.valid) { - state := s_request - } - when (io.mem.xact_rep.valid) { - state := s_refill; - } + when (io.mem.xact_abort.valid) { state := s_request } + when (io.mem.xact_rep.valid) { state := s_refill } } is (s_refill) { - when (refill_done) { - state := s_ready; - } + when (refill_done) { state := s_ready } } - } + } } diff --git a/rocket/src/main/scala/itlb.scala b/rocket/src/main/scala/itlb.scala deleted file mode 100644 index dac04fa2..00000000 --- a/rocket/src/main/scala/itlb.scala +++ /dev/null @@ -1,229 +0,0 @@ -package rocket - -import Chisel._; -import Node._; -import Constants._; -import scala.math._; - -class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { - val clear = Bool(INPUT); - val clear_hit = Bool(INPUT) - val tag = Bits(INPUT, tag_bits); - val hit = Bool(OUTPUT); - val hit_addr = UFix(OUTPUT, addr_bits); - val valid_bits = Bits(OUTPUT, entries); - - val write = Bool(INPUT); - val write_tag = Bits(INPUT, tag_bits); - val write_addr = UFix(INPUT, addr_bits); -} - -class rocketCAM(entries: Int, tag_bits: Int) extends Component { - val addr_bits = ceil(log(entries)/log(2)).toInt; - val io = new ioCAM(entries, addr_bits, tag_bits); - val cam_tags = Vec(entries) { Reg() { Bits(width = tag_bits) } } - val mux = (new Mux1H(entries)) { Bits(width = addr_bits) } - - val vb_array = Reg(resetVal = Bits(0, entries)); - when (io.write) { - vb_array := vb_array.bitSet(io.write_addr, Bool(true)); - cam_tags(io.write_addr) := io.write_tag - } - when (io.clear) { - vb_array := Bits(0, entries); - } - .elsewhen (io.clear_hit) { - vb_array := vb_array & ~mux.io.sel.toBits - 
} - - var l_hit = Bool(false) - for (i <- 0 to entries-1) { - val my_hit = vb_array(UFix(i)).toBool && (cam_tags(i) === io.tag) - l_hit = l_hit || my_hit - mux.io.in(i) := Bits(i) - mux.io.sel(i) := my_hit - } - - io.valid_bits := vb_array; - io.hit := l_hit; - io.hit_addr := mux.io.out.toUFix; -} - -class PseudoLRU(n: Int) -{ - val state = Reg() { Bits(width = n) } - def access(way: UFix) = { - var next_state = state - var idx = UFix(1,1) - for (i <- log2Up(n)-1 to 0 by -1) { - val bit = way(i) - val mask = (UFix(1,n) << idx)(n-1,0) - next_state = next_state & ~mask | Mux(bit, UFix(0), mask) - //next_state.bitSet(idx, !bit) - idx = Cat(idx, bit) - } - state := next_state - } - def replace = { - var idx = UFix(1,1) - for (i <- 0 until log2Up(n)) - idx = Cat(idx, state(idx)) - idx(log2Up(n)-1,0) - } -} - -// interface between TLB and PTW -class ioTLB_PTW extends Bundle -{ - // requests - val req_val = Bool(OUTPUT); - val req_rdy = Bool(INPUT); - val req_vpn = Bits(OUTPUT, VPN_BITS); - // responses - val resp_val = Bool(INPUT); - val resp_err = Bool(INPUT); - val resp_ppn = Bits(INPUT, PPN_BITS); - val resp_perm = Bits(INPUT, PERM_BITS); -} - -// interface between ITLB and fetch stage of pipeline -class ioITLB_CPU extends Bundle -{ - // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(INPUT, 32); - // invalidate all TLB entries - val invalidate = Bool(INPUT); - // lookup requests - val req_val = Bool(INPUT); - val req_rdy = Bool(OUTPUT); - val req_asid = Bits(INPUT, ASID_BITS); - val req_vpn = UFix(INPUT, VPN_BITS+1); - // lookup responses - val resp_miss = Bool(OUTPUT); -// val resp_val = Bool(OUTPUT); - val resp_ppn = UFix(OUTPUT, PPN_BITS); - val exception = Bool(OUTPUT); -} - -class ioITLB extends Bundle -{ - val cpu = new ioITLB_CPU(); - val ptw = new ioTLB_PTW(); -} - -class rocketITLB(entries: Int) extends Component -{ - val addr_bits = ceil(log10(entries)/log10(2)).toInt; - val io = new ioITLB(); - - val 
s_ready :: s_request :: s_wait :: Nil = Enum(3) { UFix() }; - val state = Reg(resetVal = s_ready); - - val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_vpn = Reg() { Bits() }; - val r_cpu_req_asid = Reg() { Bits() }; - val r_refill_tag = Reg() { Bits() }; - val r_refill_waddr = Reg() { UFix() }; - - when (io.cpu.req_val && io.cpu.req_rdy) { - r_cpu_req_vpn := io.cpu.req_vpn; - r_cpu_req_asid := io.cpu.req_asid; - r_cpu_req_val := Bool(true); - } - .otherwise { - r_cpu_req_val := Bool(false); - } - - val bad_va = r_cpu_req_vpn(VPN_BITS) != r_cpu_req_vpn(VPN_BITS-1); - - val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); - val tag_ram = Mem(entries) { io.ptw.resp_ppn.clone } - when (io.ptw.resp_val) { tag_ram(r_refill_waddr) := io.ptw.resp_ppn } - - val lookup_tag = Cat(r_cpu_req_asid, r_cpu_req_vpn); - tag_cam.io.clear := io.cpu.invalidate; - tag_cam.io.clear_hit := io.cpu.exception - tag_cam.io.tag := lookup_tag; - tag_cam.io.write := io.ptw.resp_val || io.ptw.resp_err; - tag_cam.io.write_tag := r_refill_tag; - tag_cam.io.write_addr := r_refill_waddr; - val tag_hit = tag_cam.io.hit || bad_va; - val tag_hit_addr = tag_cam.io.hit_addr; - - // extract fields from status register - val status_s = io.cpu.status(SR_S).toBool; // user/supervisor mode - val status_u = !status_s; - val status_vm = io.cpu.status(SR_VM).toBool // virtual memory enable - - // extract fields from PT permission bits - val ptw_perm_ux = io.ptw.resp_perm(0); - val ptw_perm_sx = io.ptw.resp_perm(3); - - // permission bit arrays - val ux_array = Reg(resetVal = Bits(0, entries)); // user execute permission - val sx_array = Reg(resetVal = Bits(0, entries)); // supervisor execute permission - when (io.ptw.resp_val) { - ux_array := ux_array.bitSet(r_refill_waddr, ptw_perm_ux); - sx_array := sx_array.bitSet(r_refill_waddr, ptw_perm_sx); - } - - // when the page table lookup reports an error, set both execute permission - // bits to 0 so the next access will cause an exceptions - 
when (io.ptw.resp_err) { - ux_array := ux_array.bitSet(r_refill_waddr, Bool(false)); - sx_array := sx_array.bitSet(r_refill_waddr, Bool(false)); - } - - // high if there are any unused entries in the ITLB - val has_invalid_entry = !tag_cam.io.valid_bits.andR - val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) - val plru = new PseudoLRU(entries) - val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace).toUFix; - - val lookup = (state === s_ready) && r_cpu_req_val; - val lookup_hit = lookup && tag_hit; - val lookup_miss = lookup && !tag_hit; - val tlb_hit = status_vm && lookup_hit; - val tlb_miss = status_vm && lookup_miss; - - when (tlb_miss) { - r_refill_tag := lookup_tag; - r_refill_waddr := repl_waddr; - } - when (tlb_hit) { - plru.access(tag_hit_addr) - } - - val access_fault = - tlb_hit && - ((status_s && !sx_array(tag_hit_addr).toBool) || - (status_u && !ux_array(tag_hit_addr).toBool) || - bad_va); - - io.cpu.exception := access_fault; - io.cpu.req_rdy := Mux(status_vm, (state === s_ready) && (!r_cpu_req_val || tag_hit), Bool(true)); - io.cpu.resp_miss := tlb_miss || (state != s_ready); - io.cpu.resp_ppn := Mux(status_vm, tag_ram(tag_hit_addr), r_cpu_req_vpn(PPN_BITS-1,0)).toUFix; - - io.ptw.req_val := (state === s_request); - io.ptw.req_vpn := r_refill_tag(VPN_BITS-1,0); - - // control state machine - switch (state) { - is (s_ready) { - when (tlb_miss) { - state := s_request; - } - } - is (s_request) { - when (io.ptw.req_rdy) { - state := s_wait; - } - } - is (s_wait) { - when (io.ptw.resp_val || io.ptw.resp_err) { - state := s_ready; - } - } - } -} diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index c52ef782..989db4cb 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -100,7 +100,7 @@ class rocketMultiplier(unroll: Int = 1, earlyOut: Boolean = false) extends Compo r_prod:= rhs_in r_lsb := Bool(false) } - .elsewhen (io.resp_val && io.resp_rdy 
|| io.req_kill && r_cnt === UFix(0)) { // can only kill on first cycle + .elsewhen (io.resp_val && io.resp_rdy || io.req_kill) { r_val := Bool(false) } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index d0c540c5..3e2eaf63 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -73,7 +73,7 @@ class rocketHellaCacheArbiter(n: Int) extends Component class ioPTW(n: Int) extends Bundle { - val requestor = Vec(n) { new ioTLB_PTW }.flip + val requestor = Vec(n) { new IOTLBPTW }.flip val mem = new ioHellaCache val ptbr = UFix(INPUT, PADDR_BITS) } @@ -99,20 +99,15 @@ class rocketPTW(n: Int) extends Component val vpn_idxs = (1 until levels).map(i => r_req_vpn((levels-i)*bitsPerLevel-1, (levels-i-1)*bitsPerLevel)) val vpn_idx = (2 until levels).foldRight(vpn_idxs(0))((i,j) => Mux(count === UFix(i-1), vpn_idxs(i-1), j)) - - val req_rdy = state === s_ready - var req_val = Bool(false) - for (r <- io.requestor) { - r.req_rdy := req_rdy && !req_val - req_val = req_val || r.req_val - } - val req_dest = PriorityEncoder(io.requestor.map(_.req_val)) - val req_vpn = io.requestor.slice(0, n-1).foldRight(io.requestor(n-1).req_vpn)((r, v) => Mux(r.req_val, r.req_vpn, v)) - when (state === s_ready && req_val) { - r_req_vpn := req_vpn - r_req_dest := req_dest - req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), req_vpn(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) + val arb = new Arbiter(n)(UFix(width = VPN_BITS)) + arb.io.in <> io.requestor.map(_.req) + arb.io.out.ready := state === s_ready + + when (arb.io.out.fire()) { + r_req_vpn := arb.io.out.bits + r_req_dest := arb.io.chosen + req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) } val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false)) @@ -129,8 +124,8 @@ class rocketPTW(n: Int) extends Component io.mem.req.bits.ppn := Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) io.mem.req.bits.kill := Bool(false) - val 
resp_val = state === s_done - val resp_err = state === s_error + val resp_val = state === s_done || state === s_error + val resp_err = state === s_error || state === s_wait val resp_ptd = io.mem.resp.bits.data_subword(1,0) === Bits(1) val resp_pte = io.mem.resp.bits.data_subword(1,0) === Bits(2) @@ -140,16 +135,16 @@ class rocketPTW(n: Int) extends Component for (i <- 0 until io.requestor.size) { val me = r_req_dest === UFix(i) - io.requestor(i).resp_val := resp_val && me - io.requestor(i).resp_err := resp_err && me - io.requestor(i).resp_perm := r_resp_perm - io.requestor(i).resp_ppn := resp_ppn + io.requestor(i).resp.valid := resp_val && me + io.requestor(i).resp.bits.error := resp_err + io.requestor(i).resp.bits.perm := r_resp_perm + io.requestor(i).resp.bits.ppn := resp_ppn.toUFix } // control state machine switch (state) { is (s_ready) { - when (req_val) { + when (arb.io.out.valid) { state := s_req; } count := UFix(0) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala index f414ff6e..56505d7b 100644 --- a/rocket/src/main/scala/queues.scala +++ b/rocket/src/main/scala/queues.scala @@ -3,25 +3,27 @@ package rocket import Chisel._ import Node._; -class SkidBuffer[T <: Data](resetSignal: Bool = null)(data: => T) extends Component(resetSignal) +class SkidBuffer[T <: Data](entries: Int, lateEnq: Boolean = false)(data: => T) extends Component { val io = new Bundle { val enq = new FIFOIO()(data).flip val deq = new FIFOIO()(data) } + require(entries >= 2) val fq = new Queue(1, flow = true)(data) - val pq = new Queue(1, pipe = true)(data) + val pq = new Queue(entries-1, pipe = true)(data) + val (iq, oq) = if (lateEnq) (pq, fq) else (fq, pq) - fq.io.enq <> io.enq - pq.io.enq <> fq.io.deq - io.deq <> pq.io.deq + iq.io.enq <> io.enq + oq.io.enq <> iq.io.deq + io.deq <> oq.io.deq } object SkidBuffer { - def apply[T <: Data](enq: FIFOIO[T]): FIFOIO[T] = { - val s = new SkidBuffer()(enq.bits.clone) + def apply[T <: Data](enq: FIFOIO[T], 
entries: Int = 2): FIFOIO[T] = { + val s = new SkidBuffer(entries)(enq.bits.clone) s.io.enq <> enq s.io.deq } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 5e621808..c6a84583 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -13,7 +13,7 @@ class Tile(co: CoherencePolicyWithUncached, resetSignal: Bool = null) extends Co } val cpu = new rocketProc - val icache = new rocketICache(128, 4, co) // 128 sets x 4 ways (32KB) + val icache = new Frontend(ICacheConfig(co, 128, 4)) // 128 sets x 4 ways (32KB) val dcache = new HellaCache(co) val arbiter = new rocketMemArbiter(2 + (if (HAVE_VEC) 1 else 0)) @@ -31,7 +31,7 @@ class Tile(co: CoherencePolicyWithUncached, resetSignal: Bool = null) extends Co if (HAVE_VEC) { - val vicache = new rocketICache(128, 1, co) // 128 sets x 1 ways (8KB) + val vicache = new Frontend(ICacheConfig(co, 128, 1)) // 128 sets x 1 ways (8KB) arbiter.io.requestor(2) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala new file mode 100644 index 00000000..6ce671a4 --- /dev/null +++ b/rocket/src/main/scala/tlb.scala @@ -0,0 +1,255 @@ +package rocket + +import Chisel._; +import Node._; +import Constants._; +import scala.math._; +import hwacha._ + +class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { + val clear = Bool(INPUT); + val clear_hit = Bool(INPUT) + val tag = Bits(INPUT, tag_bits); + val hit = Bool(OUTPUT); + val hits = UFix(OUTPUT, entries); + val valid_bits = Bits(OUTPUT, entries); + + val write = Bool(INPUT); + val write_tag = Bits(INPUT, tag_bits); + val write_addr = UFix(INPUT, addr_bits); +} + +class rocketCAM(entries: Int, tag_bits: Int) extends Component { + val addr_bits = ceil(log(entries)/log(2)).toInt; + val io = new ioCAM(entries, addr_bits, tag_bits); + val cam_tags = Vec(entries) { Reg() { Bits(width = tag_bits) } } + + val vb_array = Reg(resetVal = Bits(0, 
entries)); + when (io.write) { + vb_array := vb_array.bitSet(io.write_addr, Bool(true)); + cam_tags(io.write_addr) := io.write_tag + } + when (io.clear) { + vb_array := Bits(0, entries); + } + .elsewhen (io.clear_hit) { + vb_array := vb_array & ~io.hits + } + + val hits = (0 until entries).map(i => vb_array(i) && cam_tags(i) === io.tag) + + io.valid_bits := vb_array; + io.hits := Vec(hits){Bool()}.toBits.toUFix + io.hit := io.hits.orR +} + +class PseudoLRU(n: Int) +{ + val state = Reg() { Bits(width = n) } + def access(way: UFix) = { + var next_state = state + var idx = UFix(1,1) + for (i <- log2Up(n)-1 to 0 by -1) { + val bit = way(i) + val mask = (UFix(1,n) << idx)(n-1,0) + next_state = next_state & ~mask | Mux(bit, UFix(0), mask) + //next_state.bitSet(idx, !bit) + idx = Cat(idx, bit) + } + state := next_state + } + def replace = { + var idx = UFix(1,1) + for (i <- 0 until log2Up(n)) + idx = Cat(idx, state(idx)) + idx(log2Up(n)-1,0) + } +} + +class IOTLBPTW extends Bundle { + val req = new FIFOIO()(UFix(width = VPN_BITS)) + val resp = new PipeIO()(new Bundle { + val error = Bool() + val ppn = UFix(width = PPN_BITS) + val perm = Bits(width = PERM_BITS) + }).flip +} + +class TLBReq extends Bundle +{ + val asid = UFix(width = ASID_BITS) + val vpn = UFix(width = VPN_BITS+1) + val status = Bits(width = 32) + val invalidate = Bool() + val instruction = Bool() +} + +class TLBResp(entries: Int) extends Bundle +{ + // lookup responses + val miss = Bool(OUTPUT) + val hit_idx = UFix(OUTPUT, entries) + val ppn = UFix(OUTPUT, PPN_BITS) + val xcpt_ld = Bool(OUTPUT) + val xcpt_st = Bool(OUTPUT) + val xcpt_pf = Bool(OUTPUT) + val xcpt_if = Bool(OUTPUT) + + override def clone = new TLBResp(entries).asInstanceOf[this.type] +} + +class TLB(entries: Int) extends Component +{ + val io = new Bundle { + val req = new FIFOIO()(new TLBReq).flip + val resp = new TLBResp(entries) + val ptw = new IOTLBPTW + } + + val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4) { 
UFix() } + val state = Reg(resetVal = s_ready) + val r_refill_tag = Reg() { UFix() } + val r_refill_waddr = Reg() { UFix() } + + val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); + val tag_ram = Vec(entries) { Reg() { io.ptw.resp.bits.ppn.clone } } + when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn } + + val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUFix + tag_cam.io.clear := io.req.bits.invalidate + tag_cam.io.clear_hit := io.req.fire() && Mux(io.req.bits.instruction, io.resp.xcpt_if, io.resp.xcpt_ld && io.resp.xcpt_st) + tag_cam.io.tag := lookup_tag + tag_cam.io.write := state === s_wait && io.ptw.resp.valid + tag_cam.io.write_tag := r_refill_tag + tag_cam.io.write_addr := r_refill_waddr + val tag_hit = tag_cam.io.hit + val tag_hit_addr = OHToUFix(tag_cam.io.hits) + + // permission bit arrays + val ur_array = Reg(resetVal = Bits(0, entries)) // user read permission + val uw_array = Reg(resetVal = Bits(0, entries)) // user write permission + val ux_array = Reg(resetVal = Bits(0, entries)) // user execute permission + val sr_array = Reg(resetVal = Bits(0, entries)) // supervisor read permission + val sw_array = Reg(resetVal = Bits(0, entries)) // supervisor write permission + val sx_array = Reg(resetVal = Bits(0, entries)) // supervisor execute permission + when (tag_cam.io.write) { + val perm = (!io.ptw.resp.bits.error).toFix & io.ptw.resp.bits.perm(5,0) + ur_array := ur_array.bitSet(r_refill_waddr, perm(2)) + uw_array := uw_array.bitSet(r_refill_waddr, perm(1)) + ux_array := ux_array.bitSet(r_refill_waddr, perm(0)) + sr_array := sr_array.bitSet(r_refill_waddr, perm(5)) + sw_array := sw_array.bitSet(r_refill_waddr, perm(4)) + sx_array := sx_array.bitSet(r_refill_waddr, perm(3)) + } + + // high if there are any unused (invalid) entries in the TLB + val has_invalid_entry = !tag_cam.io.valid_bits.andR + val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) + val plru = new PseudoLRU(entries) + val repl_waddr = 
Mux(has_invalid_entry, invalid_entry, plru.replace) + + val status_s = io.req.bits.status(SR_S) // user/supervisor mode + val status_vm = io.req.bits.status(SR_VM) // virtual memory enable + val bad_va = io.req.bits.vpn(VPN_BITS) != io.req.bits.vpn(VPN_BITS-1) + val tlb_hit = status_vm && tag_hit + val tlb_miss = status_vm && !tag_hit && !bad_va + + when (io.req.valid && tlb_hit) { + plru.access(tag_hit_addr) + } + + io.req.ready := state === s_ready + io.resp.xcpt_ld := bad_va || tlb_hit && !Mux(status_s, sr_array(tag_hit_addr), ur_array(tag_hit_addr)) + io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, sw_array(tag_hit_addr), uw_array(tag_hit_addr)) + io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, sx_array(tag_hit_addr), ux_array(tag_hit_addr)) + io.resp.miss := tlb_miss + io.resp.ppn := Mux(status_vm, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) + io.resp.hit_idx := tag_cam.io.hits + + io.ptw.req.valid := state === s_request + io.ptw.req.bits := r_refill_tag + + when (io.req.fire() && tlb_miss) { + state := s_request + r_refill_tag := lookup_tag + r_refill_waddr := repl_waddr + } + when (state === s_request) { + when (io.req.bits.invalidate) { + state := s_ready + } + when (io.ptw.req.ready) { + state := s_wait + when (io.req.bits.invalidate) { state := s_wait_invalidate } + } + } + when (state === s_wait && io.req.bits.invalidate) { + state := s_wait_invalidate + } + when ((state === s_wait || state === s_wait_invalidate) && io.ptw.resp.valid) { + state := s_ready + } +} + +// ioDTLB_CPU also located in hwacha/src/vuVXU-Interface.scala +// should keep them in sync + +class ioDTLB_CPU_req_bundle extends TLBReq +{ + val kill = Bool() + val cmd = Bits(width=4) // load/store/amo +} +class ioDTLB_CPU_req extends FIFOIO()( { new ioDTLB_CPU_req_bundle() } ) +class ioDTLB_CPU_resp extends TLBResp(1) + +class ioDTLB extends Bundle +{ + // status bits (from PCR), to check current permission and whether VM is enabled + val status = 
Bits(INPUT, 32) + // invalidate all TLB entries + val invalidate = Bool(INPUT) + val cpu_req = new ioDTLB_CPU_req().flip + val cpu_resp = new ioDTLB_CPU_resp() + val ptw = new IOTLBPTW +} + +class rocketTLB(entries: Int) extends Component +{ + val io = new ioDTLB(); + + val r_cpu_req_val = Reg(resetVal = Bool(false)); + val r_cpu_req_vpn = Reg() { UFix() } + val r_cpu_req_cmd = Reg() { Bits() } + val r_cpu_req_asid = Reg() { UFix() } + + val tlb = new TLB(entries) + tlb.io.req.valid := r_cpu_req_val && !io.cpu_req.bits.kill + tlb.io.req.bits.instruction := Bool(false) + tlb.io.req.bits.invalidate := io.invalidate + tlb.io.req.bits.status := io.status + tlb.io.req.bits.vpn := r_cpu_req_vpn + tlb.io.req.bits.asid := r_cpu_req_asid + + def cmdIsRead(cmd: Bits) = cmd === M_XRD || cmd(3) + def cmdIsWrite(cmd: Bits) = cmd === M_XWR || cmd(3) + def cmdIsPrefetch(cmd: Bits) = cmd === M_PFR || cmd === M_PFW + def cmdNeedsTLB(cmd: Bits) = cmdIsRead(cmd) || cmdIsWrite(cmd) || cmdIsPrefetch(cmd) + + when (io.cpu_req.fire() && cmdNeedsTLB(io.cpu_req.bits.cmd)) { + r_cpu_req_vpn := io.cpu_req.bits.vpn; + r_cpu_req_cmd := io.cpu_req.bits.cmd; + r_cpu_req_asid := io.cpu_req.bits.asid; + r_cpu_req_val := Bool(true); + } + .otherwise { + r_cpu_req_val := Bool(false); + } + + io.cpu_req.ready := tlb.io.req.ready && !io.cpu_resp.miss + io.cpu_resp.ppn := tlb.io.resp.ppn + io.cpu_resp.miss := r_cpu_req_val && tlb.io.resp.miss + io.cpu_resp.xcpt_ld := r_cpu_req_val && tlb.io.resp.xcpt_ld && cmdIsRead(r_cpu_req_cmd) + io.cpu_resp.xcpt_st := r_cpu_req_val && tlb.io.resp.xcpt_st && cmdIsWrite(r_cpu_req_cmd) + io.cpu_resp.xcpt_pf := r_cpu_req_val && tlb.io.resp.xcpt_ld && cmdIsPrefetch(r_cpu_req_cmd) + io.ptw <> tlb.io.ptw +} From fc648d13a1248defb2dca4022d950b6fb5a631ed Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 11 Oct 2012 16:48:51 -0700 Subject: [PATCH 0483/1087] remove old Mux1H; add implicit conversions --- rocket/src/main/scala/dpath_util.scala | 25 ++++++++------------ 
rocket/src/main/scala/htif.scala | 12 ++++------ rocket/src/main/scala/nbdcache.scala | 32 +++++++++++++------------- rocket/src/main/scala/util.scala | 12 ++++------ 4 files changed, 34 insertions(+), 47 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 61cec1fc..db1ef65d 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -28,34 +28,29 @@ class rocketDpathBTB(entries: Int) extends Component val hit = Bool() val update = Bool() var update_reduction = Bool(false) - val mux = (new Mux1H(entries)) { Bits(width = VADDR_BITS) } + val hits = Vec(entries) { Bool() } + val updates = Vec(entries) { Bool() } + val targets = Vec(entries) { Reg() { UFix() } } + val anyUpdate = updates.toBits.orR for (i <- 0 until entries) { val tag = Reg() { UFix() } - val target = Reg() { UFix() } val valid = Reg(resetVal = Bool(false)) - val my_hit = valid && tag === io.current_pc - val my_update = valid && tag === io.correct_pc + hits(i) := valid && tag === io.current_pc + updates(i) := valid && tag === io.correct_pc - when (io.wen && (my_update || !update && UFix(i) === repl_way)) { + when (io.wen && (updates(i) || !anyUpdate && UFix(i) === repl_way)) { valid := Bool(false) when (!io.clr) { valid := Bool(true) tag := io.correct_pc - target := io.correct_target + targets(i) := io.correct_target } } - - hit_reduction = hit_reduction || my_hit - update_reduction = update_reduction || my_update - mux.io.sel(i) := my_hit - mux.io.in(i) := target } - hit := hit_reduction - update := update_reduction - io.hit := hit - io.target := mux.io.out.toUFix + io.hit := hits.toBits.orR + io.target := Mux1H(hits, targets) } class ioDpathPCR extends Bundle() diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 335ef3cb..c6229b1a 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -189,11 +189,10 @@ class rocketHTIF(w: Int, ncores: 
Int, co: CoherencePolicyWithUncached) extends C io.mem.probe_rep_data.valid := Bool(false) io.mem.incoherent := Bool(true) - val pcr_mux = (new Mux1H(ncores)) { Bits(width = 64) } + val pcrReadData = Vec(ncores) { Reg() { Bits(width = io.cpu(0).pcr_rep.bits.getWidth) } } for (i <- 0 until ncores) { val my_reset = Reg(resetVal = Bool(true)) val my_ipi = Reg(resetVal = Bool(false)) - val rdata = Reg() { Bits() } val cpu = io.cpu(i) val me = pcr_coreid === UFix(i) @@ -221,25 +220,22 @@ class rocketHTIF(w: Int, ncores: Int, co: CoherencePolicyWithUncached) extends C when (cmd === cmd_writecr) { my_reset := pcr_wdata(0) } - rdata := my_reset.toBits + pcrReadData(i) := my_reset.toBits state := state_tx } cpu.pcr_rep.ready := Bool(true) when (cpu.pcr_rep.valid) { - rdata := cpu.pcr_rep.bits + pcrReadData(i) := cpu.pcr_rep.bits state := state_tx } - - pcr_mux.io.sel(i) := me - pcr_mux.io.in(i) := rdata } val tx_cmd = Mux(nack, cmd_nack, cmd_ack) val tx_cmd_ext = Cat(Bits(0, 4-tx_cmd.getWidth), tx_cmd) val tx_header = Cat(addr, seqno, tx_size, tx_cmd_ext) val tx_data = Mux(tx_word_count === UFix(0), tx_header, - Mux(cmd === cmd_readcr || cmd === cmd_writecr, pcr_mux.io.out, + Mux(cmd === cmd_readcr || cmd === cmd_writecr, pcrReadData(pcr_coreid), packet_ram(packet_ram_raddr))) io.host.in.ready := state === state_rx diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index a3402dcb..6c882b1c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -325,9 +325,10 @@ class MSHRFile(co: CoherencePolicy) extends Component { val sdq = Mem(NSDQ) { io.req.bits.data.clone } when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } - val tag_mux = (new Mux1H(NMSHR)){ Bits(width = TAG_BITS) } - val wb_probe_mux = (new Mux1H(NMSHR)) { new WritebackReq } - val mem_resp_mux = (new Mux1H(NMSHR)){ new DataArrayReq } + val idxMatch = Vec(NMSHR) { Bool() } + val tagList = Vec(NMSHR) { Bits() } + val wbTagList = 
Vec(NMSHR) { Bits() } + val memRespMux = Vec(NMSHR) { new DataArrayReq } val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() } val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } val mem_finish_arb = (new Arbiter(NMSHR)) { new TransactionFinish } @@ -335,8 +336,8 @@ class MSHRFile(co: CoherencePolicy) extends Component { val replay_arb = (new Arbiter(NMSHR)) { new Replay() } val alloc_arb = (new Arbiter(NMSHR)) { Bool() } - val tag_match = tag_mux.io.out === io.req.bits.tag - val wb_probe_match = wb_probe_mux.io.out.tag === io.req.bits.tag + val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.tag + val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.tag var idx_match = Bool(false) var pri_rdy = Bool(false) @@ -348,10 +349,9 @@ class MSHRFile(co: CoherencePolicy) extends Component { for (i <- 0 to NMSHR-1) { val mshr = new MSHR(i, co) - tag_mux.io.sel(i) := mshr.io.idx_match - tag_mux.io.in(i) := mshr.io.tag - wb_probe_mux.io.sel(i) := mshr.io.idx_match - wb_probe_mux.io.in(i) := mshr.io.wb_req.bits + idxMatch(i) := mshr.io.idx_match + tagList(i) := mshr.io.tag + wbTagList(i) := mshr.io.wb_req.bits.tag alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy mshr.io.req_pri_val := alloc_arb.io.in(i).ready @@ -371,10 +371,9 @@ class MSHRFile(co: CoherencePolicy) extends Component { mshr.io.mem_abort <> io.mem_abort mshr.io.mem_rep <> io.mem_rep - mem_resp_mux.io.sel(i) := UFix(i) === io.mem_rep.bits.tile_xact_id - mem_resp_mux.io.in(i).idx := mshr.io.idx - mem_resp_mux.io.in(i).offset := mshr.io.refill_count - mem_resp_mux.io.in(i).way_en := mshr.io.way_oh + memRespMux(i).idx := mshr.io.idx + memRespMux(i).offset := mshr.io.refill_count + memRespMux(i).way_en := mshr.io.way_oh pri_rdy = pri_rdy || mshr.io.req_pri_rdy sec_rdy = sec_rdy || mshr.io.req_sec_rdy @@ -393,9 +392,10 @@ class MSHRFile(co: CoherencePolicy) extends Component { io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.secondary_miss := idx_match - 
io.mem_resp_idx := mem_resp_mux.io.out.idx - io.mem_resp_offset := mem_resp_mux.io.out.offset - io.mem_resp_way_oh := mem_resp_mux.io.out.way_en + val memResp = memRespMux(io.mem_rep.bits.tile_xact_id) + io.mem_resp_idx := memResp.idx + io.mem_resp_offset := memResp.offset + io.mem_resp_way_oh := memResp.way_en io.fence_rdy := !fence io.probe.ready := (refill_probe_rdy || !tag_match) && (writeback_probe_rdy || !wb_probe_match) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 19856386..d5472420 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -4,13 +4,9 @@ import Chisel._ import Node._ import scala.math._ -class Mux1H [T <: Data](n: Int)(gen: => T) extends Component +object Util { - val io = new Bundle { - val sel = Vec(n) { Bool(dir = INPUT) } - val in = Vec(n) { gen }.asInput - val out = gen.asOutput - } - - io.out := Mux1H(io.sel, io.in) + implicit def intToUFix(x: Int): UFix = UFix(x) + implicit def intToBoolean(x: Int): Boolean = if (x != 0) true else false + implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0 } From 197154c4850713633550633b2538e8f62003979f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 11 Oct 2012 16:50:15 -0700 Subject: [PATCH 0484/1087] use BTB for JALR --- rocket/src/main/scala/ctrl.scala | 17 ++++++++--------- rocket/src/main/scala/dpath.scala | 3 ++- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f88aca6b..019c436b 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -49,6 +49,7 @@ class ioCtrlDpath extends Bundle() val vec_irq_aux_wen = Bool(OUTPUT) // inputs from datapath val inst = Bits(INPUT, 32); + val jalr_eq = Bool(INPUT) val br_eq = Bool(INPUT); val br_lt = Bool(INPUT); val br_ltu = Bool(INPUT); @@ -534,8 +535,8 @@ class rocketCtrl extends Component Mux(ex_reg_br_type === BR_GEU, ~io.dpath.br_ltu, ex_reg_br_type 
=== BR_J)))))) - val mem_reg_div_val = Reg(){Bool()} - val mem_reg_mul_val = Reg(){Bool()} + val mem_reg_div_val = Reg(resetVal = Bool(false)) + val mem_reg_mul_val = Reg(resetVal = Bool(false)) val mem_reg_eret = Reg(){Bool()}; val mem_reg_mem_val = Reg(){Bool()}; val mem_reg_mem_cmd = Reg(){Bits()} @@ -544,8 +545,6 @@ class rocketCtrl extends Component when (reset.toBool || io.dpath.killx) { mem_reg_valid := Bool(false); mem_reg_pcr := PCR_N - mem_reg_div_val := Bool(false) - mem_reg_mul_val := Bool(false) mem_reg_wen := Bool(false); mem_reg_fp_wen := Bool(false); mem_reg_eret := Bool(false); @@ -565,8 +564,6 @@ class rocketCtrl extends Component .otherwise { mem_reg_valid := ex_reg_valid mem_reg_pcr := ex_reg_pcr - mem_reg_div_val := ex_reg_div_val && io.dpath.div_rdy - mem_reg_mul_val := ex_reg_mul_val && io.dpath.mul_rdy mem_reg_wen := ex_reg_wen; mem_reg_fp_wen := ex_reg_fp_wen; mem_reg_eret := ex_reg_eret; @@ -583,6 +580,8 @@ class rocketCtrl extends Component mem_reg_fp_sboard_set := ex_reg_fp_sboard_set mem_reg_replay_next := ex_reg_replay_next } + mem_reg_div_val := ex_reg_div_val && io.dpath.div_rdy + mem_reg_mul_val := ex_reg_mul_val && io.dpath.mul_rdy mem_reg_mem_cmd := ex_reg_mem_cmd; mem_reg_mem_type := ex_reg_mem_type; mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb @@ -693,7 +692,7 @@ class rocketCtrl extends Component UFix(0,5)))))))))))); // instruction address misaligned // control transfer from ex/mem - val take_pc_ex = ex_reg_btb_hit != br_taken || ex_reg_jalr + val take_pc_ex = !Mux(ex_reg_jalr, ex_reg_btb_hit && io.dpath.jalr_eq, ex_reg_btb_hit === br_taken) take_pc_wb := wb_reg_replay || vec_replay || wb_reg_exception || wb_reg_eret take_pc := take_pc_ex || take_pc_wb; @@ -739,8 +738,8 @@ class rocketCtrl extends Component Mux(!ex_reg_btb_hit, PC_EX, // mispredicted taken branch PC_EX4))))) // mispredicted not taken branch - io.imem.req.bits.mispredict := !take_pc_wb && !ex_reg_jalr && ex_reg_btb_hit != br_taken - 
io.imem.req.bits.taken := !ex_reg_btb_hit + io.imem.req.bits.mispredict := !take_pc_wb && take_pc_ex + io.imem.req.bits.taken := !ex_reg_btb_hit || ex_reg_jalr io.imem.req.valid := take_pc // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 24f70519..cec23646 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -238,6 +238,7 @@ class rocketDpath extends Component io.ptbr_wen := pcr.io.ptbr_wen; // branch resolution logic + io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0) io.ctrl.br_eq := (ex_rs1 === ex_rs2) io.ctrl.br_ltu := (ex_rs1.toUFix < ex_rs2.toUFix) io.ctrl.br_lt := @@ -253,7 +254,7 @@ class rocketDpath extends Component // writeback select mux ex_wdata := - Mux(ex_reg_ctrl_sel_wb === WB_PC, Cat(Fill(64-VADDR_BITS, ex_pc_plus4(VADDR_BITS-1)), ex_pc_plus4), + Mux(ex_reg_ctrl_sel_wb === WB_PC, ex_pc_plus4.toFix, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, ex_alu_out))).toBits // WB_ALU From b955985b38b7d1005542182d35db713ac62b4312 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 11 Oct 2012 16:50:53 -0700 Subject: [PATCH 0485/1087] improve divider QoR --- rocket/src/main/scala/divider.scala | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index c3c15522..d02859cf 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -6,8 +6,7 @@ import Constants._ class rocketDivider(earlyOut: Boolean = false) extends Component { val io = new ioMultiplier - val w0 = io.req.bits.in0.getWidth - val w = w0+1 // sign bit + val w = io.req.bits.in0.getWidth val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = 
s_ready); @@ -28,13 +27,13 @@ class rocketDivider(earlyOut: Boolean = false) extends Component { val fn = io.req.bits.fn(io.req.bits.fn.width-2,0) val tc = (fn === DIV_D) || (fn === DIV_R); - val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(w0-1), io.req.bits.in0(w0/2-1)) - val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(w0-1,w0/2), Fill(w0/2, lhs_sign)) - val lhs_in = Cat(lhs_sign, lhs_hi, io.req.bits.in0(w0/2-1,0)) + val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(w-1), io.req.bits.in0(w/2-1)) + val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(w-1,w/2), Fill(w/2, lhs_sign)) + val lhs_in = Cat(lhs_hi, io.req.bits.in0(w/2-1,0)) - val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w0-1), io.req.bits.in1(w0/2-1)) - val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(w0-1,w0/2), Fill(w0/2, rhs_sign)) - val rhs_in = Cat(rhs_sign, rhs_hi, io.req.bits.in1(w0/2-1,0)) + val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w-1), io.req.bits.in1(w/2-1)) + val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(w-1,w/2), Fill(w/2, rhs_sign)) + val rhs_in = Cat(rhs_hi, io.req.bits.in1(w/2-1,0)) when (state === s_neg_inputs) { state := s_busy @@ -73,8 +72,8 @@ class rocketDivider(earlyOut: Boolean = false) extends Component { val eOut = count === UFix(0) && eOutPos > dividendMSB && (divisorMSB != UFix(0) || divisor(0)) when (Bool(earlyOut) && eOut) { val eOutDist = eOutPos - dividendMSB - val shift = Mux(eOutDist >= UFix(w-1), UFix(w-1), eOutDist(log2Up(w)-1,0)) - remainder := remainder << shift + val shift = Mux(divisorMSB >= dividendMSB, UFix(w-1), eOutDist(log2Up(w)-1,0)) + remainder := remainder(w-1,0) << shift count := shift } } @@ -94,9 +93,9 @@ class rocketDivider(earlyOut: Boolean = false) extends Component { remainder := lhs_in } - val result = Mux(rem, remainder(w+w0, w+1), remainder(w0-1,0)) + val result = Mux(rem, remainder(w+w, w+1), remainder(w-1,0)) - io.resp_bits := Mux(half, Cat(Fill(w0/2, result(w0/2-1)), result(w0/2-1,0)), result) + io.resp_bits := 
Mux(half, Cat(Fill(w/2, result(w/2-1)), result(w/2-1,0)), result) io.resp_tag := reg_tag io.resp_val := state === s_done io.req.ready := state === s_ready From 582190032943ec72079c7f9875f849596f57dc88 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 11 Oct 2012 16:54:28 -0700 Subject: [PATCH 0486/1087] don't refetch from I$ if on same 16B block --- rocket/src/main/scala/icache.scala | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 3967b477..2632e3ca 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -5,6 +5,7 @@ import Node._; import Constants._; import scala.math._; import uncore._ +import Util._ case class ICacheConfig(co: CoherencePolicyWithUncached, sets: Int, assoc: Int, parity: Boolean = false) { @@ -62,6 +63,7 @@ class Frontend(c: ICacheConfig) extends Component val tlb = new TLB(ITLB_ENTRIES) val s1_pc = Reg() { UFix() } + val s1_same_block = Reg() { Bool() } val s2_valid = Reg(resetVal = Bool(true)) val s2_pc = Reg(resetVal = UFix(START_ADDR)) val s2_btb_hit = Reg(resetVal = Bool(false)) @@ -71,10 +73,13 @@ class Frontend(c: ICacheConfig) extends Component val pcp4_0 = s1_pc + UFix(c.ibytes) val pcp4 = Cat(s1_pc(VADDR_BITS-1) & pcp4_0(VADDR_BITS-1), pcp4_0(VADDR_BITS-1,0)) val icmiss = s2_valid && !icache.io.resp.valid - val npc = Mux(icmiss, s2_pc, Mux(btb.io.hit, btbTarget, pcp4)).toUFix + val predicted_npc = Mux(btb.io.hit, btbTarget, pcp4) + val npc = Mux(icmiss, s2_pc, predicted_npc).toUFix + val s0_same_block = !icmiss && !io.cpu.req.valid && (predicted_npc >> log2Up(c.databits/8)) === (s1_pc >> log2Up(c.databits/8)) - val stall = !io.cpu.resp.ready + val stall = io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { + s1_same_block := s0_same_block && !tlb.io.resp.miss s1_pc := npc s2_valid := !icmiss s2_pc := s1_pc @@ -82,6 +87,7 @@ class Frontend(c: ICacheConfig) extends Component 
s2_xcpt_if := tlb.io.resp.xcpt_if } when (io.cpu.req.valid) { + s1_same_block := Bool(false) s1_pc := io.cpu.req.bits.pc s2_valid := Bool(false) } @@ -102,16 +108,16 @@ class Frontend(c: ICacheConfig) extends Component tlb.io.req.bits.instruction := Bool(true) icache.io.mem <> io.mem - icache.io.req.valid := !stall + icache.io.req.valid := !stall && !s0_same_block icache.io.req.bits.idx := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) icache.io.req.bits.invalidate := io.cpu.req.bits.invalidate icache.io.req.bits.ppn := tlb.io.resp.ppn icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss - icache.io.resp.ready := io.cpu.resp.ready + icache.io.resp.ready := !stall && !s1_same_block io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) io.cpu.resp.bits.pc := s2_pc - io.cpu.resp.bits.data := icache.io.resp.bits.data + io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(c.databits/8)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)) io.cpu.resp.bits.taken := s2_btb_hit io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(c.ibytes)-1,0) != UFix(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if @@ -141,6 +147,7 @@ class ICache(c: ICacheConfig) extends Component val s2_valid = Reg(resetVal = Bool(false)) val s2_addr = Reg { UFix(width = PADDR_BITS) } + val s2_any_tag_hit = Bool() val s1_valid = Reg(resetVal = Bool(false)) val s1_pgoff = Reg() { UFix(width = PGIDX_BITS) } @@ -153,7 +160,7 @@ class ICache(c: ICacheConfig) extends Component s1_pgoff := s0_pgoff } - s2_valid := s1_valid && rdy && !io.req.bits.kill || stall + s2_valid := s1_valid && rdy && !io.req.bits.kill || io.resp.valid && stall when (s1_valid && rdy && !stall) { s2_addr := Cat(io.req.bits.ppn, s1_pgoff).toUFix } @@ -161,7 +168,6 @@ class ICache(c: ICacheConfig) extends Component val s2_tag = s2_addr(c.tagbits+c.untagbits-1,c.untagbits) val s2_idx = s2_addr(c.untagbits-1,c.offbits) val s2_offset = s2_addr(c.offbits-1,0) - val s2_any_tag_hit = Bool() val s2_hit = s2_valid && 
s2_any_tag_hit val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss @@ -220,7 +226,7 @@ class ICache(c: ICacheConfig) extends Component when (s1_valid && rdy && !stall) { s2_dout(i) := s1_dout } s2_data_disparity(i) := s2_dout(i).xorR } - val s2_dout_word = s2_dout.map(x => (x >> Cat(s2_offset(log2Up(c.databits/8)-1,log2Up(c.ibytes)), Bits(0,log2Up(c.ibytes*8))))(c.ibytes*8-1,0)) + val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(c.databits/8)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)))(c.ibytes*8-1,0)) io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) From 0a640f2cc6b84664a05086a3ac322157b17c34e4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 16 Oct 2012 04:51:21 -0700 Subject: [PATCH 0487/1087] make DecodeLogic deterministic (hopefully) --- rocket/src/main/scala/decode.scala | 83 +++++++++++++++++------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 355dd1a1..7cd2d917 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -13,7 +13,7 @@ object DecodeLogic new Term(b.value) } } - def logic(addr: Bits, cache: scala.collection.mutable.Map[Term,Bits], terms: Set[Term]) = { + def logic(addr: Bits, cache: scala.collection.mutable.Map[Term,Bits], terms: Seq[Term]) = { terms.map { t => if (!cache.contains(t)) cache += t -> ((if (t.mask == 0) addr else addr & Lit(BigInt(2).pow(addr.width)-(t.mask+1), addr.width){Bits()}) === Lit(t.value, addr.width){Bits()}) @@ -31,13 +31,13 @@ object DecodeLogic val result = (0 until math.max(dlit.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) => if (((dterm.mask >> i) & 1) != 0) { - var mint = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1).toSet - var maxt = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> 
i) & 1) == 0 }.map(_._1).toSet + var mint = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1) + var maxt = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1) logic(addr, cache, SimplifyDC(mint, maxt, addr.width)).toBits } else { val want = 1 - ((dterm.value.toInt >> i) & 1) - val mint = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == want }.map(_._1).toSet - val dc = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1).toSet + val mint = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == want }.map(_._1) + val dc = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1) val bit = logic(addr, cache, Simplify(mint, dc, addr.width)).toBits if (want == 1) bit else ~bit } @@ -59,6 +59,7 @@ class Term(val value: BigInt, val mask: BigInt = 0) case _ => false } override def hashCode = value.toInt + def < (that: Term) = value < that.value || value == that.value && mask < that.mask def similar(x: Term) = { val diff = value - x.value mask == x.mask && value > x.value && (diff & diff-1) == 0 @@ -75,32 +76,33 @@ class Term(val value: BigInt, val mask: BigInt = 0) object Simplify { - def getPrimeImplicants(implicants: Set[Term], bits: Int) = { - var prime = Set[Term]() + def getPrimeImplicants(implicants: Seq[Term], bits: Int) = { + var prime = List[Term]() implicants.foreach(_.prime = true) val cols = (0 to bits).map(b => implicants.filter(b == _.mask.bitCount)) - val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set() ++ c.filter(b == _.value.bitCount))) + val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set(c.filter(b == _.value.bitCount):_*))) for (i <- 0 to bits) { for (j <- 0 until bits-i) table(i)(j).foreach(a => table(i+1)(j) ++= table(i)(j+1).filter(_.similar(a)).map(_.merge(a))) - prime ++= table(i).map(_.filter(_.prime)).reduceLeft(_++_) + 
for (r <- table(i)) + for (p <- r; if p.prime) + prime = p :: prime } - prime + prime.sort(_<_) } - def getEssentialPrimeImplicants(prime: Set[Term], minterms: Set[Term]): Tuple3[Set[Term],Set[Term],Set[Term]] = { - val useful1 = prime.toSeq - for (i <- 0 until useful1.size) { - val icover = minterms.filter(useful1(i) covers _) - for (j <- 0 until useful1.size) { - val jcover = minterms.filter(useful1(j) covers _) - if (icover.size > jcover.size && jcover.forall(useful1(i) covers _)) - return getEssentialPrimeImplicants(prime - useful1(j), minterms) + def getEssentialPrimeImplicants(prime: Seq[Term], minterms: Seq[Term]): (Seq[Term],Seq[Term],Seq[Term]) = { + for (i <- 0 until prime.size) { + val icover = minterms.filter(prime(i) covers _) + for (j <- 0 until prime.size) { + val jcover = minterms.filter(prime(j) covers _) + if (icover.size > jcover.size && jcover.forall(prime(i) covers _)) + return getEssentialPrimeImplicants(prime.filter(_ != prime(j)), minterms) } } val essentiallyCovered = minterms.filter(t => prime.count(_ covers t) == 1) val essential = prime.filter(p => essentiallyCovered.exists(p covers _)) - val nonessential = prime -- essential + val nonessential = prime.filterNot(essential contains _) val uncovered = minterms.filterNot(t => essential.exists(_ covers t)) if (essential.isEmpty || uncovered.isEmpty) (essential, nonessential, uncovered) @@ -109,19 +111,24 @@ object Simplify (essential ++ a, b, c) } } - def getCost(cover: Set[Term], bits: Int) = cover.map(bits - _.mask.bitCount).sum - def getCover(implicants: Set[Term], minterms: Set[Term], bits: Int) = { - var cover = minterms.map(m => implicants.filter(_.covers(m)).map(i => Set(i))).toList - while (cover.size > 1) - cover = cover(0).map(a => cover(1).map(_ ++ a)).reduceLeft(_++_) :: cover.tail.tail - if (cover.isEmpty) - Set[Term]() - else - cover(0).reduceLeft((a, b) => if (getCost(a, bits) < getCost(b, bits)) a else b) + def getCost(cover: Seq[Term], bits: Int) = cover.map(bits - 
_.mask.bitCount).sum + def cheaper(a: List[Term], b: List[Term], bits: Int) = { + val ca = getCost(a, bits) + val cb = getCost(b, bits) + def listLess(a: List[Term], b: List[Term]): Boolean = !b.isEmpty && (a.isEmpty || a.head < b.head || a.head == b.head && listLess(a.tail, b.tail)) + ca < cb || ca == cb && listLess(a.sort(_<_), b.sort(_<_)) } - def stringify(s: Set[Term], bits: Int) = s.map(t => (0 until bits).map(i => if ((t.mask & (1 << i)) != 0) "x" else ((t.value >> i) & 1).toString).reduceLeft(_+_).reverse).reduceLeft(_+" + "+_) + def getCover(implicants: Seq[Term], minterms: Seq[Term], bits: Int) = { + if (minterms.nonEmpty) { + val cover = minterms.map(m => implicants.filter(_.covers(m)).map(i => collection.mutable.Set(i))) + val all = cover.reduceLeft((c0, c1) => c0.map(a => c1.map(_ ++ a)).reduceLeft(_++_)) + all.map(_.toList).reduceLeft((a, b) => if (cheaper(a, b, bits)) a else b) + } else + Seq[Term]() + } + def stringify(s: Seq[Term], bits: Int) = s.map(t => (0 until bits).map(i => if ((t.mask & (1 << i)) != 0) "x" else ((t.value >> i) & 1).toString).reduceLeft(_+_).reverse).reduceLeft(_+" + "+_) - def apply(minterms: Set[Term], dontcares: Set[Term], bits: Int) = { + def apply(minterms: Seq[Term], dontcares: Seq[Term], bits: Int) = { val prime = getPrimeImplicants(minterms ++ dontcares, bits) minterms.foreach(t => assert(prime.exists(_.covers(t)))) val (eprime, prime2, uncovered) = getEssentialPrimeImplicants(prime, minterms) @@ -133,7 +140,7 @@ object Simplify object SimplifyDC { - def getImplicitDC(maxterms: Set[Term], term: Term, bits: Int, above: Boolean): Term = { + def getImplicitDC(maxterms: Seq[Term], term: Term, bits: Int, above: Boolean): Term = { for (i <- 0 until bits) { var t: Term = null if (above && ((term.value | term.mask) & (1L << i)) == 0) @@ -145,12 +152,12 @@ object SimplifyDC } null } - def getPrimeImplicants(minterms: Set[Term], maxterms: Set[Term], bits: Int) = { - var prime = Set[Term]() + def getPrimeImplicants(minterms: 
Seq[Term], maxterms: Seq[Term], bits: Int) = { + var prime = List[Term]() minterms.foreach(_.prime = true) var mint = minterms.map(t => new Term(t.value, t.mask)) val cols = (0 to bits).map(b => mint.filter(b == _.mask.bitCount)) - val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set() ++ c.filter(b == _.value.bitCount))) + val table = cols.map(c => (0 to bits).map(b => collection.mutable.Set(c.filter(b == _.value.bitCount):_*))) for (i <- 0 to bits) { for (j <- 0 until bits-i) { @@ -168,12 +175,14 @@ object SimplifyDC table(i+1)(j) += a merge dc } } - prime ++= table(i).map(_.filter(_.prime)).reduceLeft(_++_) + for (r <- table(i)) + for (p <- r; if p.prime) + prime = p :: prime } - prime + prime.sort(_<_) } - def apply(minterms: Set[Term], maxterms: Set[Term], bits: Int) = { + def apply(minterms: Seq[Term], maxterms: Seq[Term], bits: Int) = { val prime = getPrimeImplicants(minterms, maxterms, bits) assert(minterms.forall(t => prime.exists(_ covers t))) val (eprime, prime2, uncovered) = Simplify.getEssentialPrimeImplicants(prime, minterms) From b9a2af697dbd4740744802ab29a665075aa72cc4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 16 Oct 2012 07:38:19 -0700 Subject: [PATCH 0488/1087] turn off HAVE_VEC as it's currently broken the new I$/frontend needs to be integrated --- rocket/src/main/scala/consts.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 731f24ba..da50ed91 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -8,7 +8,7 @@ object Constants val NTILES = 1 val HAVE_RVC = false val HAVE_FPU = true - val HAVE_VEC = true + val HAVE_VEC = false val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK From 6cff1c13d861483ed1ef3c2137187241eaa94458 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 16 Oct 2012 14:22:23 -0700 Subject: [PATCH 0489/1087] 
Refer to traits moved to uncore, add UncoreConfiguration to top --- rocket/src/main/scala/consts.scala | 63 +++-------------------------- rocket/src/main/scala/package.scala | 6 +-- rocket/src/main/scala/top.scala | 7 ++-- 3 files changed, 12 insertions(+), 64 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 9ec91845..294e4052 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -4,53 +4,6 @@ package constants import Chisel._ import scala.math._ -abstract trait MulticoreConstants { - val NTILES: Int - val TILE_ID_BITS = log2Up(NTILES)+1 -} - -abstract trait CoherenceConfigConstants { - val ENABLE_SHARING: Boolean - val ENABLE_CLEAN_EXCLUSIVE: Boolean -} - -trait UncoreConstants { - val NGLOBAL_XACTS = 8 - val GLOBAL_XACT_ID_BITS = log2Up(NGLOBAL_XACTS) -} - -trait TileLinkTypeConstants { - val X_INIT_TYPE_MAX_BITS = 2 - val X_REP_TYPE_MAX_BITS = 3 - val P_REQ_TYPE_MAX_BITS = 2 - val P_REP_TYPE_MAX_BITS = 3 -} - -trait TileLinkSizeConstants extends - RocketDcacheConstants with - TileLinkTypeConstants -{ - val TILE_XACT_ID_BITS = log2Up(NMSHR)+3 - val X_INIT_WRITE_MASK_BITS = OFFSET_BITS - val X_INIT_SUBWORD_ADDR_BITS = log2Up(OFFSET_BITS) - val X_INIT_ATOMIC_OP_BITS = 4 -} - -trait HTIFConstants { - val HTIF_WIDTH = 16 -} - -trait MemoryInterfaceConstants extends - HTIFConstants with - UncoreConstants with - TileLinkSizeConstants -{ - val MEM_TAG_BITS = max(TILE_XACT_ID_BITS, GLOBAL_XACT_ID_BITS) - val MEM_DATA_BITS = 128 - val REFILL_CYCLES = (1 << OFFSET_BITS)*8/MEM_DATA_BITS - val MEM_BACKUP_WIDTH = HTIF_WIDTH -} - abstract trait TileConfigConstants { def HAVE_RVC: Boolean def HAVE_FPU: Boolean @@ -219,25 +172,19 @@ trait InterruptConstants { val IRQ_TIMER = 7 } -trait AddressConstants { - val PADDR_BITS = 40; - val VADDR_BITS = 43; - val PGIDX_BITS = 13; - val PPN_BITS = PADDR_BITS-PGIDX_BITS; - val VPN_BITS = VADDR_BITS-PGIDX_BITS; - val ASID_BITS = 7; - val PERM_BITS 
= 6; -} - -abstract trait RocketDcacheConstants extends ArbiterConstants with AddressConstants { +abstract trait RocketDcacheConstants extends ArbiterConstants with uncore.constants.AddressConstants { val CPU_DATA_BITS = 64; val CPU_TAG_BITS = 9; val DCACHE_TAG_BITS = log2Up(DCACHE_PORTS) + CPU_TAG_BITS val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses + require(log2Up(NMSHR)+3 <= uncore.Constants.TILE_XACT_ID_BITS) val NRPQ = 16; // number of secondary misses val NSDQ = 17; // number of secondary stores/AMOs val OFFSET_BITS = 6; // log2(cache line size in bytes) + require(OFFSET_BITS == log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES)) + require(OFFSET_BITS <= uncore.Constants.X_INIT_WRITE_MASK_BITS) + require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS) val IDX_BITS = 7; val TAG_BITS = PADDR_BITS - OFFSET_BITS - IDX_BITS; val NWAYS = 4 diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index aaa06dcd..28fdcfad 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -8,13 +8,13 @@ import scala.math._ // package object rocket and remove import Constants._'s from other files object Constants extends ScalarOpConstants with - MemoryOpConstants with + uncore.constants.MemoryOpConstants with PCRConstants with InterruptConstants with - AddressConstants with + RocketDcacheConstants with VectorOpConstants with TLBConstants with - MemoryInterfaceConstants + uncore.constants.MemoryInterfaceConstants { def HAVE_RVC = false def HAVE_FPU = true diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index df6f084d..9e051d0e 100644 --- a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -6,7 +6,7 @@ import Constants._ import uncore._ import collection.mutable.ArrayBuffer -object DummyTopLevelConstants extends rocket.constants.CoherenceConfigConstants with 
rocket.constants.MulticoreConstants { +object DummyTopLevelConstants extends uncore.constants.CoherenceConfigConstants { val NTILES = 1 val ENABLE_SHARING = true val ENABLE_CLEAN_EXCLUSIVE = true @@ -24,7 +24,8 @@ class Top extends Component if(ENABLE_CLEAN_EXCLUSIVE) new MEICoherence else new MICoherence } - implicit val conf = RocketConfiguration(NTILES, co) + implicit val rconf = RocketConfiguration(NTILES, co) + implicit val uconf = UncoreConfiguration(NTILES+1, log2Up(NTILES)+1) val io = new Bundle { val debug = new ioDebug @@ -33,7 +34,7 @@ class Top extends Component } val htif = new rocketHTIF(HTIF_WIDTH) - val hub = new CoherenceHubBroadcast(NTILES+1, co) + val hub = new CoherenceHubBroadcast(co) hub.io.tiles(NTILES) <> htif.io.mem io.host <> htif.io.host From e2eb7ce8e9cccc7809b1b98651c701c31ca698fc Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 16 Oct 2012 16:33:07 -0700 Subject: [PATCH 0490/1087] Cleanup git incompetence --- rocket/src/main/scala/package.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index 28fdcfad..22d18ca9 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -18,7 +18,7 @@ object Constants extends { def HAVE_RVC = false def HAVE_FPU = true - def HAVE_VEC = true + def HAVE_VEC = false val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK From 5773cbb68a52af54b97b90f44b4a7edf24efb04c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 18 Oct 2012 17:26:03 -0700 Subject: [PATCH 0491/1087] rejigger htif to use UncoreConfiguration --- rocket/src/main/scala/cpu.scala | 2 +- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/htif.scala | 15 +++++++++------ rocket/src/main/scala/tile.scala | 2 +- rocket/src/main/scala/top.scala | 8 ++++---- 6 files changed, 17 insertions(+), 14 deletions(-) 
diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index ff13a952..1b1d8c17 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -7,7 +7,7 @@ import hwacha._ class ioRocket(implicit conf: RocketConfiguration) extends Bundle { - val host = new ioHTIF + val host = new ioHTIF(conf.ntiles) val imem = new IOCPUFrontend val vimem = new IOCPUFrontend val dmem = new ioHellaCache diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 66e4e600..de19a6f9 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -8,7 +8,7 @@ import hwacha._ class ioDpathAll(implicit conf: RocketConfiguration) extends Bundle { - val host = new ioHTIF + val host = new ioHTIF(conf.ntiles) val ctrl = new ioCtrlDpath().flip val dmem = new ioHellaCache val dtlb = new ioDTLB_CPU_req_bundle().asOutput() diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 6140ea51..db40190a 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -55,7 +55,7 @@ class rocketDpathBTB(entries: Int) extends Component class ioDpathPCR(implicit conf: RocketConfiguration) extends Bundle { - val host = new ioHTIF + val host = new ioHTIF(conf.ntiles) val r = new ioReadPort(); val w = new ioWritePort(); diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 3a59fd55..c8d203bb 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -10,8 +10,10 @@ class ioDebug extends Bundle val error_mode = Bool(OUTPUT); } -class ioHost(w: Int) extends Bundle +class ioHost(val w: Int) extends Bundle { + val clk = Bool(OUTPUT) + val clk_edge = Bool(OUTPUT) val in = new FIFOIO()(Bits(width = w)).flip val out = new FIFOIO()(Bits(width = w)) } @@ -23,21 +25,21 @@ class PCRReq extends Bundle val data = Bits(width = 64) } -class ioHTIF(implicit conf: RocketConfiguration) 
extends Bundle +class ioHTIF(ntiles: Int) extends Bundle { val reset = Bool(INPUT) val debug = new ioDebug val pcr_req = (new FIFOIO) { new PCRReq }.flip val pcr_rep = (new FIFOIO) { Bits(width = 64) } - val ipi_req = (new FIFOIO) { Bits(width = log2Up(conf.ntiles)) } + val ipi_req = (new FIFOIO) { Bits(width = log2Up(ntiles)) } val ipi_rep = (new FIFOIO) { Bool() }.flip } -class rocketHTIF(w: Int)(implicit conf: RocketConfiguration) extends Component +class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component { val io = new Bundle { val host = new ioHost(w) - val cpu = Vec(conf.ntiles) { new ioHTIF().flip } + val cpu = Vec(conf.ntiles) { new ioHTIF(conf.ntiles).flip } val mem = new ioTileLink } @@ -178,7 +180,8 @@ class rocketHTIF(w: Int)(implicit conf: RocketConfiguration) extends Component } x_init.io.enq.valid := state === state_mem_req val init_addr = addr.toUFix >> UFix(OFFSET_BITS-3) - x_init.io.enq.bits := Mux(cmd === cmd_writemem, conf.co.getUncachedWriteTransactionInit(init_addr, UFix(0)), conf.co.getUncachedReadTransactionInit(init_addr, UFix(0))) + val co = conf.co.asInstanceOf[CoherencePolicyWithUncached] + x_init.io.enq.bits := Mux(cmd === cmd_writemem, co.getUncachedWriteTransactionInit(init_addr, UFix(0)), co.getUncachedReadTransactionInit(init_addr, UFix(0))) io.mem.xact_init <> x_init.io.deq io.mem.xact_init_data.valid:= state === state_mem_wdata io.mem.xact_init_data.bits.data := mem_req_data diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index f3ca4ad7..2dddcef9 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -9,7 +9,7 @@ class Tile(resetSignal: Bool = null)(implicit conf: RocketConfiguration) extends { val io = new Bundle { val tilelink = new ioTileLink - val host = new ioHTIF + val host = new ioHTIF(conf.ntiles) } val cpu = new rocketProc diff --git a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala index 9e051d0e..fae65d92 100644 --- 
a/rocket/src/main/scala/top.scala +++ b/rocket/src/main/scala/top.scala @@ -25,16 +25,16 @@ class Top extends Component else new MICoherence } implicit val rconf = RocketConfiguration(NTILES, co) - implicit val uconf = UncoreConfiguration(NTILES+1, log2Up(NTILES)+1) + implicit val uconf = UncoreConfiguration(NTILES+1, log2Up(NTILES)+1, co) val io = new Bundle { val debug = new ioDebug - val host = new ioHost(HTIF_WIDTH) + val host = new ioHost(16) val mem = new ioMemPipe } - val htif = new rocketHTIF(HTIF_WIDTH) - val hub = new CoherenceHubBroadcast(co) + val htif = new rocketHTIF(io.host.w) + val hub = new CoherenceHubBroadcast hub.io.tiles(NTILES) <> htif.io.mem io.host <> htif.io.host From fedee6c67d278da55c0be8837929f850f8b2283b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 30 Oct 2012 01:03:47 -0700 Subject: [PATCH 0492/1087] add generic error correcting codes --- rocket/src/main/scala/ecc.scala | 128 ++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 rocket/src/main/scala/ecc.scala diff --git a/rocket/src/main/scala/ecc.scala b/rocket/src/main/scala/ecc.scala new file mode 100644 index 00000000..cc1e00a6 --- /dev/null +++ b/rocket/src/main/scala/ecc.scala @@ -0,0 +1,128 @@ +package rocket + +import Chisel._ +import Constants._ +import uncore._ +import Util._ + +abstract class Decoding +{ + def uncorrected: Bits + def corrected: Bits + def correctable: Bool + def uncorrectable: Bool + def error = correctable || uncorrectable +} + +abstract class Encoding +{ + def width(w0: Int): Int + def encode(x: Bits): Bits + def decode(x: Bits): Decoding +} + +class Parity extends Encoding +{ + def width(w0: Int) = w0+1 + def encode(x: Bits) = Cat(x.xorR, x) + def decode(y: Bits) = new Decoding { + def uncorrected = y(y.getWidth-2,0) + def corrected = uncorrected + def correctable = y.xorR + def uncorrectable = Bool(false) + } +} + +class SEC extends Encoding +{ + def width(k: Int) = { + val m = log2Up(k) + 1 - !isPow2(k) + k 
+ m + ((1 << m) < m+k+1) + } + def encode(x: Bits) = { + val k = x.getWidth + require(k > 0) + val n = width(k) + + val y = for (i <- 1 to n) yield { + if (isPow2(i)) { + val r = for (j <- 1 to n; if j != i && (j & i)) + yield x(mapping(j)) + r reduce (_^_) + } else + x(mapping(i)) + } + Vec(y){Bool()}.toBits + } + def decode(y: Bits) = new Decoding { + val n = y.getWidth + require(n > 0 && !isPow2(n)) + + val p2 = for (i <- 0 until log2Up(n)) yield 1 << i + val syndrome = p2 map { i => + val r = for (j <- 1 to n; if j & i) + yield y(j-1) + r reduce (_^_) + } + val s = Vec(syndrome){Bool()}.toBits + + private def swizzle(z: Bits) = Vec((1 to n).filter(i => !isPow2(i)).map(i => z(i-1))){Bool()}.toBits + def uncorrected = swizzle(y) + def corrected = swizzle(((y << 1) ^ UFixToOH(s)) >> 1) + def correctable = s.orR + def uncorrectable = Bool(false) + } + private def mapping(i: Int) = i-1-log2Up(i) +} + +class SECDED extends Encoding +{ + def width(k: Int) = new SEC().width(k)+1 + def encode(x: Bits) = new Parity().encode(new SEC().encode(x)) + def decode(x: Bits) = new Decoding { + val sec = new SEC().decode(x(x.getWidth-2,0)) + val par = new Parity().decode(x) + def uncorrected = sec.uncorrected + def corrected = sec.corrected + def correctable = par.correctable + def uncorrectable = !par.correctable && sec.correctable + } +} + +class SECDEDTest extends Component +{ + def inject(x: Bits, n: UFix) = { + val r = LFSR16() + val r1 = UFixToOH(r(log2Up(x.getWidth)-1,0))(x.getWidth-1,0) + val r2 = UFixToOH(r(log2Up(x.getWidth)*2-1,log2Up(x.getWidth)))(x.getWidth-1,0) + x ^ Mux(n < UFix(1), UFix(0), r1) ^ Mux(n < UFix(2), UFix(0), r2) + } + + val code = new SECDED + val k = 4 + val n = code.width(k) + + val io = new Bundle { + val original = Bits(OUTPUT, k) + val encoded = Bits(OUTPUT, n) + val injected = Bits(OUTPUT, n) + val uncorrected = Bits(OUTPUT, k) + val corrected = Bits(OUTPUT, k) + val correctable = Bool(OUTPUT) + val uncorrectable = Bool(OUTPUT) + } + + val c = 
Counter(Bool(true), 1 << k) + val numErrors = Counter(c._2, 3)._1 + val e = code.encode(c._1) + val i = inject(e, numErrors) + val d = code.decode(i) + + io.original := c._1 + io.encoded := e + io.injected := i + io.uncorrected := d.uncorrected + io.corrected := d.corrected + io.correctable := d.correctable + io.uncorrectable := d.uncorrectable +} From bd2d61de03fb6affead91c0df58783721499a3fe Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 4 Nov 2012 16:39:25 -0800 Subject: [PATCH 0493/1087] use 8T SRAM for I$; gate clock more aggressively --- rocket/src/main/scala/icache.scala | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index c81645a8..a8f99fdf 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -150,6 +150,8 @@ class ICache(c: ICacheConfig)(implicit conf: RocketConfiguration) extends Compon val s1_valid = Reg(resetVal = Bool(false)) val s1_pgoff = Reg() { UFix(width = PGIDX_BITS) } + val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUFix + val s1_tag = s1_addr(c.tagbits+c.untagbits-1,c.untagbits) val s0_valid = io.req.valid && rdy || s1_valid && stall && !io.req.bits.kill val s0_pgoff = Mux(io.req.valid, io.req.bits.idx, s1_pgoff) @@ -161,7 +163,7 @@ class ICache(c: ICacheConfig)(implicit conf: RocketConfiguration) extends Compon s2_valid := s1_valid && rdy && !io.req.bits.kill || io.resp.valid && stall when (s1_valid && rdy && !stall) { - s2_addr := Cat(io.req.bits.ppn, s1_pgoff).toUFix + s2_addr := s1_addr } val s2_tag = s2_addr(c.tagbits+c.untagbits-1,c.untagbits) @@ -180,7 +182,8 @@ class ICache(c: ICacheConfig)(implicit conf: RocketConfiguration) extends Compon val wmask = FillInterleaved(c.tagwidth, if (c.dm) Bits(1) else UFixToOH(repl_way)) val tag = Cat(if (c.parity) s2_tag.xorR else null, s2_tag) tag_array.write(s2_idx, Fill(c.assoc, tag), wmask) - }.elsewhen (s0_valid) { + } + 
/*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM tag_rdata := tag_array(s0_pgoff(c.untagbits-1,c.offbits)) } @@ -196,18 +199,23 @@ class ICache(c: ICacheConfig)(implicit conf: RocketConfiguration) extends Compon for (i <- 0 until c.assoc) when (s2_valid && s2_disparity(i)) { vb_array := vb_array.bitSet(Cat(UFix(i), s2_idx), Bool(false)) } + val s1_tag_match = Vec(c.assoc) { Bool() } val s2_tag_hit = Vec(c.assoc) { Bool() } val s2_data_disparity = Vec(c.assoc) { Bool() } for (i <- 0 until c.assoc) { val s1_vb = vb_array(Cat(UFix(i), s1_pgoff(c.untagbits-1,c.offbits))).toBool val s2_vb = Reg() { Bool() } - val s2_tag_out = Reg() { Bits() } + val s2_tag_disparity = Reg() { Bool() } + val s2_tag_match = Reg() { Bool() } + val tag_out = tag_rdata(c.tagwidth*(i+1)-1, c.tagwidth*i) when (s1_valid && rdy && !stall) { s2_vb := s1_vb - s2_tag_out := tag_rdata(c.tagwidth*(i+1)-1, c.tagwidth*i) + s2_tag_disparity := tag_out.xorR + s2_tag_match := s1_tag_match(i) } - s2_tag_hit(i) := s2_vb && s2_tag_out(c.tagbits-1,0) === s2_tag - s2_disparity(i) := Bool(c.parity) && s2_vb && (s2_tag_out.xorR || s2_data_disparity(i)) + s1_tag_match(i) := tag_out(c.tagbits-1,0) === s1_tag + s2_tag_hit(i) := s2_vb && s2_tag_match + s2_disparity(i) := Bool(c.parity) && s2_vb && (s2_tag_disparity || s2_data_disparity(i)) } s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_) @@ -219,10 +227,12 @@ class ICache(c: ICacheConfig)(implicit conf: RocketConfiguration) extends Compon val d = io.mem.xact_rep.bits.data val wdata = if (c.parity) Cat(d.xorR, d) else d data_array(Cat(s2_idx,rf_cnt)) := wdata - }.elsewhen (s0_valid) { + } + /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM s1_dout := data_array(s0_pgoff(c.untagbits-1,c.offbits-rf_cnt.getWidth)) } - when (s1_valid && rdy && !stall) { s2_dout(i) := s1_dout } + // if s1_tag_match is critical, replace with partial tag check + when (s1_valid && rdy && !stall && (Bool(c.dm) || s1_tag_match(i))) 
{ s2_dout(i) := s1_dout } s2_data_disparity(i) := s2_dout(i).xorR } val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(c.databits/8)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)))(c.ibytes*8-1,0)) From 7380c9fe6007eecc329a9294a5882754168f4634 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 4 Nov 2012 16:40:14 -0800 Subject: [PATCH 0494/1087] aggressively clock gate int and fp datapaths --- rocket/src/main/scala/consts.scala | 16 +- rocket/src/main/scala/ctrl.scala | 374 ++++++-------- rocket/src/main/scala/divider.scala | 2 +- rocket/src/main/scala/dpath.scala | 117 ++--- rocket/src/main/scala/dpath_util.scala | 39 +- rocket/src/main/scala/fpu.scala | 651 +++++++++++++------------ rocket/src/main/scala/multiplier.scala | 6 +- rocket/src/main/scala/tile.scala | 2 +- 8 files changed, 568 insertions(+), 639 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 32696213..ea28d0cb 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -16,14 +16,14 @@ abstract trait TileConfigConstants { trait ScalarOpConstants { val BR_X = Bits("b???", 3) - val BR_EQ = UFix(0, 3) - val BR_NE = UFix(1, 3) - val BR_J = UFix(2, 3) - val BR_N = UFix(3, 3) - val BR_LT = UFix(4, 3) - val BR_GE = UFix(5, 3) - val BR_LTU = UFix(6, 3) - val BR_GEU = UFix(7, 3) + val BR_EQ = Bits(0, 3) + val BR_NE = Bits(1, 3) + val BR_J = Bits(2, 3) + val BR_N = Bits(3, 3) + val BR_LT = Bits(4, 3) + val BR_GE = Bits(5, 3) + val BR_LTU = Bits(6, 3) + val BR_GEU = Bits(7, 3) val PC_EX4 = UFix(0, 2) val PC_EX = UFix(1, 2) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index cbe7736a..61fb58b2 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -11,25 +11,21 @@ class ioCtrlDpath extends Bundle() { // outputs to datapath val sel_pc = UFix(OUTPUT, 3); - val stalld = Bool(OUTPUT); val killd = Bool(OUTPUT); - val killx = Bool(OUTPUT); - val killm = 
Bool(OUTPUT); val ren2 = Bool(OUTPUT); val ren1 = Bool(OUTPUT); val sel_alu2 = UFix(OUTPUT, 3); val fn_dw = Bool(OUTPUT); val fn_alu = UFix(OUTPUT, 4); val mul_val = Bool(OUTPUT); - val mul_fn = UFix(OUTPUT, 2); + val mul_fn = Bits(OUTPUT, 2); val mul_kill = Bool(OUTPUT) val div_val = Bool(OUTPUT); - val div_fn = UFix(OUTPUT, 2); + val div_fn = Bits(OUTPUT, 2); val div_kill = Bool(OUTPUT) val sel_wa = Bool(OUTPUT); val sel_wb = UFix(OUTPUT, 3); val pcr = UFix(OUTPUT, 3) - val id_eret = Bool(OUTPUT); val wb_eret = Bool(OUTPUT); val mem_load = Bool(OUTPUT); val ex_fp_val= Bool(OUTPUT); @@ -40,7 +36,7 @@ class ioCtrlDpath extends Bundle() val wb_wen = Bool(OUTPUT); val wb_valid = Bool(OUTPUT) val flush_inst = Bool(OUTPUT); - val ex_mem_type = UFix(OUTPUT, 3) + val ex_mem_type = Bits(OUTPUT, 3) // exception handling val exception = Bool(OUTPUT); val cause = UFix(OUTPUT, 6); @@ -184,20 +180,20 @@ object rocketCtrlXDecode extends rocketCtrlDecodeConstants SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,Y,WA_RD,WB_X, 
PCR_N,SYNC_N,N,N,N,N), + MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REM-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_X, DW_32, FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, 
PCR_N,SYNC_N,N,N,N,N), + REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,N), SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), @@ -358,77 +354,74 @@ class rocketCtrl extends Component val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27)); val id_load_use = Bool(); - val ex_reg_br_type = Reg(){Bits()} - val ex_reg_jalr = Reg(){Bool()} - val ex_reg_btb_hit = Reg(){Bool()}; - val ex_reg_div_val = Reg(){Bool()}; - val ex_reg_mul_val = Reg(){Bool()}; - val ex_reg_mul_fn = Reg(){UFix()}; - val ex_reg_mem_val = Reg(){Bool()}; - val ex_reg_mem_cmd = Reg(){Bits()}; - val ex_reg_mem_type = Reg(){UFix(width = 3)}; - val ex_reg_valid = Reg(resetVal = Bool(false)); - val ex_reg_pcr = Reg(resetVal = PCR_N); - val ex_reg_wen = Reg(resetVal = Bool(false)); - val ex_reg_fp_wen = Reg(resetVal = Bool(false)); - val ex_reg_eret = Reg(resetVal = Bool(false)); - val ex_reg_flush_inst = Reg(resetVal = Bool(false)); - val ex_reg_xcpt_interrupt = Reg(resetVal = Bool(false)); + val ex_reg_xcpt_interrupt = Reg(resetVal = Bool(false)) + val ex_reg_valid = Reg(resetVal = Bool(false)) + val ex_reg_eret = Reg(resetVal = Bool(false)) + val ex_reg_wen = Reg(resetVal = Bool(false)) + val ex_reg_fp_wen = Reg(resetVal = Bool(false)) + val ex_reg_flush_inst = Reg(resetVal = Bool(false)) + val ex_reg_jalr = Reg(resetVal = Bool(false)) + val ex_reg_btb_hit = Reg(resetVal = Bool(false)) + val ex_reg_div_val = Reg(resetVal = Bool(false)) + val ex_reg_mul_val = Reg(resetVal = Bool(false)) + val ex_reg_mem_val = Reg(resetVal = Bool(false)) + val ex_reg_xcpt = Reg(resetVal = Bool(false)) + val ex_reg_fp_val = Reg(resetVal = 
Bool(false)) + val ex_reg_vec_val = Reg(resetVal = Bool(false)) + val ex_reg_replay_next = Reg(resetVal = Bool(false)) + val ex_reg_load_use = Reg(resetVal = Bool(false)) + val ex_reg_pcr = Reg(resetVal = PCR_N) + val ex_reg_br_type = Reg(resetVal = BR_N) + val ex_reg_mul_fn = Reg(){Bits()} + val ex_reg_mem_cmd = Reg(){Bits()} + val ex_reg_mem_type = Reg(){Bits()} val ex_reg_cause = Reg(){UFix()} - val ex_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); - val ex_reg_xcpt_itlb = Reg(resetVal = Bool(false)); - val ex_reg_xcpt_illegal = Reg(resetVal = Bool(false)); - val ex_reg_xcpt_privileged = Reg(resetVal = Bool(false)); - val ex_reg_xcpt_syscall = Reg(resetVal = Bool(false)); - val ex_reg_fp_val = Reg(resetVal = Bool(false)); - val ex_reg_fp_sboard_set = Reg(resetVal = Bool(false)); - val ex_reg_vec_val = Reg(resetVal = Bool(false)); - val ex_reg_replay_next = Reg(resetVal = Bool(false)); - val ex_reg_load_use = Reg(resetVal = Bool(false)); - val mem_reg_valid = Reg(resetVal = Bool(false)); - val mem_reg_pcr = Reg(resetVal = PCR_N); - val mem_reg_wen = Reg(resetVal = Bool(false)); - val mem_reg_fp_wen = Reg(resetVal = Bool(false)); - val mem_reg_flush_inst = Reg(resetVal = Bool(false)); - val mem_reg_xcpt_interrupt = Reg(resetVal = Bool(false)); + val mem_reg_xcpt_interrupt = Reg(resetVal = Bool(false)) + val mem_reg_valid = Reg(resetVal = Bool(false)) + val mem_reg_eret = Reg(resetVal = Bool(false)) + val mem_reg_wen = Reg(resetVal = Bool(false)) + val mem_reg_fp_wen = Reg(resetVal = Bool(false)) + val mem_reg_flush_inst = Reg(resetVal = Bool(false)) + val mem_reg_div_val = Reg(resetVal = Bool(false)) + val mem_reg_mul_val = Reg(resetVal = Bool(false)) + val mem_reg_mem_val = Reg(resetVal = Bool(false)) + val mem_reg_xcpt = Reg(resetVal = Bool(false)) + val mem_reg_fp_val = Reg(resetVal = Bool(false)) + val mem_reg_replay = Reg(resetVal = Bool(false)) + val mem_reg_replay_next = Reg(resetVal = Bool(false)) + val mem_reg_pcr = Reg(resetVal = PCR_N) val 
mem_reg_cause = Reg(){UFix()} - val mem_reg_xcpt_ma_inst = Reg(resetVal = Bool(false)); - val mem_reg_xcpt_itlb = Reg(resetVal = Bool(false)); - val mem_reg_xcpt_illegal = Reg(resetVal = Bool(false)); - val mem_reg_xcpt_privileged = Reg(resetVal = Bool(false)); - val mem_reg_xcpt_fpu = Reg(resetVal = Bool(false)); - val mem_reg_xcpt_vec = Reg(resetVal = Bool(false)); - val mem_reg_xcpt_syscall = Reg(resetVal = Bool(false)); - val mem_reg_fp_val = Reg(resetVal = Bool(false)); - val mem_reg_replay = Reg(resetVal = Bool(false)); - val mem_reg_replay_next = Reg(resetVal = Bool(false)); - val mem_reg_kill = Reg(resetVal = Bool(false)); - val mem_reg_fp_sboard_set = Reg(resetVal = Bool(false)); + val mem_reg_mem_type = Reg(){Bits()} - val wb_reg_valid = Reg(resetVal = Bool(false)); - val wb_reg_pcr = Reg(resetVal = PCR_N); - val wb_reg_wen = Reg(resetVal = Bool(false)); - val wb_reg_fp_wen = Reg(resetVal = Bool(false)); - val wb_reg_flush_inst = Reg(resetVal = Bool(false)); - val wb_reg_eret = Reg(resetVal = Bool(false)); - val wb_reg_exception = Reg(resetVal = Bool(false)); - val wb_reg_replay = Reg(resetVal = Bool(false)); - val wb_reg_replay_next = Reg(resetVal = Bool(false)); - val wb_reg_cause = Reg(){UFix()}; - val wb_reg_fp_val = Reg(resetVal = Bool(false)); - val wb_reg_fp_sboard_set = Reg(resetVal = Bool(false)); - val wb_reg_dcache_miss = Reg(io.dmem.resp.bits.miss || io.dmem.resp.bits.nack, resetVal = Bool(false)); + + val wb_reg_valid = Reg(resetVal = Bool(false)) + val wb_reg_pcr = Reg(resetVal = PCR_N) + val wb_reg_wen = Reg(resetVal = Bool(false)) + val wb_reg_fp_wen = Reg(resetVal = Bool(false)) + val wb_reg_flush_inst = Reg(resetVal = Bool(false)) + val wb_reg_eret = Reg(resetVal = Bool(false)) + val wb_reg_xcpt = Reg(resetVal = Bool(false)) + val wb_reg_replay = Reg(resetVal = Bool(false)) + val wb_reg_replay_next = Reg(resetVal = Bool(false)) + val wb_reg_cause = Reg(){UFix()} + val wb_reg_fp_val = Reg(resetVal = Bool(false)) + val wb_reg_dcache_miss = 
Reg(io.dmem.resp.bits.miss || io.dmem.resp.bits.nack, resetVal = Bool(false)) val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) val take_pc = Bool() val take_pc_wb = Bool() + val ctrl_killd = Bool() + val ctrl_killx = Bool() val ctrl_killm = Bool() + val id_maskable_interrupts = List( + (io.dpath.irq_ipi, IRQ_IPI), + (io.dpath.irq_timer, IRQ_TIMER)) + var id_interrupts = id_maskable_interrupts.map(i => (io.dpath.status(SR_IM+i._2) && i._1, UFix(CAUSE_INTERRUPT+i._2))) + var vec_replay = Bool(false) var vec_stalld = Bool(false) - var vec_irq = Bool(false) - var vec_irq_cause = UFix(CAUSE_INTERRUPT+IRQ_IPI) // don't care if (HAVE_VEC) { // vector control @@ -440,7 +433,7 @@ class rocketCtrl extends Component vec.io.valid := wb_reg_valid vec.io.s := io.dpath.status(SR_S) vec.io.sr_ev := io.dpath.status(SR_EV) - vec.io.exception := wb_reg_exception + vec.io.exception := wb_reg_xcpt vec.io.eret := wb_reg_eret val vec_dec = new rocketCtrlVecDecoder() @@ -463,48 +456,49 @@ class rocketCtrl extends Component vec_dec.io.sigs.vfence && !vec.io.vfence_ready) vec_replay = vec.io.replay - vec_irq = vec.io.irq - vec_irq_cause = vec.io.irq_cause + id_interrupts = id_interrupts :+ (vec.io.irq, vec.io.irq_cause) } - + + val (id_interrupt_unmasked, id_interrupt_cause) = checkExceptions(id_interrupts) + val id_interrupt = io.dpath.status(SR_ET) && id_interrupt_unmasked + + def checkExceptions(x: Seq[(Bits, UFix)]) = + (x.map(_._1).reduce(_||_), PriorityMux(x)) + // executing ERET when traps are enabled causes an illegal instruction exception val illegal_inst = !id_int_val.toBool || (id_eret.toBool && io.dpath.status(SR_ET)) - val p_irq_timer = (io.dpath.status(SR_IM+IRQ_TIMER).toBool && io.dpath.irq_timer); - val p_irq_ipi = (io.dpath.status(SR_IM+IRQ_IPI).toBool && io.dpath.irq_ipi); - val id_interrupt = - io.dpath.status(SR_ET).toBool && - ((io.dpath.status(SR_IM+IRQ_TIMER).toBool && io.dpath.irq_timer) || - (io.dpath.status(SR_IM+IRQ_IPI).toBool && io.dpath.irq_ipi) || - 
vec_irq); - val id_cause = - Mux(p_irq_ipi, UFix(CAUSE_INTERRUPT+IRQ_IPI,6), - Mux(p_irq_timer, UFix(CAUSE_INTERRUPT+IRQ_TIMER,6), - vec_irq_cause)) + val (id_xcpt, id_cause) = checkExceptions(List( + (id_interrupt, id_interrupt_cause), + (io.imem.resp.bits.xcpt_ma, UFix(0)), + (io.imem.resp.bits.xcpt_if, UFix(1)), + (illegal_inst, UFix(2)), + (id_privileged && !io.dpath.status(SR_S), UFix(3)), + (id_fp_val && !io.dpath.status(SR_EF), UFix(4)), + (id_syscall, UFix(6)), + (id_vec_val && !io.dpath.status(SR_EV), UFix(12)))) - when (reset.toBool || io.dpath.killd) { - ex_reg_br_type := BR_N; + ex_reg_xcpt_interrupt := id_interrupt && !take_pc && io.imem.resp.valid + when (id_xcpt) { ex_reg_cause := id_cause } + + when (ctrl_killd) { ex_reg_jalr := Bool(false) ex_reg_btb_hit := Bool(false); ex_reg_div_val := Bool(false); ex_reg_mul_val := Bool(false); ex_reg_mem_val := Bool(false); ex_reg_valid := Bool(false); - ex_reg_pcr := PCR_N ex_reg_wen := Bool(false); ex_reg_fp_wen := Bool(false); ex_reg_eret := Bool(false); ex_reg_flush_inst := Bool(false); - ex_reg_xcpt_ma_inst := Bool(false); - ex_reg_xcpt_itlb := Bool(false); - ex_reg_xcpt_illegal := Bool(false); - ex_reg_xcpt_privileged := Bool(false); - ex_reg_xcpt_syscall := Bool(false); ex_reg_fp_val := Bool(false); - ex_reg_fp_sboard_set := Bool(false); ex_reg_vec_val := Bool(false); ex_reg_replay_next := Bool(false); ex_reg_load_use := Bool(false); + ex_reg_pcr := PCR_N + ex_reg_br_type := BR_N + ex_reg_xcpt := Bool(false) } .otherwise { ex_reg_br_type := id_br_type; @@ -520,21 +514,14 @@ class rocketCtrl extends Component ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen ex_reg_eret := id_eret.toBool; ex_reg_flush_inst := (id_sync === SYNC_I); - ex_reg_xcpt_ma_inst := io.imem.resp.bits.xcpt_ma - ex_reg_xcpt_itlb := io.imem.resp.bits.xcpt_if - ex_reg_xcpt_illegal := illegal_inst; - ex_reg_xcpt_privileged := (id_privileged & ~io.dpath.status(SR_S)).toBool; - ex_reg_xcpt_syscall := id_syscall.toBool; ex_reg_fp_val := 
id_fp_val - ex_reg_fp_sboard_set := io.fpu.dec.sboard ex_reg_vec_val := id_vec_val.toBool ex_reg_replay_next := id_replay_next ex_reg_load_use := id_load_use; + ex_reg_mem_cmd := id_mem_cmd + ex_reg_mem_type := id_mem_type.toUFix + ex_reg_xcpt := id_xcpt } - ex_reg_xcpt_interrupt := !take_pc && id_interrupt - ex_reg_mem_cmd := id_mem_cmd - ex_reg_mem_type := id_mem_type.toUFix - ex_reg_cause := id_cause val br_taken = Mux(ex_reg_br_type === BR_EQ, io.dpath.br_eq, @@ -544,15 +531,18 @@ class rocketCtrl extends Component Mux(ex_reg_br_type === BR_LTU, io.dpath.br_ltu, Mux(ex_reg_br_type === BR_GEU, ~io.dpath.br_ltu, ex_reg_br_type === BR_J)))))) - - val mem_reg_div_val = Reg(resetVal = Bool(false)) - val mem_reg_mul_val = Reg(resetVal = Bool(false)) - val mem_reg_eret = Reg(){Bool()}; - val mem_reg_mem_val = Reg(){Bool()}; - val mem_reg_mem_cmd = Reg(){Bits()} - val mem_reg_mem_type = Reg(){Bits()} + val take_pc_ex = !Mux(ex_reg_jalr, ex_reg_btb_hit && io.dpath.jalr_eq, ex_reg_btb_hit === br_taken) - when (reset.toBool || io.dpath.killx) { + val (ex_xcpt, ex_cause) = checkExceptions(List( + (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), + (ex_reg_fp_val && io.fpu.illegal_rm, UFix(2)))) + + mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb + when (ex_xcpt) { mem_reg_cause := ex_cause } + mem_reg_div_val := ex_reg_div_val && io.dpath.div_rdy + mem_reg_mul_val := ex_reg_mul_val && io.dpath.mul_rdy + + when (ctrl_killx) { mem_reg_valid := Bool(false); mem_reg_pcr := PCR_N mem_reg_wen := Bool(false); @@ -560,16 +550,9 @@ class rocketCtrl extends Component mem_reg_eret := Bool(false); mem_reg_mem_val := Bool(false); mem_reg_flush_inst := Bool(false); - mem_reg_xcpt_ma_inst := Bool(false); - mem_reg_xcpt_itlb := Bool(false); - mem_reg_xcpt_illegal := Bool(false); - mem_reg_xcpt_privileged := Bool(false); - mem_reg_xcpt_fpu := Bool(false); - mem_reg_xcpt_vec := Bool(false); - mem_reg_xcpt_syscall := Bool(false); mem_reg_fp_val := Bool(false); - 
mem_reg_fp_sboard_set := Bool(false) mem_reg_replay_next := Bool(false) + mem_reg_xcpt := Bool(false) } .otherwise { mem_reg_valid := ex_reg_valid @@ -579,23 +562,21 @@ class rocketCtrl extends Component mem_reg_eret := ex_reg_eret; mem_reg_mem_val := ex_reg_mem_val; mem_reg_flush_inst := ex_reg_flush_inst; - mem_reg_xcpt_ma_inst := ex_reg_xcpt_ma_inst; - mem_reg_xcpt_itlb := ex_reg_xcpt_itlb; - mem_reg_xcpt_illegal := ex_reg_xcpt_illegal || ex_reg_fp_val && io.fpu.illegal_rm; - mem_reg_xcpt_privileged := ex_reg_xcpt_privileged; - mem_reg_xcpt_fpu := ex_reg_fp_val && !io.dpath.status(SR_EF).toBool; - mem_reg_xcpt_vec := ex_reg_vec_val && !io.dpath.status(SR_EV).toBool; - mem_reg_xcpt_syscall := ex_reg_xcpt_syscall; mem_reg_fp_val := ex_reg_fp_val - mem_reg_fp_sboard_set := ex_reg_fp_sboard_set mem_reg_replay_next := ex_reg_replay_next + mem_reg_mem_type := ex_reg_mem_type + mem_reg_xcpt := ex_xcpt } - mem_reg_div_val := ex_reg_div_val && io.dpath.div_rdy - mem_reg_mul_val := ex_reg_mul_val && io.dpath.mul_rdy - mem_reg_mem_cmd := ex_reg_mem_cmd; - mem_reg_mem_type := ex_reg_mem_type; - mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb - mem_reg_cause := ex_reg_cause + + val (mem_xcpt, mem_cause) = checkExceptions(List( + (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), + (mem_reg_mem_val && io.dmem.xcpt.ma.ld, UFix( 8)), + (mem_reg_mem_val && io.dmem.xcpt.ma.st, UFix( 9)), + (mem_reg_mem_val && io.xcpt_dtlb_ld, UFix(10)), + (mem_reg_mem_val && io.xcpt_dtlb_st, UFix(11)))) + + wb_reg_xcpt := mem_xcpt && !take_pc_wb && !wb_reg_replay_next + when (mem_xcpt) { wb_reg_cause := mem_cause } when (ctrl_killm) { wb_reg_valid := Bool(false) @@ -606,7 +587,6 @@ class rocketCtrl extends Component wb_reg_flush_inst := Bool(false); wb_reg_div_mul_val := Bool(false); wb_reg_fp_val := Bool(false) - wb_reg_fp_sboard_set := Bool(false) wb_reg_replay_next := Bool(false) } .otherwise { @@ -618,7 +598,6 @@ class rocketCtrl extends Component wb_reg_flush_inst := 
mem_reg_flush_inst; wb_reg_div_mul_val := mem_reg_div_val || mem_reg_mul_val wb_reg_fp_val := mem_reg_fp_val - wb_reg_fp_sboard_set := mem_reg_fp_sboard_set wb_reg_replay_next := mem_reg_replay_next } @@ -648,7 +627,7 @@ class rocketCtrl extends Component fp_sboard.io.r(2).addr := id_raddr3.toUFix fp_sboard.io.r(3).addr := id_waddr.toUFix - fp_sboard.io.w(0).en := wb_reg_dcache_miss && wb_reg_fp_wen || wb_reg_fp_sboard_set + fp_sboard.io.w(0).en := wb_reg_dcache_miss && wb_reg_fp_wen || io.fpu.sboard_set fp_sboard.io.w(0).data := Bool(true) fp_sboard.io.w(0).addr := io.dpath.fp_sboard_wb_waddr @@ -666,82 +645,41 @@ class rocketCtrl extends Component io.fpu.dec.wen && fp_sboard.io.r(3).data } - // exception handling - val mem_xcpt_ma_ld = io.dmem.xcpt.ma.ld && !mem_reg_kill - val mem_xcpt_ma_st = io.dmem.xcpt.ma.st && !mem_reg_kill - val mem_xcpt_dtlb_ld = io.xcpt_dtlb_ld && !mem_reg_kill - val mem_xcpt_dtlb_st = io.xcpt_dtlb_st && !mem_reg_kill - - val mem_exception = - mem_reg_xcpt_interrupt || - mem_xcpt_ma_ld || - mem_xcpt_ma_st || - mem_xcpt_dtlb_ld || - mem_xcpt_dtlb_st || - mem_reg_xcpt_illegal || - mem_reg_xcpt_privileged || - mem_reg_xcpt_fpu || - mem_reg_xcpt_vec || - mem_reg_xcpt_syscall || - mem_reg_xcpt_itlb || - mem_reg_xcpt_ma_inst; - - val mem_cause = - Mux(mem_reg_xcpt_interrupt, mem_reg_cause, // asynchronous interrupt - Mux(mem_reg_xcpt_itlb, UFix(1,5), // instruction access fault - Mux(mem_reg_xcpt_illegal, UFix(2,5), // illegal instruction - Mux(mem_reg_xcpt_privileged, UFix(3,5), // privileged instruction - Mux(mem_reg_xcpt_fpu, UFix(4,5), // FPU disabled - Mux(mem_reg_xcpt_syscall, UFix(6,5), // system call - // breakpoint - Mux(mem_xcpt_ma_ld, UFix(8,5), // misaligned load - Mux(mem_xcpt_ma_st, UFix(9,5), // misaligned store - Mux(mem_xcpt_dtlb_ld, UFix(10,5), // load fault - Mux(mem_xcpt_dtlb_st, UFix(11,5), // store fault - Mux(mem_reg_xcpt_vec, UFix(12,5), // vector disabled - UFix(0,5)))))))))))); // instruction address misaligned - - // 
control transfer from ex/mem - val take_pc_ex = !Mux(ex_reg_jalr, ex_reg_btb_hit && io.dpath.jalr_eq, ex_reg_btb_hit === br_taken) - take_pc_wb := wb_reg_replay || vec_replay || wb_reg_exception || wb_reg_eret - take_pc := take_pc_ex || take_pc_wb; - - // replay mem stage PC on a DTLB miss or a long-latency writeback - val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val - val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack) - val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem - val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem - val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_exception || mem_reg_kill - ctrl_killm := killm_common || dmem_kill_mem || fpu_kill_mem - - // replay execute stage PC when the D$ is blocked, when the D$ misses, - // for privileged instructions, and for fence.i instructions + // replay inst in ex stage val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || ex_reg_div_val && !io.dpath.div_rdy || ex_reg_mul_val && !io.dpath.mul_rdy || mem_reg_replay_next - val kill_ex = take_pc_wb || replay_ex + ctrl_killx := take_pc_wb || replay_ex + + // replay inst in mem stage + val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val + val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack) + val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem + val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem + val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_reg_xcpt || !mem_reg_valid + ctrl_killm := killm_common || mem_xcpt || dmem_kill_mem || fpu_kill_mem mem_reg_replay := replay_ex && !take_pc_wb; - mem_reg_kill := kill_ex; - wb_reg_replay := replay_mem && !take_pc_wb - wb_reg_exception := mem_exception && !take_pc_wb && !wb_reg_replay_next - wb_reg_cause := 
mem_cause; + wb_reg_replay := replay_mem && !take_pc_wb val replay_wb = wb_reg_replay || vec_replay || io.dpath.pcr_replay - val wb_badvaddr_wen = wb_reg_exception && ((wb_reg_cause === UFix(10)) || (wb_reg_cause === UFix(11))) // write cause to PCR on an exception - io.dpath.exception := wb_reg_exception; - io.dpath.cause := wb_reg_cause; - io.dpath.badvaddr_wen := wb_badvaddr_wen; - io.dpath.vec_irq_aux_wen := wb_reg_exception && wb_reg_cause >= UFix(24) && wb_reg_cause < UFix(32) + io.dpath.exception := wb_reg_xcpt + io.dpath.cause := wb_reg_cause + io.dpath.badvaddr_wen := wb_reg_xcpt && (wb_reg_cause === UFix(10) || wb_reg_cause === UFix(11)) + io.dpath.vec_irq_aux_wen := wb_reg_xcpt && wb_reg_cause >= UFix(24) && wb_reg_cause < UFix(32) + + // control transfer from ex/wb + take_pc_wb := wb_reg_replay || vec_replay || wb_reg_xcpt || wb_reg_eret + take_pc := take_pc_ex || take_pc_wb; io.dpath.sel_pc := - Mux(wb_reg_exception, PC_PCR, // exception + Mux(wb_reg_xcpt, PC_PCR, // exception Mux(wb_reg_eret, PC_PCR, // eret instruction Mux(replay_wb, PC_WB, // replay Mux(ex_reg_jalr, PC_EX, // JALR @@ -795,9 +733,6 @@ class rocketCtrl extends Component val id_wb_hazard = data_hazard_wb && (wb_reg_dcache_miss || wb_reg_div_mul_val) || fp_data_hazard_wb && (wb_reg_dcache_miss || wb_reg_fp_val) - val killd_common = take_pc || id_interrupt || ex_reg_replay_next - val ctrl_killd = killd_common || !io.imem.resp.valid - val ctrl_stalld = id_ex_hazard || id_mem_hazard || id_wb_hazard || id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || @@ -805,13 +740,11 @@ class rocketCtrl extends Component id_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || (id_sync === SYNC_D || id_sync === SYNC_I) && !io.dmem.req.ready || vec_stalld - + ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || id_interrupt + + io.dpath.killd := take_pc || ctrl_stalld && !id_interrupt io.dpath.flush_inst := wb_reg_flush_inst; - io.dpath.stalld := !ctrl_killd && ctrl_stalld; - io.dpath.killd := 
ctrl_killd || ctrl_stalld - io.dpath.killx := kill_ex; - io.dpath.killm := killm_common - io.imem.resp.ready := killd_common || !ctrl_stalld + io.imem.resp.ready := take_pc || !ctrl_stalld io.imem.req.bits.invalidate := wb_reg_flush_inst io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen @@ -836,18 +769,17 @@ class rocketCtrl extends Component io.dpath.sel_wa := id_sel_wa.toBool; io.dpath.sel_wb := id_sel_wb.toUFix io.dpath.pcr := wb_reg_pcr.toUFix - io.dpath.id_eret := id_eret.toBool; io.dpath.wb_eret := wb_reg_eret; io.dpath.ex_mem_type := ex_reg_mem_type - io.fpu.valid := !io.dpath.killd && id_fp_val - io.fpu.killx := kill_ex + io.fpu.valid := !ctrl_killd && id_fp_val + io.fpu.killx := ctrl_killx io.fpu.killm := killm_common io.dtlb_val := ex_reg_mem_val - io.dtlb_kill := mem_reg_kill + io.dtlb_kill := !mem_reg_valid io.dmem.req.valid := ex_reg_mem_val - io.dmem.req.bits.kill := killm_common || io.dtlb_miss + io.dmem.req.bits.kill := killm_common || mem_xcpt || io.dtlb_miss io.dmem.req.bits.cmd := ex_reg_mem_cmd io.dmem.req.bits.typ := ex_reg_mem_type } diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index d02859cf..f434a162 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -15,7 +15,7 @@ class rocketDivider(earlyOut: Boolean = false) extends Component { val divby0 = Reg() { Bool() }; val neg_quo = Reg() { Bool() }; val neg_rem = Reg() { Bool() }; - val reg_tag = Reg() { Bits() }; + val reg_tag = Reg() { UFix() }; val rem = Reg() { Bool() }; val half = Reg() { Bool() }; diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index de19a6f9..41e469ac 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -32,8 +32,6 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component val alu = new ALU val ex_alu_out = alu.io.out; val ex_alu_adder_out = alu.io.adder_out; - - val rfile = new rocketDpathRegfile(); // 
execute definitions val ex_reg_pc = Reg() { UFix() }; @@ -48,6 +46,7 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component val ex_reg_ctrl_fn_alu = Reg() { UFix() }; val ex_reg_ctrl_sel_wb = Reg() { UFix() }; val ex_wdata = Bits() + val ex_reg_kill = Reg() { Bool() } // memory definitions val mem_reg_pc = Reg() { UFix() }; @@ -57,6 +56,7 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component val mem_reg_wdata = Reg() { Bits() }; val mem_reg_raddr1 = Reg() { UFix() }; val mem_reg_raddr2 = Reg() { UFix() }; + val mem_reg_kill = Reg() { Bool() } // writeback definitions val wb_reg_pc = Reg() { UFix() }; @@ -98,24 +98,14 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component val id_pc = io.imem.resp.bits.pc debug(id_inst) debug(id_pc) + + val regfile_ = Mem(31){Bits(width = 64)} + def readRF(a: UFix) = Mux(a === UFix(0), Bits(0), regfile_(~a)) + def writeRF(a: UFix, d: Bits) = regfile_(~a) := d val id_raddr1 = id_inst(26,22).toUFix; val id_raddr2 = id_inst(21,17).toUFix; - // regfile read - rfile.io.r0.en <> io.ctrl.ren2; - rfile.io.r0.addr := id_raddr2; - val id_rdata2 = rfile.io.r0.data; - - rfile.io.r1.en <> io.ctrl.ren1; - rfile.io.r1.addr := id_raddr1; - val id_rdata1 = rfile.io.r1.data; - - // destination register selection - val id_waddr = - Mux(io.ctrl.sel_wa === WA_RD, id_inst(31,27).toUFix, - RA); // WA_RA - // bypass muxes val id_rs1_dmem_bypass = Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, Bool(false), @@ -125,7 +115,7 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, ex_wdata, Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata, Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, - id_rdata1))) + readRF(id_raddr1)))) val id_rs2_dmem_bypass = Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, Bool(false), @@ -135,7 +125,7 @@ class rocketDpath(implicit conf: RocketConfiguration) 
extends Component Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, ex_wdata, Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata, Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, - id_rdata2))) + readRF(id_raddr2)))) // immediate generation val id_imm_bj = io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE @@ -160,17 +150,20 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component io.fpu.inst := id_inst // execute stage - ex_reg_pc := id_pc - ex_reg_inst := id_inst - ex_reg_raddr1 := id_raddr1 - ex_reg_raddr2 := id_raddr2; - ex_reg_op2 := id_op2; - ex_reg_rs2 := id_rs2; - ex_reg_rs1 := id_rs1; - ex_reg_waddr := id_waddr; - ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUFix; - ex_reg_ctrl_fn_alu := io.ctrl.fn_alu; - ex_reg_ctrl_sel_wb := io.ctrl.sel_wb; + ex_reg_kill := io.ctrl.killd + when (!io.ctrl.killd) { + ex_reg_pc := id_pc + ex_reg_inst := id_inst + ex_reg_raddr1 := id_raddr1 + ex_reg_raddr2 := id_raddr2 + ex_reg_op2 := id_op2 + ex_reg_waddr := Mux(io.ctrl.sel_wa === WA_RD, id_inst(31,27).toUFix, RA) + ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUFix + ex_reg_ctrl_fn_alu := io.ctrl.fn_alu + ex_reg_ctrl_sel_wb := io.ctrl.sel_wb + when (io.ctrl.ren1) { ex_reg_rs1 := id_rs1 } + when (io.ctrl.ren2) { ex_reg_rs2 := id_rs2 } + } val ex_rs1 = Mux(Reg(id_rs1_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs1) val ex_rs2 = Mux(Reg(id_rs2_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs2) @@ -188,7 +181,7 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component div.io.req.valid := io.ctrl.div_val div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, io.ctrl.div_fn) div.io.req.bits.in0 := ex_rs1 - div.io.req.bits.in1 := ex_rs2 + div.io.req.bits.in1 := ex_op2 div.io.req_tag := ex_reg_waddr div.io.req_kill := io.ctrl.div_kill div.io.resp_rdy := !dmem_resp_replay @@ -207,7 +200,7 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component mul_io.req.valid := io.ctrl.mul_val mul_io.req.bits.fn := 
Cat(ex_reg_ctrl_fn_dw, io.ctrl.mul_fn) mul_io.req.bits.in0 := ex_rs1 - mul_io.req.bits.in1 := ex_rs2 + mul_io.req.bits.in1 := ex_op2 mul_io.req_tag := ex_reg_waddr mul_io.req_kill := io.ctrl.mul_kill mul_io.resp_rdy := !dmem_resp_replay && !div.io.resp_val @@ -264,13 +257,16 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component storegen.io.din := ex_rs2 // memory stage - mem_reg_pc := ex_reg_pc; - mem_reg_inst := ex_reg_inst - mem_reg_rs2 := storegen.io.dout - mem_reg_waddr := ex_reg_waddr; - mem_reg_wdata := ex_wdata; - mem_reg_raddr1 := ex_reg_raddr1 - mem_reg_raddr2 := ex_reg_raddr2; + mem_reg_kill := ex_reg_kill + when (!ex_reg_kill) { + mem_reg_pc := ex_reg_pc + mem_reg_inst := ex_reg_inst + mem_reg_rs2 := storegen.io.dout + mem_reg_waddr := ex_reg_waddr + mem_reg_wdata := ex_wdata + mem_reg_raddr1 := ex_reg_raddr1 + mem_reg_raddr2 := ex_reg_raddr2 + } // for load/use hazard detection (load byte/halfword) io.ctrl.mem_waddr := mem_reg_waddr; @@ -288,12 +284,9 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr, Mux(div.io.resp_val, div.io.resp_tag, - Mux(mul_io.resp_val, mul_io.resp_tag, - mem_reg_waddr))).toUFix + mul_io.resp_tag)) val mem_ll_wdata = Mux(div.io.resp_val, div.io.resp_bits, - Mux(mul_io.resp_val, mul_io.resp_bits, - Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, - mem_reg_wdata))) + mul_io.resp_bits) val mem_ll_wb = dmem_resp_replay || div.io.resp_val || mul_io.resp_val io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu @@ -302,17 +295,25 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component io.fpu.dmem_resp_tag := dmem_resp_waddr // writeback stage - wb_reg_pc := mem_reg_pc; - wb_reg_inst := mem_reg_inst - wb_reg_ll_wb := mem_ll_wb - wb_reg_rs2 := mem_reg_rs2 - wb_reg_waddr := mem_ll_waddr - wb_reg_wdata := mem_ll_wdata - wb_reg_dmem_wdata := io.dmem.resp.bits.data - wb_reg_vec_waddr := mem_reg_waddr 
- wb_reg_vec_wdata := mem_reg_wdata - wb_reg_raddr1 := mem_reg_raddr1 - wb_reg_raddr2 := mem_reg_raddr2; + when (io.ctrl.mem_load) { + wb_reg_dmem_wdata := io.dmem.resp.bits.data + } + when (!mem_reg_kill) { + wb_reg_pc := mem_reg_pc + wb_reg_inst := mem_reg_inst + wb_reg_rs2 := mem_reg_rs2 + wb_reg_vec_waddr := mem_reg_waddr + wb_reg_vec_wdata := mem_reg_wdata + wb_reg_raddr1 := mem_reg_raddr1 + wb_reg_raddr2 := mem_reg_raddr2 + wb_reg_waddr := mem_reg_waddr + wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata) + } + wb_reg_ll_wb := mem_ll_wb + when (mem_ll_wb) { + wb_reg_waddr := mem_ll_waddr + wb_reg_wdata := mem_ll_wdata + } // regfile write val wb_src_dmem = Reg(io.ctrl.mem_load) && io.ctrl.wb_valid || r_dmem_resp_replay @@ -356,9 +357,11 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component wb_reg_wdata) } - rfile.io.w0.addr := wb_reg_waddr - rfile.io.w0.en := io.ctrl.wb_wen || wb_reg_ll_wb - rfile.io.w0.data := Mux(io.ctrl.pcr != PCR_N && io.ctrl.wb_wen, pcr.io.r.data, wb_wdata) + val rf_wen = io.ctrl.wb_wen || wb_reg_ll_wb + val rf_waddr = wb_reg_waddr + val rf_wdata = Mux(io.ctrl.wb_wen && io.ctrl.pcr != PCR_N, pcr.io.r.data, wb_wdata) + List(rf_wen, rf_waddr, rf_wdata).map(debug _) + when (rf_wen) { writeRF(rf_waddr, rf_wdata) } io.ctrl.wb_waddr := wb_reg_waddr io.ctrl.mem_wb := dmem_resp_replay; diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index db40190a..f12a685a 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -56,8 +56,8 @@ class rocketDpathBTB(entries: Int) extends Component class ioDpathPCR(implicit conf: RocketConfiguration) extends Bundle { val host = new ioHTIF(conf.ntiles) - val r = new ioReadPort(); - val w = new ioWritePort(); + val r = new ioReadPort(32, 64) + val w = new ioWritePort(32, 64) val status = Bits(OUTPUT, 32); val ptbr = UFix(OUTPUT, PADDR_BITS); @@ -228,33 +228,18 @@ 
class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component } } -class ioReadPort extends Bundle() +class ioReadPort(d: Int, w: Int) extends Bundle { - val addr = UFix(INPUT, 5); - val en = Bool(INPUT); - val data = Bits(OUTPUT, 64); + val addr = UFix(INPUT, log2Up(d)) + val en = Bool(INPUT) + val data = Bits(OUTPUT, w) + override def clone = new ioReadPort(d, w).asInstanceOf[this.type] } -class ioWritePort extends Bundle() +class ioWritePort(d: Int, w: Int) extends Bundle { - val addr = UFix(INPUT, 5); - val en = Bool(INPUT); - val data = Bits(INPUT, 64); -} - -class ioRegfile extends Bundle() -{ - val r0 = new ioReadPort(); - val r1 = new ioReadPort(); - val w0 = new ioWritePort(); -} - -class rocketDpathRegfile extends Component -{ - override val io = new ioRegfile(); - - val regfile = Mem(32){ Bits(width=64) } - when (io.w0.en) { regfile(io.w0.addr) := io.w0.data } - io.r0.data := Mux((io.r0.addr === UFix(0, 5)) || !io.r0.en, Bits(0, 64), regfile(io.r0.addr)); - io.r1.data := Mux((io.r1.addr === UFix(0, 5)) || !io.r1.en, Bits(0, 64), regfile(io.r1.addr)); + val addr = UFix(INPUT, log2Up(d)) + val en = Bool(INPUT) + val data = Bits(INPUT, w) + override def clone = new ioWritePort(d, w).asInstanceOf[this.type] } diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 9f70d637..69e36380 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -4,6 +4,7 @@ import Chisel._ import Node._ import Constants._ import Instructions._ +import Util._ object rocketFPConstants { @@ -46,11 +47,10 @@ object rocketFPConstants } import rocketFPConstants._ -class rocketFPUCtrlSigs extends Bundle +class FPUCtrlSigs extends Bundle { val cmd = Bits(width = FCMD_WIDTH) val wen = Bool() - val sboard = Bool() val ren1 = Bool() val ren2 = Bool() val ren3 = Bool() @@ -68,78 +68,77 @@ class rocketFPUDecoder extends Component { val io = new Bundle { val inst = Bits(INPUT, 32) - val sigs = new 
rocketFPUCtrlSigs().asOutput + val sigs = new FPUCtrlSigs().asOutput } val N = Bool(false) val Y = Bool(true) val X = Bool(false) val decoder = DecodeLogic(io.inst, - List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X), - Array(FLW -> List(FCMD_LOAD, Y,N,N,N,N,Y,N,N,N,N,N,N,N), - FLD -> List(FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N,N,N,N), - FSW -> List(FCMD_STORE, N,N,N,Y,N,Y,N,N,N,N,Y,N,N), - FSD -> List(FCMD_STORE, N,N,N,Y,N,N,N,N,N,N,Y,N,N), - MXTF_S -> List(FCMD_MXTF, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), - MXTF_D -> List(FCMD_MXTF, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), - FCVT_S_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), - FCVT_S_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), - FCVT_S_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), - FCVT_S_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,Y,N,Y,N,N,N,N), - FCVT_D_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), - FCVT_D_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,N,N,Y,N,Y,N,N,N,N), - FCVT_D_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,N,N,Y,N,Y,N,N,N,N), - FCVT_D_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,N,Y,N,Y,N,N,N,N), - MFTX_S -> List(FCMD_MFTX, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), - MFTX_D -> List(FCMD_MFTX, N,N,Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_W_S -> List(FCMD_CVT_W_FMT, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), - FCVT_WU_S-> List(FCMD_CVT_WU_FMT,N,N,Y,N,N,Y,N,Y,N,N,N,N,N), - FCVT_L_S -> List(FCMD_CVT_L_FMT, N,N,Y,N,N,Y,N,Y,N,N,N,N,N), - FCVT_LU_S-> List(FCMD_CVT_LU_FMT,N,N,Y,N,N,Y,N,Y,N,N,N,N,N), - FCVT_W_D -> List(FCMD_CVT_W_FMT, N,N,Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_WU_D-> List(FCMD_CVT_WU_FMT,N,N,Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_L_D -> List(FCMD_CVT_L_FMT, N,N,Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_LU_D-> List(FCMD_CVT_LU_FMT,N,N,Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_S_D -> List(FCMD_CVT_FMT_D, Y,N,Y,N,N,Y,N,N,Y,N,N,N,N), - FCVT_D_S -> List(FCMD_CVT_FMT_S, Y,N,Y,N,N,N,N,N,Y,N,N,N,N), - FEQ_S -> List(FCMD_EQ, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), - FLT_S -> List(FCMD_LT, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), - FLE_S -> List(FCMD_LE, N,N,Y,Y,N,Y,N,Y,N,N,N,N,N), - FEQ_D -> List(FCMD_EQ, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), 
- FLT_D -> List(FCMD_LT, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), - FLE_D -> List(FCMD_LE, N,N,Y,Y,N,N,N,Y,N,N,N,N,N), - MTFSR -> List(FCMD_MTFSR, N,N,N,N,N,Y,N,Y,N,N,N,Y,Y), - MFFSR -> List(FCMD_MFFSR, N,N,N,N,N,Y,N,Y,N,N,N,Y,N), - FSGNJ_S -> List(FCMD_SGNJ, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), - FSGNJN_S -> List(FCMD_SGNJN, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), - FSGNJX_S -> List(FCMD_SGNJX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), - FSGNJ_D -> List(FCMD_SGNJ, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FSGNJN_D -> List(FCMD_SGNJN, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FSGNJX_D -> List(FCMD_SGNJX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FMIN_S -> List(FCMD_MIN, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), - FMAX_S -> List(FCMD_MAX, Y,N,Y,Y,N,Y,N,N,Y,N,N,N,N), - FMIN_D -> List(FCMD_MIN, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FMAX_D -> List(FCMD_MAX, Y,N,Y,Y,N,N,N,N,Y,N,N,N,N), - FADD_S -> List(FCMD_ADD, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), - FSUB_S -> List(FCMD_SUB, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), - FMUL_S -> List(FCMD_MUL, Y,Y,Y,Y,N,Y,N,N,N,Y,N,N,N), - FADD_D -> List(FCMD_ADD, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), - FSUB_D -> List(FCMD_SUB, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), - FMUL_D -> List(FCMD_MUL, Y,Y,Y,Y,N,N,N,N,N,Y,N,N,N), - FMADD_S -> List(FCMD_MADD, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), - FMSUB_S -> List(FCMD_MSUB, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), - FNMADD_S -> List(FCMD_NMADD, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), - FNMSUB_S -> List(FCMD_NMSUB, Y,Y,Y,Y,Y,Y,N,N,N,Y,N,N,N), - FMADD_D -> List(FCMD_MADD, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), - FMSUB_D -> List(FCMD_MSUB, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), - FNMADD_D -> List(FCMD_NMADD, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N), - FNMSUB_D -> List(FCMD_NMSUB, Y,Y,Y,Y,Y,N,N,N,N,Y,N,N,N) + List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X), + Array(FLW -> List(FCMD_LOAD, Y,N,N,N,Y,N,N,N,N,N,N), + FLD -> List(FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N,N), + FSW -> List(FCMD_STORE, N,N,Y,N,Y,N,Y,N,N,N,N), + FSD -> List(FCMD_STORE, N,N,Y,N,N,N,Y,N,N,N,N), + MXTF_S -> List(FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N,N,N), + MXTF_D -> List(FCMD_MXTF, Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_S_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,Y,Y,N,N,N,N,N), + 
FCVT_S_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,Y,Y,N,N,N,N,N), + FCVT_S_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,Y,Y,N,N,N,N,N), + FCVT_S_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,Y,Y,N,N,N,N,N), + FCVT_D_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_D_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_D_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,N,Y,N,N,N,N,N), + FCVT_D_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,N,N,N,N,N), + MFTX_S -> List(FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N,N,N), + MFTX_D -> List(FCMD_MFTX, N,Y,N,N,N,N,Y,N,N,N,N), + FCVT_W_S -> List(FCMD_CVT_W_FMT, N,Y,N,N,Y,N,Y,N,N,N,N), + FCVT_WU_S-> List(FCMD_CVT_WU_FMT,N,Y,N,N,Y,N,Y,N,N,N,N), + FCVT_L_S -> List(FCMD_CVT_L_FMT, N,Y,N,N,Y,N,Y,N,N,N,N), + FCVT_LU_S-> List(FCMD_CVT_LU_FMT,N,Y,N,N,Y,N,Y,N,N,N,N), + FCVT_W_D -> List(FCMD_CVT_W_FMT, N,Y,N,N,N,N,Y,N,N,N,N), + FCVT_WU_D-> List(FCMD_CVT_WU_FMT,N,Y,N,N,N,N,Y,N,N,N,N), + FCVT_L_D -> List(FCMD_CVT_L_FMT, N,Y,N,N,N,N,Y,N,N,N,N), + FCVT_LU_D-> List(FCMD_CVT_LU_FMT,N,Y,N,N,N,N,Y,N,N,N,N), + FCVT_S_D -> List(FCMD_CVT_FMT_D, Y,Y,N,N,Y,N,N,Y,N,N,N), + FCVT_D_S -> List(FCMD_CVT_FMT_S, Y,Y,N,N,N,N,N,Y,N,N,N), + FEQ_S -> List(FCMD_EQ, N,Y,Y,N,Y,N,Y,N,N,N,N), + FLT_S -> List(FCMD_LT, N,Y,Y,N,Y,N,Y,N,N,N,N), + FLE_S -> List(FCMD_LE, N,Y,Y,N,Y,N,Y,N,N,N,N), + FEQ_D -> List(FCMD_EQ, N,Y,Y,N,N,N,Y,N,N,N,N), + FLT_D -> List(FCMD_LT, N,Y,Y,N,N,N,Y,N,N,N,N), + FLE_D -> List(FCMD_LE, N,Y,Y,N,N,N,Y,N,N,N,N), + MTFSR -> List(FCMD_MTFSR, N,N,N,N,Y,N,Y,N,N,Y,Y), + MFFSR -> List(FCMD_MFFSR, N,N,N,N,Y,N,Y,N,N,Y,N), + FSGNJ_S -> List(FCMD_SGNJ, Y,Y,Y,N,Y,N,N,Y,N,N,N), + FSGNJN_S -> List(FCMD_SGNJN, Y,Y,Y,N,Y,N,N,Y,N,N,N), + FSGNJX_S -> List(FCMD_SGNJX, Y,Y,Y,N,Y,N,N,Y,N,N,N), + FSGNJ_D -> List(FCMD_SGNJ, Y,Y,Y,N,N,N,N,Y,N,N,N), + FSGNJN_D -> List(FCMD_SGNJN, Y,Y,Y,N,N,N,N,Y,N,N,N), + FSGNJX_D -> List(FCMD_SGNJX, Y,Y,Y,N,N,N,N,Y,N,N,N), + FMIN_S -> List(FCMD_MIN, Y,Y,Y,N,Y,N,Y,Y,N,N,N), + FMAX_S -> List(FCMD_MAX, Y,Y,Y,N,Y,N,Y,Y,N,N,N), + FMIN_D -> List(FCMD_MIN, Y,Y,Y,N,N,N,Y,Y,N,N,N), + FMAX_D -> 
List(FCMD_MAX, Y,Y,Y,N,N,N,Y,Y,N,N,N), + FADD_S -> List(FCMD_ADD, Y,Y,Y,N,Y,N,N,N,Y,N,N), + FSUB_S -> List(FCMD_SUB, Y,Y,Y,N,Y,N,N,N,Y,N,N), + FMUL_S -> List(FCMD_MUL, Y,Y,Y,N,Y,N,N,N,Y,N,N), + FADD_D -> List(FCMD_ADD, Y,Y,Y,N,N,N,N,N,Y,N,N), + FSUB_D -> List(FCMD_SUB, Y,Y,Y,N,N,N,N,N,Y,N,N), + FMUL_D -> List(FCMD_MUL, Y,Y,Y,N,N,N,N,N,Y,N,N), + FMADD_S -> List(FCMD_MADD, Y,Y,Y,Y,Y,N,N,N,Y,N,N), + FMSUB_S -> List(FCMD_MSUB, Y,Y,Y,Y,Y,N,N,N,Y,N,N), + FNMADD_S -> List(FCMD_NMADD, Y,Y,Y,Y,Y,N,N,N,Y,N,N), + FNMSUB_S -> List(FCMD_NMSUB, Y,Y,Y,Y,Y,N,N,N,Y,N,N), + FMADD_D -> List(FCMD_MADD, Y,Y,Y,Y,N,N,N,N,Y,N,N), + FMSUB_D -> List(FCMD_MSUB, Y,Y,Y,Y,N,N,N,N,Y,N,N), + FNMADD_D -> List(FCMD_NMADD, Y,Y,Y,Y,N,N,N,N,Y,N,N), + FNMSUB_D -> List(FCMD_NMSUB, Y,Y,Y,Y,N,N,N,N,Y,N,N) )) - val cmd :: wen :: sboard :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: fma :: store :: rdfsr :: wrfsr :: Nil = decoder + val cmd :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: fma :: rdfsr :: wrfsr :: Nil = decoder io.sigs.cmd := cmd io.sigs.wen := wen.toBool - io.sigs.sboard := sboard.toBool io.sigs.ren1 := ren1.toBool io.sigs.ren2 := ren2.toBool io.sigs.ren3 := ren3.toBool @@ -148,7 +147,6 @@ class rocketFPUDecoder extends Component io.sigs.toint := toint.toBool io.sigs.fastpipe := fastpipe.toBool io.sigs.fma := fma.toBool - io.sigs.store := store.toBool io.sigs.rdfsr := rdfsr.toBool io.sigs.wrfsr := wrfsr.toBool } @@ -172,169 +170,201 @@ class ioCtrlFPU extends Bundle { val illegal_rm = Bool(INPUT) val killx = Bool(OUTPUT) val killm = Bool(OUTPUT) - val dec = new rocketFPUCtrlSigs().asInput + val dec = new FPUCtrlSigs().asInput + val sboard_set = Bool(INPUT) val sboard_clr = Bool(INPUT) val sboard_clra = UFix(INPUT, 5) } -class rocketFPIntUnit extends Component +object RegEn { - val io = new Bundle { - val single = Bool(INPUT) - val cmd = Bits(INPUT, FCMD_WIDTH) - val rm = Bits(INPUT, 3) - val fsr = Bits(INPUT, FSR_WIDTH) - val in1 = Bits(INPUT, 
65) - val in2 = Bits(INPUT, 65) - val lt_s = Bool(OUTPUT) - val lt_d = Bool(OUTPUT) - val store_data = Bits(OUTPUT, 64) - val toint_data = Bits(OUTPUT, 64) - val exc = Bits(OUTPUT, 5) + def apply[T <: Data](data: T, en: Bool) = { + val r = Reg() { data.clone } + when (en) { r := data } + r } - - val unrec_s = hardfloat.recodedFloatNToFloatN(io.in1, 23, 9) - val unrec_d = hardfloat.recodedFloatNToFloatN(io.in1, 52, 12) - - io.store_data := Mux(io.single, Cat(unrec_s, unrec_s), unrec_d) - - val scmp = new hardfloat.recodedFloatNCompare(23, 9) - scmp.io.a := io.in1 - scmp.io.b := io.in2 - val scmp_out = (io.cmd & Cat(scmp.io.a_lt_b, scmp.io.a_eq_b)).orR - val scmp_exc = (io.cmd & Cat(scmp.io.a_lt_b_invalid, scmp.io.a_eq_b_invalid)).orR << UFix(4) - - val dcmp = new hardfloat.recodedFloatNCompare(52, 12) - dcmp.io.a := io.in1 - dcmp.io.b := io.in2 - val dcmp_out = (io.cmd & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR - val dcmp_exc = (io.cmd & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UFix(4) - - val s2i = hardfloat.recodedFloatNToAny(io.in1, io.rm, ~io.cmd(1,0), 23, 9, 64) - val d2i = hardfloat.recodedFloatNToAny(io.in1, io.rm, ~io.cmd(1,0), 52, 12, 64) - - // output muxing - val (out_s, exc_s) = (Bits(), Bits()) - out_s := Cat(Fill(32, unrec_s(31)), unrec_s) - exc_s := Bits(0) - val (out_d, exc_d) = (Bits(), Bits()) - out_d := unrec_d - exc_d := Bits(0) - - when (io.cmd === FCMD_MTFSR || io.cmd === FCMD_MFFSR) { - out_s := io.fsr + def apply[T <: Bits](data: T, en: Bool, resetVal: Bool) = { + val r = Reg(resetVal = resetVal) { data.clone } + when (en) { r := data } + r } - when (io.cmd === FCMD_CVT_W_FMT || io.cmd === FCMD_CVT_WU_FMT) { - out_s := Cat(Fill(32, s2i._1(31)), s2i._1(31,0)) - exc_s := s2i._2 - out_d := Cat(Fill(32, d2i._1(31)), d2i._1(31,0)) - exc_d := d2i._2 - } - when (io.cmd === FCMD_CVT_L_FMT || io.cmd === FCMD_CVT_LU_FMT) { - out_s := s2i._1 - exc_s := s2i._2 - out_d := d2i._1 - exc_d := d2i._2 - } - when (io.cmd === FCMD_EQ || io.cmd 
=== FCMD_LT || io.cmd === FCMD_LE) { - out_s := scmp_out - exc_s := scmp_exc - out_d := dcmp_out - exc_d := dcmp_exc - } - - io.toint_data := Mux(io.single, out_s, out_d) - io.exc := Mux(io.single, exc_s, exc_d) - io.lt_s := scmp.io.a_lt_b - io.lt_d := dcmp.io.a_lt_b } -class rocketFPUFastPipe extends Component +class FPToInt extends Component { + class Input extends Bundle { + val single = Bool() + val cmd = Bits(width = FCMD_WIDTH) + val rm = Bits(width = 3) + val fsr = Bits(width = FSR_WIDTH) + val in1 = Bits(width = 65) + val in2 = Bits(width = 65) + override def clone = new Input().asInstanceOf[this.type] + } val io = new Bundle { - val single = Bool(INPUT) - val cmd = Bits(INPUT, FCMD_WIDTH) - val rm = Bits(INPUT, 3) - val fromint = Bits(INPUT, 64) - val in1 = Bits(INPUT, 65) - val in2 = Bits(INPUT, 65) - val lt_s = Bool(INPUT) - val lt_d = Bool(INPUT) - val out_s = Bits(OUTPUT, 33) - val exc_s = Bits(OUTPUT, 5) - val out_d = Bits(OUTPUT, 65) - val exc_d = Bits(OUTPUT, 5) + val in = new PipeIO()(new Input).flip + val out = new PipeIO()(new Bundle { + val lt = Bool() + val store = Bits(width = 64) + val toint = Bits(width = 64) + val exc = Bits(width = 5) + }) } - val i2s = hardfloat.anyToRecodedFloatN(io.fromint, io.rm, ~io.cmd(1,0), 23, 9, 64) - val i2d = hardfloat.anyToRecodedFloatN(io.fromint, io.rm, ~io.cmd(1,0), 52, 12, 64) + val in = Reg() { new Input } + val valid = Reg(io.in.valid) + when (io.in.valid) { + def upconvert(x: Bits) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1 + when (io.in.bits.cmd === FCMD_STORE) { + in.in1 := io.in.bits.in2 + }.otherwise { + val doUpconvert = io.in.bits.single && io.in.bits.cmd != FCMD_MFTX + in.in1 := Mux(doUpconvert, upconvert(io.in.bits.in1), io.in.bits.in1) + in.in2 := Mux(doUpconvert, upconvert(io.in.bits.in2), io.in.bits.in2) + } + in.single := io.in.bits.single + in.cmd := io.in.bits.cmd + in.rm := io.in.bits.rm + in.fsr := io.in.bits.fsr + } + + val unrec_s = 
hardfloat.recodedFloatNToFloatN(in.in1, 23, 9) + val unrec_d = hardfloat.recodedFloatNToFloatN(in.in1, 52, 12) + + val dcmp = new hardfloat.recodedFloatNCompare(52, 12) + dcmp.io.a := in.in1 + dcmp.io.b := in.in2 + val dcmp_out = (in.cmd & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR + val dcmp_exc = (in.cmd & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UFix(4) + + val d2i = hardfloat.recodedFloatNToAny(in.in1, in.rm, ~in.cmd(1,0), 52, 12, 64) + + io.out.bits.toint := Mux(in.single, Cat(Fill(32, unrec_s(31)), unrec_s), unrec_d) + io.out.bits.exc := Bits(0) + + when (in.cmd === FCMD_MTFSR || in.cmd === FCMD_MFFSR) { + io.out.bits.toint := io.in.bits.fsr + } + when (in.cmd === FCMD_CVT_W_FMT || in.cmd === FCMD_CVT_WU_FMT) { + io.out.bits.toint := Cat(Fill(32, d2i._1(31)), d2i._1(31,0)) + io.out.bits.exc := d2i._2 + } + when (in.cmd === FCMD_CVT_L_FMT || in.cmd === FCMD_CVT_LU_FMT) { + io.out.bits.toint := d2i._1 + io.out.bits.exc := d2i._2 + } + when (in.cmd === FCMD_EQ || in.cmd === FCMD_LT || in.cmd === FCMD_LE) { + io.out.bits.toint := dcmp_out + io.out.bits.exc := dcmp_exc + } + + io.out.valid := valid + io.out.bits.store := Mux(in.single, Cat(unrec_s, unrec_s), unrec_d) + io.out.bits.lt := dcmp.io.a_lt_b +} + +class FPResult extends Bundle +{ + val data = Bits(width = 65) + val exc = Bits(width = 5) +} + +class IntToFP(val latency: Int) extends Component +{ + class Input extends Bundle { + val single = Bool() + val cmd = Bits(width = FCMD_WIDTH) + val rm = Bits(width = 3) + val data = Bits(width = 64) + override def clone = new Input().asInstanceOf[this.type] + } + val io = new Bundle { + val in = new PipeIO()(new Input).flip + val out = new PipeIO()(new FPResult) + } + + val in = Pipe(io.in) + + val mux = new FPResult + mux.exc := Bits(0) + mux.data := hardfloat.floatNToRecodedFloatN(in.bits.data, 52, 12) + when (in.bits.single) { + mux.data := hardfloat.floatNToRecodedFloatN(in.bits.data, 23, 9) + } + + when (in.bits.cmd === FCMD_CVT_FMT_W || 
in.bits.cmd === FCMD_CVT_FMT_WU || + in.bits.cmd === FCMD_CVT_FMT_L || in.bits.cmd === FCMD_CVT_FMT_LU) { + when (in.bits.single) { + val u = hardfloat.anyToRecodedFloatN(in.bits.data, in.bits.rm, ~in.bits.cmd(1,0), 23, 9, 64) + mux.data := Cat(Fix(-1, 32), u._1) + mux.exc := u._2 + }.otherwise { + val u = hardfloat.anyToRecodedFloatN(in.bits.data, in.bits.rm, ~in.bits.cmd(1,0), 52, 12, 64) + mux.data := u._1 + mux.exc := u._2 + } + } + + io.out <> Pipe(in.valid, mux, latency-1) +} + +class FPToFP(val latency: Int) extends Component +{ + class Input extends Bundle { + val single = Bool() + val cmd = Bits(width = FCMD_WIDTH) + val rm = Bits(width = 3) + val in1 = Bits(width = 65) + val in2 = Bits(width = 65) + override def clone = new Input().asInstanceOf[this.type] + } + val io = new Bundle { + val in = new PipeIO()(new Input).flip + val out = new PipeIO()(new FPResult) + val lt = Bool(INPUT) // from FPToInt + } + + val in = Pipe(io.in) // fp->fp units - val sign_s = Mux(io.cmd === FCMD_SGNJ, io.in2(32), - Mux(io.cmd === FCMD_SGNJN, ~io.in2(32), - io.in1(32) ^ io.in2(32))) // FCMD_SGNJX - val sign_d = Mux(io.cmd === FCMD_SGNJ, io.in2(64), - Mux(io.cmd === FCMD_SGNJN, ~io.in2(64), - io.in1(64) ^ io.in2(64))) // FCMD_SGNJX - val fsgnj = Cat(Mux(io.single, io.in1(64), sign_d), io.in1(63,33), - Mux(io.single, sign_s, io.in1(32)), io.in1(31,0)) + val sign_s = Mux(in.bits.cmd === FCMD_SGNJ, in.bits.in2(32), + Mux(in.bits.cmd === FCMD_SGNJN, ~in.bits.in2(32), + in.bits.in1(32) ^ in.bits.in2(32))) // FCMD_SGNJX + val sign_d = Mux(in.bits.cmd === FCMD_SGNJ, in.bits.in2(64), + Mux(in.bits.cmd === FCMD_SGNJN, ~in.bits.in2(64), + in.bits.in1(64) ^ in.bits.in2(64))) // FCMD_SGNJX + val fsgnj = Cat(Mux(in.bits.single, in.bits.in1(64), sign_d), in.bits.in1(63,33), + Mux(in.bits.single, sign_s, in.bits.in1(32)), in.bits.in1(31,0)) - val s2d = hardfloat.recodedFloatNToRecodedFloatM(io.in1, io.rm, 23, 9, 52, 12) - val d2s = hardfloat.recodedFloatNToRecodedFloatM(io.in1, io.rm, 52, 
12, 23, 9) + val s2d = hardfloat.recodedFloatNToRecodedFloatM(in.bits.in1, in.bits.rm, 23, 9, 52, 12) + val d2s = hardfloat.recodedFloatNToRecodedFloatM(in.bits.in1, in.bits.rm, 52, 12, 23, 9) - val isnan1 = Mux(io.single, io.in1(31,29) === Bits("b111"), io.in1(63,61) === Bits("b111")) - val isnan2 = Mux(io.single, io.in2(31,29) === Bits("b111"), io.in2(63,61) === Bits("b111")) - val issnan1 = isnan1 && ~Mux(io.single, io.in1(22), io.in1(51)) - val issnan2 = isnan2 && ~Mux(io.single, io.in2(22), io.in2(51)) + val isnan1 = Mux(in.bits.single, in.bits.in1(31,29) === Bits("b111"), in.bits.in1(63,61) === Bits("b111")) + val isnan2 = Mux(in.bits.single, in.bits.in2(31,29) === Bits("b111"), in.bits.in2(63,61) === Bits("b111")) + val issnan1 = isnan1 && ~Mux(in.bits.single, in.bits.in1(22), in.bits.in1(51)) + val issnan2 = isnan2 && ~Mux(in.bits.single, in.bits.in2(22), in.bits.in2(51)) val minmax_exc = Cat(issnan1 || issnan2, Bits(0,4)) - val min = io.cmd === FCMD_MIN - val lt = Mux(io.single, io.lt_s, io.lt_d) - val minmax = Mux(isnan2 || !isnan1 && (min === lt), io.in1, io.in2) + val min = in.bits.cmd === FCMD_MIN + val minmax = Mux(isnan2 || !isnan1 && (min === io.lt), in.bits.in1, in.bits.in2) - // output muxing - val (out_s, exc_s) = (Bits(), Bits()) - out_s := Reg(hardfloat.floatNToRecodedFloatN(io.fromint, 23, 9)) - exc_s := Bits(0) - val (out_d, exc_d) = (Bits(), Bits()) - out_d := Reg(hardfloat.floatNToRecodedFloatN(io.fromint, 52, 12)) - exc_d := Bits(0) + val mux = new FPResult + mux.data := fsgnj + mux.exc := Bits(0) - val r_cmd = Reg(io.cmd) - - when (r_cmd === FCMD_MTFSR || r_cmd === FCMD_MFFSR) { - out_s := Reg(io.fromint(FSR_WIDTH-1,0)) + when (in.bits.cmd === FCMD_MIN || in.bits.cmd === FCMD_MAX) { + mux.data := minmax } - when (r_cmd === FCMD_SGNJ || r_cmd === FCMD_SGNJN || r_cmd === FCMD_SGNJX) { - val r_fsgnj = Reg(fsgnj) - out_s := r_fsgnj(32,0) - out_d := r_fsgnj - } - when (r_cmd === FCMD_MIN || r_cmd === FCMD_MAX) { - val r_minmax = Reg(minmax) - 
val r_minmax_exc = Reg(minmax_exc) - out_s := r_minmax(32,0) - out_d := r_minmax - exc_s := r_minmax_exc - exc_d := r_minmax_exc - } - when (r_cmd === FCMD_CVT_FMT_S || r_cmd === FCMD_CVT_FMT_D) { - out_s := Reg(d2s._1) - exc_s := Reg(d2s._2) - out_d := Reg(s2d._1) - exc_d := Reg(s2d._2) - } - when (r_cmd === FCMD_CVT_FMT_W || r_cmd === FCMD_CVT_FMT_WU || - r_cmd === FCMD_CVT_FMT_L || r_cmd === FCMD_CVT_FMT_LU) { - out_s := Reg(i2s._1) - exc_s := Reg(i2s._2) - out_d := Reg(i2d._1) - exc_d := Reg(i2d._2) + when (in.bits.cmd === FCMD_CVT_FMT_S || in.bits.cmd === FCMD_CVT_FMT_D) { + when (in.bits.single) { + mux.data := Cat(Fix(-1, 32), d2s._1) + mux.exc := d2s._2 + }.otherwise { + mux.data := s2d._1 + mux.exc := s2d._2 + } } - io.out_s := out_s - io.exc_s := exc_s - io.out_d := out_d - io.exc_d := exc_d + io.out <> Pipe(in.valid, mux, latency-1) } class ioFMA(width: Int) extends Bundle { @@ -348,7 +378,7 @@ class ioFMA(width: Int) extends Bundle { val exc = Bits(OUTPUT, 5) } -class rocketFPUSFMAPipe(latency: Int) extends Component +class rocketFPUSFMAPipe(val latency: Int) extends Component { val io = new ioFMA(33) @@ -365,6 +395,7 @@ class rocketFPUSFMAPipe(latency: Int) extends Component val one = Bits("h80000000") val zero = Cat(io.in1(32) ^ io.in2(32), Bits(0, 32)) + val valid = Reg(io.valid) when (io.valid) { cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) rm := io.rm @@ -380,11 +411,11 @@ class rocketFPUSFMAPipe(latency: Int) extends Component fma.io.b := in2 fma.io.c := in3 - io.out := ShiftRegister(latency-1, fma.io.out) - io.exc := ShiftRegister(latency-1, fma.io.exceptionFlags) + io.out := Pipe(valid, fma.io.out, latency-1).bits + io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits } -class rocketFPUDFMAPipe(latency: Int) extends Component +class rocketFPUDFMAPipe(val latency: Int) extends Component { val io = new ioFMA(65) @@ -401,6 +432,7 @@ class rocketFPUDFMAPipe(latency: Int) extends Component val one = Bits("h8000000000000000") val 
zero = Cat(io.in1(64) ^ io.in2(64), Bits(0, 64)) + val valid = Reg(io.valid) when (io.valid) { cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) rm := io.rm @@ -416,8 +448,8 @@ class rocketFPUDFMAPipe(latency: Int) extends Component fma.io.b := in2 fma.io.c := in3 - io.out := ShiftRegister(latency-1, fma.io.out) - io.exc := ShiftRegister(latency-1, fma.io.exceptionFlags) + io.out := Pipe(valid, fma.io.out, latency-1).bits + io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits } class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component @@ -434,16 +466,16 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component ex_reg_inst := io.dpath.inst } val ex_reg_valid = Reg(io.ctrl.valid, Bool(false)) + val mem_reg_valid = Reg(ex_reg_valid && !io.ctrl.killx, resetVal = Bool(false)) + val killm = io.ctrl.killm || io.ctrl.nack_mem + val wb_reg_valid = Reg(mem_reg_valid && !killm, resetVal = Bool(false)) val fp_decoder = new rocketFPUDecoder fp_decoder.io.inst := io.dpath.inst - val ctrl = Reg() { new rocketFPUCtrlSigs } - when (io.ctrl.valid) { - ctrl := fp_decoder.io.sigs - } - val mem_ctrl = Reg(ctrl) - val wb_ctrl = Reg(mem_ctrl) + val ctrl = RegEn(fp_decoder.io.sigs, io.ctrl.valid) + val mem_ctrl = RegEn(ctrl, ex_reg_valid) + val wb_ctrl = RegEn(mem_ctrl, mem_reg_valid) // load response val load_wb = Reg(io.dpath.dmem_resp_val, resetVal = Bool(false)) @@ -457,8 +489,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component } val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) - val sp_msbs = Fix(-1, 32) - val load_wb_data_recoded = Mux(load_wb_single, Cat(sp_msbs, rec_s), rec_d) + val load_wb_data_recoded = Mux(load_wb_single, Cat(Fix(-1, 32), rec_s), rec_d) val fsr_rm = Reg() { Bits(width = 3) } val fsr_exc = Reg() { Bits(width = 5) } @@ -472,143 +503,121 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component 
val ex_rs3 = regfile(ex_reg_inst(16,12)) val ex_rm = Mux(ex_reg_inst(11,9) === Bits(7), fsr_rm, ex_reg_inst(11,9)) - val mem_reg_valid = Reg(ex_reg_valid && !io.ctrl.killx, resetVal = Bool(false)) - val mem_fromint_data = Reg() { Bits() } - val mem_rs1 = Reg() { Bits() } - val mem_rs2 = Reg() { Bits() } - val mem_rs3 = Reg() { Bits() } - val mem_rm = Reg() { Bits() } + val fpiu = new FPToInt + fpiu.io.in.valid := ex_reg_valid && ctrl.toint + fpiu.io.in.bits := ctrl + fpiu.io.in.bits.rm := ex_rm + fpiu.io.in.bits.fsr := Cat(fsr_rm, fsr_exc) + fpiu.io.in.bits.in1 := ex_rs1 + fpiu.io.in.bits.in2 := ex_rs2 - when (ex_reg_valid) { - mem_rm := ex_rm - when (ctrl.fromint || ctrl.wrfsr) { - mem_fromint_data := io.dpath.fromint_data - } - when (ctrl.ren1) { - mem_rs1 := ex_rs1 - } - when (ctrl.store) { - mem_rs1 := ex_rs2 - } - when (ctrl.ren2) { - mem_rs2 := ex_rs2 - } - when (ctrl.ren3) { - mem_rs3 := ex_rs3 - } - } + io.dpath.store_data := fpiu.io.out.bits.store + io.dpath.toint_data := fpiu.io.out.bits.toint - // currently we assume FP stores and FP->int ops take 1 cycle (MEM) - val fpiu = new rocketFPIntUnit - fpiu.io.single := mem_ctrl.single - fpiu.io.cmd := mem_ctrl.cmd - fpiu.io.rm := mem_rm - fpiu.io.fsr := Cat(fsr_rm, fsr_exc) - fpiu.io.in1 := mem_rs1 - fpiu.io.in2 := mem_rs2 - - io.dpath.store_data := fpiu.io.store_data - io.dpath.toint_data := fpiu.io.toint_data - - // 2-cycle pipe for int->FP and non-FMA FP->FP ops - val fastpipe = new rocketFPUFastPipe - fastpipe.io.single := mem_ctrl.single - fastpipe.io.cmd := mem_ctrl.cmd - fastpipe.io.rm := mem_rm - fastpipe.io.fromint := mem_fromint_data - fastpipe.io.in1 := mem_rs1 - fastpipe.io.in2 := mem_rs2 - fastpipe.io.lt_s := fpiu.io.lt_s - fastpipe.io.lt_d := fpiu.io.lt_d + val ifpu = new IntToFP(3) + ifpu.io.in.valid := ex_reg_valid && ctrl.fromint + ifpu.io.in.bits := ctrl + ifpu.io.in.bits.rm := ex_rm + ifpu.io.in.bits.data := io.dpath.fromint_data + val fpmu = new FPToFP(2) + fpmu.io.in.valid := ex_reg_valid 
&& ctrl.fastpipe + fpmu.io.in.bits := ctrl + fpmu.io.in.bits.rm := ex_rm + fpmu.io.in.bits.in1 := ex_rs1 + fpmu.io.in.bits.in2 := ex_rs2 + fpmu.io.lt := fpiu.io.out.bits.lt val cmd_fma = mem_ctrl.cmd === FCMD_MADD || mem_ctrl.cmd === FCMD_MSUB || mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB - val sfma = new rocketFPUSFMAPipe(sfma_latency-1) - sfma.io.valid := io.sfma.valid || mem_reg_valid && mem_ctrl.fma && mem_ctrl.single - sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, mem_rs1) - sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, mem_rs2) - sfma.io.in3 := Mux(io.sfma.valid, io.sfma.in3, mem_rs3) - sfma.io.cmd := Mux(io.sfma.valid, io.sfma.cmd, mem_ctrl.cmd) - sfma.io.rm := Mux(io.sfma.valid, io.sfma.rm, mem_rm) + val sfma = new rocketFPUSFMAPipe(sfma_latency) + sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single + sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1) + sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2) + sfma.io.in3 := Mux(io.sfma.valid, io.sfma.in3, ex_rs3) + sfma.io.cmd := Mux(io.sfma.valid, io.sfma.cmd, ctrl.cmd) + sfma.io.rm := Mux(io.sfma.valid, io.sfma.rm, ex_rm) io.sfma.out := sfma.io.out io.sfma.exc := sfma.io.exc - val dfma = new rocketFPUDFMAPipe(dfma_latency-1) - dfma.io.valid := io.dfma.valid || mem_reg_valid && mem_ctrl.fma && !mem_ctrl.single - dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, mem_rs1) - dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, mem_rs2) - dfma.io.in3 := Mux(io.dfma.valid, io.dfma.in3, mem_rs3) - dfma.io.cmd := Mux(io.dfma.valid, io.dfma.cmd, mem_ctrl.cmd) - dfma.io.rm := Mux(io.dfma.valid, io.dfma.rm, mem_rm) + val dfma = new rocketFPUDFMAPipe(dfma_latency) + dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single + dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1) + dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2) + dfma.io.in3 := Mux(io.dfma.valid, io.dfma.in3, ex_rs3) + 
dfma.io.cmd := Mux(io.dfma.valid, io.dfma.cmd, ctrl.cmd) + dfma.io.rm := Mux(io.dfma.valid, io.dfma.rm, ex_rm) io.dfma.out := dfma.io.out io.dfma.exc := dfma.io.exc - val wb_reg_valid = Reg(mem_reg_valid && !io.ctrl.killm, resetVal = Bool(false)) - val wb_toint_exc = Reg(fpiu.io.exc) - // writeback arbitration - val wen = Reg(resetVal = Bits(0, dfma_latency)) - val winfo = Vec(dfma_latency-1) { Reg() { Bits() } } - val mem_wen = Reg(resetVal = Bool(false)) - - val fastpipe_latency = 2 - require(fastpipe_latency < sfma_latency && sfma_latency <= dfma_latency) - val ex_stage_fu_latency = Mux(ctrl.fastpipe, UFix(fastpipe_latency-1), - Mux(ctrl.single, UFix(sfma_latency-1), - UFix(dfma_latency-1))) - val mem_fu_latency = Reg(ex_stage_fu_latency - UFix(1)) - val write_port_busy = Reg(ctrl.fastpipe && wen(fastpipe_latency) || - Bool(sfma_latency < dfma_latency) && ctrl.fma && ctrl.single && wen(sfma_latency) || - mem_wen && mem_fu_latency === ex_stage_fu_latency) - mem_wen := ex_reg_valid && !io.ctrl.killx && (ctrl.fma || ctrl.fastpipe) - val ex_stage_wsrc = Cat(ctrl.fastpipe, ctrl.single) - val mem_winfo = Reg(Cat(ex_reg_inst(31,27), ex_stage_wsrc)) - - for (i <- 0 until dfma_latency-2) { - winfo(i) := winfo(i+1) + case class Pipe(p: Component, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: Bits, wexc: Bits) + val pipes = List( + Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits.data, fpmu.io.out.bits.exc), + Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits.data, ifpu.io.out.bits.exc), + Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out, sfma.io.exc), + Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out, dfma.io.exc)) + def latencyMask(c: FPUCtrlSigs, offset: Int) = { + require(pipes.forall(_.lat >= offset)) + pipes.map(p => Mux(p.cond(c), UFix(1 << p.lat-offset), UFix(0))).reduce(_|_) } - wen := wen >> UFix(1) + def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => 
Mux(p._1.cond(c), UFix(p._2), UFix(0))).reduce(_|_) + val maxLatency = pipes.map(_.lat).max + val memLatencyMask = latencyMask(mem_ctrl, 2) + + val wen = Reg(resetVal = Bits(0, maxLatency-1)) + val winfo = Vec(maxLatency-1) { Reg() { Bits() } } + val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) + val (write_port_busy, mem_winfo) = (Reg{Bool()}, Reg{Bits()}) + when (ex_reg_valid) { + write_port_busy := mem_wen && (memLatencyMask & latencyMask(ctrl, 1)).orR || (wen & latencyMask(ctrl, 0)).orR + mem_winfo := Cat(pipeid(ctrl), ex_reg_inst(31,27)) + } + + for (i <- 0 until maxLatency-2) { + when (wen(i+1)) { winfo(i) := winfo(i+1) } + } + wen := wen >> 1 when (mem_wen) { - when (!io.ctrl.killm) { - wen := (wen >> UFix(1)) | (UFix(1) << mem_fu_latency) + when (!killm) { + wen := wen >> 1 | memLatencyMask } - for (i <- 0 until dfma_latency-1) { - when (!write_port_busy && UFix(i) === mem_fu_latency) { + for (i <- 0 until maxLatency-1) { + when (!write_port_busy && memLatencyMask(i)) { winfo(i) := mem_winfo } } } - val wsrc = winfo(0)(1,0) - val wdata = Mux(wsrc === UFix(0), dfma.io.out, // DFMA - Mux(wsrc === UFix(1), Cat(sp_msbs, sfma.io.out), // SFMA - Mux(wsrc === UFix(2), fastpipe.io.out_d, - Cat(sp_msbs, fastpipe.io.out_s)))) - val wexc = Mux(wsrc === UFix(0), dfma.io.exc, // DFMA - Mux(wsrc === UFix(1), sfma.io.exc, // SFMA - Mux(wsrc === UFix(2), fastpipe.io.exc_d, - fastpipe.io.exc_s))) - val waddr = winfo(0).toUFix >> UFix(2) + val waddr = winfo(0)(4,0).toUFix + val wsrc = winfo(0) >> waddr.getWidth + val wdata = (Vec(pipes.map(_.wdata)){Bits()})(wsrc) + val wexc = (Vec(pipes.map(_.wexc)){Bits()})(wsrc) when (wen(0)) { regfile(waddr(4,0)) := wdata } + val wb_toint_exc = RegEn(fpiu.io.out.bits.exc, mem_ctrl.toint) when (wb_reg_valid && wb_ctrl.toint || wen(0)) { fsr_exc := fsr_exc | Fill(fsr_exc.getWidth, wb_reg_valid && wb_ctrl.toint) & wb_toint_exc | Fill(fsr_exc.getWidth, wen(0)) & wexc } + + val mem_fsr_wdata = 
RegEn(io.dpath.fromint_data(FSR_WIDTH-1,0), ex_reg_valid && ctrl.wrfsr) + val wb_fsr_wdata = RegEn(mem_fsr_wdata, mem_reg_valid && mem_ctrl.wrfsr) when (wb_reg_valid && wb_ctrl.wrfsr) { - fsr_exc := fastpipe.io.out_s(4,0) - fsr_rm := fastpipe.io.out_s(7,5) + fsr_exc := wb_fsr_wdata + fsr_rm := wb_fsr_wdata >> fsr_exc.getWidth } val fp_inflight = wb_reg_valid && wb_ctrl.toint || wen.orR val fsr_busy = mem_ctrl.rdfsr && fp_inflight || wb_reg_valid && wb_ctrl.wrfsr - val units_busy = mem_reg_valid && mem_ctrl.fma && (io.sfma.valid && mem_ctrl.single || io.dfma.valid && !mem_ctrl.single) + val units_busy = mem_reg_valid && mem_ctrl.fma && Reg(Mux(ctrl.single, io.sfma.valid, io.dfma.valid)) io.ctrl.nack_mem := fsr_busy || units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs + def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_) + io.ctrl.sboard_set := wb_reg_valid && Reg(useScoreboard(_._1.cond(mem_ctrl))) + io.ctrl.sboard_clr := wen(0) && useScoreboard(x => wsrc === UFix(x._2)) + io.ctrl.sboard_clra := waddr // we don't currently support round-max-magnitude (rm=4) io.ctrl.illegal_rm := ex_rm(2) - io.ctrl.sboard_clr := wen(0) && !wsrc(1).toBool // only for FMA pipes - io.ctrl.sboard_clra := waddr } diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 989db4cb..5b859ac8 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -8,11 +8,11 @@ import hwacha.Constants._ class ioMultiplier extends Bundle { val req = new io_imul_req().flip - val req_tag = Bits(INPUT, 5) + val req_tag = UFix(INPUT, 5) val req_kill = Bool(INPUT) val resp_val = Bool(OUTPUT) val resp_rdy = Bool(INPUT) - val resp_tag = Bits(OUTPUT, 5) + val resp_tag = UFix(OUTPUT, 5) val resp_bits = Bits(OUTPUT, SZ_XLEN) } @@ -71,7 +71,7 @@ class rocketMultiplier(unroll: Int = 1, earlyOut: Boolean = false) extends Compo val r_val = Reg(resetVal = 
Bool(false)); val r_dw = Reg { Bits() } val r_fn = Reg { Bits() } - val r_tag = Reg { Bits() } + val r_tag = Reg { UFix() } val r_lhs = Reg { Bits() } val r_prod= Reg { Bits(width = w*2) } val r_lsb = Reg { Bits() } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 2dddcef9..998ecaa6 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -13,7 +13,7 @@ class Tile(resetSignal: Bool = null)(implicit conf: RocketConfiguration) extends } val cpu = new rocketProc - val icache = new Frontend(ICacheConfig(128, 4)) // 128 sets x 4 ways (32KB) + val icache = new Frontend(ICacheConfig(4, 1)) // 128 sets x 4 ways (32KB) val dcache = new HellaCache val arbiter = new rocketMemArbiter(DMEM_PORTS) From e9eca6a95d89c038009d894910c77a64f169ee8d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 4 Nov 2012 16:59:36 -0800 Subject: [PATCH 0495/1087] refactor I$ config; remove Top class --- rocket/src/main/scala/icache.scala | 9 +-- rocket/src/main/scala/tile.scala | 7 +- rocket/src/main/scala/top.scala | 100 ----------------------------- 3 files changed, 10 insertions(+), 106 deletions(-) delete mode 100644 rocket/src/main/scala/top.scala diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index a8f99fdf..e618a211 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -6,7 +6,8 @@ import Constants._ import uncore._ import Util._ -case class ICacheConfig(sets: Int, assoc: Int, parity: Boolean = false) +case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached, + parity: Boolean = false) { val w = 1 val ibytes = INST_BITS/8 @@ -50,7 +51,7 @@ class IOCPUFrontend extends Bundle { val ptw = new IOTLBPTW().flip } -class Frontend(c: ICacheConfig)(implicit conf: RocketConfiguration) extends Component +class Frontend(c: ICacheConfig) extends Component { val io = new Bundle { val cpu = new IOCPUFrontend().flip @@ -122,7 +123,7 @@ 
class Frontend(c: ICacheConfig)(implicit conf: RocketConfiguration) extends Comp io.cpu.resp.bits.xcpt_if := s2_xcpt_if } -class ICache(c: ICacheConfig)(implicit conf: RocketConfiguration) extends Component +class ICache(c: ICacheConfig) extends Component { val io = new Bundle { val req = new PipeIO()(new Bundle { @@ -246,7 +247,7 @@ class ICache(c: ICacheConfig)(implicit conf: RocketConfiguration) extends Compon // output signals io.resp.valid := s2_hit io.mem.xact_init.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.xact_init.bits := conf.co.getUncachedReadTransactionInit(s2_addr >> UFix(c.offbits), UFix(0)) + io.mem.xact_init.bits := c.co.getUncachedReadTransactionInit(s2_addr >> UFix(c.offbits), UFix(0)) io.mem.xact_finish <> finish_q.io.deq // control state machine diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 998ecaa6..6b19ad4b 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -5,6 +5,9 @@ import Node._ import Constants._ import uncore._ +case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, + icache: ICacheConfig) + class Tile(resetSignal: Bool = null)(implicit conf: RocketConfiguration) extends Component(resetSignal) { val io = new Bundle { @@ -13,7 +16,7 @@ class Tile(resetSignal: Bool = null)(implicit conf: RocketConfiguration) extends } val cpu = new rocketProc - val icache = new Frontend(ICacheConfig(4, 1)) // 128 sets x 4 ways (32KB) + val icache = new Frontend(conf.icache) val dcache = new HellaCache val arbiter = new rocketMemArbiter(DMEM_PORTS) @@ -31,7 +34,7 @@ class Tile(resetSignal: Bool = null)(implicit conf: RocketConfiguration) extends if (HAVE_VEC) { - val vicache = new Frontend(ICacheConfig(128, 1)) // 128 sets x 1 ways (8KB) + val vicache = new Frontend(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB) arbiter.io.requestor(DMEM_VICACHE) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu } diff --git 
a/rocket/src/main/scala/top.scala b/rocket/src/main/scala/top.scala deleted file mode 100644 index fae65d92..00000000 --- a/rocket/src/main/scala/top.scala +++ /dev/null @@ -1,100 +0,0 @@ -package rocket - -import Chisel._ -import Node._ -import Constants._ -import uncore._ -import collection.mutable.ArrayBuffer - -object DummyTopLevelConstants extends uncore.constants.CoherenceConfigConstants { - val NTILES = 1 - val ENABLE_SHARING = true - val ENABLE_CLEAN_EXCLUSIVE = true -} -import DummyTopLevelConstants._ - -case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached) - -class Top extends Component -{ - val co = if(ENABLE_SHARING) { - if(ENABLE_CLEAN_EXCLUSIVE) new MESICoherence - else new MSICoherence - } else { - if(ENABLE_CLEAN_EXCLUSIVE) new MEICoherence - else new MICoherence - } - implicit val rconf = RocketConfiguration(NTILES, co) - implicit val uconf = UncoreConfiguration(NTILES+1, log2Up(NTILES)+1, co) - - val io = new Bundle { - val debug = new ioDebug - val host = new ioHost(16) - val mem = new ioMemPipe - } - - val htif = new rocketHTIF(io.host.w) - val hub = new CoherenceHubBroadcast - hub.io.tiles(NTILES) <> htif.io.mem - io.host <> htif.io.host - - io.mem.req_cmd <> Queue(hub.io.mem.req_cmd) - io.mem.req_data <> Queue(hub.io.mem.req_data, REFILL_CYCLES) - hub.io.mem.resp <> Pipe(io.mem.resp) - Assert(hub.io.mem.resp.ready, "hub.io.mem.resp.ready") - - var error_mode = Bool(false) - for (i <- 0 until NTILES) { - val hl = htif.io.cpu(i) - val tl = hub.io.tiles(i) - val tile = new Tile(resetSignal = hl.reset) - - tile.io.host.reset := Reg(Reg(hl.reset)) - tile.io.host.pcr_req <> Queue(hl.pcr_req) - hl.pcr_rep <> Queue(tile.io.host.pcr_rep) - hl.ipi_req <> Queue(tile.io.host.ipi_req) - tile.io.host.ipi_rep <> Queue(hl.ipi_rep) - error_mode = error_mode || Reg(tile.io.host.debug.error_mode) - - tl.xact_init <> Queue(tile.io.tilelink.xact_init) - tl.xact_init_data <> Queue(tile.io.tilelink.xact_init_data) - tile.io.tilelink.xact_abort 
<> Queue(tl.xact_abort) - tile.io.tilelink.xact_rep <> Queue(tl.xact_rep, 1, pipe = true) - tl.xact_finish <> Queue(tile.io.tilelink.xact_finish) - tile.io.tilelink.probe_req <> Queue(tl.probe_req) - tl.probe_rep <> Queue(tile.io.tilelink.probe_rep, 1) - tl.probe_rep_data <> Queue(tile.io.tilelink.probe_rep_data) - tl.incoherent := hl.reset - } - io.debug.error_mode := error_mode -} - -object top_main { - def main(args: Array[String]): Unit = { - val top = args(0) - val chiselArgs = ArrayBuffer[String]() - - var i = 1 - while (i < args.length) { - val arg = args(i) - arg match { - case "--NUM_PVFB" => { - hwacha.Constants.NUM_PVFB = args(i+1).toInt - i += 1 - } - case "--WIDTH_PVFB" => { - hwacha.Constants.WIDTH_PVFB = args(i+1).toInt - hwacha.Constants.DEPTH_PVFB = args(i+1).toInt - i += 1 - } - case "--CG" => { - hwacha.Constants.coarseGrained = true - } - case any => chiselArgs += arg - } - i += 1 - } - - chiselMain(chiselArgs.toArray, () => Class.forName(top).newInstance.asInstanceOf[Component]) - } -} From 5e103054fd99978d7e0cd3e67b2891899830a2e1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 5 Nov 2012 00:28:25 -0800 Subject: [PATCH 0496/1087] fix bug in quine mccluskey --- rocket/src/main/scala/decode.scala | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 7cd2d917..a63c5c3d 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -52,7 +52,7 @@ class Term(val value: BigInt, val mask: BigInt = 0) { var prime = true - def covers(x: Term) = ((value ^ x.value) &~ mask) == 0 + def covers(x: Term) = ((value ^ x.value) &~ mask | x.mask &~ mask) == 0 def intersects(x: Term) = ((value ^ x.value) &~ mask &~ x.mask) == 0 override def equals(that: Any) = that match { case x: Term => x.value == value && x.mask == mask @@ -182,14 +182,15 @@ object SimplifyDC prime.sort(_<_) } + def verify(cover: Seq[Term], minterms: 
Seq[Term], maxterms: Seq[Term]) = { + assert(minterms.forall(t => cover.exists(_ covers t))) + assert(maxterms.forall(t => !cover.exists(_ intersects t))) + } def apply(minterms: Seq[Term], maxterms: Seq[Term], bits: Int) = { val prime = getPrimeImplicants(minterms, maxterms, bits) - assert(minterms.forall(t => prime.exists(_ covers t))) val (eprime, prime2, uncovered) = Simplify.getEssentialPrimeImplicants(prime, minterms) - assert(uncovered.forall(t => prime2.exists(_ covers t))) val cover = eprime ++ Simplify.getCover(prime2, uncovered, bits) - minterms.foreach(t => assert(cover.exists(_.covers(t)))) // sanity check - maxterms.foreach(t => assert(!cover.exists(_.intersects(t)))) // sanity check + verify(cover, minterms, maxterms) cover } } From 5b20ed71be9a6e32bfb5fd0d183ae5fc695cb866 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 5 Nov 2012 01:30:57 -0800 Subject: [PATCH 0497/1087] move rd=0 check into bypass logic before, the check was in the write enable logic, but moving it obviated an awkward corner case for mtpcr with rd=0. 
--- rocket/src/main/scala/ctrl.scala | 18 +++++++++--------- rocket/src/main/scala/dpath.scala | 13 +++++++------ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 61fb58b2..713a695d 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -504,13 +504,13 @@ class rocketCtrl extends Component ex_reg_br_type := id_br_type; ex_reg_jalr := id_jalr ex_reg_btb_hit := io.imem.resp.bits.taken - ex_reg_div_val := id_div_val.toBool && id_waddr != UFix(0); - ex_reg_mul_val := id_mul_val.toBool && id_waddr != UFix(0); + ex_reg_div_val := id_div_val + ex_reg_mul_val := id_mul_val ex_reg_mul_fn := id_mul_fn.toUFix ex_reg_mem_val := id_mem_val.toBool; ex_reg_valid := Bool(true) ex_reg_pcr := id_pcr - ex_reg_wen := id_wen.toBool && id_waddr != UFix(0); + ex_reg_wen := id_wen ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen ex_reg_eret := id_eret.toBool; ex_reg_flush_inst := (id_sync === SYNC_I); @@ -708,9 +708,9 @@ class rocketCtrl extends Component (mem_reg_mem_type === MT_B) || (mem_reg_mem_type === MT_BU) || (mem_reg_mem_type === MT_H) || (mem_reg_mem_type === MT_HU) val data_hazard_mem = mem_reg_wen && - (id_renx1.toBool && id_raddr1 === io.dpath.mem_waddr || - id_renx2.toBool && id_raddr2 === io.dpath.mem_waddr || - id_wen.toBool && id_waddr === io.dpath.mem_waddr) + (id_raddr1 != UFix(0) && id_renx1 && id_raddr1 === io.dpath.mem_waddr || + id_raddr2 != UFix(0) && id_renx2 && id_raddr2 === io.dpath.mem_waddr || + id_waddr != UFix(0) && id_wen && id_waddr === io.dpath.mem_waddr) val fp_data_hazard_mem = mem_reg_fp_wen && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.mem_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || @@ -722,9 +722,9 @@ class rocketCtrl extends Component // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
val data_hazard_wb = wb_reg_wen && - (id_renx1.toBool && id_raddr1 === io.dpath.wb_waddr || - id_renx2.toBool && id_raddr2 === io.dpath.wb_waddr || - id_wen.toBool && id_waddr === io.dpath.wb_waddr) + (id_raddr1 != UFix(0) && id_renx1 && id_raddr1 === io.dpath.wb_waddr || + id_raddr2 != UFix(0) && id_renx2 && id_raddr2 === io.dpath.wb_waddr || + id_waddr != UFix(0) && id_wen && id_waddr === io.dpath.wb_waddr) val fp_data_hazard_wb = wb_reg_fp_wen && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.wb_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr || diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 41e469ac..f6679130 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -100,32 +100,33 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component debug(id_pc) val regfile_ = Mem(31){Bits(width = 64)} - def readRF(a: UFix) = Mux(a === UFix(0), Bits(0), regfile_(~a)) + def readRF(a: UFix) = regfile_(~a) def writeRF(a: UFix, d: Bits) = regfile_(~a) := d val id_raddr1 = id_inst(26,22).toUFix; val id_raddr2 = id_inst(21,17).toUFix; // bypass muxes - val id_rs1_dmem_bypass = + val id_rs1_dmem_bypass = id_raddr1 != UFix(0) && Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, Bool(false), Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, io.ctrl.mem_load, Bool(false))) val id_rs1 = + Mux(id_raddr1 === UFix(0), UFix(0), Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, ex_wdata, Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata, Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, - readRF(id_raddr1)))) + readRF(id_raddr1))))) - val id_rs2_dmem_bypass = + val id_rs2_dmem_bypass = id_raddr2 != UFix(0) && Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, Bool(false), Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, io.ctrl.mem_load, Bool(false))) - val id_rs2 = + val id_rs2 = Mux(id_raddr2 === UFix(0), UFix(0), Mux(io.ctrl.ex_wen && 
id_raddr2 === ex_reg_waddr, ex_wdata, Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata, Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, - readRF(id_raddr2)))) + readRF(id_raddr2))))) // immediate generation val id_imm_bj = io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE From 2a25307a8f5bbff47f7175142d7153a87f2cbf4c Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 3 Nov 2012 21:51:46 -0700 Subject: [PATCH 0498/1087] revamp the vector unit with the new frontend --- rocket/src/main/scala/cpu.scala | 11 ++++++----- rocket/src/main/scala/package.scala | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 1b1d8c17..9c4a2d7f 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -134,15 +134,16 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component // hooking up vector I$ ptw.io.requestor(2) <> io.vimem.ptw io.vimem.req.bits.status := dpath.io.ctrl.status - io.vimem.req.bits.pc := vu.io.imem_req.bits.toUFix + io.vimem.req.bits.pc := vu.io.imem_req.bits io.vimem.req.valid := vu.io.imem_req.valid io.vimem.req.bits.invalidate := ctrl.io.dpath.flush_inst io.vimem.req.bits.invalidateTLB := dpath.io.ptbr_wen - vu.io.imem_req.ready := Bool(true) vu.io.imem_resp.valid := io.vimem.resp.valid - vu.io.imem_resp.bits := io.vimem.resp.bits.data - vu.io.vitlb_exception := io.vimem.resp.bits.xcpt_if - io.vimem.resp.ready := Bool(true) + vu.io.imem_resp.bits.pc := io.vimem.resp.bits.pc + vu.io.imem_resp.bits.data := io.vimem.resp.bits.data + vu.io.imem_resp.bits.xcpt_ma := io.vimem.resp.bits.xcpt_ma + vu.io.imem_resp.bits.xcpt_if := io.vimem.resp.bits.xcpt_if + io.vimem.resp.ready := vu.io.imem_resp.ready io.vimem.req.bits.mispredict := Bool(false) io.vimem.req.bits.taken := Bool(false) diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index 
22d18ca9..28fdcfad 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -18,7 +18,7 @@ object Constants extends { def HAVE_RVC = false def HAVE_FPU = true - def HAVE_VEC = false + def HAVE_VEC = true val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK From ee081d16718d255b9ea879daf5d05fefe57af24b Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 4 Nov 2012 23:31:58 -0800 Subject: [PATCH 0499/1087] modify code to fix UFix := Bits error --- rocket/src/main/scala/multiplier.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 5b859ac8..ce14d301 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -56,7 +56,7 @@ class rocketVUMultiplier(nwbq: Int) extends Component { io.cpu.req.ready := io.vu.req.ready && wbq_rdy io.cpu.resp_val := wbq.io.deq.valid io.cpu.resp_bits := wbq.io.deq.bits >> UFix(io.cpu.resp_tag.width) - io.cpu.resp_tag := wbq.io.deq.bits(io.cpu.resp_tag.width-1,0) + io.cpu.resp_tag := wbq.io.deq.bits(io.cpu.resp_tag.width-1,0).toUFix io.vu.req <> io.cpu.req } From c5b93798fb24567319d27e5ffc4c6574ac24a4ba Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 5 Nov 2012 23:52:32 -0800 Subject: [PATCH 0500/1087] factor out more global constants --- rocket/src/main/scala/arbiter.scala | 16 +- rocket/src/main/scala/consts.scala | 9 - rocket/src/main/scala/cpu.scala | 18 +- rocket/src/main/scala/ctrl.scala | 49 ++-- rocket/src/main/scala/dpath.scala | 41 ++- rocket/src/main/scala/icache.scala | 20 +- rocket/src/main/scala/nbdcache.scala | 385 +++++++++++++-------------- rocket/src/main/scala/ptw.scala | 6 +- rocket/src/main/scala/queues.scala | 31 --- rocket/src/main/scala/tile.scala | 20 +- 10 files changed, 268 insertions(+), 327 deletions(-) delete mode 100644 rocket/src/main/scala/queues.scala diff --git 
a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 9e27f6ed..5e5669ea 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -5,16 +5,12 @@ import Node._ import Constants._ import uncore._ -class ioHellaCacheArbiter(n: Int) extends Bundle +class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Component { - val requestor = Vec(n) { new ioHellaCache() }.flip - val mem = new ioHellaCache -} - -class rocketHellaCacheArbiter(n: Int) extends Component -{ - val io = new ioHellaCacheArbiter(n) - require(DCACHE_TAG_BITS >= log2Up(n) + CPU_TAG_BITS) + val io = new Bundle { + val requestor = Vec(n) { new ioHellaCache()(conf.dcache) }.flip + val mem = new ioHellaCache()(conf.dcache) + } var req_val = Bool(false) var req_rdy = io.mem.req.ready @@ -78,7 +74,7 @@ class ioUncachedRequestor extends Bundle { val xact_finish = (new FIFOIO) { new TransactionFinish } } -class rocketMemArbiter(n: Int) extends Component { +class MemArbiter(n: Int) extends Component { val io = new Bundle { val mem = new ioUncachedRequestor val requestor = Vec(n) { new ioUncachedRequestor }.flip diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index ea28d0cb..055ec62b 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -155,11 +155,6 @@ trait InterruptConstants { } abstract trait RocketDcacheConstants extends ArbiterConstants with uncore.constants.AddressConstants { - val INST_BITS = 32 - val CPU_DATA_BITS = 64; - val CPU_TAG_BITS = 9; - val DCACHE_TAG_BITS = log2Up(DCACHE_PORTS) + CPU_TAG_BITS - val LG_REFILL_WIDTH = 4; // log2(cache bus width in bytes) val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses require(log2Up(NMSHR)+3 <= uncore.Constants.TILE_XACT_ID_BITS) val NRPQ = 16; // number of secondary misses @@ -168,10 +163,6 @@ abstract trait RocketDcacheConstants extends ArbiterConstants with uncore.consta require(OFFSET_BITS 
== log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES)) require(OFFSET_BITS <= uncore.Constants.X_INIT_WRITE_MASK_BITS) require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS) - val IDX_BITS = 7; - val TAG_BITS = PADDR_BITS - OFFSET_BITS - IDX_BITS; - val NWAYS = 4 - require(IDX_BITS+OFFSET_BITS <= PGIDX_BITS); } trait TLBConstants { diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 9c4a2d7f..95345f06 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -8,21 +8,21 @@ import hwacha._ class ioRocket(implicit conf: RocketConfiguration) extends Bundle { val host = new ioHTIF(conf.ntiles) - val imem = new IOCPUFrontend - val vimem = new IOCPUFrontend - val dmem = new ioHellaCache + val imem = new IOCPUFrontend()(conf.icache) + val vimem = new IOCPUFrontend()(conf.icache) + val dmem = new ioHellaCache()(conf.dcache) } class rocketProc(implicit conf: RocketConfiguration) extends Component { val io = new ioRocket - val ctrl = new rocketCtrl - val dpath = new rocketDpath + val ctrl = new Control + val dpath = new Datapath val dtlb = new rocketTLB(DTLB_ENTRIES); val ptw = new rocketPTW(if (HAVE_VEC) 3 else 2) - val arb = new rocketHellaCacheArbiter(DCACHE_PORTS) + val arb = new HellaCacheArbiter(DCACHE_PORTS) var vu: vu = null if (HAVE_VEC) @@ -199,17 +199,13 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component vu.io.xcpt.hold := ctrl.io.vec_iface.hold // hooking up vector memory interface - val storegen = new StoreDataGen - storegen.io.typ := vu.io.dmem_req.bits.typ - storegen.io.din := vu.io.dmem_req.bits.data - arb.io.requestor(DCACHE_VU).req.valid := vu.io.dmem_req.valid arb.io.requestor(DCACHE_VU).req.bits.kill := vu.io.dmem_req.bits.kill arb.io.requestor(DCACHE_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd arb.io.requestor(DCACHE_VU).req.bits.typ := vu.io.dmem_req.bits.typ arb.io.requestor(DCACHE_VU).req.bits.idx := vu.io.dmem_req.bits.idx 
arb.io.requestor(DCACHE_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn) - arb.io.requestor(DCACHE_VU).req.bits.data := Reg(storegen.io.dout) + arb.io.requestor(DCACHE_VU).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) arb.io.requestor(DCACHE_VU).req.bits.tag := vu.io.dmem_req.bits.tag vu.io.dmem_req.ready := arb.io.requestor(DCACHE_VU).req.ready diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 713a695d..987f5b20 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -67,23 +67,7 @@ class ioCtrlDpath extends Bundle() val pcr_replay = Bool(INPUT) } -class ioCtrlAll extends Bundle() -{ - val dpath = new ioCtrlDpath(); - val imem = new IOCPUFrontend - val dmem = new ioHellaCache - val dtlb_val = Bool(OUTPUT); - val dtlb_kill = Bool(OUTPUT); - val dtlb_rdy = Bool(INPUT); - val dtlb_miss = Bool(INPUT); - val xcpt_dtlb_ld = Bool(INPUT); - val xcpt_dtlb_st = Bool(INPUT); - val fpu = new ioCtrlFPU(); - val vec_dpath = new ioCtrlDpathVec() - val vec_iface = new ioCtrlVecInterface() -} - -abstract trait rocketCtrlDecodeConstants +abstract trait DecodeConstants { val xpr64 = Y; @@ -98,7 +82,7 @@ abstract trait rocketCtrlDecodeConstants val table: Array[(Bits, List[Bits])] } -object rocketCtrlXDecode extends rocketCtrlDecodeConstants +object XDecode extends DecodeConstants { val table = Array( // jalr eret @@ -209,7 +193,7 @@ object rocketCtrlXDecode extends rocketCtrlDecodeConstants RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N)) } -object rocketCtrlFDecode extends rocketCtrlDecodeConstants +object FDecode extends DecodeConstants { val table = Array( // jalr eret @@ -277,7 +261,7 @@ object rocketCtrlFDecode extends rocketCtrlDecodeConstants FSD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N)) } -object rocketCtrlVDecode 
extends rocketCtrlDecodeConstants +object VDecode extends DecodeConstants { val table = Array( // jalr eret @@ -334,15 +318,28 @@ object rocketCtrlVDecode extends rocketCtrlDecodeConstants VXCPTHOLD-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N)) } -class rocketCtrl extends Component +class Control(implicit conf: RocketConfiguration) extends Component { - val io = new ioCtrlAll(); + val io = new Bundle { + val dpath = new ioCtrlDpath + val imem = new IOCPUFrontend()(conf.icache) + val dmem = new ioHellaCache()(conf.dcache) + val dtlb_val = Bool(OUTPUT) + val dtlb_kill = Bool(OUTPUT) + val dtlb_rdy = Bool(INPUT) + val dtlb_miss = Bool(INPUT) + val xcpt_dtlb_ld = Bool(INPUT) + val xcpt_dtlb_st = Bool(INPUT) + val fpu = new ioCtrlFPU + val vec_dpath = new ioCtrlDpathVec + val vec_iface = new ioCtrlVecInterface + } - var decode_table = rocketCtrlXDecode.table - if (HAVE_FPU) decode_table ++= rocketCtrlFDecode.table - if (HAVE_VEC) decode_table ++= rocketCtrlVDecode.table + var decode_table = XDecode.table + if (HAVE_FPU) decode_table ++= FDecode.table + if (HAVE_VEC) decode_table ++= VDecode.table - val cs = DecodeLogic(io.dpath.inst, rocketCtrlXDecode.decode_default, decode_table) + val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) val id_int_val :: id_fp_val :: id_vec_val :: id_br_type :: id_jalr :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f6679130..d1f5fe69 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -6,25 +6,22 @@ import Constants._ import Instructions._ import hwacha._ -class ioDpathAll(implicit conf: RocketConfiguration) extends Bundle +class Datapath(implicit conf: 
RocketConfiguration) extends Component { - val host = new ioHTIF(conf.ntiles) - val ctrl = new ioCtrlDpath().flip - val dmem = new ioHellaCache - val dtlb = new ioDTLB_CPU_req_bundle().asOutput() - val imem = new IOCPUFrontend - val ptbr_wen = Bool(OUTPUT); - val ptbr = UFix(OUTPUT, PADDR_BITS); - val fpu = new ioDpathFPU(); - val vec_ctrl = new ioCtrlDpathVec().flip - val vec_iface = new ioDpathVecInterface() - val vec_imul_req = new io_imul_req - val vec_imul_resp = Bits(INPUT, hwacha.Constants.SZ_XLEN) -} - -class rocketDpath(implicit conf: RocketConfiguration) extends Component -{ - val io = new ioDpathAll(); + val io = new Bundle { + val host = new ioHTIF(conf.ntiles) + val ctrl = new ioCtrlDpath().flip + val dmem = new ioHellaCache()(conf.dcache) + val dtlb = new ioDTLB_CPU_req_bundle().asOutput() + val imem = new IOCPUFrontend()(conf.icache) + val ptbr_wen = Bool(OUTPUT); + val ptbr = UFix(OUTPUT, PADDR_BITS); + val fpu = new ioDpathFPU(); + val vec_ctrl = new ioCtrlDpathVec().flip + val vec_iface = new ioDpathVecInterface() + val vec_imul_req = new io_imul_req + val vec_imul_resp = Bits(INPUT, hwacha.Constants.SZ_XLEN) + } val pcr = new rocketDpathPCR(); val ex_pcr = pcr.io.r.data; @@ -215,6 +212,7 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component io.dmem.req.bits.idx := ex_effective_address io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) + require(io.dmem.req.bits.tag.getWidth >= 6) io.dtlb.vpn := ex_effective_address >> UFix(PGIDX_BITS) // processor control regfile read @@ -252,17 +250,12 @@ class rocketDpath(implicit conf: RocketConfiguration) extends Component Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, ex_alu_out))).toBits // WB_ALU - // subword store data generation - val storegen = new StoreDataGen - storegen.io.typ := io.ctrl.ex_mem_type - storegen.io.din := ex_rs2 - // memory stage mem_reg_kill := ex_reg_kill when (!ex_reg_kill) { 
mem_reg_pc := ex_reg_pc mem_reg_inst := ex_reg_inst - mem_reg_rs2 := storegen.io.dout + mem_reg_rs2 := StoreGen(io.ctrl.ex_mem_type, Bits(0), ex_rs2).data mem_reg_waddr := ex_reg_waddr mem_reg_wdata := ex_wdata mem_reg_raddr1 := ex_reg_raddr1 diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index e618a211..8a7492c2 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -10,7 +10,7 @@ case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached, parity: Boolean = false) { val w = 1 - val ibytes = INST_BITS/8 + val ibytes = 4 val dm = assoc == 1 val lines = sets * assoc @@ -37,29 +37,31 @@ class FrontendReq extends Bundle { val currentpc = UFix(width = VADDR_BITS+1) } -class FrontendResp extends Bundle { +class FrontendResp(implicit conf: ICacheConfig) extends Bundle { val pc = UFix(width = VADDR_BITS+1) // ID stage PC - val data = Bits(width = INST_BITS) + val data = Bits(width = conf.ibytes*8) val taken = Bool() val xcpt_ma = Bool() val xcpt_if = Bool() + + override def clone = new FrontendResp().asInstanceOf[this.type] } -class IOCPUFrontend extends Bundle { +class IOCPUFrontend(implicit conf: ICacheConfig) extends Bundle { val req = new PipeIO()(new FrontendReq) val resp = new FIFOIO()(new FrontendResp).flip val ptw = new IOTLBPTW().flip } -class Frontend(c: ICacheConfig) extends Component +class Frontend(implicit c: ICacheConfig) extends Component { val io = new Bundle { - val cpu = new IOCPUFrontend().flip + val cpu = new IOCPUFrontend()(c).flip val mem = new ioUncachedRequestor } val btb = new rocketDpathBTB(BTB_ENTRIES) - val icache = new ICache(c) + val icache = new ICache val tlb = new TLB(ITLB_ENTRIES) val s1_pc = Reg() { UFix() } @@ -123,7 +125,7 @@ class Frontend(c: ICacheConfig) extends Component io.cpu.resp.bits.xcpt_if := s2_xcpt_if } -class ICache(c: ICacheConfig) extends Component +class ICache(implicit c: ICacheConfig) extends Component { val io = new Bundle 
{ val req = new PipeIO()(new Bundle { @@ -133,7 +135,7 @@ class ICache(c: ICacheConfig) extends Component val kill = Bool() // delayed one cycle }).flip val resp = new FIFOIO()(new Bundle { - val data = Bits(width = INST_BITS) + val data = Bits(width = c.ibytes*8) val datablock = Bits(width = c.databits) }) val mem = new ioUncachedRequestor diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 72c3528f..9b03d97d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -4,162 +4,157 @@ import Chisel._ import Constants._ import uncore._ -class ioReplacementWayGen extends Bundle { - val pick_new_way = Bool(dir = INPUT) - val way_en = Bits(width = NWAYS, dir = INPUT) - val way_id = UFix(width = log2Up(NWAYS), dir = OUTPUT) +case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, + nmshr: Int, nsecondary: Int, nsdq: Int, + reqtagbits: Int = -1) +{ + require(isPow2(sets)) + require(isPow2(ways)) // TODO: relax this + def lines = sets*ways + def dm = ways == 1 + def ppnbits = PPN_BITS + def pgidxbits = PGIDX_BITS + def offbits = OFFSET_BITS + def paddrbits = ppnbits + pgidxbits + def lineaddrbits = ppnbits - offbits + def idxbits = log2Up(sets) + def waybits = log2Up(ways) + def tagbits = lineaddrbits - idxbits + def databytes = 8 // assumed by StoreGen/LoadGen/AMOALU + def databits = databytes*8 } -class RandomReplacementWayGen extends Component { - val io = new ioReplacementWayGen() - //TODO: Actually limit selection based on which ways are allowed (io.ways_en) - io.way_id := UFix(0) - if(NWAYS > 1) - { - val rand_way_id = LFSR16(io.pick_new_way)(log2Up(NWAYS)-1,0) - when (rand_way_id < UFix(NWAYS)) { io.way_id := rand_way_id } - } +abstract class ReplacementPolicy +{ + def way: UFix + def miss: Unit + def hit: Unit } -class StoreMaskGen extends Component { - val io = new Bundle { - val typ = Bits(INPUT, 3) - val addr = Bits(INPUT, 3) - val wmask = Bits(OUTPUT, 8) - } +class 
RandomReplacement(implicit conf: DCacheConfig) extends ReplacementPolicy +{ + private val replace = Bool() + replace := Bool(false) + val lfsr = LFSR16(replace) - val word = (io.typ === MT_W) || (io.typ === MT_WU) - val half = (io.typ === MT_H) || (io.typ === MT_HU) - val byte_ = (io.typ === MT_B) || (io.typ === MT_BU) - - io.wmask := Mux(byte_, Bits( 1,1) << io.addr(2,0).toUFix, - Mux(half, Bits( 3,2) << Cat(io.addr(2,1), Bits(0,1)).toUFix, - Mux(word, Bits( 15,4) << Cat(io.addr(2), Bits(0,2)).toUFix, - Bits(255,8)))); + def way = if (conf.dm) UFix(0) else lfsr(conf.waybits-1,0) + def miss = replace := Bool(true) + def hit = {} } -class StoreDataGen extends Component { - val io = new Bundle { - val typ = Bits(INPUT, 3) - val din = Bits(INPUT, 64) - val dout = Bits(OUTPUT, 64) - } - - val word = (io.typ === MT_W) || (io.typ === MT_WU) - val half = (io.typ === MT_H) || (io.typ === MT_HU) - val byte_ = (io.typ === MT_B) || (io.typ === MT_BU) - - io.dout := Mux(byte_, Fill(8, io.din( 7,0)), - Mux(half, Fill(4, io.din(15,0)), - Mux(word, Fill(2, io.din(31,0)), - io.din))) +case class StoreGen(typ: Bits, addr: Bits, dat: Bits) +{ + val byte = typ === MT_B || typ === MT_BU + val half = typ === MT_H || typ === MT_HU + val word = typ === MT_W || typ === MT_WU + def mask = + Mux(byte, Bits( 1) << addr(2,0), + Mux(half, Bits( 3) << Cat(addr(2,1), Bits(0,1)), + Mux(word, Bits( 15) << Cat(addr(2), Bits(0,2)), + Bits(255)))) + def data = + Mux(byte, Fill(8, dat( 7,0)), + Mux(half, Fill(4, dat(15,0)), + Mux(word, Fill(2, dat(31,0)), + dat))) } -// this currently requires that CPU_DATA_BITS == 64 -class LoadDataGen extends Component { - val io = new Bundle { - val typ = Bits(INPUT, 3) - val addr = Bits(INPUT, log2Up(MEM_DATA_BITS/8)) - val din = Bits(INPUT, MEM_DATA_BITS) - val dout = Bits(OUTPUT, 64) - val r_dout = Bits(OUTPUT, 64) - val r_dout_subword = Bits(OUTPUT, 64) - } +case class LoadGen(typ: Bits, addr: Bits, dat: Bits) +{ + val t = StoreGen(typ, addr, dat) + val sign = 
typ === MT_B || typ === MT_H || typ === MT_W || typ === MT_D - val sext = (io.typ === MT_B) || (io.typ === MT_H) || - (io.typ === MT_W) || (io.typ === MT_D) - val word = (io.typ === MT_W) || (io.typ === MT_WU) - val half = (io.typ === MT_H) || (io.typ === MT_HU) - val byte_ = (io.typ === MT_B) || (io.typ === MT_BU) - - val shifted = io.din >> Cat(io.addr(io.addr.width-1,2), Bits(0, 5)).toUFix - val extended = - Mux(word, Cat(Fill(32, sext & shifted(31)), shifted(31,0)), shifted) - - val r_extended = Reg(extended) - val r_sext = Reg(sext) - val r_half = Reg(half) - val r_byte = Reg(byte_) - val r_addr = Reg(io.addr) - - val shifted_subword = r_extended >> Cat(r_addr(1,0), Bits(0, 3)).toUFix - val extended_subword = - Mux(r_byte, Cat(Fill(56, r_sext & shifted_subword( 7)), shifted_subword( 7,0)), - Mux(r_half, Cat(Fill(48, r_sext & shifted_subword(15)), shifted_subword(15,0)), - shifted_subword)) - - io.dout := extended - io.r_dout := r_extended - io.r_dout_subword := extended_subword + val wordShift = Mux(addr(2), dat(63,32), dat(31,0)) + val word = Cat(Mux(t.word, Fill(32, sign && wordShift(31)), dat(63,32)), wordShift) + val halfShift = Mux(addr(1), word(31,16), word(15,0)) + val half = Cat(Mux(t.half, Fill(48, sign && halfShift(15)), word(63,16)), halfShift) + val byteShift = Mux(addr(0), half(15,8), half(7,0)) + val byte = Cat(Mux(t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift) } -class MSHRReq extends Bundle { +class MSHRReq(implicit conf: DCacheConfig) extends Bundle { val tag_miss = Bool() val old_dirty = Bool() - val old_tag = Bits(width = TAG_BITS) + val old_tag = Bits(width = conf.tagbits) - val tag = Bits(width = TAG_BITS) - val idx = Bits(width = IDX_BITS) - val way_oh = Bits(width = NWAYS) + val tag = Bits(width = conf.tagbits) + val idx = Bits(width = conf.idxbits) + val way_oh = Bits(width = conf.ways) - val offset = Bits(width = OFFSET_BITS) + val offset = Bits(width = conf.offbits) val cmd = Bits(width = 4) val typ = Bits(width = 3) 
- val cpu_tag = Bits(width = DCACHE_TAG_BITS) - val data = Bits(width = CPU_DATA_BITS) + val cpu_tag = Bits(width = conf.reqtagbits) + val data = Bits(width = conf.databits) + + override def clone = new MSHRReq().asInstanceOf[this.type] } -class RPQEntry extends Bundle { - val offset = Bits(width = OFFSET_BITS) +class RPQEntry(implicit conf: DCacheConfig) extends Bundle { + val offset = Bits(width = conf.offbits) val cmd = Bits(width = 4) val typ = Bits(width = 3) - val sdq_id = UFix(width = log2Up(NSDQ)) - val cpu_tag = Bits(width = DCACHE_TAG_BITS) + val sdq_id = UFix(width = log2Up(conf.nsdq)) + val cpu_tag = Bits(width = conf.reqtagbits) + + override def clone = new RPQEntry().asInstanceOf[this.type] } -class Replay extends RPQEntry { - val idx = Bits(width = IDX_BITS) - val way_oh = Bits(width = NWAYS) +class Replay(implicit conf: DCacheConfig) extends RPQEntry { + val idx = Bits(width = conf.idxbits) + val way_oh = Bits(width = conf.ways) + + override def clone = new Replay().asInstanceOf[this.type] } -class DataReq extends Bundle { - val idx = Bits(width = IDX_BITS) - val offset = Bits(width = OFFSET_BITS) +class DataReq(implicit conf: DCacheConfig) extends Bundle { + val idx = Bits(width = conf.idxbits) + val offset = Bits(width = conf.offbits) val cmd = Bits(width = 4) val typ = Bits(width = 3) - val data = Bits(width = CPU_DATA_BITS) - val way_oh = Bits(width = NWAYS) + val data = Bits(width = conf.databits) + val way_oh = Bits(width = conf.ways) + + override def clone = new DataReq().asInstanceOf[this.type] } -class DataArrayReq extends Bundle { - val way_en = Bits(width = NWAYS) - val idx = Bits(width = IDX_BITS) +class DataArrayReq(implicit conf: DCacheConfig) extends Bundle { + val way_en = Bits(width = conf.ways) + val idx = Bits(width = conf.idxbits) val offset = Bits(width = log2Up(REFILL_CYCLES)) val rw = Bool() val wmask = Bits(width = MEM_DATA_BITS/8) val data = Bits(width = MEM_DATA_BITS) + + override def clone = new 
DataArrayReq().asInstanceOf[this.type] } -class WritebackReq extends Bundle { - val tag = Bits(width = TAG_BITS) - val idx = Bits(width = IDX_BITS) - val way_oh = Bits(width = NWAYS) +class WritebackReq(implicit conf: DCacheConfig) extends Bundle { + val tag = Bits(width = conf.tagbits) + val idx = Bits(width = conf.idxbits) + val way_oh = Bits(width = conf.ways) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) + + override def clone = new WritebackReq().asInstanceOf[this.type] } -class MetaData extends Bundle { +class MetaData(implicit conf: DCacheConfig) extends Bundle { val state = UFix(width = 2) - val tag = Bits(width = TAG_BITS) + val tag = Bits(width = conf.tagbits) + + override def clone = new MetaData().asInstanceOf[this.type] } -class MetaArrayReq extends Bundle { - val way_en = Bits(width = NWAYS) - val idx = Bits(width = IDX_BITS) +class MetaArrayReq(implicit conf: DCacheConfig) extends Bundle { + val way_en = Bits(width = conf.ways) + val idx = Bits(width = conf.idxbits) val rw = Bool() val data = new MetaData() + + override def clone = new MetaArrayReq().asInstanceOf[this.type] } -class MSHR(id: Int)(implicit conf: RocketConfiguration) extends Component { +class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) @@ -169,10 +164,10 @@ class MSHR(id: Int)(implicit conf: RocketConfiguration) extends Component { val req_sdq_id = UFix(INPUT, log2Up(NSDQ)) val idx_match = Bool(OUTPUT) - val idx = Bits(OUTPUT, IDX_BITS) + val idx = Bits(OUTPUT, conf.idxbits) val refill_count = Bits(OUTPUT, log2Up(REFILL_CYCLES)) - val tag = Bits(OUTPUT, TAG_BITS) - val way_oh = Bits(OUTPUT, NWAYS) + val tag = Bits(OUTPUT, conf.tagbits) + val way_oh = Bits(OUTPUT, conf.ways) val mem_req = (new FIFOIO) { new TransactionInit } val meta_req = (new FIFOIO) { new MetaArrayReq() } @@ -293,14 +288,14 @@ class MSHR(id: Int)(implicit conf: RocketConfiguration) extends Component { 
io.replay.bits.way_oh := req.way_oh } -class MSHRFile(implicit conf: RocketConfiguration) extends Component { +class MSHRFile(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new MSHRReq }.flip val secondary_miss = Bool(OUTPUT) - val mem_resp_idx = Bits(OUTPUT, IDX_BITS) + val mem_resp_idx = Bits(OUTPUT, conf.idxbits) val mem_resp_offset = Bits(OUTPUT, log2Up(REFILL_CYCLES)) - val mem_resp_way_oh = Bits(OUTPUT, NWAYS) + val mem_resp_way_oh = Bits(OUTPUT, conf.ways) val fence_rdy = Bool(OUTPUT) @@ -314,7 +309,7 @@ class MSHRFile(implicit conf: RocketConfiguration) extends Component { val probe = (new FIFOIO) { Bool() }.flip val cpu_resp_val = Bool(OUTPUT) - val cpu_resp_tag = Bits(OUTPUT, DCACHE_TAG_BITS) + val cpu_resp_tag = Bits(OUTPUT, conf.reqtagbits) } val sdq_val = Reg(resetVal = Bits(0, NSDQ)) @@ -416,7 +411,7 @@ class MSHRFile(implicit conf: RocketConfiguration) extends Component { } -class WritebackUnit(implicit conf: RocketConfiguration) extends Component { +class WritebackUnit(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new WritebackReq() }.flip val probe = (new FIFOIO) { new WritebackReq() }.flip @@ -485,16 +480,16 @@ class WritebackUnit(implicit conf: RocketConfiguration) extends Component { io.probe_rep_data.bits.data := io.data_resp } -class ProbeUnit(implicit conf: RocketConfiguration) extends Component { +class ProbeUnit(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new ProbeRequest }.flip val rep = (new FIFOIO) { new ProbeReply } val meta_req = (new FIFOIO) { new MetaArrayReq } val mshr_req = (new FIFOIO) { Bool() } val wb_req = (new FIFOIO) { new WritebackReq } - val tag_match_way_oh = Bits(INPUT, NWAYS) + val tag_match_way_oh = Bits(INPUT, conf.ways) val line_state = UFix(INPUT, 2) - val addr = Bits(OUTPUT, PADDR_BITS-OFFSET_BITS) + val addr = Bits(OUTPUT, conf.lineaddrbits) } val s_reset :: 
s_invalid :: s_meta_req :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: Nil = Enum(8) { UFix() } @@ -535,21 +530,21 @@ class ProbeUnit(implicit conf: RocketConfiguration) extends Component { io.rep.bits := conf.co.newProbeReply(req, Mux(hit, line_state, conf.co.newStateOnFlush)) io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_mshr_req || state === s_probe_rep && hit - io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, ~UFix(0, NWAYS)) + io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, Fix(-1)) io.meta_req.bits.rw := state === s_probe_rep io.meta_req.bits.idx := req.addr io.meta_req.bits.data.state := conf.co.newStateOnProbeRequest(req, line_state) - io.meta_req.bits.data.tag := req.addr >> UFix(IDX_BITS) + io.meta_req.bits.data.tag := req.addr >> UFix(conf.idxbits) io.mshr_req.valid := state === s_meta_resp || state === s_mshr_req io.addr := req.addr io.wb_req.valid := state === s_writeback_req io.wb_req.bits.way_oh := way_oh io.wb_req.bits.idx := req.addr - io.wb_req.bits.tag := req.addr >> UFix(IDX_BITS) + io.wb_req.bits.tag := req.addr >> UFix(conf.idxbits) } -class FlushUnit(lines: Int)(implicit conf: RocketConfiguration) extends Component { +class FlushUnit(lines: Int)(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { Bool() }.flip val meta_req = (new FIFOIO) { new MetaArrayReq() } @@ -560,15 +555,15 @@ class FlushUnit(lines: Int)(implicit conf: RocketConfiguration) extends Componen val state = Reg(resetVal = s_reset) val idx_cnt = Reg(resetVal = UFix(0, log2Up(lines))) val next_idx_cnt = idx_cnt + UFix(1) - val way_cnt = if (NWAYS == 1) UFix(0) else Reg(resetVal = UFix(0, log2Up(NWAYS))) + val way_cnt = if (conf.dm) UFix(0) else Reg(resetVal = UFix(0, conf.waybits)) val next_way_cnt = way_cnt + UFix(1) switch (state) { is(s_reset) { when (io.meta_req.ready) { - state := Mux(way_cnt === UFix(NWAYS-1) && idx_cnt.andR, 
s_ready, s_reset); - when (way_cnt === UFix(NWAYS-1)) { idx_cnt := next_idx_cnt }; - if (NWAYS > 1) way_cnt := next_way_cnt; + state := Mux(way_cnt === UFix(conf.ways-1) && idx_cnt.andR, s_ready, s_reset); + when (way_cnt === UFix(conf.ways-1)) { idx_cnt := next_idx_cnt }; + if (!conf.dm) way_cnt := next_way_cnt; } } is(s_ready) { when (io.req.valid) { state := s_meta_read } } @@ -577,13 +572,13 @@ class FlushUnit(lines: Int)(implicit conf: RocketConfiguration) extends Componen state := s_meta_read when (io.mshr_req.ready) { state := s_meta_read - when (way_cnt === UFix(NWAYS-1)) { + when (way_cnt === UFix(conf.ways-1)) { when (idx_cnt.andR) { state := s_ready } idx_cnt := next_idx_cnt } - if (NWAYS > 1) way_cnt := next_way_cnt; + if (!conf.dm) way_cnt := next_way_cnt; } } } @@ -591,35 +586,35 @@ class FlushUnit(lines: Int)(implicit conf: RocketConfiguration) extends Componen io.req.ready := state === s_ready io.mshr_req.valid := state === s_meta_wait io.meta_req.valid := (state === s_meta_read) || (state === s_reset) - io.meta_req.bits.way_en := UFixToOH(way_cnt, NWAYS) + io.meta_req.bits.way_en := UFixToOH(way_cnt) io.meta_req.bits.idx := idx_cnt io.meta_req.bits.rw := (state === s_reset) io.meta_req.bits.data.state := conf.co.newStateOnFlush() io.meta_req.bits.data.tag := UFix(0) } -class MetaDataArrayArray(lines: Int) extends Component { +class MetaDataArrayArray(lines: Int)(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new MetaArrayReq() }.flip - val resp = Vec(NWAYS){ (new MetaData).asOutput } + val resp = Vec(conf.ways){ (new MetaData).asOutput } val state_req = (new FIFOIO) { new MetaArrayReq() }.flip - val way_en = Bits(width = NWAYS, dir = OUTPUT) + val way_en = Bits(OUTPUT, conf.ways) } val permBits = io.req.bits.data.state.width - val perms = Mem(lines) { UFix(width = permBits*NWAYS) } - val tags = Mem(lines, seqRead = true) { Bits(width = TAG_BITS*NWAYS) } + val perms = Mem(lines) { UFix(width = 
permBits*conf.ways) } + val tags = Mem(lines, seqRead = true) { Bits(width = conf.tagbits*conf.ways) } val tag = Reg() { Bits() } val raddr = Reg() { Bits() } - val way_en_ = Reg { Bits(width=NWAYS) } + val way_en_ = Reg { Bits(width = conf.ways) } when (io.state_req.valid && io.state_req.bits.rw) { - perms.write(io.state_req.bits.idx, Fill(NWAYS, io.state_req.bits.data.state), FillInterleaved(permBits, io.state_req.bits.way_en)) + perms.write(io.state_req.bits.idx, Fill(conf.ways, io.state_req.bits.data.state), FillInterleaved(permBits, io.state_req.bits.way_en)) } when (io.req.valid) { when (io.req.bits.rw) { - perms.write(io.req.bits.idx, Fill(NWAYS, io.req.bits.data.state), FillInterleaved(permBits, io.req.bits.way_en)) - tags.write(io.req.bits.idx, Fill(NWAYS, io.req.bits.data.tag), FillInterleaved(TAG_BITS, io.req.bits.way_en)) + perms.write(io.req.bits.idx, Fill(conf.ways, io.req.bits.data.state), FillInterleaved(permBits, io.req.bits.way_en)) + tags.write(io.req.bits.idx, Fill(conf.ways, io.req.bits.data.tag), FillInterleaved(conf.tagbits, io.req.bits.way_en)) } .otherwise { raddr := io.req.bits.idx @@ -629,9 +624,9 @@ class MetaDataArrayArray(lines: Int) extends Component { } val perm = perms(raddr) - for(w <- 0 until NWAYS) { + for (w <- 0 until conf.ways) { io.resp(w).state := perm(permBits*(w+1)-1, permBits*w) - io.resp(w).tag := tag(TAG_BITS*(w+1)-1, TAG_BITS*w) + io.resp(w).tag := tag(conf.tagbits*(w+1)-1, conf.tagbits*w) } io.way_en := way_en_ @@ -639,7 +634,7 @@ class MetaDataArrayArray(lines: Int) extends Component { io.state_req.ready := Bool(true) } -class DataArray(lines: Int) extends Component { +class DataArray(lines: Int)(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new DataArrayReq() }.flip val resp = Bits(width = MEM_DATA_BITS, dir = OUTPUT) @@ -659,19 +654,19 @@ class DataArray(lines: Int) extends Component { io.req.ready := Bool(true) } -class DataArrayArray(lines: Int) extends Component 
{ +class DataArrayArray(lines: Int)(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new DataArrayReq() }.flip - val resp = Vec(NWAYS){ Bits(dir = OUTPUT, width = MEM_DATA_BITS) } - val way_en = Bits(width = NWAYS, dir = OUTPUT) + val resp = Vec(conf.ways){ Bits(OUTPUT, MEM_DATA_BITS) } + val way_en = Bits(OUTPUT, conf.ways) } - val way_en_ = Reg { Bits(width=NWAYS) } + val way_en_ = Reg { Bits(width = conf.ways) } when (io.req.valid && io.req.ready) { way_en_ := io.req.bits.way_en } - for(w <- 0 until NWAYS) { + for (w <- 0 until conf.ways) { val way = new DataArray(lines) way.io.req.bits <> io.req.bits way.io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool @@ -713,24 +708,28 @@ class AMOALU extends Component { io.out := Mux(word, Cat(out(31,0), out(31,0)).toUFix, out) } -class HellaCacheReq extends Bundle { +class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { val kill = Bool() val typ = Bits(width = 3) - val idx = Bits(width = PGIDX_BITS) - val ppn = Bits(width = PPN_BITS) - val data = Bits(width = 64) - val tag = Bits(width = DCACHE_TAG_BITS) + val idx = Bits(width = conf.pgidxbits) + val ppn = Bits(width = conf.ppnbits) + val data = Bits(width = conf.databits) + val tag = Bits(width = conf.reqtagbits) val cmd = Bits(width = 4) + + override def clone = new HellaCacheReq().asInstanceOf[this.type] } -class HellaCacheResp extends Bundle { +class HellaCacheResp(implicit conf: DCacheConfig) extends Bundle { val miss = Bool() val nack = Bool() val replay = Bool() val typ = Bits(width = 3) - val data = Bits(width = 64) - val data_subword = Bits(width = 64) - val tag = Bits(width = DCACHE_TAG_BITS) + val data = Bits(width = conf.databits) + val data_subword = Bits(width = conf.databits) + val tag = Bits(width = conf.reqtagbits) + + override def clone = new HellaCacheResp().asInstanceOf[this.type] } class AlignmentExceptions extends Bundle { @@ -743,29 +742,27 @@ class HellaCacheExceptions extends 
Bundle { } // interface between D$ and processor/DTLB -class ioHellaCache extends Bundle { +class ioHellaCache(implicit conf: DCacheConfig) extends Bundle { val req = (new FIFOIO){ new HellaCacheReq } val resp = (new PipeIO){ new HellaCacheResp }.flip val xcpt = (new HellaCacheExceptions).asInput } -class HellaCache(implicit conf: RocketConfiguration) extends Component { +class HellaCache(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val cpu = (new ioHellaCache).flip val mem = new ioTileLink } - val lines = 1 << IDX_BITS - val addrbits = PADDR_BITS - val indexbits = IDX_BITS - val offsetbits = OFFSET_BITS - val tagmsb = PADDR_BITS-1 - val taglsb = indexbits+offsetbits + val lines = 1 << conf.idxbits + val indexbits = conf.idxbits + val tagmsb = conf.paddrbits-1 + val taglsb = indexbits+conf.offbits val tagbits = tagmsb-taglsb+1 val indexmsb = taglsb-1 - val indexlsb = offsetbits + val indexlsb = conf.offbits val offsetmsb = indexlsb-1 - val offsetlsb = log2Up(CPU_DATA_BITS/8) + val offsetlsb = log2Up(conf.databytes) val ramindexlsb = log2Up(MEM_DATA_BITS/8) val early_nack = Reg { Bool() } @@ -821,9 +818,9 @@ class HellaCache(implicit conf: RocketConfiguration) extends Component { r_cpu_req_tag := io.cpu.req.bits.tag } when (prober.io.meta_req.valid) { - r_cpu_req_idx := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, mshr.io.data_req.bits.offset)(PGIDX_BITS-1,0) + r_cpu_req_idx := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, mshr.io.data_req.bits.offset)(conf.pgidxbits-1,0) } - when (replay_amo_val) { + when (mshr.io.data_req.valid) { r_cpu_req_idx := Cat(mshr.io.data_req.bits.idx, mshr.io.data_req.bits.offset) r_cpu_req_cmd := mshr.io.data_req.bits.cmd r_cpu_req_type := mshr.io.data_req.bits.typ @@ -860,17 +857,17 @@ class HellaCache(implicit conf: RocketConfiguration) extends Component { meta_arb.io.in(3).valid := io.cpu.req.valid meta_arb.io.in(3).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) 
meta_arb.io.in(3).bits.rw := Bool(false) - meta_arb.io.in(3).bits.way_en := ~UFix(0, NWAYS) + meta_arb.io.in(3).bits.way_en := Fix(-1) val early_tag_nack = !meta_arb.io.in(3).ready - val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(PGIDX_BITS-OFFSET_BITS), io.cpu.req.bits.ppn) + val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), io.cpu.req.bits.ppn) val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) - val tag_match_arr = (0 until NWAYS).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_match_arr = (0 until conf.ways).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR - val tag_match_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(NWAYS-1, 0) //TODO: use Vec - val tag_hit_arr = (0 until NWAYS).map( w => conf.co.isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) + val tag_match_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(conf.ways-1, 0) //TODO: use Vec + val tag_hit_arr = (0 until conf.ways).map( w => conf.co.isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_hit = Cat(Bits(0),tag_hit_arr:_*).orR - val meta_resp_way_oh = Mux(meta.io.way_en === ~UFix(0, NWAYS), tag_match_way_oh, meta.io.way_en) - val data_resp_way_oh = Mux(data.io.way_en === ~UFix(0, NWAYS), tag_match_way_oh, data.io.way_en) + val meta_resp_way_oh = Mux(meta.io.way_en.andR, tag_match_way_oh, meta.io.way_en) + val data_resp_way_oh = Mux(data.io.way_en.andR, tag_match_way_oh, data.io.way_en) val meta_resp_mux = Mux1H(meta_resp_way_oh, meta.io.resp) val data_resp_mux = Mux1H(data_resp_way_oh, data.io.resp) @@ -881,9 +878,8 @@ class HellaCache(implicit conf: RocketConfiguration) extends Component { wb.io.probe_rep_data <> io.mem.probe_rep_data // replacement policy - val replacer = new 
RandomReplacementWayGen() - replacer.io.way_en := ~UFix(0, NWAYS) - val replaced_way_oh = Mux(flusher.io.mshr_req.valid, r_way_oh, UFixToOH(replacer.io.way_id, NWAYS)) + val replacer = new RandomReplacement + val replaced_way_oh = Mux(flusher.io.mshr_req.valid, r_way_oh, UFixToOH(replacer.way)) val meta_wb_mux = Mux1H(replaced_way_oh, meta.io.resp) // refill response @@ -900,8 +896,8 @@ class HellaCache(implicit conf: RocketConfiguration) extends Component { data_arb.io.in(4).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) data_arb.io.in(4).bits.rw := Bool(false) data_arb.io.in(4).valid := io.cpu.req.valid && req_read - data_arb.io.in(4).bits.way_en := ~UFix(0, NWAYS) // intiate load on all ways, mux after tag check - val early_load_nack = req_read && !data_arb.io.in(4).ready + data_arb.io.in(4).bits.way_en := Fix(-1) // intiate load on all ways, mux after tag check + val early_load_nack = !data_arb.io.in(4).ready // store hits and AMO hits and misses use a pending store register. // we nack new stores if a pending store can't retire for some reason. @@ -963,7 +959,7 @@ class HellaCache(implicit conf: RocketConfiguration) extends Component { mshr.io.mem_abort.bits := io.mem.xact_abort.bits io.mem.xact_abort.ready := Bool(true) mshr.io.meta_req <> meta_arb.io.in(1) - replacer.io.pick_new_way := mshr.io.req.valid && mshr.io.req.ready + when (mshr.io.req.fire()) { replacer.miss } // replays val replay = mshr.io.data_req.bits @@ -992,13 +988,11 @@ class HellaCache(implicit conf: RocketConfiguration) extends Component { // store write mask generation. // assumes store replays are higher-priority than pending stores. 
- val maskgen = new StoreMaskGen val store_offset = Mux(!replay_fire, p_store_idx(offsetmsb,0), replay.offset) - maskgen.io.typ := Mux(!replay_fire, p_store_type, replay.typ) - maskgen.io.addr := store_offset(offsetlsb-1,0) - val store_wmask_wide = maskgen.io.wmask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2Up(CPU_DATA_BITS/8))).toUFix + val store_type = Mux(!replay_fire, p_store_type, replay.typ) + val store_wmask_wide = StoreGen(store_type, store_offset, Bits(0)).mask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2Up(conf.databytes))).toUFix val store_data = Mux(!replay_fire, p_store_data, replay.data) - val store_data_wide = Fill(MEM_DATA_BITS/CPU_DATA_BITS, store_data) + val store_data_wide = Fill(MEM_DATA_BITS/conf.databits, store_data) data_arb.io.in(1).bits.data := store_data_wide data_arb.io.in(1).bits.wmask := store_wmask_wide data_arb.io.in(2).bits.data := store_data_wide @@ -1006,15 +1000,12 @@ class HellaCache(implicit conf: RocketConfiguration) extends Component { // load data subword mux/sign extension. // subword loads are delayed by one cycle. 
- val loadgen = new LoadDataGen - val loadgen_use_replay = Reg(replay_fire) - loadgen.io.typ := Mux(loadgen_use_replay, Reg(replay.typ), r_cpu_req_type) - loadgen.io.addr := Mux(loadgen_use_replay, Reg(replay.offset), r_cpu_req_idx)(ramindexlsb-1,0) - loadgen.io.din := data_resp_mux + val loadgen_data = data_resp_mux >> Cat(r_cpu_req_idx(log2Up(MEM_DATA_BITS/8)-1,3), Bits(0,6)) + val loadgen = LoadGen(r_cpu_req_type, r_cpu_req_idx, loadgen_data) amoalu.io.cmd := p_store_cmd amoalu.io.typ := p_store_type - amoalu.io.lhs := loadgen.io.r_dout.toUFix + amoalu.io.lhs := Reg(loadgen.word).toUFix amoalu.io.rhs := p_store_data.toUFix early_nack := early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo @@ -1036,9 +1027,9 @@ class HellaCache(implicit conf: RocketConfiguration) extends Component { io.cpu.resp.bits.replay := mshr.io.cpu_resp_val io.cpu.resp.bits.miss := r_cpu_req_val_ && (!tag_hit || mshr.io.secondary_miss) && r_req_read io.cpu.resp.bits.tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag) - io.cpu.resp.bits.typ := loadgen.io.typ - io.cpu.resp.bits.data := loadgen.io.dout - io.cpu.resp.bits.data_subword := loadgen.io.r_dout_subword + io.cpu.resp.bits.typ := r_cpu_req_type + io.cpu.resp.bits.data := loadgen.word + io.cpu.resp.bits.data_subword := Reg(loadgen.byte) val xact_init_arb = (new Arbiter(2)) { new TransactionInit } xact_init_arb.io.in(0) <> wb.io.mem_req diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 796d8a6d..88abdbf9 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -5,14 +5,14 @@ import Node._ import Constants._ import scala.math._ -class ioPTW(n: Int) extends Bundle +class ioPTW(n: Int)(implicit conf: RocketConfiguration) extends Bundle { val requestor = Vec(n) { new IOTLBPTW }.flip - val mem = new ioHellaCache + val mem = new ioHellaCache()(conf.dcache) val ptbr = UFix(INPUT, PADDR_BITS) } -class rocketPTW(n: Int) 
extends Component +class rocketPTW(n: Int)(implicit conf: RocketConfiguration) extends Component { val io = new ioPTW(n) diff --git a/rocket/src/main/scala/queues.scala b/rocket/src/main/scala/queues.scala deleted file mode 100644 index 204f40f0..00000000 --- a/rocket/src/main/scala/queues.scala +++ /dev/null @@ -1,31 +0,0 @@ -package rocket - -import Chisel._ -import Node._ -import Constants._ - -class SkidBuffer[T <: Data](entries: Int, lateEnq: Boolean = false)(data: => T) extends Component -{ - val io = new Bundle { - val enq = new FIFOIO()(data).flip - val deq = new FIFOIO()(data) - } - - require(entries >= 2) - val fq = new Queue(1, flow = true)(data) - val pq = new Queue(entries-1, pipe = true)(data) - val (iq, oq) = if (lateEnq) (pq, fq) else (fq, pq) - - iq.io.enq <> io.enq - oq.io.enq <> iq.io.deq - io.deq <> oq.io.deq -} - -object SkidBuffer -{ - def apply[T <: Data](enq: FIFOIO[T], entries: Int = 2): FIFOIO[T] = { - val s = new SkidBuffer(entries)(enq.bits.clone) - s.io.enq <> enq - s.io.deq - } -} diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 6b19ad4b..81090090 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -6,20 +6,26 @@ import Constants._ import uncore._ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, - icache: ICacheConfig) - -class Tile(resetSignal: Bool = null)(implicit conf: RocketConfiguration) extends Component(resetSignal) + icache: ICacheConfig, dcache: DCacheConfig) { + val dcacheReqTagBits = 9 // enforce compliance with require() +} + +class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) +{ + implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(DMEM_PORTS)) + implicit val conf = confIn.copy(dcache = dcConf) + val io = new Bundle { val tilelink = new ioTileLink val host = new ioHTIF(conf.ntiles) } - + val cpu = new rocketProc - val icache = new 
Frontend(conf.icache) + val icache = new Frontend()(confIn.icache) val dcache = new HellaCache - val arbiter = new rocketMemArbiter(DMEM_PORTS) + val arbiter = new MemArbiter(DMEM_PORTS) arbiter.io.requestor(DMEM_DCACHE) <> dcache.io.mem arbiter.io.requestor(DMEM_ICACHE) <> icache.io.mem @@ -34,7 +40,7 @@ class Tile(resetSignal: Bool = null)(implicit conf: RocketConfiguration) extends if (HAVE_VEC) { - val vicache = new Frontend(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB) + val vicache = new Frontend()(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB) arbiter.io.requestor(DMEM_VICACHE) <> vicache.io.mem cpu.io.vimem <> vicache.io.cpu } From e76892f75812c718c0f1d236fda797799e1f5aaa Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 6 Nov 2012 02:55:45 -0800 Subject: [PATCH 0501/1087] remove more global constants --- rocket/src/main/scala/consts.scala | 5 -- rocket/src/main/scala/cpu.scala | 116 ++++++++++------------------- rocket/src/main/scala/ctrl.scala | 1 - rocket/src/main/scala/ptw.scala | 4 +- 4 files changed, 42 insertions(+), 84 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 055ec62b..c8b8629d 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -198,11 +198,6 @@ trait VectorOpConstants { } abstract trait ArbiterConstants extends TileConfigConstants { - val DTLB_PORTS = 3 - val DTLB_CPU = 0 - val DTLB_VEC = 1 - val DTLB_VPF = 2 - val DCACHE_PORTS = 3 val DCACHE_CPU = 0 val DCACHE_PTW = 1 diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/cpu.scala index 95345f06..0ce54a75 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/cpu.scala @@ -20,104 +20,60 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component val ctrl = new Control val dpath = new Datapath - val dtlb = new rocketTLB(DTLB_ENTRIES); - val ptw = new rocketPTW(if (HAVE_VEC) 3 else 2) - val arb = new 
HellaCacheArbiter(DCACHE_PORTS) + val ptw = Vec(0) { new IOTLBPTW } + val arb = new HellaCacheArbiter(DCACHE_PORTS) var vu: vu = null if (HAVE_VEC) { vu = new vu() - // cpu, vector prefetch, and vector use the DTLB - val dtlbarb = new RRArbiter(DTLB_PORTS)({new ioDTLB_CPU_req_bundle()}) - val dtlbchosen = Reg(resetVal=Bits(DTLB_CPU,log2Up(DTLB_PORTS))) - when( dtlb.io.cpu_req.ready && dtlbarb.io.out.valid ) { dtlbchosen := dtlbarb.io.chosen } - // tlb respones come out a cycle later - val chosen_vec = dtlbchosen === Bits(DTLB_VEC) - val chosen_pf = dtlbchosen === Bits(DTLB_VPF) - val chosen_cpu = dtlbchosen === Bits(DTLB_CPU) + val vdtlb = new rocketTLB(8) + vdtlb.io.invalidate := dpath.io.ptbr_wen + vdtlb.io.status := dpath.io.ctrl.status + ptw += vdtlb.io.ptw - dtlbarb.io.in(DTLB_VEC) <> vu.io.vec_tlb_req - - vu.io.vec_tlb_resp.xcpt_ld := chosen_vec && dtlb.io.cpu_resp.xcpt_ld - vu.io.vec_tlb_resp.xcpt_st := chosen_vec && dtlb.io.cpu_resp.xcpt_st + vdtlb.io.cpu_req <> vu.io.vec_tlb_req + vu.io.vec_tlb_resp := vdtlb.io.cpu_resp vu.io.vec_tlb_resp.xcpt_pf := Bool(false) - vu.io.vec_tlb_resp.miss := chosen_vec && dtlb.io.cpu_resp.miss - vu.io.vec_tlb_resp.ppn := dtlb.io.cpu_resp.ppn - dtlbarb.io.in(DTLB_VPF) <> vu.io.vec_pftlb_req + val pftlb = new rocketTLB(2) + pftlb.io.invalidate := dpath.io.ptbr_wen + pftlb.io.status := dpath.io.ctrl.status + pftlb.io.cpu_req <> vu.io.vec_pftlb_req + ptw += pftlb.io.ptw + vu.io.vec_pftlb_resp := pftlb.io.cpu_resp vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) vu.io.vec_pftlb_resp.xcpt_st := Bool(false) - vu.io.vec_pftlb_resp.xcpt_pf := chosen_pf && dtlb.io.cpu_resp.xcpt_pf - vu.io.vec_pftlb_resp.miss := chosen_pf && dtlb.io.cpu_resp.miss - vu.io.vec_pftlb_resp.ppn := dtlb.io.cpu_resp.ppn - - // connect DTLB to ctrl+dpath - dtlbarb.io.in(DTLB_CPU).valid := ctrl.io.dtlb_val - dtlbarb.io.in(DTLB_CPU).bits.kill := ctrl.io.dtlb_kill - dtlbarb.io.in(DTLB_CPU).bits.cmd := ctrl.io.dmem.req.bits.cmd - dtlbarb.io.in(DTLB_CPU).bits.asid := 
UFix(0) - dtlbarb.io.in(DTLB_CPU).bits.vpn := dpath.io.dtlb.vpn - ctrl.io.dtlb_rdy := dtlbarb.io.in(DTLB_CPU).ready - - ctrl.io.xcpt_dtlb_ld := chosen_cpu && dtlb.io.cpu_resp.xcpt_ld - ctrl.io.xcpt_dtlb_st := chosen_cpu && dtlb.io.cpu_resp.xcpt_st - ctrl.io.dtlb_miss := chosen_cpu && dtlb.io.cpu_resp.miss - - dtlb.io.cpu_req <> dtlbarb.io.out - } - else - { - // connect DTLB to ctrl+dpath - dtlb.io.cpu_req.valid := ctrl.io.dtlb_val - dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill - dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd - dtlb.io.cpu_req.bits.asid := UFix(0) - dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn - ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld - ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st - ctrl.io.dtlb_rdy := dtlb.io.cpu_req.ready - ctrl.io.dtlb_miss := dtlb.io.cpu_resp.miss } + // connect DTLB to ctrl+dpath + val dtlb = new rocketTLB(DTLB_ENTRIES) dtlb.io.invalidate := dpath.io.ptbr_wen dtlb.io.status := dpath.io.ctrl.status + ptw += dtlb.io.ptw + + dtlb.io.cpu_req.valid := ctrl.io.dtlb_val + dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill + dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd + dtlb.io.cpu_req.bits.asid := UFix(0) + dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn + ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld + ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st + ctrl.io.dtlb_rdy := dtlb.io.cpu_req.ready + ctrl.io.dtlb_miss := dtlb.io.cpu_resp.miss arb.io.requestor(DCACHE_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn - ctrl.io.dmem.req.ready := dtlb.io.cpu_req.ready && arb.io.requestor(DCACHE_CPU).req.ready - // connect page table walker to TLBs, page table base register (from PCR) - // and D$ arbiter (selects between requests from pipeline and PTW, PTW has priority) - ptw.io.requestor(0) <> io.imem.ptw - ptw.io.requestor(1) <> dtlb.io.ptw - ptw.io.ptbr := dpath.io.ptbr; - arb.io.requestor(DCACHE_PTW) <> ptw.io.mem - arb.io.mem <> io.dmem + ctrl.io.dpath <> dpath.io.ctrl + dpath.io.host <> io.host - ctrl.io.dpath <> 
dpath.io.ctrl; - dpath.io.host <> io.host; - - // FIXME: try to make this more compact - - // connect I$ ctrl.io.imem <> io.imem dpath.io.imem <> io.imem - // connect arbiter to ctrl+dpath+DTLB - //TODO: views on nested bundles? - arb.io.requestor(DCACHE_CPU).resp <> ctrl.io.dmem.resp - arb.io.requestor(DCACHE_CPU).xcpt <> ctrl.io.dmem.xcpt - arb.io.requestor(DCACHE_CPU).resp <> dpath.io.dmem.resp - arb.io.requestor(DCACHE_CPU).req.valid := ctrl.io.dmem.req.valid - ctrl.io.dmem.req.ready := arb.io.requestor(DCACHE_CPU).req.ready - arb.io.requestor(DCACHE_CPU).req.bits.kill := ctrl.io.dmem.req.bits.kill - arb.io.requestor(DCACHE_CPU).req.bits.cmd := ctrl.io.dmem.req.bits.cmd - arb.io.requestor(DCACHE_CPU).req.bits.typ := ctrl.io.dmem.req.bits.typ - arb.io.requestor(DCACHE_CPU).req.bits.idx := dpath.io.dmem.req.bits.idx - arb.io.requestor(DCACHE_CPU).req.bits.tag := dpath.io.dmem.req.bits.tag - arb.io.requestor(DCACHE_CPU).req.bits.data := dpath.io.dmem.req.bits.data + ctrl.io.dmem <> arb.io.requestor(DCACHE_CPU) + dpath.io.dmem <> arb.io.requestor(DCACHE_CPU) var fpu: rocketFPU = null if (HAVE_FPU) @@ -132,7 +88,7 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component dpath.io.vec_ctrl <> ctrl.io.vec_dpath // hooking up vector I$ - ptw.io.requestor(2) <> io.vimem.ptw + ptw += io.vimem.ptw io.vimem.req.bits.status := dpath.io.ctrl.status io.vimem.req.bits.pc := vu.io.imem_req.bits io.vimem.req.valid := vu.io.imem_req.valid @@ -232,4 +188,12 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component fpu.io.dfma.valid := Bool(false) } } + + ptw += io.imem.ptw + val thePTW = new PTW(ptw.length) + thePTW.io.requestor <> ptw + thePTW.io.ptbr := dpath.io.ptbr; + arb.io.requestor(DCACHE_PTW) <> thePTW.io.mem + + arb.io.mem <> io.dmem } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 987f5b20..a36ce98f 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -735,7 +735,6 
@@ class Control(implicit conf: RocketConfiguration) extends Component id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || id_fp_val && id_stall_fpu || id_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || - (id_sync === SYNC_D || id_sync === SYNC_I) && !io.dmem.req.ready || vec_stalld ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || id_interrupt diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 88abdbf9..bf55f259 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -12,7 +12,7 @@ class ioPTW(n: Int)(implicit conf: RocketConfiguration) extends Bundle val ptbr = UFix(INPUT, PADDR_BITS) } -class rocketPTW(n: Int)(implicit conf: RocketConfiguration) extends Component +class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component { val io = new ioPTW(n) @@ -34,7 +34,7 @@ class rocketPTW(n: Int)(implicit conf: RocketConfiguration) extends Component val vpn_idxs = (1 until levels).map(i => r_req_vpn((levels-i)*bitsPerLevel-1, (levels-i-1)*bitsPerLevel)) val vpn_idx = (2 until levels).foldRight(vpn_idxs(0))((i,j) => Mux(count === UFix(i-1), vpn_idxs(i-1), j)) - val arb = new Arbiter(n)(UFix(width = VPN_BITS)) + val arb = new RRArbiter(n)(UFix(width = VPN_BITS)) arb.io.in <> io.requestor.map(_.req) arb.io.out.ready := state === s_ready From 4d1ca8ba3ae632c9212109983dd14b36884962f1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 6 Nov 2012 08:13:44 -0800 Subject: [PATCH 0502/1087] remove more global consts; refactor DTLBs D$ now contains DTLB. provide full VAddr with initial request. VU now has its own DTLBs. 
--- rocket/src/main/scala/arbiter.scala | 108 ++++++--------- rocket/src/main/scala/consts.scala | 19 +-- .../src/main/scala/{cpu.scala => core.scala} | 130 ++++++------------ rocket/src/main/scala/ctrl.scala | 15 +- rocket/src/main/scala/dpath.scala | 13 +- rocket/src/main/scala/fpu.scala | 18 +-- rocket/src/main/scala/icache.scala | 7 +- rocket/src/main/scala/nbdcache.scala | 81 ++++++----- rocket/src/main/scala/ptw.scala | 39 ++++-- rocket/src/main/scala/tile.scala | 24 ++-- rocket/src/main/scala/tlb.scala | 33 ++--- 11 files changed, 206 insertions(+), 281 deletions(-) rename rocket/src/main/scala/{cpu.scala => core.scala} (61%) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 5e5669ea..8fb5d3f7 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -12,58 +12,40 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp val mem = new ioHellaCache()(conf.dcache) } - var req_val = Bool(false) - var req_rdy = io.mem.req.ready - for (i <- 0 until n) - { - io.requestor(i).req.ready := req_rdy - req_val = req_val || io.requestor(i).req.valid - req_rdy = req_rdy && !io.requestor(i).req.valid + val r_valid = io.requestor.map(r => Reg(r.req.valid)) + + io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_) + io.requestor(0).req.ready := io.mem.req.ready + for (i <- 1 until n) + io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid + + io.mem.req.bits := io.requestor(n-1).req.bits + io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UFix(n-1, log2Up(n))) + for (i <- n-2 to 0 by -1) { + val req = io.requestor(i).req + when (req.valid) { + io.mem.req.bits.cmd := req.bits.cmd + io.mem.req.bits.typ := req.bits.typ + io.mem.req.bits.addr := req.bits.addr + io.mem.req.bits.phys := req.bits.phys + io.mem.req.bits.tag := Cat(req.bits.tag, UFix(i, log2Up(n))) + } + when (r_valid(i)) { + io.mem.req.bits.kill := 
req.bits.kill + io.mem.req.bits.data := req.bits.data + } } - var req_cmd = io.requestor(n-1).req.bits.cmd - var req_type = io.requestor(n-1).req.bits.typ - var req_idx = io.requestor(n-1).req.bits.idx - var req_ppn = io.requestor(n-1).req.bits.ppn - var req_data = io.requestor(n-1).req.bits.data - var req_kill = io.requestor(n-1).req.bits.kill - var req_tag = io.requestor(n-1).req.bits.tag - for (i <- n-1 to 0 by -1) - { - val r = io.requestor(i).req - req_cmd = Mux(r.valid, r.bits.cmd, req_cmd) - req_type = Mux(r.valid, r.bits.typ, req_type) - req_idx = Mux(r.valid, r.bits.idx, req_idx) - req_ppn = Mux(Reg(r.valid), r.bits.ppn, req_ppn) - req_data = Mux(Reg(r.valid), r.bits.data, req_data) - req_kill = Mux(Reg(r.valid), r.bits.kill, req_kill) - req_tag = Mux(r.valid, Cat(r.bits.tag, UFix(i, log2Up(n))), req_tag) - } - - io.mem.req.valid := req_val - io.mem.req.bits.cmd := req_cmd - io.mem.req.bits.typ := req_type - io.mem.req.bits.idx := req_idx - io.mem.req.bits.ppn := req_ppn - io.mem.req.bits.data := req_data - io.mem.req.bits.kill := req_kill - io.mem.req.bits.tag := req_tag - - for (i <- 0 until n) - { - val r = io.requestor(i).resp - val x = io.requestor(i).xcpt + for (i <- 0 until n) { + val resp = io.requestor(i).resp val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i) - x.ma.ld := io.mem.xcpt.ma.ld && Reg(io.requestor(i).req.valid) - x.ma.st := io.mem.xcpt.ma.st && Reg(io.requestor(i).req.valid) - r.valid := io.mem.resp.valid && tag_hit - r.bits.miss := io.mem.resp.bits.miss && tag_hit - r.bits.nack := io.mem.resp.bits.nack && Reg(io.requestor(i).req.valid) - r.bits.replay := io.mem.resp.bits.replay && tag_hit - r.bits.data := io.mem.resp.bits.data - r.bits.data_subword := io.mem.resp.bits.data_subword - r.bits.typ := io.mem.resp.bits.typ - r.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) + resp.valid := io.mem.resp.valid && tag_hit + io.requestor(i).xcpt := io.mem.xcpt + resp.bits := io.mem.resp.bits + resp.bits.tag := 
io.mem.resp.bits.tag >> UFix(log2Up(n)) + resp.bits.miss := io.mem.resp.bits.miss && tag_hit + resp.bits.nack := io.mem.resp.bits.nack && r_valid(i) + resp.bits.replay := io.mem.resp.bits.replay && tag_hit } } @@ -80,15 +62,6 @@ class MemArbiter(n: Int) extends Component { val requestor = Vec(n) { new ioUncachedRequestor }.flip } - var xi_val = Bool(false) - var xi_rdy = io.mem.xact_init.ready - for (i <- 0 until n) - { - io.requestor(i).xact_init.ready := xi_rdy - xi_val = xi_val || io.requestor(i).xact_init.valid - xi_rdy = xi_rdy && !io.requestor(i).xact_init.valid - } - var xi_bits = new TransactionInit xi_bits := io.requestor(n-1).xact_init.bits xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2Up(n))) @@ -101,24 +74,21 @@ class MemArbiter(n: Int) extends Component { xi_bits = Mux(io.requestor(i).xact_init.valid, my_xi_bits, xi_bits) } - io.mem.xact_init.valid := xi_val io.mem.xact_init.bits := xi_bits - - var xf_val = Bool(false) - var xf_rdy = io.mem.xact_finish.ready - for (i <- 0 until n) - { - io.requestor(i).xact_finish.ready := xf_rdy - xf_val = xf_val || io.requestor(i).xact_finish.valid - xf_rdy = xf_rdy && !io.requestor(i).xact_finish.valid - } + io.mem.xact_init.valid := io.requestor.map(_.xact_init.valid).reduce(_||_) + io.requestor(0).xact_init.ready := io.mem.xact_init.ready + for (i <- 1 until n) + io.requestor(i).xact_init.ready := io.requestor(i-1).xact_init.ready && !io.requestor(i-1).xact_init.valid var xf_bits = io.requestor(n-1).xact_finish.bits for (i <- n-2 to 0 by -1) xf_bits = Mux(io.requestor(i).xact_finish.valid, io.requestor(i).xact_finish.bits, xf_bits) - io.mem.xact_finish.valid := xf_val io.mem.xact_finish.bits := xf_bits + io.mem.xact_finish.valid := io.requestor.map(_.xact_finish.valid).reduce(_||_) + io.requestor(0).xact_finish.ready := io.mem.xact_finish.ready + for (i <- 1 until n) + io.requestor(i).xact_finish.ready := io.requestor(i-1).xact_finish.ready && 
!io.requestor(i-1).xact_finish.valid for (i <- 0 until n) { diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index c8b8629d..b482673d 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -154,12 +154,7 @@ trait InterruptConstants { val IRQ_TIMER = 7 } -abstract trait RocketDcacheConstants extends ArbiterConstants with uncore.constants.AddressConstants { - val NMSHR = if (HAVE_VEC) 4 else 2 // number of primary misses - require(log2Up(NMSHR)+3 <= uncore.Constants.TILE_XACT_ID_BITS) - val NRPQ = 16; // number of secondary misses - val NSDQ = 17; // number of secondary stores/AMOs - val OFFSET_BITS = 6; // log2(cache line size in bytes) +abstract trait RocketDcacheConstants extends TileConfigConstants with uncore.constants.CacheConstants with uncore.constants.AddressConstants { require(OFFSET_BITS == log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES)) require(OFFSET_BITS <= uncore.Constants.X_INIT_WRITE_MASK_BITS) require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS) @@ -196,15 +191,3 @@ trait VectorOpConstants { val VIMM2_ALU = UFix(1, 1) val VIMM2_X = UFix(0, 1) } - -abstract trait ArbiterConstants extends TileConfigConstants { - val DCACHE_PORTS = 3 - val DCACHE_CPU = 0 - val DCACHE_PTW = 1 - val DCACHE_VU = 2 - - val DMEM_PORTS = if (HAVE_VEC) 3 else 2 - val DMEM_DCACHE = 0 - val DMEM_ICACHE = 1 - val DMEM_VICACHE = 2 -} diff --git a/rocket/src/main/scala/cpu.scala b/rocket/src/main/scala/core.scala similarity index 61% rename from rocket/src/main/scala/cpu.scala rename to rocket/src/main/scala/core.scala index 0ce54a75..d27dc1e3 100644 --- a/rocket/src/main/scala/cpu.scala +++ b/rocket/src/main/scala/core.scala @@ -13,87 +13,57 @@ class ioRocket(implicit conf: RocketConfiguration) extends Bundle val dmem = new ioHellaCache()(conf.dcache) } -class rocketProc(implicit conf: RocketConfiguration) extends Component +class Core(implicit conf: RocketConfiguration) extends 
Component { val io = new ioRocket val ctrl = new Control val dpath = new Datapath - val ptw = Vec(0) { new IOTLBPTW } - val arb = new HellaCacheArbiter(DCACHE_PORTS) - - var vu: vu = null - if (HAVE_VEC) - { - vu = new vu() - - val vdtlb = new rocketTLB(8) - vdtlb.io.invalidate := dpath.io.ptbr_wen - vdtlb.io.status := dpath.io.ctrl.status - ptw += vdtlb.io.ptw - - vdtlb.io.cpu_req <> vu.io.vec_tlb_req - vu.io.vec_tlb_resp := vdtlb.io.cpu_resp - vu.io.vec_tlb_resp.xcpt_pf := Bool(false) - - val pftlb = new rocketTLB(2) - pftlb.io.invalidate := dpath.io.ptbr_wen - pftlb.io.status := dpath.io.ctrl.status - pftlb.io.cpu_req <> vu.io.vec_pftlb_req - ptw += pftlb.io.ptw - - vu.io.vec_pftlb_resp := pftlb.io.cpu_resp - vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) - vu.io.vec_pftlb_resp.xcpt_st := Bool(false) - } - - // connect DTLB to ctrl+dpath - val dtlb = new rocketTLB(DTLB_ENTRIES) - dtlb.io.invalidate := dpath.io.ptbr_wen - dtlb.io.status := dpath.io.ctrl.status - ptw += dtlb.io.ptw - - dtlb.io.cpu_req.valid := ctrl.io.dtlb_val - dtlb.io.cpu_req.bits.kill := ctrl.io.dtlb_kill - dtlb.io.cpu_req.bits.cmd := ctrl.io.dmem.req.bits.cmd - dtlb.io.cpu_req.bits.asid := UFix(0) - dtlb.io.cpu_req.bits.vpn := dpath.io.dtlb.vpn - ctrl.io.xcpt_dtlb_ld := dtlb.io.cpu_resp.xcpt_ld - ctrl.io.xcpt_dtlb_st := dtlb.io.cpu_resp.xcpt_st - ctrl.io.dtlb_rdy := dtlb.io.cpu_req.ready - ctrl.io.dtlb_miss := dtlb.io.cpu_resp.miss - - arb.io.requestor(DCACHE_CPU).req.bits.ppn := dtlb.io.cpu_resp.ppn - ctrl.io.dpath <> dpath.io.ctrl dpath.io.host <> io.host ctrl.io.imem <> io.imem dpath.io.imem <> io.imem - ctrl.io.dmem <> arb.io.requestor(DCACHE_CPU) - dpath.io.dmem <> arb.io.requestor(DCACHE_CPU) + val dmemArb = new HellaCacheArbiter(if (HAVE_VEC) 3 else 2) + dmemArb.io.mem <> io.dmem + val dmem = dmemArb.io.requestor + dmem(1) <> ctrl.io.dmem + dmem(1) <> dpath.io.dmem - var fpu: rocketFPU = null - if (HAVE_FPU) - { - fpu = new rocketFPU(4,6) + val ptw = 
collection.mutable.ArrayBuffer(io.imem.ptw, io.dmem.ptw) + + val fpu: FPU = if (HAVE_FPU) { + val fpu = new FPU(4,6) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl - } + fpu + } else null + + if (HAVE_VEC) { + val vu = new vu() + + val vdtlb = new rocketTLB(8) + ptw += vdtlb.io.ptw + vdtlb.io.cpu_req <> vu.io.vec_tlb_req + vu.io.vec_tlb_resp := vdtlb.io.cpu_resp + vu.io.vec_tlb_resp.xcpt_pf := Bool(false) + + val pftlb = new rocketTLB(2) + pftlb.io.cpu_req <> vu.io.vec_pftlb_req + ptw += pftlb.io.ptw + vu.io.vec_pftlb_resp := pftlb.io.cpu_resp + vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) + vu.io.vec_pftlb_resp.xcpt_st := Bool(false) - if (HAVE_VEC) - { dpath.io.vec_ctrl <> ctrl.io.vec_dpath // hooking up vector I$ ptw += io.vimem.ptw - io.vimem.req.bits.status := dpath.io.ctrl.status io.vimem.req.bits.pc := vu.io.imem_req.bits io.vimem.req.valid := vu.io.imem_req.valid io.vimem.req.bits.invalidate := ctrl.io.dpath.flush_inst - io.vimem.req.bits.invalidateTLB := dpath.io.ptbr_wen vu.io.imem_resp.valid := io.vimem.resp.valid vu.io.imem_resp.bits.pc := io.vimem.resp.bits.pc vu.io.imem_resp.bits.data := io.vimem.resp.bits.data @@ -155,21 +125,16 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component vu.io.xcpt.hold := ctrl.io.vec_iface.hold // hooking up vector memory interface - arb.io.requestor(DCACHE_VU).req.valid := vu.io.dmem_req.valid - arb.io.requestor(DCACHE_VU).req.bits.kill := vu.io.dmem_req.bits.kill - arb.io.requestor(DCACHE_VU).req.bits.cmd := vu.io.dmem_req.bits.cmd - arb.io.requestor(DCACHE_VU).req.bits.typ := vu.io.dmem_req.bits.typ - arb.io.requestor(DCACHE_VU).req.bits.idx := vu.io.dmem_req.bits.idx - arb.io.requestor(DCACHE_VU).req.bits.ppn := Reg(vu.io.dmem_req.bits.ppn) - arb.io.requestor(DCACHE_VU).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) - arb.io.requestor(DCACHE_VU).req.bits.tag := vu.io.dmem_req.bits.tag + dmem(2).req.valid := vu.io.dmem_req.valid + 
dmem(2).req.bits := vu.io.dmem_req.bits + dmem(2).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) - vu.io.dmem_req.ready := arb.io.requestor(DCACHE_VU).req.ready - vu.io.dmem_resp.valid := Reg(arb.io.requestor(DCACHE_VU).resp.valid) - vu.io.dmem_resp.bits.nack := arb.io.requestor(DCACHE_VU).resp.bits.nack - vu.io.dmem_resp.bits.data := arb.io.requestor(DCACHE_VU).resp.bits.data_subword - vu.io.dmem_resp.bits.tag := Reg(arb.io.requestor(DCACHE_VU).resp.bits.tag) - vu.io.dmem_resp.bits.typ := Reg(arb.io.requestor(DCACHE_VU).resp.bits.typ) + vu.io.dmem_req.ready := dmem(2).req.ready + vu.io.dmem_resp.valid := Reg(dmem(2).resp.valid) + vu.io.dmem_resp.bits.nack := dmem(2).resp.bits.nack + vu.io.dmem_resp.bits.data := dmem(2).resp.bits.data_subword + vu.io.dmem_resp.bits.tag := Reg(dmem(2).resp.bits.tag) + vu.io.dmem_resp.bits.typ := Reg(dmem(2).resp.bits.typ) // share vector integer multiplier with rocket dpath.io.vec_imul_req <> vu.io.cp_imul_req @@ -178,22 +143,13 @@ class rocketProc(implicit conf: RocketConfiguration) extends Component // share sfma and dfma pipelines with rocket fpu.io.sfma <> vu.io.cp_sfma fpu.io.dfma <> vu.io.cp_dfma - } - else - { - arb.io.requestor(DCACHE_VU).req.valid := Bool(false) - if (HAVE_FPU) - { - fpu.io.sfma.valid := Bool(false) - fpu.io.dfma.valid := Bool(false) - } + } else if (fpu != null) { + fpu.io.sfma.valid := Bool(false) + fpu.io.dfma.valid := Bool(false) } - ptw += io.imem.ptw val thePTW = new PTW(ptw.length) - thePTW.io.requestor <> ptw - thePTW.io.ptbr := dpath.io.ptbr; - arb.io.requestor(DCACHE_PTW) <> thePTW.io.mem - - arb.io.mem <> io.dmem + ptw zip thePTW.io.requestor map { case (a, b) => a <> b } + thePTW.io.dpath <> dpath.io.ptw + dmem(0) <> thePTW.io.mem } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index a36ce98f..46afcc98 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -569,8 +569,8 @@ class 
Control(implicit conf: RocketConfiguration) extends Component (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), (mem_reg_mem_val && io.dmem.xcpt.ma.ld, UFix( 8)), (mem_reg_mem_val && io.dmem.xcpt.ma.st, UFix( 9)), - (mem_reg_mem_val && io.xcpt_dtlb_ld, UFix(10)), - (mem_reg_mem_val && io.xcpt_dtlb_st, UFix(11)))) + (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UFix(10)), + (mem_reg_mem_val && io.dmem.xcpt.pf.st, UFix(11)))) wb_reg_xcpt := mem_xcpt && !take_pc_wb && !wb_reg_replay_next when (mem_xcpt) { wb_reg_cause := mem_cause } @@ -644,7 +644,7 @@ class Control(implicit conf: RocketConfiguration) extends Component // replay inst in ex stage val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || - ex_reg_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || + ex_reg_mem_val && !io.dmem.req.ready || ex_reg_div_val && !io.dpath.div_rdy || ex_reg_mul_val && !io.dpath.mul_rdy || mem_reg_replay_next @@ -652,7 +652,7 @@ class Control(implicit conf: RocketConfiguration) extends Component // replay inst in mem stage val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val - val dmem_kill_mem = mem_reg_valid && (io.dtlb_miss || io.dmem.resp.bits.nack) + val dmem_kill_mem = mem_reg_valid && io.dmem.resp.bits.nack val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_reg_xcpt || !mem_reg_valid @@ -734,7 +734,7 @@ class Control(implicit conf: RocketConfiguration) extends Component id_ex_hazard || id_mem_hazard || id_wb_hazard || id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || id_fp_val && id_stall_fpu || - id_mem_val && !(io.dmem.req.ready && io.dtlb_rdy) || + id_mem_val && !io.dmem.req.ready || vec_stalld ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || id_interrupt @@ -772,10 +772,9 @@ class Control(implicit conf: RocketConfiguration) extends 
Component io.fpu.killx := ctrl_killx io.fpu.killm := killm_common - io.dtlb_val := ex_reg_mem_val - io.dtlb_kill := !mem_reg_valid io.dmem.req.valid := ex_reg_mem_val - io.dmem.req.bits.kill := killm_common || mem_xcpt || io.dtlb_miss + io.dmem.req.bits.kill := killm_common || mem_xcpt io.dmem.req.bits.cmd := ex_reg_mem_cmd io.dmem.req.bits.typ := ex_reg_mem_type + io.dmem.req.bits.phys := Bool(false) } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index d1f5fe69..02c21851 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -13,9 +13,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val ctrl = new ioCtrlDpath().flip val dmem = new ioHellaCache()(conf.dcache) val dtlb = new ioDTLB_CPU_req_bundle().asOutput() + val ptw = new IODatapathPTW().flip val imem = new IOCPUFrontend()(conf.icache) - val ptbr_wen = Bool(OUTPUT); - val ptbr = UFix(OUTPUT, PADDR_BITS); val fpu = new ioDpathFPU(); val vec_ctrl = new ioCtrlDpathVec().flip val vec_iface = new ioDpathVecInterface() @@ -81,9 +80,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix // hook up I$ - io.imem.req.bits.invalidateTLB := pcr.io.ptbr_wen io.imem.req.bits.currentpc := ex_reg_pc - io.imem.req.bits.status := pcr.io.status io.imem.req.bits.pc := Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), @@ -209,7 +206,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req.bits.idx := ex_effective_address + io.dmem.req.bits.addr := ex_effective_address io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) 
require(io.dmem.req.bits.tag.getWidth >= 6) @@ -225,8 +222,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.ctrl.irq_ipi := pcr.io.irq_ipi; io.ctrl.status := pcr.io.status; io.ctrl.pcr_replay := pcr.io.replay - io.ptbr := pcr.io.ptbr; - io.ptbr_wen := pcr.io.ptbr_wen; + + io.ptw.ptbr := pcr.io.ptbr + io.ptw.invalidate := pcr.io.ptbr_wen + io.ptw.status := pcr.io.status // branch resolution logic io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 69e36380..3ea883db 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -5,8 +5,9 @@ import Node._ import Constants._ import Instructions._ import Util._ +import FPConstants._ -object rocketFPConstants +object FPConstants { val FCMD_ADD = Bits("b000000") val FCMD_SUB = Bits("b000001") @@ -45,7 +46,6 @@ object rocketFPConstants val FCMD_WIDTH = 6 val FSR_WIDTH = 8 } -import rocketFPConstants._ class FPUCtrlSigs extends Bundle { @@ -64,7 +64,7 @@ class FPUCtrlSigs extends Bundle val wrfsr = Bool() } -class rocketFPUDecoder extends Component +class FPUDecoder extends Component { val io = new Bundle { val inst = Bits(INPUT, 32) @@ -378,7 +378,7 @@ class ioFMA(width: Int) extends Bundle { val exc = Bits(OUTPUT, 5) } -class rocketFPUSFMAPipe(val latency: Int) extends Component +class FPUSFMAPipe(val latency: Int) extends Component { val io = new ioFMA(33) @@ -415,7 +415,7 @@ class rocketFPUSFMAPipe(val latency: Int) extends Component io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits } -class rocketFPUDFMAPipe(val latency: Int) extends Component +class FPUDFMAPipe(val latency: Int) extends Component { val io = new ioFMA(65) @@ -452,7 +452,7 @@ class rocketFPUDFMAPipe(val latency: Int) extends Component io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits } -class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends 
Component +class FPU(sfma_latency: Int, dfma_latency: Int) extends Component { val io = new Bundle { val ctrl = new ioCtrlFPU().flip @@ -470,7 +470,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val killm = io.ctrl.killm || io.ctrl.nack_mem val wb_reg_valid = Reg(mem_reg_valid && !killm, resetVal = Bool(false)) - val fp_decoder = new rocketFPUDecoder + val fp_decoder = new FPUDecoder fp_decoder.io.inst := io.dpath.inst val ctrl = RegEn(fp_decoder.io.sigs, io.ctrl.valid) @@ -530,7 +530,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component val cmd_fma = mem_ctrl.cmd === FCMD_MADD || mem_ctrl.cmd === FCMD_MSUB || mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB - val sfma = new rocketFPUSFMAPipe(sfma_latency) + val sfma = new FPUSFMAPipe(sfma_latency) sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1) sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2) @@ -540,7 +540,7 @@ class rocketFPU(sfma_latency: Int, dfma_latency: Int) extends Component io.sfma.out := sfma.io.out io.sfma.exc := sfma.io.exc - val dfma = new rocketFPUDFMAPipe(dfma_latency) + val dfma = new FPUDFMAPipe(dfma_latency) dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1) dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 8a7492c2..df7506a0 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -29,9 +29,7 @@ case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached, class FrontendReq extends Bundle { val pc = UFix(width = VADDR_BITS+1) - val status = Bits(width = 32) val invalidate = Bool() - val invalidateTLB = Bool() val mispredict = Bool() val taken = Bool() 
val currentpc = UFix(width = VADDR_BITS+1) @@ -99,14 +97,13 @@ class Frontend(implicit c: ICacheConfig) extends Component btb.io.clr := !io.cpu.req.bits.taken btb.io.correct_pc := io.cpu.req.bits.currentpc btb.io.correct_target := io.cpu.req.bits.pc - btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.req.bits.invalidateTLB + btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.ptw.invalidate tlb.io.ptw <> io.cpu.ptw tlb.io.req.valid := !stall && !icmiss tlb.io.req.bits.vpn := s1_pc >> UFix(PGIDX_BITS) - tlb.io.req.bits.status := io.cpu.req.bits.status tlb.io.req.bits.asid := UFix(0) - tlb.io.req.bits.invalidate := io.cpu.req.bits.invalidateTLB + tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.instruction := Bool(true) icache.io.mem <> io.mem diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9b03d97d..d027eca7 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -3,16 +3,18 @@ package rocket import Chisel._ import Constants._ import uncore._ +import Util._ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, - nmshr: Int, nsecondary: Int, nsdq: Int, + nmshr: Int, nrpq: Int, nsdq: Int, reqtagbits: Int = -1) { require(isPow2(sets)) require(isPow2(ways)) // TODO: relax this def lines = sets*ways def dm = ways == 1 - def ppnbits = PPN_BITS + def ppnbits = PADDR_BITS - PGIDX_BITS + def vpnbits = VADDR_BITS - PGIDX_BITS def pgidxbits = PGIDX_BITS def offbits = OFFSET_BITS def paddrbits = ppnbits + pgidxbits @@ -161,7 +163,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val req_sec_val = Bool(INPUT) val req_sec_rdy = Bool(OUTPUT) val req_bits = new MSHRReq().asInput - val req_sdq_id = UFix(INPUT, log2Up(NSDQ)) + val req_sdq_id = UFix(INPUT, log2Up(conf.nsdq)) val idx_match = Bool(OUTPUT) val idx = Bits(OUTPUT, conf.idxbits) @@ -194,7 +196,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val 
idx_match = req.idx === io.req_bits.idx val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) - val rpq = (new Queue(NRPQ)) { new RPQEntry } + val rpq = (new Queue(conf.nrpq)) { new RPQEntry } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq rpq.io.enq.bits := io.req_bits rpq.io.enq.bits.sdq_id := io.req_sdq_id @@ -312,24 +314,24 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val cpu_resp_tag = Bits(OUTPUT, conf.reqtagbits) } - val sdq_val = Reg(resetVal = Bits(0, NSDQ)) - val sdq_alloc_id = PriorityEncoder(~sdq_val(NSDQ-1,0)) + val sdq_val = Reg(resetVal = Bits(0, conf.nsdq)) + val sdq_alloc_id = PriorityEncoder(~sdq_val(conf.nsdq-1,0)) val sdq_rdy = !sdq_val.andR val (req_read, req_write) = cpuCmdToRW(io.req.bits.cmd) val sdq_enq = io.req.valid && io.req.ready && req_write - val sdq = Mem(NSDQ) { io.req.bits.data.clone } + val sdq = Mem(conf.nsdq) { io.req.bits.data.clone } when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } - val idxMatch = Vec(NMSHR) { Bool() } - val tagList = Vec(NMSHR) { Bits() } - val wbTagList = Vec(NMSHR) { Bits() } - val memRespMux = Vec(NMSHR) { new DataArrayReq } - val meta_req_arb = (new Arbiter(NMSHR)) { new MetaArrayReq() } - val mem_req_arb = (new Arbiter(NMSHR)) { new TransactionInit } - val mem_finish_arb = (new Arbiter(NMSHR)) { new TransactionFinish } - val wb_req_arb = (new Arbiter(NMSHR)) { new WritebackReq } - val replay_arb = (new Arbiter(NMSHR)) { new Replay() } - val alloc_arb = (new Arbiter(NMSHR)) { Bool() } + val idxMatch = Vec(conf.nmshr) { Bool() } + val tagList = Vec(conf.nmshr) { Bits() } + val wbTagList = Vec(conf.nmshr) { Bits() } + val memRespMux = Vec(conf.nmshr) { new DataArrayReq } + val meta_req_arb = (new Arbiter(conf.nmshr)) { new MetaArrayReq() } + val 
mem_req_arb = (new Arbiter(conf.nmshr)) { new TransactionInit } + val mem_finish_arb = (new Arbiter(conf.nmshr)) { new TransactionFinish } + val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq } + val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } + val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.tag val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.tag @@ -341,7 +343,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { var writeback_probe_rdy = Bool(true) var refill_probe_rdy = Bool(true) - for (i <- 0 to NMSHR-1) { + for (i <- 0 to conf.nmshr-1) { val mshr = new MSHR(i) idxMatch(i) := mshr.io.idx_match @@ -400,8 +402,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd) val sdq_free = replay.valid && replay.ready && replay_write - sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, NSDQ)) | - PriorityEncoderOH(~sdq_val(NSDQ-1,0)) & Fill(NSDQ, sdq_enq && io.req.bits.tag_miss) + sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, conf.nsdq)) | + PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq && io.req.bits.tag_miss) val sdq_rdata = Reg() { io.req.bits.data.clone } sdq_rdata := sdq(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) io.data_req.bits.data := sdq_rdata @@ -711,8 +713,8 @@ class AMOALU extends Component { class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { val kill = Bool() val typ = Bits(width = 3) - val idx = Bits(width = conf.pgidxbits) - val ppn = Bits(width = conf.ppnbits) + val phys = Bool() + val addr = UFix(width = conf.ppnbits.max(conf.vpnbits+1) + conf.pgidxbits) val data = Bits(width = conf.databits) val tag = Bits(width = conf.reqtagbits) val cmd = Bits(width = 4) @@ -739,6 +741,7 @@ class AlignmentExceptions extends Bundle { class 
HellaCacheExceptions extends Bundle { val ma = new AlignmentExceptions + val pf = new AlignmentExceptions } // interface between D$ and processor/DTLB @@ -746,6 +749,7 @@ class ioHellaCache(implicit conf: DCacheConfig) extends Bundle { val req = (new FIFOIO){ new HellaCacheReq } val resp = (new PipeIO){ new HellaCacheResp }.flip val xcpt = (new HellaCacheExceptions).asInput + val ptw = new IOTLBPTW().flip } class HellaCache(implicit conf: DCacheConfig) extends Component { @@ -768,6 +772,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val early_nack = Reg { Bool() } val r_cpu_req_val_ = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false)) val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack + val r_cpu_req_phys = Reg() { Bool() } + val r_cpu_req_vpn = Reg() { UFix() } val r_cpu_req_idx = Reg() { Bits() } val r_cpu_req_cmd = Reg() { Bits() } val r_cpu_req_type = Reg() { Bits() } @@ -799,6 +805,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch val nack_hit = Bool() + val dtlb = new TLB(8) + dtlb.io.ptw <> io.cpu.ptw + dtlb.io.req.valid := r_cpu_req_val_ && r_req_readwrite && !r_cpu_req_phys + dtlb.io.req.bits.passthrough := r_cpu_req_phys + dtlb.io.req.bits.asid := UFix(0) + dtlb.io.req.bits.vpn := r_cpu_req_vpn + dtlb.io.req.bits.instruction := Bool(false) + val wb = new WritebackUnit val prober = new ProbeUnit val mshr = new MSHRFile @@ -812,7 +826,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { flusher.io.mshr_req.ready := mshr.io.req.ready when (io.cpu.req.valid) { - r_cpu_req_idx := io.cpu.req.bits.idx + r_cpu_req_phys := io.cpu.req.bits.phys + r_cpu_req_vpn := io.cpu.req.bits.addr >> taglsb + r_cpu_req_idx := io.cpu.req.bits.addr(indexmsb,0) r_cpu_req_cmd := io.cpu.req.bits.cmd r_cpu_req_type := io.cpu.req.bits.typ r_cpu_req_tag := io.cpu.req.bits.tag @@ -839,8 +855,10 @@ class 
HellaCache(implicit conf: DCacheConfig) extends Component { (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); - io.cpu.xcpt.ma.ld := r_cpu_req_val_ && !early_nack && r_req_read && misaligned - io.cpu.xcpt.ma.st := r_cpu_req_val_ && !early_nack && r_req_write && misaligned + io.cpu.xcpt.ma.ld := r_cpu_req_val_ && r_req_read && misaligned + io.cpu.xcpt.ma.st := r_cpu_req_val_ && r_req_write && misaligned + io.cpu.xcpt.pf.ld := r_cpu_req_val_ && r_req_read && dtlb.io.resp.xcpt_ld + io.cpu.xcpt.pf.st := r_cpu_req_val_ && r_req_write && dtlb.io.resp.xcpt_st // tags val meta = new MetaDataArrayArray(lines) @@ -855,11 +873,11 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { // cpu tag check meta_arb.io.in(3).valid := io.cpu.req.valid - meta_arb.io.in(3).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) + meta_arb.io.in(3).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) meta_arb.io.in(3).bits.rw := Bool(false) meta_arb.io.in(3).bits.way_en := Fix(-1) val early_tag_nack = !meta_arb.io.in(3).ready - val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), io.cpu.req.bits.ppn) + val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), dtlb.io.resp.ppn) val cpu_req_tag = Cat(cpu_req_ppn, r_cpu_req_idx)(tagmsb,taglsb) val tag_match_arr = (0 until conf.ways).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) val tag_match = Cat(Bits(0),tag_match_arr:_*).orR @@ -892,8 +910,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { data_arb.io.in(0).valid := io.mem.xact_rep.valid && conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) // load hits - data_arb.io.in(4).bits.offset := io.cpu.req.bits.idx(offsetmsb,ramindexlsb) - data_arb.io.in(4).bits.idx := io.cpu.req.bits.idx(indexmsb,indexlsb) + 
data_arb.io.in(4).bits.offset := io.cpu.req.bits.addr(offsetmsb,ramindexlsb) + data_arb.io.in(4).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) data_arb.io.in(4).bits.rw := Bool(false) data_arb.io.in(4).valid := io.cpu.req.valid && req_read data_arb.io.in(4).bits.way_en := Fix(-1) // intiate load on all ways, mux after tag check @@ -1015,13 +1033,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val pending_fence = Reg(resetVal = Bool(false)) pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !mshr.io.fence_rdy nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy || - p_store_idx_match && meta.io.state_req.valid + p_store_idx_match && meta.io.state_req.valid || + !r_cpu_req_phys && dtlb.io.resp.miss val nack_miss = !mshr.io.req.ready val nack_flush = !mshr.io.fence_rdy && (r_req_fence || r_req_flush) || !flushed && r_req_flush val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush - io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence + io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence && (dtlb.io.req.ready || io.cpu.req.bits.phys) io.cpu.resp.valid := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack io.cpu.resp.bits.replay := mshr.io.cpu_resp_val diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index bf55f259..ac84e608 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -5,16 +5,31 @@ import Node._ import Constants._ import scala.math._ -class ioPTW(n: Int)(implicit conf: RocketConfiguration) extends Bundle -{ - val requestor = Vec(n) { new IOTLBPTW }.flip - val mem = new ioHellaCache()(conf.dcache) - val ptbr = UFix(INPUT, PADDR_BITS) +class IOTLBPTW extends Bundle { + val req = new FIFOIO()(UFix(width = 
VPN_BITS)) + val resp = new PipeIO()(new Bundle { + val error = Bool() + val ppn = UFix(width = PPN_BITS) + val perm = Bits(width = PERM_BITS) + }).flip + + val status = Bits(INPUT, width = 32) + val invalidate = Bool(INPUT) +} + +class IODatapathPTW extends Bundle { + val ptbr = UFix(INPUT, PADDR_BITS) + val invalidate = Bool(INPUT) + val status = Bits(INPUT, 32) } class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component { - val io = new ioPTW(n) + val io = new Bundle { + val requestor = Vec(n) { new IOTLBPTW }.flip + val mem = new ioHellaCache()(conf.dcache) + val dpath = new IODatapathPTW + } val levels = 3 val bitsPerLevel = VPN_BITS/levels @@ -27,7 +42,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component val r_req_vpn = Reg() { Bits() } val r_req_dest = Reg() { Bits() } - val req_addr = Reg() { Bits() } + val req_addr = Reg() { UFix() } val r_resp_ppn = Reg() { Bits() }; val r_resp_perm = Reg() { Bits() }; @@ -41,21 +56,21 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component when (arb.io.out.fire()) { r_req_vpn := arb.io.out.bits r_req_dest := arb.io.chosen - req_addr := Cat(io.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), Bits(0,3)) + req_addr := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), UFix(0,3)) } val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false)) when (dmem_resp_val) { - req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, Bits(0,3)) + req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, UFix(0,3)).toUFix r_resp_perm := io.mem.resp.bits.data_subword(9,4); r_resp_ppn := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS); } io.mem.req.valid := state === s_req + io.mem.req.bits.phys := Bool(true) io.mem.req.bits.cmd := M_XRD io.mem.req.bits.typ := MT_D - io.mem.req.bits.idx := req_addr(PGIDX_BITS-1,0) - io.mem.req.bits.ppn := 
Reg(req_addr(PADDR_BITS-1,PGIDX_BITS)) + io.mem.req.bits.addr := req_addr io.mem.req.bits.kill := Bool(false) val resp_val = state === s_done || state === s_error @@ -73,6 +88,8 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component io.requestor(i).resp.bits.error := resp_err io.requestor(i).resp.bits.perm := r_resp_perm io.requestor(i).resp.bits.ppn := resp_ppn.toUFix + io.requestor(i).invalidate := io.dpath.invalidate + io.requestor(i).status := io.dpath.status } // control state machine diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 81090090..2138561b 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -13,7 +13,8 @@ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) { - implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(DMEM_PORTS)) + val memPorts = if (HAVE_VEC) 3 else 2 + implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts)) implicit val conf = confIn.copy(dcache = dcConf) val io = new Bundle { @@ -21,13 +22,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon val host = new ioHTIF(conf.ntiles) } - val cpu = new rocketProc + val core = new Core val icache = new Frontend()(confIn.icache) val dcache = new HellaCache - val arbiter = new MemArbiter(DMEM_PORTS) - arbiter.io.requestor(DMEM_DCACHE) <> dcache.io.mem - arbiter.io.requestor(DMEM_ICACHE) <> icache.io.mem + val arbiter = new MemArbiter(memPorts) + arbiter.io.requestor(0) <> dcache.io.mem + arbiter.io.requestor(1) <> icache.io.mem io.tilelink.xact_init <> arbiter.io.mem.xact_init io.tilelink.xact_init_data <> dcache.io.mem.xact_init_data @@ -38,14 +39,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon io.tilelink.probe_rep <> 
dcache.io.mem.probe_rep io.tilelink.probe_rep_data <> dcache.io.mem.probe_rep_data - if (HAVE_VEC) - { + if (HAVE_VEC) { val vicache = new Frontend()(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB) - arbiter.io.requestor(DMEM_VICACHE) <> vicache.io.mem - cpu.io.vimem <> vicache.io.cpu + arbiter.io.requestor(2) <> vicache.io.mem + core.io.vimem <> vicache.io.cpu } - cpu.io.host <> io.host - cpu.io.imem <> icache.io.cpu - cpu.io.dmem <> dcache.io.cpu + core.io.host <> io.host + core.io.imem <> icache.io.cpu + core.io.dmem <> dcache.io.cpu } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 6ce671a4..0e2f1d33 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -66,21 +66,11 @@ class PseudoLRU(n: Int) } } -class IOTLBPTW extends Bundle { - val req = new FIFOIO()(UFix(width = VPN_BITS)) - val resp = new PipeIO()(new Bundle { - val error = Bool() - val ppn = UFix(width = PPN_BITS) - val perm = Bits(width = PERM_BITS) - }).flip -} - class TLBReq extends Bundle { val asid = UFix(width = ASID_BITS) val vpn = UFix(width = VPN_BITS+1) - val status = Bits(width = 32) - val invalidate = Bool() + val passthrough = Bool() val instruction = Bool() } @@ -116,7 +106,7 @@ class TLB(entries: Int) extends Component when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn } val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUFix - tag_cam.io.clear := io.req.bits.invalidate + tag_cam.io.clear := io.ptw.invalidate tag_cam.io.clear_hit := io.req.fire() && Mux(io.req.bits.instruction, io.resp.xcpt_if, io.resp.xcpt_ld && io.resp.xcpt_st) tag_cam.io.tag := lookup_tag tag_cam.io.write := state === s_wait && io.ptw.resp.valid @@ -148,8 +138,8 @@ class TLB(entries: Int) extends Component val plru = new PseudoLRU(entries) val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) - val status_s = io.req.bits.status(SR_S) // user/supervisor mode - val status_vm = 
io.req.bits.status(SR_VM) // virtual memory enable + val status_s = io.ptw.status(SR_S) // user/supervisor mode + val status_vm = io.ptw.status(SR_VM) // virtual memory enable val bad_va = io.req.bits.vpn(VPN_BITS) != io.req.bits.vpn(VPN_BITS-1) val tlb_hit = status_vm && tag_hit val tlb_miss = status_vm && !tag_hit && !bad_va @@ -163,7 +153,7 @@ class TLB(entries: Int) extends Component io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, sw_array(tag_hit_addr), uw_array(tag_hit_addr)) io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, sx_array(tag_hit_addr), ux_array(tag_hit_addr)) io.resp.miss := tlb_miss - io.resp.ppn := Mux(status_vm, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) + io.resp.ppn := Mux(status_vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) io.resp.hit_idx := tag_cam.io.hits io.ptw.req.valid := state === s_request @@ -175,15 +165,15 @@ class TLB(entries: Int) extends Component r_refill_waddr := repl_waddr } when (state === s_request) { - when (io.req.bits.invalidate) { + when (io.ptw.invalidate) { state := s_ready } when (io.ptw.req.ready) { state := s_wait - when (io.req.bits.invalidate) { state := s_wait_invalidate } + when (io.ptw.invalidate) { state := s_wait_invalidate } } } - when (state === s_wait && io.req.bits.invalidate) { + when (state === s_wait && io.ptw.invalidate) { state := s_wait_invalidate } when ((state === s_wait || state === s_wait_invalidate) && io.ptw.resp.valid) { @@ -204,10 +194,6 @@ class ioDTLB_CPU_resp extends TLBResp(1) class ioDTLB extends Bundle { - // status bits (from PCR), to check current permission and whether VM is enabled - val status = Bits(INPUT, 32) - // invalidate all TLB entries - val invalidate = Bool(INPUT) val cpu_req = new ioDTLB_CPU_req().flip val cpu_resp = new ioDTLB_CPU_resp() val ptw = new IOTLBPTW @@ -225,8 +211,7 @@ class rocketTLB(entries: Int) extends Component val tlb = new TLB(entries) tlb.io.req.valid := r_cpu_req_val 
&& !io.cpu_req.bits.kill tlb.io.req.bits.instruction := Bool(false) - tlb.io.req.bits.invalidate := io.invalidate - tlb.io.req.bits.status := io.status + tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.vpn := r_cpu_req_vpn tlb.io.req.bits.asid := r_cpu_req_asid From 9a02298f6f60f098aab16d78e54fa27e2fbf1879 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 6 Nov 2012 23:52:58 -0800 Subject: [PATCH 0503/1087] andrew's fix for tlb lockup --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index d027eca7..5094f186 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -807,7 +807,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val dtlb = new TLB(8) dtlb.io.ptw <> io.cpu.ptw - dtlb.io.req.valid := r_cpu_req_val_ && r_req_readwrite && !r_cpu_req_phys + dtlb.io.req.valid := r_cpu_req_val && r_req_readwrite && !r_cpu_req_phys dtlb.io.req.bits.passthrough := r_cpu_req_phys dtlb.io.req.bits.asid := UFix(0) dtlb.io.req.bits.vpn := r_cpu_req_vpn From 8764fe786adc7ab8ee5e42b49527b0c0bafe49fb Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 6 Nov 2012 23:53:52 -0800 Subject: [PATCH 0504/1087] refactored vector tlb --- rocket/src/main/scala/core.scala | 13 +++---- rocket/src/main/scala/dpath.scala | 2 -- rocket/src/main/scala/tlb.scala | 59 ------------------------------- 3 files changed, 4 insertions(+), 70 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index d27dc1e3..9ef40699 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -44,18 +44,13 @@ class Core(implicit conf: RocketConfiguration) extends Component if (HAVE_VEC) { val vu = new vu() - val vdtlb = new rocketTLB(8) + val vdtlb = new TLB(8) ptw += vdtlb.io.ptw - vdtlb.io.cpu_req <> vu.io.vec_tlb_req - vu.io.vec_tlb_resp := 
vdtlb.io.cpu_resp - vu.io.vec_tlb_resp.xcpt_pf := Bool(false) + vdtlb.io <> vu.io.vec_tlb - val pftlb = new rocketTLB(2) - pftlb.io.cpu_req <> vu.io.vec_pftlb_req + val pftlb = new TLB(2) + pftlb.io <> vu.io.vec_pftlb ptw += pftlb.io.ptw - vu.io.vec_pftlb_resp := pftlb.io.cpu_resp - vu.io.vec_pftlb_resp.xcpt_ld := Bool(false) - vu.io.vec_pftlb_resp.xcpt_st := Bool(false) dpath.io.vec_ctrl <> ctrl.io.vec_dpath diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 02c21851..4e6bcb7c 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -12,7 +12,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val host = new ioHTIF(conf.ntiles) val ctrl = new ioCtrlDpath().flip val dmem = new ioHellaCache()(conf.dcache) - val dtlb = new ioDTLB_CPU_req_bundle().asOutput() val ptw = new IODatapathPTW().flip val imem = new IOCPUFrontend()(conf.icache) val fpu = new ioDpathFPU(); @@ -210,7 +209,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) require(io.dmem.req.bits.tag.getWidth >= 6) - io.dtlb.vpn := ex_effective_address >> UFix(PGIDX_BITS) // processor control regfile read pcr.io.r.en := io.ctrl.pcr != PCR_N diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 0e2f1d33..c2a04cfa 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -82,7 +82,6 @@ class TLBResp(entries: Int) extends Bundle val ppn = UFix(OUTPUT, PPN_BITS) val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) - val xcpt_pf = Bool(OUTPUT) val xcpt_if = Bool(OUTPUT) override def clone = new TLBResp(entries).asInstanceOf[this.type] @@ -180,61 +179,3 @@ class TLB(entries: Int) extends Component state := s_ready } } - -// ioDTLB_CPU also located in hwacha/src/vuVXU-Interface.scala -// should keep them in 
sync - -class ioDTLB_CPU_req_bundle extends TLBReq -{ - val kill = Bool() - val cmd = Bits(width=4) // load/store/amo -} -class ioDTLB_CPU_req extends FIFOIO()( { new ioDTLB_CPU_req_bundle() } ) -class ioDTLB_CPU_resp extends TLBResp(1) - -class ioDTLB extends Bundle -{ - val cpu_req = new ioDTLB_CPU_req().flip - val cpu_resp = new ioDTLB_CPU_resp() - val ptw = new IOTLBPTW -} - -class rocketTLB(entries: Int) extends Component -{ - val io = new ioDTLB(); - - val r_cpu_req_val = Reg(resetVal = Bool(false)); - val r_cpu_req_vpn = Reg() { UFix() } - val r_cpu_req_cmd = Reg() { Bits() } - val r_cpu_req_asid = Reg() { UFix() } - - val tlb = new TLB(entries) - tlb.io.req.valid := r_cpu_req_val && !io.cpu_req.bits.kill - tlb.io.req.bits.instruction := Bool(false) - tlb.io.req.bits.passthrough := Bool(false) - tlb.io.req.bits.vpn := r_cpu_req_vpn - tlb.io.req.bits.asid := r_cpu_req_asid - - def cmdIsRead(cmd: Bits) = cmd === M_XRD || cmd(3) - def cmdIsWrite(cmd: Bits) = cmd === M_XWR || cmd(3) - def cmdIsPrefetch(cmd: Bits) = cmd === M_PFR || cmd === M_PFW - def cmdNeedsTLB(cmd: Bits) = cmdIsRead(cmd) || cmdIsWrite(cmd) || cmdIsPrefetch(cmd) - - when (io.cpu_req.fire() && cmdNeedsTLB(io.cpu_req.bits.cmd)) { - r_cpu_req_vpn := io.cpu_req.bits.vpn; - r_cpu_req_cmd := io.cpu_req.bits.cmd; - r_cpu_req_asid := io.cpu_req.bits.asid; - r_cpu_req_val := Bool(true); - } - .otherwise { - r_cpu_req_val := Bool(false); - } - - io.cpu_req.ready := tlb.io.req.ready && !io.cpu_resp.miss - io.cpu_resp.ppn := tlb.io.resp.ppn - io.cpu_resp.miss := r_cpu_req_val && tlb.io.resp.miss - io.cpu_resp.xcpt_ld := r_cpu_req_val && tlb.io.resp.xcpt_ld && cmdIsRead(r_cpu_req_cmd) - io.cpu_resp.xcpt_st := r_cpu_req_val && tlb.io.resp.xcpt_st && cmdIsWrite(r_cpu_req_cmd) - io.cpu_resp.xcpt_pf := r_cpu_req_val && tlb.io.resp.xcpt_ld && cmdIsPrefetch(r_cpu_req_cmd) - io.ptw <> tlb.io.ptw -} From be1980dd2d3ffd722dcaf4ad56fd71f5f5dda831 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 7 Nov 2012 
01:15:33 -0800 Subject: [PATCH 0505/1087] refactored vector queue interface --- rocket/src/main/scala/core.scala | 57 +++++++++++---------------- rocket/src/main/scala/ctrl.scala | 16 ++++---- rocket/src/main/scala/ctrl_vec.scala | 57 ++++++++++++--------------- rocket/src/main/scala/dpath_vec.scala | 30 +++++++++----- 4 files changed, 76 insertions(+), 84 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 9ef40699..8cb6bc43 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -46,10 +46,10 @@ class Core(implicit conf: RocketConfiguration) extends Component val vdtlb = new TLB(8) ptw += vdtlb.io.ptw - vdtlb.io <> vu.io.vec_tlb + vdtlb.io <> vu.io.vtlb val pftlb = new TLB(2) - pftlb.io <> vu.io.vec_pftlb + pftlb.io <> vu.io.vpftlb ptw += pftlb.io.ptw dpath.io.vec_ctrl <> ctrl.io.vec_dpath @@ -68,44 +68,33 @@ class Core(implicit conf: RocketConfiguration) extends Component io.vimem.req.bits.mispredict := Bool(false) io.vimem.req.bits.taken := Bool(false) - // hooking up vector command queues - vu.io.vec_cmdq.valid := ctrl.io.vec_iface.vcmdq_valid - vu.io.vec_cmdq.bits := dpath.io.vec_iface.vcmdq_bits - vu.io.vec_ximm1q.valid := ctrl.io.vec_iface.vximm1q_valid - vu.io.vec_ximm1q.bits := dpath.io.vec_iface.vximm1q_bits - vu.io.vec_ximm2q.valid := ctrl.io.vec_iface.vximm2q_valid - vu.io.vec_ximm2q.bits := dpath.io.vec_iface.vximm2q_bits - vu.io.vec_cntq.valid := ctrl.io.vec_iface.vcntq_valid - vu.io.vec_cntq.bits := Cat(dpath.io.vec_iface.vcntq_last, dpath.io.vec_iface.vcntq_bits) + ctrl.io.vec_iface.vcmdq <> vu.io.vcmdq + ctrl.io.vec_iface.vximm1q <> vu.io.vximm1q + ctrl.io.vec_iface.vximm2q <> vu.io.vximm2q + ctrl.io.vec_iface.vcntq <> vu.io.vcntq - // prefetch queues - vu.io.vec_pfcmdq.valid := ctrl.io.vec_iface.vpfcmdq_valid - vu.io.vec_pfcmdq.bits := dpath.io.vec_iface.vcmdq_bits - vu.io.vec_pfximm1q.valid := ctrl.io.vec_iface.vpfximm1q_valid - vu.io.vec_pfximm1q.bits := 
dpath.io.vec_iface.vximm1q_bits - vu.io.vec_pfximm2q.valid := ctrl.io.vec_iface.vpfximm2q_valid - vu.io.vec_pfximm2q.bits := dpath.io.vec_iface.vximm2q_bits - vu.io.vec_pfcntq.valid := ctrl.io.vec_iface.vpfcntq_valid - vu.io.vec_pfcntq.bits := dpath.io.vec_iface.vcntq_bits + dpath.io.vec_iface.vcmdq <> vu.io.vcmdq + dpath.io.vec_iface.vximm1q <> vu.io.vximm1q + dpath.io.vec_iface.vximm2q <> vu.io.vximm2q + dpath.io.vec_iface.vcntq <> vu.io.vcntq - // don't have to use pf ready signals - // if cmdq is not a load or store - ctrl.io.vec_iface.vcmdq_ready := vu.io.vec_cmdq.ready - ctrl.io.vec_iface.vximm1q_ready := vu.io.vec_ximm1q.ready - ctrl.io.vec_iface.vximm2q_ready := vu.io.vec_ximm2q.ready - ctrl.io.vec_iface.vcntq_ready := vu.io.vec_cntq.ready - ctrl.io.vec_iface.vpfcmdq_ready := vu.io.vec_pfcmdq.ready - ctrl.io.vec_iface.vpfximm1q_ready := vu.io.vec_pfximm1q.ready - ctrl.io.vec_iface.vpfximm2q_ready := vu.io.vec_pfximm2q.ready - ctrl.io.vec_iface.vpfcntq_ready := vu.io.vec_pfcntq.ready + ctrl.io.vec_iface.vpfcmdq <> vu.io.vpfcmdq + ctrl.io.vec_iface.vpfximm1q <> vu.io.vpfximm1q + ctrl.io.vec_iface.vpfximm2q <> vu.io.vpfximm2q + ctrl.io.vec_iface.vpfcntq <> vu.io.vpfcntq + + dpath.io.vec_iface.vpfcmdq <> vu.io.vpfcmdq + dpath.io.vec_iface.vpfximm1q <> vu.io.vpfximm1q + dpath.io.vec_iface.vpfximm2q <> vu.io.vpfximm2q + dpath.io.vec_iface.vpfcntq <> vu.io.vpfcntq // user level vector command queue ready signals - ctrl.io.vec_iface.vcmdq_user_ready := vu.io.vec_cmdq_user_ready - ctrl.io.vec_iface.vximm1q_user_ready := vu.io.vec_ximm1q_user_ready - ctrl.io.vec_iface.vximm2q_user_ready := vu.io.vec_ximm2q_user_ready + ctrl.io.vec_iface.vcmdq_user_ready := vu.io.vcmdq_user_ready + ctrl.io.vec_iface.vximm1q_user_ready := vu.io.vximm1q_user_ready + ctrl.io.vec_iface.vximm2q_user_ready := vu.io.vximm2q_user_ready // fences - ctrl.io.vec_iface.vfence_ready := vu.io.vec_fence_ready + ctrl.io.vec_iface.vfence_ready := vu.io.vfence_ready // irqs ctrl.io.vec_iface.irq := 
vu.io.irq diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 46afcc98..daa8e004 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -437,14 +437,14 @@ class Control(implicit conf: RocketConfiguration) extends Component vec_dec.io.inst := io.dpath.inst val s = io.dpath.status(SR_S) - val mask_cmdq_ready = !vec_dec.io.sigs.enq_cmdq || s && io.vec_iface.vcmdq_ready || !s && io.vec_iface.vcmdq_user_ready - val mask_ximm1q_ready = !vec_dec.io.sigs.enq_ximm1q || s && io.vec_iface.vximm1q_ready || !s && io.vec_iface.vximm1q_user_ready - val mask_ximm2q_ready = !vec_dec.io.sigs.enq_ximm2q || s && io.vec_iface.vximm2q_ready || !s && io.vec_iface.vximm2q_user_ready - val mask_cntq_ready = !vec_dec.io.sigs.enq_cntq || io.vec_iface.vcntq_ready - val mask_pfcmdq_ready = !vec_dec.io.sigs.enq_pfcmdq || io.vec_iface.vpfcmdq_ready - val mask_pfximm1q_ready = !vec_dec.io.sigs.enq_pfximm1q || io.vec_iface.vpfximm1q_ready - val mask_pfximm2q_ready = !vec_dec.io.sigs.enq_pfximm2q || io.vec_iface.vpfximm2q_ready - val mask_pfcntq_ready = !vec_dec.io.sigs.enq_pfcntq || io.vec_iface.vpfcntq_ready + val mask_cmdq_ready = !vec_dec.io.sigs.enq_cmdq || s && io.vec_iface.vcmdq.ready || !s && io.vec_iface.vcmdq_user_ready + val mask_ximm1q_ready = !vec_dec.io.sigs.enq_ximm1q || s && io.vec_iface.vximm1q.ready || !s && io.vec_iface.vximm1q_user_ready + val mask_ximm2q_ready = !vec_dec.io.sigs.enq_ximm2q || s && io.vec_iface.vximm2q.ready || !s && io.vec_iface.vximm2q_user_ready + val mask_cntq_ready = !vec_dec.io.sigs.enq_cntq || io.vec_iface.vcntq.ready + val mask_pfcmdq_ready = !vec_dec.io.sigs.enq_pfcmdq || io.vec_iface.vpfcmdq.ready + val mask_pfximm1q_ready = !vec_dec.io.sigs.enq_pfximm1q || io.vec_iface.vpfximm1q.ready + val mask_pfximm2q_ready = !vec_dec.io.sigs.enq_pfximm2q || io.vec_iface.vpfximm2q.ready + val mask_pfcntq_ready = !vec_dec.io.sigs.enq_pfcntq || io.vec_iface.vpfcntq.ready vec_stalld = id_vec_val && ( 
diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 4fe89d5d..fd45b56b 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -4,6 +4,7 @@ import Chisel._ import Node._ import Constants._ import Instructions._ +import hwacha.Constants._ class ioCtrlDpathVec extends Bundle { @@ -19,23 +20,15 @@ class ioCtrlDpathVec extends Bundle class ioCtrlVecInterface extends Bundle { - val vcmdq_valid = Bool(OUTPUT) - val vcmdq_ready = Bool(INPUT) - val vximm1q_valid = Bool(OUTPUT) - val vximm1q_ready = Bool(INPUT) - val vximm2q_valid = Bool(OUTPUT) - val vximm2q_ready = Bool(INPUT) - val vcntq_valid = Bool(OUTPUT) - val vcntq_ready = Bool(INPUT) + val vcmdq = new FIFOIO()(Bits(width = SZ_VCMD)) + val vximm1q = new FIFOIO()(Bits(width = SZ_VIMM)) + val vximm2q = new FIFOIO()(Bits(width = SZ_VSTRIDE)) + val vcntq = new FIFOIO()(Bits(width = SZ_VLEN+1)) - val vpfcmdq_valid = Bool(OUTPUT) - val vpfcmdq_ready = Bool(INPUT) - val vpfximm1q_valid = Bool(OUTPUT) - val vpfximm1q_ready = Bool(INPUT) - val vpfximm2q_valid = Bool(OUTPUT) - val vpfximm2q_ready = Bool(INPUT) - val vpfcntq_valid = Bool(OUTPUT) - val vpfcntq_ready = Bool(INPUT) + val vpfcmdq = new FIFOIO()(Bits(width = SZ_VCMD)) + val vpfximm1q = new FIFOIO()(Bits(width = SZ_VIMM)) + val vpfximm2q = new FIFOIO()(Bits(width = SZ_VSTRIDE)) + val vpfcntq = new FIFOIO()(Bits(width = SZ_VLEN)) val vcmdq_user_ready = Bool(INPUT) val vximm1q_user_ready = Bool(INPUT) @@ -207,14 +200,14 @@ class rocketCtrlVec extends Component val enq_pfximm2q_mask_pfq = dec.io.sigs.enq_pfximm2q && (!dec.io.sigs.pfaq || io.dpath.pfq) val enq_pfcntq_mask_pfq = dec.io.sigs.enq_pfcntq && (!dec.io.sigs.pfaq || io.dpath.pfq) - val mask_cmdq_ready = !dec.io.sigs.enq_cmdq || io.s && io.iface.vcmdq_ready || !io.s && io.iface.vcmdq_user_ready - val mask_ximm1q_ready = !dec.io.sigs.enq_ximm1q || io.s && io.iface.vximm1q_ready || !io.s && io.iface.vximm1q_user_ready - val 
mask_ximm2q_ready = !dec.io.sigs.enq_ximm2q || io.s && io.iface.vximm2q_ready || !io.s && io.iface.vximm2q_user_ready - val mask_cntq_ready = !dec.io.sigs.enq_cntq || io.iface.vcntq_ready - val mask_pfcmdq_ready = !enq_pfcmdq_mask_pfq || io.iface.vpfcmdq_ready - val mask_pfximm1q_ready = !enq_pfximm1q_mask_pfq || io.iface.vpfximm1q_ready - val mask_pfximm2q_ready = !enq_pfximm2q_mask_pfq || io.iface.vpfximm2q_ready - val mask_pfcntq_ready = !enq_pfcntq_mask_pfq || io.iface.vpfcntq_ready + val mask_cmdq_ready = !dec.io.sigs.enq_cmdq || io.s && io.iface.vcmdq.ready || !io.s && io.iface.vcmdq_user_ready + val mask_ximm1q_ready = !dec.io.sigs.enq_ximm1q || io.s && io.iface.vximm1q.ready || !io.s && io.iface.vximm1q_user_ready + val mask_ximm2q_ready = !dec.io.sigs.enq_ximm2q || io.s && io.iface.vximm2q.ready || !io.s && io.iface.vximm2q_user_ready + val mask_cntq_ready = !dec.io.sigs.enq_cntq || io.iface.vcntq.ready + val mask_pfcmdq_ready = !enq_pfcmdq_mask_pfq || io.iface.vpfcmdq.ready + val mask_pfximm1q_ready = !enq_pfximm1q_mask_pfq || io.iface.vpfximm1q.ready + val mask_pfximm2q_ready = !enq_pfximm2q_mask_pfq || io.iface.vpfximm2q.ready + val mask_pfcntq_ready = !enq_pfcntq_mask_pfq || io.iface.vpfcntq.ready io.dpath.wen := dec.io.sigs.wen io.dpath.fn := dec.io.sigs.fn @@ -222,42 +215,42 @@ class rocketCtrlVec extends Component io.dpath.sel_vimm := dec.io.sigs.sel_vimm io.dpath.sel_vimm2 := dec.io.sigs.sel_vimm2 - io.iface.vcmdq_valid := + io.iface.vcmdq.valid := valid_common && dec.io.sigs.enq_cmdq && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready - io.iface.vximm1q_valid := + io.iface.vximm1q.valid := valid_common && mask_cmdq_ready && dec.io.sigs.enq_ximm1q && mask_ximm2q_ready && mask_cntq_ready && mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready - io.iface.vximm2q_valid := + io.iface.vximm2q.valid := valid_common && 
mask_cmdq_ready && mask_ximm1q_ready && dec.io.sigs.enq_ximm2q && mask_cntq_ready && mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready - io.iface.vcntq_valid := + io.iface.vcntq.valid := valid_common && mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && dec.io.sigs.enq_cntq && mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready - io.iface.vpfcmdq_valid := + io.iface.vpfcmdq.valid := valid_common && mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && enq_pfcmdq_mask_pfq && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready - io.iface.vpfximm1q_valid := + io.iface.vpfximm1q.valid := valid_common && mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && mask_pfcmdq_ready && enq_pfximm1q_mask_pfq && mask_pfximm2q_ready && mask_pfcntq_ready - io.iface.vpfximm2q_valid := + io.iface.vpfximm2q.valid := valid_common && mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && mask_pfcmdq_ready && mask_pfximm1q_ready && enq_pfximm2q_mask_pfq && mask_pfcntq_ready - io.iface.vpfcntq_valid := + io.iface.vpfcntq.valid := valid_common && mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && enq_pfcntq_mask_pfq diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 8a4faabb..6bf6a8b1 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -8,11 +8,16 @@ import hwacha.Constants._ class ioDpathVecInterface extends Bundle { - val vcmdq_bits = Bits(OUTPUT, SZ_VCMD) - val vximm1q_bits = Bits(OUTPUT, SZ_VIMM) - val vximm2q_bits = Bits(OUTPUT, SZ_VSTRIDE) - val vcntq_bits = Bits(OUTPUT, SZ_VLEN) - val vcntq_last = Bool(OUTPUT) + val vcmdq = new FIFOIO()(Bits(width = SZ_VCMD)) + val vximm1q = new FIFOIO()(Bits(width = SZ_VIMM)) + val vximm2q = new 
FIFOIO()(Bits(width = SZ_VSTRIDE)) + val vcntq = new FIFOIO()(Bits(width = SZ_VLEN+1)) + + val vpfcmdq = new FIFOIO()(Bits(width = SZ_VCMD)) + val vpfximm1q = new FIFOIO()(Bits(width = SZ_VIMM)) + val vpfximm2q = new FIFOIO()(Bits(width = SZ_VSTRIDE)) + val vpfcntq = new FIFOIO()(Bits(width = SZ_VLEN)) + val evac_addr = Bits(OUTPUT, 64) val irq_aux = Bits(INPUT, 64) } @@ -147,7 +152,7 @@ class rocketDpathVec extends Component val appvlm1 = appvl - UFix(1) - io.iface.vcmdq_bits := + io.iface.vcmdq.bits := Mux(io.ctrl.sel_vcmd === VCMD_I, Cat(Bits(0,2), Bits(0,4), io.inst(9,8), Bits(0,6), Bits(0,6)), Mux(io.ctrl.sel_vcmd === VCMD_F, Cat(Bits(0,2), Bits(1,3), io.inst(9,7), Bits(0,6), Bits(0,6)), Mux(io.ctrl.sel_vcmd === VCMD_TX, Cat(Bits(1,2), io.inst(13,8), Bits(0,1), io.waddr, Bits(0,1), io.raddr1), @@ -157,16 +162,21 @@ class rocketDpathVec extends Component Mux(io.ctrl.sel_vcmd === VCMD_A, io.wdata(SZ_VCMD-1, 0), Bits(0,20)))))))) - io.iface.vximm1q_bits := + io.iface.vximm1q.bits := Mux(io.ctrl.sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, nfregs(5,0), nxregs(5,0), appvlm1(10,0)), io.wdata) // VIMM_ALU - io.iface.vximm2q_bits := + io.iface.vximm2q.bits := Mux(io.ctrl.sel_vimm2 === VIMM2_RS2, io.rs2, io.wdata) // VIMM2_ALU - io.iface.vcntq_bits := io.wdata(SZ_VLEN-1, 0) - io.iface.vcntq_last := io.rs2(1) + val last = io.rs2(1) + io.iface.vcntq.bits := Cat(last, io.iface.vpfcntq.bits) + + io.iface.vpfcmdq.bits := io.iface.vcmdq.bits + io.iface.vpfximm1q.bits := io.iface.vximm1q.bits + io.iface.vpfximm2q.bits := io.iface.vximm2q.bits + io.iface.vpfcntq.bits := io.wdata(SZ_VLEN-1, 0) io.iface.evac_addr := io.wdata From 6d10115b195334d46294a4dc55603be345813b91 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 15 Nov 2012 16:45:51 -0800 Subject: [PATCH 0506/1087] fix D$ tag width --- rocket/src/main/scala/nbdcache.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala 
b/rocket/src/main/scala/nbdcache.scala index 5094f186..5c9bed76 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -18,12 +18,15 @@ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, def pgidxbits = PGIDX_BITS def offbits = OFFSET_BITS def paddrbits = ppnbits + pgidxbits - def lineaddrbits = ppnbits - offbits + def lineaddrbits = paddrbits - offbits def idxbits = log2Up(sets) def waybits = log2Up(ways) + def untagbits = offbits + idxbits def tagbits = lineaddrbits - idxbits + def ramoffbits = log2Up(MEM_DATA_BITS/8) def databytes = 8 // assumed by StoreGen/LoadGen/AMOALU def databits = databytes*8 + def wordoffbits = log2Up(databytes) } abstract class ReplacementPolicy From ff8c736d94c9f29484205d263a550a778f61231f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 16 Nov 2012 01:55:45 -0800 Subject: [PATCH 0507/1087] move icache invalidate out of request bundle --- rocket/src/main/scala/core.scala | 2 +- rocket/src/main/scala/ctrl.scala | 8 +++----- rocket/src/main/scala/icache.scala | 15 ++++++++------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 8cb6bc43..819aa73b 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -58,7 +58,7 @@ class Core(implicit conf: RocketConfiguration) extends Component ptw += io.vimem.ptw io.vimem.req.bits.pc := vu.io.imem_req.bits io.vimem.req.valid := vu.io.imem_req.valid - io.vimem.req.bits.invalidate := ctrl.io.dpath.flush_inst + io.vimem.invalidate := ctrl.io.imem.invalidate vu.io.imem_resp.valid := io.vimem.resp.valid vu.io.imem_resp.bits.pc := io.vimem.resp.bits.pc vu.io.imem_resp.bits.data := io.vimem.resp.bits.data diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index daa8e004..ac609557 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -35,7 +35,6 @@ class 
ioCtrlDpath extends Bundle() val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); val wb_valid = Bool(OUTPUT) - val flush_inst = Bool(OUTPUT); val ex_mem_type = Bits(OUTPUT, 3) // exception handling val exception = Bool(OUTPUT); @@ -391,7 +390,6 @@ class Control(implicit conf: RocketConfiguration) extends Component val mem_reg_cause = Reg(){UFix()} val mem_reg_mem_type = Reg(){Bits()} - val wb_reg_valid = Reg(resetVal = Bool(false)) val wb_reg_pcr = Reg(resetVal = PCR_N) val wb_reg_wen = Reg(resetVal = Bool(false)) @@ -407,6 +405,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) val take_pc = Bool() + val pc_taken = Reg(take_pc, resetVal = Bool(false)) val take_pc_wb = Bool() val ctrl_killd = Bool() val ctrl_killx = Bool() @@ -739,9 +738,8 @@ class Control(implicit conf: RocketConfiguration) extends Component ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || id_interrupt io.dpath.killd := take_pc || ctrl_stalld && !id_interrupt - io.dpath.flush_inst := wb_reg_flush_inst; - io.imem.resp.ready := take_pc || !ctrl_stalld - io.imem.req.bits.invalidate := wb_reg_flush_inst + io.imem.resp.ready := pc_taken || !ctrl_stalld + io.imem.invalidate := wb_reg_flush_inst io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen io.dpath.ren2 := id_renx2.toBool; diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index df7506a0..8845f5ee 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -29,7 +29,6 @@ case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached, class FrontendReq extends Bundle { val pc = UFix(width = VADDR_BITS+1) - val invalidate = Bool() val mispredict = Bool() val taken = Bool() val currentpc = UFix(width = VADDR_BITS+1) @@ -49,6 +48,7 @@ class IOCPUFrontend(implicit conf: ICacheConfig) extends Bundle { val req = new PipeIO()(new FrontendReq) val resp = new FIFOIO()(new 
FrontendResp).flip val ptw = new IOTLBPTW().flip + val invalidate = Bool(OUTPUT) } class Frontend(implicit c: ICacheConfig) extends Component @@ -97,7 +97,7 @@ class Frontend(implicit c: ICacheConfig) extends Component btb.io.clr := !io.cpu.req.bits.taken btb.io.correct_pc := io.cpu.req.bits.currentpc btb.io.correct_target := io.cpu.req.bits.pc - btb.io.invalidate := io.cpu.req.bits.invalidate || io.cpu.ptw.invalidate + btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate tlb.io.ptw <> io.cpu.ptw tlb.io.req.valid := !stall && !icmiss @@ -109,7 +109,7 @@ class Frontend(implicit c: ICacheConfig) extends Component icache.io.mem <> io.mem icache.io.req.valid := !stall && !s0_same_block icache.io.req.bits.idx := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) - icache.io.req.bits.invalidate := io.cpu.req.bits.invalidate + icache.io.invalidate := io.cpu.invalidate icache.io.req.bits.ppn := tlb.io.resp.ppn icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss icache.io.resp.ready := !stall && !s1_same_block @@ -127,7 +127,6 @@ class ICache(implicit c: ICacheConfig) extends Component val io = new Bundle { val req = new PipeIO()(new Bundle { val idx = UFix(width = PGIDX_BITS) - val invalidate = Bool() val ppn = UFix(width = PPN_BITS) // delayed one cycle val kill = Bool() // delayed one cycle }).flip @@ -135,6 +134,7 @@ class ICache(implicit c: ICacheConfig) extends Component val data = Bits(width = c.ibytes*8) val datablock = Bits(width = c.databits) }) + val invalidate = Bool(INPUT) val mem = new ioUncachedRequestor } @@ -153,10 +153,10 @@ class ICache(implicit c: ICacheConfig) extends Component val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUFix val s1_tag = s1_addr(c.tagbits+c.untagbits-1,c.untagbits) - val s0_valid = io.req.valid && rdy || s1_valid && stall && !io.req.bits.kill + val s0_valid = io.req.valid || s1_valid && stall val s0_pgoff = Mux(io.req.valid, io.req.bits.idx, s1_pgoff) - s1_valid := s0_valid + s1_valid := io.req.valid && rdy || 
s1_valid && stall && !io.req.bits.kill when (io.req.valid && rdy) { s1_pgoff := s0_pgoff } @@ -191,7 +191,7 @@ class ICache(implicit c: ICacheConfig) extends Component when (refill_done && !invalidated) { vb_array := vb_array.bitSet(Cat(repl_way, s2_idx), Bool(true)) } - when (io.req.bits.invalidate) { + when (io.invalidate) { vb_array := Bits(0) invalidated := Bool(true) } @@ -248,6 +248,7 @@ class ICache(implicit c: ICacheConfig) extends Component io.mem.xact_init.valid := (state === s_request) && finish_q.io.enq.ready io.mem.xact_init.bits := c.co.getUncachedReadTransactionInit(s2_addr >> UFix(c.offbits), UFix(0)) io.mem.xact_finish <> finish_q.io.deq + io.mem.xact_rep.ready := Bool(true) // control state machine switch (state) { From a90a1790a58bc80bfff98f49a4521ca36507dd34 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 16 Nov 2012 01:59:38 -0800 Subject: [PATCH 0508/1087] improve tlb qor --- rocket/src/main/scala/tlb.scala | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index c2a04cfa..6abdff11 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -102,7 +102,6 @@ class TLB(entries: Int) extends Component val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); val tag_ram = Vec(entries) { Reg() { io.ptw.resp.bits.ppn.clone } } - when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn } val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUFix tag_cam.io.clear := io.ptw.invalidate @@ -115,13 +114,14 @@ class TLB(entries: Int) extends Component val tag_hit_addr = OHToUFix(tag_cam.io.hits) // permission bit arrays - val ur_array = Reg(resetVal = Bits(0, entries)) // user read permission - val uw_array = Reg(resetVal = Bits(0, entries)) // user write permission - val ux_array = Reg(resetVal = Bits(0, entries)) // user execute permission - val sr_array = Reg(resetVal = Bits(0, entries)) 
// supervisor read permission - val sw_array = Reg(resetVal = Bits(0, entries)) // supervisor write permission - val sx_array = Reg(resetVal = Bits(0, entries)) // supervisor execute permission - when (tag_cam.io.write) { + val ur_array = Reg{Bits()} // user read permission + val uw_array = Reg{Bits()} // user write permission + val ux_array = Reg{Bits()} // user execute permission + val sr_array = Reg{Bits()} // supervisor read permission + val sw_array = Reg{Bits()} // supervisor write permission + val sx_array = Reg{Bits()} // supervisor execute permission + when (io.ptw.resp.valid) { + tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn val perm = (!io.ptw.resp.bits.error).toFix & io.ptw.resp.bits.perm(5,0) ur_array := ur_array.bitSet(r_refill_waddr, perm(2)) uw_array := uw_array.bitSet(r_refill_waddr, perm(1)) @@ -144,13 +144,13 @@ class TLB(entries: Int) extends Component val tlb_miss = status_vm && !tag_hit && !bad_va when (io.req.valid && tlb_hit) { - plru.access(tag_hit_addr) + plru.access(OHToUFix(tag_cam.io.hits)) } io.req.ready := state === s_ready - io.resp.xcpt_ld := bad_va || tlb_hit && !Mux(status_s, sr_array(tag_hit_addr), ur_array(tag_hit_addr)) - io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, sw_array(tag_hit_addr), uw_array(tag_hit_addr)) - io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, sx_array(tag_hit_addr), ux_array(tag_hit_addr)) + io.resp.xcpt_ld := bad_va || tlb_hit && !Mux(status_s, (sr_array & tag_cam.io.hits).orR, (ur_array & tag_cam.io.hits).orR) + io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, (sw_array & tag_cam.io.hits).orR, (uw_array & tag_cam.io.hits).orR) + io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, (sx_array & tag_cam.io.hits).orR, (ux_array & tag_cam.io.hits).orR) io.resp.miss := tlb_miss io.resp.ppn := Mux(status_vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) io.resp.hit_idx := tag_cam.io.hits @@ -175,7 +175,7 @@ class TLB(entries: Int) extends 
Component when (state === s_wait && io.ptw.invalidate) { state := s_wait_invalidate } - when ((state === s_wait || state === s_wait_invalidate) && io.ptw.resp.valid) { + when (io.ptw.resp.valid) { state := s_ready } } From 8dce89703a520bf0b95da9d1f638ab0da48f588c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 16 Nov 2012 02:39:33 -0800 Subject: [PATCH 0509/1087] new D$ with better QoR and AMO pipelining Vector unit is disabled because nack handling needs to be fixed. --- rocket/src/main/scala/arbiter.scala | 11 +- rocket/src/main/scala/consts.scala | 34 -- rocket/src/main/scala/core.scala | 6 +- rocket/src/main/scala/ctrl.scala | 613 ++++++++++---------- rocket/src/main/scala/ctrl_util.scala | 32 - rocket/src/main/scala/dpath.scala | 158 +++-- rocket/src/main/scala/fpu.scala | 15 +- rocket/src/main/scala/nbdcache.scala | 805 +++++++++++--------------- rocket/src/main/scala/package.scala | 2 +- rocket/src/main/scala/ptw.scala | 15 +- rocket/src/main/scala/tile.scala | 3 +- 11 files changed, 738 insertions(+), 956 deletions(-) delete mode 100644 rocket/src/main/scala/ctrl_util.scala diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 8fb5d3f7..119362ee 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -43,8 +43,7 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp io.requestor(i).xcpt := io.mem.xcpt resp.bits := io.mem.resp.bits resp.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) - resp.bits.miss := io.mem.resp.bits.miss && tag_hit - resp.bits.nack := io.mem.resp.bits.nack && r_valid(i) + resp.bits.nack := io.mem.resp.bits.nack && tag_hit resp.bits.replay := io.mem.resp.bits.replay && tag_hit } } @@ -90,10 +89,15 @@ class MemArbiter(n: Int) extends Component { for (i <- 1 until n) io.requestor(i).xact_finish.ready := io.requestor(i-1).xact_finish.ready && !io.requestor(i-1).xact_finish.valid + io.mem.xact_rep.ready := Bool(false) for (i 
<- 0 until n) { val tag = io.mem.xact_rep.bits.tile_xact_id - io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid && tag(log2Up(n)-1,0) === UFix(i) + io.requestor(i).xact_rep.valid := Bool(false) + when (tag(log2Up(n)-1,0) === UFix(i)) { + io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid + io.mem.xact_rep.ready := io.requestor(i).xact_rep.ready + } io.requestor(i).xact_rep.bits := io.mem.xact_rep.bits io.requestor(i).xact_rep.bits.tile_xact_id := tag >> UFix(log2Up(n)) } @@ -107,5 +111,4 @@ class MemArbiter(n: Int) extends Component { } io.mem.xact_abort.ready := Bool(true) - io.mem.xact_rep.ready := Bool(true) } diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index b482673d..3490ce4b 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -72,35 +72,6 @@ trait ScalarOpConstants { val RA = UFix(1, 5); } -trait MemoryOpConstants { - val MT_X = Bits("b???", 3); - val MT_B = Bits("b000", 3); - val MT_H = Bits("b001", 3); - val MT_W = Bits("b010", 3); - val MT_D = Bits("b011", 3); - val MT_BU = Bits("b100", 3); - val MT_HU = Bits("b101", 3); - val MT_WU = Bits("b110", 3); - - val M_X = Bits("b????", 4); - val M_XRD = Bits("b0000", 4); // int load - val M_XWR = Bits("b0001", 4); // int store - val M_PFR = Bits("b0010", 4); // prefetch with intent to read - val M_PFW = Bits("b0011", 4); // prefetch with intent to write - val M_FLA = Bits("b0100", 4); // write back and invlaidate all lines - val M_FENCE = Bits("b0101", 4); // memory fence - val M_INV = Bits("b0110", 4); // write back and invalidate line - val M_CLN = Bits("b0111", 4); // write back line - val M_XA_ADD = Bits("b1000", 4); - val M_XA_SWAP = Bits("b1001", 4); - val M_XA_AND = Bits("b1010", 4); - val M_XA_OR = Bits("b1011", 4); - val M_XA_MIN = Bits("b1100", 4); - val M_XA_MAX = Bits("b1101", 4); - val M_XA_MINU = Bits("b1110", 4); - val M_XA_MAXU = Bits("b1111", 4); -} - trait PCRConstants { val PCR_X = Bits("b???", 3) val 
PCR_N = Bits(0,3) @@ -109,11 +80,6 @@ trait PCRConstants { val PCR_C = Bits(6,3) // clearpcr val PCR_S = Bits(7,3) // setpcr - val SYNC_X = Bits("b??", 2) - val SYNC_N = Bits(0,2); - val SYNC_D = Bits(1,2); - val SYNC_I = Bits(2,2); - val PCR_STATUS = UFix( 0, 5); val PCR_EPC = UFix( 1, 5); val PCR_BADVADDR = UFix( 2, 5); diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 819aa73b..60973cea 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -114,11 +114,11 @@ class Core(implicit conf: RocketConfiguration) extends Component dmem(2).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) vu.io.dmem_req.ready := dmem(2).req.ready - vu.io.dmem_resp.valid := Reg(dmem(2).resp.valid) + vu.io.dmem_resp.valid := dmem(2).resp.valid vu.io.dmem_resp.bits.nack := dmem(2).resp.bits.nack vu.io.dmem_resp.bits.data := dmem(2).resp.bits.data_subword - vu.io.dmem_resp.bits.tag := Reg(dmem(2).resp.bits.tag) - vu.io.dmem_resp.bits.typ := Reg(dmem(2).resp.bits.typ) + vu.io.dmem_resp.bits.tag := dmem(2).resp.bits.tag + vu.io.dmem_resp.bits.typ := dmem(2).resp.bits.typ // share vector integer multiplier with rocket dpath.io.vec_imul_req <> vu.io.cp_imul_req diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ac609557..86835d4a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -26,8 +26,9 @@ class ioCtrlDpath extends Bundle() val sel_wa = Bool(OUTPUT); val sel_wb = UFix(OUTPUT, 3); val pcr = UFix(OUTPUT, 3) - val wb_eret = Bool(OUTPUT); + val eret = Bool(OUTPUT); val mem_load = Bool(OUTPUT); + val wb_load = Bool(OUTPUT) val ex_fp_val= Bool(OUTPUT); val mem_fp_val= Bool(OUTPUT); val ex_wen = Bool(OUTPUT); @@ -44,23 +45,20 @@ class ioCtrlDpath extends Bundle() // inputs from datapath val inst = Bits(INPUT, 32); val jalr_eq = Bool(INPUT) - val br_eq = Bool(INPUT); - val br_lt = Bool(INPUT); - val br_ltu = 
Bool(INPUT); + val ex_br_type = Bits(OUTPUT, 3) + val ex_br_taken = Bool(INPUT) val div_rdy = Bool(INPUT); val div_result_val = Bool(INPUT); val mul_rdy = Bool(INPUT); val mul_result_val = Bool(INPUT); - val mem_wb = Bool(INPUT); + val mem_ll_wb = Bool(INPUT) + val mem_ll_waddr = UFix(INPUT, 5) val ex_waddr = UFix(INPUT, 5); // write addr from execute stage val mem_waddr = UFix(INPUT, 5); // write addr from memory stage val wb_waddr = UFix(INPUT, 5); // write addr from writeback stage val status = Bits(INPUT, 32); - val sboard_clr = Bool(INPUT); - val sboard_clra = UFix(INPUT, 5); val fp_sboard_clr = Bool(INPUT); val fp_sboard_clra = UFix(INPUT, 5); - val fp_sboard_wb_waddr = UFix(INPUT, 5); val irq_timer = Bool(INPUT); val irq_ipi = Bool(INPUT); val pcr_replay = Bool(INPUT) @@ -71,12 +69,13 @@ abstract trait DecodeConstants val xpr64 = Y; val decode_default = - // jalr eret - // fp_val | renx2 div_val | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,BR_X, X,X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,MUL_X, X,X,WA_X, WB_X, PCR_X,SYNC_X,X,X,X,X) + // fence.i + // jalr | eret + // fp_val | renx2 div_val | | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,BR_X, X,X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,MUL_X, X,X,WA_X, WB_X, PCR_X,N,X,X,X,X) val table: Array[(Bits, List[Bits])] } @@ -84,237 +83,239 @@ abstract trait DecodeConstants object XDecode extends DecodeConstants { val table = Array( - // jalr eret - // fp_val | renx2 div_val | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa 
s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), + // fence.i + // jalr | eret + // fp_val | renx2 div_val | | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RA,WB_PC, PCR_N,SYNC_N,N,N,N,N), - 
JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), - RDNPC-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,SYNC_N,N,N,N,N), + J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RA,WB_PC, PCR_N,N,N,N,N,N), + JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), + JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), + JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), + RDNPC-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), - LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LW-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LBU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LHU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,MUL_X, 
N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SB-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SH-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SW-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LW-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LBU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LHU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SB-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + SH-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + SW-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, 
N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + 
AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDI-> 
List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - XORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvAND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - riscvXOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, 
N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + XORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + riscvAND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + riscvOR-> 
List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + riscvXOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + 
SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVU-> List(Y, N,N,BR_N, 
N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), + DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,Y,N,N), - SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_S,SYNC_N,N,N,Y,Y), - CLEARPCR-> List(Y, N,N,BR_N, 
N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_C,SYNC_N,N,N,Y,Y), - ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,Y,N,Y,N), - FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), - FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_I,N,N,N,Y), - CFLUSH-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FLA, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,Y), - MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_F,SYNC_N,N,N,Y,Y), - MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_T,SYNC_N,N,N,Y,Y), - RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), - RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,SYNC_N,N,N,N,N), - RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_IRT,PCR_N,SYNC_N,N,N,N,N)) + SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,Y,N,N), + SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_S,N,N,N,Y,Y), + CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_C,N,N,N,Y,Y), + ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,Y,N,Y,N), + FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,Y,N,N,N,Y), + MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_F,N,N,N,Y,Y), + MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, 
N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_T,N,N,N,Y,Y), + RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,N,N,N,N,N), + RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,N,N,N,N,N), + RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_IRT,PCR_N,N,N,N,N,N)) } object FDecode extends DecodeConstants { val table = Array( - // jalr eret - // fp_val | renx2 div_val | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSGNJN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMIN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMIN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMAX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, 
N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMAX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMUL_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMUL_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FNMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFTX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFTX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, 
N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_W_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_W_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_WU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_WU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_L_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_L_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_LU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_LU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FEQ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FEQ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLT_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLT_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLE_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLE_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MXTF_S-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MXTF_D-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, 
MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_S_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FCVT_D_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MFFSR-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - MTFSR-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FLW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FLD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FSW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - FSD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N)) + // fence.i + // jalr | eret + // fp_val | renx2 div_val | | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, 
N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMIN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMIN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMAX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMAX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMUL_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMUL_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, 
DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MFTX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MFTX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_W_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_W_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_WU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_WU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_L_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_L_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_LU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_LU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, 
PCR_N,N,N,N,N,N), + FEQ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FEQ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLT_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLT_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLE_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLE_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MXTF_S-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MXTF_D-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_S_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_S_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_S_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_S_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MFFSR-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MTFSR-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, 
MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + FLD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + FSW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + FSD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N)) } object VDecode extends DecodeConstants { val table = Array( - // jalr eret - // fp_val | renx2 div_val | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | sync | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - VVCFGIVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VVCFG-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VSETVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,Y), - VF-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - VMSV-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,SYNC_N,N,N,N,N), - FENCE_V_L-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,N,N), - FENCE_V_G-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_D,N,N,N,N), - VLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, 
DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLWU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLHU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLBU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, 
N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTWU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTHU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VLSTBU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VSSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), - VFSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,N,N), + // fence.i + // jalr | eret + // fp_val | renx2 div_val | | syscall + // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + VVCFGIVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, 
DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), + VVCFG-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), + VSETVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), + VF-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + VMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + VMSV-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FENCE_V_L-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + FENCE_V_G-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + VLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLWU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLHU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLBU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTWU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTHU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTBU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, 
MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VENQCMD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQIMM1-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQIMM2-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VENQCNT-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VXCPTEVAC-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,SYNC_N,N,N,Y,N), - VXCPTKILL-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N), - VXCPTHOLD-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,SYNC_N,N,N,Y,N)) + VENQCMD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VENQIMM1-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VENQIMM2-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VENQCNT-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VXCPTEVAC-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), 
+ VXCPTKILL-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,Y,N), + VXCPTHOLD-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,Y,N)) } class Control(implicit conf: RocketConfiguration) extends Component @@ -342,7 +343,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val id_int_val :: id_fp_val :: id_vec_val :: id_br_type :: id_jalr :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 - val id_pcr :: id_sync :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 + val id_pcr :: id_fence_i :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 val id_raddr3 = io.dpath.inst(16,12); val id_raddr2 = io.dpath.inst(21,17); @@ -395,13 +396,13 @@ class Control(implicit conf: RocketConfiguration) extends Component val wb_reg_wen = Reg(resetVal = Bool(false)) val wb_reg_fp_wen = Reg(resetVal = Bool(false)) val wb_reg_flush_inst = Reg(resetVal = Bool(false)) + val wb_reg_mem_val = Reg(resetVal = Bool(false)) val wb_reg_eret = Reg(resetVal = Bool(false)) val wb_reg_xcpt = Reg(resetVal = Bool(false)) val wb_reg_replay = Reg(resetVal = Bool(false)) val wb_reg_replay_next = Reg(resetVal = Bool(false)) val wb_reg_cause = Reg(){UFix()} val wb_reg_fp_val = Reg(resetVal = Bool(false)) - val wb_reg_dcache_miss = Reg(io.dmem.resp.bits.miss || io.dmem.resp.bits.nack, resetVal = Bool(false)) val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) val take_pc = Bool() @@ -509,7 +510,7 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_wen := id_wen ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen ex_reg_eret := id_eret.toBool; - ex_reg_flush_inst := (id_sync === SYNC_I); + ex_reg_flush_inst := id_fence_i ex_reg_fp_val := id_fp_val ex_reg_vec_val := 
id_vec_val.toBool ex_reg_replay_next := id_replay_next @@ -519,20 +520,22 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_xcpt := id_xcpt } - val br_taken = - Mux(ex_reg_br_type === BR_EQ, io.dpath.br_eq, - Mux(ex_reg_br_type === BR_NE, ~io.dpath.br_eq, - Mux(ex_reg_br_type === BR_LT, io.dpath.br_lt, - Mux(ex_reg_br_type === BR_GE, ~io.dpath.br_lt, - Mux(ex_reg_br_type === BR_LTU, io.dpath.br_ltu, - Mux(ex_reg_br_type === BR_GEU, ~io.dpath.br_ltu, - ex_reg_br_type === BR_J)))))) - val take_pc_ex = !Mux(ex_reg_jalr, ex_reg_btb_hit && io.dpath.jalr_eq, ex_reg_btb_hit === br_taken) + // replay inst in ex stage + val wb_dcache_miss = wb_reg_mem_val && (wb_reg_wen || wb_reg_fp_wen) && !io.dmem.resp.valid + val replay_ex = wb_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || + ex_reg_mem_val && !io.dmem.req.ready || + ex_reg_div_val && !io.dpath.div_rdy || + ex_reg_mul_val && !io.dpath.mul_rdy || + mem_reg_replay_next + ctrl_killx := take_pc_wb || replay_ex + + val take_pc_ex = !Mux(ex_reg_jalr, ex_reg_btb_hit && io.dpath.jalr_eq, ex_reg_btb_hit === io.dpath.ex_br_taken) val (ex_xcpt, ex_cause) = checkExceptions(List( (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), (ex_reg_fp_val && io.fpu.illegal_rm, UFix(2)))) + mem_reg_replay := replay_ex && !take_pc_wb; mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb when (ex_xcpt) { mem_reg_cause := ex_cause } mem_reg_div_val := ex_reg_div_val && io.dpath.div_rdy @@ -568,9 +571,16 @@ class Control(implicit conf: RocketConfiguration) extends Component (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), (mem_reg_mem_val && io.dmem.xcpt.ma.ld, UFix( 8)), (mem_reg_mem_val && io.dmem.xcpt.ma.st, UFix( 9)), - (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UFix(10)), - (mem_reg_mem_val && io.dmem.xcpt.pf.st, UFix(11)))) + (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UFix(10)), + (mem_reg_mem_val && io.dmem.xcpt.pf.st, UFix(11)))) + val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem + val 
ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen) + val replay_mem = ll_wb_kill_mem || mem_reg_replay || fpu_kill_mem + val killm_common = ll_wb_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid + ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem + + wb_reg_replay := replay_mem && !take_pc_wb wb_reg_xcpt := mem_xcpt && !take_pc_wb && !wb_reg_replay_next when (mem_xcpt) { wb_reg_cause := mem_cause } @@ -581,6 +591,7 @@ class Control(implicit conf: RocketConfiguration) extends Component wb_reg_fp_wen := Bool(false); wb_reg_eret := Bool(false); wb_reg_flush_inst := Bool(false); + wb_reg_mem_val := Bool(false) wb_reg_div_mul_val := Bool(false); wb_reg_fp_val := Bool(false) wb_reg_replay_next := Bool(false) @@ -592,77 +603,45 @@ class Control(implicit conf: RocketConfiguration) extends Component wb_reg_fp_wen := mem_reg_fp_wen; wb_reg_eret := mem_reg_eret && !mem_reg_replay wb_reg_flush_inst := mem_reg_flush_inst; + wb_reg_mem_val := mem_reg_mem_val wb_reg_div_mul_val := mem_reg_div_val || mem_reg_mul_val wb_reg_fp_val := mem_reg_fp_val wb_reg_replay_next := mem_reg_replay_next } - val sboard = new rocketCtrlSboard(32, 3, 2); - sboard.io.r(0).addr := id_raddr2.toUFix; - sboard.io.r(1).addr := id_raddr1.toUFix; - sboard.io.r(2).addr := id_waddr.toUFix; + val replay_wb = io.dmem.resp.bits.nack || wb_reg_replay || vec_replay || io.dpath.pcr_replay - // scoreboard set (for D$ misses, div, mul) - sboard.io.w(0).en := wb_reg_div_mul_val || wb_reg_dcache_miss && wb_reg_wen - sboard.io.w(0).data := Bool(true) - sboard.io.w(0).addr := io.dpath.wb_waddr + class Scoreboard + { + val r = Reg(resetVal = Bits(0)) + var next = r + var ens = Bool(false) + def apply(addr: UFix) = r(addr) + def set(en: Bool, addr: UFix): Unit = update(en, next | mask(en, addr)) + def clear(en: Bool, addr: UFix): Unit = update(en, next & ~mask(en, addr)) + private def mask(en: Bool, addr: UFix) = Mux(en, UFix(1) << addr, UFix(0)) + private def update(en: Bool, update: 
Bits) = { + next = update + ens = ens || en + when (ens) { r := next } + } + } - sboard.io.w(1).en := io.dpath.sboard_clr - sboard.io.w(1).data := Bool(false) - sboard.io.w(1).addr := io.dpath.sboard_clra + val sboard = new Scoreboard + sboard.set(wb_reg_div_mul_val || wb_dcache_miss && io.dpath.wb_wen, io.dpath.wb_waddr) + sboard.clear(io.dpath.mem_ll_wb, io.dpath.mem_ll_waddr) - val id_stall_raddr2 = id_renx2.toBool && sboard.io.r(0).data - val id_stall_raddr1 = id_renx1.toBool && sboard.io.r(1).data - val id_stall_waddr = id_wen.toBool && sboard.io.r(2).data - - var id_stall_fpu = Bool(false) - if (HAVE_FPU) { - val fp_sboard = new rocketCtrlSboard(32, 4, 3); - fp_sboard.io.r(0).addr := id_raddr1.toUFix - fp_sboard.io.r(1).addr := id_raddr2.toUFix - fp_sboard.io.r(2).addr := id_raddr3.toUFix - fp_sboard.io.r(3).addr := id_waddr.toUFix - - fp_sboard.io.w(0).en := wb_reg_dcache_miss && wb_reg_fp_wen || io.fpu.sboard_set - fp_sboard.io.w(0).data := Bool(true) - fp_sboard.io.w(0).addr := io.dpath.fp_sboard_wb_waddr - - fp_sboard.io.w(1).en := io.dpath.fp_sboard_clr - fp_sboard.io.w(1).data := Bool(false) - fp_sboard.io.w(1).addr := io.dpath.fp_sboard_clra - - fp_sboard.io.w(2).en := io.fpu.sboard_clr - fp_sboard.io.w(2).data := Bool(false) - fp_sboard.io.w(2).addr := io.fpu.sboard_clra - - id_stall_fpu = io.fpu.dec.ren1 && fp_sboard.io.r(0).data || - io.fpu.dec.ren2 && fp_sboard.io.r(1).data || - io.fpu.dec.ren3 && fp_sboard.io.r(2).data || - io.fpu.dec.wen && fp_sboard.io.r(3).data - } - - // replay inst in ex stage - val replay_ex = wb_reg_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || - ex_reg_mem_val && !io.dmem.req.ready || - ex_reg_div_val && !io.dpath.div_rdy || - ex_reg_mul_val && !io.dpath.mul_rdy || - mem_reg_replay_next - ctrl_killx := take_pc_wb || replay_ex - - // replay inst in mem stage - val mem_ll_wb = io.dpath.mem_wb || io.dpath.mul_result_val || io.dpath.div_result_val - val dmem_kill_mem = mem_reg_valid && io.dmem.resp.bits.nack - val 
fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem - val replay_mem = dmem_kill_mem || mem_reg_wen && mem_ll_wb || mem_reg_replay || fpu_kill_mem - val killm_common = mem_reg_wen && mem_ll_wb || take_pc_wb || mem_reg_xcpt || !mem_reg_valid - ctrl_killm := killm_common || mem_xcpt || dmem_kill_mem || fpu_kill_mem - - mem_reg_replay := replay_ex && !take_pc_wb; - - wb_reg_replay := replay_mem && !take_pc_wb - - val replay_wb = wb_reg_replay || vec_replay || io.dpath.pcr_replay + val id_stall_fpu = if (HAVE_FPU) { + val fp_sboard = new Scoreboard + fp_sboard.set(wb_dcache_miss && wb_reg_fp_wen && !replay_wb || io.fpu.sboard_set, io.dpath.wb_waddr) + fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) + fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) + io.fpu.dec.ren1 && fp_sboard(id_raddr1) || + io.fpu.dec.ren2 && fp_sboard(id_raddr2) || + io.fpu.dec.ren3 && fp_sboard(id_raddr3) || + io.fpu.dec.wen && fp_sboard(id_waddr) + } else Bool(false) // write cause to PCR on an exception io.dpath.exception := wb_reg_xcpt @@ -671,7 +650,7 @@ class Control(implicit conf: RocketConfiguration) extends Component io.dpath.vec_irq_aux_wen := wb_reg_xcpt && wb_reg_cause >= UFix(24) && wb_reg_cause < UFix(32) // control transfer from ex/wb - take_pc_wb := wb_reg_replay || vec_replay || wb_reg_xcpt || wb_reg_eret + take_pc_wb := replay_wb || wb_reg_xcpt || wb_reg_eret take_pc := take_pc_ex || take_pc_wb; io.dpath.sel_pc := @@ -700,7 +679,7 @@ class Control(implicit conf: RocketConfiguration) extends Component fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) // stall for RAW/WAW hazards on LB/LH and mul/div in memory stage. 
- val mem_mem_cmd_bh = + val mem_mem_cmd_bh = if (conf.fastLoadByte) Bool(false) else (mem_reg_mem_type === MT_B) || (mem_reg_mem_type === MT_BU) || (mem_reg_mem_type === MT_H) || (mem_reg_mem_type === MT_HU) val data_hazard_mem = mem_reg_wen && @@ -718,20 +697,24 @@ class Control(implicit conf: RocketConfiguration) extends Component // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. val data_hazard_wb = wb_reg_wen && - (id_raddr1 != UFix(0) && id_renx1 && id_raddr1 === io.dpath.wb_waddr || - id_raddr2 != UFix(0) && id_renx2 && id_raddr2 === io.dpath.wb_waddr || - id_waddr != UFix(0) && id_wen && id_waddr === io.dpath.wb_waddr) + (id_raddr1 != UFix(0) && id_renx1 && (id_raddr1 === io.dpath.wb_waddr) || + id_raddr2 != UFix(0) && id_renx2 && (id_raddr2 === io.dpath.wb_waddr) || + id_waddr != UFix(0) && id_wen && (id_waddr === io.dpath.wb_waddr)) val fp_data_hazard_wb = wb_reg_fp_wen && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.wb_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr || io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr) - val id_wb_hazard = data_hazard_wb && (wb_reg_dcache_miss || wb_reg_div_mul_val) || - fp_data_hazard_wb && (wb_reg_dcache_miss || wb_reg_fp_val) + val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val) || + fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) + + val id_sboard_hazard = + (id_raddr1 != UFix(0) && id_renx1 && sboard(id_raddr1) || + id_raddr2 != UFix(0) && id_renx2 && sboard(id_raddr2) || + id_waddr != UFix(0) && id_wen && sboard(id_waddr)) val ctrl_stalld = - id_ex_hazard || id_mem_hazard || id_wb_hazard || - id_stall_raddr1 || id_stall_raddr2 || id_stall_waddr || + id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || id_fp_val && id_stall_fpu || id_mem_val && !io.dmem.req.ready || vec_stalld @@ -742,6 +725,7 @@ class Control(implicit conf: RocketConfiguration) extends Component io.imem.invalidate := 
wb_reg_flush_inst io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen + io.dpath.wb_load := wb_reg_mem_val && io.dpath.wb_wen io.dpath.ren2 := id_renx2.toBool; io.dpath.ren1 := id_renx1.toBool; io.dpath.sel_alu2 := id_sel_alu2.toUFix @@ -758,13 +742,14 @@ class Control(implicit conf: RocketConfiguration) extends Component io.dpath.ex_jalr := ex_reg_jalr io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; - io.dpath.wb_wen := wb_reg_wen; - io.dpath.wb_valid := wb_reg_valid && !vec_replay + io.dpath.wb_wen := wb_reg_wen && !replay_wb + io.dpath.wb_valid := wb_reg_valid && !replay_wb io.dpath.sel_wa := id_sel_wa.toBool; io.dpath.sel_wb := id_sel_wb.toUFix io.dpath.pcr := wb_reg_pcr.toUFix - io.dpath.wb_eret := wb_reg_eret; + io.dpath.eret := wb_reg_eret io.dpath.ex_mem_type := ex_reg_mem_type + io.dpath.ex_br_type := ex_reg_br_type io.fpu.valid := !ctrl_killd && id_fp_val io.fpu.killx := ctrl_killx diff --git a/rocket/src/main/scala/ctrl_util.scala b/rocket/src/main/scala/ctrl_util.scala deleted file mode 100644 index 34b6d40b..00000000 --- a/rocket/src/main/scala/ctrl_util.scala +++ /dev/null @@ -1,32 +0,0 @@ -package rocket - -import Chisel._ -import Node._ - -class rocketCtrlSboard(entries: Int, nread: Int, nwrite: Int) extends Component -{ - class read_port extends Bundle { - val addr = UFix(INPUT, log2Up(entries)) - val data = Bool(OUTPUT) - } - class write_port extends Bundle { - val en = Bool(INPUT) - val addr = UFix(INPUT, log2Up(entries)) - val data = Bool(INPUT) - } - - val io = new Bundle { - val r = Vec(nread) { new read_port() } - val w = Vec(nwrite) { new write_port() } - } - - val busybits = Reg(resetVal = Bits(0, entries)) - - val wmasks = (0 until nwrite).map(i => Fill(entries, io.w(i).en) & (UFix(1) << io.w(i).addr)) - val wdatas = (0 until nwrite).map(i => Mux(io.w(i).data, wmasks(i), UFix(0))) - var next = busybits & ~wmasks.reduceLeft(_|_) | wdatas.reduceLeft(_|_) - busybits := next - - for (i <- 0 until nread) - io.r(i).data := 
busybits(io.r(i).addr) -} diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 4e6bcb7c..3979bef0 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -4,6 +4,7 @@ import Chisel._ import Node._ import Constants._ import Instructions._ +import Util._ import hwacha._ class Datapath(implicit conf: RocketConfiguration) extends Component @@ -21,13 +22,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val vec_imul_resp = Bits(INPUT, hwacha.Constants.SZ_XLEN) } - val pcr = new rocketDpathPCR(); - val ex_pcr = pcr.io.r.data; - - val alu = new ALU - val ex_alu_out = alu.io.out; - val ex_alu_adder_out = alu.io.adder_out; - // execute definitions val ex_reg_pc = Reg() { UFix() }; val ex_reg_inst = Reg() { Bits() }; @@ -59,7 +53,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val wb_reg_rs2 = Reg() { Bits() }; val wb_reg_waddr = Reg() { UFix() } val wb_reg_wdata = Reg() { Bits() } - val wb_reg_dmem_wdata = Reg() { Bits() } val wb_reg_vec_waddr = Reg() { UFix() } val wb_reg_vec_wdata = Reg() { Bits() } val wb_reg_raddr1 = Reg() { UFix() }; @@ -67,25 +60,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val wb_reg_ll_wb = Reg(resetVal = Bool(false)); val wb_wdata = Bits(); - val dmem_resp_replay = Bool() - val r_dmem_resp_replay = Reg(resetVal = Bool(false)); - val r_dmem_fp_replay = Reg(resetVal = Bool(false)); - val r_dmem_resp_waddr = Reg() { UFix() }; - - val ex_pc_plus4 = ex_reg_pc + UFix(4); - val ex_branch_target = ex_reg_pc + Cat(ex_reg_op2(VADDR_BITS-1,0), Bits(0,1)).toUFix - - val ex_ea_sign = Mux(ex_alu_adder_out(VADDR_BITS-1), ~ex_alu_adder_out(63,VADDR_BITS) === UFix(0), ex_alu_adder_out(63,VADDR_BITS) != UFix(0)) - val ex_effective_address = Cat(ex_ea_sign, ex_alu_adder_out(VADDR_BITS-1,0)).toUFix - - // hook up I$ - io.imem.req.bits.currentpc := ex_reg_pc - io.imem.req.bits.pc := - Mux(io.ctrl.sel_pc === PC_EX4, 
ex_pc_plus4, - Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), - Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec).toUFix, - wb_reg_pc))) // PC_WB - // instruction decode stage val id_inst = io.imem.resp.bits.data val id_pc = io.imem.resp.bits.pc @@ -159,16 +133,16 @@ class Datapath(implicit conf: RocketConfiguration) extends Component when (io.ctrl.ren2) { ex_reg_rs2 := id_rs2 } } - val ex_rs1 = Mux(Reg(id_rs1_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs1) - val ex_rs2 = Mux(Reg(id_rs2_dmem_bypass), wb_reg_dmem_wdata, ex_reg_rs2) - val ex_op2 = Mux(Reg(id_op2_dmem_bypass), wb_reg_dmem_wdata, ex_reg_op2) + val dmem_resp_data = if (conf.fastLoadByte) io.dmem.resp.bits.data_subword else io.dmem.resp.bits.data + val ex_rs1 = Mux(Reg(id_rs1_dmem_bypass), dmem_resp_data, ex_reg_rs1) + val ex_rs2 = Mux(Reg(id_rs2_dmem_bypass), dmem_resp_data, ex_reg_rs2) + val ex_op2 = Mux(Reg(id_op2_dmem_bypass), dmem_resp_data, ex_reg_op2) - alu.io.dw := ex_reg_ctrl_fn_dw; - alu.io.fn := ex_reg_ctrl_fn_alu; - alu.io.in2 := ex_op2.toUFix - alu.io.in1 := ex_rs1.toUFix - - io.fpu.fromint_data := ex_rs1 + val alu = new ALU + alu.io.dw := ex_reg_ctrl_fn_dw; + alu.io.fn := ex_reg_ctrl_fn_alu; + alu.io.in2 := ex_op2.toUFix + alu.io.in1 := ex_rs1.toUFix // divider val div = new rocketDivider(earlyOut = true) @@ -178,7 +152,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component div.io.req.bits.in1 := ex_op2 div.io.req_tag := ex_reg_waddr div.io.req_kill := io.ctrl.div_kill - div.io.resp_rdy := !dmem_resp_replay + div.io.resp_rdy := Bool(true) io.ctrl.div_rdy := div.io.req.ready io.ctrl.div_result_val := div.io.resp_val @@ -197,11 +171,23 @@ class Datapath(implicit conf: RocketConfiguration) extends Component mul_io.req.bits.in1 := ex_op2 mul_io.req_tag := ex_reg_waddr mul_io.req_kill := io.ctrl.mul_kill - mul_io.resp_rdy := !dmem_resp_replay && !div.io.resp_val + mul_io.resp_rdy := Bool(true) io.ctrl.mul_rdy 
:= mul_io.req.ready io.ctrl.mul_result_val := mul_io.resp_val - io.ctrl.ex_waddr := ex_reg_waddr; // for load/use hazard detection & bypass control + io.fpu.fromint_data := ex_rs1 + io.ctrl.ex_waddr := ex_reg_waddr + + def vaSign(a0: Bits, ea: Bits) = { + // efficient means to compress 64-bit VA into VADDR_BITS+1 bits + // (VA is bad if VA(VADDR_BITS) != VA(VADDR_BITS-1)) + val a = a0 >> VADDR_BITS-1 + val e = ea(VADDR_BITS,VADDR_BITS-1) + Mux(a === UFix(0) || a === UFix(1), e != UFix(0), + Mux(a === Fix(-1) || a === Fix(-2), e === Fix(-1), + Bool(false))) + } + val ex_effective_address = Cat(vaSign(ex_rs1, alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUFix // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module @@ -211,15 +197,14 @@ class Datapath(implicit conf: RocketConfiguration) extends Component require(io.dmem.req.bits.tag.getWidth >= 6) // processor control regfile read + val pcr = new rocketDpathPCR() pcr.io.r.en := io.ctrl.pcr != PCR_N pcr.io.r.addr := wb_reg_raddr1 pcr.io.host <> io.host - - io.ctrl.irq_timer := pcr.io.irq_timer; - io.ctrl.irq_ipi := pcr.io.irq_ipi; - io.ctrl.status := pcr.io.status; - io.ctrl.pcr_replay := pcr.io.replay + pcr.io <> io.ctrl + pcr.io.pc := wb_reg_pc + io.ctrl.pcr_replay := pcr.io.replay io.ptw.ptbr := pcr.io.ptbr io.ptw.invalidate := pcr.io.ptbr_wen @@ -227,11 +212,17 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // branch resolution logic io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0) - io.ctrl.br_eq := (ex_rs1 === ex_rs2) - io.ctrl.br_ltu := (ex_rs1.toUFix < ex_rs2.toUFix) - io.ctrl.br_lt := - (~(ex_rs1(63) ^ ex_rs2(63)) & io.ctrl.br_ltu | - ex_rs1(63) & ~ex_rs2(63)).toBool + io.ctrl.ex_br_taken := + Mux(io.ctrl.ex_br_type === BR_EQ, ex_rs1 === ex_rs2, + Mux(io.ctrl.ex_br_type === BR_NE, ex_rs1 != ex_rs2, + Mux(io.ctrl.ex_br_type === BR_LT, ex_rs1.toFix < ex_rs2.toFix, + 
Mux(io.ctrl.ex_br_type === BR_GE, ex_rs1.toFix >= ex_rs2.toFix, + Mux(io.ctrl.ex_br_type === BR_LTU, ex_rs1 < ex_rs2, + Mux(io.ctrl.ex_br_type === BR_GEU, ex_rs1 >= ex_rs2, + io.ctrl.ex_br_type === BR_J)))))) + + val ex_pc_plus4 = ex_reg_pc + UFix(4) + val ex_branch_target = ex_reg_pc + Cat(ex_reg_op2(VADDR_BITS-1,0), Bits(0,1)).toUFix // time stamp counter val tsc_reg = Reg(resetVal = UFix(0,64)); @@ -245,7 +236,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component Mux(ex_reg_ctrl_sel_wb === WB_PC, ex_pc_plus4.toFix, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, - ex_alu_out))).toBits // WB_ALU + alu.io.out))).toBits // WB_ALU // memory stage mem_reg_kill := ex_reg_kill @@ -262,23 +253,29 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // for load/use hazard detection (load byte/halfword) io.ctrl.mem_waddr := mem_reg_waddr; - // 32/64 bit load handling (moved to earlier in file) - // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool val dmem_resp_waddr = io.dmem.resp.bits.tag.toUFix >> UFix(1) - dmem_resp_replay := io.dmem.resp.bits.replay && dmem_resp_xpu; - r_dmem_resp_replay := dmem_resp_replay - r_dmem_resp_waddr := dmem_resp_waddr - r_dmem_fp_replay := io.dmem.resp.bits.replay && dmem_resp_fpu; + val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu - val mem_ll_waddr = Mux(dmem_resp_replay, dmem_resp_waddr, - Mux(div.io.resp_val, div.io.resp_tag, - mul_io.resp_tag)) - val mem_ll_wdata = Mux(div.io.resp_val, div.io.resp_bits, - mul_io.resp_bits) - val mem_ll_wb = dmem_resp_replay || div.io.resp_val || mul_io.resp_val + val mem_ll_wdata = Bits() + mem_ll_wdata := mul_io.resp_bits + io.ctrl.mem_ll_waddr := mul_io.resp_tag + io.ctrl.mem_ll_wb := mul_io.resp_val + when (div.io.resp_val) { + mul_io.resp_rdy := Bool(false) + mem_ll_wdata := div.io.resp_bits + io.ctrl.mem_ll_waddr := 
div.io.resp_tag + io.ctrl.mem_ll_wb := Bool(true) + } + when (dmem_resp_replay) { + mul_io.resp_rdy := Bool(false) + div.io.resp_rdy := Bool(false) + mem_ll_wdata := io.dmem.resp.bits.data_subword + io.ctrl.mem_ll_waddr := dmem_resp_waddr + io.ctrl.mem_ll_wb := Bool(true) + } io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu io.fpu.dmem_resp_data := io.dmem.resp.bits.data @@ -286,9 +283,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.fpu.dmem_resp_tag := dmem_resp_waddr // writeback stage - when (io.ctrl.mem_load) { - wb_reg_dmem_wdata := io.dmem.resp.bits.data - } when (!mem_reg_kill) { wb_reg_pc := mem_reg_pc wb_reg_inst := mem_reg_inst @@ -300,15 +294,12 @@ class Datapath(implicit conf: RocketConfiguration) extends Component wb_reg_waddr := mem_reg_waddr wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata) } - wb_reg_ll_wb := mem_ll_wb - when (mem_ll_wb) { - wb_reg_waddr := mem_ll_waddr + wb_reg_ll_wb := io.ctrl.mem_ll_wb + when (io.ctrl.mem_ll_wb) { + wb_reg_waddr := io.ctrl.mem_ll_waddr wb_reg_wdata := mem_ll_wdata } - // regfile write - val wb_src_dmem = Reg(io.ctrl.mem_load) && io.ctrl.wb_valid || r_dmem_resp_replay - if (HAVE_VEC) { // vector datapath @@ -333,7 +324,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component wb_wdata := Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), - Mux(wb_src_dmem, io.dmem.resp.bits.data_subword, + Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword, wb_reg_wdata)) } else @@ -344,7 +335,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component pcr.io.vec_nfregs := UFix(0) wb_wdata := - Mux(wb_src_dmem, io.dmem.resp.bits.data_subword, + Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword, wb_reg_wdata) } @@ -355,14 +346,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component when (rf_wen) { writeRF(rf_waddr, rf_wdata) } io.ctrl.wb_waddr := wb_reg_waddr - io.ctrl.mem_wb := dmem_resp_replay; // 
scoreboard clear (for div/mul and D$ load miss writebacks) - io.ctrl.sboard_clr := mem_ll_wb - io.ctrl.sboard_clra := mem_ll_waddr - io.ctrl.fp_sboard_clr := r_dmem_fp_replay - io.ctrl.fp_sboard_clra := r_dmem_resp_waddr - io.ctrl.fp_sboard_wb_waddr := Reg(mem_reg_waddr) + io.ctrl.fp_sboard_clr := io.dmem.resp.bits.replay && dmem_resp_fpu + io.ctrl.fp_sboard_clra := dmem_resp_waddr // processor control regfile write pcr.io.w.addr := wb_reg_raddr1 @@ -371,10 +358,11 @@ class Datapath(implicit conf: RocketConfiguration) extends Component Mux(io.ctrl.pcr === PCR_C, pcr.io.r.data & ~wb_reg_wdata, wb_reg_wdata)) - pcr.io.eret := io.ctrl.wb_eret; - pcr.io.exception := io.ctrl.exception; - pcr.io.cause := io.ctrl.cause; - pcr.io.pc := wb_reg_pc; - pcr.io.badvaddr_wen := io.ctrl.badvaddr_wen; - pcr.io.vec_irq_aux_wen := io.ctrl.vec_irq_aux_wen + // hook up I$ + io.imem.req.bits.currentpc := ex_reg_pc + io.imem.req.bits.pc := + Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, + Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), + Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec).toUFix, + wb_reg_pc))) // PC_WB } diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 3ea883db..9e2961fe 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -183,7 +183,7 @@ object RegEn when (en) { r := data } r } - def apply[T <: Bits](data: T, en: Bool, resetVal: Bool) = { + def apply[T <: Bits](data: T, en: Bool, resetVal: T) = { val r = Reg(resetVal = resetVal) { data.clone } when (en) { r := data } r @@ -478,15 +478,10 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component val wb_ctrl = RegEn(mem_ctrl, mem_reg_valid) // load response - val load_wb = Reg(io.dpath.dmem_resp_val, resetVal = Bool(false)) - val load_wb_single = Reg() { Bool() } - val load_wb_data = Reg() { Bits(width = 64) } // XXX WTF why doesn't bit width inference work for the regfile?! 
- val load_wb_tag = Reg() { UFix() } - when (io.dpath.dmem_resp_val) { - load_wb_single := io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU - load_wb_data := io.dpath.dmem_resp_data - load_wb_tag := io.dpath.dmem_resp_tag - } + val load_wb = io.dpath.dmem_resp_val + val load_wb_single = io.dpath.dmem_resp_type === MT_W + val load_wb_data = io.dpath.dmem_resp_data + val load_wb_tag = io.dpath.dmem_resp_tag val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) val load_wb_data_recoded = Mux(load_wb_single, Cat(Fix(-1, 32), rec_s), rec_d) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 5c9bed76..5db2faf0 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1,6 +1,7 @@ package rocket import Chisel._ +import Node._ import Constants._ import uncore._ import Util._ @@ -78,66 +79,46 @@ case class LoadGen(typ: Bits, addr: Bits, dat: Bits) } class MSHRReq(implicit conf: DCacheConfig) extends Bundle { - val tag_miss = Bool() val old_dirty = Bool() val old_tag = Bits(width = conf.tagbits) - val tag = Bits(width = conf.tagbits) - val idx = Bits(width = conf.idxbits) - val way_oh = Bits(width = conf.ways) + val way_en = Bits(width = conf.ways) - val offset = Bits(width = conf.offbits) - val cmd = Bits(width = 4) - val typ = Bits(width = 3) - val cpu_tag = Bits(width = conf.reqtagbits) - val data = Bits(width = conf.databits) + val addr = UFix(width = conf.paddrbits) + val cmd = Bits(width = 4) + val typ = Bits(width = 3) + val tag = Bits(width = conf.reqtagbits) + val data = Bits(width = conf.databits) override def clone = new MSHRReq().asInstanceOf[this.type] } -class RPQEntry(implicit conf: DCacheConfig) extends Bundle { - val offset = Bits(width = conf.offbits) - val cmd = Bits(width = 4) - val typ = Bits(width = 3) +class Replay(implicit conf: DCacheConfig) extends HellaCacheReq { val 
sdq_id = UFix(width = log2Up(conf.nsdq)) - val cpu_tag = Bits(width = conf.reqtagbits) - - override def clone = new RPQEntry().asInstanceOf[this.type] -} - -class Replay(implicit conf: DCacheConfig) extends RPQEntry { - val idx = Bits(width = conf.idxbits) - val way_oh = Bits(width = conf.ways) override def clone = new Replay().asInstanceOf[this.type] } -class DataReq(implicit conf: DCacheConfig) extends Bundle { - val idx = Bits(width = conf.idxbits) - val offset = Bits(width = conf.offbits) - val cmd = Bits(width = 4) - val typ = Bits(width = 3) - val data = Bits(width = conf.databits) - val way_oh = Bits(width = conf.ways) +class DataReadReq(implicit conf: DCacheConfig) extends Bundle { + val way_en = Bits(width = conf.ways) + val addr = Bits(width = conf.untagbits) - override def clone = new DataReq().asInstanceOf[this.type] + override def clone = new DataReadReq().asInstanceOf[this.type] } -class DataArrayReq(implicit conf: DCacheConfig) extends Bundle { +class DataWriteReq(implicit conf: DCacheConfig) extends Bundle { val way_en = Bits(width = conf.ways) - val idx = Bits(width = conf.idxbits) - val offset = Bits(width = log2Up(REFILL_CYCLES)) - val rw = Bool() - val wmask = Bits(width = MEM_DATA_BITS/8) + val addr = Bits(width = conf.untagbits) + val wmask = Bits(width = MEM_DATA_BITS/conf.databits) val data = Bits(width = MEM_DATA_BITS) - override def clone = new DataArrayReq().asInstanceOf[this.type] + override def clone = new DataWriteReq().asInstanceOf[this.type] } class WritebackReq(implicit conf: DCacheConfig) extends Bundle { val tag = Bits(width = conf.tagbits) val idx = Bits(width = conf.idxbits) - val way_oh = Bits(width = conf.ways) + val way_en = Bits(width = conf.ways) val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) override def clone = new WritebackReq().asInstanceOf[this.type] @@ -169,14 +150,12 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val req_sdq_id = UFix(INPUT, log2Up(conf.nsdq)) val idx_match = Bool(OUTPUT) 
- val idx = Bits(OUTPUT, conf.idxbits) - val refill_count = Bits(OUTPUT, log2Up(REFILL_CYCLES)) val tag = Bits(OUTPUT, conf.tagbits) - val way_oh = Bits(OUTPUT, conf.ways) val mem_req = (new FIFOIO) { new TransactionInit } + val mem_resp = new DataWriteReq().asOutput val meta_req = (new FIFOIO) { new MetaArrayReq() } - val replay = (new FIFOIO) { new Replay() } + val replay = (new FIFOIO) { new Replay() } val mem_abort = (new PipeIO) { new TransactionAbort }.flip val mem_rep = (new PipeIO) { new TransactionReply }.flip val mem_finish = (new FIFOIO) { new TransactionFinish } @@ -185,9 +164,8 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val probe_refill = (new FIFOIO) { Bool() }.flip } - val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_drain_rpq :: Nil = Enum(7) { UFix() } + val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write :: s_drain_rpq :: Nil = Enum(8) { UFix() } val state = Reg(resetVal = s_invalid) - val flush = Reg { Bool() } val xacx_type = Reg { UFix() } val line_state = Reg { UFix() } @@ -195,15 +173,16 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val req = Reg { new MSHRReq() } val req_cmd = io.req_bits.cmd - val req_use_rpq = (req_cmd != M_PFR) && (req_cmd != M_PFW) && (req_cmd != M_FLA) - val idx_match = req.idx === io.req_bits.idx - val sec_rdy = idx_match && !flush && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) + val req_use_rpq = req_cmd != M_PFR && req_cmd != M_PFW + val req_idx = req.addr(conf.untagbits-1,conf.offbits) + val idx_match = req_idx === io.req_bits.addr(conf.untagbits-1,conf.offbits) + val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && 
!conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) - val rpq = (new Queue(conf.nrpq)) { new RPQEntry } + val rpq = (new Queue(conf.nrpq)) { new Replay } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq rpq.io.enq.bits := io.req_bits rpq.io.enq.bits.sdq_id := io.req_sdq_id - rpq.io.deq.ready := io.replay.ready && (state === s_drain_rpq) || (state === s_invalid) + rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid val abort = io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(id) val reply = io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(id) @@ -214,11 +193,14 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { finish_q.io.enq.valid := wb_done || refill_done finish_q.io.enq.bits.global_xact_id := io.mem_rep.bits.global_xact_id - when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid && io.meta_req.ready) { + when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid) { state := s_invalid } + when (state === s_meta_write && io.meta_req.ready) { + state := s_drain_rpq + } when (state === s_refill_resp) { - when (refill_done) { state := s_drain_rpq } + when (refill_done) { state := s_meta_write } when (reply) { refill_count := refill_count + UFix(1) line_state := conf.co.newStateOnTransactionReply(io.mem_rep.bits, io.mem_req.bits) @@ -226,8 +208,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { when (abort) { state := s_refill_req } } when (state === s_refill_req) { - when (flush) { state := s_drain_rpq } - .elsewhen (abort) { state := s_refill_req } + when (abort) { state := s_refill_req } .elsewhen (io.mem_req.ready) { state := s_refill_resp } } when (state === s_meta_clear && io.meta_req.ready) { @@ -246,51 +227,64 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { xacx_type := conf.co.getTransactionInitTypeOnSecondaryMiss(req_cmd, 
conf.co.newStateOnFlush(), io.mem_req.bits) } when ((state === s_invalid) && io.req_pri_val) { - flush := req_cmd === M_FLA line_state := conf.co.newStateOnFlush() refill_count := UFix(0) xacx_type := conf.co.getTransactionInitTypeOnPrimaryMiss(req_cmd, conf.co.newStateOnFlush()) req := io.req_bits - - when (io.req_bits.tag_miss) { - state := Mux(io.req_bits.old_dirty, s_wb_req, s_refill_req) - } + state := Mux(io.req_bits.old_dirty, s_wb_req, s_refill_req) } io.idx_match := (state != s_invalid) && idx_match - io.idx := req.idx - io.tag := req.tag - io.way_oh := req.way_oh - io.refill_count := refill_count + io.mem_resp := req + io.mem_resp.addr := Cat(req_idx, refill_count) << conf.ramoffbits + io.tag := req.addr >> conf.untagbits io.req_pri_rdy := (state === s_invalid) io.req_sec_rdy := sec_rdy && rpq.io.enq.ready - io.meta_req.valid := (state === s_drain_rpq) && !rpq.io.deq.valid && !finish_q.io.deq.valid || (state === s_meta_clear) - io.meta_req.bits.rw := Bool(true) - io.meta_req.bits.idx := req.idx + io.meta_req.valid := state === s_meta_write || state === s_meta_clear || state === s_drain_rpq + io.meta_req.bits.rw := state != s_drain_rpq + io.meta_req.bits.idx := req_idx io.meta_req.bits.data.state := Mux(state === s_meta_clear, conf.co.newStateOnFlush(), line_state) - io.meta_req.bits.data.tag := req.tag - io.meta_req.bits.way_en := req.way_oh + io.meta_req.bits.data.tag := io.tag + io.meta_req.bits.way_en := req.way_en io.wb_req.valid := (state === s_wb_req) && !(io.probe_writeback.valid && idx_match) io.wb_req.bits.tag := req.old_tag - io.wb_req.bits.idx := req.idx - io.wb_req.bits.way_oh := req.way_oh + io.wb_req.bits.idx := req_idx + io.wb_req.bits.way_en := req.way_en io.wb_req.bits.tile_xact_id := Bits(id) io.probe_writeback.ready := (state != s_wb_resp && state != s_meta_clear && state != s_drain_rpq) || !idx_match io.probe_refill.ready := (state != s_refill_resp && state != s_drain_rpq) || !idx_match - io.mem_req.valid := (state === s_refill_req) && 
!flush + io.mem_req.valid := state === s_refill_req io.mem_req.bits.x_type := xacx_type - io.mem_req.bits.addr := Cat(req.tag, req.idx).toUFix + io.mem_req.bits.addr := Cat(io.tag, req_idx).toUFix io.mem_req.bits.tile_xact_id := Bits(id) io.mem_finish <> finish_q.io.deq - io.replay.valid := (state === s_drain_rpq) && rpq.io.deq.valid - io.replay.bits <> rpq.io.deq.bits - io.replay.bits.idx := req.idx - io.replay.bits.way_oh := req.way_oh + io.replay.valid := state === s_drain_rpq && rpq.io.deq.valid + io.replay.bits := rpq.io.deq.bits + io.replay.bits.phys := Bool(true) + io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(conf.offbits-1,0)).toUFix + + // don't issue back-to-back replays with store->load dependence + val r1_replay_valid = Reg(rpq.io.deq.fire()) + val r2_replay_valid = Reg(r1_replay_valid) + val (r1_replay, r2_replay) = (Reg{new Replay}, Reg{new Replay}) + when (rpq.io.deq.fire()) { r1_replay := rpq.io.deq.bits } + when (r1_replay_valid) { r2_replay := r1_replay } + def offsetMatch(dst: HellaCacheReq, src: HellaCacheReq) = { + def mask(x: HellaCacheReq) = StoreGen(x.typ, x.addr, Bits(0)).mask + // TODO: this is overly restrictive + dst.addr(conf.offbits-1,conf.wordoffbits) === src.addr(conf.offbits-1,conf.wordoffbits) + // && (mask(dst) & mask(src)).orR + } + when (r1_replay_valid && offsetMatch(io.replay.bits, r1_replay) || + r2_replay_valid && offsetMatch(io.replay.bits, r2_replay)) { + rpq.io.deq.ready := Bool(false) + io.replay.bits.cmd := M_FENCE // NOP + } } class MSHRFile(implicit conf: DCacheConfig) extends Component { @@ -298,37 +292,30 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val req = (new FIFOIO) { new MSHRReq }.flip val secondary_miss = Bool(OUTPUT) - val mem_resp_idx = Bits(OUTPUT, conf.idxbits) - val mem_resp_offset = Bits(OUTPUT, log2Up(REFILL_CYCLES)) - val mem_resp_way_oh = Bits(OUTPUT, conf.ways) - - val fence_rdy = Bool(OUTPUT) - val mem_req = (new FIFOIO) { new TransactionInit } + val 
mem_resp = new DataWriteReq().asOutput val meta_req = (new FIFOIO) { new MetaArrayReq() } - val data_req = (new FIFOIO) { new DataReq() } + val replay = (new FIFOIO) { new Replay } val mem_abort = (new PipeIO) { new TransactionAbort }.flip val mem_rep = (new PipeIO) { new TransactionReply }.flip val mem_finish = (new FIFOIO) { new TransactionFinish } val wb_req = (new FIFOIO) { new WritebackReq } val probe = (new FIFOIO) { Bool() }.flip - val cpu_resp_val = Bool(OUTPUT) - val cpu_resp_tag = Bits(OUTPUT, conf.reqtagbits) + val fence_rdy = Bool(OUTPUT) } val sdq_val = Reg(resetVal = Bits(0, conf.nsdq)) val sdq_alloc_id = PriorityEncoder(~sdq_val(conf.nsdq-1,0)) val sdq_rdy = !sdq_val.andR - val (req_read, req_write) = cpuCmdToRW(io.req.bits.cmd) - val sdq_enq = io.req.valid && io.req.ready && req_write + val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd) val sdq = Mem(conf.nsdq) { io.req.bits.data.clone } when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } val idxMatch = Vec(conf.nmshr) { Bool() } val tagList = Vec(conf.nmshr) { Bits() } val wbTagList = Vec(conf.nmshr) { Bits() } - val memRespMux = Vec(conf.nmshr) { new DataArrayReq } + val memRespMux = Vec(conf.nmshr) { new DataWriteReq } val meta_req_arb = (new Arbiter(conf.nmshr)) { new MetaArrayReq() } val mem_req_arb = (new Arbiter(conf.nmshr)) { new TransactionInit } val mem_finish_arb = (new Arbiter(conf.nmshr)) { new TransactionFinish } @@ -336,8 +323,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } - val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.tag - val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.tag + val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> conf.untagbits + val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.addr >> conf.untagbits var idx_match = Bool(false) var pri_rdy = Bool(false) @@ 
-371,9 +358,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { mshr.io.mem_abort <> io.mem_abort mshr.io.mem_rep <> io.mem_rep - memRespMux(i).idx := mshr.io.idx - memRespMux(i).offset := mshr.io.refill_count - memRespMux(i).way_en := mshr.io.way_oh + memRespMux(i) := mshr.io.mem_resp pri_rdy = pri_rdy || mshr.io.req_pri_rdy sec_rdy = sec_rdy || mshr.io.req_sec_rdy @@ -392,27 +377,18 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.secondary_miss := idx_match - val memResp = memRespMux(io.mem_rep.bits.tile_xact_id) - io.mem_resp_idx := memResp.idx - io.mem_resp_offset := memResp.offset - io.mem_resp_way_oh := memResp.way_en + io.mem_resp := memRespMux(io.mem_rep.bits.tile_xact_id) io.fence_rdy := !fence io.probe.ready := (refill_probe_rdy || !tag_match) && (writeback_probe_rdy || !wb_probe_match) - val replay = Queue(replay_arb.io.out, 1, pipe = true) - replay.ready := io.data_req.ready - io.data_req <> replay + val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd) + io.replay.bits.data := sdq(RegEn(replay_arb.io.out.bits.sdq_id, free_sdq)) + io.replay <> replay_arb.io.out - val (replay_read, replay_write) = cpuCmdToRW(replay.bits.cmd) - val sdq_free = replay.valid && replay.ready && replay_write - sdq_val := sdq_val & ~((UFix(1) << replay.bits.sdq_id) & Fill(sdq_free, conf.nsdq)) | - PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq && io.req.bits.tag_miss) - val sdq_rdata = Reg() { io.req.bits.data.clone } - sdq_rdata := sdq(Mux(replay.valid && !replay.ready, replay.bits.sdq_id, replay_arb.io.out.bits.sdq_id)) - io.data_req.bits.data := sdq_rdata - - io.cpu_resp_val := Reg(replay.valid && replay.ready && replay_read, resetVal = Bool(false)) - io.cpu_resp_tag := Reg(replay.bits.cpu_tag) + when (io.replay.valid || sdq_enq) { + sdq_val := sdq_val & ~(UFixToOH(io.replay.bits.sdq_id) & Fill(conf.nsdq, free_sdq)) | + 
PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq) + } } @@ -420,7 +396,8 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new WritebackReq() }.flip val probe = (new FIFOIO) { new WritebackReq() }.flip - val data_req = (new FIFOIO) { new DataArrayReq() } + val meta_req = (new FIFOIO) { new MetaArrayReq } + val data_req = (new FIFOIO) { new DataReadReq() } val data_resp = Bits(INPUT, MEM_DATA_BITS) val mem_req = (new FIFOIO) { new TransactionInit } val mem_req_data = (new FIFOIO) { new TransactionInitData } @@ -430,6 +407,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { val valid = Reg(resetVal = Bool(false)) val is_probe = Reg() { Bool() } val data_req_fired = Reg(resetVal = Bool(false)) + val r_data_req_fired = Reg(data_req_fired, resetVal = Bool(false)) val cmd_sent = Reg() { Bool() } val cnt = Reg() { UFix(width = log2Up(REFILL_CYCLES+1)) } val req = Reg() { new WritebackReq() } @@ -439,7 +417,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { when (valid && io.mem_req.ready) { cmd_sent := Bool(true) } - when (io.data_req.valid && io.data_req.ready) { + when (io.data_req.fire()) { data_req_fired := Bool(true) cnt := cnt + UFix(1) } @@ -465,23 +443,26 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { req := io.req.bits } + val fire = valid && cnt < UFix(REFILL_CYCLES) io.req.ready := !valid && !io.probe.valid io.probe.ready := !valid - io.data_req.valid := valid && (cnt < UFix(REFILL_CYCLES)) - io.data_req.bits.way_en := req.way_oh - io.data_req.bits.idx := req.idx - io.data_req.bits.offset := cnt - io.data_req.bits.rw := Bool(false) - io.data_req.bits.wmask := Bits(0) - io.data_req.bits.data := Bits(0) + io.data_req.valid := fire && io.meta_req.ready + io.data_req.bits.way_en := req.way_en + io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(REFILL_CYCLES)-1,0)) << conf.ramoffbits + + io.meta_req.valid := 
fire && io.data_req.ready + io.meta_req.bits.way_en := Fix(-1) + io.meta_req.bits.rw := Bool(false) + io.meta_req.bits.idx := req.idx + io.meta_req.bits.data.tag := req.tag io.mem_req.valid := valid && !cmd_sent io.mem_req.bits.x_type := conf.co.getTransactionInitTypeOnWriteback() io.mem_req.bits.addr := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := req.tile_xact_id - io.mem_req_data.valid := data_req_fired && !is_probe + io.mem_req_data.valid := r_data_req_fired && !is_probe io.mem_req_data.bits.data := io.data_resp - io.probe_rep_data.valid := data_req_fired && is_probe + io.probe_rep_data.valid := r_data_req_fired && is_probe io.probe_rep_data.bits.data := io.data_resp } @@ -492,129 +473,93 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { val meta_req = (new FIFOIO) { new MetaArrayReq } val mshr_req = (new FIFOIO) { Bool() } val wb_req = (new FIFOIO) { new WritebackReq } - val tag_match_way_oh = Bits(INPUT, conf.ways) + val way_en = Bits(INPUT, conf.ways) val line_state = UFix(INPUT, 2) val addr = Bits(OUTPUT, conf.lineaddrbits) } - val s_reset :: s_invalid :: s_meta_req :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: Nil = Enum(8) { UFix() } - val state = Reg(resetVal = s_reset) + val s_invalid :: s_meta_req :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(8) { UFix() } + val state = Reg(resetVal = s_invalid) val line_state = Reg() { UFix() } - val way_oh = Reg() { Bits() } + val way_en = Reg() { Bits() } val req = Reg() { new ProbeRequest() } - val hit = way_oh.orR + val hit = way_en.orR - when ((state === s_writeback_resp) && io.wb_req.ready) { + when (state === s_meta_write && io.meta_req.ready) { state := s_invalid } - when ((state === s_writeback_req) && io.wb_req.ready) { + when (state === s_writeback_resp && io.wb_req.ready) { + state := s_meta_write + } + when (state === s_writeback_req && io.wb_req.ready) { state := 
s_writeback_resp } - when ((state === s_probe_rep) && io.meta_req.ready && io.rep.ready) { - state := Mux(hit && conf.co.needsWriteback(line_state), s_writeback_req, s_invalid) + when (state === s_probe_rep && io.rep.ready) { + state := s_invalid + when (hit) { + state := Mux(conf.co.needsWriteback(line_state), s_writeback_req, s_meta_write) + } } - when ((state === s_mshr_req) && io.mshr_req.ready) { - state := s_meta_req + when (state === s_mshr_req) { + state := s_probe_rep + line_state := io.line_state + way_en := io.way_en + when (!io.mshr_req.ready) { state := s_meta_req } } when (state === s_meta_resp) { - way_oh := io.tag_match_way_oh - line_state := io.line_state - state := Mux(!io.mshr_req.ready, s_mshr_req, s_probe_rep) + state := s_mshr_req } - when ((state === s_meta_req) && io.meta_req.ready) { + when (state === s_meta_req && io.meta_req.ready) { state := s_meta_resp } - when ((state === s_invalid) && io.req.valid) { + when (state === s_invalid && io.req.valid) { state := s_meta_req req := io.req.bits } - when (state === s_reset) { state := s_invalid } - io.req.ready := state === s_invalid - io.rep.valid := state === s_probe_rep && io.meta_req.ready + io.req.ready := state === s_invalid && !reset + io.rep.valid := state === s_probe_rep io.rep.bits := conf.co.newProbeReply(req, Mux(hit, line_state, conf.co.newStateOnFlush)) - io.meta_req.valid := state === s_meta_req || state === s_meta_resp || state === s_mshr_req || state === s_probe_rep && hit - io.meta_req.bits.way_en := Mux(state === s_probe_rep, way_oh, Fix(-1)) - io.meta_req.bits.rw := state === s_probe_rep + io.meta_req.valid := state === s_meta_req || state === s_meta_write + io.meta_req.bits.way_en := Mux(state === s_meta_write, way_en, Fix(-1)) + io.meta_req.bits.rw := state === s_meta_write io.meta_req.bits.idx := req.addr io.meta_req.bits.data.state := conf.co.newStateOnProbeRequest(req, line_state) io.meta_req.bits.data.tag := req.addr >> UFix(conf.idxbits) - io.mshr_req.valid := state 
=== s_meta_resp || state === s_mshr_req + io.mshr_req.valid := state === s_mshr_req io.addr := req.addr io.wb_req.valid := state === s_writeback_req - io.wb_req.bits.way_oh := way_oh + io.wb_req.bits.way_en := way_en io.wb_req.bits.idx := req.addr io.wb_req.bits.tag := req.addr >> UFix(conf.idxbits) } -class FlushUnit(lines: Int)(implicit conf: DCacheConfig) extends Component { - val io = new Bundle { - val req = (new FIFOIO) { Bool() }.flip - val meta_req = (new FIFOIO) { new MetaArrayReq() } - val mshr_req = (new FIFOIO) { Bool() } - } - - val s_reset :: s_ready :: s_meta_read :: s_meta_wait :: Nil = Enum(4) { UFix() } - val state = Reg(resetVal = s_reset) - val idx_cnt = Reg(resetVal = UFix(0, log2Up(lines))) - val next_idx_cnt = idx_cnt + UFix(1) - val way_cnt = if (conf.dm) UFix(0) else Reg(resetVal = UFix(0, conf.waybits)) - val next_way_cnt = way_cnt + UFix(1) - - switch (state) { - is(s_reset) { - when (io.meta_req.ready) { - state := Mux(way_cnt === UFix(conf.ways-1) && idx_cnt.andR, s_ready, s_reset); - when (way_cnt === UFix(conf.ways-1)) { idx_cnt := next_idx_cnt }; - if (!conf.dm) way_cnt := next_way_cnt; - } - } - is(s_ready) { when (io.req.valid) { state := s_meta_read } } - is(s_meta_read) { when (io.meta_req.ready) { state := s_meta_wait } } - is(s_meta_wait) { - state := s_meta_read - when (io.mshr_req.ready) { - state := s_meta_read - when (way_cnt === UFix(conf.ways-1)) { - when (idx_cnt.andR) { - state := s_ready - } - idx_cnt := next_idx_cnt - } - if (!conf.dm) way_cnt := next_way_cnt; - } - } - } - - io.req.ready := state === s_ready - io.mshr_req.valid := state === s_meta_wait - io.meta_req.valid := (state === s_meta_read) || (state === s_reset) - io.meta_req.bits.way_en := UFixToOH(way_cnt) - io.meta_req.bits.idx := idx_cnt - io.meta_req.bits.rw := (state === s_reset) - io.meta_req.bits.data.state := conf.co.newStateOnFlush() - io.meta_req.bits.data.tag := UFix(0) -} - -class MetaDataArrayArray(lines: Int)(implicit conf: DCacheConfig) 
extends Component { +class MetaDataArray(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new MetaArrayReq() }.flip val resp = Vec(conf.ways){ (new MetaData).asOutput } - val state_req = (new FIFOIO) { new MetaArrayReq() }.flip + val state_req = (new PipeIO) { new MetaArrayReq() }.flip val way_en = Bits(OUTPUT, conf.ways) } + val rst_cnt = Reg(resetVal = UFix(0, log2Up(conf.sets+1))) + val rst = rst_cnt < conf.sets + when (rst) { rst_cnt := rst_cnt+1 } + val permBits = io.req.bits.data.state.width - val perms = Mem(lines) { UFix(width = permBits*conf.ways) } - val tags = Mem(lines, seqRead = true) { Bits(width = conf.tagbits*conf.ways) } + val perms = Mem(conf.sets) { UFix(width = permBits*conf.ways) } + val tags = Mem(conf.sets, seqRead = true) { Bits(width = conf.tagbits*conf.ways) } val tag = Reg() { Bits() } val raddr = Reg() { Bits() } val way_en_ = Reg { Bits(width = conf.ways) } - when (io.state_req.valid && io.state_req.bits.rw) { - perms.write(io.state_req.bits.idx, Fill(conf.ways, io.state_req.bits.data.state), FillInterleaved(permBits, io.state_req.bits.way_en)) + when (rst || io.state_req.valid && io.state_req.bits.rw) { + val addr = Mux(rst, rst_cnt, io.state_req.bits.idx) + val data = Mux(rst, conf.co.newStateOnFlush, io.state_req.bits.data.state) + val mask = Mux(rst, Fix(-1), io.state_req.bits.way_en) + perms.write(addr, Fill(conf.ways, data), FillInterleaved(permBits, mask)) } when (io.req.valid) { when (io.req.bits.rw) { @@ -635,82 +580,71 @@ class MetaDataArrayArray(lines: Int)(implicit conf: DCacheConfig) extends Compon } io.way_en := way_en_ - io.req.ready := Bool(true) - io.state_req.ready := Bool(true) + io.req.ready := !rst } -class DataArray(lines: Int)(implicit conf: DCacheConfig) extends Component { +class DataArray(implicit conf: DCacheConfig) extends Component { val io = new Bundle { - val req = (new FIFOIO) { new DataArrayReq() }.flip - val resp = Bits(width = MEM_DATA_BITS, dir = OUTPUT) 
- } - - val wmask = FillInterleaved(8, io.req.bits.wmask) - val addr = Cat(io.req.bits.idx, io.req.bits.offset) - val rdata = Reg() { Bits() } - - val array = Mem(lines*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } - when (io.req.valid) { - when (io.req.bits.rw) { array.write(addr, io.req.bits.data, wmask) } - .otherwise { rdata := array(addr) } - } - - io.resp := rdata - io.req.ready := Bool(true) -} - -class DataArrayArray(lines: Int)(implicit conf: DCacheConfig) extends Component { - val io = new Bundle { - val req = (new FIFOIO) { new DataArrayReq() }.flip + val read = new FIFOIO()(new DataReadReq).flip + val write = new FIFOIO()(new DataWriteReq).flip val resp = Vec(conf.ways){ Bits(OUTPUT, MEM_DATA_BITS) } - val way_en = Bits(OUTPUT, conf.ways) } - val way_en_ = Reg { Bits(width = conf.ways) } - when (io.req.valid && io.req.ready) { - way_en_ := io.req.bits.way_en - } + val wmask = FillInterleaved(conf.databits, io.write.bits.wmask) + val waddr = io.write.bits.addr >> conf.ramoffbits + val raddr = io.read.bits.addr >> conf.ramoffbits for (w <- 0 until conf.ways) { - val way = new DataArray(lines) - way.io.req.bits <> io.req.bits - way.io.req.valid := io.req.valid && io.req.bits.way_en(w).toBool - way.io.resp <> io.resp(w) + val rdata = Reg() { Bits() } + val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } + when (io.write.bits.way_en(w) && io.write.valid) { + array.write(waddr, io.write.bits.data, wmask) + } + when (io.read.bits.way_en(w) && io.read.valid) { + rdata := array(raddr) + } + io.resp(w) := rdata } - io.way_en := way_en_ - io.req.ready := Bool(true) + io.read.ready := Bool(true) + io.write.ready := Bool(true) } -class AMOALU extends Component { +class AMOALU(implicit conf: DCacheConfig) extends Component { val io = new Bundle { + val lhs_raw = Bits(INPUT, conf.databits) + val addr = Bits(INPUT, conf.offbits) val cmd = Bits(INPUT, 4) val typ = Bits(INPUT, 3) - val lhs = UFix(INPUT, 64) - val rhs = 
UFix(INPUT, 64) - val out = UFix(OUTPUT, 64) + val lhs = Bits(INPUT, conf.databits) + val rhs = Bits(INPUT, conf.databits) + val out = Bits(OUTPUT, conf.databits) } + + require(conf.databytes == 8) val sgned = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MAX) - val sub = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) || (io.cmd === M_XA_MAX) || (io.cmd === M_XA_MAXU) + val minmax = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) || (io.cmd === M_XA_MAX) || (io.cmd === M_XA_MAXU) val min = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) val word = (io.typ === MT_W) || (io.typ === MT_WU) - val adder_out = (Cat(io.lhs, UFix(0,1)).toUFix + Cat(io.rhs ^ Fill(io.rhs.width, sub), sub).toUFix) >> UFix(1) + val adder_out = io.lhs + io.rhs val cmp_lhs = Mux(word, io.lhs(31), io.lhs(63)) val cmp_rhs = Mux(word, io.rhs(31), io.rhs(63)) - val cmp_diff = Mux(word, adder_out(31), adder_out(63)) + val cmp_diff = Mux(word, io.lhs(31,0) < io.rhs(31,0), io.lhs < io.rhs) val less = Mux(cmp_lhs === cmp_rhs, cmp_diff, Mux(sgned, cmp_lhs, cmp_rhs)) val cmp_out = Mux(min === less, io.lhs, io.rhs) val out = Mux(io.cmd === M_XA_ADD, adder_out, - Mux(io.cmd === M_XA_SWAP, io.rhs, Mux(io.cmd === M_XA_AND, io.lhs & io.rhs, Mux(io.cmd === M_XA_OR, io.lhs | io.rhs, - /* MIN[U]/MAX[U] */ cmp_out)))); + Mux(minmax, cmp_out, + io.rhs)))) - io.out := Mux(word, Cat(out(31,0), out(31,0)).toUFix, out) + val wdata = Mux(word, Cat(out(31,0), out(31,0)), out) + val wmask = FillInterleaved(8, StoreGen(io.typ, io.addr, Bits(0)).mask) + io.out := wmask & wdata | ~wmask & io.lhs_raw } class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { @@ -726,13 +660,12 @@ class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { } class HellaCacheResp(implicit conf: DCacheConfig) extends Bundle { - val miss = Bool() - val nack = Bool() + val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() - val typ = Bits(width = 3) - val data = Bits(width = conf.databits) + val typ = Bits(width = 3) + 
val data = Bits(width = conf.databits) val data_subword = Bits(width = conf.databits) - val tag = Bits(width = conf.reqtagbits) + val tag = Bits(width = conf.reqtagbits) override def clone = new HellaCacheResp().asInstanceOf[this.type] } @@ -761,297 +694,241 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val mem = new ioTileLink } - val lines = 1 << conf.idxbits - val indexbits = conf.idxbits - val tagmsb = conf.paddrbits-1 - val taglsb = indexbits+conf.offbits - val tagbits = tagmsb-taglsb+1 - val indexmsb = taglsb-1 + val indexmsb = conf.untagbits-1 val indexlsb = conf.offbits val offsetmsb = indexlsb-1 val offsetlsb = log2Up(conf.databytes) - val ramindexlsb = log2Up(MEM_DATA_BITS/8) - - val early_nack = Reg { Bool() } - val r_cpu_req_val_ = Reg(io.cpu.req.valid && io.cpu.req.ready, resetVal = Bool(false)) - val r_cpu_req_val = r_cpu_req_val_ && !io.cpu.req.bits.kill && !early_nack - val r_cpu_req_phys = Reg() { Bool() } - val r_cpu_req_vpn = Reg() { UFix() } - val r_cpu_req_idx = Reg() { Bits() } - val r_cpu_req_cmd = Reg() { Bits() } - val r_cpu_req_type = Reg() { Bits() } - val r_cpu_req_tag = Reg() { Bits() } - val r_amo_replay_data = Reg() { Bits() } - val r_way_oh = Reg() { Bits() } - - val p_store_valid = Reg(resetVal = Bool(false)) - val p_store_data = Reg() { Bits() } - val p_store_idx = Reg() { Bits() } - val p_store_cmd = Reg() { Bits() } - val p_store_type = Reg() { Bits() } - val p_store_way_oh = Reg() { Bits() } - val r_replay_amo = Reg(resetVal = Bool(false)) - - val req_store = (io.cpu.req.bits.cmd === M_XWR) - val req_load = (io.cpu.req.bits.cmd === M_XRD) - val req_amo = io.cpu.req.bits.cmd(3).toBool - val req_read = req_load || req_amo - val req_write = req_store || req_amo - val r_req_load = (r_cpu_req_cmd === M_XRD) - val r_req_store = (r_cpu_req_cmd === M_XWR) - val r_req_flush = (r_cpu_req_cmd === M_FLA) - val r_req_fence = (r_cpu_req_cmd === M_FENCE) - val r_req_prefetch = (r_cpu_req_cmd === M_PFR) || (r_cpu_req_cmd 
=== M_PFW) - val r_req_amo = r_cpu_req_cmd(3).toBool - val r_req_read = r_req_load || r_req_amo - val r_req_write = r_req_store || r_req_amo - val r_req_readwrite = r_req_read || r_req_write || r_req_prefetch - val nack_hit = Bool() - - val dtlb = new TLB(8) - dtlb.io.ptw <> io.cpu.ptw - dtlb.io.req.valid := r_cpu_req_val && r_req_readwrite && !r_cpu_req_phys - dtlb.io.req.bits.passthrough := r_cpu_req_phys - dtlb.io.req.bits.asid := UFix(0) - dtlb.io.req.bits.vpn := r_cpu_req_vpn - dtlb.io.req.bits.instruction := Bool(false) val wb = new WritebackUnit val prober = new ProbeUnit val mshr = new MSHRFile - val flusher = new FlushUnit(lines) - val replay_amo_val = mshr.io.data_req.valid && mshr.io.data_req.bits.cmd(3).toBool - // reset and flush unit - val flushed = Reg(resetVal = Bool(true)) - flushed := flushed && (!r_cpu_req_val || r_req_flush) || r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && flusher.io.req.ready - flusher.io.req.valid := r_cpu_req_val && r_req_flush && mshr.io.fence_rdy && !flushed - flusher.io.mshr_req.ready := mshr.io.req.ready + io.cpu.req.ready := Bool(true) + val s1_valid = Reg(io.cpu.req.fire(), resetVal = Bool(false)) + val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill + val s1_replay = Reg(resetVal = Bool(false)) + val s1_req = Reg{io.cpu.req.bits.clone} + val s2_req = Reg{io.cpu.req.bits.clone} + + val s2_valid = Reg(s1_valid_masked, resetVal = Bool(false)) + val s2_replay = Reg(s1_replay, resetVal = Bool(false)) + val s2_valid_masked = Bool() + val s2_nack_hit = Bool() + + val s3_valid = Reg(resetVal = Bool(false)) + val s3_req = Reg{io.cpu.req.bits.clone} + val s3_way = Reg{Bits()} + + val s1_read = isRead(s1_req.cmd) + val s1_write = isWrite(s1_req.cmd) + val s1_readwrite = s1_read || s1_write + + val dtlb = new TLB(8) + dtlb.io.ptw <> io.cpu.ptw + dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys + dtlb.io.req.bits.passthrough := s1_req.phys + dtlb.io.req.bits.asid := UFix(0) + dtlb.io.req.bits.vpn := 
s1_req.addr >> conf.pgidxbits + dtlb.io.req.bits.instruction := Bool(false) + when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) } when (io.cpu.req.valid) { - r_cpu_req_phys := io.cpu.req.bits.phys - r_cpu_req_vpn := io.cpu.req.bits.addr >> taglsb - r_cpu_req_idx := io.cpu.req.bits.addr(indexmsb,0) - r_cpu_req_cmd := io.cpu.req.bits.cmd - r_cpu_req_type := io.cpu.req.bits.typ - r_cpu_req_tag := io.cpu.req.bits.tag + s1_req := io.cpu.req.bits + } + when (wb.io.meta_req.valid) { + s1_req.phys := Bool(true) + s1_req.addr := Cat(wb.io.meta_req.bits.data.tag, wb.io.meta_req.bits.idx, UFix(0, conf.offbits)).toUFix } when (prober.io.meta_req.valid) { - r_cpu_req_idx := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, mshr.io.data_req.bits.offset)(conf.pgidxbits-1,0) + s1_req.addr := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, UFix(0, conf.offbits)).toUFix + s1_req.phys := Bool(true) } - when (mshr.io.data_req.valid) { - r_cpu_req_idx := Cat(mshr.io.data_req.bits.idx, mshr.io.data_req.bits.offset) - r_cpu_req_cmd := mshr.io.data_req.bits.cmd - r_cpu_req_type := mshr.io.data_req.bits.typ - r_amo_replay_data := mshr.io.data_req.bits.data - r_way_oh := mshr.io.data_req.bits.way_oh + when (mshr.io.replay.valid) { + s1_req := mshr.io.replay.bits } - when (flusher.io.meta_req.valid) { - r_cpu_req_idx := Cat(flusher.io.meta_req.bits.idx, mshr.io.data_req.bits.offset) - r_cpu_req_cmd := M_FLA - r_way_oh := flusher.io.meta_req.bits.way_en + val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(conf.pgidxbits-1,0)) + + when (s1_valid || s1_replay) { + s2_req.addr := s1_addr + s2_req.typ := s1_req.typ + s2_req.cmd := s1_req.cmd + s2_req.tag := s1_req.tag + when (s1_write) { + s2_req.data := Mux(s1_replay, mshr.io.replay.bits.data, io.cpu.req.bits.data) + } } - val cpu_req_data = Mux(r_replay_amo, r_amo_replay_data, io.cpu.req.bits.data) val misaligned = - (((r_cpu_req_type === MT_H) || (r_cpu_req_type === MT_HU)) && 
(r_cpu_req_idx(0) != Bits(0))) || - (((r_cpu_req_type === MT_W) || (r_cpu_req_type === MT_WU)) && (r_cpu_req_idx(1,0) != Bits(0))) || - ((r_cpu_req_type === MT_D) && (r_cpu_req_idx(2,0) != Bits(0))); + (((s1_req.typ === MT_H) || (s1_req.typ === MT_HU)) && (s1_req.addr(0) != Bits(0))) || + (((s1_req.typ === MT_W) || (s1_req.typ === MT_WU)) && (s1_req.addr(1,0) != Bits(0))) || + ((s1_req.typ === MT_D) && (s1_req.addr(2,0) != Bits(0))); - io.cpu.xcpt.ma.ld := r_cpu_req_val_ && r_req_read && misaligned - io.cpu.xcpt.ma.st := r_cpu_req_val_ && r_req_write && misaligned - io.cpu.xcpt.pf.ld := r_cpu_req_val_ && r_req_read && dtlb.io.resp.xcpt_ld - io.cpu.xcpt.pf.st := r_cpu_req_val_ && r_req_write && dtlb.io.resp.xcpt_st + io.cpu.xcpt.ma.ld := s1_read && misaligned + io.cpu.xcpt.ma.st := s1_write && misaligned + io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld + io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st // tags - val meta = new MetaDataArrayArray(lines) + val meta = new MetaDataArray val meta_arb = (new Arbiter(4)) { new MetaArrayReq() } - flusher.io.meta_req <> meta_arb.io.in(0) meta_arb.io.out <> meta.io.req // data - val data = new DataArrayArray(lines) - val data_arb = (new Arbiter(5)) { new DataArrayReq() } - data_arb.io.out <> data.io.req + val data = new DataArray + val readArb = new Arbiter(3)(new DataReadReq) + val writeArb = new Arbiter(2)(new DataWriteReq) + readArb.io.out.ready := !io.mem.xact_rep.valid || io.mem.xact_rep.ready // insert bubble if refill gets blocked + readArb.io.out <> data.io.read + writeArb.io.out <> data.io.write // cpu tag check meta_arb.io.in(3).valid := io.cpu.req.valid meta_arb.io.in(3).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) meta_arb.io.in(3).bits.rw := Bool(false) meta_arb.io.in(3).bits.way_en := Fix(-1) - val early_tag_nack = !meta_arb.io.in(3).ready - val cpu_req_ppn = Mux(prober.io.mshr_req.valid, prober.io.addr >> UFix(conf.pgidxbits-conf.offbits), dtlb.io.resp.ppn) - val cpu_req_tag = Cat(cpu_req_ppn, 
r_cpu_req_idx)(tagmsb,taglsb) - val tag_match_arr = (0 until conf.ways).map( w => conf.co.isValid(meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) - val tag_match = Cat(Bits(0),tag_match_arr:_*).orR - val tag_match_way_oh = Cat(Bits(0),tag_match_arr.reverse:_*)(conf.ways-1, 0) //TODO: use Vec - val tag_hit_arr = (0 until conf.ways).map( w => conf.co.isHit(r_cpu_req_cmd, meta.io.resp(w).state) && (meta.io.resp(w).tag === cpu_req_tag)) - val tag_hit = Cat(Bits(0),tag_hit_arr:_*).orR - val meta_resp_way_oh = Mux(meta.io.way_en.andR, tag_match_way_oh, meta.io.way_en) - val data_resp_way_oh = Mux(data.io.way_en.andR, tag_match_way_oh, data.io.way_en) - val meta_resp_mux = Mux1H(meta_resp_way_oh, meta.io.resp) - val data_resp_mux = Mux1H(data_resp_way_oh, data.io.resp) + when (!meta_arb.io.in(3).ready) { io.cpu.req.ready := Bool(false) } + def wayMap[T <: Data](f: Int => T)(gen: => T) = Vec((0 until conf.ways).map(i => f(i))){gen} + val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)){Bits()}.toBits + val s1_hit_way = wayMap((w: Int) => s1_tag_eq_way(w) && conf.co.isHit(s1_req.cmd, meta.io.resp(w).state)){Bits()}.toBits + val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && conf.co.isValid(meta.io.resp(w).state)){Bits()}.toBits + val s1_hit = s1_hit_way.orR + val s1_clk_en = Reg(meta_arb.io.out.valid) + val s2_tag_match_way = RegEn(s1_tag_match_way, s1_clk_en) + val s2_tag_match = s2_tag_match_way.orR + val s2_hit = Reg(s1_hit) + val s2_data = wayMap((w: Int) => RegEn(data.io.resp(w), s1_clk_en && s1_tag_eq_way(w))){Bits()} + val data_resp_mux = Mux1H(s2_tag_match_way, s2_data) // writeback unit wb.io.req <> mshr.io.wb_req - wb.io.data_req <> data_arb.io.in(3) + wb.io.meta_req <> meta_arb.io.in(2) + wb.io.data_req <> readArb.io.in(1) wb.io.data_resp <> data_resp_mux wb.io.probe_rep_data <> io.mem.probe_rep_data // replacement policy val replacer = new RandomReplacement - val replaced_way_oh = 
Mux(flusher.io.mshr_req.valid, r_way_oh, UFixToOH(replacer.way)) - val meta_wb_mux = Mux1H(replaced_way_oh, meta.io.resp) + val s1_replaced_way_en = UFixToOH(replacer.way) + val s2_replaced_way_en = UFixToOH(RegEn(replacer.way, s1_clk_en)) + val s2_repl_state = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en && s1_replaced_way_en(w))){Bits()}) + val s2_repl_tag = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w).tag, s1_clk_en && s1_replaced_way_en(w))){Bits()}) + val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en && s1_tag_eq_way(w))){Bits()}) // refill response - data_arb.io.in(0).bits.offset := mshr.io.mem_resp_offset - data_arb.io.in(0).bits.idx := mshr.io.mem_resp_idx - data_arb.io.in(0).bits.rw := Bool(true) - data_arb.io.in(0).bits.wmask := ~UFix(0, MEM_DATA_BITS/8) - data_arb.io.in(0).bits.data := io.mem.xact_rep.bits.data - data_arb.io.in(0).bits.way_en := mshr.io.mem_resp_way_oh - data_arb.io.in(0).valid := io.mem.xact_rep.valid && conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) + val refill = conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) + writeArb.io.in(0).valid := io.mem.xact_rep.valid && refill + io.mem.xact_rep.ready := writeArb.io.in(0).ready || !refill + writeArb.io.in(0).bits := mshr.io.mem_resp + writeArb.io.in(0).bits.wmask := Fix(-1) + writeArb.io.in(0).bits.data := io.mem.xact_rep.bits.data // load hits - data_arb.io.in(4).bits.offset := io.cpu.req.bits.addr(offsetmsb,ramindexlsb) - data_arb.io.in(4).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) - data_arb.io.in(4).bits.rw := Bool(false) - data_arb.io.in(4).valid := io.cpu.req.valid && req_read - data_arb.io.in(4).bits.way_en := Fix(-1) // intiate load on all ways, mux after tag check - val early_load_nack = !data_arb.io.in(4).ready + readArb.io.in(2).bits.addr := io.cpu.req.bits.addr + readArb.io.in(2).valid := io.cpu.req.valid + readArb.io.in(2).bits.way_en := Fix(-1) + 
when (!readArb.io.in(2).ready) { io.cpu.req.ready := Bool(false) } - // store hits and AMO hits and misses use a pending store register. - // we nack new stores if a pending store can't retire for some reason. - // we drain a pending store if the CPU performs a store or a - // conflictig load, or if the cache is idle, or after a miss. - val p_store_idx_match = p_store_valid && (r_cpu_req_idx(indexmsb,indexlsb) === p_store_idx(indexmsb,indexlsb)) - val p_store_offset_match = (r_cpu_req_idx(indexlsb-1,offsetlsb) === p_store_idx(indexlsb-1,offsetlsb)) - val p_store_match = r_cpu_req_val_ && r_req_read && p_store_idx_match && p_store_offset_match - val drain_store_val = (p_store_valid && (!io.cpu.req.valid || req_write || wb.io.data_req.valid || mshr.io.data_req.valid)) || p_store_match - data_arb.io.in(2).bits.offset := p_store_idx(offsetmsb,ramindexlsb) - data_arb.io.in(2).bits.idx := p_store_idx(indexmsb,indexlsb) - data_arb.io.in(2).bits.rw := Bool(true) - data_arb.io.in(2).valid := drain_store_val - data_arb.io.in(2).bits.way_en := p_store_way_oh - val drain_store = drain_store_val && data_arb.io.in(2).ready - val p_amo = Reg(resetVal = Bool(false)) - val p_store_rdy = !(p_store_valid && !drain_store) && !(mshr.io.data_req.valid || r_replay_amo || p_amo) - p_amo := r_cpu_req_val && tag_hit && r_req_amo && mshr.io.req.ready && !nack_hit || r_replay_amo - p_store_valid := p_store_valid && !drain_store || (r_cpu_req_val && tag_hit && r_req_store && mshr.io.req.ready && !nack_hit) || p_amo + // store/amo hits + def idxMatch(dst: HellaCacheReq, src: HellaCacheReq) = dst.addr(indexmsb,indexlsb) === src.addr(indexmsb,indexlsb) + def offsetMatch(dst: HellaCacheReq, src: HellaCacheReq) = { + def mask(x: HellaCacheReq) = StoreGen(x.typ, x.addr, Bits(0)).mask + // TODO: this is overly restrictive. need write-combining buffer. 
+ isWrite(src.cmd) && + dst.addr(indexlsb-1,offsetlsb) === src.addr(indexlsb-1,offsetlsb) && + ((mask(dst) & mask(src)).orR || isWrite(dst.cmd)) + } + def storeMatch(dst: HellaCacheReq, src: HellaCacheReq) = idxMatch(dst, src) && offsetMatch(dst, src) + val p_store_match = s2_valid && storeMatch(s1_req, s2_req) || + s3_valid && storeMatch(s1_req, s3_req) + writeArb.io.in(1).bits.addr := s3_req.addr + writeArb.io.in(1).bits.wmask := UFix(1) << s3_req.addr(conf.ramoffbits-1,offsetlsb).toUFix + writeArb.io.in(1).bits.data := Fill(MEM_DATA_BITS/conf.databits, s3_req.data) + writeArb.io.in(1).valid := s3_valid + writeArb.io.in(1).bits.way_en := s3_way // tag update after a store to an exclusive clean line. - val new_hit_state = conf.co.newStateOnHit(r_cpu_req_cmd, meta_resp_mux.state) - val set_hit_state = r_cpu_req_val && tag_hit && meta_resp_mux.state != new_hit_state + val new_hit_state = conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) meta.io.state_req.bits.rw := Bool(true) - meta.io.state_req.bits.idx := Reg(r_cpu_req_idx(indexmsb,indexlsb)) - meta.io.state_req.bits.data.state := Reg(new_hit_state) - meta.io.state_req.bits.way_en := Reg(tag_match_way_oh) - meta.io.state_req.valid := Reg(set_hit_state, resetVal = Bool(false)) + meta.io.state_req.bits.idx := s2_req.addr(indexmsb,indexlsb) + meta.io.state_req.bits.data.state := new_hit_state + meta.io.state_req.bits.way_en := s2_tag_match_way + meta.io.state_req.valid := s2_valid_masked && s2_hit && s2_hit_state != new_hit_state // pending store data, also used for AMO RHS + s3_valid := (s2_valid_masked && s2_hit || s2_replay) && isWrite(s2_req.cmd) val amoalu = new AMOALU - when (r_cpu_req_val_ && r_req_write && p_store_rdy || r_replay_amo) { - p_store_idx := r_cpu_req_idx - p_store_type := r_cpu_req_type - p_store_cmd := r_cpu_req_cmd - p_store_way_oh := Mux(r_replay_amo, r_way_oh, tag_match_way_oh) - p_store_data := cpu_req_data - } - when (p_amo) { - p_store_data := amoalu.io.out + when ((s2_valid || s2_replay) && 
isWrite(s2_req.cmd)) { + s3_req := s2_req + s3_req.data := amoalu.io.out + s3_way := s2_tag_match_way } // miss handling - mshr.io.req.valid := r_cpu_req_val && r_req_readwrite && !nack_hit || flusher.io.mshr_req.valid - mshr.io.req.bits.tag_miss := !tag_hit || flusher.io.mshr_req.valid - mshr.io.req.bits.old_dirty := conf.co.needsWriteback(meta_wb_mux.state) && (!tag_match || flusher.io.mshr_req.valid) // don't wb upgrades - mshr.io.req.bits.old_tag := meta_wb_mux.tag - mshr.io.req.bits.tag := cpu_req_tag - mshr.io.req.bits.idx := r_cpu_req_idx(indexmsb,indexlsb) - mshr.io.req.bits.cpu_tag := r_cpu_req_tag - mshr.io.req.bits.offset := r_cpu_req_idx(offsetmsb,0) - mshr.io.req.bits.cmd := r_cpu_req_cmd - mshr.io.req.bits.typ := r_cpu_req_type - mshr.io.req.bits.way_oh := Mux(tag_match && !flusher.io.mshr_req.valid, tag_match_way_oh, replaced_way_oh) - mshr.io.req.bits.data := cpu_req_data + mshr.io.req.valid := s2_valid_masked && !s2_hit && (isRead(s2_req.cmd) || isWrite(s2_req.cmd)) && !s2_nack_hit + mshr.io.req.bits := s2_req + mshr.io.req.bits.old_dirty := conf.co.needsWriteback(s2_repl_state) && !s2_tag_match // don't wb upgrades + mshr.io.req.bits.old_tag := s2_repl_tag + mshr.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) + mshr.io.req.bits.data := s2_req.data mshr.io.mem_rep <> io.mem.xact_rep mshr.io.mem_abort.valid := io.mem.xact_abort.valid mshr.io.mem_abort.bits := io.mem.xact_abort.bits io.mem.xact_abort.ready := Bool(true) - mshr.io.meta_req <> meta_arb.io.in(1) when (mshr.io.req.fire()) { replacer.miss } // replays - val replay = mshr.io.data_req.bits - val stall_replay = r_replay_amo || p_amo || flusher.io.meta_req.valid || p_store_valid - val replay_val = mshr.io.data_req.valid - val replay_fire = replay_val && !stall_replay - val replay_rdy = data_arb.io.in(1).ready && !stall_replay - data_arb.io.in(1).bits.offset := replay.offset(offsetmsb,ramindexlsb) - data_arb.io.in(1).bits.idx := replay.idx - 
data_arb.io.in(1).bits.rw := replay.cmd === M_XWR - data_arb.io.in(1).valid := replay_fire - data_arb.io.in(1).bits.way_en := mshr.io.data_req.bits.way_oh - mshr.io.data_req.ready := replay_rdy - r_replay_amo := replay_amo_val && replay_rdy + readArb.io.in(0).valid := mshr.io.replay.valid + readArb.io.in(0).bits := mshr.io.replay.bits + readArb.io.in(0).bits.way_en := Fix(-1) + mshr.io.replay.ready := Bool(true) + s1_replay := mshr.io.replay.fire() + meta_arb.io.in(0) <> mshr.io.meta_req // probes prober.io.req <> io.mem.probe_req prober.io.rep <> io.mem.probe_rep prober.io.mshr_req <> mshr.io.probe prober.io.wb_req <> wb.io.probe - prober.io.tag_match_way_oh := tag_match_way_oh - prober.io.line_state := meta_resp_mux.state - prober.io.meta_req.ready := meta_arb.io.in(2).ready && !replay_amo_val - meta_arb.io.in(2).valid := prober.io.meta_req.valid - meta_arb.io.in(2).bits := prober.io.meta_req.bits - - // store write mask generation. - // assumes store replays are higher-priority than pending stores. - val store_offset = Mux(!replay_fire, p_store_idx(offsetmsb,0), replay.offset) - val store_type = Mux(!replay_fire, p_store_type, replay.typ) - val store_wmask_wide = StoreGen(store_type, store_offset, Bits(0)).mask << Cat(store_offset(ramindexlsb-1,offsetlsb), Bits(0, log2Up(conf.databytes))).toUFix - val store_data = Mux(!replay_fire, p_store_data, replay.data) - val store_data_wide = Fill(MEM_DATA_BITS/conf.databits, store_data) - data_arb.io.in(1).bits.data := store_data_wide - data_arb.io.in(1).bits.wmask := store_wmask_wide - data_arb.io.in(2).bits.data := store_data_wide - data_arb.io.in(2).bits.wmask := store_wmask_wide + prober.io.way_en := s2_tag_match_way + prober.io.line_state := s2_hit_state + prober.io.meta_req <> meta_arb.io.in(1) // load data subword mux/sign extension. // subword loads are delayed by one cycle. 
- val loadgen_data = data_resp_mux >> Cat(r_cpu_req_idx(log2Up(MEM_DATA_BITS/8)-1,3), Bits(0,6)) - val loadgen = LoadGen(r_cpu_req_type, r_cpu_req_idx, loadgen_data) + val loadgen_data = data_resp_mux >> Cat(s2_req.addr(log2Up(MEM_DATA_BITS/8)-1,3), Bits(0,log2Up(conf.databits))) + val loadgen = LoadGen(s2_req.typ, s2_req.addr, loadgen_data) - amoalu.io.cmd := p_store_cmd - amoalu.io.typ := p_store_type - amoalu.io.lhs := Reg(loadgen.word).toUFix - amoalu.io.rhs := p_store_data.toUFix + amoalu.io := s2_req + amoalu.io.lhs_raw := loadgen_data + amoalu.io.lhs := loadgen.word + amoalu.io.rhs := s2_req.data - early_nack := early_tag_nack || early_load_nack || r_cpu_req_val && r_req_amo || replay_amo_val || r_replay_amo + val s1_nack = p_store_match || dtlb.io.req.valid && dtlb.io.resp.miss || + idxMatch(s1_req, s2_req) && meta.io.state_req.valid || + s1_req.addr(indexmsb,indexlsb) === prober.io.meta_req.bits.idx && !prober.io.req.ready + s2_nack_hit := Reg(s1_nack) || mshr.io.secondary_miss + val s2_nack_miss = !s2_hit && !mshr.io.req.ready + val s2_nack = s2_nack_hit || s2_nack_miss + s2_valid_masked := s2_valid && !s2_nack - // we usually nack rather than reporting that the cache is not ready. - // fences and flushes are the exceptions. 
- val pending_fence = Reg(resetVal = Bool(false)) - pending_fence := (r_cpu_req_val_ && r_req_fence || pending_fence) && !mshr.io.fence_rdy - nack_hit := p_store_match || replay_val || r_req_write && !p_store_rdy || - p_store_idx_match && meta.io.state_req.valid || - !r_cpu_req_phys && dtlb.io.resp.miss - val nack_miss = !mshr.io.req.ready - val nack_flush = !mshr.io.fence_rdy && (r_req_fence || r_req_flush) || - !flushed && r_req_flush - val nack = early_nack || r_req_readwrite && (nack_hit || nack_miss) || nack_flush + // after a nack, block until nack condition resolves (saves energy) + val block_fence = Reg(resetVal = Bool(false)) + block_fence := (s1_valid && s1_req.cmd === M_FENCE || block_fence) && !mshr.io.fence_rdy + val block_miss = Reg(resetVal = Bool(false)) + block_miss := (s2_valid || block_miss) && s2_nack_miss + when (block_fence || block_miss) { + io.cpu.req.ready := Bool(false) + } - io.cpu.req.ready := flusher.io.req.ready && !(r_cpu_req_val_ && r_req_flush) && !pending_fence && (dtlb.io.req.ready || io.cpu.req.bits.phys) - io.cpu.resp.valid := (r_cpu_req_val && tag_hit && !mshr.io.secondary_miss && !nack && r_req_read) || mshr.io.cpu_resp_val - io.cpu.resp.bits.nack := r_cpu_req_val_ && !io.cpu.req.bits.kill && nack - io.cpu.resp.bits.replay := mshr.io.cpu_resp_val - io.cpu.resp.bits.miss := r_cpu_req_val_ && (!tag_hit || mshr.io.secondary_miss) && r_req_read - io.cpu.resp.bits.tag := Mux(mshr.io.cpu_resp_val, mshr.io.cpu_resp_tag, r_cpu_req_tag) - io.cpu.resp.bits.typ := r_cpu_req_type + val s2_read = isRead(s2_req.cmd) + io.cpu.resp.valid := s2_read && (s2_replay || s2_valid_masked && s2_hit) + io.cpu.resp.bits.nack := s2_valid && s2_nack + io.cpu.resp.bits.replay := s2_replay && s2_read + io.cpu.resp.bits.tag := s2_req.tag + io.cpu.resp.bits.typ := s2_req.typ io.cpu.resp.bits.data := loadgen.word - io.cpu.resp.bits.data_subword := Reg(loadgen.byte) + io.cpu.resp.bits.data_subword := loadgen.byte val xact_init_arb = (new Arbiter(2)) { new 
TransactionInit } xact_init_arb.io.in(0) <> wb.io.mem_req diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index 28fdcfad..22d18ca9 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -18,7 +18,7 @@ object Constants extends { def HAVE_RVC = false def HAVE_FPU = true - def HAVE_VEC = true + def HAVE_VEC = false val MAX_THREADS = hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index ac84e608..dff3590c 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -59,11 +59,10 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component req_addr := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), UFix(0,3)) } - val dmem_resp_val = Reg(io.mem.resp.valid, resetVal = Bool(false)) - when (dmem_resp_val) { - req_addr := Cat(io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS), vpn_idx, UFix(0,3)).toUFix - r_resp_perm := io.mem.resp.bits.data_subword(9,4); - r_resp_ppn := io.mem.resp.bits.data_subword(PADDR_BITS-1, PGIDX_BITS); + when (io.mem.resp.valid) { + req_addr := Cat(io.mem.resp.bits.data(PADDR_BITS-1, PGIDX_BITS), vpn_idx, UFix(0,3)).toUFix + r_resp_perm := io.mem.resp.bits.data(9,4); + r_resp_ppn := io.mem.resp.bits.data(PADDR_BITS-1, PGIDX_BITS); } io.mem.req.valid := state === s_req @@ -76,8 +75,8 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component val resp_val = state === s_done || state === s_error val resp_err = state === s_error || state === s_wait - val resp_ptd = io.mem.resp.bits.data_subword(1,0) === Bits(1) - val resp_pte = io.mem.resp.bits.data_subword(1,0) === Bits(2) + val resp_ptd = io.mem.resp.bits.data(1,0) === Bits(1) + val resp_pte = io.mem.resp.bits.data(1,0) === Bits(2) val resp_ppns = (0 until levels-1).map(i => Cat(r_resp_ppn(PPN_BITS-1, 
VPN_BITS-bitsPerLevel*(i+1)), r_req_vpn(VPN_BITS-1-bitsPerLevel*(i+1), 0))) val resp_ppn = (0 until levels-1).foldRight(r_resp_ppn)((i,j) => Mux(count === UFix(i), resp_ppns(i), j)) @@ -109,7 +108,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component when (io.mem.resp.bits.nack) { state := s_req } - when (dmem_resp_val) { + when (io.mem.resp.valid) { when (resp_pte) { // page table entry state := s_done } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 2138561b..4833cd66 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -6,7 +6,8 @@ import Constants._ import uncore._ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, - icache: ICacheConfig, dcache: DCacheConfig) + icache: ICacheConfig, dcache: DCacheConfig, + fastLoadByte: Boolean = false) { val dcacheReqTagBits = 9 // enforce compliance with require() } From 9e010beffe3e5f213ad26e24bdbb57f165371df1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 16 Nov 2012 21:05:29 -0800 Subject: [PATCH 0510/1087] fix D$ refill bug --- rocket/src/main/scala/nbdcache.scala | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 5db2faf0..2d3e1cc4 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -816,11 +816,11 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { // refill response val refill = conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) - writeArb.io.in(0).valid := io.mem.xact_rep.valid && refill - io.mem.xact_rep.ready := writeArb.io.in(0).ready || !refill - writeArb.io.in(0).bits := mshr.io.mem_resp - writeArb.io.in(0).bits.wmask := Fix(-1) - writeArb.io.in(0).bits.data := io.mem.xact_rep.bits.data + writeArb.io.in(1).valid := io.mem.xact_rep.valid && refill + io.mem.xact_rep.ready := writeArb.io.in(1).ready 
|| !refill + writeArb.io.in(1).bits := mshr.io.mem_resp + writeArb.io.in(1).bits.wmask := Fix(-1) + writeArb.io.in(1).bits.data := io.mem.xact_rep.bits.data // load hits readArb.io.in(2).bits.addr := io.cpu.req.bits.addr @@ -840,11 +840,11 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { def storeMatch(dst: HellaCacheReq, src: HellaCacheReq) = idxMatch(dst, src) && offsetMatch(dst, src) val p_store_match = s2_valid && storeMatch(s1_req, s2_req) || s3_valid && storeMatch(s1_req, s3_req) - writeArb.io.in(1).bits.addr := s3_req.addr - writeArb.io.in(1).bits.wmask := UFix(1) << s3_req.addr(conf.ramoffbits-1,offsetlsb).toUFix - writeArb.io.in(1).bits.data := Fill(MEM_DATA_BITS/conf.databits, s3_req.data) - writeArb.io.in(1).valid := s3_valid - writeArb.io.in(1).bits.way_en := s3_way + writeArb.io.in(0).bits.addr := s3_req.addr + writeArb.io.in(0).bits.wmask := UFix(1) << s3_req.addr(conf.ramoffbits-1,offsetlsb).toUFix + writeArb.io.in(0).bits.data := Fill(MEM_DATA_BITS/conf.databits, s3_req.data) + writeArb.io.in(0).valid := s3_valid + writeArb.io.in(0).bits.way_en := s3_way // tag update after a store to an exclusive clean line. 
val new_hit_state = conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) @@ -871,7 +871,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { mshr.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshr.io.req.bits.data := s2_req.data - mshr.io.mem_rep <> io.mem.xact_rep + mshr.io.mem_rep.valid := io.mem.xact_rep.fire() + mshr.io.mem_rep.bits := io.mem.xact_rep.bits mshr.io.mem_abort.valid := io.mem.xact_abort.valid mshr.io.mem_abort.bits := io.mem.xact_abort.bits io.mem.xact_abort.ready := Bool(true) From cb8ac730457b2265affdcde57642e03e9f488d4d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 16 Nov 2012 21:15:13 -0800 Subject: [PATCH 0511/1087] provide store data with cache response --- rocket/src/main/scala/nbdcache.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2d3e1cc4..91f9dcd9 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -666,6 +666,7 @@ class HellaCacheResp(implicit conf: DCacheConfig) extends Bundle { val data = Bits(width = conf.databits) val data_subword = Bits(width = conf.databits) val tag = Bits(width = conf.reqtagbits) + val store_data = Bits(width = conf.databits) override def clone = new HellaCacheResp().asInstanceOf[this.type] } @@ -930,6 +931,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { io.cpu.resp.bits.typ := s2_req.typ io.cpu.resp.bits.data := loadgen.word io.cpu.resp.bits.data_subword := loadgen.byte + io.cpu.resp.bits.store_data := s2_req.data val xact_init_arb = (new Arbiter(2)) { new TransactionInit } xact_init_arb.io.in(0) <> wb.io.mem_req From dad7b71062030af45271cbaf278a0de22e8d26d9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 16 Nov 2012 21:26:12 -0800 Subject: [PATCH 0512/1087] provide cmd/addr with cache response --- rocket/src/main/scala/nbdcache.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 
deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 91f9dcd9..078076d1 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -666,6 +666,8 @@ class HellaCacheResp(implicit conf: DCacheConfig) extends Bundle { val data = Bits(width = conf.databits) val data_subword = Bits(width = conf.databits) val tag = Bits(width = conf.reqtagbits) + val cmd = Bits(width = 4) + val addr = UFix(width = conf.ppnbits.max(conf.vpnbits+1) + conf.pgidxbits) val store_data = Bits(width = conf.databits) override def clone = new HellaCacheResp().asInstanceOf[this.type] @@ -926,9 +928,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s2_read = isRead(s2_req.cmd) io.cpu.resp.valid := s2_read && (s2_replay || s2_valid_masked && s2_hit) io.cpu.resp.bits.nack := s2_valid && s2_nack + io.cpu.resp.bits := s2_req io.cpu.resp.bits.replay := s2_replay && s2_read - io.cpu.resp.bits.tag := s2_req.tag - io.cpu.resp.bits.typ := s2_req.typ io.cpu.resp.bits.data := loadgen.word io.cpu.resp.bits.data_subword := loadgen.byte io.cpu.resp.bits.store_data := s2_req.data From e68b039133ec40125db415100494d08f9c99f739 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Nov 2012 06:47:27 -0800 Subject: [PATCH 0513/1087] fix misc. 
D$ control bugs --- rocket/src/main/scala/nbdcache.scala | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 078076d1..d315e0e6 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -271,9 +271,10 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { // don't issue back-to-back replays with store->load dependence val r1_replay_valid = Reg(rpq.io.deq.fire()) val r2_replay_valid = Reg(r1_replay_valid) - val (r1_replay, r2_replay) = (Reg{new Replay}, Reg{new Replay}) - when (rpq.io.deq.fire()) { r1_replay := rpq.io.deq.bits } - when (r1_replay_valid) { r2_replay := r1_replay } + val r3_replay_valid = Reg(r2_replay_valid) + val r1_replay = RegEn(rpq.io.deq.bits, rpq.io.deq.fire()) + val r2_replay = RegEn(r1_replay, r1_replay_valid) + val r3_replay = RegEn(r2_replay, r2_replay_valid) def offsetMatch(dst: HellaCacheReq, src: HellaCacheReq) = { def mask(x: HellaCacheReq) = StoreGen(x.typ, x.addr, Bits(0)).mask // TODO: this is overly restrictive @@ -281,7 +282,9 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { // && (mask(dst) & mask(src)).orR } when (r1_replay_valid && offsetMatch(io.replay.bits, r1_replay) || - r2_replay_valid && offsetMatch(io.replay.bits, r2_replay)) { + r2_replay_valid && offsetMatch(io.replay.bits, r2_replay) || + r3_replay_valid && offsetMatch(io.replay.bits, r3_replay) || + !io.meta_req.ready) { rpq.io.deq.ready := Bool(false) io.replay.bits.cmd := M_FENCE // NOP } @@ -722,6 +725,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s3_req = Reg{io.cpu.req.bits.clone} val s3_way = Reg{Bits()} + val s4_valid = Reg(s3_valid, resetVal = Bool(false)) + val s4_req = RegEn(s3_req, s3_valid) + val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) val s1_readwrite = s1_read || s1_write @@ -842,7 +848,8 @@ class 
HellaCache(implicit conf: DCacheConfig) extends Component { } def storeMatch(dst: HellaCacheReq, src: HellaCacheReq) = idxMatch(dst, src) && offsetMatch(dst, src) val p_store_match = s2_valid && storeMatch(s1_req, s2_req) || - s3_valid && storeMatch(s1_req, s3_req) + s3_valid && storeMatch(s1_req, s3_req) || + s4_valid && storeMatch(s1_req, s4_req) writeArb.io.in(0).bits.addr := s3_req.addr writeArb.io.in(0).bits.wmask := UFix(1) << s3_req.addr(conf.ramoffbits-1,offsetlsb).toUFix writeArb.io.in(0).bits.data := Fill(MEM_DATA_BITS/conf.databits, s3_req.data) @@ -911,14 +918,15 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s1_nack = p_store_match || dtlb.io.req.valid && dtlb.io.resp.miss || idxMatch(s1_req, s2_req) && meta.io.state_req.valid || s1_req.addr(indexmsb,indexlsb) === prober.io.meta_req.bits.idx && !prober.io.req.ready - s2_nack_hit := Reg(s1_nack) || mshr.io.secondary_miss + s2_nack_hit := Reg(s1_nack) || s2_hit && mshr.io.secondary_miss val s2_nack_miss = !s2_hit && !mshr.io.req.ready - val s2_nack = s2_nack_hit || s2_nack_miss + val s2_nack_fence = s2_req.cmd === M_FENCE && !mshr.io.fence_rdy + val s2_nack = s2_nack_hit || s2_nack_miss || s2_nack_fence s2_valid_masked := s2_valid && !s2_nack // after a nack, block until nack condition resolves (saves energy) val block_fence = Reg(resetVal = Bool(false)) - block_fence := (s1_valid && s1_req.cmd === M_FENCE || block_fence) && !mshr.io.fence_rdy + block_fence := (s2_valid && s2_req.cmd === M_FENCE || block_fence) && !mshr.io.fence_rdy val block_miss = Reg(resetVal = Bool(false)) block_miss := (s2_valid || block_miss) && s2_nack_miss when (block_fence || block_miss) { From cc067026a260f4c0fe4f94d51e692dc37a8407d4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Nov 2012 06:48:11 -0800 Subject: [PATCH 0514/1087] pipeline D$ response -> FPU regfile --- rocket/src/main/scala/fpu.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 9e2961fe..7cd5b989 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -478,13 +478,13 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component val wb_ctrl = RegEn(mem_ctrl, mem_reg_valid) // load response - val load_wb = io.dpath.dmem_resp_val - val load_wb_single = io.dpath.dmem_resp_type === MT_W - val load_wb_data = io.dpath.dmem_resp_data - val load_wb_tag = io.dpath.dmem_resp_tag + val load_wb = Reg(io.dpath.dmem_resp_val) + val load_wb_single = RegEn(io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU, io.dpath.dmem_resp_val) + val load_wb_data = RegEn(io.dpath.dmem_resp_data, io.dpath.dmem_resp_val) + val load_wb_tag = RegEn(io.dpath.dmem_resp_tag, io.dpath.dmem_resp_val) val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) - val load_wb_data_recoded = Mux(load_wb_single, Cat(Fix(-1, 32), rec_s), rec_d) + val load_wb_data_recoded = Mux(load_wb_single, Cat(Fix(-1), rec_s), rec_d) val fsr_rm = Reg() { Bits(width = 3) } val fsr_exc = Reg() { Bits(width = 5) } From 5a7777fe4d7bd83db4f1c0e0e575f6fd5d3b6674 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Nov 2012 06:48:44 -0800 Subject: [PATCH 0515/1087] clock gate integer datapath more aggressively --- rocket/src/main/scala/core.scala | 2 +- rocket/src/main/scala/ctrl.scala | 25 ++-- rocket/src/main/scala/dpath.scala | 199 ++++++++++++------------- rocket/src/main/scala/dpath_util.scala | 30 ++-- rocket/src/main/scala/dpath_vec.scala | 12 +- rocket/src/main/scala/util.scala | 26 ++++ 6 files changed, 158 insertions(+), 136 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 60973cea..d2d677b4 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -111,7 +111,7 @@ class Core(implicit conf: 
RocketConfiguration) extends Component // hooking up vector memory interface dmem(2).req.valid := vu.io.dmem_req.valid dmem(2).req.bits := vu.io.dmem_req.bits - dmem(2).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) + dmem(2).req.bits.data := RegEn(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data, vu.io.dmem_req.valid && isWrite(vu.io.dmem_req.bits.cmd)) vu.io.dmem_req.ready := dmem(2).req.ready vu.io.dmem_resp.valid := dmem(2).resp.valid diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 86835d4a..c31a1dac 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -385,6 +385,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val mem_reg_mem_val = Reg(resetVal = Bool(false)) val mem_reg_xcpt = Reg(resetVal = Bool(false)) val mem_reg_fp_val = Reg(resetVal = Bool(false)) + val mem_reg_vec_val = Reg(resetVal = Bool(false)) val mem_reg_replay = Reg(resetVal = Bool(false)) val mem_reg_replay_next = Reg(resetVal = Bool(false)) val mem_reg_pcr = Reg(resetVal = PCR_N) @@ -507,7 +508,7 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_mem_val := id_mem_val.toBool; ex_reg_valid := Bool(true) ex_reg_pcr := id_pcr - ex_reg_wen := id_wen + ex_reg_wen := id_wen && id_waddr != UFix(0) ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen ex_reg_eret := id_eret.toBool; ex_reg_flush_inst := id_fence_i @@ -521,7 +522,7 @@ class Control(implicit conf: RocketConfiguration) extends Component } // replay inst in ex stage - val wb_dcache_miss = wb_reg_mem_val && (wb_reg_wen || wb_reg_fp_wen) && !io.dmem.resp.valid + val wb_dcache_miss = wb_reg_mem_val && !io.dmem.resp.valid val replay_ex = wb_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || ex_reg_mem_val && !io.dmem.req.ready || ex_reg_div_val && !io.dpath.div_rdy || @@ -549,8 +550,9 @@ class Control(implicit conf: RocketConfiguration) extends 
Component mem_reg_eret := Bool(false); mem_reg_mem_val := Bool(false); mem_reg_flush_inst := Bool(false); - mem_reg_fp_val := Bool(false); - mem_reg_replay_next := Bool(false) + mem_reg_fp_val := Bool(false) + mem_reg_vec_val := Bool(false) + mem_reg_replay_next := Bool(false) mem_reg_xcpt := Bool(false) } .otherwise { @@ -561,9 +563,10 @@ class Control(implicit conf: RocketConfiguration) extends Component mem_reg_eret := ex_reg_eret; mem_reg_mem_val := ex_reg_mem_val; mem_reg_flush_inst := ex_reg_flush_inst; - mem_reg_fp_val := ex_reg_fp_val - mem_reg_replay_next := ex_reg_replay_next - mem_reg_mem_type := ex_reg_mem_type + mem_reg_fp_val := ex_reg_fp_val + mem_reg_vec_val := ex_reg_vec_val + mem_reg_replay_next := ex_reg_replay_next + mem_reg_mem_type := ex_reg_mem_type mem_reg_xcpt := ex_xcpt } @@ -575,7 +578,7 @@ class Control(implicit conf: RocketConfiguration) extends Component (mem_reg_mem_val && io.dmem.xcpt.pf.st, UFix(11)))) val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem - val ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen) + val ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen || mem_reg_vec_val || mem_reg_pcr != PCR_N) val replay_mem = ll_wb_kill_mem || mem_reg_replay || fpu_kill_mem val killm_common = ll_wb_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem @@ -628,12 +631,12 @@ class Control(implicit conf: RocketConfiguration) extends Component } val sboard = new Scoreboard - sboard.set(wb_reg_div_mul_val || wb_dcache_miss && io.dpath.wb_wen, io.dpath.wb_waddr) + sboard.set((wb_reg_div_mul_val || wb_dcache_miss) && io.dpath.wb_wen, io.dpath.wb_waddr) sboard.clear(io.dpath.mem_ll_wb, io.dpath.mem_ll_waddr) val id_stall_fpu = if (HAVE_FPU) { val fp_sboard = new Scoreboard - fp_sboard.set(wb_dcache_miss && wb_reg_fp_wen && !replay_wb || io.fpu.sboard_set, io.dpath.wb_waddr) + fp_sboard.set((wb_dcache_miss && wb_reg_fp_wen || io.fpu.sboard_set) 
&& !replay_wb, io.dpath.wb_waddr) fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) @@ -725,7 +728,7 @@ class Control(implicit conf: RocketConfiguration) extends Component io.imem.invalidate := wb_reg_flush_inst io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen - io.dpath.wb_load := wb_reg_mem_val && io.dpath.wb_wen + io.dpath.wb_load := wb_reg_mem_val && wb_reg_wen io.dpath.ren2 := id_renx2.toBool; io.dpath.ren1 := id_renx1.toBool; io.dpath.sel_alu2 := id_sel_alu2.toUFix diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 3979bef0..ce2d3c80 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -25,46 +25,42 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // execute definitions val ex_reg_pc = Reg() { UFix() }; val ex_reg_inst = Reg() { Bits() }; - val ex_reg_raddr1 = Reg() { UFix() }; - val ex_reg_raddr2 = Reg() { UFix() }; - val ex_reg_op2 = Reg() { Bits() }; - val ex_reg_rs2 = Reg() { Bits() }; - val ex_reg_rs1 = Reg() { Bits() }; val ex_reg_waddr = Reg() { UFix() }; val ex_reg_ctrl_fn_dw = Reg() { UFix() }; val ex_reg_ctrl_fn_alu = Reg() { UFix() }; + val ex_reg_sel_alu2 = Reg() { UFix() }; val ex_reg_ctrl_sel_wb = Reg() { UFix() }; - val ex_wdata = Bits() - val ex_reg_kill = Reg() { Bool() } + val ex_reg_kill = Reg{Bool()} + val ex_reg_rs1_bypass = Reg{Bool()} + val ex_reg_rs1_lsb = Reg{Bits()} + val ex_reg_rs1_msb = Reg{Bits()} + val ex_reg_rs2_bypass = Reg{Bool()} + val ex_reg_rs2_lsb = Reg{Bits()} + val ex_reg_rs2_msb = Reg{Bits()} // memory definitions val mem_reg_pc = Reg() { UFix() }; val mem_reg_inst = Reg() { Bits() }; + val mem_reg_rs1 = Reg() { Bits() }; val mem_reg_rs2 = Reg() { Bits() }; val mem_reg_waddr = Reg() { UFix() }; val mem_reg_wdata = Reg() { Bits() }; - val mem_reg_raddr1 = Reg() { UFix() }; - val mem_reg_raddr2 = Reg() { UFix() }; val mem_reg_kill = Reg() { Bool() } // 
writeback definitions val wb_reg_pc = Reg() { UFix() }; val wb_reg_inst = Reg() { Bits() }; + val wb_reg_rs1 = Reg() { Bits() }; val wb_reg_rs2 = Reg() { Bits() }; val wb_reg_waddr = Reg() { UFix() } val wb_reg_wdata = Reg() { Bits() } - val wb_reg_vec_waddr = Reg() { UFix() } - val wb_reg_vec_wdata = Reg() { Bits() } - val wb_reg_raddr1 = Reg() { UFix() }; - val wb_reg_raddr2 = Reg() { UFix() }; val wb_reg_ll_wb = Reg(resetVal = Bool(false)); val wb_wdata = Bits(); + val wb_wen = io.ctrl.wb_wen && io.ctrl.wb_valid || wb_reg_ll_wb // instruction decode stage val id_inst = io.imem.resp.bits.data val id_pc = io.imem.resp.bits.pc - debug(id_inst) - debug(id_pc) val regfile_ = Mem(31){Bits(width = 64)} def readRF(a: UFix) = regfile_(~a) @@ -74,45 +70,36 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val id_raddr2 = id_inst(21,17).toUFix; // bypass muxes - val id_rs1_dmem_bypass = id_raddr1 != UFix(0) && - Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, Bool(false), - Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, io.ctrl.mem_load, - Bool(false))) + val id_rs1_zero = id_raddr1 === UFix(0) + val id_rs1_ex_bypass = io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr + val id_rs1_mem_bypass = io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr + val id_rs1_bypass = id_rs1_zero || id_rs1_ex_bypass || id_rs1_mem_bypass + val id_rs1_bypass_src = Mux(id_rs1_zero, UFix(0), Mux(id_rs1_ex_bypass, UFix(1), UFix(2) | io.ctrl.mem_load)) val id_rs1 = Mux(id_raddr1 === UFix(0), UFix(0), - Mux(io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr, ex_wdata, - Mux(io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr, mem_reg_wdata, - Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr1 === wb_reg_waddr, wb_wdata, - readRF(id_raddr1))))) + Mux(wb_wen && id_raddr1 === wb_reg_waddr, wb_wdata, + readRF(id_raddr1))) - val id_rs2_dmem_bypass = id_raddr2 != UFix(0) && - Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, Bool(false), - Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, 
io.ctrl.mem_load, - Bool(false))) + val id_rs2_zero = id_raddr2 === UFix(0) + val id_rs2_ex_bypass = io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr + val id_rs2_mem_bypass = io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr + val id_rs2_bypass = id_rs2_zero || id_rs2_ex_bypass || id_rs2_mem_bypass + val id_rs2_bypass_src = Mux(id_rs2_zero, UFix(0), Mux(id_rs2_ex_bypass, UFix(1), UFix(2) | io.ctrl.mem_load)) val id_rs2 = Mux(id_raddr2 === UFix(0), UFix(0), - Mux(io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr, ex_wdata, - Mux(io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr, mem_reg_wdata, - Mux((io.ctrl.wb_wen || wb_reg_ll_wb) && id_raddr2 === wb_reg_waddr, wb_wdata, - readRF(id_raddr2))))) + Mux(wb_wen && id_raddr2 === wb_reg_waddr, wb_wdata, + readRF(id_raddr2))) // immediate generation - val id_imm_bj = io.ctrl.sel_alu2 === A2_BTYPE || io.ctrl.sel_alu2 === A2_JTYPE - val id_imm_l = io.ctrl.sel_alu2 === A2_LTYPE - val id_imm_zero = io.ctrl.sel_alu2 === A2_ZERO || io.ctrl.sel_alu2 === A2_RTYPE - val id_imm_ibz = io.ctrl.sel_alu2 === A2_ITYPE || io.ctrl.sel_alu2 === A2_BTYPE || id_imm_zero - val id_imm_sign = Mux(id_imm_bj, id_inst(31), - Mux(id_imm_l, id_inst(26), - Mux(id_imm_zero, Bits(0,1), - id_inst(21)))) // IMM_ITYPE - val id_imm_small = Mux(id_imm_zero, Bits(0,12), - Cat(Mux(id_imm_bj, id_inst(31,27), id_inst(21,17)), id_inst(16,10))) - val id_imm = Cat(Fill(32, id_imm_sign), - Mux(id_imm_l, Cat(id_inst(26,7), Bits(0,12)), - Mux(id_imm_ibz, Cat(Fill(20, id_imm_sign), id_imm_small), - Cat(Fill(7, id_imm_sign), id_inst(31,7))))) // A2_JTYPE - - val id_op2_dmem_bypass = id_rs2_dmem_bypass && io.ctrl.sel_alu2 === A2_RTYPE - val id_op2 = Mux(io.ctrl.sel_alu2 === A2_RTYPE, id_rs2, id_imm) + def imm(sel: Bits, inst: Bits) = { + val lsbs = Mux(sel === A2_LTYPE || sel === A2_ZERO, Bits(0), + Mux(sel === A2_BTYPE, Cat(inst(31,27), inst(16,10)), + Mux(sel === A2_JTYPE, inst(18,7), + inst(21,10)))) + val msbs = Mux(sel === A2_LTYPE, inst(26,7), + Mux(sel === A2_JTYPE, 
inst(31,19).toFix, + Mux(sel === A2_ITYPE, inst(21), inst(31)).toFix)) + Cat(msbs, lsbs).toFix + } io.ctrl.inst := id_inst io.fpu.inst := id_inst @@ -122,21 +109,47 @@ class Datapath(implicit conf: RocketConfiguration) extends Component when (!io.ctrl.killd) { ex_reg_pc := id_pc ex_reg_inst := id_inst - ex_reg_raddr1 := id_raddr1 - ex_reg_raddr2 := id_raddr2 - ex_reg_op2 := id_op2 ex_reg_waddr := Mux(io.ctrl.sel_wa === WA_RD, id_inst(31,27).toUFix, RA) ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUFix ex_reg_ctrl_fn_alu := io.ctrl.fn_alu + ex_reg_sel_alu2 := io.ctrl.sel_alu2 ex_reg_ctrl_sel_wb := io.ctrl.sel_wb - when (io.ctrl.ren1) { ex_reg_rs1 := id_rs1 } - when (io.ctrl.ren2) { ex_reg_rs2 := id_rs2 } + when (io.ctrl.ren1) { + ex_reg_rs1_bypass := id_rs1_bypass + ex_reg_rs1_lsb := id_rs1_bypass_src + when (!id_rs1_bypass) { + ex_reg_rs1_lsb := id_rs1(id_rs1_bypass_src.getWidth-1,0) + ex_reg_rs1_msb := id_rs1(63,id_rs1_bypass_src.getWidth) + } + } + when (io.ctrl.ren2) { + ex_reg_rs2_bypass := id_rs2_bypass + ex_reg_rs2_lsb := id_rs2_bypass_src + when (!id_rs2_bypass) { + ex_reg_rs2_lsb := id_rs2(id_rs2_bypass_src.getWidth-1,0) + ex_reg_rs2_msb := id_rs2(63,id_rs2_bypass_src.getWidth) + } + } } + val ex_raddr1 = ex_reg_inst(26,22) + val ex_raddr2 = ex_reg_inst(21,17) + val dmem_resp_data = if (conf.fastLoadByte) io.dmem.resp.bits.data_subword else io.dmem.resp.bits.data - val ex_rs1 = Mux(Reg(id_rs1_dmem_bypass), dmem_resp_data, ex_reg_rs1) - val ex_rs2 = Mux(Reg(id_rs2_dmem_bypass), dmem_resp_data, ex_reg_rs2) - val ex_op2 = Mux(Reg(id_op2_dmem_bypass), dmem_resp_data, ex_reg_op2) + val ex_rs1 = + Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(3), dmem_resp_data, + Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(2), wb_reg_wdata, + Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(1), mem_reg_wdata, + Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(0), Bits(0), + Cat(ex_reg_rs1_msb, ex_reg_rs1_lsb))))) + val ex_rs2 = + Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === 
UFix(3), dmem_resp_data, + Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(2), wb_reg_wdata, + Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(1), mem_reg_wdata, + Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(0), Bits(0), + Cat(ex_reg_rs2_msb, ex_reg_rs2_lsb))))) + val ex_imm = imm(ex_reg_sel_alu2, ex_reg_inst) + val ex_op2 = Mux(ex_reg_sel_alu2 != A2_RTYPE, ex_imm, ex_rs2) val alu = new ALU alu.io.dw := ex_reg_ctrl_fn_dw; @@ -149,7 +162,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component div.io.req.valid := io.ctrl.div_val div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, io.ctrl.div_fn) div.io.req.bits.in0 := ex_rs1 - div.io.req.bits.in1 := ex_op2 + div.io.req.bits.in1 := ex_rs2 div.io.req_tag := ex_reg_waddr div.io.req_kill := io.ctrl.div_kill div.io.resp_rdy := Bool(true) @@ -168,7 +181,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component mul_io.req.valid := io.ctrl.mul_val mul_io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, io.ctrl.mul_fn) mul_io.req.bits.in0 := ex_rs1 - mul_io.req.bits.in1 := ex_op2 + mul_io.req.bits.in1 := ex_rs2 mul_io.req_tag := ex_reg_waddr mul_io.req_kill := io.ctrl.mul_kill mul_io.resp_rdy := Bool(true) @@ -199,11 +212,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // processor control regfile read val pcr = new rocketDpathPCR() pcr.io.r.en := io.ctrl.pcr != PCR_N - pcr.io.r.addr := wb_reg_raddr1 + pcr.io.r.addr := wb_reg_inst(26,22).toUFix pcr.io.host <> io.host pcr.io <> io.ctrl - pcr.io.pc := wb_reg_pc io.ctrl.pcr_replay := pcr.io.replay io.ptw.ptbr := pcr.io.ptbr @@ -211,7 +223,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.ptw.status := pcr.io.status // branch resolution logic - io.ctrl.jalr_eq := ex_reg_rs1 === id_pc.toFix && ex_reg_op2(id_imm_small.getWidth-1,0) === UFix(0) + io.ctrl.jalr_eq := ex_rs1 === id_pc.toFix && ex_reg_inst(11,0) === UFix(0) io.ctrl.ex_br_taken := Mux(io.ctrl.ex_br_type === BR_EQ, ex_rs1 === ex_rs2, 
Mux(io.ctrl.ex_br_type === BR_NE, ex_rs1 != ex_rs2, @@ -221,21 +233,17 @@ class Datapath(implicit conf: RocketConfiguration) extends Component Mux(io.ctrl.ex_br_type === BR_GEU, ex_rs1 >= ex_rs2, io.ctrl.ex_br_type === BR_J)))))) - val ex_pc_plus4 = ex_reg_pc + UFix(4) - val ex_branch_target = ex_reg_pc + Cat(ex_reg_op2(VADDR_BITS-1,0), Bits(0,1)).toUFix + val ex_pc_plus4 = ex_reg_pc + 4 + val ex_branch_target = (ex_reg_pc.toFix + (ex_imm << 1)).toUFix - // time stamp counter - val tsc_reg = Reg(resetVal = UFix(0,64)); - tsc_reg := tsc_reg + UFix(1); - // instructions retired counter - val irt_reg = Reg(resetVal = UFix(0,64)); - when (io.ctrl.wb_valid) { irt_reg := irt_reg + UFix(1); } + val tsc_reg = WideCounter(64) + val irt_reg = WideCounter(64, io.ctrl.wb_valid) // writeback select mux - ex_wdata := + val ex_wdata = Mux(ex_reg_ctrl_sel_wb === WB_PC, ex_pc_plus4.toFix, - Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg, - Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg, + Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg.value, + Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg.value, alu.io.out))).toBits // WB_ALU // memory stage @@ -243,11 +251,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component when (!ex_reg_kill) { mem_reg_pc := ex_reg_pc mem_reg_inst := ex_reg_inst + mem_reg_rs1 := ex_rs1 mem_reg_rs2 := StoreGen(io.ctrl.ex_mem_type, Bits(0), ex_rs2).data mem_reg_waddr := ex_reg_waddr mem_reg_wdata := ex_wdata - mem_reg_raddr1 := ex_reg_raddr1 - mem_reg_raddr2 := ex_reg_raddr2 } // for load/use hazard detection (load byte/halfword) @@ -276,6 +283,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.ctrl.mem_ll_waddr := dmem_resp_waddr io.ctrl.mem_ll_wb := Bool(true) } + when (io.ctrl.mem_ll_waddr === UFix(0)) { io.ctrl.mem_ll_wb := Bool(false) } io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu io.fpu.dmem_resp_data := io.dmem.resp.bits.data @@ -286,11 +294,8 @@ class Datapath(implicit conf: RocketConfiguration) extends 
Component when (!mem_reg_kill) { wb_reg_pc := mem_reg_pc wb_reg_inst := mem_reg_inst + wb_reg_rs1 := mem_reg_rs1 wb_reg_rs2 := mem_reg_rs2 - wb_reg_vec_waddr := mem_reg_waddr - wb_reg_vec_wdata := mem_reg_wdata - wb_reg_raddr1 := mem_reg_raddr1 - wb_reg_raddr2 := mem_reg_raddr2 wb_reg_waddr := mem_reg_waddr wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata) } @@ -299,6 +304,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Component wb_reg_waddr := io.ctrl.mem_ll_waddr wb_reg_wdata := mem_ll_wdata } + wb_wdata := Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword, + Mux(io.ctrl.pcr != PCR_N, pcr.io.r.data, + wb_reg_wdata)) if (HAVE_VEC) { @@ -310,11 +318,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Component vec.io.valid := io.ctrl.wb_valid && pcr.io.status(SR_EV) vec.io.inst := wb_reg_inst - vec.io.waddr := wb_reg_vec_waddr - vec.io.raddr1 := wb_reg_raddr1 vec.io.vecbank := pcr.io.vecbank vec.io.vecbankcnt := pcr.io.vecbankcnt - vec.io.wdata := wb_reg_vec_wdata + vec.io.wdata := wb_reg_wdata vec.io.rs2 := wb_reg_rs2 pcr.io.vec_irq_aux := vec.io.irq_aux @@ -322,29 +328,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component pcr.io.vec_nxregs := vec.io.nxregs pcr.io.vec_nfregs := vec.io.nfregs - wb_wdata := - Mux(vec.io.wen, Cat(Bits(0,52), vec.io.appvl), - Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword, - wb_reg_wdata)) - } - else - { - pcr.io.vec_irq_aux := UFix(0) - pcr.io.vec_appvl := UFix(0) - pcr.io.vec_nxregs := UFix(0) - pcr.io.vec_nfregs := UFix(0) - - wb_wdata := - Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword, - wb_reg_wdata) + when (vec.io.wen) { wb_wdata := vec.io.appvl } } - val rf_wen = io.ctrl.wb_wen || wb_reg_ll_wb - val rf_waddr = wb_reg_waddr - val rf_wdata = Mux(io.ctrl.wb_wen && io.ctrl.pcr != PCR_N, pcr.io.r.data, wb_wdata) - List(rf_wen, rf_waddr, rf_wdata).map(debug _) - when (rf_wen) { writeRF(rf_waddr, rf_wdata) } - + when (wb_wen) { 
writeRF(wb_reg_waddr, wb_wdata) } io.ctrl.wb_waddr := wb_reg_waddr // scoreboard clear (for div/mul and D$ load miss writebacks) @@ -352,7 +339,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.ctrl.fp_sboard_clra := dmem_resp_waddr // processor control regfile write - pcr.io.w.addr := wb_reg_raddr1 + pcr.io.w.addr := wb_reg_inst(26,22).toUFix pcr.io.w.en := io.ctrl.pcr === PCR_T || io.ctrl.pcr === PCR_S || io.ctrl.pcr === PCR_C pcr.io.w.data := Mux(io.ctrl.pcr === PCR_S, pcr.io.r.data | wb_reg_wdata, Mux(io.ctrl.pcr === PCR_C, pcr.io.r.data & ~wb_reg_wdata, @@ -365,4 +352,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec).toUFix, wb_reg_pc))) // PC_WB + + // expose debug signals to testbench + // XXX debug() doesn't right, so create a false dependence + val debugList = List(wb_reg_pc, wb_reg_inst, wb_wen, wb_reg_waddr, wb_wdata, wb_reg_rs1, wb_reg_rs2) + pcr.io.pc := wb_reg_pc | (debugList.map(d => d^d).reduce(_|_)).toUFix + debugList.foreach(debug _) } diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index f12a685a..3dc911be 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -4,6 +4,7 @@ import Chisel._ import Node._ import Constants._ import scala.math._ +import Util._ class ioDpathBTB extends Bundle() { @@ -86,10 +87,10 @@ class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component { val io = new ioDpathPCR - val reg_epc = Reg() { UFix() }; - val reg_badvaddr = Reg() { UFix() }; - val reg_ebase = Reg() { UFix() }; - val reg_count = Reg() { UFix() }; + val reg_epc = Reg{Fix()} + val reg_badvaddr = Reg{Fix()} + val reg_ebase = Reg{Fix()} + val reg_count = WideCounter(32) val reg_compare = Reg() { UFix() }; val reg_cause = Reg() { Bits() }; val 
reg_tohost = Reg(resetVal = Bits(0, 64)); @@ -128,7 +129,7 @@ class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component io.ptbr_wen := reg_status_vm.toBool && wen && (waddr === PCR_PTBR); io.status := Cat(reg_status_im, Bits(0,7), reg_status_vm, reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, reg_status_ec, reg_status_ev, reg_status_ef, reg_status_et); - io.evec := Mux(io.exception, reg_ebase, reg_epc) + io.evec := Mux(io.exception, reg_ebase, reg_epc).toUFix io.ptbr := reg_ptbr; io.host.debug.error_mode := reg_error_mode; io.r.data := rdata; @@ -168,7 +169,6 @@ class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component when (reg_count === reg_compare) { r_irq_timer := Bool(true); } - reg_count := reg_count + UFix(1); io.irq_timer := r_irq_timer; io.irq_ipi := r_irq_ipi; @@ -191,9 +191,9 @@ class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component reg_status_ec := Bool(HAVE_RVC) && wdata(SR_EC).toBool; reg_status_et := wdata(SR_ET).toBool; } - when (waddr === PCR_EPC) { reg_epc := wdata(VADDR_BITS,0).toUFix; } + when (waddr === PCR_EPC) { reg_epc := wdata(VADDR_BITS,0).toFix } when (waddr === PCR_EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toUFix; } - when (waddr === PCR_COUNT) { reg_count := wdata(31,0).toUFix; } + when (waddr === PCR_COUNT) { reg_count := wdata.toUFix } when (waddr === PCR_COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } when (waddr === PCR_COREID) { reg_coreid := wdata(15,0) } when (waddr === PCR_FROMHOST) { when (reg_fromhost === UFix(0) || io.w.en) { reg_fromhost := wdata } } @@ -210,19 +210,19 @@ class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component rdata := io.status // raddr === PCR_STATUS switch (raddr) { - is (PCR_EPC) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_epc(VADDR_BITS)), reg_epc); } - is (PCR_BADVADDR) { rdata := Cat(Fill(64-VADDR_BITS-1, reg_badvaddr(VADDR_BITS)), reg_badvaddr); } - is (PCR_EVEC) { rdata := 
Cat(Fill(64-VADDR_BITS, reg_ebase(VADDR_BITS-1)), reg_ebase); } - is (PCR_COUNT) { rdata := Cat(Fill(32, reg_count(31)), reg_count); } - is (PCR_COMPARE) { rdata := Cat(Fill(32, reg_compare(31)), reg_compare); } - is (PCR_CAUSE) { rdata := Cat(reg_cause(5), Bits(0,58), reg_cause(4,0)); } + is (PCR_EPC) { rdata := reg_epc } + is (PCR_BADVADDR) { rdata := reg_badvaddr } + is (PCR_EVEC) { rdata := reg_ebase } + is (PCR_COUNT) { rdata := reg_count } + is (PCR_COMPARE) { rdata := reg_compare } + is (PCR_CAUSE) { rdata := reg_cause(5) << 63 | reg_cause(4,0) } is (PCR_COREID) { rdata := reg_coreid } is (PCR_IMPL) { rdata := Bits(2) } is (PCR_FROMHOST) { rdata := reg_fromhost; } is (PCR_TOHOST) { rdata := reg_tohost; } is (PCR_K0) { rdata := reg_k0; } is (PCR_K1) { rdata := reg_k1; } - is (PCR_PTBR) { rdata := Cat(Bits(0,64-PADDR_BITS), reg_ptbr); } + is (PCR_PTBR) { rdata := reg_ptbr } is (PCR_VECBANK) { rdata := Cat(Bits(0, 56), reg_vecbank) } is (PCR_VECCFG) { rdata := Cat(Bits(0, 40), io.vec_nfregs, io.vec_nxregs, io.vec_appvl) } } diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 6bf6a8b1..7d922813 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -28,8 +28,6 @@ class ioDpathVec extends Bundle val iface = new ioDpathVecInterface() val valid = Bool(INPUT) val inst = Bits(INPUT, 32) - val waddr = UFix(INPUT, 5) - val raddr1 = UFix(INPUT, 5) val vecbank = Bits(INPUT, 8) val vecbankcnt = UFix(INPUT, 4) val wdata = Bits(INPUT, 64) @@ -151,14 +149,16 @@ class rocketDpathVec extends Component io.nfregs := reg_nfregs val appvlm1 = appvl - UFix(1) + val waddr = io.inst(31,27) + val raddr1 = io.inst(26,22) io.iface.vcmdq.bits := Mux(io.ctrl.sel_vcmd === VCMD_I, Cat(Bits(0,2), Bits(0,4), io.inst(9,8), Bits(0,6), Bits(0,6)), Mux(io.ctrl.sel_vcmd === VCMD_F, Cat(Bits(0,2), Bits(1,3), io.inst(9,7), Bits(0,6), Bits(0,6)), - Mux(io.ctrl.sel_vcmd === VCMD_TX, Cat(Bits(1,2), io.inst(13,8), 
Bits(0,1), io.waddr, Bits(0,1), io.raddr1), - Mux(io.ctrl.sel_vcmd === VCMD_TF, Cat(Bits(1,2), io.inst(13,8), Bits(1,1), io.waddr, Bits(1,1), io.raddr1), - Mux(io.ctrl.sel_vcmd === VCMD_MX, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(0,1), io.waddr, Bits(0,1), io.waddr), - Mux(io.ctrl.sel_vcmd === VCMD_MF, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(1,1), io.waddr, Bits(1,1), io.waddr), + Mux(io.ctrl.sel_vcmd === VCMD_TX, Cat(Bits(1,2), io.inst(13,8), Bits(0,1), waddr, Bits(0,1), raddr1), + Mux(io.ctrl.sel_vcmd === VCMD_TF, Cat(Bits(1,2), io.inst(13,8), Bits(1,1), waddr, Bits(1,1), raddr1), + Mux(io.ctrl.sel_vcmd === VCMD_MX, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(0,1), waddr, Bits(0,1), waddr), + Mux(io.ctrl.sel_vcmd === VCMD_MF, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(1,1), waddr, Bits(1,1), waddr), Mux(io.ctrl.sel_vcmd === VCMD_A, io.wdata(SZ_VCMD-1, 0), Bits(0,20)))))))) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 3b11ffd6..bd747109 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -7,4 +7,30 @@ object Util implicit def intToUFix(x: Int): UFix = UFix(x) implicit def intToBoolean(x: Int): Boolean = if (x != 0) true else false implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0 + + implicit def wcToUFix(c: WideCounter): UFix = c.value +} + +// a counter that clock gates most of its MSBs using the LSB carry-out +case class WideCounter(width: Int, inc: Bool = Bool(true)) +{ + private val isWide = width >= 4 + private val smallWidth = if (isWide) log2Up(width) else width + private val small = Reg(resetVal = UFix(0, smallWidth)) + private val nextSmall = small + UFix(1, smallWidth+1) + when (inc) { small := nextSmall(smallWidth-1,0) } + + private val large = if (isWide) { + val r = Reg(resetVal = UFix(0, width - smallWidth)) + when (inc && nextSmall(smallWidth)) { r := r + UFix(1) } + r + } else 
null + + val value = Cat(large, small) + + def := (x: UFix) = { + val w = x.getWidth + small := x(w.min(smallWidth)-1,0) + if (isWide) large := (if (w < smallWidth) UFix(0) else x(w.min(width)-1,smallWidth)) + } } From 29bc361d6c298fec7f3d9c3fe9573937a3890d85 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 17 Nov 2012 17:24:08 -0800 Subject: [PATCH 0516/1087] remove global constants; disentangle hwacha a bit --- rocket/src/main/scala/consts.scala | 25 +- rocket/src/main/scala/core.scala | 15 +- rocket/src/main/scala/ctrl.scala | 474 ++++++++++++------------- rocket/src/main/scala/divider.scala | 39 +- rocket/src/main/scala/dpath.scala | 67 ++-- rocket/src/main/scala/dpath_alu.scala | 36 +- rocket/src/main/scala/dpath_util.scala | 6 +- rocket/src/main/scala/dpath_vec.scala | 3 +- rocket/src/main/scala/multiplier.scala | 119 +++---- rocket/src/main/scala/nbdcache.scala | 7 +- rocket/src/main/scala/package.scala | 8 - rocket/src/main/scala/tile.scala | 12 +- rocket/src/main/scala/tlb.scala | 1 - 13 files changed, 375 insertions(+), 437 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 3490ce4b..d0351cbd 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -4,16 +4,6 @@ package constants import Chisel._ import scala.math._ -abstract trait TileConfigConstants { - def HAVE_RVC: Boolean - def HAVE_FPU: Boolean - def HAVE_VEC: Boolean - val FPU_N = UFix(0, 1) - val FPU_Y = if (HAVE_FPU) UFix(1, 1) else FPU_N - val VEC_N = UFix(0, 1); - val VEC_Y = if (HAVE_VEC) UFix(1, 1) else VEC_N -} - trait ScalarOpConstants { val BR_X = Bits("b???", 3) val BR_EQ = Bits(0, 3) @@ -38,18 +28,6 @@ trait ScalarOpConstants { val A2_JTYPE = UFix(5, 3); val A2_RTYPE = UFix(6, 3); - val MUL_X = Bits("b??", 2) - val MUL_LO = UFix(0, 2); - val MUL_H = UFix(1, 2); - val MUL_HSU = UFix(2, 2); - val MUL_HU = UFix(3, 2); - - val DIV_X = Bits("b??", 2) - val DIV_D = UFix(0, 2); - val DIV_DU = UFix(1, 
2); - val DIV_R = UFix(2, 2); - val DIV_RU = UFix(3, 2); - val X = Bits("b?", 1) val N = Bits(0, 1); val Y = Bits(1, 1); @@ -64,6 +42,7 @@ trait ScalarOpConstants { val WB_TSC = UFix(4, 3); val WB_IRT = UFix(5, 3); + val SZ_DW = 1 val DW_X = X val DW_32 = N val DW_64 = Y @@ -120,7 +99,7 @@ trait InterruptConstants { val IRQ_TIMER = 7 } -abstract trait RocketDcacheConstants extends TileConfigConstants with uncore.constants.CacheConstants with uncore.constants.AddressConstants { +abstract trait RocketDcacheConstants extends uncore.constants.CacheConstants with uncore.constants.AddressConstants { require(OFFSET_BITS == log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES)) require(OFFSET_BITS <= uncore.Constants.X_INIT_WRITE_MASK_BITS) require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index d2d677b4..0d67f2f1 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -4,6 +4,7 @@ import Chisel._ import Node._ import Constants._ import hwacha._ +import Util._ class ioRocket(implicit conf: RocketConfiguration) extends Bundle { @@ -26,7 +27,7 @@ class Core(implicit conf: RocketConfiguration) extends Component ctrl.io.imem <> io.imem dpath.io.imem <> io.imem - val dmemArb = new HellaCacheArbiter(if (HAVE_VEC) 3 else 2) + val dmemArb = new HellaCacheArbiter(2 + conf.vec) dmemArb.io.mem <> io.dmem val dmem = dmemArb.io.requestor dmem(1) <> ctrl.io.dmem @@ -34,14 +35,14 @@ class Core(implicit conf: RocketConfiguration) extends Component val ptw = collection.mutable.ArrayBuffer(io.imem.ptw, io.dmem.ptw) - val fpu: FPU = if (HAVE_FPU) { + val fpu: FPU = if (conf.fpu) { val fpu = new FPU(4,6) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl fpu } else null - if (HAVE_VEC) { + if (conf.vec) { val vu = new vu() val vdtlb = new TLB(8) @@ -120,14 +121,14 @@ class Core(implicit conf: RocketConfiguration) extends Component vu.io.dmem_resp.bits.tag 
:= dmem(2).resp.bits.tag vu.io.dmem_resp.bits.typ := dmem(2).resp.bits.typ - // share vector integer multiplier with rocket - dpath.io.vec_imul_req <> vu.io.cp_imul_req - dpath.io.vec_imul_resp <> vu.io.cp_imul_resp + // DON'T share vector integer multiplier with rocket + vu.io.cp_imul_req.valid := Bool(false) // share sfma and dfma pipelines with rocket + require(conf.fpu) fpu.io.sfma <> vu.io.cp_sfma fpu.io.dfma <> vu.io.cp_dfma - } else if (fpu != null) { + } else if (conf.fpu) { fpu.io.sfma.valid := Bool(false) fpu.io.dfma.valid := Bool(false) } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index c31a1dac..6a5c2802 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -16,12 +16,10 @@ class ioCtrlDpath extends Bundle() val ren1 = Bool(OUTPUT); val sel_alu2 = UFix(OUTPUT, 3); val fn_dw = Bool(OUTPUT); - val fn_alu = UFix(OUTPUT, 4); + val fn_alu = UFix(OUTPUT, SZ_ALU_FN); val mul_val = Bool(OUTPUT); - val mul_fn = Bits(OUTPUT, 2); val mul_kill = Bool(OUTPUT) val div_val = Bool(OUTPUT); - val div_fn = Bits(OUTPUT, 2); val div_kill = Bool(OUTPUT) val sel_wa = Bool(OUTPUT); val sel_wb = UFix(OUTPUT, 3); @@ -69,13 +67,13 @@ abstract trait DecodeConstants val xpr64 = Y; val decode_default = - // fence.i - // jalr | eret - // fp_val | renx2 div_val | | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,BR_X, X,X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,MUL_X, X,X,WA_X, WB_X, PCR_X,N,X,X,X,X) + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,BR_X, X,X,X,A2_X, 
DW_X, FN_X, N,M_X, MT_X, X,X,X,WA_X, WB_X, PCR_X,N,X,X,X,X) val table: Array[(Bits, List[Bits])] } @@ -83,239 +81,239 @@ abstract trait DecodeConstants object XDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr | eret - // fp_val | renx2 div_val | | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), 
+ BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RA,WB_PC, PCR_N,N,N,N,N,N), - JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), - JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), - JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), - RDNPC-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), + J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,WA_RA,WB_PC, PCR_N,N,N,N,N,N), + JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), + JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), + JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), + RDNPC-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), - LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LW-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LBU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,MUL_X, 
N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LHU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SB-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - SH-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - SW-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LW-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LBU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LHU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SB-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + SH-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + SW-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, 
N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, 
N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, 
N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - XORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU,N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - riscvAND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - riscvOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - riscvXOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRA-> List(Y, N,N,BR_N, 
N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + XORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + riscvAND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + riscvOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + riscvXOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, 
N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SLLW-> List(xpr64,N,N,BR_N, 
N,Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_H, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HU, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, Y,MUL_HSU,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, Y,MUL_LO, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_D, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - DIVUW-> List(xpr64,N,N,BR_N, 
N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_DU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_R, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_X, N,M_X, MT_X, N,DIV_RU, Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,Y,N,N), - SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_S,N,N,N,Y,Y), - CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_C,N,N,N,Y,Y), - ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,Y,N,Y,N), - FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,Y,N,N,N,Y), - MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_F,N,N,N,Y,Y), - MTPCR-> List(Y, N,N,BR_N, 
N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_T,N,N,N,Y,Y), - RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,N,N,N,N,N), - RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_TSC,PCR_N,N,N,N,N,N), - RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_IRT,PCR_N,N,N,N,N,N)) + SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,Y,N,N), + SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_S,N,N,N,Y,Y), + CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_C,N,N,N,Y,Y), + ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,Y,N,Y,N), + FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR_N,Y,N,N,N,Y), + MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_F,N,N,N,Y,Y), + MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_T,N,N,N,Y,Y), + RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_TSC,PCR_N,N,N,N,N,N), + RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_TSC,PCR_N,N,N,N,N,N), + RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_IRT,PCR_N,N,N,N,N,N)) } object FDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr | eret - // fp_val | renx2 div_val | | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, 
N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_D_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMIN_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMIN_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMAX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMAX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMUL_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMUL_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMADD_S-> 
List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FNMADD_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FNMADD_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FNMSUB_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FNMSUB_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MFTX_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MFTX_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_W_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_W_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_WU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_WU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_L_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_L_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_LU_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_LU_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, 
N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FEQ_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FEQ_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FLT_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FLT_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FLE_S-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FLE_D-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MXTF_S-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MXTF_D-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_S_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_D_W-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_S_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_D_WU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_S_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_D_L-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_S_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_D_LU-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MFFSR-> List(FPU_Y,Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MTFSR-> List(FPU_Y,Y,N,BR_N, 
N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FLW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - FLD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - FSW-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - FSD-> List(FPU_Y,Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N)) + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSGNJN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMIN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMIN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMAX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMAX_D-> List(Y, 
Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMUL_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMUL_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MFTX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MFTX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_W_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_WU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, 
MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_WU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_L_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_L_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_LU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_LU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FEQ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FEQ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLT_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLT_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLE_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLE_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MXTF_S-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MXTF_D-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_S_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_S_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_S_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, 
PCR_N,N,N,N,N,N), + FCVT_S_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FCVT_D_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MFFSR-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MTFSR-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FLW-> List(Y, Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + FLD-> List(Y, Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + FSW-> List(Y, Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + FSD-> List(Y, Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N)) } object VDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr | eret - // fp_val | renx2 div_val | | syscall - // | vec_val | | renx1 mem_val mul_val | wen pcr | | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| mul_fn | | s_wa s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - VVCFGIVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), - VVCFG-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), - VSETVL-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), - VF-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - VMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - VMSV-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFMVV-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, 
N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FENCE_V_L-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - FENCE_V_G-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - VLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLWU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLHU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLBU-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSH-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSB-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFLD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFLW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFSD-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFSW-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, 
N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTWU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTHU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTBU-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSSTH-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSSTB-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFLSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFLSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFSSTD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFSSTW-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - - VENQCMD-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), - VENQIMM1-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, 
N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), - VENQIMM2-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), - VENQCNT-> List(VEC_Y,N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), - VXCPTEVAC-> List(VEC_Y,N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,MUL_X, N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), - VXCPTKILL-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,Y,N), - VXCPTHOLD-> List(VEC_Y,N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,MUL_X, N,N,WA_X, WB_X, PCR_N,N,N,N,Y,N)) + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | + VVCFGIVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), + VVCFG-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), + VSETVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), + VF-> List(Y, N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + VMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + VMSV-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), + FENCE_V_L-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + FENCE_V_G-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + VLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLWU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLHU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLBU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTWU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTHU-> List(Y, N,Y,BR_N, 
N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VLSTBU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VSSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VFSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + + VENQCMD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VENQIMM1-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VENQIMM2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VENQCNT-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VXCPTEVAC-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), + VXCPTKILL-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,Y,N), + VXCPTHOLD-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,Y,N)) } class Control(implicit conf: RocketConfiguration) extends 
Component @@ -336,13 +334,13 @@ class Control(implicit conf: RocketConfiguration) extends Component } var decode_table = XDecode.table - if (HAVE_FPU) decode_table ++= FDecode.table - if (HAVE_VEC) decode_table ++= VDecode.table + if (conf.fpu) decode_table ++= FDecode.table + if (conf.vec) decode_table ++= VDecode.table val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) val id_int_val :: id_fp_val :: id_vec_val :: id_br_type :: id_jalr :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs - val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_mul_fn :: id_div_val :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 + val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_div_val :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 val id_pcr :: id_fence_i :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 val id_raddr3 = io.dpath.inst(16,12); @@ -369,7 +367,6 @@ class Control(implicit conf: RocketConfiguration) extends Component val ex_reg_load_use = Reg(resetVal = Bool(false)) val ex_reg_pcr = Reg(resetVal = PCR_N) val ex_reg_br_type = Reg(resetVal = BR_N) - val ex_reg_mul_fn = Reg(){Bits()} val ex_reg_mem_cmd = Reg(){Bits()} val ex_reg_mem_type = Reg(){Bits()} val ex_reg_cause = Reg(){UFix()} @@ -418,10 +415,7 @@ class Control(implicit conf: RocketConfiguration) extends Component (io.dpath.irq_timer, IRQ_TIMER)) var id_interrupts = id_maskable_interrupts.map(i => (io.dpath.status(SR_IM+i._2) && i._1, UFix(CAUSE_INTERRUPT+i._2))) - var vec_replay = Bool(false) - var vec_stalld = Bool(false) - if (HAVE_VEC) - { + val (vec_replay, vec_stalld) = if (conf.vec) { // vector control val vec = new rocketCtrlVec() @@ -447,15 +441,16 @@ class Control(implicit conf: RocketConfiguration) extends Component val mask_pfximm2q_ready = !vec_dec.io.sigs.enq_pfximm2q || io.vec_iface.vpfximm2q.ready val mask_pfcntq_ready = !vec_dec.io.sigs.enq_pfcntq || io.vec_iface.vpfcntq.ready - vec_stalld = + 
id_interrupts = id_interrupts :+ (vec.io.irq, vec.io.irq_cause) + + val stalld = id_vec_val && ( !mask_cmdq_ready || !mask_ximm1q_ready || !mask_ximm2q_ready || !mask_cntq_ready || !mask_pfcmdq_ready || !mask_pfximm1q_ready || !mask_pfximm2q_ready || !mask_pfcntq_ready || vec_dec.io.sigs.vfence && !vec.io.vfence_ready) - vec_replay = vec.io.replay - id_interrupts = id_interrupts :+ (vec.io.irq, vec.io.irq_cause) - } + (vec.io.replay, stalld) + } else (Bool(false), Bool(false)) val (id_interrupt_unmasked, id_interrupt_cause) = checkExceptions(id_interrupts) val id_interrupt = io.dpath.status(SR_ET) && id_interrupt_unmasked @@ -504,7 +499,6 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_btb_hit := io.imem.resp.bits.taken ex_reg_div_val := id_div_val ex_reg_mul_val := id_mul_val - ex_reg_mul_fn := id_mul_fn.toUFix ex_reg_mem_val := id_mem_val.toBool; ex_reg_valid := Bool(true) ex_reg_pcr := id_pcr @@ -634,7 +628,7 @@ class Control(implicit conf: RocketConfiguration) extends Component sboard.set((wb_reg_div_mul_val || wb_dcache_miss) && io.dpath.wb_wen, io.dpath.wb_waddr) sboard.clear(io.dpath.mem_ll_wb, io.dpath.mem_ll_waddr) - val id_stall_fpu = if (HAVE_FPU) { + val id_stall_fpu = if (conf.fpu) { val fp_sboard = new Scoreboard fp_sboard.set((wb_dcache_miss && wb_reg_fp_wen || io.fpu.sboard_set) && !replay_wb, io.dpath.wb_waddr) fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) @@ -734,10 +728,8 @@ class Control(implicit conf: RocketConfiguration) extends Component io.dpath.sel_alu2 := id_sel_alu2.toUFix io.dpath.fn_dw := id_fn_dw.toBool; io.dpath.fn_alu := id_fn_alu.toUFix - io.dpath.div_fn := ex_reg_mul_fn io.dpath.div_val := ex_reg_div_val io.dpath.div_kill := mem_reg_div_val && killm_common - io.dpath.mul_fn := ex_reg_mul_fn io.dpath.mul_val := ex_reg_mul_val io.dpath.mul_kill := mem_reg_mul_val && killm_common io.dpath.ex_fp_val:= ex_reg_fp_val; diff --git a/rocket/src/main/scala/divider.scala 
b/rocket/src/main/scala/divider.scala index f434a162..f7e97185 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -3,10 +3,11 @@ package rocket import Chisel._ import Node._ import Constants._ +import ALU._ -class rocketDivider(earlyOut: Boolean = false) extends Component { - val io = new ioMultiplier - val w = io.req.bits.in0.getWidth +class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { + val io = new MultiplierIO + val w = io.req.bits.in1.getWidth val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() }; val state = Reg(resetVal = s_ready); @@ -15,25 +16,25 @@ class rocketDivider(earlyOut: Boolean = false) extends Component { val divby0 = Reg() { Bool() }; val neg_quo = Reg() { Bool() }; val neg_rem = Reg() { Bool() }; - val reg_tag = Reg() { UFix() }; val rem = Reg() { Bool() }; val half = Reg() { Bool() }; + val r_req = Reg{io.req.bits.clone} val divisor = Reg() { Bits() } val remainder = Reg() { Bits(width = 2*w+1) } val subtractor = remainder(2*w,w) - divisor - val dw = io.req.bits.fn(io.req.bits.fn.width-1) - val fn = io.req.bits.fn(io.req.bits.fn.width-2,0) - val tc = (fn === DIV_D) || (fn === DIV_R); + val dw = io.req.bits.dw + val fn = io.req.bits.fn + val tc = isMulFN(fn, FN_DIV) || isMulFN(fn, FN_REM) - val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in0(w-1), io.req.bits.in0(w/2-1)) - val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(w-1,w/2), Fill(w/2, lhs_sign)) - val lhs_in = Cat(lhs_hi, io.req.bits.in0(w/2-1,0)) + val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w-1), io.req.bits.in1(w/2-1)) + val lhs_hi = Mux(dw === DW_64, io.req.bits.in1(w-1,w/2), Fill(w/2, lhs_sign)) + val lhs_in = Cat(lhs_hi, io.req.bits.in1(w/2-1,0)) - val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w-1), io.req.bits.in1(w/2-1)) - val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(w-1,w/2), Fill(w/2, rhs_sign)) - val rhs_in = Cat(rhs_hi, 
io.req.bits.in1(w/2-1,0)) + val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in2(w-1), io.req.bits.in2(w/2-1)) + val rhs_hi = Mux(dw === DW_64, io.req.bits.in2(w-1,w/2), Fill(w/2, rhs_sign)) + val rhs_in = Cat(rhs_hi, io.req.bits.in2(w/2-1,0)) when (state === s_neg_inputs) { state := s_busy @@ -77,7 +78,7 @@ class rocketDivider(earlyOut: Boolean = false) extends Component { count := shift } } - when (state === s_done && io.resp_rdy || io.req_kill) { + when (io.resp.fire() || io.kill) { state := s_ready } when (io.req.fire()) { @@ -86,17 +87,17 @@ class rocketDivider(earlyOut: Boolean = false) extends Component { half := (dw === DW_32); neg_quo := lhs_sign != rhs_sign neg_rem := lhs_sign - rem := (fn === DIV_R) || (fn === DIV_RU); - reg_tag := io.req_tag; + rem := isMulFN(fn, FN_REM) || isMulFN(fn, FN_REMU) divby0 := Bool(true); divisor := rhs_in remainder := lhs_in + r_req := io.req.bits } val result = Mux(rem, remainder(w+w, w+1), remainder(w-1,0)) - io.resp_bits := Mux(half, Cat(Fill(w/2, result(w/2-1)), result(w/2-1,0)), result) - io.resp_tag := reg_tag - io.resp_val := state === s_done + io.resp.bits := r_req + io.resp.bits.data := Mux(half, Cat(Fill(w/2, result(w/2-1)), result(w/2-1,0)), result) + io.resp.valid := state === s_done io.req.ready := state === s_ready } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index ce2d3c80..3bbd78ed 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -18,8 +18,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val fpu = new ioDpathFPU(); val vec_ctrl = new ioCtrlDpathVec().flip val vec_iface = new ioDpathVecInterface() - val vec_imul_req = new io_imul_req - val vec_imul_resp = Bits(INPUT, hwacha.Constants.SZ_XLEN) } // execute definitions @@ -158,35 +156,30 @@ class Datapath(implicit conf: RocketConfiguration) extends Component alu.io.in1 := ex_rs1.toUFix // divider - val div = new rocketDivider(earlyOut = true) + val div = 
new Divider(earlyOut = true) div.io.req.valid := io.ctrl.div_val - div.io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, io.ctrl.div_fn) - div.io.req.bits.in0 := ex_rs1 - div.io.req.bits.in1 := ex_rs2 - div.io.req_tag := ex_reg_waddr - div.io.req_kill := io.ctrl.div_kill - div.io.resp_rdy := Bool(true) + div.io.req.bits.dw := ex_reg_ctrl_fn_dw + div.io.req.bits.fn := ex_reg_ctrl_fn_alu + div.io.req.bits.in1 := ex_rs1 + div.io.req.bits.in2 := ex_rs2 + div.io.req.bits.tag := ex_reg_waddr + div.io.kill := io.ctrl.div_kill + div.io.resp.ready := Bool(true) io.ctrl.div_rdy := div.io.req.ready - io.ctrl.div_result_val := div.io.resp_val + io.ctrl.div_result_val := div.io.resp.valid // multiplier - var mul_io = new rocketMultiplier(unroll = 4, earlyOut = true).io - if (HAVE_VEC) - { - val vu_mul = new rocketVUMultiplier(nwbq = 1) - vu_mul.io.vu.req <> io.vec_imul_req - vu_mul.io.vu.resp <> io.vec_imul_resp - mul_io = vu_mul.io.cpu - } - mul_io.req.valid := io.ctrl.mul_val - mul_io.req.bits.fn := Cat(ex_reg_ctrl_fn_dw, io.ctrl.mul_fn) - mul_io.req.bits.in0 := ex_rs1 - mul_io.req.bits.in1 := ex_rs2 - mul_io.req_tag := ex_reg_waddr - mul_io.req_kill := io.ctrl.mul_kill - mul_io.resp_rdy := Bool(true) - io.ctrl.mul_rdy := mul_io.req.ready - io.ctrl.mul_result_val := mul_io.resp_val + val mul = new Multiplier(unroll = 4, earlyOut = true) + mul.io.req.valid := io.ctrl.mul_val + mul.io.req.bits.dw := ex_reg_ctrl_fn_dw + mul.io.req.bits.fn := ex_reg_ctrl_fn_alu + mul.io.req.bits.in1 := ex_rs1 + mul.io.req.bits.in2 := ex_rs2 + mul.io.req.bits.tag := ex_reg_waddr + mul.io.kill := io.ctrl.mul_kill + mul.io.resp.ready := Bool(true) + io.ctrl.mul_rdy := mul.io.req.ready + io.ctrl.mul_result_val := mul.io.resp.valid io.fpu.fromint_data := ex_rs1 io.ctrl.ex_waddr := ex_reg_waddr @@ -267,18 +260,18 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu val mem_ll_wdata = Bits() - mem_ll_wdata := mul_io.resp_bits - 
io.ctrl.mem_ll_waddr := mul_io.resp_tag - io.ctrl.mem_ll_wb := mul_io.resp_val - when (div.io.resp_val) { - mul_io.resp_rdy := Bool(false) - mem_ll_wdata := div.io.resp_bits - io.ctrl.mem_ll_waddr := div.io.resp_tag + mem_ll_wdata := mul.io.resp.bits.data + io.ctrl.mem_ll_waddr := mul.io.resp.bits.tag + io.ctrl.mem_ll_wb := mul.io.resp.valid + when (div.io.resp.valid) { + mul.io.resp.ready := Bool(false) + mem_ll_wdata := div.io.resp.bits.data + io.ctrl.mem_ll_waddr := div.io.resp.bits.tag io.ctrl.mem_ll_wb := Bool(true) } when (dmem_resp_replay) { - mul_io.resp_rdy := Bool(false) - div.io.resp_rdy := Bool(false) + mul.io.resp.ready := Bool(false) + div.io.resp.ready := Bool(false) mem_ll_wdata := io.dmem.resp.bits.data_subword io.ctrl.mem_ll_waddr := dmem_resp_waddr io.ctrl.mem_ll_wb := Bool(true) @@ -308,7 +301,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component Mux(io.ctrl.pcr != PCR_N, pcr.io.r.data, wb_reg_wdata)) - if (HAVE_VEC) + if (conf.vec) { // vector datapath val vec = new rocketDpathVec() diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 3160211c..34f76abd 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -5,17 +5,9 @@ import Node._ import Constants._ import Instructions._ -class ioALU extends Bundle(){ - val dw = UFix(INPUT, 1); - val fn = UFix(INPUT, 4); - val in2 = UFix(INPUT, 64); - val in1 = UFix(INPUT, 64); - val out = UFix(OUTPUT, 64); - val adder_out = UFix(OUTPUT, 64); -} - object ALU { + val SZ_ALU_FN = 4 val FN_X = Bits("b????") val FN_ADD = UFix(0) val FN_SL = UFix(1) @@ -29,14 +21,34 @@ object ALU val FN_SRA = UFix(13) val FN_OP2 = UFix(15) + val FN_DIV = FN_XOR + val FN_DIVU = FN_SR + val FN_REM = FN_OR + val FN_REMU = FN_AND + + val FN_MUL = FN_ADD + val FN_MULH = FN_SL + val FN_MULHSU = FN_SLT + val FN_MULHU = FN_SLTU + + def isMulFN(fn: Bits, cmp: Bits) = fn(1,0) === cmp(1,0) def isSub(cmd: Bits) = cmd(3) def isSLTU(cmd: 
Bits) = cmd(0) } +import ALU._ -class ALU extends Component +class ALUIO(implicit conf: RocketConfiguration) extends Bundle { + val dw = Bits(INPUT, SZ_DW) + val fn = Bits(INPUT, SZ_ALU_FN) + val in2 = UFix(INPUT, conf.xprlen) + val in1 = UFix(INPUT, conf.xprlen) + val out = UFix(OUTPUT, conf.xprlen) + val adder_out = UFix(OUTPUT, conf.xprlen) +} + +class ALU(implicit conf: RocketConfiguration) extends Component { - import ALU._ - val io = new ioALU(); + val io = new ALUIO // ADD, SUB val sub = isSub(io.fn) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 3dc911be..d896034a 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -186,9 +186,9 @@ class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component reg_status_ux := wdata(SR_U64).toBool; reg_status_s := wdata(SR_S).toBool; reg_status_ps := wdata(SR_PS).toBool; - reg_status_ev := Bool(HAVE_VEC) && wdata(SR_EV).toBool; - reg_status_ef := Bool(HAVE_FPU) && wdata(SR_EF).toBool; - reg_status_ec := Bool(HAVE_RVC) && wdata(SR_EC).toBool; + reg_status_ev := Bool(conf.vec) && wdata(SR_EV).toBool; + reg_status_ef := Bool(conf.fpu) && wdata(SR_EF).toBool; + reg_status_ec := Bool(conf.rvc) && wdata(SR_EC).toBool; reg_status_et := wdata(SR_ET).toBool; } when (waddr === PCR_EPC) { reg_epc := wdata(VADDR_BITS,0).toFix } diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 7d922813..9ef695a2 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -108,7 +108,8 @@ class rocketDpathVec extends Component UFix(52,7) -> UFix(5,9) )) - val uts_per_bank = Mux(Bool(hwacha.Constants.HAVE_PVFB) & nreg_mod_bank > UFix(MAX_THREADS,9), UFix(MAX_THREADS, 9), nreg_mod_bank) + val max_threads = UFix(WIDTH_BMASK) + val uts_per_bank = Mux(Bool(HAVE_PVFB) & nreg_mod_bank > max_threads, max_threads, nreg_mod_bank) val reg_hwvl = Reg(resetVal = UFix(32, 
12)) val reg_appvl0 = Reg(resetVal = Bool(true)) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index ce14d301..79905b4e 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -3,109 +3,72 @@ package rocket import Chisel._ import Node._ import Constants._ -import hwacha._ -import hwacha.Constants._ +import ALU._ -class ioMultiplier extends Bundle { - val req = new io_imul_req().flip - val req_tag = UFix(INPUT, 5) - val req_kill = Bool(INPUT) - val resp_val = Bool(OUTPUT) - val resp_rdy = Bool(INPUT) - val resp_tag = UFix(OUTPUT, 5) - val resp_bits = Bits(OUTPUT, SZ_XLEN) +class MultiplierReq(implicit conf: RocketConfiguration) extends Bundle { + val fn = Bits(width = SZ_ALU_FN) + val dw = Bits(width = SZ_DW) + val in1 = Bits(width = conf.xprlen) + val in2 = Bits(width = conf.xprlen) + val tag = UFix(width = conf.nxprbits) + + override def clone = new MultiplierReq().asInstanceOf[this.type] } -class rocketVUMultiplier(nwbq: Int) extends Component { - val io = new Bundle { - val cpu = new ioMultiplier - val vu = new Bundle { - val req = new io_imul_req - val resp = Bits(INPUT, SZ_XLEN) - } - } +class MultiplierResp(implicit conf: RocketConfiguration) extends Bundle { + val data = Bits(width = conf.xprlen) + val tag = UFix(width = conf.nxprbits) - val valid = Reg(resetVal = Bits(0, IMUL_STAGES)) - val wbq_cnt = Reg(resetVal = Bits(0, log2Up(nwbq+1))) - val tag = Vec(IMUL_STAGES) { Reg() { Bits() } } - - val fire = io.cpu.req.valid && io.cpu.req.ready - - valid := Cat(fire, valid(IMUL_STAGES-1) && !io.cpu.req_kill, valid(IMUL_STAGES-2,1)) - when (fire) { - tag(IMUL_STAGES-1) := io.cpu.req_tag - } - for (i <- 0 until IMUL_STAGES-1) { - tag(i) := tag(i+1) - } - when (valid(0) != (io.cpu.resp_val && io.cpu.resp_rdy)) { - wbq_cnt := Mux(valid(0), wbq_cnt + UFix(1), wbq_cnt - UFix(1)) - } - - var inflight_cnt = valid(0) - for (i <- 1 until IMUL_STAGES) - inflight_cnt = inflight_cnt + 
valid(i) - inflight_cnt = inflight_cnt + wbq_cnt - val wbq_rdy = inflight_cnt < UFix(nwbq) - - val wbq = (new Queue(nwbq)) { Bits(width = io.cpu.resp_bits.width + io.cpu.resp_tag.width) } - wbq.io.enq.valid := valid(0) - wbq.io.enq.bits := Cat(io.vu.resp, tag(0)) - wbq.io.deq.ready := io.cpu.resp_rdy - - io.cpu.req.ready := io.vu.req.ready && wbq_rdy - io.cpu.resp_val := wbq.io.deq.valid - io.cpu.resp_bits := wbq.io.deq.bits >> UFix(io.cpu.resp_tag.width) - io.cpu.resp_tag := wbq.io.deq.bits(io.cpu.resp_tag.width-1,0).toUFix - - io.vu.req <> io.cpu.req + override def clone = new MultiplierResp().asInstanceOf[this.type] } -class rocketMultiplier(unroll: Int = 1, earlyOut: Boolean = false) extends Component { - val io = new ioMultiplier +class MultiplierIO(implicit conf: RocketConfiguration) extends Bundle { + val req = new FIFOIO()(new MultiplierReq).flip + val kill = Bool(INPUT) + val resp = new FIFOIO()(new MultiplierResp) +} - val w0 = io.req.bits.in0.getWidth +class Multiplier(unroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { + val io = new MultiplierIO + + val w0 = io.req.bits.in1.getWidth val w = (w0+1+unroll-1)/unroll*unroll val cycles = w/unroll val r_val = Reg(resetVal = Bool(false)); - val r_dw = Reg { Bits() } - val r_fn = Reg { Bits() } - val r_tag = Reg { UFix() } - val r_lhs = Reg { Bits() } val r_prod= Reg { Bits(width = w*2) } val r_lsb = Reg { Bits() } val r_cnt = Reg { UFix(width = log2Up(cycles+1)) } + val r_req = Reg{new MultiplierReq} + val r_lhs = Reg{Bits(width = w0+1)} - val dw = io.req.bits.fn(io.req.bits.fn.width-1) - val fn = io.req.bits.fn(io.req.bits.fn.width-2,0) + val dw = io.req.bits.dw + val fn = io.req.bits.fn - val lhs_msb = Mux(dw === DW_64, io.req.bits.in0(w0-1), io.req.bits.in0(w0/2-1)).toBool - val lhs_sign = ((fn === MUL_H) || (fn === MUL_HSU)) && lhs_msb - val lhs_hi = Mux(dw === DW_64, io.req.bits.in0(w0-1,w0/2), Fill(w0/2, lhs_sign)) - val lhs_in = Cat(lhs_sign, lhs_hi, 
io.req.bits.in0(w0/2-1,0)) + val lhs_msb = Mux(dw === DW_64, io.req.bits.in1(w0-1), io.req.bits.in1(w0/2-1)).toBool + val lhs_sign = (isMulFN(fn, FN_MULH) || isMulFN(fn, FN_MULHSU)) && lhs_msb + val lhs_hi = Mux(dw === DW_64, io.req.bits.in1(w0-1,w0/2), Fill(w0/2, lhs_sign)) + val lhs_in = Cat(lhs_sign, lhs_hi, io.req.bits.in1(w0/2-1,0)) - val rhs_msb = Mux(dw === DW_64, io.req.bits.in1(w0-1), io.req.bits.in1(w0/2-1)).toBool - val rhs_sign = (fn === MUL_H) && rhs_msb - val rhs_hi = Mux(dw === DW_64, io.req.bits.in1(w0-1,w0/2), Fill(w0/2, rhs_sign)) - val rhs_in = Cat(Fill(w-w0, rhs_sign), rhs_hi, io.req.bits.in1(w0/2-1,0)) + val rhs_msb = Mux(dw === DW_64, io.req.bits.in2(w0-1), io.req.bits.in2(w0/2-1)).toBool + val rhs_sign = isMulFN(fn, FN_MULH) && rhs_msb + val rhs_hi = Mux(dw === DW_64, io.req.bits.in2(w0-1,w0/2), Fill(w0/2, rhs_sign)) + val rhs_in = Cat(Fill(w-w0, rhs_sign), rhs_hi, io.req.bits.in2(w0/2-1,0)) - when (io.req.valid && io.req.ready) { + when (io.req.fire()) { r_val := Bool(true) r_cnt := UFix(0, log2Up(cycles+1)) - r_dw := dw - r_fn := fn - r_tag := io.req_tag + r_req := io.req.bits r_lhs := lhs_in r_prod:= rhs_in r_lsb := Bool(false) } - .elsewhen (io.resp_val && io.resp_rdy || io.req_kill) { + .elsewhen (io.resp.fire() || io.kill) { r_val := Bool(false) } val eOutDist = (UFix(cycles)-r_cnt)*UFix(unroll) - val outShift = Mux(r_fn === MUL_LO, UFix(0), Mux(r_dw === DW_64, UFix(64), UFix(32))) + val outShift = Mux(isMulFN(r_req.fn, FN_MUL), UFix(0), Mux(r_req.dw === DW_64, UFix(64), UFix(32))) val shiftDist = Mux(r_cnt === UFix(cycles), outShift, eOutDist) val eOutMask = (UFix(1) << eOutDist) - UFix(1) val eOut = r_cnt != UFix(0) && Bool(earlyOut) && !((r_prod(w-1,0) ^ r_lsb.toFix) & eOutMask).orR @@ -126,7 +89,7 @@ class rocketMultiplier(unroll: Int = 1, earlyOut: Boolean = false) extends Compo val out64 = shift(w0-1,0) io.req.ready := !r_val - io.resp_bits := Mux(r_dw === DW_64, out64, out32) - io.resp_tag := r_tag; - io.resp_val := r_val && 
(r_cnt === UFix(cycles)) + io.resp.bits := r_req + io.resp.bits.data := Mux(r_req.dw === DW_64, out64, out32) + io.resp.valid := r_val && (r_cnt === UFix(cycles)) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index d315e0e6..2e7fc2ce 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -8,7 +8,7 @@ import Util._ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, nmshr: Int, nrpq: Int, nsdq: Int, - reqtagbits: Int = -1) + reqtagbits: Int = -1, databits: Int = -1) { require(isPow2(sets)) require(isPow2(ways)) // TODO: relax this @@ -25,8 +25,7 @@ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, def untagbits = offbits + idxbits def tagbits = lineaddrbits - idxbits def ramoffbits = log2Up(MEM_DATA_BITS/8) - def databytes = 8 // assumed by StoreGen/LoadGen/AMOALU - def databits = databytes*8 + def databytes = databits/8 def wordoffbits = log2Up(databytes) } @@ -624,7 +623,7 @@ class AMOALU(implicit conf: DCacheConfig) extends Component { val out = Bits(OUTPUT, conf.databits) } - require(conf.databytes == 8) + require(conf.databits == 64) val sgned = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MAX) val minmax = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) || (io.cmd === M_XA_MAX) || (io.cmd === M_XA_MAXU) diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index 22d18ca9..c30c6913 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -16,13 +16,5 @@ object Constants extends TLBConstants with uncore.constants.MemoryInterfaceConstants { - def HAVE_RVC = false - def HAVE_FPU = true - def HAVE_VEC = false - - val MAX_THREADS = - hwacha.Constants.NUM_PVFB * hwacha.Constants.WIDTH_PVFB / hwacha.Constants.SZ_BANK - val START_ADDR = 0x2000 - } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 4833cd66..401b11aa 100644 --- 
a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -4,18 +4,24 @@ import Chisel._ import Node._ import Constants._ import uncore._ +import Util._ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, icache: ICacheConfig, dcache: DCacheConfig, + fpu: Boolean, vec: Boolean, fastLoadByte: Boolean = false) { val dcacheReqTagBits = 9 // enforce compliance with require() + val xprlen = 64 + val nxpr = 32 + val nxprbits = log2Up(nxpr) + val rvc = false } class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) { - val memPorts = if (HAVE_VEC) 3 else 2 - implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts)) + val memPorts = 2 + confIn.vec + implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts), databits = confIn.xprlen) implicit val conf = confIn.copy(dcache = dcConf) val io = new Bundle { @@ -40,7 +46,7 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon io.tilelink.probe_rep <> dcache.io.mem.probe_rep io.tilelink.probe_rep_data <> dcache.io.mem.probe_rep_data - if (HAVE_VEC) { + if (conf.vec) { val vicache = new Frontend()(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB) arbiter.io.requestor(2) <> vicache.io.mem core.io.vimem <> vicache.io.cpu diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 6abdff11..a6ad8765 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -4,7 +4,6 @@ import Chisel._; import Node._; import Constants._; import scala.math._; -import hwacha._ class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { val clear = Bool(INPUT); From 81d711e892d2caaf5e03189a52820a94587f37f2 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 17 Nov 2012 10:47:55 -0800 Subject: [PATCH 0517/1087] fix D$ bug; now D$ doesn't respond to prefetches --- 
rocket/src/main/scala/nbdcache.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2e7fc2ce..3a3ba883 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -172,13 +172,12 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val req = Reg { new MSHRReq() } val req_cmd = io.req_bits.cmd - val req_use_rpq = req_cmd != M_PFR && req_cmd != M_PFW val req_idx = req.addr(conf.untagbits-1,conf.offbits) val idx_match = req_idx === io.req_bits.addr(conf.untagbits-1,conf.offbits) val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) val rpq = (new Queue(conf.nrpq)) { new Replay } - rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && req_use_rpq + rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(req_cmd) rpq.io.enq.bits := io.req_bits rpq.io.enq.bits.sdq_id := io.req_sdq_id rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid @@ -873,7 +872,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { } // miss handling - mshr.io.req.valid := s2_valid_masked && !s2_hit && (isRead(s2_req.cmd) || isWrite(s2_req.cmd)) && !s2_nack_hit + mshr.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) && !s2_nack_hit mshr.io.req.bits := s2_req mshr.io.req.bits.old_dirty := conf.co.needsWriteback(s2_repl_state) && !s2_tag_match // don't wb upgrades mshr.io.req.bits.old_tag := s2_repl_tag From 06eeb90e2a4c3e6586ddeb24f55fc32a28d86ebf Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 17 Nov 2012 10:52:10 -0800 Subject: [PATCH 0518/1087] vector unit interfaces to the new D$ 
--- rocket/src/main/scala/core.scala | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 0d67f2f1..ef918f8d 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -110,16 +110,9 @@ class Core(implicit conf: RocketConfiguration) extends Component vu.io.xcpt.hold := ctrl.io.vec_iface.hold // hooking up vector memory interface - dmem(2).req.valid := vu.io.dmem_req.valid - dmem(2).req.bits := vu.io.dmem_req.bits - dmem(2).req.bits.data := RegEn(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data, vu.io.dmem_req.valid && isWrite(vu.io.dmem_req.bits.cmd)) - - vu.io.dmem_req.ready := dmem(2).req.ready - vu.io.dmem_resp.valid := dmem(2).resp.valid - vu.io.dmem_resp.bits.nack := dmem(2).resp.bits.nack - vu.io.dmem_resp.bits.data := dmem(2).resp.bits.data_subword - vu.io.dmem_resp.bits.tag := dmem(2).resp.bits.tag - vu.io.dmem_resp.bits.typ := dmem(2).resp.bits.typ + dmem(2).req.bits.data := Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) + dmem(2).req <> vu.io.dmem_req + dmem(2).resp <> vu.io.dmem_resp // DON'T share vector integer multiplier with rocket vu.io.cp_imul_req.valid := Bool(false) From 395e4e3dd6a82cfae9dacaf971d667ffca71284b Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 18 Nov 2012 03:11:06 -0800 Subject: [PATCH 0519/1087] andrew'x fix for D$ corner case in writeback->abort->probe --- rocket/src/main/scala/nbdcache.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3a3ba883..f418682d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -190,6 +190,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val finish_q = (new Queue(2 /* wb + refill */)) { new TransactionFinish } finish_q.io.enq.valid := 
wb_done || refill_done finish_q.io.enq.bits.global_xact_id := io.mem_rep.bits.global_xact_id + io.wb_req.valid := Bool(false) when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid) { state := s_invalid @@ -217,8 +218,11 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { when (abort) { state := s_wb_req } } when (state === s_wb_req) { - when (io.probe_writeback.valid && io.probe_writeback.bits && idx_match) { state := s_refill_req } - .elsewhen (io.wb_req.ready) { state := s_wb_resp } + io.wb_req.valid := Bool(true) + when (io.probe_writeback.valid && idx_match) { + io.wb_req.valid := Bool(false) + when (io.probe_writeback.bits) { state := s_refill_req } + }.elsewhen (io.wb_req.ready) { state := s_wb_resp } } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req @@ -246,7 +250,6 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { io.meta_req.bits.data.tag := io.tag io.meta_req.bits.way_en := req.way_en - io.wb_req.valid := (state === s_wb_req) && !(io.probe_writeback.valid && idx_match) io.wb_req.bits.tag := req.old_tag io.wb_req.bits.idx := req_idx io.wb_req.bits.way_en := req.way_en From 30038bda8a25769c26c6b9ff090972dd8027a9aa Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 20 Nov 2012 01:32:33 -0800 Subject: [PATCH 0520/1087] bypass stores to subsequent loads since we handle subword stores as RMW operations, this occurs frequently --- rocket/src/main/scala/core.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 192 +++++++++++++-------------- 2 files changed, 97 insertions(+), 97 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index ef918f8d..2c161514 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -110,7 +110,7 @@ class Core(implicit conf: RocketConfiguration) extends Component vu.io.xcpt.hold := ctrl.io.vec_iface.hold // hooking up vector memory interface - dmem(2).req.bits.data 
:= Reg(StoreGen(vu.io.dmem_req.bits.typ, Bits(0), vu.io.dmem_req.bits.data).data) + dmem(2).req.bits.data := RegEn(StoreGen(vu.io.dmem_req.bits).data, vu.io.dmem_req.valid && isWrite(vu.io.dmem_req.bits.cmd)) dmem(2).req <> vu.io.dmem_req dmem(2).resp <> vu.io.dmem_resp diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index f418682d..66afc35b 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -47,7 +47,14 @@ class RandomReplacement(implicit conf: DCacheConfig) extends ReplacementPolicy def hit = {} } -case class StoreGen(typ: Bits, addr: Bits, dat: Bits) +object StoreGen +{ + def apply(r: HellaCacheReq) = new StoreGen(r.typ, r.addr, r.data) + def apply(r: hwacha.io_dmem_req_bundle) = new StoreGen(r.typ, r.addr, r.data) + def apply(typ: Bits, addr: Bits, data: Bits = Bits(0)) = new StoreGen(typ, addr, data) +} + +class StoreGen(typ: Bits, addr: Bits, dat: Bits) { val byte = typ === MT_B || typ === MT_BU val half = typ === MT_H || typ === MT_HU @@ -64,7 +71,7 @@ case class StoreGen(typ: Bits, addr: Bits, dat: Bits) dat))) } -case class LoadGen(typ: Bits, addr: Bits, dat: Bits) +class LoadGen(typ: Bits, addr: Bits, dat: Bits) { val t = StoreGen(typ, addr, dat) val sign = typ === MT_B || typ === MT_H || typ === MT_W || typ === MT_D @@ -269,23 +276,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { io.replay.bits.phys := Bool(true) io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(conf.offbits-1,0)).toUFix - // don't issue back-to-back replays with store->load dependence - val r1_replay_valid = Reg(rpq.io.deq.fire()) - val r2_replay_valid = Reg(r1_replay_valid) - val r3_replay_valid = Reg(r2_replay_valid) - val r1_replay = RegEn(rpq.io.deq.bits, rpq.io.deq.fire()) - val r2_replay = RegEn(r1_replay, r1_replay_valid) - val r3_replay = RegEn(r2_replay, r2_replay_valid) - def offsetMatch(dst: HellaCacheReq, src: HellaCacheReq) = { - def mask(x: 
HellaCacheReq) = StoreGen(x.typ, x.addr, Bits(0)).mask - // TODO: this is overly restrictive - dst.addr(conf.offbits-1,conf.wordoffbits) === src.addr(conf.offbits-1,conf.wordoffbits) - // && (mask(dst) & mask(src)).orR - } - when (r1_replay_valid && offsetMatch(io.replay.bits, r1_replay) || - r2_replay_valid && offsetMatch(io.replay.bits, r2_replay) || - r3_replay_valid && offsetMatch(io.replay.bits, r3_replay) || - !io.meta_req.ready) { + when (!io.meta_req.ready) { rpq.io.deq.ready := Bool(false) io.replay.bits.cmd := M_FENCE // NOP } @@ -616,7 +607,6 @@ class DataArray(implicit conf: DCacheConfig) extends Component { class AMOALU(implicit conf: DCacheConfig) extends Component { val io = new Bundle { - val lhs_raw = Bits(INPUT, conf.databits) val addr = Bits(INPUT, conf.offbits) val cmd = Bits(INPUT, 4) val typ = Bits(INPUT, 3) @@ -627,28 +617,31 @@ class AMOALU(implicit conf: DCacheConfig) extends Component { require(conf.databits == 64) - val sgned = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MAX) - val minmax = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) || (io.cmd === M_XA_MAX) || (io.cmd === M_XA_MAXU) - val min = (io.cmd === M_XA_MIN) || (io.cmd === M_XA_MINU) - val word = (io.typ === MT_W) || (io.typ === MT_WU) + val sgned = io.cmd === M_XA_MIN || io.cmd === M_XA_MAX + val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU + val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU + val word = io.typ === MT_W || io.typ === MT_WU || io.typ === MT_B || io.typ === MT_BU - val adder_out = io.lhs + io.rhs + val mask = Fix(-1,64) ^ ((word & io.addr(2)) << 31) + val adder_out = (io.lhs & mask) + (io.rhs & mask) - val cmp_lhs = Mux(word, io.lhs(31), io.lhs(63)) - val cmp_rhs = Mux(word, io.rhs(31), io.rhs(63)) - val cmp_diff = Mux(word, io.lhs(31,0) < io.rhs(31,0), io.lhs < io.rhs) - val less = Mux(cmp_lhs === cmp_rhs, cmp_diff, Mux(sgned, cmp_lhs, cmp_rhs)) - val cmp_out = Mux(min === less, io.lhs, io.rhs) + val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), 
io.lhs(63)) + val cmp_rhs = Mux(word && !io.addr(2), io.rhs(31), io.rhs(63)) + val lt_lo = io.lhs(31,0) < io.rhs(31,0) + val lt_hi = io.lhs(63,32) < io.rhs(63,32) + val eq_hi = io.lhs(63,32) === io.rhs(63,32) + val lt = Mux(word, Mux(io.addr(2), lt_hi, lt_lo), lt_hi || eq_hi && lt_lo) + val less = Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs)) - val out = Mux(io.cmd === M_XA_ADD, adder_out, - Mux(io.cmd === M_XA_AND, io.lhs & io.rhs, - Mux(io.cmd === M_XA_OR, io.lhs | io.rhs, - Mux(minmax, cmp_out, + val out = Mux(io.cmd === M_XA_ADD, adder_out, + Mux(io.cmd === M_XA_AND, io.lhs & io.rhs, + Mux(io.cmd === M_XA_OR, io.lhs | io.rhs, + Mux(Mux(less, min, max), io.lhs, io.rhs)))) val wdata = Mux(word, Cat(out(31,0), out(31,0)), out) - val wmask = FillInterleaved(8, StoreGen(io.typ, io.addr, Bits(0)).mask) - io.out := wmask & wdata | ~wmask & io.lhs_raw + val wmask = FillInterleaved(8, StoreGen(io.typ, io.addr).mask) + io.out := wmask & wdata | ~wmask & io.lhs } class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { @@ -712,15 +705,19 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { io.cpu.req.ready := Bool(true) val s1_valid = Reg(io.cpu.req.fire(), resetVal = Bool(false)) + val s1_req = Reg{io.cpu.req.bits.clone} val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill val s1_replay = Reg(resetVal = Bool(false)) - val s1_req = Reg{io.cpu.req.bits.clone} - val s2_req = Reg{io.cpu.req.bits.clone} + val s1_store_bypass = Bool() val s2_valid = Reg(s1_valid_masked, resetVal = Bool(false)) + val s2_req = Reg{io.cpu.req.bits.clone} val s2_replay = Reg(s1_replay, resetVal = Bool(false)) val s2_valid_masked = Bool() val s2_nack_hit = Bool() + val s2_store_bypass = Reg{Bool()} + val s2_store_bypass_data = Reg{Bits(width = conf.databits)} + val s2_store_bypass_mask = Reg{Bits(width = conf.databytes)} val s3_valid = Reg(resetVal = Bool(false)) val s3_req = Reg{io.cpu.req.bits.clone} @@ -763,6 +760,7 @@ class HellaCache(implicit conf: 
DCacheConfig) extends Component { s2_req.typ := s1_req.typ s2_req.cmd := s1_req.cmd s2_req.tag := s1_req.tag + s2_store_bypass := s1_store_bypass when (s1_write) { s2_req.data := Mux(s1_replay, mshr.io.replay.bits.data, io.cpu.req.bits.data) } @@ -791,12 +789,20 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { readArb.io.out <> data.io.read writeArb.io.out <> data.io.write - // cpu tag check + // tag read for new requests meta_arb.io.in(3).valid := io.cpu.req.valid meta_arb.io.in(3).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) meta_arb.io.in(3).bits.rw := Bool(false) meta_arb.io.in(3).bits.way_en := Fix(-1) when (!meta_arb.io.in(3).ready) { io.cpu.req.ready := Bool(false) } + + // data read for new requests + readArb.io.in(2).bits.addr := io.cpu.req.bits.addr + readArb.io.in(2).valid := io.cpu.req.valid + readArb.io.in(2).bits.way_en := Fix(-1) + when (!readArb.io.in(2).ready) { io.cpu.req.ready := Bool(false) } + + // tag check and way muxing def wayMap[T <: Data](f: Int => T)(gen: => T) = Vec((0 until conf.ways).map(i => f(i))){gen} val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)){Bits()}.toBits val s1_hit_way = wayMap((w: Int) => s1_tag_eq_way(w) && conf.co.isHit(s1_req.cmd, meta.io.resp(w).state)){Bits()}.toBits @@ -806,57 +812,10 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s2_tag_match_way = RegEn(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR val s2_hit = Reg(s1_hit) + val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en && s1_tag_eq_way(w))){Bits()}) val s2_data = wayMap((w: Int) => RegEn(data.io.resp(w), s1_clk_en && s1_tag_eq_way(w))){Bits()} val data_resp_mux = Mux1H(s2_tag_match_way, s2_data) - // writeback unit - wb.io.req <> mshr.io.wb_req - wb.io.meta_req <> meta_arb.io.in(2) - wb.io.data_req <> readArb.io.in(1) - wb.io.data_resp <> data_resp_mux - wb.io.probe_rep_data <> 
io.mem.probe_rep_data - - // replacement policy - val replacer = new RandomReplacement - val s1_replaced_way_en = UFixToOH(replacer.way) - val s2_replaced_way_en = UFixToOH(RegEn(replacer.way, s1_clk_en)) - val s2_repl_state = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en && s1_replaced_way_en(w))){Bits()}) - val s2_repl_tag = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w).tag, s1_clk_en && s1_replaced_way_en(w))){Bits()}) - val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en && s1_tag_eq_way(w))){Bits()}) - - // refill response - val refill = conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) - writeArb.io.in(1).valid := io.mem.xact_rep.valid && refill - io.mem.xact_rep.ready := writeArb.io.in(1).ready || !refill - writeArb.io.in(1).bits := mshr.io.mem_resp - writeArb.io.in(1).bits.wmask := Fix(-1) - writeArb.io.in(1).bits.data := io.mem.xact_rep.bits.data - - // load hits - readArb.io.in(2).bits.addr := io.cpu.req.bits.addr - readArb.io.in(2).valid := io.cpu.req.valid - readArb.io.in(2).bits.way_en := Fix(-1) - when (!readArb.io.in(2).ready) { io.cpu.req.ready := Bool(false) } - - // store/amo hits - def idxMatch(dst: HellaCacheReq, src: HellaCacheReq) = dst.addr(indexmsb,indexlsb) === src.addr(indexmsb,indexlsb) - def offsetMatch(dst: HellaCacheReq, src: HellaCacheReq) = { - def mask(x: HellaCacheReq) = StoreGen(x.typ, x.addr, Bits(0)).mask - // TODO: this is overly restrictive. need write-combining buffer. 
- isWrite(src.cmd) && - dst.addr(indexlsb-1,offsetlsb) === src.addr(indexlsb-1,offsetlsb) && - ((mask(dst) & mask(src)).orR || isWrite(dst.cmd)) - } - def storeMatch(dst: HellaCacheReq, src: HellaCacheReq) = idxMatch(dst, src) && offsetMatch(dst, src) - val p_store_match = s2_valid && storeMatch(s1_req, s2_req) || - s3_valid && storeMatch(s1_req, s3_req) || - s4_valid && storeMatch(s1_req, s4_req) - writeArb.io.in(0).bits.addr := s3_req.addr - writeArb.io.in(0).bits.wmask := UFix(1) << s3_req.addr(conf.ramoffbits-1,offsetlsb).toUFix - writeArb.io.in(0).bits.data := Fill(MEM_DATA_BITS/conf.databits, s3_req.data) - writeArb.io.in(0).valid := s3_valid - writeArb.io.in(0).bits.way_en := s3_way - // tag update after a store to an exclusive clean line. val new_hit_state = conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) meta.io.state_req.bits.rw := Bool(true) @@ -865,7 +824,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { meta.io.state_req.bits.way_en := s2_tag_match_way meta.io.state_req.valid := s2_valid_masked && s2_hit && s2_hit_state != new_hit_state - // pending store data, also used for AMO RHS + // store/amo hits s3_valid := (s2_valid_masked && s2_hit || s2_replay) && isWrite(s2_req.cmd) val amoalu = new AMOALU when ((s2_valid || s2_replay) && isWrite(s2_req.cmd)) { @@ -874,6 +833,19 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { s3_way := s2_tag_match_way } + writeArb.io.in(0).bits.addr := s3_req.addr + writeArb.io.in(0).bits.wmask := UFix(1) << s3_req.addr(conf.ramoffbits-1,offsetlsb).toUFix + writeArb.io.in(0).bits.data := Fill(MEM_DATA_BITS/conf.databits, s3_req.data) + writeArb.io.in(0).valid := s3_valid + writeArb.io.in(0).bits.way_en := s3_way + + // replacement policy + val replacer = new RandomReplacement + val s1_replaced_way_en = UFixToOH(replacer.way) + val s2_replaced_way_en = UFixToOH(RegEn(replacer.way, s1_clk_en)) + val s2_repl_state = Mux1H(s2_replaced_way_en, wayMap((w: Int) => 
RegEn(meta.io.resp(w).state, s1_clk_en && s1_replaced_way_en(w))){Bits()}) + val s2_repl_tag = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w).tag, s1_clk_en && s1_replaced_way_en(w))){Bits()}) + // miss handling mshr.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) && !s2_nack_hit mshr.io.req.bits := s2_req @@ -906,17 +878,45 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { prober.io.line_state := s2_hit_state prober.io.meta_req <> meta_arb.io.in(1) - // load data subword mux/sign extension. - // subword loads are delayed by one cycle. - val loadgen_data = data_resp_mux >> Cat(s2_req.addr(log2Up(MEM_DATA_BITS/8)-1,3), Bits(0,log2Up(conf.databits))) - val loadgen = LoadGen(s2_req.typ, s2_req.addr, loadgen_data) + // refills + val refill = conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) + writeArb.io.in(1).valid := io.mem.xact_rep.valid && refill + io.mem.xact_rep.ready := writeArb.io.in(1).ready || !refill + writeArb.io.in(1).bits := mshr.io.mem_resp + writeArb.io.in(1).bits.wmask := Fix(-1) + writeArb.io.in(1).bits.data := io.mem.xact_rep.bits.data + + // writebacks + wb.io.req <> mshr.io.wb_req + wb.io.meta_req <> meta_arb.io.in(2) + wb.io.data_req <> readArb.io.in(1) + wb.io.data_resp <> data_resp_mux + wb.io.probe_rep_data <> io.mem.probe_rep_data + + // store->load bypassing + val bypasses = List( + (s2_valid_masked || s2_replay, s2_req, amoalu.io.out), + (s3_valid, s3_req, s3_req.data), + (s4_valid, s4_req, s4_req.data) + ).map(r => (r._1 && (s1_addr >> conf.wordoffbits === r._2.addr >> conf.wordoffbits) && isWrite(r._2.cmd), r._3, StoreGen(r._2).mask)) + s1_store_bypass := bypasses.map(_._1).reduce(_||_) + when (s1_clk_en && s1_store_bypass) { + s2_store_bypass_data := PriorityMux(bypasses.map(x => (x._1, x._2))) + s2_store_bypass_mask := PriorityMux(bypasses.map(x => (x._1, x._3))) + } + + // load data subword mux/sign extension + val 
s2_data_word_prebypass = data_resp_mux >> Cat(s2_req.addr(log2Up(MEM_DATA_BITS/8)-1,3), Bits(0,log2Up(conf.databits))) + val s2_data_word = Cat(null, (0 until conf.databytes).map(i => Mux(s2_store_bypass && s2_store_bypass_mask(i), s2_store_bypass_data, s2_data_word_prebypass)(8*(i+1)-1,8*i)).reverse:_*) + val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word) amoalu.io := s2_req - amoalu.io.lhs_raw := loadgen_data - amoalu.io.lhs := loadgen.word + amoalu.io.lhs := s2_data_word amoalu.io.rhs := s2_req.data - val s1_nack = p_store_match || dtlb.io.req.valid && dtlb.io.resp.miss || + // nack it like it's hot + def idxMatch(dst: HellaCacheReq, src: HellaCacheReq) = dst.addr(indexmsb,indexlsb) === src.addr(indexmsb,indexlsb) + val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || idxMatch(s1_req, s2_req) && meta.io.state_req.valid || s1_req.addr(indexmsb,indexlsb) === prober.io.meta_req.bits.idx && !prober.io.req.ready s2_nack_hit := Reg(s1_nack) || s2_hit && mshr.io.secondary_miss @@ -925,7 +925,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s2_nack = s2_nack_hit || s2_nack_miss || s2_nack_fence s2_valid_masked := s2_valid && !s2_nack - // after a nack, block until nack condition resolves (saves energy) + // after a nack, block until nack condition resolves to save energy val block_fence = Reg(resetVal = Bool(false)) block_fence := (s2_valid && s2_req.cmd === M_FENCE || block_fence) && !mshr.io.fence_rdy val block_miss = Reg(resetVal = Bool(false)) From 72f94d11412b2086206380199c8781279c8f56ca Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 20 Nov 2012 04:06:57 -0800 Subject: [PATCH 0521/1087] fix virtual address sign extension detection --- rocket/src/main/scala/dpath.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 3bbd78ed..1f877e5e 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala 
@@ -191,7 +191,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val e = ea(VADDR_BITS,VADDR_BITS-1) Mux(a === UFix(0) || a === UFix(1), e != UFix(0), Mux(a === Fix(-1) || a === Fix(-2), e === Fix(-1), - Bool(false))) + e(0))) } val ex_effective_address = Cat(vaSign(ex_rs1, alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUFix From 2b26082132550f834e695f3d6ad7dbba81cdedc3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 20 Nov 2012 04:09:26 -0800 Subject: [PATCH 0522/1087] use 1r1w ram for tags; merge tags & permissions setting the dirty bit now allocates an MSHR (to reuse the existing datapath) --- rocket/src/main/scala/nbdcache.scala | 215 +++++++++++++-------------- 1 file changed, 103 insertions(+), 112 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 66afc35b..13bee641 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -84,18 +84,11 @@ class LoadGen(typ: Bits, addr: Bits, dat: Bits) val byte = Cat(Mux(t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift) } -class MSHRReq(implicit conf: DCacheConfig) extends Bundle { - val old_dirty = Bool() - val old_tag = Bits(width = conf.tagbits) - +class MSHRReq(implicit conf: DCacheConfig) extends HellaCacheReq { + val tag_match = Bool() + val old_meta = new MetaData val way_en = Bits(width = conf.ways) - val addr = UFix(width = conf.paddrbits) - val cmd = Bits(width = 4) - val typ = Bits(width = 3) - val tag = Bits(width = conf.reqtagbits) - val data = Bits(width = conf.databits) - override def clone = new MSHRReq().asInstanceOf[this.type] } @@ -137,13 +130,18 @@ class MetaData(implicit conf: DCacheConfig) extends Bundle { override def clone = new MetaData().asInstanceOf[this.type] } -class MetaArrayReq(implicit conf: DCacheConfig) extends Bundle { +class MetaReadReq(implicit conf: DCacheConfig) extends Bundle { + val addr = UFix(width = conf.paddrbits) + + override def 
clone = new MetaReadReq().asInstanceOf[this.type] +} + +class MetaWriteReq(implicit conf: DCacheConfig) extends Bundle { val way_en = Bits(width = conf.ways) val idx = Bits(width = conf.idxbits) - val rw = Bool() val data = new MetaData() - override def clone = new MetaArrayReq().asInstanceOf[this.type] + override def clone = new MetaWriteReq().asInstanceOf[this.type] } class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { @@ -160,7 +158,8 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val mem_req = (new FIFOIO) { new TransactionInit } val mem_resp = new DataWriteReq().asOutput - val meta_req = (new FIFOIO) { new MetaArrayReq() } + val meta_read = (new FIFOIO) { new MetaReadReq } + val meta_write = (new FIFOIO) { new MetaWriteReq } val replay = (new FIFOIO) { new Replay() } val mem_abort = (new PipeIO) { new TransactionAbort }.flip val mem_rep = (new PipeIO) { new TransactionReply }.flip @@ -202,7 +201,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid) { state := s_invalid } - when (state === s_meta_write && io.meta_req.ready) { + when (state === s_meta_write && io.meta_write.ready) { state := s_drain_rpq } when (state === s_refill_resp) { @@ -217,7 +216,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { when (abort) { state := s_refill_req } .elsewhen (io.mem_req.ready) { state := s_refill_resp } } - when (state === s_meta_clear && io.meta_req.ready) { + when (state === s_meta_clear && io.meta_write.ready) { state := s_refill_req } when (state === s_wb_resp) { @@ -240,7 +239,16 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { refill_count := UFix(0) xacx_type := conf.co.getTransactionInitTypeOnPrimaryMiss(req_cmd, conf.co.newStateOnFlush()) req := io.req_bits - state := Mux(io.req_bits.old_dirty, s_wb_req, s_refill_req) + + state := 
Mux(conf.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_refill_req) + when (io.req_bits.tag_match) { + when (conf.co.isHit(req_cmd, io.req_bits.old_meta.state)) { // set dirty bit + state := s_meta_write + line_state := conf.co.newStateOnHit(req_cmd, io.req_bits.old_meta.state) + }.otherwise { // upgrade permissions + state := s_refill_req + } + } } io.idx_match := (state != s_invalid) && idx_match @@ -250,14 +258,13 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { io.req_pri_rdy := (state === s_invalid) io.req_sec_rdy := sec_rdy && rpq.io.enq.ready - io.meta_req.valid := state === s_meta_write || state === s_meta_clear || state === s_drain_rpq - io.meta_req.bits.rw := state != s_drain_rpq - io.meta_req.bits.idx := req_idx - io.meta_req.bits.data.state := Mux(state === s_meta_clear, conf.co.newStateOnFlush(), line_state) - io.meta_req.bits.data.tag := io.tag - io.meta_req.bits.way_en := req.way_en + io.meta_write.valid := state === s_meta_write || state === s_meta_clear + io.meta_write.bits.idx := req_idx + io.meta_write.bits.data.state := Mux(state === s_meta_clear, conf.co.newStateOnFlush(), line_state) + io.meta_write.bits.data.tag := io.tag + io.meta_write.bits.way_en := req.way_en - io.wb_req.bits.tag := req.old_tag + io.wb_req.bits.tag := req.old_meta.tag io.wb_req.bits.idx := req_idx io.wb_req.bits.way_en := req.way_en io.wb_req.bits.tile_xact_id := Bits(id) @@ -271,12 +278,15 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { io.mem_req.bits.tile_xact_id := Bits(id) io.mem_finish <> finish_q.io.deq + io.meta_read.valid := state === s_drain_rpq + io.meta_read.bits.addr := io.mem_req.bits.addr << conf.offbits + io.replay.valid := state === s_drain_rpq && rpq.io.deq.valid io.replay.bits := rpq.io.deq.bits io.replay.bits.phys := Bool(true) io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(conf.offbits-1,0)).toUFix - when (!io.meta_req.ready) { + when (!io.meta_read.ready) { rpq.io.deq.ready 
:= Bool(false) io.replay.bits.cmd := M_FENCE // NOP } @@ -289,7 +299,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val mem_req = (new FIFOIO) { new TransactionInit } val mem_resp = new DataWriteReq().asOutput - val meta_req = (new FIFOIO) { new MetaArrayReq() } + val meta_read = (new FIFOIO) { new MetaReadReq } + val meta_write = (new FIFOIO) { new MetaWriteReq } val replay = (new FIFOIO) { new Replay } val mem_abort = (new PipeIO) { new TransactionAbort }.flip val mem_rep = (new PipeIO) { new TransactionReply }.flip @@ -311,7 +322,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val tagList = Vec(conf.nmshr) { Bits() } val wbTagList = Vec(conf.nmshr) { Bits() } val memRespMux = Vec(conf.nmshr) { new DataWriteReq } - val meta_req_arb = (new Arbiter(conf.nmshr)) { new MetaArrayReq() } + val meta_read_arb = (new Arbiter(conf.nmshr)) { new MetaReadReq } + val meta_write_arb = (new Arbiter(conf.nmshr)) { new MetaWriteReq } val mem_req_arb = (new Arbiter(conf.nmshr)) { new TransactionInit } val mem_finish_arb = (new Arbiter(conf.nmshr)) { new TransactionFinish } val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq } @@ -342,7 +354,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { mshr.io.req_bits := io.req.bits mshr.io.req_sdq_id := sdq_alloc_id - mshr.io.meta_req <> meta_req_arb.io.in(i) + mshr.io.meta_read <> meta_read_arb.io.in(i) + mshr.io.meta_write <> meta_write_arb.io.in(i) mshr.io.mem_req <> mem_req_arb.io.in(i) mshr.io.mem_finish <> mem_finish_arb.io.in(i) mshr.io.wb_req <> wb_req_arb.io.in(i) @@ -365,7 +378,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match - meta_req_arb.io.out <> io.meta_req + meta_read_arb.io.out <> io.meta_read + meta_write_arb.io.out <> io.meta_write mem_req_arb.io.out <> io.mem_req mem_finish_arb.io.out <> io.mem_finish wb_req_arb.io.out <> io.wb_req @@ -391,7 +405,7 @@ class 
WritebackUnit(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new WritebackReq() }.flip val probe = (new FIFOIO) { new WritebackReq() }.flip - val meta_req = (new FIFOIO) { new MetaArrayReq } + val meta_read = (new FIFOIO) { new MetaReadReq } val data_req = (new FIFOIO) { new DataReadReq() } val data_resp = Bits(INPUT, MEM_DATA_BITS) val mem_req = (new FIFOIO) { new TransactionInit } @@ -441,16 +455,10 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { val fire = valid && cnt < UFix(REFILL_CYCLES) io.req.ready := !valid && !io.probe.valid io.probe.ready := !valid - io.data_req.valid := fire && io.meta_req.ready + io.data_req.valid := fire && io.meta_read.ready io.data_req.bits.way_en := req.way_en io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(REFILL_CYCLES)-1,0)) << conf.ramoffbits - io.meta_req.valid := fire && io.data_req.ready - io.meta_req.bits.way_en := Fix(-1) - io.meta_req.bits.rw := Bool(false) - io.meta_req.bits.idx := req.idx - io.meta_req.bits.data.tag := req.tag - io.mem_req.valid := valid && !cmd_sent io.mem_req.bits.x_type := conf.co.getTransactionInitTypeOnWriteback() io.mem_req.bits.addr := Cat(req.tag, req.idx).toUFix @@ -459,28 +467,31 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { io.mem_req_data.bits.data := io.data_resp io.probe_rep_data.valid := r_data_req_fired && is_probe io.probe_rep_data.bits.data := io.data_resp + + io.meta_read.valid := fire && io.data_req.ready + io.meta_read.bits.addr := io.mem_req.bits.addr << conf.offbits } class ProbeUnit(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val req = (new FIFOIO) { new ProbeRequest }.flip val rep = (new FIFOIO) { new ProbeReply } - val meta_req = (new FIFOIO) { new MetaArrayReq } + val meta_read = (new FIFOIO) { new MetaReadReq } + val meta_write = (new FIFOIO) { new MetaWriteReq } val mshr_req = (new FIFOIO) { Bool() } val wb_req = (new FIFOIO) { new WritebackReq } val 
way_en = Bits(INPUT, conf.ways) val line_state = UFix(INPUT, 2) - val addr = Bits(OUTPUT, conf.lineaddrbits) } - val s_invalid :: s_meta_req :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(8) { UFix() } + val s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(8) { UFix() } val state = Reg(resetVal = s_invalid) val line_state = Reg() { UFix() } val way_en = Reg() { Bits() } val req = Reg() { new ProbeRequest() } val hit = way_en.orR - when (state === s_meta_write && io.meta_req.ready) { + when (state === s_meta_write && io.meta_write.ready) { state := s_invalid } when (state === s_writeback_resp && io.wb_req.ready) { @@ -499,16 +510,16 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { state := s_probe_rep line_state := io.line_state way_en := io.way_en - when (!io.mshr_req.ready) { state := s_meta_req } + when (!io.mshr_req.ready) { state := s_meta_read } } when (state === s_meta_resp) { state := s_mshr_req } - when (state === s_meta_req && io.meta_req.ready) { + when (state === s_meta_read && io.meta_read.ready) { state := s_meta_resp } when (state === s_invalid && io.req.valid) { - state := s_meta_req + state := s_meta_read req := io.req.bits } @@ -516,15 +527,16 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { io.rep.valid := state === s_probe_rep io.rep.bits := conf.co.newProbeReply(req, Mux(hit, line_state, conf.co.newStateOnFlush)) - io.meta_req.valid := state === s_meta_req || state === s_meta_write - io.meta_req.bits.way_en := Mux(state === s_meta_write, way_en, Fix(-1)) - io.meta_req.bits.rw := state === s_meta_write - io.meta_req.bits.idx := req.addr - io.meta_req.bits.data.state := conf.co.newStateOnProbeRequest(req, line_state) - io.meta_req.bits.data.tag := req.addr >> UFix(conf.idxbits) - io.mshr_req.valid := state === s_mshr_req - io.addr := req.addr + 
io.meta_read.valid := state === s_meta_read + io.meta_read.bits.addr := req.addr << UFix(conf.offbits) + io.meta_write.valid := state === s_meta_write + io.meta_write.bits.way_en := way_en + io.meta_write.bits.idx := req.addr + io.meta_write.bits.data.state := conf.co.newStateOnProbeRequest(req, line_state) + io.meta_write.bits.data.tag := req.addr >> UFix(conf.idxbits) + + io.mshr_req.valid := state === s_mshr_req io.wb_req.valid := state === s_writeback_req io.wb_req.bits.way_en := way_en io.wb_req.bits.idx := req.addr @@ -533,49 +545,37 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { class MetaDataArray(implicit conf: DCacheConfig) extends Component { val io = new Bundle { - val req = (new FIFOIO) { new MetaArrayReq() }.flip + val read = (new FIFOIO) { new MetaReadReq }.flip + val write = (new FIFOIO) { new MetaWriteReq }.flip val resp = Vec(conf.ways){ (new MetaData).asOutput } - val state_req = (new PipeIO) { new MetaArrayReq() }.flip - val way_en = Bits(OUTPUT, conf.ways) } val rst_cnt = Reg(resetVal = UFix(0, log2Up(conf.sets+1))) val rst = rst_cnt < conf.sets when (rst) { rst_cnt := rst_cnt+1 } - val permBits = io.req.bits.data.state.width - val perms = Mem(conf.sets) { UFix(width = permBits*conf.ways) } - val tags = Mem(conf.sets, seqRead = true) { Bits(width = conf.tagbits*conf.ways) } - val tag = Reg() { Bits() } - val raddr = Reg() { Bits() } - val way_en_ = Reg { Bits(width = conf.ways) } + val metabits = io.write.bits.data.state.width + conf.tagbits + val tags = Mem(conf.sets, seqRead = true) { UFix(width = metabits*conf.ways) } + val tag = Reg{UFix()} - when (rst || io.state_req.valid && io.state_req.bits.rw) { - val addr = Mux(rst, rst_cnt, io.state_req.bits.idx) - val data = Mux(rst, conf.co.newStateOnFlush, io.state_req.bits.data.state) - val mask = Mux(rst, Fix(-1), io.state_req.bits.way_en) - perms.write(addr, Fill(conf.ways, data), FillInterleaved(permBits, mask)) + when (io.read.valid) { + tag := 
tags(io.read.bits.addr(conf.untagbits-1,conf.offbits)) } - when (io.req.valid) { - when (io.req.bits.rw) { - perms.write(io.req.bits.idx, Fill(conf.ways, io.req.bits.data.state), FillInterleaved(permBits, io.req.bits.way_en)) - tags.write(io.req.bits.idx, Fill(conf.ways, io.req.bits.data.tag), FillInterleaved(conf.tagbits, io.req.bits.way_en)) - } - .otherwise { - raddr := io.req.bits.idx - tag := tags(io.req.bits.idx) - } - way_en_ := io.req.bits.way_en + when (rst || io.write.valid) { + val addr = Mux(rst, rst_cnt, io.write.bits.idx) + val data = Cat(Mux(rst, conf.co.newStateOnFlush, io.write.bits.data.state), io.write.bits.data.tag) + val mask = Mux(rst, Fix(-1), io.write.bits.way_en) + tags.write(addr, Fill(conf.ways, data), FillInterleaved(metabits, mask)) } - val perm = perms(raddr) for (w <- 0 until conf.ways) { - io.resp(w).state := perm(permBits*(w+1)-1, permBits*w) - io.resp(w).tag := tag(conf.tagbits*(w+1)-1, conf.tagbits*w) + val m = tag(metabits*(w+1)-1, metabits*w) + io.resp(w).state := m >> conf.tagbits + io.resp(w).tag := m } - io.way_en := way_en_ - io.req.ready := !rst + io.read.ready := Bool(true) + io.write.ready := !rst } class DataArray(implicit conf: DCacheConfig) extends Component { @@ -742,12 +742,12 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { when (io.cpu.req.valid) { s1_req := io.cpu.req.bits } - when (wb.io.meta_req.valid) { + when (wb.io.meta_read.valid) { + s1_req := wb.io.meta_read.bits s1_req.phys := Bool(true) - s1_req.addr := Cat(wb.io.meta_req.bits.data.tag, wb.io.meta_req.bits.idx, UFix(0, conf.offbits)).toUFix } - when (prober.io.meta_req.valid) { - s1_req.addr := Cat(prober.io.meta_req.bits.data.tag, prober.io.meta_req.bits.idx, UFix(0, conf.offbits)).toUFix + when (prober.io.meta_read.valid) { + s1_req := prober.io.meta_read.bits s1_req.phys := Bool(true) } when (mshr.io.replay.valid) { @@ -778,8 +778,10 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { // tags val meta = new 
MetaDataArray - val meta_arb = (new Arbiter(4)) { new MetaArrayReq() } - meta_arb.io.out <> meta.io.req + val metaReadArb = (new Arbiter(4)) { new MetaReadReq } + val metaWriteArb = (new Arbiter(2)) { new MetaWriteReq } + metaReadArb.io.out <> meta.io.read + metaWriteArb.io.out <> meta.io.write // data val data = new DataArray @@ -790,11 +792,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { writeArb.io.out <> data.io.write // tag read for new requests - meta_arb.io.in(3).valid := io.cpu.req.valid - meta_arb.io.in(3).bits.idx := io.cpu.req.bits.addr(indexmsb,indexlsb) - meta_arb.io.in(3).bits.rw := Bool(false) - meta_arb.io.in(3).bits.way_en := Fix(-1) - when (!meta_arb.io.in(3).ready) { io.cpu.req.ready := Bool(false) } + metaReadArb.io.in(3).valid := io.cpu.req.valid + metaReadArb.io.in(3).bits.addr := io.cpu.req.bits.addr + when (!metaReadArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) } // data read for new requests readArb.io.in(2).bits.addr := io.cpu.req.bits.addr @@ -805,24 +805,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { // tag check and way muxing def wayMap[T <: Data](f: Int => T)(gen: => T) = Vec((0 until conf.ways).map(i => f(i))){gen} val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)){Bits()}.toBits - val s1_hit_way = wayMap((w: Int) => s1_tag_eq_way(w) && conf.co.isHit(s1_req.cmd, meta.io.resp(w).state)){Bits()}.toBits val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && conf.co.isValid(meta.io.resp(w).state)){Bits()}.toBits - val s1_hit = s1_hit_way.orR - val s1_clk_en = Reg(meta_arb.io.out.valid) + val s1_clk_en = Reg(metaReadArb.io.out.valid) val s2_tag_match_way = RegEn(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR - val s2_hit = Reg(s1_hit) val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en && s1_tag_eq_way(w))){Bits()}) + val s2_hit = conf.co.isHit(s2_req.cmd, 
s2_hit_state) && s2_hit_state === conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) val s2_data = wayMap((w: Int) => RegEn(data.io.resp(w), s1_clk_en && s1_tag_eq_way(w))){Bits()} val data_resp_mux = Mux1H(s2_tag_match_way, s2_data) - - // tag update after a store to an exclusive clean line. - val new_hit_state = conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) - meta.io.state_req.bits.rw := Bool(true) - meta.io.state_req.bits.idx := s2_req.addr(indexmsb,indexlsb) - meta.io.state_req.bits.data.state := new_hit_state - meta.io.state_req.bits.way_en := s2_tag_match_way - meta.io.state_req.valid := s2_valid_masked && s2_hit && s2_hit_state != new_hit_state // store/amo hits s3_valid := (s2_valid_masked && s2_hit || s2_replay) && isWrite(s2_req.cmd) @@ -849,8 +839,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { // miss handling mshr.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) && !s2_nack_hit mshr.io.req.bits := s2_req - mshr.io.req.bits.old_dirty := conf.co.needsWriteback(s2_repl_state) && !s2_tag_match // don't wb upgrades - mshr.io.req.bits.old_tag := s2_repl_tag + mshr.io.req.bits.tag_match := s2_tag_match + mshr.io.req.bits.old_meta.state := s2_repl_state + mshr.io.req.bits.old_meta.tag := s2_repl_tag mshr.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshr.io.req.bits.data := s2_req.data @@ -867,7 +858,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { readArb.io.in(0).bits.way_en := Fix(-1) mshr.io.replay.ready := Bool(true) s1_replay := mshr.io.replay.fire() - meta_arb.io.in(0) <> mshr.io.meta_req + metaReadArb.io.in(0) <> mshr.io.meta_read + metaWriteArb.io.in(0) <> mshr.io.meta_write // probes prober.io.req <> io.mem.probe_req @@ -876,7 +868,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { prober.io.wb_req <> wb.io.probe prober.io.way_en := s2_tag_match_way prober.io.line_state := s2_hit_state - 
prober.io.meta_req <> meta_arb.io.in(1) + prober.io.meta_read <> metaReadArb.io.in(1) + prober.io.meta_write <> metaWriteArb.io.in(1) // refills val refill = conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) @@ -888,7 +881,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { // writebacks wb.io.req <> mshr.io.wb_req - wb.io.meta_req <> meta_arb.io.in(2) + wb.io.meta_read <> metaReadArb.io.in(2) wb.io.data_req <> readArb.io.in(1) wb.io.data_resp <> data_resp_mux wb.io.probe_rep_data <> io.mem.probe_rep_data @@ -915,10 +908,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { amoalu.io.rhs := s2_req.data // nack it like it's hot - def idxMatch(dst: HellaCacheReq, src: HellaCacheReq) = dst.addr(indexmsb,indexlsb) === src.addr(indexmsb,indexlsb) val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || - idxMatch(s1_req, s2_req) && meta.io.state_req.valid || - s1_req.addr(indexmsb,indexlsb) === prober.io.meta_req.bits.idx && !prober.io.req.ready + s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready s2_nack_hit := Reg(s1_nack) || s2_hit && mshr.io.secondary_miss val s2_nack_miss = !s2_hit && !mshr.io.req.ready val s2_nack_fence = s2_req.cmd === M_FENCE && !mshr.io.fence_rdy From 55082e45c4083978b4144f5da290aefafee3640f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Nov 2012 18:19:28 -0800 Subject: [PATCH 0523/1087] add AVec, which automatically infers element type should consider modifying Vec as such --- rocket/src/main/scala/util.scala | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index bd747109..e0214e8f 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -11,6 +11,15 @@ object Util implicit def wcToUFix(c: WideCounter): UFix = c.value } +object AVec +{ + def apply[T <: Data](elts: Seq[T]): Vec[T] = { + require(elts.tail.forall(elts.head.getClass == _.getClass)) + 
Vec(elts) { elts.head.clone } + } + def apply[T <: Data](elt0: T, elts: T*): Vec[T] = apply(elt0 :: elts.toList) +} + // a counter that clock gates most of its MSBs using the LSB carry-out case class WideCounter(width: Int, inc: Bool = Bool(true)) { From b514c7b7258fc6e585d40bf1ab508b28ffd49710 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Nov 2012 22:00:43 -0800 Subject: [PATCH 0524/1087] clean up I$ parity code --- rocket/src/main/scala/ecc.scala | 65 +++++++++++++++++++----------- rocket/src/main/scala/icache.scala | 27 ++++++------- 2 files changed, 54 insertions(+), 38 deletions(-) diff --git a/rocket/src/main/scala/ecc.scala b/rocket/src/main/scala/ecc.scala index cc1e00a6..179315d6 100644 --- a/rocket/src/main/scala/ecc.scala +++ b/rocket/src/main/scala/ecc.scala @@ -14,26 +14,38 @@ abstract class Decoding def error = correctable || uncorrectable } -abstract class Encoding +abstract class Code { def width(w0: Int): Int def encode(x: Bits): Bits def decode(x: Bits): Decoding } -class Parity extends Encoding +class IdentityCode extends Code +{ + def width(w0: Int) = w0 + def encode(x: Bits) = x + def decode(y: Bits) = new Decoding { + def uncorrected = y + def corrected = y + def correctable = Bool(false) + def uncorrectable = Bool(false) + } +} + +class ParityCode extends Code { def width(w0: Int) = w0+1 def encode(x: Bits) = Cat(x.xorR, x) def decode(y: Bits) = new Decoding { def uncorrected = y(y.getWidth-2,0) def corrected = uncorrected - def correctable = y.xorR - def uncorrectable = Bool(false) + def correctable = Bool(false) + def uncorrectable = y.xorR } } -class SEC extends Encoding +class SECCode extends Code { def width(k: Int) = { val m = log2Up(k) + 1 - !isPow2(k) @@ -75,30 +87,37 @@ class SEC extends Encoding private def mapping(i: Int) = i-1-log2Up(i) } -class SECDED extends Encoding +class SECDEDCode extends Code { - def width(k: Int) = new SEC().width(k)+1 - def encode(x: Bits) = new Parity().encode(new SEC().encode(x)) + 
private val sec = new SECCode + private val par = new ParityCode + + def width(k: Int) = sec.width(k)+1 + def encode(x: Bits) = par.encode(sec.encode(x)) def decode(x: Bits) = new Decoding { - val sec = new SEC().decode(x(x.getWidth-2,0)) - val par = new Parity().decode(x) - def uncorrected = sec.uncorrected - def corrected = sec.corrected - def correctable = par.correctable - def uncorrectable = !par.correctable && sec.correctable + val secdec = sec.decode(x(x.getWidth-2,0)) + val pardec = par.decode(x) + + def uncorrected = secdec.uncorrected + def corrected = secdec.corrected + def correctable = pardec.uncorrectable + def uncorrectable = !pardec.uncorrectable && secdec.correctable } } +object ErrGen +{ + // generate a 1-bit error with approximate probability 2^-f + def apply(width: Int, f: Int): Bits = { + require(width > 0 && f >= 0 && log2Up(width) + f <= 16) + UFixToOH(LFSR16()(log2Up(width)+f-1,0))(width-1,0) + } + def apply(x: Bits, f: Int): Bits = x ^ apply(x.getWidth, f) +} + class SECDEDTest extends Component { - def inject(x: Bits, n: UFix) = { - val r = LFSR16() - val r1 = UFixToOH(r(log2Up(x.getWidth)-1,0))(x.getWidth-1,0) - val r2 = UFixToOH(r(log2Up(x.getWidth)*2-1,log2Up(x.getWidth)))(x.getWidth-1,0) - x ^ Mux(n < UFix(1), UFix(0), r1) ^ Mux(n < UFix(2), UFix(0), r2) - } - - val code = new SECDED + val code = new SECDEDCode val k = 4 val n = code.width(k) @@ -115,7 +134,7 @@ class SECDEDTest extends Component val c = Counter(Bool(true), 1 << k) val numErrors = Counter(c._2, 3)._1 val e = code.encode(c._1) - val i = inject(e, numErrors) + val i = e ^ Mux(numErrors < 1, 0, ErrGen(n, 1)) ^ Mux(numErrors < 2, 0, ErrGen(n, 1)) val d = code.decode(i) io.original := c._1 diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 8845f5ee..1fc33696 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -7,7 +7,7 @@ import uncore._ import Util._ case class ICacheConfig(sets: Int, assoc: 
Int, co: CoherencePolicyWithUncached, - parity: Boolean = false) + code: Code = new IdentityCode) { val w = 1 val ibytes = 4 @@ -15,12 +15,10 @@ case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached, val dm = assoc == 1 val lines = sets * assoc val databits = MEM_DATA_BITS - val datawidth = databits + (if (parity) 1 else 0) val idxbits = log2Up(sets) val offbits = OFFSET_BITS val untagbits = idxbits + offbits val tagbits = PADDR_BITS - untagbits - val tagwidth = tagbits + (if (parity) 1 else 0) require(isPow2(sets) && isPow2(assoc)) require(isPow2(w) && isPow2(ibytes)) @@ -176,11 +174,12 @@ class ICache(implicit c: ICacheConfig) extends Component val (rf_cnt, refill_done) = Counter(io.mem.xact_rep.valid, REFILL_CYCLES) val repl_way = if (c.dm) UFix(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) - val tag_array = Mem(c.sets, seqRead = true) { Bits(width = c.tagwidth*c.assoc) } + val enc_tagbits = c.code.width(c.tagbits) + val tag_array = Mem(c.sets, seqRead = true) { Bits(width = enc_tagbits*c.assoc) } val tag_rdata = Reg() { Bits() } when (refill_done) { - val wmask = FillInterleaved(c.tagwidth, if (c.dm) Bits(1) else UFixToOH(repl_way)) - val tag = Cat(if (c.parity) s2_tag.xorR else null, s2_tag) + val wmask = FillInterleaved(enc_tagbits, if (c.dm) Bits(1) else UFixToOH(repl_way)) + val tag = c.code.encode(s2_tag) tag_array.write(s2_idx, Fill(c.assoc, tag), wmask) } /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM @@ -201,39 +200,37 @@ class ICache(implicit c: ICacheConfig) extends Component val s1_tag_match = Vec(c.assoc) { Bool() } val s2_tag_hit = Vec(c.assoc) { Bool() } - val s2_data_disparity = Vec(c.assoc) { Bool() } + val s2_dout = Vec(c.assoc){Reg{Bits()}} + for (i <- 0 until c.assoc) { val s1_vb = vb_array(Cat(UFix(i), s1_pgoff(c.untagbits-1,c.offbits))).toBool val s2_vb = Reg() { Bool() } val s2_tag_disparity = Reg() { Bool() } val s2_tag_match = Reg() { Bool() } - val tag_out = tag_rdata(c.tagwidth*(i+1)-1, 
c.tagwidth*i) + val tag_out = tag_rdata(enc_tagbits*(i+1)-1, enc_tagbits*i) when (s1_valid && rdy && !stall) { s2_vb := s1_vb - s2_tag_disparity := tag_out.xorR + s2_tag_disparity := c.code.decode(tag_out).error s2_tag_match := s1_tag_match(i) } s1_tag_match(i) := tag_out(c.tagbits-1,0) === s1_tag s2_tag_hit(i) := s2_vb && s2_tag_match - s2_disparity(i) := Bool(c.parity) && s2_vb && (s2_tag_disparity || s2_data_disparity(i)) + s2_disparity(i) := s2_vb && (s2_tag_disparity || c.code.decode(s2_dout(i)).error) } s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_) - val s2_dout = Vec(c.assoc) { Reg() { Bits(width = c.databits) } } for (i <- 0 until c.assoc) { - val data_array = Mem(c.sets*REFILL_CYCLES, seqRead = true){ Bits(width = c.datawidth) } + val data_array = Mem(c.sets*REFILL_CYCLES, seqRead = true){ Bits(width = c.code.width(c.databits)) } val s1_dout = Reg(){ Bits() } when (io.mem.xact_rep.valid && repl_way === UFix(i)) { val d = io.mem.xact_rep.bits.data - val wdata = if (c.parity) Cat(d.xorR, d) else d - data_array(Cat(s2_idx,rf_cnt)) := wdata + data_array(Cat(s2_idx,rf_cnt)) := c.code.encode(d) } /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM s1_dout := data_array(s0_pgoff(c.untagbits-1,c.offbits-rf_cnt.getWidth)) } // if s1_tag_match is critical, replace with partial tag check when (s1_valid && rdy && !stall && (Bool(c.dm) || s1_tag_match(i))) { s2_dout(i) := s1_dout } - s2_data_disparity(i) := s2_dout(i).xorR } val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(c.databits/8)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)))(c.ibytes*8-1,0)) io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) From c036cdc1ea30e61824786c1e90d11ba38626accd Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Nov 2012 22:01:08 -0800 Subject: [PATCH 0525/1087] add option for 2-cycle load-use delay --- rocket/src/main/scala/ctrl.scala | 7 ++++--- rocket/src/main/scala/dpath.scala | 4 ++-- 
rocket/src/main/scala/nbdcache.scala | 2 +- rocket/src/main/scala/tile.scala | 2 ++ 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6a5c2802..f2ff15a1 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -676,9 +676,10 @@ class Control(implicit conf: RocketConfiguration) extends Component fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) // stall for RAW/WAW hazards on LB/LH and mul/div in memory stage. - val mem_mem_cmd_bh = if (conf.fastLoadByte) Bool(false) else - (mem_reg_mem_type === MT_B) || (mem_reg_mem_type === MT_BU) || - (mem_reg_mem_type === MT_H) || (mem_reg_mem_type === MT_HU) + val mem_mem_cmd_bh = + if (!conf.fastLoadWord) Bool(true) + else if (conf.fastLoadByte) Bool(false) + else AVec(MT_B, MT_BU, MT_H, MT_HU) contains mem_reg_mem_type val data_hazard_mem = mem_reg_wen && (id_raddr1 != UFix(0) && id_renx1 && id_raddr1 === io.dpath.mem_waddr || id_raddr2 != UFix(0) && id_renx2 && id_raddr2 === io.dpath.mem_waddr || diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 1f877e5e..2e580bc0 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -135,13 +135,13 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val dmem_resp_data = if (conf.fastLoadByte) io.dmem.resp.bits.data_subword else io.dmem.resp.bits.data val ex_rs1 = - Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(3), dmem_resp_data, + Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(3) && Bool(conf.fastLoadWord), dmem_resp_data, Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(2), wb_reg_wdata, Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(1), mem_reg_wdata, Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(0), Bits(0), Cat(ex_reg_rs1_msb, ex_reg_rs1_lsb))))) val ex_rs2 = - Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(3), dmem_resp_data, + Mux(ex_reg_rs2_bypass && 
ex_reg_rs2_lsb === UFix(3) && Bool(conf.fastLoadWord), dmem_resp_data, Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(2), wb_reg_wdata, Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(1), mem_reg_wdata, Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(0), Bits(0), diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 13bee641..75cdad7b 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -728,7 +728,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) - val s1_readwrite = s1_read || s1_write + val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) val dtlb = new TLB(8) dtlb.io.ptw <> io.cpu.ptw diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 401b11aa..55dfc000 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -9,6 +9,7 @@ import Util._ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, icache: ICacheConfig, dcache: DCacheConfig, fpu: Boolean, vec: Boolean, + fastLoadWord: Boolean = true, fastLoadByte: Boolean = false) { val dcacheReqTagBits = 9 // enforce compliance with require() @@ -16,6 +17,7 @@ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, val nxpr = 32 val nxprbits = log2Up(nxpr) val rvc = false + if (fastLoadByte) require(fastLoadWord) } class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) From de2f28193aae5d6adea4c9b099af754a047df6e8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 Nov 2012 04:24:25 -0800 Subject: [PATCH 0526/1087] get rid of more global constants --- rocket/src/main/scala/consts.scala | 8 +------- rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/icache.scala | 5 +++-- rocket/src/main/scala/nbdcache.scala | 2 +- 
rocket/src/main/scala/package.scala | 1 - 6 files changed, 7 insertions(+), 13 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index d0351cbd..1c40b8ef 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -5,6 +5,7 @@ import Chisel._ import scala.math._ trait ScalarOpConstants { + val SZ_BR = 3 val BR_X = Bits("b???", 3) val BR_EQ = Bits(0, 3) val BR_NE = Bits(1, 3) @@ -105,13 +106,6 @@ abstract trait RocketDcacheConstants extends uncore.constants.CacheConstants wit require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS) } -trait TLBConstants { - val BTB_ENTRIES = 8 - val ITLB_ENTRIES = 8 - val DTLB_ENTRIES = 16 - val VITLB_ENTRIES = 4 -} - trait VectorOpConstants { val VEC_X = Bits("b??", 2).toUFix val VEC_FN_N = UFix(0, 2) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f2ff15a1..108b2455 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -43,7 +43,7 @@ class ioCtrlDpath extends Bundle() // inputs from datapath val inst = Bits(INPUT, 32); val jalr_eq = Bool(INPUT) - val ex_br_type = Bits(OUTPUT, 3) + val ex_br_type = Bits(OUTPUT, SZ_BR) val ex_br_taken = Bool(INPUT) val div_rdy = Bool(INPUT); val div_result_val = Bool(INPUT); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 2e580bc0..a48b6dc9 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -216,7 +216,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.ptw.status := pcr.io.status // branch resolution logic - io.ctrl.jalr_eq := ex_rs1 === id_pc.toFix && ex_reg_inst(11,0) === UFix(0) + io.ctrl.jalr_eq := ex_rs1 === id_pc.toFix && ex_reg_inst(21,10) === UFix(0) io.ctrl.ex_br_taken := Mux(io.ctrl.ex_br_type === BR_EQ, ex_rs1 === ex_rs2, Mux(io.ctrl.ex_br_type === BR_NE, ex_rs1 != ex_rs2, diff --git 
a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 1fc33696..420377f4 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -7,6 +7,7 @@ import uncore._ import Util._ case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached, + ntlb: Int = 8, nbtb: Int = 8, code: Code = new IdentityCode) { val w = 1 @@ -56,9 +57,9 @@ class Frontend(implicit c: ICacheConfig) extends Component val mem = new ioUncachedRequestor } - val btb = new rocketDpathBTB(BTB_ENTRIES) + val btb = new rocketDpathBTB(c.nbtb) val icache = new ICache - val tlb = new TLB(ITLB_ENTRIES) + val tlb = new TLB(c.ntlb) val s1_pc = Reg() { UFix() } val s1_same_block = Reg() { Bool() } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 75cdad7b..abe82c56 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -7,7 +7,7 @@ import uncore._ import Util._ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, - nmshr: Int, nrpq: Int, nsdq: Int, + nmshr: Int, nrpq: Int, nsdq: Int, ntlb: Int, reqtagbits: Int = -1, databits: Int = -1) { require(isPow2(sets)) diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index c30c6913..54492de6 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -13,7 +13,6 @@ object Constants extends InterruptConstants with RocketDcacheConstants with VectorOpConstants with - TLBConstants with uncore.constants.MemoryInterfaceConstants { val START_ADDR = 0x2000 From 8a6ff5f9aa80432cd73347550ddeccd4510cc590 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 Nov 2012 19:46:48 -0800 Subject: [PATCH 0527/1087] fix D$ writeback bug I swear I did this last week... perhaps I am finally losing it! 
--- rocket/src/main/scala/nbdcache.scala | 72 +++++++++++++++------------- rocket/src/main/scala/util.scala | 1 + 2 files changed, 40 insertions(+), 33 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index abe82c56..e20a27c2 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -414,41 +414,47 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { } val valid = Reg(resetVal = Bool(false)) - val is_probe = Reg() { Bool() } - val data_req_fired = Reg(resetVal = Bool(false)) - val r_data_req_fired = Reg(data_req_fired, resetVal = Bool(false)) - val cmd_sent = Reg() { Bool() } - val cnt = Reg() { UFix(width = log2Up(REFILL_CYCLES+1)) } - val req = Reg() { new WritebackReq() } + val is_probe = Reg{Bool()} + val r1_data_req_fired = Reg(resetVal = Bool(false)) + val r2_data_req_fired = Reg(resetVal = Bool(false)) + val cmd_sent = Reg{Bool()} + val cnt = Reg{UFix(width = log2Up(REFILL_CYCLES+1))} + val req = Reg{new WritebackReq} - val dout_rdy = Mux(is_probe, io.probe_rep_data.ready, io.mem_req_data.ready) - data_req_fired := Bool(false) - when (valid && io.mem_req.ready) { - cmd_sent := Bool(true) + when (valid) { + r1_data_req_fired := false + r2_data_req_fired := r1_data_req_fired + when (io.data_req.fire()) { + r1_data_req_fired := true + cnt := cnt + 1 + } + + when (r2_data_req_fired && !Mux(is_probe, io.probe_rep_data.ready, io.mem_req_data.ready)) { + r1_data_req_fired := false + r2_data_req_fired := false + cnt := cnt - Mux[UFix](r1_data_req_fired, 2, 1) + } + + when (!r1_data_req_fired && !r2_data_req_fired && cmd_sent && cnt === REFILL_CYCLES) { + valid := false + } + + when (valid && io.mem_req.ready) { + cmd_sent := true + } } - when (io.data_req.fire()) { - data_req_fired := Bool(true) - cnt := cnt + UFix(1) - } - when (data_req_fired && !dout_rdy) { - data_req_fired := Bool(false) - cnt := cnt - UFix(1) - } - .elsewhen (cmd_sent && (cnt === 
UFix(REFILL_CYCLES))) { - valid := Bool(false) - } - when (io.probe.valid && io.probe.ready) { - valid := Bool(true) - is_probe := Bool(true) - cmd_sent := Bool(true) - cnt := UFix(0) + when (io.probe.fire()) { + valid := true + is_probe := true + cmd_sent := true + cnt := 0 req := io.probe.bits } - when (io.req.valid && io.req.ready) { - valid := Bool(true) - is_probe := Bool(false) - cmd_sent := Bool(false) - cnt := UFix(0) + when (io.req.fire()) { + valid := true + is_probe := false + cmd_sent := false + cnt := 0 req := io.req.bits } @@ -463,9 +469,9 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { io.mem_req.bits.x_type := conf.co.getTransactionInitTypeOnWriteback() io.mem_req.bits.addr := Cat(req.tag, req.idx).toUFix io.mem_req.bits.tile_xact_id := req.tile_xact_id - io.mem_req_data.valid := r_data_req_fired && !is_probe + io.mem_req_data.valid := r2_data_req_fired && !is_probe io.mem_req_data.bits.data := io.data_resp - io.probe_rep_data.valid := r_data_req_fired && is_probe + io.probe_rep_data.valid := r2_data_req_fired && is_probe io.probe_rep_data.bits.data := io.data_resp io.meta_read.valid := fire && io.data_req.ready diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index e0214e8f..726177af 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -7,6 +7,7 @@ object Util implicit def intToUFix(x: Int): UFix = UFix(x) implicit def intToBoolean(x: Int): Boolean = if (x != 0) true else false implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0 + implicit def booleanToBool(x: Boolean): Bits = Bool(x) implicit def wcToUFix(c: WideCounter): UFix = c.value } From 352bb464b5f05ab94496fcbc5a9cd9ac1e73563f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 26 Nov 2012 20:33:41 -0800 Subject: [PATCH 0528/1087] clock gate X/M and M/W store data registers --- rocket/src/main/scala/ctrl.scala | 4 ++++ rocket/src/main/scala/dpath.scala | 28 ++++++++++++++++++---------- 
2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 108b2455..28e5ec69 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -35,6 +35,8 @@ class ioCtrlDpath extends Bundle() val wb_wen = Bool(OUTPUT); val wb_valid = Bool(OUTPUT) val ex_mem_type = Bits(OUTPUT, 3) + val ex_rs2_val = Bool(OUTPUT) + val mem_rs2_val = Bool(OUTPUT) // exception handling val exception = Bool(OUTPUT); val cause = UFix(OUTPUT, 6); @@ -746,6 +748,8 @@ class Control(implicit conf: RocketConfiguration) extends Component io.dpath.eret := wb_reg_eret io.dpath.ex_mem_type := ex_reg_mem_type io.dpath.ex_br_type := ex_reg_br_type + io.dpath.ex_rs2_val := ex_reg_mem_val && isWrite(ex_reg_mem_cmd) || ex_reg_vec_val + io.dpath.mem_rs2_val := mem_reg_vec_val io.fpu.valid := !ctrl_killd && id_fp_val io.fpu.killx := ctrl_killx diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a48b6dc9..87454049 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -39,21 +39,23 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // memory definitions val mem_reg_pc = Reg() { UFix() }; val mem_reg_inst = Reg() { Bits() }; - val mem_reg_rs1 = Reg() { Bits() }; - val mem_reg_rs2 = Reg() { Bits() }; val mem_reg_waddr = Reg() { UFix() }; val mem_reg_wdata = Reg() { Bits() }; val mem_reg_kill = Reg() { Bool() } + val mem_reg_store_data = Reg{Bits()} + val mem_reg_rs1 = Reg{Bits()} + val mem_reg_rs2 = Reg{Bits()} // writeback definitions val wb_reg_pc = Reg() { UFix() }; val wb_reg_inst = Reg() { Bits() }; - val wb_reg_rs1 = Reg() { Bits() }; - val wb_reg_rs2 = Reg() { Bits() }; val wb_reg_waddr = Reg() { UFix() } val wb_reg_wdata = Reg() { Bits() } val wb_reg_ll_wb = Reg(resetVal = Bool(false)); val wb_wdata = Bits(); + val wb_reg_store_data = Reg{Bits()} + val wb_reg_rs1 = Reg{Bits()} + val wb_reg_rs2 = 
Reg{Bits()} val wb_wen = io.ctrl.wb_wen && io.ctrl.wb_valid || wb_reg_ll_wb // instruction decode stage @@ -198,7 +200,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module io.dmem.req.bits.addr := ex_effective_address - io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) + io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_store_data) io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) require(io.dmem.req.bits.tag.getWidth >= 6) @@ -244,10 +246,13 @@ class Datapath(implicit conf: RocketConfiguration) extends Component when (!ex_reg_kill) { mem_reg_pc := ex_reg_pc mem_reg_inst := ex_reg_inst - mem_reg_rs1 := ex_rs1 - mem_reg_rs2 := StoreGen(io.ctrl.ex_mem_type, Bits(0), ex_rs2).data mem_reg_waddr := ex_reg_waddr mem_reg_wdata := ex_wdata + mem_reg_rs1 := ex_rs1 + mem_reg_rs2 := ex_rs2 + when (io.ctrl.ex_rs2_val) { + mem_reg_store_data := StoreGen(io.ctrl.ex_mem_type, Bits(0), ex_rs2).data + } } // for load/use hazard detection (load byte/halfword) @@ -287,10 +292,13 @@ class Datapath(implicit conf: RocketConfiguration) extends Component when (!mem_reg_kill) { wb_reg_pc := mem_reg_pc wb_reg_inst := mem_reg_inst - wb_reg_rs1 := mem_reg_rs1 - wb_reg_rs2 := mem_reg_rs2 wb_reg_waddr := mem_reg_waddr wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata) + wb_reg_rs1 := mem_reg_rs1 + wb_reg_rs2 := mem_reg_rs2 + when (io.ctrl.mem_rs2_val) { + wb_reg_store_data := mem_reg_store_data + } } wb_reg_ll_wb := io.ctrl.mem_ll_wb when (io.ctrl.mem_ll_wb) { @@ -314,7 +322,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component vec.io.vecbank := pcr.io.vecbank vec.io.vecbankcnt := pcr.io.vecbankcnt vec.io.wdata := wb_reg_wdata - vec.io.rs2 := wb_reg_rs2 + vec.io.rs2 := wb_reg_store_data pcr.io.vec_irq_aux := vec.io.irq_aux 
pcr.io.vec_appvl := vec.io.appvl From 608f65e71655d0c7b2a221482279518b81c0ee98 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 26 Nov 2012 20:34:30 -0800 Subject: [PATCH 0529/1087] don't wastefully read 2x the bits from D$ RAMs --- rocket/src/main/scala/nbdcache.scala | 98 ++++++++++++++++++++-------- rocket/src/main/scala/util.scala | 1 + 2 files changed, 71 insertions(+), 28 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index e20a27c2..4071b47a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -8,7 +8,8 @@ import Util._ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, nmshr: Int, nrpq: Int, nsdq: Int, ntlb: Int, - reqtagbits: Int = -1, databits: Int = -1) + reqtagbits: Int = -1, databits: Int = -1, + narrowRead: Boolean = true) { require(isPow2(sets)) require(isPow2(ways)) // TODO: relax this @@ -27,6 +28,7 @@ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, def ramoffbits = log2Up(MEM_DATA_BITS/8) def databytes = databits/8 def wordoffbits = log2Up(databytes) + def isNarrowRead = narrowRead && databits*ways % MEM_DATA_BITS == 0 } abstract class ReplacementPolicy @@ -595,16 +597,43 @@ class DataArray(implicit conf: DCacheConfig) extends Component { val waddr = io.write.bits.addr >> conf.ramoffbits val raddr = io.read.bits.addr >> conf.ramoffbits - for (w <- 0 until conf.ways) { - val rdata = Reg() { Bits() } - val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } - when (io.write.bits.way_en(w) && io.write.valid) { - array.write(waddr, io.write.bits.data, wmask) + if (conf.isNarrowRead) { + val waysPerMem = MEM_DATA_BITS/conf.databits + for (w <- 0 until conf.ways by waysPerMem) { + val resp = Vec(MEM_DATA_BITS/conf.databits){Reg{Bits(width = MEM_DATA_BITS)}} + val r_raddr = RegEn(io.read.bits.addr, io.read.valid) + for (p <- 0 until resp.size) { + val array = 
Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } + val way_en = io.write.bits.way_en(w+waysPerMem-1,w) + when (way_en.orR && io.write.valid && io.write.bits.wmask(p)) { + val data = Fill(waysPerMem, io.write.bits.data(conf.databits*(p+1)-1,conf.databits*p)) + val mask = FillInterleaved(conf.databits, way_en) + array.write(waddr, data, mask) + } + when (way_en.orR && io.read.valid) { + resp(p) := array(raddr) + } + } + for (dw <- 0 until waysPerMem) { + val r = AVec(resp.map(_(conf.databits*(dw+1)-1,conf.databits*dw))) + val resp_mux = + if (r.size == 1) r + else AVec(r(r_raddr(conf.ramoffbits-1,conf.wordoffbits)), r.tail:_*) + io.resp(w+dw) := resp_mux.toBits + } } - when (io.read.bits.way_en(w) && io.read.valid) { - rdata := array(raddr) + } else { + for (w <- 0 until conf.ways) { + val rdata = Reg() { Bits() } + val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } + when (io.write.bits.way_en(w) && io.write.valid) { + array.write(waddr, io.write.bits.data, wmask) + } + when (io.read.bits.way_en(w) && io.read.valid) { + rdata := array(raddr) + } + io.resp(w) := rdata } - io.resp(w) := rdata } io.read.ready := Bool(true) @@ -714,24 +743,16 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s1_req = Reg{io.cpu.req.bits.clone} val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill val s1_replay = Reg(resetVal = Bool(false)) - val s1_store_bypass = Bool() val s2_valid = Reg(s1_valid_masked, resetVal = Bool(false)) val s2_req = Reg{io.cpu.req.bits.clone} val s2_replay = Reg(s1_replay, resetVal = Bool(false)) val s2_valid_masked = Bool() - val s2_nack_hit = Bool() - val s2_store_bypass = Reg{Bool()} - val s2_store_bypass_data = Reg{Bits(width = conf.databits)} - val s2_store_bypass_mask = Reg{Bits(width = conf.databytes)} val s3_valid = Reg(resetVal = Bool(false)) val s3_req = Reg{io.cpu.req.bits.clone} val s3_way = Reg{Bits()} - val s4_valid = Reg(s3_valid, resetVal = Bool(false)) - val 
s4_req = RegEn(s3_req, s3_valid) - val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) @@ -766,7 +787,6 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { s2_req.typ := s1_req.typ s2_req.cmd := s1_req.cmd s2_req.tag := s1_req.tag - s2_store_bypass := s1_store_bypass when (s1_write) { s2_req.data := Mux(s1_replay, mshr.io.replay.bits.data, io.cpu.req.bits.data) } @@ -813,11 +833,22 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)){Bits()}.toBits val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && conf.co.isValid(meta.io.resp(w).state)){Bits()}.toBits val s1_clk_en = Reg(metaReadArb.io.out.valid) + val s1_writeback = s1_clk_en && !s1_valid && !s1_replay val s2_tag_match_way = RegEn(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en && s1_tag_eq_way(w))){Bits()}) val s2_hit = conf.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) - val s2_data = wayMap((w: Int) => RegEn(data.io.resp(w), s1_clk_en && s1_tag_eq_way(w))){Bits()} + + val s2_data = Vec(conf.ways){Bits(width = MEM_DATA_BITS)} + for (w <- 0 until conf.ways) { + val regs = Vec(MEM_DATA_BITS/conf.databits){Reg{Bits(width = conf.databits)}} + val en1 = s1_clk_en && s1_tag_eq_way(w) + for (i <- 0 until regs.size) { + val en = en1 && (Bool(i == 0 || !conf.isNarrowRead) || s1_writeback) + when (en) { regs(i) := data.io.resp(w) >> conf.databits*i } + } + s2_data(w) := Cat(regs.last, regs.init.reverse:_*) + } val data_resp_mux = Mux1H(s2_tag_match_way, s2_data) // store/amo hits @@ -843,7 +874,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s2_repl_tag = Mux1H(s2_replaced_way_en, wayMap((w: Int) => 
RegEn(meta.io.resp(w).tag, s1_clk_en && s1_replaced_way_en(w))){Bits()}) // miss handling - mshr.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) && !s2_nack_hit + mshr.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) mshr.io.req.bits := s2_req mshr.io.req.bits.tag_match := s2_tag_match mshr.io.req.bits.old_meta.state := s2_repl_state @@ -893,20 +924,29 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { wb.io.probe_rep_data <> io.mem.probe_rep_data // store->load bypassing + val s4_valid = Reg(s3_valid, resetVal = Bool(false)) + val s4_req = RegEn(s3_req, s3_valid && metaReadArb.io.out.valid) val bypasses = List( (s2_valid_masked || s2_replay, s2_req, amoalu.io.out), (s3_valid, s3_req, s3_req.data), (s4_valid, s4_req, s4_req.data) ).map(r => (r._1 && (s1_addr >> conf.wordoffbits === r._2.addr >> conf.wordoffbits) && isWrite(r._2.cmd), r._3, StoreGen(r._2).mask)) - s1_store_bypass := bypasses.map(_._1).reduce(_||_) - when (s1_clk_en && s1_store_bypass) { - s2_store_bypass_data := PriorityMux(bypasses.map(x => (x._1, x._2))) - s2_store_bypass_mask := PriorityMux(bypasses.map(x => (x._1, x._3))) + val s2_store_bypass_data = Reg{Bits(width = conf.databits)} + val s2_store_bypass_mask = Reg{Bits(width = conf.databytes)} + when (s1_clk_en) { + when (bypasses.map(_._1).reduce(_||_)) { + s2_store_bypass_data := PriorityMux(bypasses.map(x => (x._1, x._2))) + s2_store_bypass_mask := PriorityMux(bypasses.map(x => (x._1, x._3))) + }.otherwise { + s2_store_bypass_mask := Bits(0) + } } // load data subword mux/sign extension - val s2_data_word_prebypass = data_resp_mux >> Cat(s2_req.addr(log2Up(MEM_DATA_BITS/8)-1,3), Bits(0,log2Up(conf.databits))) - val s2_data_word = Cat(null, (0 until conf.databytes).map(i => Mux(s2_store_bypass && s2_store_bypass_mask(i), s2_store_bypass_data, 
s2_data_word_prebypass)(8*(i+1)-1,8*i)).reverse:_*) + val s2_data_word_prebypass = + if (conf.isNarrowRead) data_resp_mux(conf.databits-1,0) + else data_resp_mux >> Cat(s2_req.addr(log2Up(MEM_DATA_BITS/8)-1,3), Bits(0,log2Up(conf.databits))) + val s2_data_word = Cat(null, (0 until conf.databytes).map(i => Mux(s2_store_bypass_mask(i), s2_store_bypass_data, s2_data_word_prebypass)(8*(i+1)-1,8*i)).reverse:_*) val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word) amoalu.io := s2_req @@ -916,10 +956,12 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { // nack it like it's hot val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready - s2_nack_hit := Reg(s1_nack) || s2_hit && mshr.io.secondary_miss + val s2_nack_hit = RegEn(s1_nack, s1_valid || s1_replay) + when (s2_nack_hit) { mshr.io.req.valid := Bool(false) } + val s2_nack_victim = s2_hit && mshr.io.secondary_miss val s2_nack_miss = !s2_hit && !mshr.io.req.ready val s2_nack_fence = s2_req.cmd === M_FENCE && !mshr.io.fence_rdy - val s2_nack = s2_nack_hit || s2_nack_miss || s2_nack_fence + val s2_nack = s2_nack_hit || s2_nack_victim || s2_nack_miss || s2_nack_fence s2_valid_masked := s2_valid && !s2_nack // after a nack, block until nack condition resolves to save energy diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 726177af..76f078b2 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -18,6 +18,7 @@ object AVec require(elts.tail.forall(elts.head.getClass == _.getClass)) Vec(elts) { elts.head.clone } } + def apply[T <: Data](elts: Vec[T]): Vec[T] = apply(elts.toSeq) def apply[T <: Data](elt0: T, elts: T*): Vec[T] = apply(elt0 :: elts.toList) } From 64674d4d39e379423c5e1dcd12cb2e9b293bd19c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 26 Nov 2012 20:38:45 -0800 Subject: [PATCH 0530/1087] clean up PTW and support 
PADDR_BITS < VADDR_BITS --- rocket/src/main/scala/ptw.scala | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index dff3590c..d8ebdc2f 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -3,7 +3,7 @@ package rocket import Chisel._ import Node._ import Constants._ -import scala.math._ +import Util._ class IOTLBPTW extends Bundle { val req = new FIFOIO()(UFix(width = VPN_BITS)) @@ -35,16 +35,14 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component val bitsPerLevel = VPN_BITS/levels require(VPN_BITS == levels * bitsPerLevel) - val count = Reg() { UFix(width = log2Up(levels)) } val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(5) { UFix() }; - val state = Reg(resetVal = s_ready); - - val r_req_vpn = Reg() { Bits() } - val r_req_dest = Reg() { Bits() } - - val req_addr = Reg() { UFix() } - val r_resp_ppn = Reg() { Bits() }; - val r_resp_perm = Reg() { Bits() }; + val state = Reg(resetVal = s_ready) + val count = Reg{UFix(width = log2Up(levels))} + + val r_req_vpn = Reg{Bits()} + val r_req_dest = Reg{Bits()} + val r_req_addr = Reg{UFix(width = PADDR_BITS.max(VADDR_BITS))} + val r_resp_perm = Reg{Bits()} val vpn_idxs = (1 until levels).map(i => r_req_vpn((levels-i)*bitsPerLevel-1, (levels-i-1)*bitsPerLevel)) val vpn_idx = (2 until levels).foldRight(vpn_idxs(0))((i,j) => Mux(count === UFix(i-1), vpn_idxs(i-1), j)) @@ -56,20 +54,19 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component when (arb.io.out.fire()) { r_req_vpn := arb.io.out.bits r_req_dest := arb.io.chosen - req_addr := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel), UFix(0,3)) + r_req_addr := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel)) << log2Up(conf.xprlen/8) } when (io.mem.resp.valid) { - req_addr := 
Cat(io.mem.resp.bits.data(PADDR_BITS-1, PGIDX_BITS), vpn_idx, UFix(0,3)).toUFix + r_req_addr := Cat(io.mem.resp.bits.data(PADDR_BITS-1, PGIDX_BITS), vpn_idx).toUFix << log2Up(conf.xprlen/8) r_resp_perm := io.mem.resp.bits.data(9,4); - r_resp_ppn := io.mem.resp.bits.data(PADDR_BITS-1, PGIDX_BITS); } io.mem.req.valid := state === s_req io.mem.req.bits.phys := Bool(true) io.mem.req.bits.cmd := M_XRD io.mem.req.bits.typ := MT_D - io.mem.req.bits.addr := req_addr + io.mem.req.bits.addr := r_req_addr io.mem.req.bits.kill := Bool(false) val resp_val = state === s_done || state === s_error @@ -77,8 +74,9 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component val resp_ptd = io.mem.resp.bits.data(1,0) === Bits(1) val resp_pte = io.mem.resp.bits.data(1,0) === Bits(2) - - val resp_ppns = (0 until levels-1).map(i => Cat(r_resp_ppn(PPN_BITS-1, VPN_BITS-bitsPerLevel*(i+1)), r_req_vpn(VPN_BITS-1-bitsPerLevel*(i+1), 0))) + + val r_resp_ppn = r_req_addr >> PGIDX_BITS + val resp_ppns = (0 until levels-1).map(i => Cat(r_resp_ppn >> VPN_BITS-bitsPerLevel*(i+1), r_req_vpn(VPN_BITS-1-bitsPerLevel*(i+1), 0))) val resp_ppn = (0 until levels-1).foldRight(r_resp_ppn)((i,j) => Mux(count === UFix(i), resp_ppns(i), j)) for (i <- 0 until io.requestor.size) { From 9c857b83f08a257b8c4739ee3ba4dc38c1472e91 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 27 Nov 2012 01:28:06 -0800 Subject: [PATCH 0531/1087] refactor PCR file --- rocket/src/main/scala/consts.scala | 42 --- rocket/src/main/scala/ctrl.scala | 428 ++++++++++++------------- rocket/src/main/scala/dpath.scala | 14 +- rocket/src/main/scala/dpath_util.scala | 271 +++++++++------- rocket/src/main/scala/htif.scala | 5 +- rocket/src/main/scala/package.scala | 1 - rocket/src/main/scala/ptw.scala | 4 +- rocket/src/main/scala/tlb.scala | 14 +- rocket/src/main/scala/util.scala | 5 +- 9 files changed, 388 insertions(+), 396 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala 
index 1c40b8ef..9b956260 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -52,48 +52,6 @@ trait ScalarOpConstants { val RA = UFix(1, 5); } -trait PCRConstants { - val PCR_X = Bits("b???", 3) - val PCR_N = Bits(0,3) - val PCR_F = Bits(1,3) // mfpcr - val PCR_T = Bits(4,3) // mtpcr - val PCR_C = Bits(6,3) // clearpcr - val PCR_S = Bits(7,3) // setpcr - - val PCR_STATUS = UFix( 0, 5); - val PCR_EPC = UFix( 1, 5); - val PCR_BADVADDR = UFix( 2, 5); - val PCR_EVEC = UFix( 3, 5); - val PCR_COUNT = UFix( 4, 5); - val PCR_COMPARE = UFix( 5, 5); - val PCR_CAUSE = UFix( 6, 5); - val PCR_PTBR = UFix( 7, 5); - val PCR_SEND_IPI = UFix( 8, 5); - val PCR_CLR_IPI = UFix( 9, 5); - val PCR_COREID = UFix(10, 5); - val PCR_IMPL = UFix(11, 5); - val PCR_K0 = UFix(12, 5); - val PCR_K1 = UFix(13, 5); - val PCR_VECBANK = UFix(18, 5); - val PCR_VECCFG = UFix(19, 5); - val PCR_RESET = UFix(29, 5); - val PCR_TOHOST = UFix(30, 5); - val PCR_FROMHOST = UFix(31, 5); - - // definition of bits in PCR status reg - val SR_ET = 0; // enable traps - val SR_EF = 1; // enable floating point - val SR_EV = 2; // enable vector unit - val SR_EC = 3; // enable compressed instruction encoding - val SR_PS = 4; // mode stack bit - val SR_S = 5; // user/supervisor mode - val SR_U64 = 6; // 64 bit user mode - val SR_S64 = 7; // 64 bit supervisor mode - val SR_VM = 8 // VM enable - val SR_IM = 16 // interrupt mask - val SR_IM_WIDTH = 8 -} - trait InterruptConstants { val CAUSE_INTERRUPT = 32 val IRQ_IPI = 5 diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 28e5ec69..fe2ab61d 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -56,7 +56,7 @@ class ioCtrlDpath extends Bundle() val ex_waddr = UFix(INPUT, 5); // write addr from execute stage val mem_waddr = UFix(INPUT, 5); // write addr from memory stage val wb_waddr = UFix(INPUT, 5); // write addr from writeback stage - val status = Bits(INPUT, 32); + 
val status = new Status().asInput val fp_sboard_clr = Bool(INPUT); val fp_sboard_clra = UFix(INPUT, 5); val irq_timer = Bool(INPUT); @@ -75,7 +75,7 @@ abstract trait DecodeConstants // | vec_val | | renx1 mem_val | | wen pcr | | | privileged // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next // | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,BR_X, X,X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,WA_X, WB_X, PCR_X,N,X,X,X,X) + List(N, X,X,BR_X, X,X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,WA_X, WB_X, PCR.X,N,X,X,X,X) val table: Array[(Bits, List[Bits])] } @@ -89,106 +89,106 @@ object XDecode extends DecodeConstants // | vec_val | | renx1 mem_val | | wen pcr | | | privileged // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next // | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), + BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), + BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), + BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), + BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), + BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, 
N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), + BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), - J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,WA_RA,WB_PC, PCR_N,N,N,N,N,N), - JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), - JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), - JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), - RDNPC-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR_N,N,N,N,N,N), + J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), + JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,WA_RA,WB_PC, PCR.N,N,N,N,N,N), + JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), + JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), + JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), + RDNPC-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), - LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LW-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LBU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LHU-> List(Y, N,N,BR_N, 
N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SB-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - SH-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - SW-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), + LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + LW-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + LBU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + LHU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SB-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), + SH-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), + SW-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), + SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), - AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, 
Y,M_XA_SWAP,MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, 
Y,M_XA_AND, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLTU, N,M_X, MT_X, 
N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - XORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - riscvAND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - riscvOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - riscvXOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SLTI -> List(Y, N,N,BR_N, 
N,N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + ORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + XORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + riscvAND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + riscvOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + riscvXOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, 
N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - 
MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), + DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_DIVU, 
N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,Y,N,N), - SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_S,N,N,N,Y,Y), - CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_C,N,N,N,Y,Y), - ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,Y,N,Y,N), - FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR_N,Y,N,N,N,Y), - MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_F,N,N,N,Y,Y), - MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_T,N,N,N,Y,Y), - RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_TSC,PCR_N,N,N,N,N,N), - RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_TSC,PCR_N,N,N,N,N,N), - RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_IRT,PCR_N,N,N,N,N,N)) + SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,Y,N,N), + SETPCR-> List(Y, N,N,BR_N, 
N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.S,N,N,N,Y,Y), + CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.C,N,N,N,Y,Y), + ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,Y,N,Y,N), + FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), + FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR.N,Y,N,N,N,Y), + MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.F,N,N,N,Y,Y), + MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.T,N,N,N,Y,Y), + RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_TSC,PCR.N,N,N,N,N,N), + RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_TSC,PCR.N,N,N,N,N,N), + RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_IRT,PCR.N,N,N,N,N,N)) } object FDecode extends DecodeConstants @@ -200,64 +200,64 @@ object FDecode extends DecodeConstants // | vec_val | | renx1 mem_val | | wen pcr | | | privileged // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next // | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_D_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, 
DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSGNJN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMIN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMIN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMAX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMAX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMUL_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMUL_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FNMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FNMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, 
PCR_N,N,N,N,N,N), - MFTX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MFTX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_W_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_WU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_WU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_L_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_L_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_LU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_LU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FEQ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FEQ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FLT_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FLT_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FLE_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FLE_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MXTF_S-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MXTF_D-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_S_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_D_W-> List(Y, 
Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_S_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_D_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_S_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_D_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_S_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FCVT_D_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MFFSR-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - MTFSR-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FLW-> List(Y, Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - FLD-> List(Y, Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - FSW-> List(Y, Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - FSD-> List(Y, Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N)) + FCVT_S_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_D_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FSGNJ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FSGNJ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FSGNJX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FSGNJX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FSGNJN_S-> List(Y, Y,N,BR_N, 
N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FSGNJN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMIN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMIN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMAX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMAX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMUL_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMUL_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FNMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FNMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, 
N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + MFTX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + MFTX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_W_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_WU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_WU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_L_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_L_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_LU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_LU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FEQ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FEQ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FLT_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FLT_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FLE_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FLE_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + MXTF_S-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + MXTF_D-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_S_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + 
FCVT_D_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_S_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_D_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_S_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_D_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_S_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FCVT_D_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + MFFSR-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + MTFSR-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FLW-> List(Y, Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + FLD-> List(Y, Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + FSW-> List(Y, Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), + FSD-> List(Y, Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N)) } object VDecode extends DecodeConstants @@ -269,53 +269,53 @@ object VDecode extends DecodeConstants // | vec_val | | renx1 mem_val | | wen pcr | | | privileged // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next // | | | | | | | | | | | | | | | | | | | | | | | | - VVCFGIVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), - VVCFG-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), - VSETVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR_N,N,N,N,N,Y), 
- VF-> List(Y, N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_ALU,PCR_N,N,N,N,N,N), - VMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - VMSV-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR_N,N,N,N,N,N), - FENCE_V_L-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - FENCE_V_G-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,N,N), - VLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLWU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLHU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLBU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTWU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTHU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VLSTBU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VSSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), - VFSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,N,N), + VVCFGIVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,Y), + VVCFG-> 
List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,Y), + VSETVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,Y), + VF-> List(Y, N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), + VMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + VMSV-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VFMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FENCE_V_L-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), + FENCE_V_G-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), + VLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLWU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLHU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLBU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VSH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VSB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VFLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VFLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VFSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VFSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLSTWU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLSTHU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VLSTBU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VSSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VSSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VFLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VFLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VFSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + VFSSTW-> 
List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VENQCMD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), - VENQIMM1-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), - VENQIMM2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), - VENQCNT-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), - VXCPTEVAC-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR_N,N,N,N,Y,N), - VXCPTKILL-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,Y,N), - VXCPTHOLD-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR_N,N,N,N,Y,N)) + VENQCMD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,Y,N), + VENQIMM1-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,Y,N), + VENQIMM2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,Y,N), + VENQCNT-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,Y,N), + VXCPTEVAC-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,Y,N), + VXCPTKILL-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,Y,N), + VXCPTHOLD-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,Y,N)) } class Control(implicit conf: RocketConfiguration) extends Component @@ -367,7 +367,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val ex_reg_vec_val = Reg(resetVal = Bool(false)) val ex_reg_replay_next = Reg(resetVal = Bool(false)) val ex_reg_load_use = Reg(resetVal = Bool(false)) - val ex_reg_pcr = Reg(resetVal = PCR_N) + val ex_reg_pcr = 
Reg(resetVal = PCR.N) val ex_reg_br_type = Reg(resetVal = BR_N) val ex_reg_mem_cmd = Reg(){Bits()} val ex_reg_mem_type = Reg(){Bits()} @@ -387,12 +387,12 @@ class Control(implicit conf: RocketConfiguration) extends Component val mem_reg_vec_val = Reg(resetVal = Bool(false)) val mem_reg_replay = Reg(resetVal = Bool(false)) val mem_reg_replay_next = Reg(resetVal = Bool(false)) - val mem_reg_pcr = Reg(resetVal = PCR_N) + val mem_reg_pcr = Reg(resetVal = PCR.N) val mem_reg_cause = Reg(){UFix()} val mem_reg_mem_type = Reg(){Bits()} val wb_reg_valid = Reg(resetVal = Bool(false)) - val wb_reg_pcr = Reg(resetVal = PCR_N) + val wb_reg_pcr = Reg(resetVal = PCR.N) val wb_reg_wen = Reg(resetVal = Bool(false)) val wb_reg_fp_wen = Reg(resetVal = Bool(false)) val wb_reg_flush_inst = Reg(resetVal = Bool(false)) @@ -415,7 +415,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val id_maskable_interrupts = List( (io.dpath.irq_ipi, IRQ_IPI), (io.dpath.irq_timer, IRQ_TIMER)) - var id_interrupts = id_maskable_interrupts.map(i => (io.dpath.status(SR_IM+i._2) && i._1, UFix(CAUSE_INTERRUPT+i._2))) + var id_interrupts = id_maskable_interrupts.map(i => (io.dpath.status.im(i._2) && i._1, UFix(CAUSE_INTERRUPT+i._2))) val (vec_replay, vec_stalld) = if (conf.vec) { // vector control @@ -425,15 +425,15 @@ class Control(implicit conf: RocketConfiguration) extends Component io.vec_iface <> vec.io.iface vec.io.valid := wb_reg_valid - vec.io.s := io.dpath.status(SR_S) - vec.io.sr_ev := io.dpath.status(SR_EV) + vec.io.s := io.dpath.status.s + vec.io.sr_ev := io.dpath.status.ev vec.io.exception := wb_reg_xcpt vec.io.eret := wb_reg_eret val vec_dec = new rocketCtrlVecDecoder() vec_dec.io.inst := io.dpath.inst - val s = io.dpath.status(SR_S) + val s = io.dpath.status.s val mask_cmdq_ready = !vec_dec.io.sigs.enq_cmdq || s && io.vec_iface.vcmdq.ready || !s && io.vec_iface.vcmdq_user_ready val mask_ximm1q_ready = !vec_dec.io.sigs.enq_ximm1q || s && io.vec_iface.vximm1q.ready || !s && 
io.vec_iface.vximm1q_user_ready val mask_ximm2q_ready = !vec_dec.io.sigs.enq_ximm2q || s && io.vec_iface.vximm2q.ready || !s && io.vec_iface.vximm2q_user_ready @@ -455,23 +455,23 @@ class Control(implicit conf: RocketConfiguration) extends Component } else (Bool(false), Bool(false)) val (id_interrupt_unmasked, id_interrupt_cause) = checkExceptions(id_interrupts) - val id_interrupt = io.dpath.status(SR_ET) && id_interrupt_unmasked + val id_interrupt = io.dpath.status.et && id_interrupt_unmasked def checkExceptions(x: Seq[(Bits, UFix)]) = (x.map(_._1).reduce(_||_), PriorityMux(x)) // executing ERET when traps are enabled causes an illegal instruction exception - val illegal_inst = !id_int_val.toBool || (id_eret.toBool && io.dpath.status(SR_ET)) + val illegal_inst = !id_int_val.toBool || (id_eret.toBool && io.dpath.status.et) val (id_xcpt, id_cause) = checkExceptions(List( (id_interrupt, id_interrupt_cause), (io.imem.resp.bits.xcpt_ma, UFix(0)), (io.imem.resp.bits.xcpt_if, UFix(1)), (illegal_inst, UFix(2)), - (id_privileged && !io.dpath.status(SR_S), UFix(3)), - (id_fp_val && !io.dpath.status(SR_EF), UFix(4)), + (id_privileged && !io.dpath.status.s, UFix(3)), + (id_fp_val && !io.dpath.status.ef, UFix(4)), (id_syscall, UFix(6)), - (id_vec_val && !io.dpath.status(SR_EV), UFix(12)))) + (id_vec_val && !io.dpath.status.ev, UFix(12)))) ex_reg_xcpt_interrupt := id_interrupt && !take_pc && io.imem.resp.valid when (id_xcpt) { ex_reg_cause := id_cause } @@ -491,7 +491,7 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_vec_val := Bool(false); ex_reg_replay_next := Bool(false); ex_reg_load_use := Bool(false); - ex_reg_pcr := PCR_N + ex_reg_pcr := PCR.N ex_reg_br_type := BR_N ex_reg_xcpt := Bool(false) } @@ -540,7 +540,7 @@ class Control(implicit conf: RocketConfiguration) extends Component when (ctrl_killx) { mem_reg_valid := Bool(false); - mem_reg_pcr := PCR_N + mem_reg_pcr := PCR.N mem_reg_wen := Bool(false); mem_reg_fp_wen := Bool(false); 
mem_reg_eret := Bool(false); @@ -574,7 +574,7 @@ class Control(implicit conf: RocketConfiguration) extends Component (mem_reg_mem_val && io.dmem.xcpt.pf.st, UFix(11)))) val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem - val ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen || mem_reg_vec_val || mem_reg_pcr != PCR_N) + val ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen || mem_reg_vec_val || mem_reg_pcr != PCR.N) val replay_mem = ll_wb_kill_mem || mem_reg_replay || fpu_kill_mem val killm_common = ll_wb_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem @@ -585,7 +585,7 @@ class Control(implicit conf: RocketConfiguration) extends Component when (ctrl_killm) { wb_reg_valid := Bool(false) - wb_reg_pcr := PCR_N + wb_reg_pcr := PCR.N wb_reg_wen := Bool(false); wb_reg_fp_wen := Bool(false); wb_reg_eret := Bool(false); diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 87454049..0303b552 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -205,8 +205,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Component require(io.dmem.req.bits.tag.getWidth >= 6) // processor control regfile read - val pcr = new rocketDpathPCR() - pcr.io.r.en := io.ctrl.pcr != PCR_N + val pcr = new PCR + pcr.io.r.en := io.ctrl.pcr != PCR.N pcr.io.r.addr := wb_reg_inst(26,22).toUFix pcr.io.host <> io.host @@ -306,7 +306,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component wb_reg_wdata := mem_ll_wdata } wb_wdata := Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword, - Mux(io.ctrl.pcr != PCR_N, pcr.io.r.data, + Mux(io.ctrl.pcr != PCR.N, pcr.io.r.data, wb_reg_wdata)) if (conf.vec) @@ -317,7 +317,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component vec.io.ctrl <> io.vec_ctrl io.vec_iface <> vec.io.iface - vec.io.valid := io.ctrl.wb_valid && pcr.io.status(SR_EV) + 
vec.io.valid := io.ctrl.wb_valid && pcr.io.status.ev vec.io.inst := wb_reg_inst vec.io.vecbank := pcr.io.vecbank vec.io.vecbankcnt := pcr.io.vecbankcnt @@ -341,9 +341,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // processor control regfile write pcr.io.w.addr := wb_reg_inst(26,22).toUFix - pcr.io.w.en := io.ctrl.pcr === PCR_T || io.ctrl.pcr === PCR_S || io.ctrl.pcr === PCR_C - pcr.io.w.data := Mux(io.ctrl.pcr === PCR_S, pcr.io.r.data | wb_reg_wdata, - Mux(io.ctrl.pcr === PCR_C, pcr.io.r.data & ~wb_reg_wdata, + pcr.io.w.en := io.ctrl.pcr === PCR.T || io.ctrl.pcr === PCR.S || io.ctrl.pcr === PCR.C + pcr.io.w.data := Mux(io.ctrl.pcr === PCR.S, pcr.io.r.data | wb_reg_wdata, + Mux(io.ctrl.pcr === PCR.C, pcr.io.r.data & ~wb_reg_wdata, wb_reg_wdata)) // hook up I$ diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index d896034a..38b64bb1 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -54,66 +54,101 @@ class rocketDpathBTB(entries: Int) extends Component io.target := Mux1H(hits, targets) } -class ioDpathPCR(implicit conf: RocketConfiguration) extends Bundle -{ - val host = new ioHTIF(conf.ntiles) - val r = new ioReadPort(32, 64) - val w = new ioWritePort(32, 64) - - val status = Bits(OUTPUT, 32); - val ptbr = UFix(OUTPUT, PADDR_BITS); - val evec = UFix(OUTPUT, VADDR_BITS); - val exception = Bool(INPUT); - val cause = UFix(INPUT, 6); - val badvaddr_wen = Bool(INPUT); - val vec_irq_aux = Bits(INPUT, 64) - val vec_irq_aux_wen = Bool(INPUT) - val pc = UFix(INPUT, VADDR_BITS+1); - val eret = Bool(INPUT); - val ei = Bool(INPUT); - val di = Bool(INPUT); - val ptbr_wen = Bool(OUTPUT); - val irq_timer = Bool(OUTPUT); - val irq_ipi = Bool(OUTPUT); - val replay = Bool(OUTPUT) - val vecbank = Bits(OUTPUT, 8) - val vecbankcnt = UFix(OUTPUT, 4) - val vec_appvl = UFix(INPUT, 12) - val vec_nxregs = UFix(INPUT, 6) - val vec_nfregs = UFix(INPUT, 6) +class Status 
extends Bundle { + val im = Bits(width = 8) + val zero = Bits(width = 7) + val vm = Bool() + val s64 = Bool() + val u64 = Bool() + val s = Bool() + val ps = Bool() + val ec = Bool() + val ev = Bool() + val ef = Bool() + val et = Bool() } -class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component +object PCR { - val io = new ioDpathPCR - - val reg_epc = Reg{Fix()} - val reg_badvaddr = Reg{Fix()} - val reg_ebase = Reg{Fix()} + // commands + val SZ = 3 + val X = Bits("b???", 3) + val N = Bits(0,3) + val F = Bits(1,3) // mfpcr + val T = Bits(4,3) // mtpcr + val C = Bits(6,3) // clearpcr + val S = Bits(7,3) // setpcr + + // regs + val STATUS = 0 + val EPC = 1 + val BADVADDR = 2 + val EVEC = 3 + val COUNT = 4 + val COMPARE = 5 + val CAUSE = 6 + val PTBR = 7 + val SEND_IPI = 8 + val CLR_IPI = 9 + val COREID = 10 + val IMPL = 11 + val K0 = 12 + val K1 = 13 + val VECBANK = 18 + val VECCFG = 19 + val RESET = 29 + val TOHOST = 30 + val FROMHOST = 31 +} + +class PCR(implicit conf: RocketConfiguration) extends Component +{ + val io = new Bundle { + val host = new ioHTIF(conf.ntiles) + val r = new ioReadPort(conf.nxpr, conf.xprlen) + val w = new ioWritePort(conf.nxpr, conf.xprlen) + + val status = new Status().asOutput + val ptbr = UFix(OUTPUT, PADDR_BITS) + val evec = UFix(OUTPUT, VADDR_BITS) + val exception = Bool(INPUT) + val cause = UFix(INPUT, 6) + val badvaddr_wen = Bool(INPUT) + val vec_irq_aux = Bits(INPUT, conf.xprlen) + val vec_irq_aux_wen = Bool(INPUT) + val pc = UFix(INPUT, VADDR_BITS+1) + val eret = Bool(INPUT) + val ei = Bool(INPUT) + val di = Bool(INPUT) + val ptbr_wen = Bool(OUTPUT) + val irq_timer = Bool(OUTPUT) + val irq_ipi = Bool(OUTPUT) + val replay = Bool(OUTPUT) + val vecbank = Bits(OUTPUT, 8) + val vecbankcnt = UFix(OUTPUT, 4) + val vec_appvl = UFix(INPUT, 12) + val vec_nxregs = UFix(INPUT, 6) + val vec_nfregs = UFix(INPUT, 6) + } + import PCR._ + + val reg_epc = Reg{Fix(width = VADDR_BITS+1)} + val reg_badvaddr = Reg{Fix(width = 
VADDR_BITS+1)} + val reg_ebase = Reg{Fix(width = VADDR_BITS)} val reg_count = WideCounter(32) - val reg_compare = Reg() { UFix() }; - val reg_cause = Reg() { Bits() }; - val reg_tohost = Reg(resetVal = Bits(0, 64)); - val reg_fromhost = Reg(resetVal = Bits(0, 64)); - val reg_coreid = Reg() { Bits() } - val reg_k0 = Reg() { Bits() }; - val reg_k1 = Reg() { Bits() }; - val reg_ptbr = Reg() { UFix() }; - val reg_vecbank = Reg(resetVal = Bits("b1111_1111", 8)) - - val reg_error_mode = Reg(resetVal = Bool(false)); - val reg_status_vm = Reg(resetVal = Bool(false)); - val reg_status_im = Reg(resetVal = Bits(0,SR_IM_WIDTH)); - val reg_status_sx = Reg(resetVal = Bool(true)); - val reg_status_ux = Reg(resetVal = Bool(true)); - val reg_status_ec = Reg(resetVal = Bool(false)); - val reg_status_ef = Reg(resetVal = Bool(false)); - val reg_status_ev = Reg(resetVal = Bool(false)); - val reg_status_s = Reg(resetVal = Bool(true)); - val reg_status_ps = Reg(resetVal = Bool(false)); - val reg_status_et = Reg(resetVal = Bool(false)); - - val r_irq_timer = Reg(resetVal = Bool(false)); + val reg_compare = Reg{Bits(width = 32)} + val reg_cause = Reg{Bits(width = io.cause.getWidth)} + val reg_tohost = Reg(resetVal = Bits(0, conf.xprlen)) + val reg_fromhost = Reg(resetVal = Bits(0, conf.xprlen)) + val reg_coreid = Reg{Bits(width = 16)} + val reg_k0 = Reg{Bits(width = conf.xprlen)} + val reg_k1 = Reg{Bits(width = conf.xprlen)} + val reg_ptbr = Reg{UFix(width = PADDR_BITS)} + val reg_vecbank = Reg(resetVal = Fix(-1,8).toBits) + val reg_error_mode = Reg(resetVal = Bool(false)) + val reg_status = Reg{new Status} // reset down below + + val r_irq_timer = Reg(resetVal = Bool(false)) val r_irq_ipi = Reg(resetVal = Bool(true)) val rdata = Bits(); @@ -127,12 +162,12 @@ class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component val wdata = Mux(io.w.en, io.w.data, io.host.pcr_req.bits.data) io.host.pcr_req.ready := !io.w.en && !io.r.en - io.ptbr_wen := reg_status_vm.toBool && wen && 
(waddr === PCR_PTBR); - io.status := Cat(reg_status_im, Bits(0,7), reg_status_vm, reg_status_sx, reg_status_ux, reg_status_s, reg_status_ps, reg_status_ec, reg_status_ev, reg_status_ef, reg_status_et); - io.evec := Mux(io.exception, reg_ebase, reg_epc).toUFix - io.ptbr := reg_ptbr; - io.host.debug.error_mode := reg_error_mode; - io.r.data := rdata; + io.status := reg_status + io.ptbr_wen := wen && waddr === PTBR + io.evec := Mux(io.exception, reg_ebase, reg_epc).toUFix + io.ptbr := reg_ptbr + io.host.debug.error_mode := reg_error_mode + io.r.data := rdata io.vecbank := reg_vecbank var cnt = UFix(0,4) @@ -140,7 +175,7 @@ class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component cnt = cnt + reg_vecbank(i) io.vecbankcnt := cnt(3,0) - val badvaddr_sign = Mux(io.w.data(VADDR_BITS-1), ~io.w.data(63,VADDR_BITS) === UFix(0), io.w.data(63,VADDR_BITS) != UFix(0)) + val badvaddr_sign = Mux(io.w.data(VADDR_BITS-1), io.w.data(conf.xprlen-1,VADDR_BITS).andR, io.w.data(conf.xprlen-1,VADDR_BITS).orR) when (io.badvaddr_wen) { reg_badvaddr := Cat(badvaddr_sign, io.w.data(VADDR_BITS-1,0)).toUFix; } @@ -149,21 +184,20 @@ class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component } when (io.exception) { - when (!reg_status_et) { - reg_error_mode := Bool(true) - } - .otherwise { - reg_status_s := Bool(true); - reg_status_ps := reg_status_s; - reg_status_et := Bool(false); - reg_epc := io.pc; - reg_cause := io.cause; + when (!reg_status.et) { + reg_error_mode := true + }.otherwise { + reg_status.s := true + reg_status.ps := reg_status.s + reg_status.et := false + reg_epc := io.pc + reg_cause := io.cause } } when (io.eret) { - reg_status_s := reg_status_ps; - reg_status_et := Bool(true); + reg_status.s := reg_status.ps + reg_status.et := true } when (reg_count === reg_compare) { @@ -172,59 +206,64 @@ class rocketDpathPCR(implicit conf: RocketConfiguration) extends Component io.irq_timer := r_irq_timer; io.irq_ipi := r_irq_ipi; - io.host.ipi_req.valid := 
io.w.en && io.w.addr === PCR_SEND_IPI + io.host.ipi_req.valid := io.w.en && io.w.addr === SEND_IPI io.host.ipi_req.bits := io.w.data io.replay := io.host.ipi_req.valid && !io.host.ipi_req.ready - when (io.host.pcr_req.fire() && !io.host.pcr_req.bits.rw && io.host.pcr_req.bits.addr === PCR_TOHOST) { reg_tohost := UFix(0) } + when (io.host.pcr_req.fire() && !io.host.pcr_req.bits.rw && io.host.pcr_req.bits.addr === TOHOST) { reg_tohost := UFix(0) } + + val read_impl = Bits(2) + val read_ptbr = reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS + val read_veccfg = Cat(io.vec_nfregs, io.vec_nxregs, io.vec_appvl) + val read_cause = reg_cause(reg_cause.getWidth-1) << conf.xprlen-1 | reg_cause(reg_cause.getWidth-2,0) + rdata := AVec[Bits]( + reg_status.toBits, reg_epc, reg_badvaddr, reg_ebase, + reg_count, reg_compare, read_cause, read_ptbr, + reg_coreid/*x*/, read_impl/*x*/, reg_coreid, read_impl, + reg_k0, reg_k1, reg_k0/*x*/, reg_k1/*x*/, + reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank, read_veccfg, + reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank/*x*/, read_veccfg/*x*/, + reg_vecbank/*x*/, read_veccfg/*x*/, reg_tohost/*x*/, reg_fromhost/*x*/, + reg_vecbank/*x*/, read_veccfg/*x*/, reg_tohost, reg_fromhost + )(raddr) when (wen) { - when (waddr === PCR_STATUS) { - reg_status_vm := wdata(SR_VM).toBool; - reg_status_im := wdata(SR_IM_WIDTH+SR_IM,SR_IM); - reg_status_sx := wdata(SR_S64).toBool; - reg_status_ux := wdata(SR_U64).toBool; - reg_status_s := wdata(SR_S).toBool; - reg_status_ps := wdata(SR_PS).toBool; - reg_status_ev := Bool(conf.vec) && wdata(SR_EV).toBool; - reg_status_ef := Bool(conf.fpu) && wdata(SR_EF).toBool; - reg_status_ec := Bool(conf.rvc) && wdata(SR_EC).toBool; - reg_status_et := wdata(SR_ET).toBool; + when (waddr === STATUS) { + reg_status := new Status().fromBits(wdata) + reg_status.zero := 0 + if (!conf.vec) reg_status.ev := false + if (!conf.fpu) reg_status.ef := false + if (!conf.rvc) reg_status.ec := false } - when (waddr === PCR_EPC) { reg_epc := 
wdata(VADDR_BITS,0).toFix } - when (waddr === PCR_EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toUFix; } - when (waddr === PCR_COUNT) { reg_count := wdata.toUFix } - when (waddr === PCR_COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } - when (waddr === PCR_COREID) { reg_coreid := wdata(15,0) } - when (waddr === PCR_FROMHOST) { when (reg_fromhost === UFix(0) || io.w.en) { reg_fromhost := wdata } } - when (waddr === PCR_TOHOST) { when (reg_tohost === UFix(0)) { reg_tohost := wdata } } - when (waddr === PCR_CLR_IPI) { r_irq_ipi := wdata(0) } - when (waddr === PCR_K0) { reg_k0 := wdata; } - when (waddr === PCR_K1) { reg_k1 := wdata; } - when (waddr === PCR_PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } - when (waddr === PCR_VECBANK) { reg_vecbank:= wdata(7,0) } + when (waddr === EPC) { reg_epc := wdata(VADDR_BITS,0).toFix } + when (waddr === EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toUFix; } + when (waddr === COUNT) { reg_count := wdata.toUFix } + when (waddr === COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } + when (waddr === COREID) { reg_coreid := wdata(15,0) } + when (waddr === FROMHOST) { when (reg_fromhost === UFix(0) || io.w.en) { reg_fromhost := wdata } } + when (waddr === TOHOST) { when (reg_tohost === UFix(0)) { reg_tohost := wdata } } + when (waddr === CLR_IPI) { r_irq_ipi := wdata(0) } + when (waddr === K0) { reg_k0 := wdata; } + when (waddr === K1) { reg_k1 := wdata; } + when (waddr === PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } + when (waddr === VECBANK) { reg_vecbank:= wdata(7,0) } } io.host.ipi_rep.ready := Bool(true) when (io.host.ipi_rep.valid) { r_irq_ipi := Bool(true) } - rdata := io.status // raddr === PCR_STATUS - switch (raddr) { - is (PCR_EPC) { rdata := reg_epc } - is (PCR_BADVADDR) { rdata := reg_badvaddr } - is (PCR_EVEC) { rdata := reg_ebase } - is (PCR_COUNT) { rdata := reg_count } - is (PCR_COMPARE) 
{ rdata := reg_compare } - is (PCR_CAUSE) { rdata := reg_cause(5) << 63 | reg_cause(4,0) } - is (PCR_COREID) { rdata := reg_coreid } - is (PCR_IMPL) { rdata := Bits(2) } - is (PCR_FROMHOST) { rdata := reg_fromhost; } - is (PCR_TOHOST) { rdata := reg_tohost; } - is (PCR_K0) { rdata := reg_k0; } - is (PCR_K1) { rdata := reg_k1; } - is (PCR_PTBR) { rdata := reg_ptbr } - is (PCR_VECBANK) { rdata := Cat(Bits(0, 56), reg_vecbank) } - is (PCR_VECCFG) { rdata := Cat(Bits(0, 40), io.vec_nfregs, io.vec_nxregs, io.vec_appvl) } + when (reset) { + reg_status.et := false + reg_status.ef := false + reg_status.ev := false + reg_status.ec := false + reg_status.ps := false + reg_status.s := true + reg_status.u64 := true + reg_status.s64 := true + reg_status.vm := false + reg_status.zero := 0 + reg_status.im := 0 } } diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index c8d203bb..2a777d5e 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -4,6 +4,7 @@ import Chisel._ import Node._ import Constants._ import uncore._ +import Util._ class ioDebug extends Bundle { @@ -199,7 +200,7 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component val cpu = io.cpu(i) val me = pcr_coreid === UFix(i) - cpu.pcr_req.valid := state === state_pcr_req && me && pcr_addr != PCR_RESET + cpu.pcr_req.valid := state === state_pcr_req && me && pcr_addr != PCR.RESET cpu.pcr_req.bits.rw := cmd === cmd_writecr cpu.pcr_req.bits.addr := pcr_addr cpu.pcr_req.bits.data := pcr_wdata @@ -219,7 +220,7 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component when (cpu.pcr_req.valid && cpu.pcr_req.ready) { state := state_pcr_resp } - when (state === state_pcr_req && me && pcr_addr === PCR_RESET) { + when (state === state_pcr_req && me && pcr_addr === PCR.RESET) { when (cmd === cmd_writecr) { my_reset := pcr_wdata(0) } diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala 
index 54492de6..dfc32dd4 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -9,7 +9,6 @@ import scala.math._ object Constants extends ScalarOpConstants with uncore.constants.MemoryOpConstants with - PCRConstants with InterruptConstants with RocketDcacheConstants with VectorOpConstants with diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index d8ebdc2f..135b92e0 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -13,14 +13,14 @@ class IOTLBPTW extends Bundle { val perm = Bits(width = PERM_BITS) }).flip - val status = Bits(INPUT, width = 32) + val status = new Status().asInput val invalidate = Bool(INPUT) } class IODatapathPTW extends Bundle { val ptbr = UFix(INPUT, PADDR_BITS) val invalidate = Bool(INPUT) - val status = Bits(INPUT, 32) + val status = new Status().asInput } class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index a6ad8765..101742d3 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -136,22 +136,20 @@ class TLB(entries: Int) extends Component val plru = new PseudoLRU(entries) val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) - val status_s = io.ptw.status(SR_S) // user/supervisor mode - val status_vm = io.ptw.status(SR_VM) // virtual memory enable val bad_va = io.req.bits.vpn(VPN_BITS) != io.req.bits.vpn(VPN_BITS-1) - val tlb_hit = status_vm && tag_hit - val tlb_miss = status_vm && !tag_hit && !bad_va + val tlb_hit = io.ptw.status.vm && tag_hit + val tlb_miss = io.ptw.status.vm && !tag_hit && !bad_va when (io.req.valid && tlb_hit) { plru.access(OHToUFix(tag_cam.io.hits)) } io.req.ready := state === s_ready - io.resp.xcpt_ld := bad_va || tlb_hit && !Mux(status_s, (sr_array & tag_cam.io.hits).orR, (ur_array & tag_cam.io.hits).orR) - io.resp.xcpt_st := bad_va || tlb_hit && !Mux(status_s, 
(sw_array & tag_cam.io.hits).orR, (uw_array & tag_cam.io.hits).orR) - io.resp.xcpt_if := bad_va || tlb_hit && !Mux(status_s, (sx_array & tag_cam.io.hits).orR, (ux_array & tag_cam.io.hits).orR) + io.resp.xcpt_ld := bad_va || tlb_hit && !Mux(io.ptw.status.s, (sr_array & tag_cam.io.hits).orR, (ur_array & tag_cam.io.hits).orR) + io.resp.xcpt_st := bad_va || tlb_hit && !Mux(io.ptw.status.s, (sw_array & tag_cam.io.hits).orR, (uw_array & tag_cam.io.hits).orR) + io.resp.xcpt_if := bad_va || tlb_hit && !Mux(io.ptw.status.s, (sx_array & tag_cam.io.hits).orR, (ux_array & tag_cam.io.hits).orR) io.resp.miss := tlb_miss - io.resp.ppn := Mux(status_vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) + io.resp.ppn := Mux(io.ptw.status.vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) io.resp.hit_idx := tag_cam.io.hits io.ptw.req.valid := state === s_request diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 76f078b2..1b277932 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -14,10 +14,7 @@ object Util object AVec { - def apply[T <: Data](elts: Seq[T]): Vec[T] = { - require(elts.tail.forall(elts.head.getClass == _.getClass)) - Vec(elts) { elts.head.clone } - } + def apply[T <: Data](elts: Seq[T]): Vec[T] = Vec(elts) { elts.head.clone } def apply[T <: Data](elts: Vec[T]): Vec[T] = apply(elts.toSeq) def apply[T <: Data](elt0: T, elts: T*): Vec[T] = apply(elt0 :: elts.toList) } From 90cae54ac4b8634435e5f3c2b81ff0647692b0ec Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 27 Nov 2012 02:42:27 -0800 Subject: [PATCH 0532/1087] fix D$ read/write concurrency bug --- rocket/src/main/scala/nbdcache.scala | 9 +++++---- rocket/src/main/scala/ptw.scala | 20 ++++++++------------ 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 
4071b47a..525d271e 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -600,17 +600,18 @@ class DataArray(implicit conf: DCacheConfig) extends Component { if (conf.isNarrowRead) { val waysPerMem = MEM_DATA_BITS/conf.databits for (w <- 0 until conf.ways by waysPerMem) { + val wway_en = io.write.bits.way_en(w+waysPerMem-1,w) + val rway_en = io.read.bits.way_en(w+waysPerMem-1,w) val resp = Vec(MEM_DATA_BITS/conf.databits){Reg{Bits(width = MEM_DATA_BITS)}} val r_raddr = RegEn(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } - val way_en = io.write.bits.way_en(w+waysPerMem-1,w) - when (way_en.orR && io.write.valid && io.write.bits.wmask(p)) { + when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { val data = Fill(waysPerMem, io.write.bits.data(conf.databits*(p+1)-1,conf.databits*p)) - val mask = FillInterleaved(conf.databits, way_en) + val mask = FillInterleaved(conf.databits, wway_en) array.write(waddr, data, mask) } - when (way_en.orR && io.read.valid) { + when (rway_en.orR && io.read.valid) { resp(p) := array(raddr) } } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 135b92e0..c87a6e5c 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -41,11 +41,9 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component val r_req_vpn = Reg{Bits()} val r_req_dest = Reg{Bits()} - val r_req_addr = Reg{UFix(width = PADDR_BITS.max(VADDR_BITS))} - val r_resp_perm = Reg{Bits()} + val r_pte = Reg{Bits()} - val vpn_idxs = (1 until levels).map(i => r_req_vpn((levels-i)*bitsPerLevel-1, (levels-i-1)*bitsPerLevel)) - val vpn_idx = (2 until levels).foldRight(vpn_idxs(0))((i,j) => Mux(count === UFix(i-1), vpn_idxs(i-1), j)) + val vpn_idx = AVec((0 until levels).map(i => (r_req_vpn >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count) val arb = new 
RRArbiter(n)(UFix(width = VPN_BITS)) arb.io.in <> io.requestor.map(_.req) @@ -54,19 +52,18 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component when (arb.io.out.fire()) { r_req_vpn := arb.io.out.bits r_req_dest := arb.io.chosen - r_req_addr := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), arb.io.out.bits(VPN_BITS-1,VPN_BITS-bitsPerLevel)) << log2Up(conf.xprlen/8) + r_pte := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), io.mem.resp.bits.data(PGIDX_BITS-1,0)) } when (io.mem.resp.valid) { - r_req_addr := Cat(io.mem.resp.bits.data(PADDR_BITS-1, PGIDX_BITS), vpn_idx).toUFix << log2Up(conf.xprlen/8) - r_resp_perm := io.mem.resp.bits.data(9,4); + r_pte := io.mem.resp.bits.data } io.mem.req.valid := state === s_req io.mem.req.bits.phys := Bool(true) io.mem.req.bits.cmd := M_XRD io.mem.req.bits.typ := MT_D - io.mem.req.bits.addr := r_req_addr + io.mem.req.bits.addr := Cat(r_pte(PADDR_BITS-1,PGIDX_BITS), vpn_idx).toUFix << log2Up(conf.xprlen/8) io.mem.req.bits.kill := Bool(false) val resp_val = state === s_done || state === s_error @@ -75,15 +72,14 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component val resp_ptd = io.mem.resp.bits.data(1,0) === Bits(1) val resp_pte = io.mem.resp.bits.data(1,0) === Bits(2) - val r_resp_ppn = r_req_addr >> PGIDX_BITS - val resp_ppns = (0 until levels-1).map(i => Cat(r_resp_ppn >> VPN_BITS-bitsPerLevel*(i+1), r_req_vpn(VPN_BITS-1-bitsPerLevel*(i+1), 0))) - val resp_ppn = (0 until levels-1).foldRight(r_resp_ppn)((i,j) => Mux(count === UFix(i), resp_ppns(i), j)) + val r_resp_ppn = io.mem.req.bits.addr >> PGIDX_BITS + val resp_ppn = AVec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), r_req_vpn(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) for (i <- 0 until io.requestor.size) { val me = r_req_dest === UFix(i) io.requestor(i).resp.valid := resp_val && me io.requestor(i).resp.bits.error := resp_err - io.requestor(i).resp.bits.perm := r_resp_perm + 
io.requestor(i).resp.bits.perm := r_pte(9,4) io.requestor(i).resp.bits.ppn := resp_ppn.toUFix io.requestor(i).invalidate := io.dpath.invalidate io.requestor(i).status := io.dpath.status From 4608660f6e5370e33c31ee14836b6176ad198b4b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 4 Dec 2012 05:57:53 -0800 Subject: [PATCH 0533/1087] torture revealed a couple bugs FP loads/stores with certain negative offsets could cause illegal rounding mode traps, and x's were cropping up in situations that are benign in HW. --- rocket/src/main/scala/dpath.scala | 5 +- rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/fpu.scala | 125 +++++++++++++------------ rocket/src/main/scala/nbdcache.scala | 8 +- 4 files changed, 71 insertions(+), 69 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 0303b552..3ceca7e8 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -95,9 +95,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component Mux(sel === A2_BTYPE, Cat(inst(31,27), inst(16,10)), Mux(sel === A2_JTYPE, inst(18,7), inst(21,10)))) - val msbs = Mux(sel === A2_LTYPE, inst(26,7), + val msbs = Mux(sel === A2_ZERO, Bits(0), + Mux(sel === A2_LTYPE, inst(26,7).toFix, Mux(sel === A2_JTYPE, inst(31,19).toFix, - Mux(sel === A2_ITYPE, inst(21), inst(31)).toFix)) + Mux(sel === A2_ITYPE, inst(21), inst(31)).toFix))) Cat(msbs, lsbs).toFix } diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 38b64bb1..5c36034c 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -214,7 +214,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component val read_impl = Bits(2) val read_ptbr = reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS - val read_veccfg = Cat(io.vec_nfregs, io.vec_nxregs, io.vec_appvl) + val read_veccfg = if (conf.vec) Cat(io.vec_nfregs, io.vec_nxregs, io.vec_appvl) 
else Bits(0) val read_cause = reg_cause(reg_cause.getWidth-1) << conf.xprlen-1 | reg_cause(reg_cause.getWidth-2,0) rdata := AVec[Bits]( reg_status.toBits, reg_epc, reg_badvaddr, reg_ebase, diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 7cd5b989..ef6d708c 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -59,7 +59,7 @@ class FPUCtrlSigs extends Bundle val toint = Bool() val fastpipe = Bool() val fma = Bool() - val store = Bool() + val round = Bool() val rdfsr = Bool() val wrfsr = Bool() } @@ -75,67 +75,67 @@ class FPUDecoder extends Component val Y = Bool(true) val X = Bool(false) val decoder = DecodeLogic(io.inst, - List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X), - Array(FLW -> List(FCMD_LOAD, Y,N,N,N,Y,N,N,N,N,N,N), - FLD -> List(FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N,N), - FSW -> List(FCMD_STORE, N,N,Y,N,Y,N,Y,N,N,N,N), - FSD -> List(FCMD_STORE, N,N,Y,N,N,N,Y,N,N,N,N), - MXTF_S -> List(FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N,N,N), - MXTF_D -> List(FCMD_MXTF, Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_S_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,Y,Y,N,N,N,N,N), - FCVT_S_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,Y,Y,N,N,N,N,N), - FCVT_S_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,Y,Y,N,N,N,N,N), - FCVT_S_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,Y,Y,N,N,N,N,N), - FCVT_D_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_D_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_D_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,N,Y,N,N,N,N,N), - FCVT_D_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,N,N,N,N,N), - MFTX_S -> List(FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N,N,N), - MFTX_D -> List(FCMD_MFTX, N,Y,N,N,N,N,Y,N,N,N,N), - FCVT_W_S -> List(FCMD_CVT_W_FMT, N,Y,N,N,Y,N,Y,N,N,N,N), - FCVT_WU_S-> List(FCMD_CVT_WU_FMT,N,Y,N,N,Y,N,Y,N,N,N,N), - FCVT_L_S -> List(FCMD_CVT_L_FMT, N,Y,N,N,Y,N,Y,N,N,N,N), - FCVT_LU_S-> List(FCMD_CVT_LU_FMT,N,Y,N,N,Y,N,Y,N,N,N,N), - FCVT_W_D -> List(FCMD_CVT_W_FMT, N,Y,N,N,N,N,Y,N,N,N,N), - FCVT_WU_D-> List(FCMD_CVT_WU_FMT,N,Y,N,N,N,N,Y,N,N,N,N), - FCVT_L_D -> 
List(FCMD_CVT_L_FMT, N,Y,N,N,N,N,Y,N,N,N,N), - FCVT_LU_D-> List(FCMD_CVT_LU_FMT,N,Y,N,N,N,N,Y,N,N,N,N), - FCVT_S_D -> List(FCMD_CVT_FMT_D, Y,Y,N,N,Y,N,N,Y,N,N,N), - FCVT_D_S -> List(FCMD_CVT_FMT_S, Y,Y,N,N,N,N,N,Y,N,N,N), - FEQ_S -> List(FCMD_EQ, N,Y,Y,N,Y,N,Y,N,N,N,N), - FLT_S -> List(FCMD_LT, N,Y,Y,N,Y,N,Y,N,N,N,N), - FLE_S -> List(FCMD_LE, N,Y,Y,N,Y,N,Y,N,N,N,N), - FEQ_D -> List(FCMD_EQ, N,Y,Y,N,N,N,Y,N,N,N,N), - FLT_D -> List(FCMD_LT, N,Y,Y,N,N,N,Y,N,N,N,N), - FLE_D -> List(FCMD_LE, N,Y,Y,N,N,N,Y,N,N,N,N), - MTFSR -> List(FCMD_MTFSR, N,N,N,N,Y,N,Y,N,N,Y,Y), - MFFSR -> List(FCMD_MFFSR, N,N,N,N,Y,N,Y,N,N,Y,N), - FSGNJ_S -> List(FCMD_SGNJ, Y,Y,Y,N,Y,N,N,Y,N,N,N), - FSGNJN_S -> List(FCMD_SGNJN, Y,Y,Y,N,Y,N,N,Y,N,N,N), - FSGNJX_S -> List(FCMD_SGNJX, Y,Y,Y,N,Y,N,N,Y,N,N,N), - FSGNJ_D -> List(FCMD_SGNJ, Y,Y,Y,N,N,N,N,Y,N,N,N), - FSGNJN_D -> List(FCMD_SGNJN, Y,Y,Y,N,N,N,N,Y,N,N,N), - FSGNJX_D -> List(FCMD_SGNJX, Y,Y,Y,N,N,N,N,Y,N,N,N), - FMIN_S -> List(FCMD_MIN, Y,Y,Y,N,Y,N,Y,Y,N,N,N), - FMAX_S -> List(FCMD_MAX, Y,Y,Y,N,Y,N,Y,Y,N,N,N), - FMIN_D -> List(FCMD_MIN, Y,Y,Y,N,N,N,Y,Y,N,N,N), - FMAX_D -> List(FCMD_MAX, Y,Y,Y,N,N,N,Y,Y,N,N,N), - FADD_S -> List(FCMD_ADD, Y,Y,Y,N,Y,N,N,N,Y,N,N), - FSUB_S -> List(FCMD_SUB, Y,Y,Y,N,Y,N,N,N,Y,N,N), - FMUL_S -> List(FCMD_MUL, Y,Y,Y,N,Y,N,N,N,Y,N,N), - FADD_D -> List(FCMD_ADD, Y,Y,Y,N,N,N,N,N,Y,N,N), - FSUB_D -> List(FCMD_SUB, Y,Y,Y,N,N,N,N,N,Y,N,N), - FMUL_D -> List(FCMD_MUL, Y,Y,Y,N,N,N,N,N,Y,N,N), - FMADD_S -> List(FCMD_MADD, Y,Y,Y,Y,Y,N,N,N,Y,N,N), - FMSUB_S -> List(FCMD_MSUB, Y,Y,Y,Y,Y,N,N,N,Y,N,N), - FNMADD_S -> List(FCMD_NMADD, Y,Y,Y,Y,Y,N,N,N,Y,N,N), - FNMSUB_S -> List(FCMD_NMSUB, Y,Y,Y,Y,Y,N,N,N,Y,N,N), - FMADD_D -> List(FCMD_MADD, Y,Y,Y,Y,N,N,N,N,Y,N,N), - FMSUB_D -> List(FCMD_MSUB, Y,Y,Y,Y,N,N,N,N,Y,N,N), - FNMADD_D -> List(FCMD_NMADD, Y,Y,Y,Y,N,N,N,N,Y,N,N), - FNMSUB_D -> List(FCMD_NMSUB, Y,Y,Y,Y,N,N,N,N,Y,N,N) + List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X), + Array(FLW -> List(FCMD_LOAD, Y,N,N,N,Y,N,N,N,N,N,N,N), + FLD -> 
List(FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N,N,N), + FSW -> List(FCMD_STORE, N,N,Y,N,Y,N,Y,N,N,N,N,N), + FSD -> List(FCMD_STORE, N,N,Y,N,N,N,Y,N,N,N,N,N), + MXTF_S -> List(FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N,Y,N,N), + MXTF_D -> List(FCMD_MXTF, Y,N,N,N,N,Y,N,N,N,Y,N,N), + FCVT_S_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,Y,Y,N,N,N,Y,N,N), + FCVT_S_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,Y,Y,N,N,N,Y,N,N), + FCVT_S_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,Y,Y,N,N,N,Y,N,N), + FCVT_S_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,Y,Y,N,N,N,Y,N,N), + FCVT_D_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,N,Y,N,N,N,Y,N,N), + FCVT_D_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,N,N,N,Y,N,N), + FCVT_D_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,N,Y,N,N,N,Y,N,N), + FCVT_D_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,N,N,N,Y,N,N), + MFTX_S -> List(FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N,Y,N,N), + MFTX_D -> List(FCMD_MFTX, N,Y,N,N,N,N,Y,N,N,Y,N,N), + FCVT_W_S -> List(FCMD_CVT_W_FMT, N,Y,N,N,Y,N,Y,N,N,Y,N,N), + FCVT_WU_S-> List(FCMD_CVT_WU_FMT,N,Y,N,N,Y,N,Y,N,N,Y,N,N), + FCVT_L_S -> List(FCMD_CVT_L_FMT, N,Y,N,N,Y,N,Y,N,N,Y,N,N), + FCVT_LU_S-> List(FCMD_CVT_LU_FMT,N,Y,N,N,Y,N,Y,N,N,Y,N,N), + FCVT_W_D -> List(FCMD_CVT_W_FMT, N,Y,N,N,N,N,Y,N,N,Y,N,N), + FCVT_WU_D-> List(FCMD_CVT_WU_FMT,N,Y,N,N,N,N,Y,N,N,Y,N,N), + FCVT_L_D -> List(FCMD_CVT_L_FMT, N,Y,N,N,N,N,Y,N,N,Y,N,N), + FCVT_LU_D-> List(FCMD_CVT_LU_FMT,N,Y,N,N,N,N,Y,N,N,Y,N,N), + FCVT_S_D -> List(FCMD_CVT_FMT_D, Y,Y,N,N,Y,N,N,Y,N,Y,N,N), + FCVT_D_S -> List(FCMD_CVT_FMT_S, Y,Y,N,N,N,N,N,Y,N,Y,N,N), + FEQ_S -> List(FCMD_EQ, N,Y,Y,N,Y,N,Y,N,N,Y,N,N), + FLT_S -> List(FCMD_LT, N,Y,Y,N,Y,N,Y,N,N,Y,N,N), + FLE_S -> List(FCMD_LE, N,Y,Y,N,Y,N,Y,N,N,Y,N,N), + FEQ_D -> List(FCMD_EQ, N,Y,Y,N,N,N,Y,N,N,Y,N,N), + FLT_D -> List(FCMD_LT, N,Y,Y,N,N,N,Y,N,N,Y,N,N), + FLE_D -> List(FCMD_LE, N,Y,Y,N,N,N,Y,N,N,Y,N,N), + MTFSR -> List(FCMD_MTFSR, N,N,N,N,Y,N,Y,N,N,Y,Y,Y), + MFFSR -> List(FCMD_MFFSR, N,N,N,N,Y,N,Y,N,N,Y,Y,N), + FSGNJ_S -> List(FCMD_SGNJ, Y,Y,Y,N,Y,N,N,Y,N,Y,N,N), + FSGNJN_S -> List(FCMD_SGNJN, Y,Y,Y,N,Y,N,N,Y,N,Y,N,N), + FSGNJX_S -> 
List(FCMD_SGNJX, Y,Y,Y,N,Y,N,N,Y,N,Y,N,N), + FSGNJ_D -> List(FCMD_SGNJ, Y,Y,Y,N,N,N,N,Y,N,Y,N,N), + FSGNJN_D -> List(FCMD_SGNJN, Y,Y,Y,N,N,N,N,Y,N,Y,N,N), + FSGNJX_D -> List(FCMD_SGNJX, Y,Y,Y,N,N,N,N,Y,N,Y,N,N), + FMIN_S -> List(FCMD_MIN, Y,Y,Y,N,Y,N,Y,Y,N,Y,N,N), + FMAX_S -> List(FCMD_MAX, Y,Y,Y,N,Y,N,Y,Y,N,Y,N,N), + FMIN_D -> List(FCMD_MIN, Y,Y,Y,N,N,N,Y,Y,N,Y,N,N), + FMAX_D -> List(FCMD_MAX, Y,Y,Y,N,N,N,Y,Y,N,Y,N,N), + FADD_S -> List(FCMD_ADD, Y,Y,Y,N,Y,N,N,N,Y,Y,N,N), + FSUB_S -> List(FCMD_SUB, Y,Y,Y,N,Y,N,N,N,Y,Y,N,N), + FMUL_S -> List(FCMD_MUL, Y,Y,Y,N,Y,N,N,N,Y,Y,N,N), + FADD_D -> List(FCMD_ADD, Y,Y,Y,N,N,N,N,N,Y,Y,N,N), + FSUB_D -> List(FCMD_SUB, Y,Y,Y,N,N,N,N,N,Y,Y,N,N), + FMUL_D -> List(FCMD_MUL, Y,Y,Y,N,N,N,N,N,Y,Y,N,N), + FMADD_S -> List(FCMD_MADD, Y,Y,Y,Y,Y,N,N,N,Y,Y,N,N), + FMSUB_S -> List(FCMD_MSUB, Y,Y,Y,Y,Y,N,N,N,Y,Y,N,N), + FNMADD_S -> List(FCMD_NMADD, Y,Y,Y,Y,Y,N,N,N,Y,Y,N,N), + FNMSUB_S -> List(FCMD_NMSUB, Y,Y,Y,Y,Y,N,N,N,Y,Y,N,N), + FMADD_D -> List(FCMD_MADD, Y,Y,Y,Y,N,N,N,N,Y,Y,N,N), + FMSUB_D -> List(FCMD_MSUB, Y,Y,Y,Y,N,N,N,N,Y,Y,N,N), + FNMADD_D -> List(FCMD_NMADD, Y,Y,Y,Y,N,N,N,N,Y,Y,N,N), + FNMSUB_D -> List(FCMD_NMSUB, Y,Y,Y,Y,N,N,N,N,Y,Y,N,N) )) - val cmd :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: fma :: rdfsr :: wrfsr :: Nil = decoder + val cmd :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: fma :: round :: rdfsr :: wrfsr :: Nil = decoder io.sigs.cmd := cmd io.sigs.wen := wen.toBool @@ -147,6 +147,7 @@ class FPUDecoder extends Component io.sigs.toint := toint.toBool io.sigs.fastpipe := fastpipe.toBool io.sigs.fma := fma.toBool + io.sigs.round := round.toBool io.sigs.rdfsr := rdfsr.toBool io.sigs.wrfsr := wrfsr.toBool } @@ -614,5 +615,5 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component io.ctrl.sboard_clr := wen(0) && useScoreboard(x => wsrc === UFix(x._2)) io.ctrl.sboard_clra := waddr // we don't currently support round-max-magnitude (rm=4) - 
io.ctrl.illegal_rm := ex_rm(2) + io.ctrl.illegal_rm := ex_rm(2) && ctrl.round } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 525d271e..a98202cf 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -593,7 +593,6 @@ class DataArray(implicit conf: DCacheConfig) extends Component { val resp = Vec(conf.ways){ Bits(OUTPUT, MEM_DATA_BITS) } } - val wmask = FillInterleaved(conf.databits, io.write.bits.wmask) val waddr = io.write.bits.addr >> conf.ramoffbits val raddr = io.read.bits.addr >> conf.ramoffbits @@ -624,6 +623,7 @@ class DataArray(implicit conf: DCacheConfig) extends Component { } } } else { + val wmask = FillInterleaved(conf.databits, io.write.bits.wmask) for (w <- 0 until conf.ways) { val rdata = Reg() { Bits() } val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } @@ -837,8 +837,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s1_writeback = s1_clk_en && !s1_valid && !s1_replay val s2_tag_match_way = RegEn(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR - val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en && s1_tag_eq_way(w))){Bits()}) - val s2_hit = conf.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) + val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en)){Bits()}) + val s2_hit = s2_tag_match && conf.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) val s2_data = Vec(conf.ways){Bits(width = MEM_DATA_BITS)} for (w <- 0 until conf.ways) { @@ -921,7 +921,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { wb.io.req <> mshr.io.wb_req wb.io.meta_read <> metaReadArb.io.in(2) wb.io.data_req <> readArb.io.in(1) - wb.io.data_resp <> data_resp_mux + wb.io.data_resp := 
data_resp_mux wb.io.probe_rep_data <> io.mem.probe_rep_data // store->load bypassing From 290d3d226c4139d74a70e262ee9e96237fd5ace5 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 6 Dec 2012 02:07:52 -0800 Subject: [PATCH 0534/1087] fix AMO and store bypass bugs thanks, torture tester --- rocket/src/main/scala/nbdcache.scala | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index a98202cf..8662b7ff 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -658,7 +658,7 @@ class AMOALU(implicit conf: DCacheConfig) extends Component { val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU val word = io.typ === MT_W || io.typ === MT_WU || io.typ === MT_B || io.typ === MT_BU - val mask = Fix(-1,64) ^ ((word & io.addr(2)) << 31) + val mask = Fix(-1,64) ^ (io.addr(2) << 31) val adder_out = (io.lhs & mask) + (io.rhs & mask) val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63)) @@ -675,9 +675,8 @@ class AMOALU(implicit conf: DCacheConfig) extends Component { Mux(Mux(less, min, max), io.lhs, io.rhs)))) - val wdata = Mux(word, Cat(out(31,0), out(31,0)), out) val wmask = FillInterleaved(8, StoreGen(io.typ, io.addr).mask) - io.out := wmask & wdata | ~wmask & io.lhs + io.out := wmask & out | ~wmask & io.lhs } class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { @@ -744,6 +743,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s1_req = Reg{io.cpu.req.bits.clone} val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill val s1_replay = Reg(resetVal = Bool(false)) + val s1_clk_en = Reg{Bool()} val s2_valid = Reg(s1_valid_masked, resetVal = Bool(false)) val s2_req = Reg{io.cpu.req.bits.clone} @@ -783,7 +783,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { } val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(conf.pgidxbits-1,0)) - when (s1_valid || s1_replay) { 
+ when (s1_clk_en) { s2_req.addr := s1_addr s2_req.typ := s1_req.typ s2_req.cmd := s1_req.cmd @@ -833,7 +833,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { def wayMap[T <: Data](f: Int => T)(gen: => T) = Vec((0 until conf.ways).map(i => f(i))){gen} val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)){Bits()}.toBits val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && conf.co.isValid(meta.io.resp(w).state)){Bits()}.toBits - val s1_clk_en = Reg(metaReadArb.io.out.valid) + s1_clk_en := metaReadArb.io.out.valid val s1_writeback = s1_clk_en && !s1_valid && !s1_replay val s2_tag_match_way = RegEn(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR @@ -931,15 +931,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { (s2_valid_masked || s2_replay, s2_req, amoalu.io.out), (s3_valid, s3_req, s3_req.data), (s4_valid, s4_req, s4_req.data) - ).map(r => (r._1 && (s1_addr >> conf.wordoffbits === r._2.addr >> conf.wordoffbits) && isWrite(r._2.cmd), r._3, StoreGen(r._2).mask)) + ).map(r => (r._1 && (s1_addr >> conf.wordoffbits === r._2.addr >> conf.wordoffbits) && isWrite(r._2.cmd), r._3)) val s2_store_bypass_data = Reg{Bits(width = conf.databits)} - val s2_store_bypass_mask = Reg{Bits(width = conf.databytes)} + val s2_store_bypass = Reg{Bool()} when (s1_clk_en) { + s2_store_bypass := false when (bypasses.map(_._1).reduce(_||_)) { s2_store_bypass_data := PriorityMux(bypasses.map(x => (x._1, x._2))) - s2_store_bypass_mask := PriorityMux(bypasses.map(x => (x._1, x._3))) - }.otherwise { - s2_store_bypass_mask := Bits(0) + s2_store_bypass := true } } @@ -947,7 +946,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s2_data_word_prebypass = if (conf.isNarrowRead) data_resp_mux(conf.databits-1,0) else data_resp_mux >> Cat(s2_req.addr(log2Up(MEM_DATA_BITS/8)-1,3), Bits(0,log2Up(conf.databits))) - val s2_data_word = Cat(null, (0 until conf.databytes).map(i => 
Mux(s2_store_bypass_mask(i), s2_store_bypass_data, s2_data_word_prebypass)(8*(i+1)-1,8*i)).reverse:_*) + val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word) amoalu.io := s2_req From 4dda38204f8d0900fe21eb3fd0c7cb70c42fee9e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 6 Dec 2012 03:13:22 -0800 Subject: [PATCH 0535/1087] fix d$ reset bug --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 8662b7ff..2c1a6cbe 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -582,7 +582,7 @@ class MetaDataArray(implicit conf: DCacheConfig) extends Component { io.resp(w).tag := m } - io.read.ready := Bool(true) + io.read.ready := !rst io.write.ready := !rst } From e9752f1d72e9a64a526ee0753e3a6ab08e07a8b0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 6 Dec 2012 14:22:07 -0800 Subject: [PATCH 0536/1087] pipeline host pcr access --- rocket/src/main/scala/dpath_util.scala | 30 +++++++++++++++++++------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 5c36034c..067bea05 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -153,14 +153,28 @@ class PCR(implicit conf: RocketConfiguration) extends Component val rdata = Bits(); - val raddr = Mux(io.r.en, io.r.addr, io.host.pcr_req.bits.addr(4,0)) - io.host.pcr_rep.valid := io.host.pcr_req.fire() - io.host.pcr_rep.bits := rdata + val host_pcr_req_valid = Reg{Bool()} // don't reset + val host_pcr_req_fire = host_pcr_req_valid && !io.r.en && !io.w.en + val host_pcr_rep_valid = Reg{Bool()} // don't reset + val host_pcr_bits = Reg{io.host.pcr_req.bits.clone} + io.host.pcr_req.ready := !host_pcr_req_valid 
&& !host_pcr_rep_valid + io.host.pcr_rep.valid := host_pcr_rep_valid + io.host.pcr_rep.bits := host_pcr_bits.data + when (io.host.pcr_req.fire()) { + host_pcr_req_valid := true + host_pcr_bits := io.host.pcr_req.bits + } + when (host_pcr_req_fire) { + host_pcr_req_valid := false + host_pcr_rep_valid := true + host_pcr_bits.data := rdata + } + when (io.host.pcr_rep.fire()) { host_pcr_rep_valid := false } - val wen = io.w.en || !io.r.en && io.host.pcr_req.valid && io.host.pcr_req.bits.rw - val waddr = Mux(io.w.en, io.w.addr, io.host.pcr_req.bits.addr) - val wdata = Mux(io.w.en, io.w.data, io.host.pcr_req.bits.data) - io.host.pcr_req.ready := !io.w.en && !io.r.en + val raddr = Mux(io.r.en, io.r.addr, host_pcr_bits.addr) + val wen = io.w.en || !io.r.en && host_pcr_req_valid && host_pcr_bits.rw + val waddr = Mux(io.w.en, io.w.addr, host_pcr_bits.addr) + val wdata = Mux(io.w.en, io.w.data, host_pcr_bits.data) io.status := reg_status io.ptbr_wen := wen && waddr === PTBR @@ -210,7 +224,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component io.host.ipi_req.bits := io.w.data io.replay := io.host.ipi_req.valid && !io.host.ipi_req.ready - when (io.host.pcr_req.fire() && !io.host.pcr_req.bits.rw && io.host.pcr_req.bits.addr === TOHOST) { reg_tohost := UFix(0) } + when (host_pcr_req_fire && !host_pcr_bits.rw && host_pcr_bits.addr === TOHOST) { reg_tohost := UFix(0) } val read_impl = Bits(2) val read_ptbr = reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS From 3f59e439ef750caf5cc0bb3760edde9507096e00 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 7 Dec 2012 15:14:20 -0800 Subject: [PATCH 0537/1087] fix d$ tag raw hazard --- rocket/src/main/scala/nbdcache.scala | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2c1a6cbe..44d64f1f 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -171,7 +171,7 @@ class 
MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val probe_refill = (new FIFOIO) { Bool() }.flip } - val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write :: s_drain_rpq :: Nil = Enum(8) { UFix() } + val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(9) { UFix() } val state = Reg(resetVal = s_invalid) val xacx_type = Reg { UFix() } @@ -203,11 +203,15 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid) { state := s_invalid } - when (state === s_meta_write && io.meta_write.ready) { + when (state === s_meta_write_resp) { + // this wait state allows us to catch RAW hazards on the tags via nack_victim state := s_drain_rpq } + when (state === s_meta_write_req && io.meta_write.ready) { + state := s_meta_write_resp + } when (state === s_refill_resp) { - when (refill_done) { state := s_meta_write } + when (refill_done) { state := s_meta_write_req } when (reply) { refill_count := refill_count + UFix(1) line_state := conf.co.newStateOnTransactionReply(io.mem_rep.bits, io.mem_req.bits) @@ -245,7 +249,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { state := Mux(conf.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_refill_req) when (io.req_bits.tag_match) { when (conf.co.isHit(req_cmd, io.req_bits.old_meta.state)) { // set dirty bit - state := s_meta_write + state := s_meta_write_req line_state := conf.co.newStateOnHit(req_cmd, io.req_bits.old_meta.state) }.otherwise { // upgrade permissions state := s_refill_req @@ -260,7 +264,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { io.req_pri_rdy := (state === s_invalid) io.req_sec_rdy := sec_rdy && rpq.io.enq.ready - io.meta_write.valid := state === s_meta_write || state === s_meta_clear + io.meta_write.valid := 
state === s_meta_write_req || state === s_meta_clear io.meta_write.bits.idx := req_idx io.meta_write.bits.data.state := Mux(state === s_meta_clear, conf.co.newStateOnFlush(), line_state) io.meta_write.bits.data.tag := io.tag From f5c53ce35d4e78392baddbc60d4cd0b0fea96943 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 11 Dec 2012 15:58:53 -0800 Subject: [PATCH 0538/1087] add ecc support to d$ data rams i haven't injected errors yet; it may well be incorrect. --- rocket/src/main/scala/dpath_alu.scala | 4 +- rocket/src/main/scala/nbdcache.scala | 148 ++++++++++++++++---------- rocket/src/main/scala/util.scala | 5 + 3 files changed, 95 insertions(+), 62 deletions(-) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 34f76abd..96730f73 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -51,9 +51,7 @@ class ALU(implicit conf: RocketConfiguration) extends Component val io = new ALUIO // ADD, SUB - val sub = isSub(io.fn) - val adder_rhs = Mux(sub, ~io.in2, io.in2) - val sum = (io.in1 + adder_rhs + sub.toUFix)(63,0) + val sum = io.in1 + Mux(isSub(io.fn), -io.in2, io.in2) // SLT, SLTU val less = Mux(io.in1(63) === io.in2(63), sum(63), diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 44d64f1f..a9f55762 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -8,8 +8,9 @@ import Util._ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, nmshr: Int, nrpq: Int, nsdq: Int, ntlb: Int, - reqtagbits: Int = -1, databits: Int = -1, - narrowRead: Boolean = true) + code: Code = new IdentityCode, + narrowRead: Boolean = true, + reqtagbits: Int = -1, databits: Int = -1) { require(isPow2(sets)) require(isPow2(ways)) // TODO: relax this @@ -29,6 +30,12 @@ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, def databytes = databits/8 def wordoffbits = log2Up(databytes) def 
isNarrowRead = narrowRead && databits*ways % MEM_DATA_BITS == 0 + val statebits = 2 // TODO: obtain from coherence policy + val metabits = statebits + tagbits + val encdatabits = code.width(databits) + val encmetabits = code.width(metabits) + val wordsperrow = MEM_DATA_BITS/databits + val bitsperrow = wordsperrow*encdatabits } abstract class ReplacementPolicy @@ -110,8 +117,8 @@ class DataReadReq(implicit conf: DCacheConfig) extends Bundle { class DataWriteReq(implicit conf: DCacheConfig) extends Bundle { val way_en = Bits(width = conf.ways) val addr = Bits(width = conf.untagbits) - val wmask = Bits(width = MEM_DATA_BITS/conf.databits) - val data = Bits(width = MEM_DATA_BITS) + val wmask = Bits(width = conf.wordsperrow) + val data = Bits(width = conf.bitsperrow) override def clone = new DataWriteReq().asInstanceOf[this.type] } @@ -126,7 +133,7 @@ class WritebackReq(implicit conf: DCacheConfig) extends Bundle { } class MetaData(implicit conf: DCacheConfig) extends Bundle { - val state = UFix(width = 2) + val state = UFix(width = conf.statebits) val tag = Bits(width = conf.tagbits) override def clone = new MetaData().asInstanceOf[this.type] @@ -413,7 +420,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { val probe = (new FIFOIO) { new WritebackReq() }.flip val meta_read = (new FIFOIO) { new MetaReadReq } val data_req = (new FIFOIO) { new DataReadReq() } - val data_resp = Bits(INPUT, MEM_DATA_BITS) + val data_resp = Bits(INPUT, conf.bitsperrow) val mem_req = (new FIFOIO) { new TransactionInit } val mem_req_data = (new FIFOIO) { new TransactionInitData } val probe_rep_data = (new FIFOIO) { new ProbeReplyData } @@ -430,7 +437,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { when (valid) { r1_data_req_fired := false r2_data_req_fired := r1_data_req_fired - when (io.data_req.fire()) { + when (io.data_req.fire() && io.meta_read.fire()) { r1_data_req_fired := true cnt := cnt + 1 } @@ -467,7 +474,7 @@ class 
WritebackUnit(implicit conf: DCacheConfig) extends Component { val fire = valid && cnt < UFix(REFILL_CYCLES) io.req.ready := !valid && !io.probe.valid io.probe.ready := !valid - io.data_req.valid := fire && io.meta_read.ready + io.data_req.valid := fire io.data_req.bits.way_en := req.way_en io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(REFILL_CYCLES)-1,0)) << conf.ramoffbits @@ -480,7 +487,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { io.probe_rep_data.valid := r2_data_req_fired && is_probe io.probe_rep_data.bits.data := io.data_resp - io.meta_read.valid := fire && io.data_req.ready + io.meta_read.valid := fire io.meta_read.bits.addr := io.mem_req.bits.addr << conf.offbits } @@ -594,32 +601,31 @@ class DataArray(implicit conf: DCacheConfig) extends Component { val io = new Bundle { val read = new FIFOIO()(new DataReadReq).flip val write = new FIFOIO()(new DataWriteReq).flip - val resp = Vec(conf.ways){ Bits(OUTPUT, MEM_DATA_BITS) } + val resp = Vec(conf.ways){ Bits(OUTPUT, conf.bitsperrow) } } val waddr = io.write.bits.addr >> conf.ramoffbits val raddr = io.read.bits.addr >> conf.ramoffbits if (conf.isNarrowRead) { - val waysPerMem = MEM_DATA_BITS/conf.databits - for (w <- 0 until conf.ways by waysPerMem) { - val wway_en = io.write.bits.way_en(w+waysPerMem-1,w) - val rway_en = io.read.bits.way_en(w+waysPerMem-1,w) - val resp = Vec(MEM_DATA_BITS/conf.databits){Reg{Bits(width = MEM_DATA_BITS)}} + for (w <- 0 until conf.ways by conf.wordsperrow) { + val wway_en = io.write.bits.way_en(w+conf.wordsperrow-1,w) + val rway_en = io.read.bits.way_en(w+conf.wordsperrow-1,w) + val resp = Vec(conf.wordsperrow){Reg{Bits(width = conf.bitsperrow)}} val r_raddr = RegEn(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { - val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } + val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=conf.bitsperrow) } when (wway_en.orR && io.write.valid && 
io.write.bits.wmask(p)) { - val data = Fill(waysPerMem, io.write.bits.data(conf.databits*(p+1)-1,conf.databits*p)) - val mask = FillInterleaved(conf.databits, wway_en) + val data = Fill(conf.wordsperrow, io.write.bits.data(conf.encdatabits*(p+1)-1,conf.encdatabits*p)) + val mask = FillInterleaved(conf.encdatabits, wway_en) array.write(waddr, data, mask) } when (rway_en.orR && io.read.valid) { resp(p) := array(raddr) } } - for (dw <- 0 until waysPerMem) { - val r = AVec(resp.map(_(conf.databits*(dw+1)-1,conf.databits*dw))) + for (dw <- 0 until conf.wordsperrow) { + val r = AVec(resp.map(_(conf.encdatabits*(dw+1)-1,conf.encdatabits*dw))) val resp_mux = if (r.size == 1) r else AVec(r(r_raddr(conf.ramoffbits-1,conf.wordoffbits)), r.tail:_*) @@ -627,10 +633,10 @@ class DataArray(implicit conf: DCacheConfig) extends Component { } } } else { - val wmask = FillInterleaved(conf.databits, io.write.bits.wmask) + val wmask = FillInterleaved(conf.encdatabits, io.write.bits.wmask) for (w <- 0 until conf.ways) { val rdata = Reg() { Bits() } - val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=MEM_DATA_BITS) } + val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=conf.bitsperrow) } when (io.write.bits.way_en(w) && io.write.valid) { array.write(waddr, io.write.bits.data, wmask) } @@ -752,12 +758,14 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s2_valid = Reg(s1_valid_masked, resetVal = Bool(false)) val s2_req = Reg{io.cpu.req.bits.clone} val s2_replay = Reg(s1_replay, resetVal = Bool(false)) + val s2_recycle = Bool() val s2_valid_masked = Bool() val s3_valid = Reg(resetVal = Bool(false)) val s3_req = Reg{io.cpu.req.bits.clone} val s3_way = Reg{Bits()} + val s1_recycled = RegEn(s2_recycle, s1_clk_en) val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) @@ -785,6 +793,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { when 
(mshr.io.replay.valid) { s1_req := mshr.io.replay.bits } + when (s2_recycle) { + s1_req := s2_req + } val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(conf.pgidxbits-1,0)) when (s1_clk_en) { @@ -795,6 +806,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { when (s1_write) { s2_req.data := Mux(s1_replay, mshr.io.replay.bits.data, io.cpu.req.bits.data) } + when (s1_recycled) { s2_req.data := s1_req.data } } val misaligned = @@ -809,29 +821,41 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { // tags val meta = new MetaDataArray - val metaReadArb = (new Arbiter(4)) { new MetaReadReq } + val metaReadArb = (new Arbiter(5)) { new MetaReadReq } val metaWriteArb = (new Arbiter(2)) { new MetaWriteReq } metaReadArb.io.out <> meta.io.read metaWriteArb.io.out <> meta.io.write // data val data = new DataArray - val readArb = new Arbiter(3)(new DataReadReq) - val writeArb = new Arbiter(2)(new DataWriteReq) + val readArb = new Arbiter(4)(new DataReadReq) readArb.io.out.ready := !io.mem.xact_rep.valid || io.mem.xact_rep.ready // insert bubble if refill gets blocked readArb.io.out <> data.io.read - writeArb.io.out <> data.io.write + + val writeArb = new Arbiter(2)(new DataWriteReq) + data.io.write.valid := writeArb.io.out.valid + writeArb.io.out.ready := data.io.write.ready + data.io.write.bits := writeArb.io.out.bits + val wdata_encoded = (0 until conf.wordsperrow).map(i => conf.code.encode(writeArb.io.out.bits.data(conf.databits*(i+1)-1,conf.databits*i))) + data.io.write.bits.data := AVec(wdata_encoded).toBits // tag read for new requests - metaReadArb.io.in(3).valid := io.cpu.req.valid - metaReadArb.io.in(3).bits.addr := io.cpu.req.bits.addr - when (!metaReadArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) } + metaReadArb.io.in(4).valid := io.cpu.req.valid + metaReadArb.io.in(4).bits.addr := io.cpu.req.bits.addr + when (!metaReadArb.io.in(4).ready) { io.cpu.req.ready := Bool(false) } // data read for new requests - 
readArb.io.in(2).bits.addr := io.cpu.req.bits.addr - readArb.io.in(2).valid := io.cpu.req.valid - readArb.io.in(2).bits.way_en := Fix(-1) - when (!readArb.io.in(2).ready) { io.cpu.req.ready := Bool(false) } + readArb.io.in(3).bits.addr := io.cpu.req.bits.addr + readArb.io.in(3).valid := io.cpu.req.valid + readArb.io.in(3).bits.way_en := Fix(-1) + when (!readArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) } + + // recycled requests + metaReadArb.io.in(0).valid := s2_recycle + metaReadArb.io.in(0).bits.addr := s2_req.addr + readArb.io.in(0).valid := s2_recycle + readArb.io.in(0).bits.addr := s2_req.addr + readArb.io.in(0).bits.way_en := Fix(-1) // tag check and way muxing def wayMap[T <: Data](f: Int => T)(gen: => T) = Vec((0 until conf.ways).map(i => f(i))){gen} @@ -844,30 +868,35 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en)){Bits()}) val s2_hit = s2_tag_match && conf.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) - val s2_data = Vec(conf.ways){Bits(width = MEM_DATA_BITS)} + val s2_data = Vec(conf.ways){Bits(width = conf.bitsperrow)} for (w <- 0 until conf.ways) { - val regs = Vec(MEM_DATA_BITS/conf.databits){Reg{Bits(width = conf.databits)}} + val regs = Vec(conf.wordsperrow){Reg{Bits(width = conf.encdatabits)}} val en1 = s1_clk_en && s1_tag_eq_way(w) for (i <- 0 until regs.size) { val en = en1 && (Bool(i == 0 || !conf.isNarrowRead) || s1_writeback) - when (en) { regs(i) := data.io.resp(w) >> conf.databits*i } + when (en) { regs(i) := data.io.resp(w) >> conf.encdatabits*i } } - s2_data(w) := Cat(regs.last, regs.init.reverse:_*) + s2_data(w) := regs.toBits } - val data_resp_mux = Mux1H(s2_tag_match_way, s2_data) + val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data) + val s2_data_decoded = (0 until conf.wordsperrow).map(i => 
conf.code.decode(s2_data_muxed(conf.encdatabits*(i+1)-1,conf.encdatabits*i))) + val s2_data_corrected = AVec(s2_data_decoded.map(_.corrected)).toBits + val s2_data_uncorrected = AVec(s2_data_decoded.map(_.uncorrected)).toBits + val s2_word_idx = if (conf.isNarrowRead) UFix(0) else s2_req.addr(log2Up(conf.wordsperrow*conf.databytes)-1,3) + val s2_data_correctable = AVec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) // store/amo hits s3_valid := (s2_valid_masked && s2_hit || s2_replay) && isWrite(s2_req.cmd) val amoalu = new AMOALU - when ((s2_valid || s2_replay) && isWrite(s2_req.cmd)) { + when ((s2_valid || s2_replay) && (isWrite(s2_req.cmd) || s2_data_correctable)) { s3_req := s2_req - s3_req.data := amoalu.io.out + s3_req.data := Mux(s2_data_correctable, s2_data_corrected, amoalu.io.out) s3_way := s2_tag_match_way } writeArb.io.in(0).bits.addr := s3_req.addr writeArb.io.in(0).bits.wmask := UFix(1) << s3_req.addr(conf.ramoffbits-1,offsetlsb).toUFix - writeArb.io.in(0).bits.data := Fill(MEM_DATA_BITS/conf.databits, s3_req.data) + writeArb.io.in(0).bits.data := Fill(conf.wordsperrow, s3_req.data) writeArb.io.in(0).valid := s3_valid writeArb.io.in(0).bits.way_en := s3_way @@ -875,15 +904,13 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val replacer = new RandomReplacement val s1_replaced_way_en = UFixToOH(replacer.way) val s2_replaced_way_en = UFixToOH(RegEn(replacer.way, s1_clk_en)) - val s2_repl_state = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en && s1_replaced_way_en(w))){Bits()}) - val s2_repl_tag = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w).tag, s1_clk_en && s1_replaced_way_en(w))){Bits()}) + val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))){new MetaData}) // miss handling mshr.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) 
mshr.io.req.bits := s2_req mshr.io.req.bits.tag_match := s2_tag_match - mshr.io.req.bits.old_meta.state := s2_repl_state - mshr.io.req.bits.old_meta.tag := s2_repl_tag + mshr.io.req.bits.old_meta := s2_repl_meta mshr.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshr.io.req.bits.data := s2_req.data @@ -895,12 +922,12 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { when (mshr.io.req.fire()) { replacer.miss } // replays - readArb.io.in(0).valid := mshr.io.replay.valid - readArb.io.in(0).bits := mshr.io.replay.bits - readArb.io.in(0).bits.way_en := Fix(-1) - mshr.io.replay.ready := Bool(true) - s1_replay := mshr.io.replay.fire() - metaReadArb.io.in(0) <> mshr.io.meta_read + readArb.io.in(1).valid := mshr.io.replay.valid + readArb.io.in(1).bits := mshr.io.replay.bits + readArb.io.in(1).bits.way_en := Fix(-1) + mshr.io.replay.ready := readArb.io.in(1).ready + s1_replay := mshr.io.replay.valid && readArb.io.in(1).ready + metaReadArb.io.in(1) <> mshr.io.meta_read metaWriteArb.io.in(0) <> mshr.io.meta_write // probes @@ -910,7 +937,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { prober.io.wb_req <> wb.io.probe prober.io.way_en := s2_tag_match_way prober.io.line_state := s2_hit_state - prober.io.meta_read <> metaReadArb.io.in(1) + prober.io.meta_read <> metaReadArb.io.in(2) prober.io.meta_write <> metaWriteArb.io.in(1) // refills @@ -923,9 +950,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { // writebacks wb.io.req <> mshr.io.wb_req - wb.io.meta_read <> metaReadArb.io.in(2) - wb.io.data_req <> readArb.io.in(1) - wb.io.data_resp := data_resp_mux + wb.io.meta_read <> metaReadArb.io.in(3) + wb.io.data_req <> readArb.io.in(2) + wb.io.data_resp := s2_data_corrected wb.io.probe_rep_data <> io.mem.probe_rep_data // store->load bypassing @@ -947,9 +974,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { } // load data subword mux/sign extension - val 
s2_data_word_prebypass = - if (conf.isNarrowRead) data_resp_mux(conf.databits-1,0) - else data_resp_mux >> Cat(s2_req.addr(log2Up(MEM_DATA_BITS/8)-1,3), Bits(0,log2Up(conf.databits))) + val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(conf.databits))) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word) @@ -968,6 +993,11 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { val s2_nack = s2_nack_hit || s2_nack_victim || s2_nack_miss || s2_nack_fence s2_valid_masked := s2_valid && !s2_nack + val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable + val s2_recycle_next = Reg(resetVal = Bool(false)) + when (s1_valid || s1_replay) { s2_recycle_next := (s1_valid || s1_replay) && s2_recycle_ecc } + s2_recycle := s2_recycle_ecc || s2_recycle_next + // after a nack, block until nack condition resolves to save energy val block_fence = Reg(resetVal = Bool(false)) block_fence := (s2_valid && s2_req.cmd === M_FENCE || block_fence) && !mshr.io.fence_rdy @@ -978,7 +1008,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { } val s2_read = isRead(s2_req.cmd) - io.cpu.resp.valid := s2_read && (s2_replay || s2_valid_masked && s2_hit) + io.cpu.resp.valid := s2_read && (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable io.cpu.resp.bits.nack := s2_valid && s2_nack io.cpu.resp.bits := s2_req io.cpu.resp.bits.replay := s2_replay && s2_read diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 1b277932..60b0b595 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -17,6 +17,11 @@ object AVec def apply[T <: Data](elts: Seq[T]): Vec[T] = Vec(elts) { elts.head.clone } def apply[T <: Data](elts: Vec[T]): Vec[T] = apply(elts.toSeq) def apply[T <: Data](elt0: T, elts: T*): Vec[T] = apply(elt0 :: elts.toList) + + def tabulate[T <: 
Data](n: Int)(f: Int => T): Vec[T] = + apply((0 until n).map(i => f(i))) + def tabulate[T <: Data](n1: Int, n2: Int)(f: (Int, Int) => T): Vec[Vec[T]] = + tabulate(n1)(i1 => tabulate(n2)(f(i1, _))) } // a counter that clock gates most of its MSBs using the LSB carry-out From c921fc34a9f65e2fe10e43ff8d2f2bdcd9ead715 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 12 Dec 2012 02:22:34 -0800 Subject: [PATCH 0539/1087] merge ALU left and right shifters --- rocket/src/main/scala/dpath_alu.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 96730f73..14ec1426 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -61,9 +61,10 @@ class ALU(implicit conf: RocketConfiguration) extends Component val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0)).toUFix val shin_hi_32 = Mux(isSub(io.fn), Fill(32, io.in1(31)), UFix(0,32)) val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) - val shin = Cat(shin_hi, io.in1(31,0)) + val shin_r = Cat(shin_hi, io.in1(31,0)) + val shin = Mux(io.fn === FN_SR || io.fn === FN_SRA, shin_r, Reverse(shin_r)) val shout_r = (Cat(isSub(io.fn) & shin(63), shin).toFix >> shamt)(63,0) - val shout_l = (shin << shamt)(63,0) + val shout_l = Reverse(shout_r) val bitwise_logic = Mux(io.fn === FN_AND, io.in1 & io.in2, From 05f19b21d04966384125e537b16229ff189bc114 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 12 Dec 2012 02:22:47 -0800 Subject: [PATCH 0540/1087] merge multiplier and divider --- rocket/src/main/scala/ctrl.scala | 39 ++--- rocket/src/main/scala/divider.scala | 205 ++++++++++++++++++++------ rocket/src/main/scala/dpath.scala | 37 +---- rocket/src/main/scala/dpath_alu.scala | 22 +-- 4 files changed, 189 insertions(+), 114 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index fe2ab61d..a5cc132d 100644 --- 
a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -17,8 +17,8 @@ class ioCtrlDpath extends Bundle() val sel_alu2 = UFix(OUTPUT, 3); val fn_dw = Bool(OUTPUT); val fn_alu = UFix(OUTPUT, SZ_ALU_FN); - val mul_val = Bool(OUTPUT); - val mul_kill = Bool(OUTPUT) + val div_mul_val = Bool(OUTPUT) + val div_mul_kill = Bool(OUTPUT) val div_val = Bool(OUTPUT); val div_kill = Bool(OUTPUT) val sel_wa = Bool(OUTPUT); @@ -47,10 +47,7 @@ class ioCtrlDpath extends Bundle() val jalr_eq = Bool(INPUT) val ex_br_type = Bits(OUTPUT, SZ_BR) val ex_br_taken = Bool(INPUT) - val div_rdy = Bool(INPUT); - val div_result_val = Bool(INPUT); - val mul_rdy = Bool(INPUT); - val mul_result_val = Bool(INPUT); + val div_mul_rdy = Bool(INPUT) val mem_ll_wb = Bool(INPUT) val mem_ll_waddr = UFix(INPUT, 5) val ex_waddr = UFix(INPUT, 5); // write addr from execute stage @@ -359,8 +356,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val ex_reg_flush_inst = Reg(resetVal = Bool(false)) val ex_reg_jalr = Reg(resetVal = Bool(false)) val ex_reg_btb_hit = Reg(resetVal = Bool(false)) - val ex_reg_div_val = Reg(resetVal = Bool(false)) - val ex_reg_mul_val = Reg(resetVal = Bool(false)) + val ex_reg_div_mul_val = Reg(resetVal = Bool(false)) val ex_reg_mem_val = Reg(resetVal = Bool(false)) val ex_reg_xcpt = Reg(resetVal = Bool(false)) val ex_reg_fp_val = Reg(resetVal = Bool(false)) @@ -379,8 +375,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val mem_reg_wen = Reg(resetVal = Bool(false)) val mem_reg_fp_wen = Reg(resetVal = Bool(false)) val mem_reg_flush_inst = Reg(resetVal = Bool(false)) - val mem_reg_div_val = Reg(resetVal = Bool(false)) - val mem_reg_mul_val = Reg(resetVal = Bool(false)) + val mem_reg_div_mul_val = Reg(resetVal = Bool(false)) val mem_reg_mem_val = Reg(resetVal = Bool(false)) val mem_reg_xcpt = Reg(resetVal = Bool(false)) val mem_reg_fp_val = Reg(resetVal = Bool(false)) @@ -479,8 +474,7 @@ class Control(implicit conf: 
RocketConfiguration) extends Component when (ctrl_killd) { ex_reg_jalr := Bool(false) ex_reg_btb_hit := Bool(false); - ex_reg_div_val := Bool(false); - ex_reg_mul_val := Bool(false); + ex_reg_div_mul_val := Bool(false) ex_reg_mem_val := Bool(false); ex_reg_valid := Bool(false); ex_reg_wen := Bool(false); @@ -499,8 +493,7 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_br_type := id_br_type; ex_reg_jalr := id_jalr ex_reg_btb_hit := io.imem.resp.bits.taken - ex_reg_div_val := id_div_val - ex_reg_mul_val := id_mul_val + ex_reg_div_mul_val := id_mul_val || id_div_val ex_reg_mem_val := id_mem_val.toBool; ex_reg_valid := Bool(true) ex_reg_pcr := id_pcr @@ -521,8 +514,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val wb_dcache_miss = wb_reg_mem_val && !io.dmem.resp.valid val replay_ex = wb_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || ex_reg_mem_val && !io.dmem.req.ready || - ex_reg_div_val && !io.dpath.div_rdy || - ex_reg_mul_val && !io.dpath.mul_rdy || + ex_reg_div_mul_val && !io.dpath.div_mul_rdy || mem_reg_replay_next ctrl_killx := take_pc_wb || replay_ex @@ -535,8 +527,7 @@ class Control(implicit conf: RocketConfiguration) extends Component mem_reg_replay := replay_ex && !take_pc_wb; mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb when (ex_xcpt) { mem_reg_cause := ex_cause } - mem_reg_div_val := ex_reg_div_val && io.dpath.div_rdy - mem_reg_mul_val := ex_reg_mul_val && io.dpath.mul_rdy + mem_reg_div_mul_val := ex_reg_div_mul_val && io.dpath.div_mul_rdy when (ctrl_killx) { mem_reg_valid := Bool(false); @@ -603,7 +594,7 @@ class Control(implicit conf: RocketConfiguration) extends Component wb_reg_eret := mem_reg_eret && !mem_reg_replay wb_reg_flush_inst := mem_reg_flush_inst; wb_reg_mem_val := mem_reg_mem_val - wb_reg_div_mul_val := mem_reg_div_val || mem_reg_mul_val + wb_reg_div_mul_val := mem_reg_div_mul_val wb_reg_fp_val := mem_reg_fp_val wb_reg_replay_next := mem_reg_replay_next } @@ 
-674,7 +665,7 @@ class Control(implicit conf: RocketConfiguration) extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr || io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) - val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_val || ex_reg_mul_val || ex_reg_fp_val) || + val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val) || fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) // stall for RAW/WAW hazards on LB/LH and mul/div in memory stage. @@ -691,7 +682,7 @@ class Control(implicit conf: RocketConfiguration) extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_val || mem_reg_mul_val || mem_reg_fp_val) || + val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) || fp_data_hazard_mem && mem_reg_fp_val id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) @@ -731,10 +722,8 @@ class Control(implicit conf: RocketConfiguration) extends Component io.dpath.sel_alu2 := id_sel_alu2.toUFix io.dpath.fn_dw := id_fn_dw.toBool; io.dpath.fn_alu := id_fn_alu.toUFix - io.dpath.div_val := ex_reg_div_val - io.dpath.div_kill := mem_reg_div_val && killm_common - io.dpath.mul_val := ex_reg_mul_val - io.dpath.mul_kill := mem_reg_mul_val && killm_common + io.dpath.div_mul_val := ex_reg_div_mul_val + io.dpath.div_mul_kill := mem_reg_div_mul_val && killm_common io.dpath.ex_fp_val:= ex_reg_fp_val; io.dpath.mem_fp_val:= mem_reg_fp_val; io.dpath.ex_jalr := ex_reg_jalr diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index f7e97185..a15c284f 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ 
-4,62 +4,83 @@ import Chisel._ import Node._ import Constants._ import ALU._ +import Util._ -class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { +class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { val io = new MultiplierIO val w = io.req.bits.in1.getWidth + val mulw = (w+1+mulUnroll-1)/mulUnroll*mulUnroll - val s_ready :: s_neg_inputs :: s_busy :: s_neg_outputs :: s_done :: Nil = Enum(5) { UFix() }; + val s_ready :: s_neg_inputs :: s_mul_busy :: s_div_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(7) { UFix() }; val state = Reg(resetVal = s_ready); - val count = Reg() { UFix(width = log2Up(w+1)) } - val divby0 = Reg() { Bool() }; - val neg_quo = Reg() { Bool() }; - val neg_rem = Reg() { Bool() }; - val rem = Reg() { Bool() }; - val half = Reg() { Bool() }; - val r_req = Reg{io.req.bits.clone} - - val divisor = Reg() { Bits() } - val remainder = Reg() { Bits(width = 2*w+1) } - val subtractor = remainder(2*w,w) - divisor - - val dw = io.req.bits.dw - val fn = io.req.bits.fn - val tc = isMulFN(fn, FN_DIV) || isMulFN(fn, FN_REM) + val req = Reg{io.req.bits.clone} + val count = Reg{UFix(width = log2Up(w+1))} + val divby0 = Reg{Bool()} + val neg_out = Reg{Bool()} + val divisor = Reg{Bits(width = w+1)} // div only needs w bits + val remainder = Reg{Bits(width = 2*mulw+1)} // div only needs 2*w+1 bits - val lhs_sign = tc && Mux(dw === DW_64, io.req.bits.in1(w-1), io.req.bits.in1(w/2-1)) - val lhs_hi = Mux(dw === DW_64, io.req.bits.in1(w-1,w/2), Fill(w/2, lhs_sign)) - val lhs_in = Cat(lhs_hi, io.req.bits.in1(w/2-1,0)) - - val rhs_sign = tc && Mux(dw === DW_64, io.req.bits.in2(w-1), io.req.bits.in2(w/2-1)) - val rhs_hi = Mux(dw === DW_64, io.req.bits.in2(w-1,w/2), Fill(w/2, rhs_sign)) - val rhs_in = Cat(rhs_hi, io.req.bits.in2(w/2-1,0)) + def sext(x: Bits, cmds: Vec[Bits]) = { + val sign = Mux(io.req.bits.dw === DW_64, x(w-1), x(w/2-1)) && 
cmds.contains(io.req.bits.fn) + val hi = Mux(io.req.bits.dw === DW_64, x(w-1,w/2), Fill(w/2, sign)) + (Cat(hi, x(w/2-1,0)), sign) + } + val (lhs_in, lhs_sign) = sext(io.req.bits.in1, AVec(FN_DIV, FN_REM, FN_MULH, FN_MULHSU)) + val (rhs_in, rhs_sign) = sext(io.req.bits.in2, AVec(FN_DIV, FN_REM, FN_MULH)) + val subtractor = remainder(2*w,w) - divisor(w-1,0) + when (state === s_neg_inputs) { - state := s_busy + state := s_div_busy when (remainder(w-1)) { - remainder := Cat(remainder(2*w, w), -remainder(w-1,0)) + remainder := -remainder(w-1,0) } - when (divisor(w-1)) { + when (divisor(w-1) && !AVec(FN_MULHU, FN_MULHSU).contains(req.fn)) { divisor := subtractor(w-1,0) } } - when (state === s_neg_outputs) { + when (state === s_neg_output) { + remainder := -remainder(w-1,0) state := s_done - when (neg_rem && neg_quo && !divby0) { - remainder := Cat(-remainder(2*w, w+1), remainder(w), -remainder(w-1,0)) + } + when (state === s_move_rem) { + remainder := remainder(2*w, w+1) + state := Mux(neg_out, s_neg_output, s_done) + } + when (state === s_mul_busy) { + val carryIn = remainder(w) + val mplier = Cat(remainder(2*mulw,w+1),remainder(w-1,0)).toFix + val mpcand = divisor.toFix + val prod = mplier(mulUnroll-1,0) * mpcand + Mux(carryIn, mpcand, Fix(0)) + val sum = Cat(mplier(2*mulw-1,mulw) + prod, mplier(mulw-1,mulUnroll)) + val carryOut = mplier(mulUnroll-1) + remainder := Cat(sum(sum.getWidth-1,w), carryOut, sum(w-1,0)).toFix + + val cycles = mulw/mulUnroll + val shift1 = (UFix(cycles)-count)*mulUnroll + val shift = shift1(log2Up(w)-1,0) + val mask = (UFix(1) << shift) - 1 + val eOut = shift1 < w && !((mplier(w-1,0).toBits ^ carryIn.toFix) & mask).orR + val shifted = mplier >> shift + when (Bool(earlyOut) && eOut) { + remainder := Cat(shifted(sum.getWidth-1,w), carryOut, shifted(w-1,0)).toFix } - .elsewhen (neg_quo && !divby0) { - remainder := Cat(remainder(2*w, w), -remainder(w-1,0)) - } - .elsewhen (neg_rem) { - remainder := Cat(-remainder(2*w, w+1), remainder(w,0)) + + 
count := count + 1 + when (count === cycles-1 || Bool(earlyOut) && eOut) { + state := s_done + when (AVec(FN_MULH, FN_MULHU, FN_MULHSU) contains req.fn) { + state := s_move_rem + } } } - when (state === s_busy) { + when (state === s_div_busy) { when (count === UFix(w)) { - state := Mux(neg_quo || neg_rem, s_neg_outputs, s_done) + state := Mux(neg_out && !divby0, s_neg_output, s_done) + when (AVec(FN_REM, FN_REMU) contains req.fn) { + state := s_move_rem + } } count := count + UFix(1) @@ -69,13 +90,104 @@ class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) ext val divisorMSB = Log2(divisor, w) val dividendMSB = Log2(remainder(w-1,0), w) - val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB - val eOut = count === UFix(0) && eOutPos > dividendMSB && (divisorMSB != UFix(0) || divisor(0)) + val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB - dividendMSB + val eOut = count === UFix(0) && eOutPos > 0 && (divisorMSB != UFix(0) || divisor(0)) when (Bool(earlyOut) && eOut) { - val eOutDist = eOutPos - dividendMSB - val shift = Mux(divisorMSB >= dividendMSB, UFix(w-1), eOutDist(log2Up(w)-1,0)) + val shift = eOutPos(log2Up(w)-1,0) remainder := remainder(w-1,0) << shift count := shift + when (eOutPos(log2Up(w))) { + remainder := remainder(w-1,0) << w-1 + count := w-1 + } + } + } + when (io.resp.fire() || io.kill) { + state := s_ready + } + when (io.req.fire()) { + val isMul = AVec(FN_MUL, FN_MULH, FN_MULHU, FN_MULHSU).contains(io.req.bits.fn) + val isRem = AVec(FN_REM, FN_REMU).contains(io.req.bits.fn) + state := Mux(isMul, s_mul_busy, Mux(lhs_sign || rhs_sign, s_neg_inputs, s_div_busy)) + count := UFix(0) + neg_out := !isMul && Mux(isRem, lhs_sign, lhs_sign != rhs_sign) + divby0 := true + divisor := Cat(rhs_sign, rhs_in) + remainder := Cat(Fill(mulw-w, isMul && lhs_sign), Bool(false), lhs_in) + req := io.req.bits + } + + io.resp.bits := req + io.resp.bits.data := Mux(req.dw === DW_32, Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), 
remainder(w-1,0)) + io.resp.valid := state === s_done + io.req.ready := state === s_ready +} + +class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { + val io = new MultiplierIO + val w = io.req.bits.in1.getWidth + + val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(6) { UFix() }; + val state = Reg(resetVal = s_ready); + + val count = Reg() { UFix(width = log2Up(w+1)) } + val divby0 = Reg() { Bool() }; + val neg_out = Reg() { Bool() }; + val r_req = Reg{io.req.bits.clone} + + val divisor = Reg() { Bits() } + val remainder = Reg() { Bits(width = 2*w+1) } + val subtractor = remainder(2*w,w) - divisor + + def sext(x: Bits, cmds: Vec[Bits]) = { + val sign = Mux(io.req.bits.dw === DW_64, x(w-1), x(w/2-1)) && cmds.contains(io.req.bits.fn) + val hi = Mux(io.req.bits.dw === DW_64, x(w-1,w/2), Fill(w/2, sign)) + (Cat(hi, x(w/2-1,0)), sign) + } + val (lhs_in, lhs_sign) = sext(io.req.bits.in1, AVec(FN_DIV, FN_REM)) + val (rhs_in, rhs_sign) = sext(io.req.bits.in2, AVec(FN_DIV, FN_REM)) + + val r_isRem = isMulFN(r_req.fn, FN_REM) || isMulFN(r_req.fn, FN_REMU) + + when (state === s_neg_inputs) { + state := s_busy + when (remainder(w-1)) { + remainder := -remainder(w-1,0) + } + when (divisor(w-1)) { + divisor := subtractor(w-1,0) + } + } + when (state === s_neg_output) { + remainder := -remainder(w-1,0) + state := s_done + } + when (state === s_move_rem) { + remainder := remainder(2*w, w+1) + state := Mux(neg_out, s_neg_output, s_done) + } + when (state === s_busy) { + when (count === UFix(w)) { + state := Mux(r_isRem, s_move_rem, Mux(neg_out && !divby0, s_neg_output, s_done)) + } + count := count + UFix(1) + + val msb = subtractor(w) + divby0 := divby0 && !msb + remainder := Cat(Mux(msb, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !msb) + + val divisorMSB = Log2(divisor, w) + val dividendMSB = Log2(remainder(w-1,0), w) + val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB - 
dividendMSB + val eOut = count === UFix(0) && eOutPos > 0 && (divisorMSB != UFix(0) || divisor(0)) + when (Bool(earlyOut) && eOut) { + val shift = eOutPos(log2Up(w)-1,0) + remainder := remainder(w-1,0) << shift + count := shift + when (eOutPos(log2Up(w))) { + remainder := remainder(w-1,0) << w-1 + count := w-1 + } } } when (io.resp.fire() || io.kill) { @@ -84,20 +196,15 @@ class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) ext when (io.req.fire()) { state := Mux(lhs_sign || rhs_sign, s_neg_inputs, s_busy) count := UFix(0) - half := (dw === DW_32); - neg_quo := lhs_sign != rhs_sign - neg_rem := lhs_sign - rem := isMulFN(fn, FN_REM) || isMulFN(fn, FN_REMU) - divby0 := Bool(true); + neg_out := Mux(AVec(FN_REM, FN_REMU).contains(io.req.bits.fn), lhs_sign, lhs_sign != rhs_sign) + divby0 := true divisor := rhs_in remainder := lhs_in r_req := io.req.bits } - val result = Mux(rem, remainder(w+w, w+1), remainder(w-1,0)) - io.resp.bits := r_req - io.resp.bits.data := Mux(half, Cat(Fill(w/2, result(w/2-1)), result(w/2-1,0)), result) + io.resp.bits.data := Mux(r_req.dw === DW_32, Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0)) io.resp.valid := state === s_done io.req.ready := state === s_ready } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 3ceca7e8..ee6630e5 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -158,31 +158,17 @@ class Datapath(implicit conf: RocketConfiguration) extends Component alu.io.in2 := ex_op2.toUFix alu.io.in1 := ex_rs1.toUFix - // divider - val div = new Divider(earlyOut = true) - div.io.req.valid := io.ctrl.div_val + // multiplier and divider + val div = new MulDiv(mulUnroll = 4, earlyOut = true) + div.io.req.valid := io.ctrl.div_mul_val div.io.req.bits.dw := ex_reg_ctrl_fn_dw div.io.req.bits.fn := ex_reg_ctrl_fn_alu div.io.req.bits.in1 := ex_rs1 div.io.req.bits.in2 := ex_rs2 div.io.req.bits.tag := ex_reg_waddr - 
div.io.kill := io.ctrl.div_kill + div.io.kill := io.ctrl.div_mul_kill div.io.resp.ready := Bool(true) - io.ctrl.div_rdy := div.io.req.ready - io.ctrl.div_result_val := div.io.resp.valid - - // multiplier - val mul = new Multiplier(unroll = 4, earlyOut = true) - mul.io.req.valid := io.ctrl.mul_val - mul.io.req.bits.dw := ex_reg_ctrl_fn_dw - mul.io.req.bits.fn := ex_reg_ctrl_fn_alu - mul.io.req.bits.in1 := ex_rs1 - mul.io.req.bits.in2 := ex_rs2 - mul.io.req.bits.tag := ex_reg_waddr - mul.io.kill := io.ctrl.mul_kill - mul.io.resp.ready := Bool(true) - io.ctrl.mul_rdy := mul.io.req.ready - io.ctrl.mul_result_val := mul.io.resp.valid + io.ctrl.div_mul_rdy := div.io.req.ready io.fpu.fromint_data := ex_rs1 io.ctrl.ex_waddr := ex_reg_waddr @@ -266,17 +252,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu val mem_ll_wdata = Bits() - mem_ll_wdata := mul.io.resp.bits.data - io.ctrl.mem_ll_waddr := mul.io.resp.bits.tag - io.ctrl.mem_ll_wb := mul.io.resp.valid - when (div.io.resp.valid) { - mul.io.resp.ready := Bool(false) - mem_ll_wdata := div.io.resp.bits.data - io.ctrl.mem_ll_waddr := div.io.resp.bits.tag - io.ctrl.mem_ll_wb := Bool(true) - } + mem_ll_wdata := div.io.resp.bits.data + io.ctrl.mem_ll_waddr := div.io.resp.bits.tag + io.ctrl.mem_ll_wb := div.io.resp.valid when (dmem_resp_replay) { - mul.io.resp.ready := Bool(false) div.io.resp.ready := Bool(false) mem_ll_wdata := io.dmem.resp.bits.data_subword io.ctrl.mem_ll_waddr := dmem_resp_waddr diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 14ec1426..5a29ab20 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -9,17 +9,17 @@ object ALU { val SZ_ALU_FN = 4 val FN_X = Bits("b????") - val FN_ADD = UFix(0) - val FN_SL = UFix(1) - val FN_XOR = UFix(4) - val FN_OR = UFix(6) - val FN_AND = UFix(7) - val FN_SR = UFix(5) - val FN_SUB = UFix(8) - val 
FN_SLT = UFix(10) - val FN_SLTU = UFix(11) - val FN_SRA = UFix(13) - val FN_OP2 = UFix(15) + val FN_ADD = Bits(0) + val FN_SL = Bits(1) + val FN_XOR = Bits(4) + val FN_OR = Bits(6) + val FN_AND = Bits(7) + val FN_SR = Bits(5) + val FN_SUB = Bits(8) + val FN_SLT = Bits(10) + val FN_SLTU = Bits(11) + val FN_SRA = Bits(13) + val FN_OP2 = Bits(15) val FN_DIV = FN_XOR val FN_DIVU = FN_SR From ce9f4881d277c82828a47386a1e12e0586f403c5 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 6 Jan 2013 03:47:00 -0800 Subject: [PATCH 0541/1087] remove broken multiplier early out --- rocket/src/main/scala/divider.scala | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index a15c284f..cb9215e9 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -52,23 +52,16 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke val carryIn = remainder(w) val mplier = Cat(remainder(2*mulw,w+1),remainder(w-1,0)).toFix val mpcand = divisor.toFix - val prod = mplier(mulUnroll-1,0) * mpcand + Mux(carryIn, mpcand, Fix(0)) - val sum = Cat(mplier(2*mulw-1,mulw) + prod, mplier(mulw-1,mulUnroll)) + val prod0 = mplier(2*mulw-1,mulw) + + (if (mulUnroll == 1) Mux(mplier(0), -Cat(mpcand < Fix(0), mpcand).toFix, Mux(carryIn, mpcand, Fix(0))) + else (mplier(mulUnroll-1,0) + carryIn.toUFix).toFix * mpcand) + val prod = Mux(mplier(mulUnroll-1,0).andR && carryIn, mplier(2*mulw-1,mulw), prod0) + val sum = Cat(prod, mplier(mulw-1,mulUnroll)) val carryOut = mplier(mulUnroll-1) remainder := Cat(sum(sum.getWidth-1,w), carryOut, sum(w-1,0)).toFix - val cycles = mulw/mulUnroll - val shift1 = (UFix(cycles)-count)*mulUnroll - val shift = shift1(log2Up(w)-1,0) - val mask = (UFix(1) << shift) - 1 - val eOut = shift1 < w && !((mplier(w-1,0).toBits ^ carryIn.toFix) & mask).orR - val shifted = mplier >> shift - when (Bool(earlyOut) && eOut) { - 
remainder := Cat(shifted(sum.getWidth-1,w), carryOut, shifted(w-1,0)).toFix - } - count := count + 1 - when (count === cycles-1 || Bool(earlyOut) && eOut) { + when (count === mulw/mulUnroll-1) { state := s_done when (AVec(FN_MULH, FN_MULHU, FN_MULHSU) contains req.fn) { state := s_move_rem From 78868f60757d3faa57ab563f6f99d43cbb90f631 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 6 Jan 2013 03:47:17 -0800 Subject: [PATCH 0542/1087] add config option to trade mul/div area for speed --- rocket/src/main/scala/dpath.scala | 3 ++- rocket/src/main/scala/tile.scala | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index ee6630e5..93236aaa 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -159,7 +159,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Component alu.io.in1 := ex_rs1.toUFix // multiplier and divider - val div = new MulDiv(mulUnroll = 4, earlyOut = true) + val div = new MulDiv(mulUnroll = if (conf.fastMulDiv) 8 else 1, + earlyOut = conf.fastMulDiv) div.io.req.valid := io.ctrl.div_mul_val div.io.req.bits.dw := ex_reg_ctrl_fn_dw div.io.req.bits.fn := ex_reg_ctrl_fn_alu diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 55dfc000..1ea180ca 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -10,7 +10,8 @@ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, icache: ICacheConfig, dcache: DCacheConfig, fpu: Boolean, vec: Boolean, fastLoadWord: Boolean = true, - fastLoadByte: Boolean = false) + fastLoadByte: Boolean = false, + fastMulDiv: Boolean = true) { val dcacheReqTagBits = 9 // enforce compliance with require() val xprlen = 64 From 261e14f83186356eed3a5c4e6dc07de19b02e05f Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 13 Dec 2012 11:45:42 -0800 Subject: [PATCH 0543/1087] Refactored uncore conf --- 
rocket/src/main/scala/htif.scala | 12 ++++++------ rocket/src/main/scala/nbdcache.scala | 2 +- rocket/src/main/scala/tile.scala | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 2a777d5e..b6f6f3eb 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -36,11 +36,11 @@ class ioHTIF(ntiles: Int) extends Bundle val ipi_rep = (new FIFOIO) { Bool() }.flip } -class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component +class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Component with ClientCoherenceAgent { val io = new Bundle { val host = new ioHost(w) - val cpu = Vec(conf.ntiles) { new ioHTIF(conf.ntiles).flip } + val cpu = Vec(conf.ln.nTiles) { new ioHTIF(conf.ln.nTiles).flip } val mem = new ioTileLink } @@ -81,7 +81,7 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.width-1, 0) - val pcr_coreid = if (conf.ntiles == 1) UFix(0) else addr(20+log2Up(conf.ntiles),20) + val pcr_coreid = if (conf.ln.nTiles == 1) UFix(0) else addr(20+log2Up(conf.ln.nTiles),20) val pcr_wdata = packet_ram(0) val bad_mem_packet = size(OFFSET_BITS-1-3,0).orR || addr(OFFSET_BITS-1-3,0).orR @@ -193,8 +193,8 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component io.mem.probe_rep_data.valid := Bool(false) io.mem.incoherent := Bool(true) - val pcrReadData = Vec(conf.ntiles) { Reg() { Bits(width = io.cpu(0).pcr_rep.bits.getWidth) } } - for (i <- 0 until conf.ntiles) { + val pcrReadData = Vec(conf.ln.nTiles) { Reg() { Bits(width = io.cpu(0).pcr_rep.bits.getWidth) } } + for (i <- 0 until conf.ln.nTiles) { val my_reset = Reg(resetVal = Bool(true)) val my_ipi = Reg(resetVal = Bool(false)) @@ -211,7 +211,7 @@ class 
rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component } cpu.ipi_rep.valid := my_ipi cpu.ipi_req.ready := Bool(true) - for (j <- 0 until conf.ntiles) { + for (j <- 0 until conf.ln.nTiles) { when (io.cpu(j).ipi_req.valid && io.cpu(j).ipi_req.bits === UFix(i)) { my_ipi := Bool(true) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index a9f55762..a9158cfc 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -858,7 +858,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Component { readArb.io.in(0).bits.way_en := Fix(-1) // tag check and way muxing - def wayMap[T <: Data](f: Int => T)(gen: => T) = Vec((0 until conf.ways).map(i => f(i))){gen} + def wayMap[T <: Data](f: Int => T)(gen: => T) = Vec((0 until conf.ways).map(f)){gen} val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)){Bits()}.toBits val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && conf.co.isValid(meta.io.resp(w).state)){Bits()}.toBits s1_clk_en := metaReadArb.io.out.valid diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 1ea180ca..f259cf33 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -21,7 +21,7 @@ case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, if (fastLoadByte) require(fastLoadWord) } -class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) +class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) with ClientCoherenceAgent { val memPorts = 2 + confIn.vec implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts), databits = confIn.xprlen) From e1225c5114c697d6e84533cc84a9607dd8b77b24 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 7 Jan 2013 13:38:59 -0800 Subject: [PATCH 0544/1087] standardize IO naming convention 
--- rocket/src/main/scala/arbiter.scala | 10 +++++----- rocket/src/main/scala/core.scala | 12 ++++++------ rocket/src/main/scala/ctrl.scala | 14 +++++++------- rocket/src/main/scala/ctrl_vec.scala | 12 ++++++------ rocket/src/main/scala/dpath.scala | 16 ++++++++-------- rocket/src/main/scala/dpath_util.scala | 6 +++--- rocket/src/main/scala/dpath_vec.scala | 10 +++++----- rocket/src/main/scala/fpu.scala | 8 ++++---- rocket/src/main/scala/htif.scala | 26 ++++++++++++++++++-------- rocket/src/main/scala/icache.scala | 10 +++++----- rocket/src/main/scala/nbdcache.scala | 10 +++++----- rocket/src/main/scala/ptw.scala | 10 +++++----- rocket/src/main/scala/tile.scala | 7 ++++--- rocket/src/main/scala/tlb.scala | 2 +- 14 files changed, 82 insertions(+), 71 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 119362ee..c748141e 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -8,8 +8,8 @@ import uncore._ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { - val requestor = Vec(n) { new ioHellaCache()(conf.dcache) }.flip - val mem = new ioHellaCache()(conf.dcache) + val requestor = Vec(n) { new HellaCacheIO()(conf.dcache) }.flip + val mem = new HellaCacheIO()(conf.dcache) } val r_valid = io.requestor.map(r => Reg(r.req.valid)) @@ -48,7 +48,7 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp } } -class ioUncachedRequestor extends Bundle { +class UncachedRequestorIO extends Bundle { val xact_init = (new FIFOIO) { new TransactionInit } val xact_abort = (new FIFOIO) { new TransactionAbort }.flip val xact_rep = (new FIFOIO) { new TransactionReply }.flip @@ -57,8 +57,8 @@ class ioUncachedRequestor extends Bundle { class MemArbiter(n: Int) extends Component { val io = new Bundle { - val mem = new ioUncachedRequestor - val requestor = Vec(n) { new ioUncachedRequestor }.flip + val mem = new 
UncachedRequestorIO + val requestor = Vec(n) { new UncachedRequestorIO }.flip } var xi_bits = new TransactionInit diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 2c161514..2c27e514 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -6,17 +6,17 @@ import Constants._ import hwacha._ import Util._ -class ioRocket(implicit conf: RocketConfiguration) extends Bundle +class RocketIO(implicit conf: RocketConfiguration) extends Bundle { - val host = new ioHTIF(conf.ntiles) - val imem = new IOCPUFrontend()(conf.icache) - val vimem = new IOCPUFrontend()(conf.icache) - val dmem = new ioHellaCache()(conf.dcache) + val host = new HTIFIO(conf.lnConf.nTiles) + val imem = new CPUFrontendIO()(conf.icache) + val vimem = new CPUFrontendIO()(conf.icache) + val dmem = new HellaCacheIO()(conf.dcache) } class Core(implicit conf: RocketConfiguration) extends Component { - val io = new ioRocket + val io = new RocketIO val ctrl = new Control val dpath = new Datapath diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index a5cc132d..c4b23b76 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -7,7 +7,7 @@ import Instructions._ import hwacha._ import ALU._ -class ioCtrlDpath extends Bundle() +class CtrlDpathIO extends Bundle() { // outputs to datapath val sel_pc = UFix(OUTPUT, 3); @@ -318,18 +318,18 @@ object VDecode extends DecodeConstants class Control(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { - val dpath = new ioCtrlDpath - val imem = new IOCPUFrontend()(conf.icache) - val dmem = new ioHellaCache()(conf.dcache) + val dpath = new CtrlDpathIO + val imem = new CPUFrontendIO()(conf.icache) + val dmem = new HellaCacheIO()(conf.dcache) val dtlb_val = Bool(OUTPUT) val dtlb_kill = Bool(OUTPUT) val dtlb_rdy = Bool(INPUT) val dtlb_miss = Bool(INPUT) val xcpt_dtlb_ld = Bool(INPUT) val xcpt_dtlb_st = Bool(INPUT) - val fpu = new 
ioCtrlFPU - val vec_dpath = new ioCtrlDpathVec - val vec_iface = new ioCtrlVecInterface + val fpu = new CtrlFPUIO + val vec_dpath = new CtrlDpathVecIO + val vec_iface = new CtrlVecInterfaceIO } var decode_table = XDecode.table diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index fd45b56b..35974ec9 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -6,7 +6,7 @@ import Constants._ import Instructions._ import hwacha.Constants._ -class ioCtrlDpathVec extends Bundle +class CtrlDpathVecIO extends Bundle { val inst = Bits(INPUT, 32) val appvl0 = Bool(INPUT) @@ -18,7 +18,7 @@ class ioCtrlDpathVec extends Bundle val sel_vimm2 = Bits(OUTPUT, 1) } -class ioCtrlVecInterface extends Bundle +class CtrlVecInterfaceIO extends Bundle { val vcmdq = new FIFOIO()(Bits(width = SZ_VCMD)) val vximm1q = new FIFOIO()(Bits(width = SZ_VIMM)) @@ -45,10 +45,10 @@ class ioCtrlVecInterface extends Bundle val hold = Bool(OUTPUT) } -class ioCtrlVec extends Bundle +class CtrlVecIO extends Bundle { - val dpath = new ioCtrlDpathVec() - val iface = new ioCtrlVecInterface() + val dpath = new CtrlDpathVecIO + val iface = new CtrlVecInterfaceIO val valid = Bool(INPUT) val s = Bool(INPUT) val sr_ev = Bool(INPUT) @@ -188,7 +188,7 @@ class rocketCtrlVecDecoder extends Component class rocketCtrlVec extends Component { - val io = new ioCtrlVec() + val io = new CtrlVecIO val dec = new rocketCtrlVecDecoder() dec.io.inst := io.dpath.inst diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 93236aaa..e7387a27 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -10,14 +10,14 @@ import hwacha._ class Datapath(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { - val host = new ioHTIF(conf.ntiles) - val ctrl = new ioCtrlDpath().flip - val dmem = new ioHellaCache()(conf.dcache) - val ptw = new IODatapathPTW().flip - val imem = new 
IOCPUFrontend()(conf.icache) - val fpu = new ioDpathFPU(); - val vec_ctrl = new ioCtrlDpathVec().flip - val vec_iface = new ioDpathVecInterface() + val host = new HTIFIO(conf.lnConf.nTiles) + val ctrl = (new CtrlDpathIO).flip + val dmem = new HellaCacheIO()(conf.dcache) + val ptw = (new DatapathPTWIO).flip + val imem = new CPUFrontendIO()(conf.icache) + val fpu = new DpathFPUIO + val vec_ctrl = (new CtrlDpathVecIO).flip + val vec_iface = new DpathVecInterfaceIO } // execute definitions diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 067bea05..131ae28c 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -6,7 +6,7 @@ import Constants._ import scala.math._ import Util._ -class ioDpathBTB extends Bundle() +class DpathBTBIO extends Bundle { val current_pc = UFix(INPUT, VADDR_BITS); val hit = Bool(OUTPUT); @@ -21,7 +21,7 @@ class ioDpathBTB extends Bundle() // fully-associative branch target buffer class rocketDpathBTB(entries: Int) extends Component { - val io = new ioDpathBTB(); + val io = new DpathBTBIO val repl_way = LFSR16(io.wen)(log2Up(entries)-1,0) // TODO: pseudo-LRU @@ -104,7 +104,7 @@ object PCR class PCR(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { - val host = new ioHTIF(conf.ntiles) + val host = new HTIFIO(conf.lnConf.nTiles) val r = new ioReadPort(conf.nxpr, conf.xprlen) val w = new ioWritePort(conf.nxpr, conf.xprlen) diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 9ef695a2..caceae02 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -6,7 +6,7 @@ import Constants._ import Instructions._ import hwacha.Constants._ -class ioDpathVecInterface extends Bundle +class DpathVecInterfaceIO extends Bundle { val vcmdq = new FIFOIO()(Bits(width = SZ_VCMD)) val vximm1q = new FIFOIO()(Bits(width = SZ_VIMM)) @@ -22,10 +22,10 @@ class 
ioDpathVecInterface extends Bundle val irq_aux = Bits(INPUT, 64) } -class ioDpathVec extends Bundle +class DpathVecIO extends Bundle { - val ctrl = new ioCtrlDpathVec().flip - val iface = new ioDpathVecInterface() + val ctrl = (new CtrlDpathVecIO).flip + val iface = new DpathVecInterfaceIO val valid = Bool(INPUT) val inst = Bits(INPUT, 32) val vecbank = Bits(INPUT, 8) @@ -41,7 +41,7 @@ class ioDpathVec extends Bundle class rocketDpathVec extends Component { - val io = new ioDpathVec() + val io = new DpathVecIO val nxregs_stage = Mux(io.ctrl.fn === VEC_CFG, io.wdata(5,0), io.inst(15,10)) val nfregs_stage = Mux(io.ctrl.fn === VEC_CFG, io.rs2(5,0), io.inst(21,16)) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index ef6d708c..41fd4cb4 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -152,7 +152,7 @@ class FPUDecoder extends Component io.sigs.wrfsr := wrfsr.toBool } -class ioDpathFPU extends Bundle { +class DpathFPUIO extends Bundle { val inst = Bits(OUTPUT, 32) val fromint_data = Bits(OUTPUT, 64) @@ -165,7 +165,7 @@ class ioDpathFPU extends Bundle { val dmem_resp_data = Bits(OUTPUT, 64) } -class ioCtrlFPU extends Bundle { +class CtrlFPUIO extends Bundle { val valid = Bool(OUTPUT) val nack_mem = Bool(INPUT) val illegal_rm = Bool(INPUT) @@ -456,8 +456,8 @@ class FPUDFMAPipe(val latency: Int) extends Component class FPU(sfma_latency: Int, dfma_latency: Int) extends Component { val io = new Bundle { - val ctrl = new ioCtrlFPU().flip - val dpath = new ioDpathFPU().flip + val ctrl = (new CtrlFPUIO).flip + val dpath = (new DpathFPUIO).flip val sfma = new ioFMA(33) val dfma = new ioFMA(65) } diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index b6f6f3eb..678d3d69 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -6,12 +6,12 @@ import Constants._ import uncore._ import Util._ -class ioDebug extends Bundle +class DebugIO extends Bundle { 
val error_mode = Bool(OUTPUT); } -class ioHost(val w: Int) extends Bundle +class HostIO(val w: Int) extends Bundle { val clk = Bool(OUTPUT) val clk_edge = Bool(OUTPUT) @@ -26,10 +26,10 @@ class PCRReq extends Bundle val data = Bits(width = 64) } -class ioHTIF(ntiles: Int) extends Bundle +class HTIFIO(ntiles: Int) extends Bundle { val reset = Bool(INPUT) - val debug = new ioDebug + val debug = new DebugIO val pcr_req = (new FIFOIO) { new PCRReq }.flip val pcr_rep = (new FIFOIO) { Bits(width = 64) } val ipi_req = (new FIFOIO) { Bits(width = log2Up(ntiles)) } @@ -39,9 +39,9 @@ class ioHTIF(ntiles: Int) extends Bundle class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Component with ClientCoherenceAgent { val io = new Bundle { - val host = new ioHost(w) - val cpu = Vec(conf.ln.nTiles) { new ioHTIF(conf.ln.nTiles).flip } - val mem = new ioTileLink + val host = new HostIO(w) + val cpu = Vec(conf.ln.nTiles) { new HTIFIO(conf.ln.nTiles).flip } + val mem = new TileLinkIO()(conf.ln) } val short_request_bits = 64 @@ -191,7 +191,17 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo io.mem.probe_req.ready := Bool(false) io.mem.probe_rep.valid := Bool(false) io.mem.probe_rep_data.valid := Bool(false) - io.mem.incoherent := Bool(true) + + io.mem.xact_init.header.src := UFix(1) + io.mem.xact_init.header.dst := UFix(0) + io.mem.xact_init_data.header.src := UFix(1) + io.mem.xact_init_data.header.dst := UFix(0) + io.mem.probe_rep.header.src := UFix(1) + io.mem.probe_rep.header.dst := UFix(0) + io.mem.probe_rep_data.header.src := UFix(1) + io.mem.probe_rep_data.header.dst := UFix(0) + io.mem.xact_finish.header.src := UFix(1) + io.mem.xact_finish.header.dst := UFix(0) val pcrReadData = Vec(conf.ln.nTiles) { Reg() { Bits(width = io.cpu(0).pcr_rep.bits.getWidth) } } for (i <- 0 until conf.ln.nTiles) { diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 420377f4..c47c0768 100644 --- 
a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -43,18 +43,18 @@ class FrontendResp(implicit conf: ICacheConfig) extends Bundle { override def clone = new FrontendResp().asInstanceOf[this.type] } -class IOCPUFrontend(implicit conf: ICacheConfig) extends Bundle { +class CPUFrontendIO(implicit conf: ICacheConfig) extends Bundle { val req = new PipeIO()(new FrontendReq) val resp = new FIFOIO()(new FrontendResp).flip - val ptw = new IOTLBPTW().flip + val ptw = new TLBPTWIO().flip val invalidate = Bool(OUTPUT) } class Frontend(implicit c: ICacheConfig) extends Component { val io = new Bundle { - val cpu = new IOCPUFrontend()(c).flip - val mem = new ioUncachedRequestor + val cpu = new CPUFrontendIO()(c).flip + val mem = new UncachedRequestorIO } val btb = new rocketDpathBTB(c.nbtb) @@ -134,7 +134,7 @@ class ICache(implicit c: ICacheConfig) extends Component val datablock = Bits(width = c.databits) }) val invalidate = Bool(INPUT) - val mem = new ioUncachedRequestor + val mem = new UncachedRequestorIO } val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(4) { UFix() } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index a9158cfc..0dcbf9dd 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -726,17 +726,17 @@ class HellaCacheExceptions extends Bundle { } // interface between D$ and processor/DTLB -class ioHellaCache(implicit conf: DCacheConfig) extends Bundle { +class HellaCacheIO(implicit conf: DCacheConfig) extends Bundle { val req = (new FIFOIO){ new HellaCacheReq } val resp = (new PipeIO){ new HellaCacheResp }.flip val xcpt = (new HellaCacheExceptions).asInput - val ptw = new IOTLBPTW().flip + val ptw = (new TLBPTWIO).flip } -class HellaCache(implicit conf: DCacheConfig) extends Component { +class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguration) extends Component { val io = new Bundle { - val cpu = (new 
ioHellaCache).flip - val mem = new ioTileLink + val cpu = (new HellaCacheIO).flip + val mem = new TileLinkIO } val indexmsb = conf.untagbits-1 diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index c87a6e5c..7972dc3d 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -5,7 +5,7 @@ import Node._ import Constants._ import Util._ -class IOTLBPTW extends Bundle { +class TLBPTWIO extends Bundle { val req = new FIFOIO()(UFix(width = VPN_BITS)) val resp = new PipeIO()(new Bundle { val error = Bool() @@ -17,7 +17,7 @@ class IOTLBPTW extends Bundle { val invalidate = Bool(INPUT) } -class IODatapathPTW extends Bundle { +class DatapathPTWIO extends Bundle { val ptbr = UFix(INPUT, PADDR_BITS) val invalidate = Bool(INPUT) val status = new Status().asInput @@ -26,9 +26,9 @@ class IODatapathPTW extends Bundle { class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { - val requestor = Vec(n) { new IOTLBPTW }.flip - val mem = new ioHellaCache()(conf.dcache) - val dpath = new IODatapathPTW + val requestor = Vec(n) { new TLBPTWIO }.flip + val mem = new HellaCacheIO()(conf.dcache) + val dpath = new DatapathPTWIO } val levels = 3 diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index f259cf33..1018bde0 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -6,7 +6,7 @@ import Constants._ import uncore._ import Util._ -case class RocketConfiguration(ntiles: Int, co: CoherencePolicyWithUncached, +case class RocketConfiguration(lnConf: LogicalNetworkConfiguration, co: CoherencePolicyWithUncached, icache: ICacheConfig, dcache: DCacheConfig, fpu: Boolean, vec: Boolean, fastLoadWord: Boolean = true, @@ -25,11 +25,12 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon { val memPorts = 2 + confIn.vec implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts), 
databits = confIn.xprlen) + implicit val lnConf = confIn.lnConf implicit val conf = confIn.copy(dcache = dcConf) val io = new Bundle { - val tilelink = new ioTileLink - val host = new ioHTIF(conf.ntiles) + val tilelink = new TileLinkIO + val host = new HTIFIO(lnConf.nTiles) } val core = new Core diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 101742d3..33c4377d 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -91,7 +91,7 @@ class TLB(entries: Int) extends Component val io = new Bundle { val req = new FIFOIO()(new TLBReq).flip val resp = new TLBResp(entries) - val ptw = new IOTLBPTW + val ptw = new TLBPTWIO } val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4) { UFix() } From a2fa3fd04dba2053653cc669645040d6fab409bf Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 15 Jan 2013 15:50:37 -0800 Subject: [PATCH 0545/1087] Refactored packet headers/payloads --- rocket/src/main/scala/arbiter.scala | 30 ++++++++++++------------- rocket/src/main/scala/htif.scala | 33 ++++++++++++++-------------- rocket/src/main/scala/icache.scala | 14 ++++++------ rocket/src/main/scala/nbdcache.scala | 20 ++++++++--------- rocket/src/main/scala/tile.scala | 4 ++-- 5 files changed, 51 insertions(+), 50 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index c748141e..b60f845e 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -48,32 +48,32 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp } } -class UncachedRequestorIO extends Bundle { - val xact_init = (new FIFOIO) { new TransactionInit } - val xact_abort = (new FIFOIO) { new TransactionAbort }.flip - val xact_rep = (new FIFOIO) { new TransactionReply }.flip - val xact_finish = (new FIFOIO) { new TransactionFinish } +class UncachedRequestorIO(implicit conf: LogicalNetworkConfiguration) extends Bundle { + val xact_init = 
(new ClientSourcedIO){(new LogicalNetworkIO){new TransactionInit }} + val xact_abort = (new MasterSourcedIO) {(new LogicalNetworkIO){new TransactionAbort }} + val xact_rep = (new MasterSourcedIO) {(new LogicalNetworkIO){new TransactionReply }} + val xact_finish = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionFinish }} } -class MemArbiter(n: Int) extends Component { +class MemArbiter(n: Int)(implicit conf: LogicalNetworkConfiguration) extends Component { val io = new Bundle { val mem = new UncachedRequestorIO val requestor = Vec(n) { new UncachedRequestorIO }.flip } var xi_bits = new TransactionInit - xi_bits := io.requestor(n-1).xact_init.bits - xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.tile_xact_id, UFix(n-1, log2Up(n))) + xi_bits := io.requestor(n-1).xact_init.bits.payload + xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.payload.tile_xact_id, UFix(n-1, log2Up(n))) for (i <- n-2 to 0 by -1) { var my_xi_bits = new TransactionInit - my_xi_bits := io.requestor(i).xact_init.bits - my_xi_bits.tile_xact_id := Cat(io.requestor(i).xact_init.bits.tile_xact_id, UFix(i, log2Up(n))) + my_xi_bits := io.requestor(i).xact_init.bits.payload + my_xi_bits.tile_xact_id := Cat(io.requestor(i).xact_init.bits.payload.tile_xact_id, UFix(i, log2Up(n))) xi_bits = Mux(io.requestor(i).xact_init.valid, my_xi_bits, xi_bits) } - io.mem.xact_init.bits := xi_bits + io.mem.xact_init.bits.payload := xi_bits io.mem.xact_init.valid := io.requestor.map(_.xact_init.valid).reduce(_||_) io.requestor(0).xact_init.ready := io.mem.xact_init.ready for (i <- 1 until n) @@ -92,22 +92,22 @@ class MemArbiter(n: Int) extends Component { io.mem.xact_rep.ready := Bool(false) for (i <- 0 until n) { - val tag = io.mem.xact_rep.bits.tile_xact_id + val tag = io.mem.xact_rep.bits.payload.tile_xact_id io.requestor(i).xact_rep.valid := Bool(false) when (tag(log2Up(n)-1,0) === UFix(i)) { io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid io.mem.xact_rep.ready := 
io.requestor(i).xact_rep.ready } io.requestor(i).xact_rep.bits := io.mem.xact_rep.bits - io.requestor(i).xact_rep.bits.tile_xact_id := tag >> UFix(log2Up(n)) + io.requestor(i).xact_rep.bits.payload.tile_xact_id := tag >> UFix(log2Up(n)) } for (i <- 0 until n) { - val tag = io.mem.xact_abort.bits.tile_xact_id + val tag = io.mem.xact_abort.bits.payload.tile_xact_id io.requestor(i).xact_abort.valid := io.mem.xact_abort.valid && tag(log2Up(n)-1,0) === UFix(i) io.requestor(i).xact_abort.bits := io.mem.xact_abort.bits - io.requestor(i).xact_abort.bits.tile_xact_id := tag >> UFix(log2Up(n)) + io.requestor(i).xact_abort.bits.payload.tile_xact_id := tag >> UFix(log2Up(n)) } io.mem.xact_abort.ready := Bool(true) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 678d3d69..682576cf 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -38,6 +38,7 @@ class HTIFIO(ntiles: Int) extends Bundle class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Component with ClientCoherenceAgent { + implicit val lnConf = conf.ln val io = new Bundle { val host = new HostIO(w) val cpu = Vec(conf.ln.nTiles) { new HTIFIO(conf.ln.nTiles).flip } @@ -107,8 +108,8 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo val mem_nacked = Reg(resetVal = Bool(false)) when (io.mem.xact_rep.valid) { mem_acked := Bool(true) - mem_gxid := io.mem.xact_rep.bits.global_xact_id - mem_needs_ack := io.mem.xact_rep.bits.require_ack + mem_gxid := io.mem.xact_rep.bits.payload.global_xact_id + mem_needs_ack := io.mem.xact_rep.bits.payload.require_ack } io.mem.xact_rep.ready := Bool(true) when (io.mem.xact_abort.valid) { mem_nacked := Bool(true) } @@ -175,7 +176,7 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo for (i <- 0 until MEM_DATA_BITS/short_request_bits) { val idx = Cat(mem_cnt, UFix(i, log2Up(MEM_DATA_BITS/short_request_bits))) when (state === state_mem_rdata 
&& io.mem.xact_rep.valid) { - packet_ram(idx) := io.mem.xact_rep.bits.data((i+1)*short_request_bits-1, i*short_request_bits) + packet_ram(idx) := io.mem.xact_rep.bits.payload.data((i+1)*short_request_bits-1, i*short_request_bits) } mem_req_data = Cat(packet_ram(idx), mem_req_data) } @@ -183,25 +184,25 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo val init_addr = addr.toUFix >> UFix(OFFSET_BITS-3) val co = conf.co.asInstanceOf[CoherencePolicyWithUncached] x_init.io.enq.bits := Mux(cmd === cmd_writemem, co.getUncachedWriteTransactionInit(init_addr, UFix(0)), co.getUncachedReadTransactionInit(init_addr, UFix(0))) - io.mem.xact_init <> x_init.io.deq + io.mem.xact_init <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq) io.mem.xact_init_data.valid:= state === state_mem_wdata - io.mem.xact_init_data.bits.data := mem_req_data + io.mem.xact_init_data.bits.payload.data := mem_req_data io.mem.xact_finish.valid := (state === state_mem_finish) && mem_needs_ack - io.mem.xact_finish.bits.global_xact_id := mem_gxid + io.mem.xact_finish.bits.payload.global_xact_id := mem_gxid io.mem.probe_req.ready := Bool(false) io.mem.probe_rep.valid := Bool(false) io.mem.probe_rep_data.valid := Bool(false) - io.mem.xact_init.header.src := UFix(1) - io.mem.xact_init.header.dst := UFix(0) - io.mem.xact_init_data.header.src := UFix(1) - io.mem.xact_init_data.header.dst := UFix(0) - io.mem.probe_rep.header.src := UFix(1) - io.mem.probe_rep.header.dst := UFix(0) - io.mem.probe_rep_data.header.src := UFix(1) - io.mem.probe_rep_data.header.dst := UFix(0) - io.mem.xact_finish.header.src := UFix(1) - io.mem.xact_finish.header.dst := UFix(0) + io.mem.xact_init.bits.header.src := UFix(conf.ln.nTiles) + io.mem.xact_init.bits.header.dst := UFix(0) + io.mem.xact_init_data.bits.header.src := UFix(conf.ln.nTiles) + io.mem.xact_init_data.bits.header.dst := UFix(0) + io.mem.probe_rep.bits.header.src := UFix(conf.ln.nTiles) + io.mem.probe_rep.bits.header.dst := UFix(0) + 
io.mem.probe_rep_data.bits.header.src := UFix(conf.ln.nTiles) + io.mem.probe_rep_data.bits.header.dst := UFix(0) + io.mem.xact_finish.bits.header.src := UFix(conf.ln.nTiles) + io.mem.xact_finish.bits.header.dst := UFix(0) val pcrReadData = Vec(conf.ln.nTiles) { Reg() { Bits(width = io.cpu(0).pcr_rep.bits.getWidth) } } for (i <- 0 until conf.ln.nTiles) { diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index c47c0768..31488646 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -50,7 +50,7 @@ class CPUFrontendIO(implicit conf: ICacheConfig) extends Bundle { val invalidate = Bool(OUTPUT) } -class Frontend(implicit c: ICacheConfig) extends Component +class Frontend(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) extends Component { val io = new Bundle { val cpu = new CPUFrontendIO()(c).flip @@ -121,7 +121,7 @@ class Frontend(implicit c: ICacheConfig) extends Component io.cpu.resp.bits.xcpt_if := s2_xcpt_if } -class ICache(implicit c: ICacheConfig) extends Component +class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) extends Component { val io = new Bundle { val req = new PipeIO()(new Bundle { @@ -224,7 +224,7 @@ class ICache(implicit c: ICacheConfig) extends Component val data_array = Mem(c.sets*REFILL_CYCLES, seqRead = true){ Bits(width = c.code.width(c.databits)) } val s1_dout = Reg(){ Bits() } when (io.mem.xact_rep.valid && repl_way === UFix(i)) { - val d = io.mem.xact_rep.bits.data + val d = io.mem.xact_rep.bits.payload.data data_array(Cat(s2_idx,rf_cnt)) := c.code.encode(d) } /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM @@ -238,14 +238,14 @@ class ICache(implicit c: ICacheConfig) extends Component io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) val finish_q = (new Queue(1)) { new TransactionFinish } - finish_q.io.enq.valid := refill_done && io.mem.xact_rep.bits.require_ack - finish_q.io.enq.bits.global_xact_id := 
io.mem.xact_rep.bits.global_xact_id + finish_q.io.enq.valid := refill_done && io.mem.xact_rep.bits.payload.require_ack + finish_q.io.enq.bits.global_xact_id := io.mem.xact_rep.bits.payload.global_xact_id // output signals io.resp.valid := s2_hit io.mem.xact_init.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.xact_init.bits := c.co.getUncachedReadTransactionInit(s2_addr >> UFix(c.offbits), UFix(0)) - io.mem.xact_finish <> finish_q.io.deq + io.mem.xact_init.bits.payload := c.co.getUncachedReadTransactionInit(s2_addr >> UFix(c.offbits), UFix(0)) + io.mem.xact_finish <> FIFOedLogicalNetworkIOWrapper(finish_q.io.deq) io.mem.xact_rep.ready := Bool(true) // control state machine diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 0dcbf9dd..acdba677 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -915,9 +915,9 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio mshr.io.req.bits.data := s2_req.data mshr.io.mem_rep.valid := io.mem.xact_rep.fire() - mshr.io.mem_rep.bits := io.mem.xact_rep.bits + mshr.io.mem_rep.bits := io.mem.xact_rep.bits.payload mshr.io.mem_abort.valid := io.mem.xact_abort.valid - mshr.io.mem_abort.bits := io.mem.xact_abort.bits + mshr.io.mem_abort.bits := io.mem.xact_abort.bits.payload io.mem.xact_abort.ready := Bool(true) when (mshr.io.req.fire()) { replacer.miss } @@ -931,8 +931,8 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio metaWriteArb.io.in(0) <> mshr.io.meta_write // probes - prober.io.req <> io.mem.probe_req - prober.io.rep <> io.mem.probe_rep + prober.io.req <> FIFOedLogicalNetworkIOUnwrapper(io.mem.probe_req) + FIFOedLogicalNetworkIOWrapper(prober.io.rep) <> io.mem.probe_rep prober.io.mshr_req <> mshr.io.probe prober.io.wb_req <> wb.io.probe prober.io.way_en := s2_tag_match_way @@ -941,19 +941,19 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: 
LogicalNetworkConfiguratio prober.io.meta_write <> metaWriteArb.io.in(1) // refills - val refill = conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits) + val refill = conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits.payload) writeArb.io.in(1).valid := io.mem.xact_rep.valid && refill io.mem.xact_rep.ready := writeArb.io.in(1).ready || !refill writeArb.io.in(1).bits := mshr.io.mem_resp writeArb.io.in(1).bits.wmask := Fix(-1) - writeArb.io.in(1).bits.data := io.mem.xact_rep.bits.data + writeArb.io.in(1).bits.data := io.mem.xact_rep.bits.payload.data // writebacks wb.io.req <> mshr.io.wb_req wb.io.meta_read <> metaReadArb.io.in(3) wb.io.data_req <> readArb.io.in(2) wb.io.data_resp := s2_data_corrected - wb.io.probe_rep_data <> io.mem.probe_rep_data + FIFOedLogicalNetworkIOWrapper(wb.io.probe_rep_data) <> io.mem.probe_rep_data // store->load bypassing val s4_valid = Reg(s3_valid, resetVal = Bool(false)) @@ -1021,8 +1021,8 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio xact_init_arb.io.in(1).valid := mshr.io.mem_req.valid && prober.io.req.ready mshr.io.mem_req.ready := xact_init_arb.io.in(1).ready && prober.io.req.ready xact_init_arb.io.in(1).bits := mshr.io.mem_req.bits - io.mem.xact_init <> xact_init_arb.io.out + io.mem.xact_init <> FIFOedLogicalNetworkIOWrapper(xact_init_arb.io.out) - io.mem.xact_init_data <> wb.io.mem_req_data - io.mem.xact_finish <> mshr.io.mem_finish + io.mem.xact_init_data <> FIFOedLogicalNetworkIOWrapper(wb.io.mem_req_data) + io.mem.xact_finish <> FIFOedLogicalNetworkIOWrapper(mshr.io.mem_finish) } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 1018bde0..28a5510b 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -34,7 +34,7 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon } val core = new Core - val icache = new Frontend()(confIn.icache) + val icache = new Frontend()(confIn.icache, lnConf) val 
dcache = new HellaCache val arbiter = new MemArbiter(memPorts) @@ -51,7 +51,7 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon io.tilelink.probe_rep_data <> dcache.io.mem.probe_rep_data if (conf.vec) { - val vicache = new Frontend()(ICacheConfig(128, 1, conf.co)) // 128 sets x 1 ways (8KB) + val vicache = new Frontend()(ICacheConfig(128, 1, conf.co), lnConf) // 128 sets x 1 ways (8KB) arbiter.io.requestor(2) <> vicache.io.mem core.io.vimem <> vicache.io.cpu } From 6b00e7ff7408e732c8fa1db887f8b17473c5c26f Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 21 Jan 2013 17:18:23 -0800 Subject: [PATCH 0546/1087] New TileLink bundle names --- rocket/src/main/scala/arbiter.scala | 66 +++++++-------- rocket/src/main/scala/consts.scala | 4 +- rocket/src/main/scala/htif.scala | 62 +++++++------- rocket/src/main/scala/icache.scala | 26 +++--- rocket/src/main/scala/nbdcache.scala | 122 +++++++++++++-------------- rocket/src/main/scala/tile.scala | 16 ++-- 6 files changed, 148 insertions(+), 148 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index b60f845e..3dc2f44e 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -49,10 +49,10 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp } class UncachedRequestorIO(implicit conf: LogicalNetworkConfiguration) extends Bundle { - val xact_init = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionInit }} - val xact_abort = (new MasterSourcedIO) {(new LogicalNetworkIO){new TransactionAbort }} - val xact_rep = (new MasterSourcedIO) {(new LogicalNetworkIO){new TransactionReply }} - val xact_finish = (new ClientSourcedIO){(new LogicalNetworkIO){new TransactionFinish }} + val acquire = (new ClientSourcedIO){(new LogicalNetworkIO){new Acquire }} + val abort = (new MasterSourcedIO) {(new LogicalNetworkIO){new Abort }} + val grant = (new MasterSourcedIO) {(new 
LogicalNetworkIO){new Grant }} + val grant_ack = (new ClientSourcedIO){(new LogicalNetworkIO){new GrantAck }} } class MemArbiter(n: Int)(implicit conf: LogicalNetworkConfiguration) extends Component { @@ -61,54 +61,54 @@ class MemArbiter(n: Int)(implicit conf: LogicalNetworkConfiguration) extends Com val requestor = Vec(n) { new UncachedRequestorIO }.flip } - var xi_bits = new TransactionInit - xi_bits := io.requestor(n-1).xact_init.bits.payload - xi_bits.tile_xact_id := Cat(io.requestor(n-1).xact_init.bits.payload.tile_xact_id, UFix(n-1, log2Up(n))) + var xi_bits = new Acquire + xi_bits := io.requestor(n-1).acquire.bits.payload + xi_bits.client_xact_id := Cat(io.requestor(n-1).acquire.bits.payload.client_xact_id, UFix(n-1, log2Up(n))) for (i <- n-2 to 0 by -1) { - var my_xi_bits = new TransactionInit - my_xi_bits := io.requestor(i).xact_init.bits.payload - my_xi_bits.tile_xact_id := Cat(io.requestor(i).xact_init.bits.payload.tile_xact_id, UFix(i, log2Up(n))) + var my_xi_bits = new Acquire + my_xi_bits := io.requestor(i).acquire.bits.payload + my_xi_bits.client_xact_id := Cat(io.requestor(i).acquire.bits.payload.client_xact_id, UFix(i, log2Up(n))) - xi_bits = Mux(io.requestor(i).xact_init.valid, my_xi_bits, xi_bits) + xi_bits = Mux(io.requestor(i).acquire.valid, my_xi_bits, xi_bits) } - io.mem.xact_init.bits.payload := xi_bits - io.mem.xact_init.valid := io.requestor.map(_.xact_init.valid).reduce(_||_) - io.requestor(0).xact_init.ready := io.mem.xact_init.ready + io.mem.acquire.bits.payload := xi_bits + io.mem.acquire.valid := io.requestor.map(_.acquire.valid).reduce(_||_) + io.requestor(0).acquire.ready := io.mem.acquire.ready for (i <- 1 until n) - io.requestor(i).xact_init.ready := io.requestor(i-1).xact_init.ready && !io.requestor(i-1).xact_init.valid + io.requestor(i).acquire.ready := io.requestor(i-1).acquire.ready && !io.requestor(i-1).acquire.valid - var xf_bits = io.requestor(n-1).xact_finish.bits + var xf_bits = io.requestor(n-1).grant_ack.bits for (i <- 
n-2 to 0 by -1) - xf_bits = Mux(io.requestor(i).xact_finish.valid, io.requestor(i).xact_finish.bits, xf_bits) + xf_bits = Mux(io.requestor(i).grant_ack.valid, io.requestor(i).grant_ack.bits, xf_bits) - io.mem.xact_finish.bits := xf_bits - io.mem.xact_finish.valid := io.requestor.map(_.xact_finish.valid).reduce(_||_) - io.requestor(0).xact_finish.ready := io.mem.xact_finish.ready + io.mem.grant_ack.bits := xf_bits + io.mem.grant_ack.valid := io.requestor.map(_.grant_ack.valid).reduce(_||_) + io.requestor(0).grant_ack.ready := io.mem.grant_ack.ready for (i <- 1 until n) - io.requestor(i).xact_finish.ready := io.requestor(i-1).xact_finish.ready && !io.requestor(i-1).xact_finish.valid + io.requestor(i).grant_ack.ready := io.requestor(i-1).grant_ack.ready && !io.requestor(i-1).grant_ack.valid - io.mem.xact_rep.ready := Bool(false) + io.mem.grant.ready := Bool(false) for (i <- 0 until n) { - val tag = io.mem.xact_rep.bits.payload.tile_xact_id - io.requestor(i).xact_rep.valid := Bool(false) + val tag = io.mem.grant.bits.payload.client_xact_id + io.requestor(i).grant.valid := Bool(false) when (tag(log2Up(n)-1,0) === UFix(i)) { - io.requestor(i).xact_rep.valid := io.mem.xact_rep.valid - io.mem.xact_rep.ready := io.requestor(i).xact_rep.ready + io.requestor(i).grant.valid := io.mem.grant.valid + io.mem.grant.ready := io.requestor(i).grant.ready } - io.requestor(i).xact_rep.bits := io.mem.xact_rep.bits - io.requestor(i).xact_rep.bits.payload.tile_xact_id := tag >> UFix(log2Up(n)) + io.requestor(i).grant.bits := io.mem.grant.bits + io.requestor(i).grant.bits.payload.client_xact_id := tag >> UFix(log2Up(n)) } for (i <- 0 until n) { - val tag = io.mem.xact_abort.bits.payload.tile_xact_id - io.requestor(i).xact_abort.valid := io.mem.xact_abort.valid && tag(log2Up(n)-1,0) === UFix(i) - io.requestor(i).xact_abort.bits := io.mem.xact_abort.bits - io.requestor(i).xact_abort.bits.payload.tile_xact_id := tag >> UFix(log2Up(n)) + val tag = io.mem.abort.bits.payload.client_xact_id + 
io.requestor(i).abort.valid := io.mem.abort.valid && tag(log2Up(n)-1,0) === UFix(i) + io.requestor(i).abort.bits := io.mem.abort.bits + io.requestor(i).abort.bits.payload.client_xact_id := tag >> UFix(log2Up(n)) } - io.mem.xact_abort.ready := Bool(true) + io.mem.abort.ready := Bool(true) } diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 9b956260..0c4e2fbe 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -60,8 +60,8 @@ trait InterruptConstants { abstract trait RocketDcacheConstants extends uncore.constants.CacheConstants with uncore.constants.AddressConstants { require(OFFSET_BITS == log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES)) - require(OFFSET_BITS <= uncore.Constants.X_INIT_WRITE_MASK_BITS) - require(log2Up(OFFSET_BITS) <= uncore.Constants.X_INIT_SUBWORD_ADDR_BITS) + require(OFFSET_BITS <= uncore.Constants.ACQUIRE_WRITE_MASK_BITS) + require(log2Up(OFFSET_BITS) <= uncore.Constants.ACQUIRE_SUBWORD_ADDR_BITS) } trait VectorOpConstants { diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 682576cf..611beb68 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -106,14 +106,14 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo val mem_gxid = Reg() { Bits() } val mem_needs_ack = Reg() { Bool() } val mem_nacked = Reg(resetVal = Bool(false)) - when (io.mem.xact_rep.valid) { + when (io.mem.grant.valid) { mem_acked := Bool(true) - mem_gxid := io.mem.xact_rep.bits.payload.global_xact_id - mem_needs_ack := io.mem.xact_rep.bits.payload.require_ack + mem_gxid := io.mem.grant.bits.payload.master_xact_id + mem_needs_ack := io.mem.grant.bits.payload.require_ack } - io.mem.xact_rep.ready := Bool(true) - when (io.mem.xact_abort.valid) { mem_nacked := Bool(true) } - io.mem.xact_abort.ready := Bool(true) + io.mem.grant.ready := Bool(true) + when (io.mem.abort.valid) { mem_nacked := Bool(true) } 
+ io.mem.abort.ready := Bool(true) val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(9) { UFix() } val state = Reg(resetVal = state_rx) @@ -126,11 +126,11 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo } val mem_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - val x_init = new Queue(1)(new TransactionInit) + val x_init = new Queue(1)(new Acquire) when (state === state_mem_req && x_init.io.enq.ready) { state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) } - when (state === state_mem_wdata && io.mem.xact_init_data.ready) { + when (state === state_mem_wdata && io.mem.acquire_data.ready) { when (mem_cnt.andR) { state := state_mem_wresp } @@ -151,7 +151,7 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo state := state_mem_req mem_nacked := Bool(false) } - when (io.mem.xact_rep.valid) { + when (io.mem.grant.valid) { when (mem_cnt.andR) { state := state_mem_finish } @@ -159,7 +159,7 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo } mem_acked := Bool(false) } - when (state === state_mem_finish && io.mem.xact_finish.ready) { + when (state === state_mem_finish && io.mem.grant_ack.ready) { state := Mux(cmd === cmd_readmem || pos === UFix(1), state_tx, state_rx) pos := pos - UFix(1) addr := addr + UFix(1 << OFFSET_BITS-3) @@ -175,34 +175,34 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo var mem_req_data: Bits = null for (i <- 0 until MEM_DATA_BITS/short_request_bits) { val idx = Cat(mem_cnt, UFix(i, log2Up(MEM_DATA_BITS/short_request_bits))) - when (state === state_mem_rdata && io.mem.xact_rep.valid) { - packet_ram(idx) := io.mem.xact_rep.bits.payload.data((i+1)*short_request_bits-1, i*short_request_bits) + when (state === state_mem_rdata && io.mem.grant.valid) { + packet_ram(idx) := 
io.mem.grant.bits.payload.data((i+1)*short_request_bits-1, i*short_request_bits) } mem_req_data = Cat(packet_ram(idx), mem_req_data) } x_init.io.enq.valid := state === state_mem_req val init_addr = addr.toUFix >> UFix(OFFSET_BITS-3) val co = conf.co.asInstanceOf[CoherencePolicyWithUncached] - x_init.io.enq.bits := Mux(cmd === cmd_writemem, co.getUncachedWriteTransactionInit(init_addr, UFix(0)), co.getUncachedReadTransactionInit(init_addr, UFix(0))) - io.mem.xact_init <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq) - io.mem.xact_init_data.valid:= state === state_mem_wdata - io.mem.xact_init_data.bits.payload.data := mem_req_data - io.mem.xact_finish.valid := (state === state_mem_finish) && mem_needs_ack - io.mem.xact_finish.bits.payload.global_xact_id := mem_gxid - io.mem.probe_req.ready := Bool(false) - io.mem.probe_rep.valid := Bool(false) - io.mem.probe_rep_data.valid := Bool(false) + x_init.io.enq.bits := Mux(cmd === cmd_writemem, co.getUncachedWriteAcquire(init_addr, UFix(0)), co.getUncachedReadAcquire(init_addr, UFix(0))) + io.mem.acquire <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq) + io.mem.acquire_data.valid:= state === state_mem_wdata + io.mem.acquire_data.bits.payload.data := mem_req_data + io.mem.grant_ack.valid := (state === state_mem_finish) && mem_needs_ack + io.mem.grant_ack.bits.payload.master_xact_id := mem_gxid + io.mem.probe.ready := Bool(false) + io.mem.release.valid := Bool(false) + io.mem.release_data.valid := Bool(false) - io.mem.xact_init.bits.header.src := UFix(conf.ln.nTiles) - io.mem.xact_init.bits.header.dst := UFix(0) - io.mem.xact_init_data.bits.header.src := UFix(conf.ln.nTiles) - io.mem.xact_init_data.bits.header.dst := UFix(0) - io.mem.probe_rep.bits.header.src := UFix(conf.ln.nTiles) - io.mem.probe_rep.bits.header.dst := UFix(0) - io.mem.probe_rep_data.bits.header.src := UFix(conf.ln.nTiles) - io.mem.probe_rep_data.bits.header.dst := UFix(0) - io.mem.xact_finish.bits.header.src := UFix(conf.ln.nTiles) - 
io.mem.xact_finish.bits.header.dst := UFix(0) + io.mem.acquire.bits.header.src := UFix(conf.ln.nTiles) + io.mem.acquire.bits.header.dst := UFix(0) + io.mem.acquire_data.bits.header.src := UFix(conf.ln.nTiles) + io.mem.acquire_data.bits.header.dst := UFix(0) + io.mem.release.bits.header.src := UFix(conf.ln.nTiles) + io.mem.release.bits.header.dst := UFix(0) + io.mem.release_data.bits.header.src := UFix(conf.ln.nTiles) + io.mem.release_data.bits.header.dst := UFix(0) + io.mem.grant_ack.bits.header.src := UFix(conf.ln.nTiles) + io.mem.grant_ack.bits.header.dst := UFix(0) val pcrReadData = Vec(conf.ln.nTiles) { Reg() { Bits(width = io.cpu(0).pcr_rep.bits.getWidth) } } for (i <- 0 until conf.ln.nTiles) { diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 31488646..5914daac 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -172,7 +172,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss - val (rf_cnt, refill_done) = Counter(io.mem.xact_rep.valid, REFILL_CYCLES) + val (rf_cnt, refill_done) = Counter(io.mem.grant.valid, REFILL_CYCLES) val repl_way = if (c.dm) UFix(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) val enc_tagbits = c.code.width(c.tagbits) @@ -223,8 +223,8 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte for (i <- 0 until c.assoc) { val data_array = Mem(c.sets*REFILL_CYCLES, seqRead = true){ Bits(width = c.code.width(c.databits)) } val s1_dout = Reg(){ Bits() } - when (io.mem.xact_rep.valid && repl_way === UFix(i)) { - val d = io.mem.xact_rep.bits.payload.data + when (io.mem.grant.valid && repl_way === UFix(i)) { + val d = io.mem.grant.bits.payload.data data_array(Cat(s2_idx,rf_cnt)) := c.code.encode(d) } /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM @@ -237,16 +237,16 @@ class ICache(implicit c: ICacheConfig, lnconf: 
LogicalNetworkConfiguration) exte io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) - val finish_q = (new Queue(1)) { new TransactionFinish } - finish_q.io.enq.valid := refill_done && io.mem.xact_rep.bits.payload.require_ack - finish_q.io.enq.bits.global_xact_id := io.mem.xact_rep.bits.payload.global_xact_id + val finish_q = (new Queue(1)) { new GrantAck } + finish_q.io.enq.valid := refill_done && io.mem.grant.bits.payload.require_ack + finish_q.io.enq.bits.master_xact_id := io.mem.grant.bits.payload.master_xact_id // output signals io.resp.valid := s2_hit - io.mem.xact_init.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.xact_init.bits.payload := c.co.getUncachedReadTransactionInit(s2_addr >> UFix(c.offbits), UFix(0)) - io.mem.xact_finish <> FIFOedLogicalNetworkIOWrapper(finish_q.io.deq) - io.mem.xact_rep.ready := Bool(true) + io.mem.acquire.valid := (state === s_request) && finish_q.io.enq.ready + io.mem.acquire.bits.payload := c.co.getUncachedReadAcquire(s2_addr >> UFix(c.offbits), UFix(0)) + io.mem.grant_ack <> FIFOedLogicalNetworkIOWrapper(finish_q.io.deq) + io.mem.grant.ready := Bool(true) // control state machine switch (state) { @@ -255,11 +255,11 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte invalidated := Bool(false) } is (s_request) { - when (io.mem.xact_init.ready && finish_q.io.enq.ready) { state := s_refill_wait } + when (io.mem.acquire.ready && finish_q.io.enq.ready) { state := s_refill_wait } } is (s_refill_wait) { - when (io.mem.xact_abort.valid) { state := s_request } - when (io.mem.xact_rep.valid) { state := s_refill } + when (io.mem.abort.valid) { state := s_request } + when (io.mem.grant.valid) { state := s_refill } } is (s_refill) { when (refill_done) { state := s_ready } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index acdba677..c0eac3ab 100644 --- a/rocket/src/main/scala/nbdcache.scala 
+++ b/rocket/src/main/scala/nbdcache.scala @@ -127,7 +127,7 @@ class WritebackReq(implicit conf: DCacheConfig) extends Bundle { val tag = Bits(width = conf.tagbits) val idx = Bits(width = conf.idxbits) val way_en = Bits(width = conf.ways) - val tile_xact_id = Bits(width = TILE_XACT_ID_BITS) + val client_xact_id = Bits(width = CLIENT_XACT_ID_BITS) override def clone = new WritebackReq().asInstanceOf[this.type] } @@ -165,14 +165,14 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val idx_match = Bool(OUTPUT) val tag = Bits(OUTPUT, conf.tagbits) - val mem_req = (new FIFOIO) { new TransactionInit } + val mem_req = (new FIFOIO) { new Acquire } val mem_resp = new DataWriteReq().asOutput val meta_read = (new FIFOIO) { new MetaReadReq } val meta_write = (new FIFOIO) { new MetaWriteReq } val replay = (new FIFOIO) { new Replay() } - val mem_abort = (new PipeIO) { new TransactionAbort }.flip - val mem_rep = (new PipeIO) { new TransactionReply }.flip - val mem_finish = (new FIFOIO) { new TransactionFinish } + val mem_abort = (new PipeIO) { new Abort }.flip + val mem_rep = (new PipeIO) { new Grant }.flip + val mem_finish = (new FIFOIO) { new GrantAck } val wb_req = (new FIFOIO) { new WritebackReq } val probe_writeback = (new FIFOIO) { Bool() }.flip val probe_refill = (new FIFOIO) { Bool() }.flip @@ -181,7 +181,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(9) { UFix() } val state = Reg(resetVal = s_invalid) - val xacx_type = Reg { UFix() } + val acq_type = Reg { UFix() } val line_state = Reg { UFix() } val refill_count = Reg { UFix(width = log2Up(REFILL_CYCLES)) } val req = Reg { new MSHRReq() } @@ -197,14 +197,14 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { rpq.io.enq.bits.sdq_id := io.req_sdq_id rpq.io.deq.ready := io.replay.ready && state === 
s_drain_rpq || state === s_invalid - val abort = io.mem_abort.valid && io.mem_abort.bits.tile_xact_id === UFix(id) - val reply = io.mem_rep.valid && io.mem_rep.bits.tile_xact_id === UFix(id) + val abort = io.mem_abort.valid && io.mem_abort.bits.client_xact_id === UFix(id) + val reply = io.mem_rep.valid && io.mem_rep.bits.client_xact_id === UFix(id) val refill_done = reply && refill_count.andR val wb_done = reply && (state === s_wb_resp) - val finish_q = (new Queue(2 /* wb + refill */)) { new TransactionFinish } + val finish_q = (new Queue(2 /* wb + refill */)) { new GrantAck } finish_q.io.enq.valid := wb_done || refill_done - finish_q.io.enq.bits.global_xact_id := io.mem_rep.bits.global_xact_id + finish_q.io.enq.bits.master_xact_id := io.mem_rep.bits.master_xact_id io.wb_req.valid := Bool(false) when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid) { @@ -221,7 +221,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { when (refill_done) { state := s_meta_write_req } when (reply) { refill_count := refill_count + UFix(1) - line_state := conf.co.newStateOnTransactionReply(io.mem_rep.bits, io.mem_req.bits) + line_state := conf.co.newStateOnGrant(io.mem_rep.bits, io.mem_req.bits) } when (abort) { state := s_refill_req } } @@ -245,12 +245,12 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req - xacx_type := conf.co.getTransactionInitTypeOnSecondaryMiss(req_cmd, conf.co.newStateOnFlush(), io.mem_req.bits) + acq_type := conf.co.getAcquireTypeOnSecondaryMiss(req_cmd, conf.co.newStateOnFlush(), io.mem_req.bits) } when ((state === s_invalid) && io.req_pri_val) { line_state := conf.co.newStateOnFlush() refill_count := UFix(0) - xacx_type := conf.co.getTransactionInitTypeOnPrimaryMiss(req_cmd, conf.co.newStateOnFlush()) + acq_type := conf.co.getAcquireTypeOnPrimaryMiss(req_cmd, conf.co.newStateOnFlush()) req := io.req_bits state := 
Mux(conf.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_refill_req) @@ -280,15 +280,15 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { io.wb_req.bits.tag := req.old_meta.tag io.wb_req.bits.idx := req_idx io.wb_req.bits.way_en := req.way_en - io.wb_req.bits.tile_xact_id := Bits(id) + io.wb_req.bits.client_xact_id := Bits(id) io.probe_writeback.ready := (state != s_wb_resp && state != s_meta_clear && state != s_drain_rpq) || !idx_match io.probe_refill.ready := (state != s_refill_resp && state != s_drain_rpq) || !idx_match io.mem_req.valid := state === s_refill_req - io.mem_req.bits.x_type := xacx_type + io.mem_req.bits.a_type := acq_type io.mem_req.bits.addr := Cat(io.tag, req_idx).toUFix - io.mem_req.bits.tile_xact_id := Bits(id) + io.mem_req.bits.client_xact_id := Bits(id) io.mem_finish <> finish_q.io.deq io.meta_read.valid := state === s_drain_rpq @@ -310,14 +310,14 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val req = (new FIFOIO) { new MSHRReq }.flip val secondary_miss = Bool(OUTPUT) - val mem_req = (new FIFOIO) { new TransactionInit } + val mem_req = (new FIFOIO) { new Acquire } val mem_resp = new DataWriteReq().asOutput val meta_read = (new FIFOIO) { new MetaReadReq } val meta_write = (new FIFOIO) { new MetaWriteReq } val replay = (new FIFOIO) { new Replay } - val mem_abort = (new PipeIO) { new TransactionAbort }.flip - val mem_rep = (new PipeIO) { new TransactionReply }.flip - val mem_finish = (new FIFOIO) { new TransactionFinish } + val mem_abort = (new PipeIO) { new Abort }.flip + val mem_rep = (new PipeIO) { new Grant }.flip + val mem_finish = (new FIFOIO) { new GrantAck } val wb_req = (new FIFOIO) { new WritebackReq } val probe = (new FIFOIO) { Bool() }.flip @@ -337,8 +337,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val memRespMux = Vec(conf.nmshr) { new DataWriteReq } val meta_read_arb = (new Arbiter(conf.nmshr)) { new MetaReadReq } val meta_write_arb = (new 
Arbiter(conf.nmshr)) { new MetaWriteReq } - val mem_req_arb = (new Arbiter(conf.nmshr)) { new TransactionInit } - val mem_finish_arb = (new Arbiter(conf.nmshr)) { new TransactionFinish } + val mem_req_arb = (new Arbiter(conf.nmshr)) { new Acquire } + val mem_finish_arb = (new Arbiter(conf.nmshr)) { new GrantAck } val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq } val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } @@ -399,7 +399,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.secondary_miss := idx_match - io.mem_resp := memRespMux(io.mem_rep.bits.tile_xact_id) + io.mem_resp := memRespMux(io.mem_rep.bits.client_xact_id) io.fence_rdy := !fence io.probe.ready := (refill_probe_rdy || !tag_match) && (writeback_probe_rdy || !wb_probe_match) @@ -421,9 +421,9 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { val meta_read = (new FIFOIO) { new MetaReadReq } val data_req = (new FIFOIO) { new DataReadReq() } val data_resp = Bits(INPUT, conf.bitsperrow) - val mem_req = (new FIFOIO) { new TransactionInit } - val mem_req_data = (new FIFOIO) { new TransactionInitData } - val probe_rep_data = (new FIFOIO) { new ProbeReplyData } + val mem_req = (new FIFOIO) { new Acquire } + val mem_req_data = (new FIFOIO) { new AcquireData } + val release_data = (new FIFOIO) { new ReleaseData } } val valid = Reg(resetVal = Bool(false)) @@ -442,7 +442,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { cnt := cnt + 1 } - when (r2_data_req_fired && !Mux(is_probe, io.probe_rep_data.ready, io.mem_req_data.ready)) { + when (r2_data_req_fired && !Mux(is_probe, io.release_data.ready, io.mem_req_data.ready)) { r1_data_req_fired := false r2_data_req_fired := false cnt := cnt - Mux[UFix](r1_data_req_fired, 2, 1) @@ -479,13 +479,13 @@ class WritebackUnit(implicit conf: DCacheConfig) extends 
Component { io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(REFILL_CYCLES)-1,0)) << conf.ramoffbits io.mem_req.valid := valid && !cmd_sent - io.mem_req.bits.x_type := conf.co.getTransactionInitTypeOnWriteback() + io.mem_req.bits.a_type := conf.co.getAcquireTypeOnWriteback() io.mem_req.bits.addr := Cat(req.tag, req.idx).toUFix - io.mem_req.bits.tile_xact_id := req.tile_xact_id + io.mem_req.bits.client_xact_id := req.client_xact_id io.mem_req_data.valid := r2_data_req_fired && !is_probe io.mem_req_data.bits.data := io.data_resp - io.probe_rep_data.valid := r2_data_req_fired && is_probe - io.probe_rep_data.bits.data := io.data_resp + io.release_data.valid := r2_data_req_fired && is_probe + io.release_data.bits.data := io.data_resp io.meta_read.valid := fire io.meta_read.bits.addr := io.mem_req.bits.addr << conf.offbits @@ -493,8 +493,8 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { class ProbeUnit(implicit conf: DCacheConfig) extends Component { val io = new Bundle { - val req = (new FIFOIO) { new ProbeRequest }.flip - val rep = (new FIFOIO) { new ProbeReply } + val req = (new FIFOIO) { new Probe }.flip + val rep = (new FIFOIO) { new Release } val meta_read = (new FIFOIO) { new MetaReadReq } val meta_write = (new FIFOIO) { new MetaWriteReq } val mshr_req = (new FIFOIO) { Bool() } @@ -503,11 +503,11 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { val line_state = UFix(INPUT, 2) } - val s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_probe_rep :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(8) { UFix() } + val s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(8) { UFix() } val state = Reg(resetVal = s_invalid) val line_state = Reg() { UFix() } val way_en = Reg() { Bits() } - val req = Reg() { new ProbeRequest() } + val req = Reg() { new Probe() } val hit = way_en.orR when (state === s_meta_write && 
io.meta_write.ready) { @@ -519,14 +519,14 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { when (state === s_writeback_req && io.wb_req.ready) { state := s_writeback_resp } - when (state === s_probe_rep && io.rep.ready) { + when (state === s_release && io.rep.ready) { state := s_invalid when (hit) { state := Mux(conf.co.needsWriteback(line_state), s_writeback_req, s_meta_write) } } when (state === s_mshr_req) { - state := s_probe_rep + state := s_release line_state := io.line_state way_en := io.way_en when (!io.mshr_req.ready) { state := s_meta_read } @@ -543,8 +543,8 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { } io.req.ready := state === s_invalid && !reset - io.rep.valid := state === s_probe_rep - io.rep.bits := conf.co.newProbeReply(req, Mux(hit, line_state, conf.co.newStateOnFlush)) + io.rep.valid := state === s_release + io.rep.bits := conf.co.newRelease(req, Mux(hit, line_state, conf.co.newStateOnFlush)) io.meta_read.valid := state === s_meta_read io.meta_read.bits.addr := req.addr << UFix(conf.offbits) @@ -552,7 +552,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { io.meta_write.valid := state === s_meta_write io.meta_write.bits.way_en := way_en io.meta_write.bits.idx := req.addr - io.meta_write.bits.data.state := conf.co.newStateOnProbeRequest(req, line_state) + io.meta_write.bits.data.state := conf.co.newStateOnProbe(req, line_state) io.meta_write.bits.data.tag := req.addr >> UFix(conf.idxbits) io.mshr_req.valid := state === s_mshr_req @@ -829,7 +829,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio // data val data = new DataArray val readArb = new Arbiter(4)(new DataReadReq) - readArb.io.out.ready := !io.mem.xact_rep.valid || io.mem.xact_rep.ready // insert bubble if refill gets blocked + readArb.io.out.ready := !io.mem.grant.valid || io.mem.grant.ready // insert bubble if refill gets blocked readArb.io.out <> data.io.read val writeArb = new Arbiter(2)(new 
DataWriteReq) @@ -914,11 +914,11 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio mshr.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshr.io.req.bits.data := s2_req.data - mshr.io.mem_rep.valid := io.mem.xact_rep.fire() - mshr.io.mem_rep.bits := io.mem.xact_rep.bits.payload - mshr.io.mem_abort.valid := io.mem.xact_abort.valid - mshr.io.mem_abort.bits := io.mem.xact_abort.bits.payload - io.mem.xact_abort.ready := Bool(true) + mshr.io.mem_rep.valid := io.mem.grant.fire() + mshr.io.mem_rep.bits := io.mem.grant.bits.payload + mshr.io.mem_abort.valid := io.mem.abort.valid + mshr.io.mem_abort.bits := io.mem.abort.bits.payload + io.mem.abort.ready := Bool(true) when (mshr.io.req.fire()) { replacer.miss } // replays @@ -931,8 +931,8 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio metaWriteArb.io.in(0) <> mshr.io.meta_write // probes - prober.io.req <> FIFOedLogicalNetworkIOUnwrapper(io.mem.probe_req) - FIFOedLogicalNetworkIOWrapper(prober.io.rep) <> io.mem.probe_rep + prober.io.req <> FIFOedLogicalNetworkIOUnwrapper(io.mem.probe) + FIFOedLogicalNetworkIOWrapper(prober.io.rep) <> io.mem.release prober.io.mshr_req <> mshr.io.probe prober.io.wb_req <> wb.io.probe prober.io.way_en := s2_tag_match_way @@ -941,19 +941,19 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio prober.io.meta_write <> metaWriteArb.io.in(1) // refills - val refill = conf.co.messageUpdatesDataArray(io.mem.xact_rep.bits.payload) - writeArb.io.in(1).valid := io.mem.xact_rep.valid && refill - io.mem.xact_rep.ready := writeArb.io.in(1).ready || !refill + val refill = conf.co.messageUpdatesDataArray(io.mem.grant.bits.payload) + writeArb.io.in(1).valid := io.mem.grant.valid && refill + io.mem.grant.ready := writeArb.io.in(1).ready || !refill writeArb.io.in(1).bits := mshr.io.mem_resp writeArb.io.in(1).bits.wmask := Fix(-1) - writeArb.io.in(1).bits.data := 
io.mem.xact_rep.bits.payload.data + writeArb.io.in(1).bits.data := io.mem.grant.bits.payload.data // writebacks wb.io.req <> mshr.io.wb_req wb.io.meta_read <> metaReadArb.io.in(3) wb.io.data_req <> readArb.io.in(2) wb.io.data_resp := s2_data_corrected - FIFOedLogicalNetworkIOWrapper(wb.io.probe_rep_data) <> io.mem.probe_rep_data + FIFOedLogicalNetworkIOWrapper(wb.io.release_data) <> io.mem.release_data // store->load bypassing val s4_valid = Reg(s3_valid, resetVal = Bool(false)) @@ -1016,13 +1016,13 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio io.cpu.resp.bits.data_subword := loadgen.byte io.cpu.resp.bits.store_data := s2_req.data - val xact_init_arb = (new Arbiter(2)) { new TransactionInit } - xact_init_arb.io.in(0) <> wb.io.mem_req - xact_init_arb.io.in(1).valid := mshr.io.mem_req.valid && prober.io.req.ready - mshr.io.mem_req.ready := xact_init_arb.io.in(1).ready && prober.io.req.ready - xact_init_arb.io.in(1).bits := mshr.io.mem_req.bits - io.mem.xact_init <> FIFOedLogicalNetworkIOWrapper(xact_init_arb.io.out) + val acquire_arb = (new Arbiter(2)) { new Acquire } + acquire_arb.io.in(0) <> wb.io.mem_req + acquire_arb.io.in(1).valid := mshr.io.mem_req.valid && prober.io.req.ready + mshr.io.mem_req.ready := acquire_arb.io.in(1).ready && prober.io.req.ready + acquire_arb.io.in(1).bits := mshr.io.mem_req.bits + io.mem.acquire <> FIFOedLogicalNetworkIOWrapper(acquire_arb.io.out) - io.mem.xact_init_data <> FIFOedLogicalNetworkIOWrapper(wb.io.mem_req_data) - io.mem.xact_finish <> FIFOedLogicalNetworkIOWrapper(mshr.io.mem_finish) + io.mem.acquire_data <> FIFOedLogicalNetworkIOWrapper(wb.io.mem_req_data) + io.mem.grant_ack <> FIFOedLogicalNetworkIOWrapper(mshr.io.mem_finish) } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 28a5510b..b4ddbd05 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -41,14 +41,14 @@ class Tile(resetSignal: Bool = null)(confIn: 
RocketConfiguration) extends Compon arbiter.io.requestor(0) <> dcache.io.mem arbiter.io.requestor(1) <> icache.io.mem - io.tilelink.xact_init <> arbiter.io.mem.xact_init - io.tilelink.xact_init_data <> dcache.io.mem.xact_init_data - arbiter.io.mem.xact_abort <> io.tilelink.xact_abort - arbiter.io.mem.xact_rep <> io.tilelink.xact_rep - io.tilelink.xact_finish <> arbiter.io.mem.xact_finish - dcache.io.mem.probe_req <> io.tilelink.probe_req - io.tilelink.probe_rep <> dcache.io.mem.probe_rep - io.tilelink.probe_rep_data <> dcache.io.mem.probe_rep_data + io.tilelink.acquire <> arbiter.io.mem.acquire + io.tilelink.acquire_data <> dcache.io.mem.acquire_data + arbiter.io.mem.abort <> io.tilelink.abort + arbiter.io.mem.grant <> io.tilelink.grant + io.tilelink.grant_ack <> arbiter.io.mem.grant_ack + dcache.io.mem.probe <> io.tilelink.probe + io.tilelink.release <> dcache.io.mem.release + io.tilelink.release_data <> dcache.io.mem.release_data if (conf.vec) { val vicache = new Frontend()(ICacheConfig(128, 1, conf.co), lnConf) // 128 sets x 1 ways (8KB) From 63060bc0a84962f2074aa30011e9d52902343172 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Wed, 23 Jan 2013 19:27:53 -0800 Subject: [PATCH 0547/1087] minor tweaks for eos18 tapeout (SRAM r/w port ordering, etc) --- rocket/src/main/scala/ctrl.scala | 5 ++++- rocket/src/main/scala/icache.scala | 6 ++++-- rocket/src/main/scala/nbdcache.scala | 6 +++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index c4b23b76..91206853 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -603,7 +603,10 @@ class Control(implicit conf: RocketConfiguration) extends Component class Scoreboard { - val r = Reg(resetVal = Bits(0)) +// val r = Reg(resetVal = Bits(0)) + // RIMAS: explicitly set width to 32, otherwise Chisel would set it to 1024 + // and cause a ton of warnings during synthesis + val r = Reg(resetVal = 
Bits(0,32)) var next = r var ens = Bool(false) def apply(addr: UFix) = r(addr) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 5914daac..a65cd01e 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -183,7 +183,8 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte val tag = c.code.encode(s2_tag) tag_array.write(s2_idx, Fill(c.assoc, tag), wmask) } - /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM +// /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM + .elsewhen (s0_valid) { tag_rdata := tag_array(s0_pgoff(c.untagbits-1,c.offbits)) } @@ -227,7 +228,8 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte val d = io.mem.grant.bits.payload.data data_array(Cat(s2_idx,rf_cnt)) := c.code.encode(d) } - /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM +// /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM + .elsewhen (s0_valid) { s1_dout := data_array(s0_pgoff(c.untagbits-1,c.offbits-rf_cnt.getWidth)) } // if s1_tag_match is critical, replace with partial tag check diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c0eac3ab..1756a706 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -577,15 +577,15 @@ class MetaDataArray(implicit conf: DCacheConfig) extends Component { val tags = Mem(conf.sets, seqRead = true) { UFix(width = metabits*conf.ways) } val tag = Reg{UFix()} - when (io.read.valid) { - tag := tags(io.read.bits.addr(conf.untagbits-1,conf.offbits)) - } when (rst || io.write.valid) { val addr = Mux(rst, rst_cnt, io.write.bits.idx) val data = Cat(Mux(rst, conf.co.newStateOnFlush, io.write.bits.data.state), io.write.bits.data.tag) val mask = Mux(rst, Fix(-1), io.write.bits.way_en) tags.write(addr, Fill(conf.ways, data), FillInterleaved(metabits, mask)) } + when (io.read.valid) { 
+ tag := tags(io.read.bits.addr(conf.untagbits-1,conf.offbits)) + } for (w <- 0 until conf.ways) { val m = tag(metabits*(w+1)-1, metabits*w) From 5b9f938263083a30def1536889866a20f84d60fa Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 6 Jan 2013 05:18:33 -0800 Subject: [PATCH 0548/1087] correctly sign-extend badvaddr, epc, and ebase --- rocket/src/main/scala/dpath_util.scala | 30 ++++++++++++-------------- rocket/src/main/scala/util.scala | 26 ++++++++++++++++++++++ 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 131ae28c..4aa220f1 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -132,9 +132,9 @@ class PCR(implicit conf: RocketConfiguration) extends Component } import PCR._ - val reg_epc = Reg{Fix(width = VADDR_BITS+1)} - val reg_badvaddr = Reg{Fix(width = VADDR_BITS+1)} - val reg_ebase = Reg{Fix(width = VADDR_BITS)} + val reg_epc = Reg{Bits(width = conf.xprlen)} + val reg_badvaddr = Reg{Bits(width = conf.xprlen)} + val reg_ebase = Reg{Bits(width = conf.xprlen)} val reg_count = WideCounter(32) val reg_compare = Reg{Bits(width = 32)} val reg_cause = Reg{Bits(width = io.cause.getWidth)} @@ -189,24 +189,22 @@ class PCR(implicit conf: RocketConfiguration) extends Component cnt = cnt + reg_vecbank(i) io.vecbankcnt := cnt(3,0) - val badvaddr_sign = Mux(io.w.data(VADDR_BITS-1), io.w.data(conf.xprlen-1,VADDR_BITS).andR, io.w.data(conf.xprlen-1,VADDR_BITS).orR) - when (io.badvaddr_wen) { - reg_badvaddr := Cat(badvaddr_sign, io.w.data(VADDR_BITS-1,0)).toUFix; - } - when (io.vec_irq_aux_wen) { - reg_badvaddr := io.vec_irq_aux.toUFix + when (io.badvaddr_wen || io.vec_irq_aux_wen) { + val wdata = Mux(io.badvaddr_wen, io.w.data, io.vec_irq_aux) + val (upper, lower) = Split(wdata, VADDR_BITS) + val sign = Mux(lower.toFix < Fix(0), upper.andR, upper.orR) + reg_badvaddr := Cat(sign, lower).toFix } when (io.exception) { 
when (!reg_status.et) { reg_error_mode := true - }.otherwise { - reg_status.s := true - reg_status.ps := reg_status.s - reg_status.et := false - reg_epc := io.pc - reg_cause := io.cause } + reg_status.s := true + reg_status.ps := reg_status.s + reg_status.et := false + reg_epc := io.pc.toFix + reg_cause := io.cause } when (io.eret) { @@ -250,7 +248,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component if (!conf.rvc) reg_status.ec := false } when (waddr === EPC) { reg_epc := wdata(VADDR_BITS,0).toFix } - when (waddr === EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toUFix; } + when (waddr === EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toFix } when (waddr === COUNT) { reg_count := wdata.toUFix } when (waddr === COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } when (waddr === COREID) { reg_coreid := wdata(15,0) } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 60b0b595..183621a6 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -24,6 +24,32 @@ object AVec tabulate(n1)(i1 => tabulate(n2)(f(i1, _))) } +object Split +{ + // is there a better way to do do this? 
+ def apply(x: Bits, n0: Int) = { + val w = checkWidth(x, n0) + (x(w-1,n0), x(n0-1,0)) + } + def apply(x: Bits, n1: Int, n0: Int) = { + val w = checkWidth(x, n1, n0) + (x(w-1,n1), x(n1-1,n0), x(n0-1,0)) + } + def apply(x: Bits, n2: Int, n1: Int, n0: Int) = { + val w = checkWidth(x, n2, n1, n0) + (x(w-1,n2), x(n2-1,n1), x(n1-1,n0), x(n0-1,0)) + } + + private def checkWidth(x: Bits, n: Int*) = { + val w = x.getWidth + def decreasing(x: Seq[Int]): Boolean = + if (x.tail.isEmpty) true + else x.head > x.tail.head && decreasing(x.tail) + require(decreasing(w :: n.toList)) + w + } +} + // a counter that clock gates most of its MSBs using the LSB carry-out case class WideCounter(width: Int, inc: Bool = Bool(true)) { From bb6fbddf1f26772a21106f8feb086dfe6f0a961f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 24 Jan 2013 17:17:12 -0800 Subject: [PATCH 0549/1087] don't probe the mshr file to inquire about refills --- rocket/src/main/scala/nbdcache.scala | 47 +++++++++++++++------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 1756a706..7a91e06c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -175,7 +175,6 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val mem_finish = (new FIFOIO) { new GrantAck } val wb_req = (new FIFOIO) { new WritebackReq } val probe_writeback = (new FIFOIO) { Bool() }.flip - val probe_refill = (new FIFOIO) { Bool() }.flip } val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(9) { UFix() } @@ -185,6 +184,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val line_state = Reg { UFix() } val refill_count = Reg { UFix(width = log2Up(REFILL_CYCLES)) } val req = Reg { new MSHRReq() } + val writeback_probed = Reg{Bool()} val req_cmd = io.req_bits.cmd 
val req_idx = req.addr(conf.untagbits-1,conf.offbits) @@ -202,12 +202,13 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val refill_done = reply && refill_count.andR val wb_done = reply && (state === s_wb_resp) - val finish_q = (new Queue(2 /* wb + refill */)) { new GrantAck } - finish_q.io.enq.valid := wb_done || refill_done - finish_q.io.enq.bits.master_xact_id := io.mem_rep.bits.master_xact_id io.wb_req.valid := Bool(false) + when (io.probe_writeback.valid && idx_match && io.probe_writeback.bits) { + writeback_probed := true + } + io.probe_writeback.ready := !idx_match || state != s_wb_req && state != s_wb_resp && state != s_meta_clear - when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid) { + when (state === s_drain_rpq && !rpq.io.deq.valid) { state := s_invalid } when (state === s_meta_write_resp) { @@ -234,24 +235,25 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { } when (state === s_wb_resp) { when (reply) { state := s_meta_clear } - when (abort) { state := s_wb_req } + when (abort) { state := Mux(writeback_probed, s_refill_req, s_wb_req) } } when (state === s_wb_req) { - io.wb_req.valid := Bool(true) - when (io.probe_writeback.valid && idx_match) { - io.wb_req.valid := Bool(false) - when (io.probe_writeback.bits) { state := s_refill_req } + io.wb_req.valid := true + when (writeback_probed) { + io.wb_req.valid := false + state := s_refill_req }.elsewhen (io.wb_req.ready) { state := s_wb_resp } } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req acq_type := conf.co.getAcquireTypeOnSecondaryMiss(req_cmd, conf.co.newStateOnFlush(), io.mem_req.bits) } - when ((state === s_invalid) && io.req_pri_val) { + when (io.req_pri_val && io.req_pri_rdy) { line_state := conf.co.newStateOnFlush() refill_count := UFix(0) acq_type := conf.co.getAcquireTypeOnPrimaryMiss(req_cmd, conf.co.newStateOnFlush()) req := io.req_bits + writeback_probed := false state := 
Mux(conf.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_refill_req) when (io.req_bits.tag_match) { @@ -264,11 +266,19 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { } } + val finish_q = (new Queue(2 /* wb + refill */)) { new GrantAck } + finish_q.io.enq.valid := wb_done || refill_done + finish_q.io.enq.bits.master_xact_id := io.mem_rep.bits.master_xact_id + val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp + io.mem_finish.valid := finish_q.io.deq.valid && can_finish + finish_q.io.deq.ready := io.mem_finish.ready && can_finish + io.mem_finish.bits := finish_q.io.deq.bits + io.idx_match := (state != s_invalid) && idx_match io.mem_resp := req io.mem_resp.addr := Cat(req_idx, refill_count) << conf.ramoffbits io.tag := req.addr >> conf.untagbits - io.req_pri_rdy := (state === s_invalid) + io.req_pri_rdy := state === s_invalid && !finish_q.io.deq.valid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready io.meta_write.valid := state === s_meta_write_req || state === s_meta_clear @@ -282,14 +292,12 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { io.wb_req.bits.way_en := req.way_en io.wb_req.bits.client_xact_id := Bits(id) - io.probe_writeback.ready := (state != s_wb_resp && state != s_meta_clear && state != s_drain_rpq) || !idx_match - io.probe_refill.ready := (state != s_refill_resp && state != s_drain_rpq) || !idx_match - io.mem_req.valid := state === s_refill_req io.mem_req.bits.a_type := acq_type io.mem_req.bits.addr := Cat(io.tag, req_idx).toUFix io.mem_req.bits.client_xact_id := Bits(id) io.mem_finish <> finish_q.io.deq + io.mem_req.bits.client_xact_id := Bits(id) io.meta_read.valid := state === s_drain_rpq io.meta_read.bits.addr := io.mem_req.bits.addr << conf.offbits @@ -351,7 +359,6 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { var fence = Bool(false) var sec_rdy = Bool(false) var writeback_probe_rdy = Bool(true) - var refill_probe_rdy = Bool(true) 
for (i <- 0 to conf.nmshr-1) { val mshr = new MSHR(i) @@ -373,7 +380,6 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { mshr.io.mem_finish <> mem_finish_arb.io.in(i) mshr.io.wb_req <> wb_req_arb.io.in(i) mshr.io.replay <> replay_arb.io.in(i) - mshr.io.probe_refill.valid := io.probe.valid && tag_match mshr.io.probe_writeback.valid := io.probe.valid mshr.io.probe_writeback.bits := wb_probe_match @@ -385,7 +391,6 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { sec_rdy = sec_rdy || mshr.io.req_sec_rdy fence = fence || !mshr.io.req_pri_rdy idx_match = idx_match || mshr.io.idx_match - refill_probe_rdy = refill_probe_rdy && mshr.io.probe_refill.ready writeback_probe_rdy = writeback_probe_rdy && mshr.io.probe_writeback.ready } @@ -401,7 +406,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { io.secondary_miss := idx_match io.mem_resp := memRespMux(io.mem_rep.bits.client_xact_id) io.fence_rdy := !fence - io.probe.ready := (refill_probe_rdy || !tag_match) && (writeback_probe_rdy || !wb_probe_match) + io.probe.ready := writeback_probe_rdy || !wb_probe_match val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd) io.replay.bits.data := sdq(RegEn(replay_arb.io.out.bits.sdq_id, free_sdq)) @@ -1018,9 +1023,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio val acquire_arb = (new Arbiter(2)) { new Acquire } acquire_arb.io.in(0) <> wb.io.mem_req - acquire_arb.io.in(1).valid := mshr.io.mem_req.valid && prober.io.req.ready - mshr.io.mem_req.ready := acquire_arb.io.in(1).ready && prober.io.req.ready - acquire_arb.io.in(1).bits := mshr.io.mem_req.bits + acquire_arb.io.in(1) <> mshr.io.mem_req io.mem.acquire <> FIFOedLogicalNetworkIOWrapper(acquire_arb.io.out) io.mem.acquire_data <> FIFOedLogicalNetworkIOWrapper(wb.io.mem_req_data) From 37ee843b2c515056b5bb6255f4eb5d5f9cf2dc7e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 24 Jan 2013 17:46:11 -0800 Subject: [PATCH 0550/1087] 
don't use reset combinationally --- rocket/src/main/scala/nbdcache.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 7a91e06c..125be0b3 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -508,7 +508,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { val line_state = UFix(INPUT, 2) } - val s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(8) { UFix() } + val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(9) { UFix() } val state = Reg(resetVal = s_invalid) val line_state = Reg() { UFix() } val way_en = Reg() { Bits() } @@ -546,8 +546,11 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { state := s_meta_read req := io.req.bits } + when (state === s_reset) { + state := s_invalid + } - io.req.ready := state === s_invalid && !reset + io.req.ready := state === s_invalid io.rep.valid := state === s_release io.rep.bits := conf.co.newRelease(req, Mux(hit, line_state, conf.co.newStateOnFlush)) From 1fbc20450e5d2189f6cdc521f4a40322ae9020f9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 24 Jan 2013 17:46:35 -0800 Subject: [PATCH 0551/1087] don't allow simultaneous reads and writes to the tag ram --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 125be0b3..cc01a874 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -601,7 +601,7 @@ class MetaDataArray(implicit conf: DCacheConfig) extends Component { io.resp(w).tag := m } - io.read.ready := !rst + io.read.ready := !rst && !io.write.valid // so really this could be a 6T 
RAM io.write.ready := !rst } From 575bd3445ac0f56ab6a455ed5da5b0bfc5c4cc82 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 24 Jan 2013 18:00:39 -0800 Subject: [PATCH 0552/1087] re-generalize scoreboard --- rocket/src/main/scala/ctrl.scala | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 91206853..ee90bbe0 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -601,12 +601,9 @@ class Control(implicit conf: RocketConfiguration) extends Component val replay_wb = io.dmem.resp.bits.nack || wb_reg_replay || vec_replay || io.dpath.pcr_replay - class Scoreboard + class Scoreboard(n: Int) { -// val r = Reg(resetVal = Bits(0)) - // RIMAS: explicitly set width to 32, otherwise Chisel would set it to 1024 - // and cause a ton of warnings during synthesis - val r = Reg(resetVal = Bits(0,32)) + val r = Reg(resetVal = Bits(0, n)) var next = r var ens = Bool(false) def apply(addr: UFix) = r(addr) @@ -620,12 +617,12 @@ class Control(implicit conf: RocketConfiguration) extends Component } } - val sboard = new Scoreboard + val sboard = new Scoreboard(32) sboard.set((wb_reg_div_mul_val || wb_dcache_miss) && io.dpath.wb_wen, io.dpath.wb_waddr) sboard.clear(io.dpath.mem_ll_wb, io.dpath.mem_ll_waddr) val id_stall_fpu = if (conf.fpu) { - val fp_sboard = new Scoreboard + val fp_sboard = new Scoreboard(32) fp_sboard.set((wb_dcache_miss && wb_reg_fp_wen || io.fpu.sboard_set) && !replay_wb, io.dpath.wb_waddr) fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) From c890099e097347363112be001fb8252c75e2ac75 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 24 Jan 2013 23:41:24 -0800 Subject: [PATCH 0553/1087] add System Control Register space to HTIF --- rocket/src/main/scala/htif.scala | 35 +++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff 
--git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 611beb68..a3530d0c 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -36,6 +36,15 @@ class HTIFIO(ntiles: Int) extends Bundle val ipi_rep = (new FIFOIO) { Bool() }.flip } +class SCRIO extends Bundle +{ + val n = 64 + val rdata = Vec(n) { Bits(INPUT, 64) } + val wen = Bool(OUTPUT) + val waddr = UFix(OUTPUT, log2Up(n)) + val wdata = Bits(OUTPUT, 64) +} + class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Component with ClientCoherenceAgent { implicit val lnConf = conf.ln @@ -43,6 +52,7 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo val host = new HostIO(w) val cpu = Vec(conf.ln.nTiles) { new HTIFIO(conf.ln.nTiles).flip } val mem = new TileLinkIO()(conf.ln) + val scr = new SCRIO } val short_request_bits = 64 @@ -82,7 +92,7 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.width-1, 0) - val pcr_coreid = if (conf.ln.nTiles == 1) UFix(0) else addr(20+log2Up(conf.ln.nTiles),20) + val pcr_coreid = addr(log2Up(conf.ln.nTiles+1)-1+20,20) val pcr_wdata = packet_ram(0) val bad_mem_packet = size(OFFSET_BITS-1-3,0).orR || addr(OFFSET_BITS-1-3,0).orR @@ -204,7 +214,7 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo io.mem.grant_ack.bits.header.src := UFix(conf.ln.nTiles) io.mem.grant_ack.bits.header.dst := UFix(0) - val pcrReadData = Vec(conf.ln.nTiles) { Reg() { Bits(width = io.cpu(0).pcr_rep.bits.getWidth) } } + val pcrReadData = Reg{Bits(width = io.cpu(0).pcr_rep.bits.getWidth)} for (i <- 0 until conf.ln.nTiles) { val my_reset = Reg(resetVal = Bool(true)) val my_ipi = Reg(resetVal = Bool(false)) @@ -235,22 +245,37 @@ class rocketHTIF(w: Int)(implicit conf: 
CoherenceHubConfiguration) extends Compo when (cmd === cmd_writecr) { my_reset := pcr_wdata(0) } - pcrReadData(i) := my_reset.toBits + pcrReadData := my_reset.toBits state := state_tx } cpu.pcr_rep.ready := Bool(true) when (cpu.pcr_rep.valid) { - pcrReadData(i) := cpu.pcr_rep.bits + pcrReadData := cpu.pcr_rep.bits state := state_tx } } + val scr_rdata = Vec(io.scr.rdata.size){Bits(width = 64)} + for (i <- 0 until scr_rdata.size) + scr_rdata(i) := io.scr.rdata(i) + scr_rdata(0) := conf.ln.nTiles + scr_rdata(1) := UFix(REFILL_CYCLES*MEM_DATA_BITS/8) << x_init.io.enq.bits.addr.getWidth + + io.scr.wen := false + io.scr.wdata := pcr_wdata + io.scr.waddr := pcr_addr.toUFix + when (state === state_pcr_req && pcr_coreid === Fix(-1)) { + io.scr.wen := cmd === cmd_writecr + pcrReadData := scr_rdata(pcr_addr) + state := state_tx + } + val tx_cmd = Mux(nack, cmd_nack, cmd_ack) val tx_cmd_ext = Cat(Bits(0, 4-tx_cmd.getWidth), tx_cmd) val tx_header = Cat(addr, seqno, tx_size, tx_cmd_ext) val tx_data = Mux(tx_word_count === UFix(0), tx_header, - Mux(cmd === cmd_readcr || cmd === cmd_writecr, pcrReadData(pcr_coreid), + Mux(cmd === cmd_readcr || cmd === cmd_writecr, pcrReadData, packet_ram(packet_ram_raddr))) io.host.in.ready := state === state_rx From 696dd102eba0d649b857d622701818d06c719711 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 27 Jan 2013 10:59:41 -0800 Subject: [PATCH 0554/1087] cleans up unconnected tile io pins (networking headers overwritten at top level) --- rocket/src/main/scala/tile.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index b4ddbd05..09bc1a03 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -50,6 +50,14 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon io.tilelink.release <> dcache.io.mem.release io.tilelink.release_data <> dcache.io.mem.release_data + val ioSubBundles = 
arbiter.io.getClass.getMethods.filter( x => + classOf[DirectionalFIFOIO[Data]].isAssignableFrom(x.getReturnType)).map{ m => + m.invoke(arbiter.io).asInstanceOf[DirectionalFIFOIO[LogicalNetworkIO[Data]]] } + ioSubBundles.foreach{ m => + m.bits.header.dst := UFix(0) + m.bits.header.src := UFix(0) + } + if (conf.vec) { val vicache = new Frontend()(ICacheConfig(128, 1, conf.co), lnConf) // 128 sets x 1 ways (8KB) arbiter.io.requestor(2) <> vicache.io.mem From 409b549d3c90114bb2407a0d4f6a23f5f20314c9 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 27 Jan 2013 11:27:09 -0800 Subject: [PATCH 0555/1087] actually cleared up tile ios --- rocket/src/main/scala/tile.scala | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 09bc1a03..f28706f6 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -41,18 +41,28 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon arbiter.io.requestor(0) <> dcache.io.mem arbiter.io.requestor(1) <> icache.io.mem - io.tilelink.acquire <> arbiter.io.mem.acquire - io.tilelink.acquire_data <> dcache.io.mem.acquire_data + io.tilelink.acquire.valid := arbiter.io.mem.acquire.valid + io.tilelink.acquire.ready := arbiter.io.mem.acquire.ready + io.tilelink.acquire.bits := arbiter.io.mem.acquire.bits + io.tilelink.acquire_data.valid := dcache.io.mem.acquire_data.valid + io.tilelink.acquire_data.ready := dcache.io.mem.acquire_data.ready + io.tilelink.acquire_data.bits := dcache.io.mem.acquire_data.bits arbiter.io.mem.abort <> io.tilelink.abort arbiter.io.mem.grant <> io.tilelink.grant - io.tilelink.grant_ack <> arbiter.io.mem.grant_ack + io.tilelink.grant_ack.valid := arbiter.io.mem.grant_ack.valid + io.tilelink.grant_ack.ready := arbiter.io.mem.grant_ack.ready + io.tilelink.grant_ack.bits := arbiter.io.mem.grant_ack.bits dcache.io.mem.probe <> io.tilelink.probe - 
io.tilelink.release <> dcache.io.mem.release - io.tilelink.release_data <> dcache.io.mem.release_data + io.tilelink.release.valid := dcache.io.mem.release.valid + io.tilelink.release.ready := dcache.io.mem.release.ready + io.tilelink.release.bits := dcache.io.mem.release.bits + io.tilelink.release_data.valid := dcache.io.mem.release_data.valid + io.tilelink.release_data.ready := dcache.io.mem.release_data.ready + io.tilelink.release_data.bits := dcache.io.mem.release_data.bits - val ioSubBundles = arbiter.io.getClass.getMethods.filter( x => - classOf[DirectionalFIFOIO[Data]].isAssignableFrom(x.getReturnType)).map{ m => - m.invoke(arbiter.io).asInstanceOf[DirectionalFIFOIO[LogicalNetworkIO[Data]]] } + val ioSubBundles = io.tilelink.getClass.getMethods.filter( x => + classOf[ClientSourcedIO[Data]].isAssignableFrom(x.getReturnType)).map{ m => + m.invoke(io.tilelink).asInstanceOf[ClientSourcedIO[LogicalNetworkIO[Data]]] } ioSubBundles.foreach{ m => m.bits.header.dst := UFix(0) m.bits.header.src := UFix(0) From 83c207c852a1177c6a476fcf43e1a1bcf490d71b Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 27 Jan 2013 12:00:28 -0800 Subject: [PATCH 0556/1087] pin cleanup in htif --- rocket/src/main/scala/htif.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index a3530d0c..6ec29fd4 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -194,8 +194,8 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo val init_addr = addr.toUFix >> UFix(OFFSET_BITS-3) val co = conf.co.asInstanceOf[CoherencePolicyWithUncached] x_init.io.enq.bits := Mux(cmd === cmd_writemem, co.getUncachedWriteAcquire(init_addr, UFix(0)), co.getUncachedReadAcquire(init_addr, UFix(0))) - io.mem.acquire <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq) - io.mem.acquire_data.valid:= state === state_mem_wdata + io.mem.acquire <> 
FIFOedLogicalNetworkIOWrapper(x_init.io.deq, UFix(conf.ln.nTiles), UFix(0)) + io.mem.acquire_data.valid := state === state_mem_wdata io.mem.acquire_data.bits.payload.data := mem_req_data io.mem.grant_ack.valid := (state === state_mem_finish) && mem_needs_ack io.mem.grant_ack.bits.payload.master_xact_id := mem_gxid @@ -203,8 +203,6 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo io.mem.release.valid := Bool(false) io.mem.release_data.valid := Bool(false) - io.mem.acquire.bits.header.src := UFix(conf.ln.nTiles) - io.mem.acquire.bits.header.dst := UFix(0) io.mem.acquire_data.bits.header.src := UFix(conf.ln.nTiles) io.mem.acquire_data.bits.header.dst := UFix(0) io.mem.release.bits.header.src := UFix(conf.ln.nTiles) From 931cffa749892e3a65f360aa50287c33df29baab Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 27 Jan 2013 23:04:35 -0800 Subject: [PATCH 0557/1087] ready signal fix --- rocket/src/main/scala/tile.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index f28706f6..306b1a74 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -42,22 +42,22 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon arbiter.io.requestor(1) <> icache.io.mem io.tilelink.acquire.valid := arbiter.io.mem.acquire.valid - io.tilelink.acquire.ready := arbiter.io.mem.acquire.ready + arbiter.io.mem.acquire.ready := io.tilelink.acquire.ready io.tilelink.acquire.bits := arbiter.io.mem.acquire.bits io.tilelink.acquire_data.valid := dcache.io.mem.acquire_data.valid - io.tilelink.acquire_data.ready := dcache.io.mem.acquire_data.ready + dcache.io.mem.acquire_data.ready := io.tilelink.acquire_data.ready io.tilelink.acquire_data.bits := dcache.io.mem.acquire_data.bits arbiter.io.mem.abort <> io.tilelink.abort arbiter.io.mem.grant <> io.tilelink.grant io.tilelink.grant_ack.valid := 
arbiter.io.mem.grant_ack.valid - io.tilelink.grant_ack.ready := arbiter.io.mem.grant_ack.ready + arbiter.io.mem.grant_ack.ready := io.tilelink.grant_ack.ready io.tilelink.grant_ack.bits := arbiter.io.mem.grant_ack.bits dcache.io.mem.probe <> io.tilelink.probe io.tilelink.release.valid := dcache.io.mem.release.valid - io.tilelink.release.ready := dcache.io.mem.release.ready + dcache.io.mem.release.ready := io.tilelink.release.ready io.tilelink.release.bits := dcache.io.mem.release.bits io.tilelink.release_data.valid := dcache.io.mem.release_data.valid - io.tilelink.release_data.ready := dcache.io.mem.release_data.ready + dcache.io.mem.release_data.ready := io.tilelink.release_data.ready io.tilelink.release_data.bits := dcache.io.mem.release_data.bits val ioSubBundles = io.tilelink.getClass.getMethods.filter( x => From f5729c9f2554f2470be3aa8877e57f1de24365e8 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 28 Jan 2013 16:44:17 -0800 Subject: [PATCH 0558/1087] removed ack_required field from grant messages --- rocket/src/main/scala/htif.scala | 2 +- rocket/src/main/scala/icache.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 611beb68..85c6bb50 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -109,7 +109,7 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo when (io.mem.grant.valid) { mem_acked := Bool(true) mem_gxid := io.mem.grant.bits.payload.master_xact_id - mem_needs_ack := io.mem.grant.bits.payload.require_ack + mem_needs_ack := conf.co.requiresAck(io.mem.grant.bits.payload) } io.mem.grant.ready := Bool(true) when (io.mem.abort.valid) { mem_nacked := Bool(true) } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 5914daac..77ac7f9b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -238,7 +238,7 @@ class 
ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) val finish_q = (new Queue(1)) { new GrantAck } - finish_q.io.enq.valid := refill_done && io.mem.grant.bits.payload.require_ack + finish_q.io.enq.valid := refill_done && c.co.requiresAck(io.mem.grant.bits.payload) finish_q.io.enq.bits.master_xact_id := io.mem.grant.bits.payload.master_xact_id // output signals From f2df6147dfb8c22abd20d7b06a272dc5776f0606 Mon Sep 17 00:00:00 2001 From: Rimas Avizienis Date: Mon, 28 Jan 2013 17:17:09 -0800 Subject: [PATCH 0559/1087] shuffled FPU control logic around to make functional unit retiming work better --- rocket/src/main/scala/fpu.scala | 209 +++++++++++++++++++++----------- 1 file changed, 141 insertions(+), 68 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 41fd4cb4..02aa28c0 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -370,8 +370,8 @@ class FPToFP(val latency: Int) extends Component class ioFMA(width: Int) extends Bundle { val valid = Bool(INPUT) - val cmd = Bits(INPUT, FCMD_WIDTH) - val rm = Bits(INPUT, 3) + val cmd = Bits(INPUT, 2) + val rm = Bits(INPUT, 2) val in1 = Bits(INPUT, width) val in2 = Bits(INPUT, width) val in3 = Bits(INPUT, width) @@ -382,82 +382,126 @@ class ioFMA(width: Int) extends Bundle { class FPUSFMAPipe(val latency: Int) extends Component { val io = new ioFMA(33) + + val r_cmd = Reg() { Bits() } + val r_rm = Reg() { Bits() } + val r_in1 = Reg() { Bits() } + val r_in2 = Reg() { Bits() } + val r_in3 = Reg() { Bits() } + + val out_reg = Reg() { Bits() } + val exc_reg = Reg() { Bits() } + + val valid_pipe_regs = Vec(latency) { Reg() { Bool() } } + val dout_pipe_regs = Vec(latency-2) { Reg() { Bits() } } + val exc_pipe_regs = Vec(latency-2) { Reg() { Bits() } } + + valid_pipe_regs(0) := io.valid + for (i <- 1 until latency) { + valid_pipe_regs(i) := valid_pipe_regs(i-1) + } - val cmd = 
Reg() { Bits() } - val rm = Reg() { Bits() } - val in1 = Reg() { Bits() } - val in2 = Reg() { Bits() } - val in3 = Reg() { Bits() } - - val cmd_fma = io.cmd === FCMD_MADD || io.cmd === FCMD_MSUB || - io.cmd === FCMD_NMADD || io.cmd === FCMD_NMSUB - val cmd_addsub = io.cmd === FCMD_ADD || io.cmd === FCMD_SUB - - val one = Bits("h80000000") - val zero = Cat(io.in1(32) ^ io.in2(32), Bits(0, 32)) - - val valid = Reg(io.valid) when (io.valid) { - cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) - rm := io.rm - in1 := io.in1 - in2 := Mux(cmd_addsub, one, io.in2) - in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, zero)) + r_cmd := io.cmd + r_rm := io.rm + r_in1 := io.in1 + r_in2 := io.in2 + r_in3 := io.in3 } val fma = new hardfloat.mulAddSubRecodedFloatN(23, 9) - fma.io.op := cmd - fma.io.roundingMode := rm - fma.io.a := in1 - fma.io.b := in2 - fma.io.c := in3 + + fma.io.op := r_cmd + fma.io.roundingMode := r_rm + fma.io.a := r_in1 + fma.io.b := r_in2 + fma.io.c := r_in3 - io.out := Pipe(valid, fma.io.out, latency-1).bits - io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits + when (valid_pipe_regs(0)) { + dout_pipe_regs(0) := fma.io.out + exc_pipe_regs(0) := fma.io.exceptionFlags + } + + for (i <- 1 until latency-2) { + when (valid_pipe_regs(i)) { + dout_pipe_regs(i) := dout_pipe_regs(i-1) + exc_pipe_regs(i) := exc_pipe_regs(i-1) + } + } + + when (valid_pipe_regs(latency-2)) { + out_reg := dout_pipe_regs(latency-3) + exc_reg := exc_pipe_regs(latency-3) + } + + io.out := out_reg + io.exc := exc_reg } class FPUDFMAPipe(val latency: Int) extends Component { val io = new ioFMA(65) - val cmd = Reg() { Bits() } - val rm = Reg() { Bits() } - val in1 = Reg() { Bits() } - val in2 = Reg() { Bits() } - val in3 = Reg() { Bits() } - - val cmd_fma = io.cmd === FCMD_MADD || io.cmd === FCMD_MSUB || - io.cmd === FCMD_NMADD || io.cmd === FCMD_NMSUB - val cmd_addsub = io.cmd === FCMD_ADD || io.cmd === FCMD_SUB - - val one = Bits("h8000000000000000") - val zero = 
Cat(io.in1(64) ^ io.in2(64), Bits(0, 64)) - - val valid = Reg(io.valid) + val r_cmd = Reg() { Bits() } + val r_rm = Reg() { Bits() } + val r_in1 = Reg() { Bits() } + val r_in2 = Reg() { Bits() } + val r_in3 = Reg() { Bits() } + + val out_reg = Reg() { Bits() } + val exc_reg = Reg() { Bits() } + + val valid_pipe_regs = Vec(latency) { Reg() { Bool() } } + val dout_pipe_regs = Vec(latency-2) { Reg() { Bits() } } + val exc_pipe_regs = Vec(latency-2) { Reg() { Bits() } } + + valid_pipe_regs(0) := io.valid + for (i <- 1 until latency) { + valid_pipe_regs(i) := valid_pipe_regs(i-1) + } + when (io.valid) { - cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) - rm := io.rm - in1 := io.in1 - in2 := Mux(cmd_addsub, one, io.in2) - in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, zero)) + r_cmd := io.cmd + r_rm := io.rm + r_in1 := io.in1 + r_in2 := io.in2 + r_in3 := io.in3 + } + + val fma = new hardfloat.mulAddSubRecodedFloatN(52, 12) + + fma.io.op := r_cmd + fma.io.roundingMode := r_rm + fma.io.a := r_in1 + fma.io.b := r_in2 + fma.io.c := r_in3 + + when (valid_pipe_regs(0)) { + dout_pipe_regs(0) := fma.io.out + exc_pipe_regs(0) := fma.io.exceptionFlags + } + + for (i <- 1 until latency-2) { + when (valid_pipe_regs(i)) { + dout_pipe_regs(i) := dout_pipe_regs(i-1) + exc_pipe_regs(i) := exc_pipe_regs(i-1) + } } - val fma = new hardfloat.mulAddSubRecodedFloatN(52, 12) - fma.io.op := cmd - fma.io.roundingMode := rm - fma.io.a := in1 - fma.io.b := in2 - fma.io.c := in3 + when (valid_pipe_regs(latency-2)) { + out_reg := dout_pipe_regs(latency-3) + exc_reg := exc_pipe_regs(latency-3) + } - io.out := Pipe(valid, fma.io.out, latency-1).bits - io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits + io.out := out_reg + io.exc := exc_reg } class FPU(sfma_latency: Int, dfma_latency: Int) extends Component { val io = new Bundle { - val ctrl = (new CtrlFPUIO).flip - val dpath = (new DpathFPUIO).flip + val ctrl = new CtrlFPUIO().flip + val dpath = new DpathFPUIO().flip val 
sfma = new ioFMA(33) val dfma = new ioFMA(65) } @@ -526,23 +570,52 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component val cmd_fma = mem_ctrl.cmd === FCMD_MADD || mem_ctrl.cmd === FCMD_MSUB || mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB + + // RIMAS: refactoring for retiming + // moved recoding of cmd -> op outside of DFMA/SFMA blocks + // also moved muxing of operands based on command bits out of module + + // Single precision FMA val sfma = new FPUSFMAPipe(sfma_latency) + val sfma_cmd = Mux(io.sfma.valid, io.sfma.cmd, ctrl.cmd) + val sfma_cmd_fma = sfma_cmd === FCMD_MADD || sfma_cmd === FCMD_MSUB || + sfma_cmd === FCMD_NMADD || sfma_cmd === FCMD_NMSUB + val sfma_cmd_addsub = sfma_cmd === FCMD_ADD || sfma_cmd === FCMD_SUB + + val sfma_in1 = Mux(io.sfma.valid, io.sfma.in1, ex_rs1) + val sfma_in2 = Mux(io.sfma.valid, io.sfma.in2, ex_rs2) + val sfma_in3 = Mux(io.sfma.valid, io.sfma.in3, ex_rs3) + val sfma_one = Bits("h80000000") + val sfma_zero = Cat(sfma_in1(32) ^ sfma_in2(32), Bits(0, 32)) + sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single - sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1) - sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2) - sfma.io.in3 := Mux(io.sfma.valid, io.sfma.in3, ex_rs3) - sfma.io.cmd := Mux(io.sfma.valid, io.sfma.cmd, ctrl.cmd) - sfma.io.rm := Mux(io.sfma.valid, io.sfma.rm, ex_rm) + sfma.io.in1 := sfma_in1 + sfma.io.in2 := Mux(sfma_cmd_addsub, sfma_one, sfma_in2) + sfma.io.in3 := Mux(sfma_cmd_fma, sfma_in3, Mux(sfma_cmd_addsub, sfma_in2, sfma_zero)) + sfma.io.cmd := Cat(sfma_cmd(1) & (sfma_cmd_fma || sfma_cmd_addsub), sfma_cmd(0)) + sfma.io.rm := Mux(io.sfma.valid, io.sfma.rm(1,0), ex_rm(1,0)) io.sfma.out := sfma.io.out io.sfma.exc := sfma.io.exc - + + // Double precision FMA val dfma = new FPUDFMAPipe(dfma_latency) + val dfma_cmd = Mux(io.dfma.valid, io.dfma.cmd, ctrl.cmd) + val dfma_cmd_fma = 
dfma_cmd === FCMD_MADD || dfma_cmd === FCMD_MSUB || + dfma_cmd === FCMD_NMADD || dfma_cmd === FCMD_NMSUB + val dfma_cmd_addsub = dfma_cmd === FCMD_ADD || dfma_cmd === FCMD_SUB + + val dfma_in1 = Mux(io.dfma.valid, io.dfma.in1, ex_rs1) + val dfma_in2 = Mux(io.dfma.valid, io.dfma.in2, ex_rs2) + val dfma_in3 = Mux(io.dfma.valid, io.dfma.in3, ex_rs3) + val dfma_one = Bits("h8000000000000000") + val dfma_zero = Cat(dfma_in1(64) ^ dfma_in2(64), Bits(0, 64)) + dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single - dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1) - dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2) - dfma.io.in3 := Mux(io.dfma.valid, io.dfma.in3, ex_rs3) - dfma.io.cmd := Mux(io.dfma.valid, io.dfma.cmd, ctrl.cmd) - dfma.io.rm := Mux(io.dfma.valid, io.dfma.rm, ex_rm) + dfma.io.in1 := dfma_in1 + dfma.io.in2 := Mux(dfma_cmd_addsub, dfma_one, dfma_in2) + dfma.io.in3 := Mux(dfma_cmd_fma, dfma_in3, Mux(dfma_cmd_addsub, dfma_in2, dfma_zero)) + dfma.io.cmd := Cat(dfma_cmd(1) & (dfma_cmd_fma || dfma_cmd_addsub), dfma_cmd(0)) + dfma.io.rm := Mux(io.dfma.valid, io.dfma.rm(1,0), ex_rm(1,0)) io.dfma.out := dfma.io.out io.dfma.exc := dfma.io.exc From 37c67f1d87caabff2d7ff3d0ee56f21cfe5fe3fb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 28 Jan 2013 17:56:32 -0800 Subject: [PATCH 0560/1087] pipeline reset to the vector unit --- rocket/src/main/scala/core.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 2c27e514..fef96e15 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -43,7 +43,7 @@ class Core(implicit conf: RocketConfiguration) extends Component } else null if (conf.vec) { - val vu = new vu() + val vu = new vu(Reg(reset)) val vdtlb = new TLB(8) ptw += vdtlb.io.ptw From 45d8066f45ef92e433390d4f5e7daa93dfc97b5f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 28 Jan 2013 20:54:25 -0800 
Subject: [PATCH 0561/1087] add HellaQueue, an SRAM-based queue --- rocket/src/main/scala/util.scala | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 183621a6..2e99d4e4 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -73,3 +73,55 @@ case class WideCounter(width: Int, inc: Bool = Bool(true)) if (isWide) large := (if (w < smallWidth) UFix(0) else x(w.min(width)-1,smallWidth)) } } + +class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Component +{ + val io = new ioQueue(entries)(data) + require(isPow2(entries) && entries > 1) + + val do_flow = Bool() + val do_enq = io.enq.fire() && !do_flow + val do_deq = io.deq.fire() && !do_flow + + val maybe_full = Reg(resetVal = Bool(false)) + val enq_ptr = Counter(do_enq, entries)._1 + val deq_ptr = Counter(do_deq, entries)._1 + when (do_enq != do_deq) { maybe_full := do_enq } + + val ptr_match = enq_ptr === deq_ptr + val empty = ptr_match && !maybe_full + val full = ptr_match && maybe_full + do_flow := empty && io.deq.ready + + val ram = Mem(entries, seqRead = true){data} + val ram_out = Reg{data} + val ram_out_valid = Reg(io.deq.ready) + when (io.deq.ready && !empty) { + ram_out := ram(Mux(io.deq.valid, deq_ptr + UFix(1), deq_ptr)) + } + when (do_enq) { ram(enq_ptr) := io.enq.bits } + + io.deq.valid := Mux(empty, io.enq.valid, ram_out_valid) + io.enq.ready := !full + io.deq.bits := Mux(empty, io.enq.bits, ram_out) +} + +class HellaQueue[T <: Data](val entries: Int)(data: => T) extends Component +{ + val io = new ioQueue(entries)(data) + + val fq = new HellaFlowQueue(entries)(data) + io.enq <> fq.io.enq + io.deq <> Queue(fq.io.deq, 1, pipe = true) +} + +object HellaQueue +{ + def apply[T <: Data](enq: FIFOIO[T], entries: Int) = { + val q = (new HellaQueue(entries)) { enq.bits.clone } + q.io.enq.valid := enq.valid // not using <> so that override is allowed + 
q.io.enq.bits := enq.bits + enq.ready := q.io.enq.ready + q.io.deq + } +} From 6275e009f8461108e9abe3296116a7228d4b1bb8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 28 Jan 2013 20:57:43 -0800 Subject: [PATCH 0562/1087] fix HellaQueue deq.valid signal --- rocket/src/main/scala/util.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 2e99d4e4..dc0e49c5 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -95,8 +95,10 @@ class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Component val ram = Mem(entries, seqRead = true){data} val ram_out = Reg{data} - val ram_out_valid = Reg(io.deq.ready) + val ram_out_valid = Reg{Bool()} + ram_out_valid := Bool(false) when (io.deq.ready && !empty) { + ram_out_valid := Bool(true) ram_out := ram(Mux(io.deq.valid, deq_ptr + UFix(1), deq_ptr)) } when (do_enq) { ram(enq_ptr) := io.enq.bits } From 60bd3a6413bce4d75376196f83cbcee2e43c8294 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 29 Jan 2013 19:34:55 -0800 Subject: [PATCH 0563/1087] Revert "shuffled FPU control logic around to make functional unit retiming work better" This reverts commit 20dd308067b143adff4913fc7ac710a393ca1d86. 
--- rocket/src/main/scala/fpu.scala | 209 +++++++++++--------------------- 1 file changed, 68 insertions(+), 141 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 02aa28c0..41fd4cb4 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -370,8 +370,8 @@ class FPToFP(val latency: Int) extends Component class ioFMA(width: Int) extends Bundle { val valid = Bool(INPUT) - val cmd = Bits(INPUT, 2) - val rm = Bits(INPUT, 2) + val cmd = Bits(INPUT, FCMD_WIDTH) + val rm = Bits(INPUT, 3) val in1 = Bits(INPUT, width) val in2 = Bits(INPUT, width) val in3 = Bits(INPUT, width) @@ -382,126 +382,82 @@ class ioFMA(width: Int) extends Bundle { class FPUSFMAPipe(val latency: Int) extends Component { val io = new ioFMA(33) + + val cmd = Reg() { Bits() } + val rm = Reg() { Bits() } + val in1 = Reg() { Bits() } + val in2 = Reg() { Bits() } + val in3 = Reg() { Bits() } - val r_cmd = Reg() { Bits() } - val r_rm = Reg() { Bits() } - val r_in1 = Reg() { Bits() } - val r_in2 = Reg() { Bits() } - val r_in3 = Reg() { Bits() } - - val out_reg = Reg() { Bits() } - val exc_reg = Reg() { Bits() } - - val valid_pipe_regs = Vec(latency) { Reg() { Bool() } } - val dout_pipe_regs = Vec(latency-2) { Reg() { Bits() } } - val exc_pipe_regs = Vec(latency-2) { Reg() { Bits() } } - - valid_pipe_regs(0) := io.valid - for (i <- 1 until latency) { - valid_pipe_regs(i) := valid_pipe_regs(i-1) - } - + val cmd_fma = io.cmd === FCMD_MADD || io.cmd === FCMD_MSUB || + io.cmd === FCMD_NMADD || io.cmd === FCMD_NMSUB + val cmd_addsub = io.cmd === FCMD_ADD || io.cmd === FCMD_SUB + + val one = Bits("h80000000") + val zero = Cat(io.in1(32) ^ io.in2(32), Bits(0, 32)) + + val valid = Reg(io.valid) when (io.valid) { - r_cmd := io.cmd - r_rm := io.rm - r_in1 := io.in1 - r_in2 := io.in2 - r_in3 := io.in3 + cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) + rm := io.rm + in1 := io.in1 + in2 := Mux(cmd_addsub, one, io.in2) + in3 := 
Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, zero)) } val fma = new hardfloat.mulAddSubRecodedFloatN(23, 9) - - fma.io.op := r_cmd - fma.io.roundingMode := r_rm - fma.io.a := r_in1 - fma.io.b := r_in2 - fma.io.c := r_in3 + fma.io.op := cmd + fma.io.roundingMode := rm + fma.io.a := in1 + fma.io.b := in2 + fma.io.c := in3 - when (valid_pipe_regs(0)) { - dout_pipe_regs(0) := fma.io.out - exc_pipe_regs(0) := fma.io.exceptionFlags - } - - for (i <- 1 until latency-2) { - when (valid_pipe_regs(i)) { - dout_pipe_regs(i) := dout_pipe_regs(i-1) - exc_pipe_regs(i) := exc_pipe_regs(i-1) - } - } - - when (valid_pipe_regs(latency-2)) { - out_reg := dout_pipe_regs(latency-3) - exc_reg := exc_pipe_regs(latency-3) - } - - io.out := out_reg - io.exc := exc_reg + io.out := Pipe(valid, fma.io.out, latency-1).bits + io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits } class FPUDFMAPipe(val latency: Int) extends Component { val io = new ioFMA(65) - val r_cmd = Reg() { Bits() } - val r_rm = Reg() { Bits() } - val r_in1 = Reg() { Bits() } - val r_in2 = Reg() { Bits() } - val r_in3 = Reg() { Bits() } - - val out_reg = Reg() { Bits() } - val exc_reg = Reg() { Bits() } - - val valid_pipe_regs = Vec(latency) { Reg() { Bool() } } - val dout_pipe_regs = Vec(latency-2) { Reg() { Bits() } } - val exc_pipe_regs = Vec(latency-2) { Reg() { Bits() } } - - valid_pipe_regs(0) := io.valid - for (i <- 1 until latency) { - valid_pipe_regs(i) := valid_pipe_regs(i-1) - } - + val cmd = Reg() { Bits() } + val rm = Reg() { Bits() } + val in1 = Reg() { Bits() } + val in2 = Reg() { Bits() } + val in3 = Reg() { Bits() } + + val cmd_fma = io.cmd === FCMD_MADD || io.cmd === FCMD_MSUB || + io.cmd === FCMD_NMADD || io.cmd === FCMD_NMSUB + val cmd_addsub = io.cmd === FCMD_ADD || io.cmd === FCMD_SUB + + val one = Bits("h8000000000000000") + val zero = Cat(io.in1(64) ^ io.in2(64), Bits(0, 64)) + + val valid = Reg(io.valid) when (io.valid) { - r_cmd := io.cmd - r_rm := io.rm - r_in1 := io.in1 - r_in2 := io.in2 
- r_in3 := io.in3 + cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) + rm := io.rm + in1 := io.in1 + in2 := Mux(cmd_addsub, one, io.in2) + in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, zero)) } - + val fma = new hardfloat.mulAddSubRecodedFloatN(52, 12) - - fma.io.op := r_cmd - fma.io.roundingMode := r_rm - fma.io.a := r_in1 - fma.io.b := r_in2 - fma.io.c := r_in3 + fma.io.op := cmd + fma.io.roundingMode := rm + fma.io.a := in1 + fma.io.b := in2 + fma.io.c := in3 - when (valid_pipe_regs(0)) { - dout_pipe_regs(0) := fma.io.out - exc_pipe_regs(0) := fma.io.exceptionFlags - } - - for (i <- 1 until latency-2) { - when (valid_pipe_regs(i)) { - dout_pipe_regs(i) := dout_pipe_regs(i-1) - exc_pipe_regs(i) := exc_pipe_regs(i-1) - } - } - - when (valid_pipe_regs(latency-2)) { - out_reg := dout_pipe_regs(latency-3) - exc_reg := exc_pipe_regs(latency-3) - } - - io.out := out_reg - io.exc := exc_reg + io.out := Pipe(valid, fma.io.out, latency-1).bits + io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits } class FPU(sfma_latency: Int, dfma_latency: Int) extends Component { val io = new Bundle { - val ctrl = new CtrlFPUIO().flip - val dpath = new DpathFPUIO().flip + val ctrl = (new CtrlFPUIO).flip + val dpath = (new DpathFPUIO).flip val sfma = new ioFMA(33) val dfma = new ioFMA(65) } @@ -570,52 +526,23 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component val cmd_fma = mem_ctrl.cmd === FCMD_MADD || mem_ctrl.cmd === FCMD_MSUB || mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB - - // RIMAS: refactoring for retiming - // moved recoding of cmd -> op outside of DFMA/SFMA blocks - // also moved muxing of operands based on command bits out of module - - // Single precision FMA val sfma = new FPUSFMAPipe(sfma_latency) - val sfma_cmd = Mux(io.sfma.valid, io.sfma.cmd, ctrl.cmd) - val sfma_cmd_fma = sfma_cmd === FCMD_MADD || sfma_cmd === FCMD_MSUB || - sfma_cmd === 
FCMD_NMADD || sfma_cmd === FCMD_NMSUB - val sfma_cmd_addsub = sfma_cmd === FCMD_ADD || sfma_cmd === FCMD_SUB - - val sfma_in1 = Mux(io.sfma.valid, io.sfma.in1, ex_rs1) - val sfma_in2 = Mux(io.sfma.valid, io.sfma.in2, ex_rs2) - val sfma_in3 = Mux(io.sfma.valid, io.sfma.in3, ex_rs3) - val sfma_one = Bits("h80000000") - val sfma_zero = Cat(sfma_in1(32) ^ sfma_in2(32), Bits(0, 32)) - sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single - sfma.io.in1 := sfma_in1 - sfma.io.in2 := Mux(sfma_cmd_addsub, sfma_one, sfma_in2) - sfma.io.in3 := Mux(sfma_cmd_fma, sfma_in3, Mux(sfma_cmd_addsub, sfma_in2, sfma_zero)) - sfma.io.cmd := Cat(sfma_cmd(1) & (sfma_cmd_fma || sfma_cmd_addsub), sfma_cmd(0)) - sfma.io.rm := Mux(io.sfma.valid, io.sfma.rm(1,0), ex_rm(1,0)) + sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1) + sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2) + sfma.io.in3 := Mux(io.sfma.valid, io.sfma.in3, ex_rs3) + sfma.io.cmd := Mux(io.sfma.valid, io.sfma.cmd, ctrl.cmd) + sfma.io.rm := Mux(io.sfma.valid, io.sfma.rm, ex_rm) io.sfma.out := sfma.io.out io.sfma.exc := sfma.io.exc - - // Double precision FMA + val dfma = new FPUDFMAPipe(dfma_latency) - val dfma_cmd = Mux(io.dfma.valid, io.dfma.cmd, ctrl.cmd) - val dfma_cmd_fma = dfma_cmd === FCMD_MADD || dfma_cmd === FCMD_MSUB || - dfma_cmd === FCMD_NMADD || dfma_cmd === FCMD_NMSUB - val dfma_cmd_addsub = dfma_cmd === FCMD_ADD || dfma_cmd === FCMD_SUB - - val dfma_in1 = Mux(io.dfma.valid, io.dfma.in1, ex_rs1) - val dfma_in2 = Mux(io.dfma.valid, io.dfma.in2, ex_rs2) - val dfma_in3 = Mux(io.dfma.valid, io.dfma.in3, ex_rs3) - val dfma_one = Bits("h8000000000000000") - val dfma_zero = Cat(dfma_in1(64) ^ dfma_in2(64), Bits(0, 64)) - dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single - dfma.io.in1 := dfma_in1 - dfma.io.in2 := Mux(dfma_cmd_addsub, dfma_one, dfma_in2) - dfma.io.in3 := Mux(dfma_cmd_fma, dfma_in3, Mux(dfma_cmd_addsub, dfma_in2, dfma_zero)) - dfma.io.cmd := 
Cat(dfma_cmd(1) & (dfma_cmd_fma || dfma_cmd_addsub), dfma_cmd(0)) - dfma.io.rm := Mux(io.dfma.valid, io.dfma.rm(1,0), ex_rm(1,0)) + dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1) + dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2) + dfma.io.in3 := Mux(io.dfma.valid, io.dfma.in3, ex_rs3) + dfma.io.cmd := Mux(io.dfma.valid, io.dfma.cmd, ctrl.cmd) + dfma.io.rm := Mux(io.dfma.valid, io.dfma.rm, ex_rm) io.dfma.out := dfma.io.out io.dfma.exc := dfma.io.exc From 66eb3720a4582b86ef6105938b2c4f7ba10c5902 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 29 Jan 2013 21:16:20 -0800 Subject: [PATCH 0564/1087] fix SRAM semantics bug in HellaFlowQueue --- rocket/src/main/scala/util.scala | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index dc0e49c5..1a8e9118 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -91,21 +91,22 @@ class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Component val ptr_match = enq_ptr === deq_ptr val empty = ptr_match && !maybe_full val full = ptr_match && maybe_full + val atLeastTwo = full || enq_ptr - deq_ptr >= UFix(2) do_flow := empty && io.deq.ready - val ram = Mem(entries, seqRead = true){data} - val ram_out = Reg{data} + val ram = Mem(entries, seqRead = true){Bits(width = data.getWidth)} + val ram_out = Reg{Bits(width = data.getWidth)} val ram_out_valid = Reg{Bool()} ram_out_valid := Bool(false) - when (io.deq.ready && !empty) { + when (io.deq.ready && (atLeastTwo || !io.deq.valid && !empty)) { ram_out_valid := Bool(true) ram_out := ram(Mux(io.deq.valid, deq_ptr + UFix(1), deq_ptr)) } - when (do_enq) { ram(enq_ptr) := io.enq.bits } + when (do_enq) { ram(enq_ptr) := io.enq.bits.toBits } io.deq.valid := Mux(empty, io.enq.valid, ram_out_valid) io.enq.ready := !full - io.deq.bits := Mux(empty, io.enq.bits, ram_out) + io.deq.bits := Mux(empty, io.enq.bits, data.fromBits(ram_out)) 
} class HellaQueue[T <: Data](val entries: Int)(data: => T) extends Component From a0bd0adeb2f075e6af8d3b7efeaaae62a143887a Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 29 Jan 2013 21:32:42 -0800 Subject: [PATCH 0565/1087] change write/read port ordering for vlsi_mem_gen script --- rocket/src/main/scala/util.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 1a8e9118..bad12808 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -98,11 +98,11 @@ class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Component val ram_out = Reg{Bits(width = data.getWidth)} val ram_out_valid = Reg{Bool()} ram_out_valid := Bool(false) + when (do_enq) { ram(enq_ptr) := io.enq.bits.toBits } when (io.deq.ready && (atLeastTwo || !io.deq.valid && !empty)) { ram_out_valid := Bool(true) ram_out := ram(Mux(io.deq.valid, deq_ptr + UFix(1), deq_ptr)) } - when (do_enq) { ram(enq_ptr) := io.enq.bits.toBits } io.deq.valid := Mux(empty, io.enq.valid, ram_out_valid) io.enq.ready := !full From 9f89c812b7d2b1c7761bf68fe3f19d28ad74850c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 29 Jan 2013 23:08:25 -0800 Subject: [PATCH 0566/1087] fix HTIF memory size reporting --- rocket/src/main/scala/htif.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 6ec29fd4..96618f06 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -258,7 +258,7 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo for (i <- 0 until scr_rdata.size) scr_rdata(i) := io.scr.rdata(i) scr_rdata(0) := conf.ln.nTiles - scr_rdata(1) := UFix(REFILL_CYCLES*MEM_DATA_BITS/8) << x_init.io.enq.bits.addr.getWidth + scr_rdata(1) := (UFix(REFILL_CYCLES*MEM_DATA_BITS/8) << x_init.io.enq.bits.addr.getWidth) >> 20 io.scr.wen := false 
io.scr.wdata := pcr_wdata From 35349d227f4f3bf83439e3cc3c3f6d3efc526b27 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 20 Feb 2013 16:09:46 -0800 Subject: [PATCH 0567/1087] update to new Mem style --- rocket/src/main/scala/icache.scala | 12 ++++++------ rocket/src/main/scala/nbdcache.scala | 17 ++++------------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index a65cd01e..c55f65a7 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -177,7 +177,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte val enc_tagbits = c.code.width(c.tagbits) val tag_array = Mem(c.sets, seqRead = true) { Bits(width = enc_tagbits*c.assoc) } - val tag_rdata = Reg() { Bits() } + val tag_raddr = Reg{UFix()} when (refill_done) { val wmask = FillInterleaved(enc_tagbits, if (c.dm) Bits(1) else UFixToOH(repl_way)) val tag = c.code.encode(s2_tag) @@ -185,7 +185,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM .elsewhen (s0_valid) { - tag_rdata := tag_array(s0_pgoff(c.untagbits-1,c.offbits)) + tag_raddr := s0_pgoff(c.untagbits-1,c.offbits) } val vb_array = Reg(resetVal = Bits(0, c.lines)) @@ -209,7 +209,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte val s2_vb = Reg() { Bool() } val s2_tag_disparity = Reg() { Bool() } val s2_tag_match = Reg() { Bool() } - val tag_out = tag_rdata(enc_tagbits*(i+1)-1, enc_tagbits*i) + val tag_out = tag_array(tag_raddr)(enc_tagbits*(i+1)-1, enc_tagbits*i) when (s1_valid && rdy && !stall) { s2_vb := s1_vb s2_tag_disparity := c.code.decode(tag_out).error @@ -223,17 +223,17 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte for (i <- 0 until c.assoc) { val data_array = Mem(c.sets*REFILL_CYCLES, seqRead = true){ Bits(width = 
c.code.width(c.databits)) } - val s1_dout = Reg(){ Bits() } + val s1_raddr = Reg{UFix()} when (io.mem.grant.valid && repl_way === UFix(i)) { val d = io.mem.grant.bits.payload.data data_array(Cat(s2_idx,rf_cnt)) := c.code.encode(d) } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM .elsewhen (s0_valid) { - s1_dout := data_array(s0_pgoff(c.untagbits-1,c.offbits-rf_cnt.getWidth)) + s1_raddr := s0_pgoff(c.untagbits-1,c.offbits-rf_cnt.getWidth) } // if s1_tag_match is critical, replace with partial tag check - when (s1_valid && rdy && !stall && (Bool(c.dm) || s1_tag_match(i))) { s2_dout(i) := s1_dout } + when (s1_valid && rdy && !stall && (Bool(c.dm) || s1_tag_match(i))) { s2_dout(i) := data_array(s1_raddr) } } val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(c.databits/8)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)))(c.ibytes*8-1,0)) io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index cc01a874..0aefc0c2 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -583,7 +583,6 @@ class MetaDataArray(implicit conf: DCacheConfig) extends Component { val metabits = io.write.bits.data.state.width + conf.tagbits val tags = Mem(conf.sets, seqRead = true) { UFix(width = metabits*conf.ways) } - val tag = Reg{UFix()} when (rst || io.write.valid) { val addr = Mux(rst, rst_cnt, io.write.bits.idx) @@ -591,9 +590,7 @@ class MetaDataArray(implicit conf: DCacheConfig) extends Component { val mask = Mux(rst, Fix(-1), io.write.bits.way_en) tags.write(addr, Fill(conf.ways, data), FillInterleaved(metabits, mask)) } - when (io.read.valid) { - tag := tags(io.read.bits.addr(conf.untagbits-1,conf.offbits)) - } + val tag = tags(RegEn(io.read.bits.addr >> conf.offbits, io.read.valid)) for (w <- 0 until conf.ways) { val m = tag(metabits*(w+1)-1, metabits*w) @@ -619,7 +616,7 @@ class DataArray(implicit conf: DCacheConfig) extends Component 
{ for (w <- 0 until conf.ways by conf.wordsperrow) { val wway_en = io.write.bits.way_en(w+conf.wordsperrow-1,w) val rway_en = io.read.bits.way_en(w+conf.wordsperrow-1,w) - val resp = Vec(conf.wordsperrow){Reg{Bits(width = conf.bitsperrow)}} + val resp = Vec(conf.wordsperrow){Bits(width = conf.bitsperrow)} val r_raddr = RegEn(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=conf.bitsperrow) } @@ -628,9 +625,7 @@ class DataArray(implicit conf: DCacheConfig) extends Component { val mask = FillInterleaved(conf.encdatabits, wway_en) array.write(waddr, data, mask) } - when (rway_en.orR && io.read.valid) { - resp(p) := array(raddr) - } + resp(p) := array(RegEn(raddr, rway_en.orR && io.read.valid)) } for (dw <- 0 until conf.wordsperrow) { val r = AVec(resp.map(_(conf.encdatabits*(dw+1)-1,conf.encdatabits*dw))) @@ -643,15 +638,11 @@ class DataArray(implicit conf: DCacheConfig) extends Component { } else { val wmask = FillInterleaved(conf.encdatabits, io.write.bits.wmask) for (w <- 0 until conf.ways) { - val rdata = Reg() { Bits() } val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=conf.bitsperrow) } when (io.write.bits.way_en(w) && io.write.valid) { array.write(waddr, io.write.bits.data, wmask) } - when (io.read.bits.way_en(w) && io.read.valid) { - rdata := array(raddr) - } - io.resp(w) := rdata + io.resp(w) := array(RegEn(raddr, io.read.bits.way_en(w) && io.read.valid)) } } From e0361840bd455c9c83b4cc71a0e6317e11a7f187 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 28 Feb 2013 18:11:40 -0800 Subject: [PATCH 0568/1087] writebacks on release network pass asm tests and bmarks --- rocket/src/main/scala/arbiter.scala | 22 ++-- rocket/src/main/scala/icache.scala | 5 +- rocket/src/main/scala/nbdcache.scala | 148 ++++++++++++++++++--------- rocket/src/main/scala/tile.scala | 8 +- 4 files changed, 120 insertions(+), 63 deletions(-) diff --git 
a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 3dc2f44e..4b8529e6 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -61,29 +61,29 @@ class MemArbiter(n: Int)(implicit conf: LogicalNetworkConfiguration) extends Com val requestor = Vec(n) { new UncachedRequestorIO }.flip } - var xi_bits = new Acquire - xi_bits := io.requestor(n-1).acquire.bits.payload - xi_bits.client_xact_id := Cat(io.requestor(n-1).acquire.bits.payload.client_xact_id, UFix(n-1, log2Up(n))) + var acq_bits = new Acquire + acq_bits := io.requestor(n-1).acquire.bits.payload + acq_bits.client_xact_id := Cat(io.requestor(n-1).acquire.bits.payload.client_xact_id, UFix(n-1, log2Up(n))) for (i <- n-2 to 0 by -1) { - var my_xi_bits = new Acquire - my_xi_bits := io.requestor(i).acquire.bits.payload - my_xi_bits.client_xact_id := Cat(io.requestor(i).acquire.bits.payload.client_xact_id, UFix(i, log2Up(n))) + var my_acq_bits = new Acquire + my_acq_bits := io.requestor(i).acquire.bits.payload + my_acq_bits.client_xact_id := Cat(io.requestor(i).acquire.bits.payload.client_xact_id, UFix(i, log2Up(n))) - xi_bits = Mux(io.requestor(i).acquire.valid, my_xi_bits, xi_bits) + acq_bits = Mux(io.requestor(i).acquire.valid, my_acq_bits, acq_bits) } - io.mem.acquire.bits.payload := xi_bits + io.mem.acquire.bits.payload := acq_bits io.mem.acquire.valid := io.requestor.map(_.acquire.valid).reduce(_||_) io.requestor(0).acquire.ready := io.mem.acquire.ready for (i <- 1 until n) io.requestor(i).acquire.ready := io.requestor(i-1).acquire.ready && !io.requestor(i-1).acquire.valid - var xf_bits = io.requestor(n-1).grant_ack.bits + var ga_bits = io.requestor(n-1).grant_ack.bits for (i <- n-2 to 0 by -1) - xf_bits = Mux(io.requestor(i).grant_ack.valid, io.requestor(i).grant_ack.bits, xf_bits) + ga_bits = Mux(io.requestor(i).grant_ack.valid, io.requestor(i).grant_ack.bits, ga_bits) - io.mem.grant_ack.bits := xf_bits + io.mem.grant_ack.bits := ga_bits 
io.mem.grant_ack.valid := io.requestor.map(_.grant_ack.valid).reduce(_||_) io.requestor(0).grant_ack.ready := io.mem.grant_ack.ready for (i <- 1 until n) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 77ac7f9b..764ba9ee 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -172,7 +172,8 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss - val (rf_cnt, refill_done) = Counter(io.mem.grant.valid, REFILL_CYCLES) + Assert(!c.co.isVoluntary(io.mem.grant.bits.payload) || !io.mem.grant.valid, "UncachedRequestors shouldn't get voluntary grants.") + val (rf_cnt, refill_done) = Counter(io.mem.grant.valid && !c.co.isVoluntary(io.mem.grant.bits.payload), REFILL_CYCLES) val repl_way = if (c.dm) UFix(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) val enc_tagbits = c.code.width(c.tagbits) @@ -223,7 +224,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte for (i <- 0 until c.assoc) { val data_array = Mem(c.sets*REFILL_CYCLES, seqRead = true){ Bits(width = c.code.width(c.databits)) } val s1_dout = Reg(){ Bits() } - when (io.mem.grant.valid && repl_way === UFix(i)) { + when (io.mem.grant.valid && c.co.messageHasData(io.mem.grant.bits.payload) && repl_way === UFix(i)) { val d = io.mem.grant.bits.payload.data data_array(Cat(s2_idx,rf_cnt)) := c.code.encode(d) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c0eac3ab..88adc3be 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -123,11 +123,18 @@ class DataWriteReq(implicit conf: DCacheConfig) extends Bundle { override def clone = new DataWriteReq().asInstanceOf[this.type] } +class InternalProbe(implicit conf: DCacheConfig) extends Probe { + val client_xact_id = Bits(width = CLIENT_XACT_ID_BITS) + + override def clone = new 
InternalProbe().asInstanceOf[this.type] +} + class WritebackReq(implicit conf: DCacheConfig) extends Bundle { val tag = Bits(width = conf.tagbits) val idx = Bits(width = conf.idxbits) val way_en = Bits(width = conf.ways) val client_xact_id = Bits(width = CLIENT_XACT_ID_BITS) + val r_type = UFix(width = RELEASE_TYPE_MAX_BITS) override def clone = new WritebackReq().asInstanceOf[this.type] } @@ -162,8 +169,9 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val req_bits = new MSHRReq().asInput val req_sdq_id = UFix(INPUT, log2Up(conf.nsdq)) - val idx_match = Bool(OUTPUT) - val tag = Bits(OUTPUT, conf.tagbits) + val idx_match = Bool(OUTPUT) + val probe_idx_match = Bool(OUTPUT) + val tag = Bits(OUTPUT, conf.tagbits) val mem_req = (new FIFOIO) { new Acquire } val mem_resp = new DataWriteReq().asOutput @@ -172,6 +180,9 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val replay = (new FIFOIO) { new Replay() } val mem_abort = (new PipeIO) { new Abort }.flip val mem_rep = (new PipeIO) { new Grant }.flip + val mem_probe = (new PipeIO) { new Probe }.flip + val mem_probe_ready = Bool(OUTPUT) + val self_probe = (new FIFOIO) { new InternalProbe } val mem_finish = (new FIFOIO) { new GrantAck } val wb_req = (new FIFOIO) { new WritebackReq } val probe_writeback = (new FIFOIO) { Bool() }.flip @@ -181,29 +192,45 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(9) { UFix() } val state = Reg(resetVal = s_invalid) - val acq_type = Reg { UFix() } + val acquire_type = Reg { UFix() } + val release_type = Reg { UFix() } val line_state = Reg { UFix() } val refill_count = Reg { UFix(width = log2Up(REFILL_CYCLES)) } val req = Reg { new MSHRReq() } + val sent_wb_req = Reg { Bool() } val req_cmd = io.req_bits.cmd val req_idx = req.addr(conf.untagbits-1,conf.offbits) val 
idx_match = req_idx === io.req_bits.addr(conf.untagbits-1,conf.offbits) + val probe_idx_match = req_idx === io.mem_probe.bits.addr(conf.untagbits-1,conf.offbits) val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) + val abort = io.mem_abort.valid && io.mem_abort.bits.client_xact_id === UFix(id) + val reply = io.mem_rep.valid && io.mem_rep.bits.client_xact_id === UFix(id) + val refill_done = reply && refill_count.andR + val wb_done = reply && (state === s_wb_resp) + + val probe_wb_tag_match = io.mem_probe.bits.addr >> conf.untagbits === req.old_meta.tag + val probe_tag_match = io.mem_probe.bits.addr >> conf.untagbits === req.addr >> conf.untagbits + val handle_probe = (state != s_invalid) && probe_idx_match + val kill_probe = sent_wb_req && probe_wb_tag_match && conf.co.pendingVoluntaryReleaseIsSufficient(release_type, io.mem_probe.bits.p_type) + val probe_q = (new Queue(1, pipe = true, flow = true)) { new Probe } + probe_q.io.enq.valid := io.mem_probe.valid && handle_probe && sent_wb_req && !kill_probe + io.mem_probe_ready := probe_q.io.enq.ready && handle_probe + probe_q.io.enq.bits := io.mem_probe.bits + io.self_probe.valid := probe_q.io.deq.valid && (state != s_wb_resp) + probe_q.io.deq.ready := io.self_probe.ready && (state != s_wb_resp) + io.self_probe.bits := probe_q.io.deq.bits + io.self_probe.bits.client_xact_id := UFix(id) + val rpq = (new Queue(conf.nrpq)) { new Replay } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(req_cmd) rpq.io.enq.bits := io.req_bits rpq.io.enq.bits.sdq_id := io.req_sdq_id rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid - val abort = io.mem_abort.valid && io.mem_abort.bits.client_xact_id === UFix(id) - val reply = io.mem_rep.valid && io.mem_rep.bits.client_xact_id === UFix(id) - 
val refill_done = reply && refill_count.andR - val wb_done = reply && (state === s_wb_resp) - val finish_q = (new Queue(2 /* wb + refill */)) { new GrantAck } - finish_q.io.enq.valid := wb_done || refill_done + finish_q.io.enq.valid := (wb_done || refill_done) && conf.co.requiresAck(io.mem_rep.bits) finish_q.io.enq.bits.master_xact_id := io.mem_rep.bits.master_xact_id io.wb_req.valid := Bool(false) @@ -241,17 +268,22 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { when (io.probe_writeback.valid && idx_match) { io.wb_req.valid := Bool(false) when (io.probe_writeback.bits) { state := s_refill_req } - }.elsewhen (io.wb_req.ready) { state := s_wb_resp } + }.elsewhen (io.wb_req.ready) { + sent_wb_req := Bool(true) + state := s_wb_resp + } } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req - acq_type := conf.co.getAcquireTypeOnSecondaryMiss(req_cmd, conf.co.newStateOnFlush(), io.mem_req.bits) + acquire_type := conf.co.getAcquireTypeOnSecondaryMiss(req_cmd, conf.co.newStateOnFlush(), io.mem_req.bits) } when ((state === s_invalid) && io.req_pri_val) { line_state := conf.co.newStateOnFlush() refill_count := UFix(0) - acq_type := conf.co.getAcquireTypeOnPrimaryMiss(req_cmd, conf.co.newStateOnFlush()) + acquire_type := conf.co.getAcquireTypeOnPrimaryMiss(req_cmd, conf.co.newStateOnFlush()) + release_type := conf.co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc req := io.req_bits + sent_wb_req := Bool(false) state := Mux(conf.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_refill_req) when (io.req_bits.tag_match) { @@ -265,6 +297,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { } io.idx_match := (state != s_invalid) && idx_match + io.probe_idx_match := (state != s_invalid) && probe_idx_match io.mem_resp := req io.mem_resp.addr := Cat(req_idx, refill_count) << conf.ramoffbits io.tag := req.addr >> conf.untagbits @@ -281,12 +314,13 @@ class MSHR(id: Int)(implicit conf: 
DCacheConfig) extends Component { io.wb_req.bits.idx := req_idx io.wb_req.bits.way_en := req.way_en io.wb_req.bits.client_xact_id := Bits(id) + io.wb_req.bits.r_type := conf.co.getReleaseTypeOnVoluntaryWriteback() io.probe_writeback.ready := (state != s_wb_resp && state != s_meta_clear && state != s_drain_rpq) || !idx_match io.probe_refill.ready := (state != s_refill_resp && state != s_drain_rpq) || !idx_match io.mem_req.valid := state === s_refill_req - io.mem_req.bits.a_type := acq_type + io.mem_req.bits.a_type := acquire_type io.mem_req.bits.addr := Cat(io.tag, req_idx).toUFix io.mem_req.bits.client_xact_id := Bits(id) io.mem_finish <> finish_q.io.deq @@ -319,7 +353,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val mem_rep = (new PipeIO) { new Grant }.flip val mem_finish = (new FIFOIO) { new GrantAck } val wb_req = (new FIFOIO) { new WritebackReq } - val probe = (new FIFOIO) { Bool() }.flip + val mem_probe = (new FIFOIO) { new Probe }.flip + val self_probe = (new FIFOIO) { new InternalProbe } val fence_rdy = Bool(OUTPUT) } @@ -340,6 +375,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val mem_req_arb = (new Arbiter(conf.nmshr)) { new Acquire } val mem_finish_arb = (new Arbiter(conf.nmshr)) { new GrantAck } val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq } + val self_probe_arb = (new Arbiter(conf.nmshr+1)) { new InternalProbe } val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } @@ -347,11 +383,18 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.addr >> conf.untagbits var idx_match = Bool(false) + var probe_idx_match = Bool(false) var pri_rdy = Bool(false) var fence = Bool(false) var sec_rdy = Bool(false) var writeback_probe_rdy = Bool(true) var refill_probe_rdy = Bool(true) + var mem_probe_rdy = Bool(false) + + self_probe_arb.io.in(0).valid := io.mem_probe.valid && 
!probe_idx_match + self_probe_arb.io.in(0).bits := io.mem_probe.bits + self_probe_arb.io.in(0).bits.client_xact_id := UFix(0) // DNC + mem_probe_rdy = mem_probe_rdy || self_probe_arb.io.in(0).ready for (i <- 0 to conf.nmshr-1) { val mshr = new MSHR(i) @@ -367,14 +410,16 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { mshr.io.req_bits := io.req.bits mshr.io.req_sdq_id := sdq_alloc_id + mshr.io.mem_probe <> io.mem_probe mshr.io.meta_read <> meta_read_arb.io.in(i) mshr.io.meta_write <> meta_write_arb.io.in(i) mshr.io.mem_req <> mem_req_arb.io.in(i) mshr.io.mem_finish <> mem_finish_arb.io.in(i) mshr.io.wb_req <> wb_req_arb.io.in(i) + mshr.io.self_probe <> self_probe_arb.io.in(i+1) mshr.io.replay <> replay_arb.io.in(i) - mshr.io.probe_refill.valid := io.probe.valid && tag_match - mshr.io.probe_writeback.valid := io.probe.valid + mshr.io.probe_refill.valid := io.mem_probe.valid && tag_match + mshr.io.probe_writeback.valid := io.mem_probe.valid mshr.io.probe_writeback.bits := wb_probe_match mshr.io.mem_abort <> io.mem_abort @@ -385,8 +430,10 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { sec_rdy = sec_rdy || mshr.io.req_sec_rdy fence = fence || !mshr.io.req_pri_rdy idx_match = idx_match || mshr.io.idx_match + probe_idx_match = probe_idx_match || mshr.io.probe_idx_match refill_probe_rdy = refill_probe_rdy && mshr.io.probe_refill.ready writeback_probe_rdy = writeback_probe_rdy && mshr.io.probe_writeback.ready + mem_probe_rdy = mem_probe_rdy || mshr.io.mem_probe_ready } alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match @@ -396,12 +443,14 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { mem_req_arb.io.out <> io.mem_req mem_finish_arb.io.out <> io.mem_finish wb_req_arb.io.out <> io.wb_req + self_probe_arb.io.out <> io.self_probe io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.secondary_miss := idx_match io.mem_resp := memRespMux(io.mem_rep.bits.client_xact_id) io.fence_rdy := 
!fence - io.probe.ready := (refill_probe_rdy || !tag_match) && (writeback_probe_rdy || !wb_probe_match) + io.mem_probe.ready := mem_probe_rdy + //io.probe.ready := (refill_probe_rdy || !tag_match) && (writeback_probe_rdy || !wb_probe_match) val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd) io.replay.bits.data := sdq(RegEn(replay_arb.io.out.bits.sdq_id, free_sdq)) @@ -421,13 +470,11 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { val meta_read = (new FIFOIO) { new MetaReadReq } val data_req = (new FIFOIO) { new DataReadReq() } val data_resp = Bits(INPUT, conf.bitsperrow) - val mem_req = (new FIFOIO) { new Acquire } - val mem_req_data = (new FIFOIO) { new AcquireData } + val release = (new FIFOIO) { new Release } val release_data = (new FIFOIO) { new ReleaseData } } val valid = Reg(resetVal = Bool(false)) - val is_probe = Reg{Bool()} val r1_data_req_fired = Reg(resetVal = Bool(false)) val r2_data_req_fired = Reg(resetVal = Bool(false)) val cmd_sent = Reg{Bool()} @@ -442,7 +489,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { cnt := cnt + 1 } - when (r2_data_req_fired && !Mux(is_probe, io.release_data.ready, io.mem_req_data.ready)) { + when (r2_data_req_fired && !io.release_data.ready) { r1_data_req_fired := false r2_data_req_fired := false cnt := cnt - Mux[UFix](r1_data_req_fired, 2, 1) @@ -452,20 +499,18 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { valid := false } - when (valid && io.mem_req.ready) { + when (valid && io.release.ready) { cmd_sent := true } } when (io.probe.fire()) { valid := true - is_probe := true cmd_sent := true cnt := 0 req := io.probe.bits } when (io.req.fire()) { valid := true - is_probe := false cmd_sent := false cnt := 0 req := io.req.bits @@ -478,22 +523,21 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { io.data_req.bits.way_en := req.way_en io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(REFILL_CYCLES)-1,0)) << conf.ramoffbits 
- io.mem_req.valid := valid && !cmd_sent - io.mem_req.bits.a_type := conf.co.getAcquireTypeOnWriteback() - io.mem_req.bits.addr := Cat(req.tag, req.idx).toUFix - io.mem_req.bits.client_xact_id := req.client_xact_id - io.mem_req_data.valid := r2_data_req_fired && !is_probe - io.mem_req_data.bits.data := io.data_resp - io.release_data.valid := r2_data_req_fired && is_probe + io.release.valid := valid && !cmd_sent + io.release.bits.r_type := req.r_type + io.release.bits.addr := Cat(req.tag, req.idx).toUFix + io.release.bits.client_xact_id := req.client_xact_id + io.release.bits.master_xact_id := UFix(0) + io.release_data.valid := r2_data_req_fired io.release_data.bits.data := io.data_resp io.meta_read.valid := fire - io.meta_read.bits.addr := io.mem_req.bits.addr << conf.offbits + io.meta_read.bits.addr := io.release.bits.addr << conf.offbits } class ProbeUnit(implicit conf: DCacheConfig) extends Component { val io = new Bundle { - val req = (new FIFOIO) { new Probe }.flip + val req = (new FIFOIO) { new InternalProbe }.flip val rep = (new FIFOIO) { new Release } val meta_read = (new FIFOIO) { new MetaReadReq } val meta_write = (new FIFOIO) { new MetaWriteReq } @@ -507,7 +551,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { val state = Reg(resetVal = s_invalid) val line_state = Reg() { UFix() } val way_en = Reg() { Bits() } - val req = Reg() { new Probe() } + val req = Reg() { new InternalProbe } val hit = way_en.orR when (state === s_meta_write && io.meta_write.ready) { @@ -529,7 +573,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { state := s_release line_state := io.line_state way_en := io.way_en - when (!io.mshr_req.ready) { state := s_meta_read } + //when (!io.mshr_req.ready) { state := s_meta_read } } when (state === s_meta_resp) { state := s_mshr_req @@ -544,7 +588,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { io.req.ready := state === s_invalid && !reset io.rep.valid := state === s_release - 
io.rep.bits := conf.co.newRelease(req, Mux(hit, line_state, conf.co.newStateOnFlush)) + io.rep.bits := conf.co.newRelease(req, Mux(hit, line_state, conf.co.newStateOnFlush), req.client_xact_id) io.meta_read.valid := state === s_meta_read io.meta_read.bits.addr := req.addr << UFix(conf.offbits) @@ -555,11 +599,13 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { io.meta_write.bits.data.state := conf.co.newStateOnProbe(req, line_state) io.meta_write.bits.data.tag := req.addr >> UFix(conf.idxbits) - io.mshr_req.valid := state === s_mshr_req + //io.mshr_req.valid := state === s_mshr_req io.wb_req.valid := state === s_writeback_req io.wb_req.bits.way_en := way_en io.wb_req.bits.idx := req.addr io.wb_req.bits.tag := req.addr >> UFix(conf.idxbits) + io.wb_req.bits.r_type := UFix(0) // DNC + io.wb_req.bits.client_xact_id := UFix(0) // DNC } class MetaDataArray(implicit conf: DCacheConfig) extends Component { @@ -919,8 +965,18 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio mshr.io.mem_abort.valid := io.mem.abort.valid mshr.io.mem_abort.bits := io.mem.abort.bits.payload io.mem.abort.ready := Bool(true) + mshr.io.mem_probe <> FIFOedLogicalNetworkIOUnwrapper(io.mem.probe) when (mshr.io.req.fire()) { replacer.miss } + io.mem.acquire.valid := mshr.io.mem_req.valid && prober.io.req.ready + mshr.io.mem_req.ready := io.mem.acquire.ready && prober.io.req.ready + io.mem.acquire.bits.payload := mshr.io.mem_req.bits + //TODO io.mem.acquire <> FIFOedLogicalNetworkIOWrapper(mshr.io.mem_req) ??? 
+ //TODO io.mem.acquire_data should be connected to uncached store data generator + //io.mem.acquire_data <> FIFOedLogicalNetworkIOWrapper(TODO) + io.mem.acquire_data.valid := Bool(false) + io.mem.acquire_data.bits.payload.data := UFix(0) + // replays readArb.io.in(1).valid := mshr.io.replay.valid readArb.io.in(1).bits := mshr.io.replay.bits @@ -931,9 +987,12 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio metaWriteArb.io.in(0) <> mshr.io.meta_write // probes - prober.io.req <> FIFOedLogicalNetworkIOUnwrapper(io.mem.probe) - FIFOedLogicalNetworkIOWrapper(prober.io.rep) <> io.mem.release - prober.io.mshr_req <> mshr.io.probe + val releaseArb = (new Arbiter(2)) { new Release } + FIFOedLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release + + prober.io.req <> mshr.io.self_probe + prober.io.rep <> releaseArb.io.in(1) + //prober.io.mshr_req <> mshr.io.probe prober.io.wb_req <> wb.io.probe prober.io.way_en := s2_tag_match_way prober.io.line_state := s2_hit_state @@ -953,6 +1012,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio wb.io.meta_read <> metaReadArb.io.in(3) wb.io.data_req <> readArb.io.in(2) wb.io.data_resp := s2_data_corrected + releaseArb.io.in(0) <> wb.io.release FIFOedLogicalNetworkIOWrapper(wb.io.release_data) <> io.mem.release_data // store->load bypassing @@ -1016,13 +1076,5 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio io.cpu.resp.bits.data_subword := loadgen.byte io.cpu.resp.bits.store_data := s2_req.data - val acquire_arb = (new Arbiter(2)) { new Acquire } - acquire_arb.io.in(0) <> wb.io.mem_req - acquire_arb.io.in(1).valid := mshr.io.mem_req.valid && prober.io.req.ready - mshr.io.mem_req.ready := acquire_arb.io.in(1).ready && prober.io.req.ready - acquire_arb.io.in(1).bits := mshr.io.mem_req.bits - io.mem.acquire <> FIFOedLogicalNetworkIOWrapper(acquire_arb.io.out) - - io.mem.acquire_data <> 
FIFOedLogicalNetworkIOWrapper(wb.io.mem_req_data) io.mem.grant_ack <> FIFOedLogicalNetworkIOWrapper(mshr.io.mem_finish) } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index b4ddbd05..9ac249b1 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -24,6 +24,7 @@ case class RocketConfiguration(lnConf: LogicalNetworkConfiguration, co: Coherenc class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) with ClientCoherenceAgent { val memPorts = 2 + confIn.vec + val dcachePortID = 0 implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts), databits = confIn.xprlen) implicit val lnConf = confIn.lnConf implicit val conf = confIn.copy(dcache = dcConf) @@ -38,7 +39,7 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon val dcache = new HellaCache val arbiter = new MemArbiter(memPorts) - arbiter.io.requestor(0) <> dcache.io.mem + arbiter.io.requestor(dcachePortID) <> dcache.io.mem arbiter.io.requestor(1) <> icache.io.mem io.tilelink.acquire <> arbiter.io.mem.acquire @@ -47,8 +48,11 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon arbiter.io.mem.grant <> io.tilelink.grant io.tilelink.grant_ack <> arbiter.io.mem.grant_ack dcache.io.mem.probe <> io.tilelink.probe - io.tilelink.release <> dcache.io.mem.release io.tilelink.release_data <> dcache.io.mem.release_data + io.tilelink.release.valid := dcache.io.mem.release.valid + dcache.io.mem.release.ready := io.tilelink.release.ready + io.tilelink.release.bits := dcache.io.mem.release.bits + io.tilelink.release.bits.payload.client_xact_id := Cat(dcache.io.mem.release.bits.payload.client_xact_id, UFix(dcachePortID, log2Up(memPorts))) // Mimic client id extension done by MemArbiter for Acquires from either cache) if (conf.vec) { val vicache = new Frontend()(ICacheConfig(128, 1, conf.co), lnConf) // 128 sets x 1 ways (8KB) 
From 0f50970913f6935ce7d93d73bc7845a418832bb1 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 19 Mar 2013 00:43:20 -0700 Subject: [PATCH 0569/1087] move HellaQueue to uncore --- rocket/src/main/scala/util.scala | 55 -------------------------------- 1 file changed, 55 deletions(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index bad12808..183621a6 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -73,58 +73,3 @@ case class WideCounter(width: Int, inc: Bool = Bool(true)) if (isWide) large := (if (w < smallWidth) UFix(0) else x(w.min(width)-1,smallWidth)) } } - -class HellaFlowQueue[T <: Data](val entries: Int)(data: => T) extends Component -{ - val io = new ioQueue(entries)(data) - require(isPow2(entries) && entries > 1) - - val do_flow = Bool() - val do_enq = io.enq.fire() && !do_flow - val do_deq = io.deq.fire() && !do_flow - - val maybe_full = Reg(resetVal = Bool(false)) - val enq_ptr = Counter(do_enq, entries)._1 - val deq_ptr = Counter(do_deq, entries)._1 - when (do_enq != do_deq) { maybe_full := do_enq } - - val ptr_match = enq_ptr === deq_ptr - val empty = ptr_match && !maybe_full - val full = ptr_match && maybe_full - val atLeastTwo = full || enq_ptr - deq_ptr >= UFix(2) - do_flow := empty && io.deq.ready - - val ram = Mem(entries, seqRead = true){Bits(width = data.getWidth)} - val ram_out = Reg{Bits(width = data.getWidth)} - val ram_out_valid = Reg{Bool()} - ram_out_valid := Bool(false) - when (do_enq) { ram(enq_ptr) := io.enq.bits.toBits } - when (io.deq.ready && (atLeastTwo || !io.deq.valid && !empty)) { - ram_out_valid := Bool(true) - ram_out := ram(Mux(io.deq.valid, deq_ptr + UFix(1), deq_ptr)) - } - - io.deq.valid := Mux(empty, io.enq.valid, ram_out_valid) - io.enq.ready := !full - io.deq.bits := Mux(empty, io.enq.bits, data.fromBits(ram_out)) -} - -class HellaQueue[T <: Data](val entries: Int)(data: => T) extends Component -{ - val io = new ioQueue(entries)(data) - - val 
fq = new HellaFlowQueue(entries)(data) - io.enq <> fq.io.enq - io.deq <> Queue(fq.io.deq, 1, pipe = true) -} - -object HellaQueue -{ - def apply[T <: Data](enq: FIFOIO[T], entries: Int) = { - val q = (new HellaQueue(entries)) { enq.bits.clone } - q.io.enq.valid := enq.valid // not using <> so that override is allowed - q.io.enq.bits := enq.bits - enq.ready := q.io.enq.ready - q.io.deq - } -} From ea9d0b771efd1acf32e149b636fdb2af03669add Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 19 Mar 2013 15:29:40 -0700 Subject: [PATCH 0570/1087] remove aborts; simplify probes --- rocket/src/main/scala/arbiter.scala | 11 ---- rocket/src/main/scala/htif.scala | 11 ---- rocket/src/main/scala/icache.scala | 1 - rocket/src/main/scala/nbdcache.scala | 81 +++++----------------------- rocket/src/main/scala/tile.scala | 1 - 5 files changed, 14 insertions(+), 91 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 4b8529e6..871a8595 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -50,7 +50,6 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp class UncachedRequestorIO(implicit conf: LogicalNetworkConfiguration) extends Bundle { val acquire = (new ClientSourcedIO){(new LogicalNetworkIO){new Acquire }} - val abort = (new MasterSourcedIO) {(new LogicalNetworkIO){new Abort }} val grant = (new MasterSourcedIO) {(new LogicalNetworkIO){new Grant }} val grant_ack = (new ClientSourcedIO){(new LogicalNetworkIO){new GrantAck }} } @@ -101,14 +100,4 @@ class MemArbiter(n: Int)(implicit conf: LogicalNetworkConfiguration) extends Com io.requestor(i).grant.bits := io.mem.grant.bits io.requestor(i).grant.bits.payload.client_xact_id := tag >> UFix(log2Up(n)) } - - for (i <- 0 until n) - { - val tag = io.mem.abort.bits.payload.client_xact_id - io.requestor(i).abort.valid := io.mem.abort.valid && tag(log2Up(n)-1,0) === UFix(i) - io.requestor(i).abort.bits := 
io.mem.abort.bits - io.requestor(i).abort.bits.payload.client_xact_id := tag >> UFix(log2Up(n)) - } - - io.mem.abort.ready := Bool(true) } diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 85c6bb50..6e81f99a 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -105,15 +105,12 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo val mem_acked = Reg(resetVal = Bool(false)) val mem_gxid = Reg() { Bits() } val mem_needs_ack = Reg() { Bool() } - val mem_nacked = Reg(resetVal = Bool(false)) when (io.mem.grant.valid) { mem_acked := Bool(true) mem_gxid := io.mem.grant.bits.payload.master_xact_id mem_needs_ack := conf.co.requiresAck(io.mem.grant.bits.payload) } io.mem.grant.ready := Bool(true) - when (io.mem.abort.valid) { mem_nacked := Bool(true) } - io.mem.abort.ready := Bool(true) val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(9) { UFix() } val state = Reg(resetVal = state_rx) @@ -137,20 +134,12 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo mem_cnt := mem_cnt + UFix(1) } when (state === state_mem_wresp) { - when (mem_nacked) { - state := state_mem_req - mem_nacked := Bool(false) - } when (mem_acked) { state := state_mem_finish mem_acked := Bool(false) } } when (state === state_mem_rdata) { - when (mem_nacked) { - state := state_mem_req - mem_nacked := Bool(false) - } when (io.mem.grant.valid) { when (mem_cnt.andR) { state := state_mem_finish diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 764ba9ee..8ddd9823 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -259,7 +259,6 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte when (io.mem.acquire.ready && finish_q.io.enq.ready) { state := s_refill_wait } 
} is (s_refill_wait) { - when (io.mem.abort.valid) { state := s_request } when (io.mem.grant.valid) { state := s_refill } } is (s_refill) { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 88adc3be..acdabd5a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -178,11 +178,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val meta_read = (new FIFOIO) { new MetaReadReq } val meta_write = (new FIFOIO) { new MetaWriteReq } val replay = (new FIFOIO) { new Replay() } - val mem_abort = (new PipeIO) { new Abort }.flip val mem_rep = (new PipeIO) { new Grant }.flip - val mem_probe = (new PipeIO) { new Probe }.flip - val mem_probe_ready = Bool(OUTPUT) - val self_probe = (new FIFOIO) { new InternalProbe } val mem_finish = (new FIFOIO) { new GrantAck } val wb_req = (new FIFOIO) { new WritebackReq } val probe_writeback = (new FIFOIO) { Bool() }.flip @@ -197,32 +193,16 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val line_state = Reg { UFix() } val refill_count = Reg { UFix(width = log2Up(REFILL_CYCLES)) } val req = Reg { new MSHRReq() } - val sent_wb_req = Reg { Bool() } val req_cmd = io.req_bits.cmd val req_idx = req.addr(conf.untagbits-1,conf.offbits) val idx_match = req_idx === io.req_bits.addr(conf.untagbits-1,conf.offbits) - val probe_idx_match = req_idx === io.mem_probe.bits.addr(conf.untagbits-1,conf.offbits) val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) - val abort = io.mem_abort.valid && io.mem_abort.bits.client_xact_id === UFix(id) val reply = io.mem_rep.valid && io.mem_rep.bits.client_xact_id === UFix(id) val refill_done = reply && refill_count.andR val wb_done = reply && (state === s_wb_resp) - val probe_wb_tag_match = io.mem_probe.bits.addr >> 
conf.untagbits === req.old_meta.tag - val probe_tag_match = io.mem_probe.bits.addr >> conf.untagbits === req.addr >> conf.untagbits - val handle_probe = (state != s_invalid) && probe_idx_match - val kill_probe = sent_wb_req && probe_wb_tag_match && conf.co.pendingVoluntaryReleaseIsSufficient(release_type, io.mem_probe.bits.p_type) - val probe_q = (new Queue(1, pipe = true, flow = true)) { new Probe } - probe_q.io.enq.valid := io.mem_probe.valid && handle_probe && sent_wb_req && !kill_probe - io.mem_probe_ready := probe_q.io.enq.ready && handle_probe - probe_q.io.enq.bits := io.mem_probe.bits - io.self_probe.valid := probe_q.io.deq.valid && (state != s_wb_resp) - probe_q.io.deq.ready := io.self_probe.ready && (state != s_wb_resp) - io.self_probe.bits := probe_q.io.deq.bits - io.self_probe.bits.client_xact_id := UFix(id) - val rpq = (new Queue(conf.nrpq)) { new Replay } rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(req_cmd) rpq.io.enq.bits := io.req_bits @@ -232,7 +212,6 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val finish_q = (new Queue(2 /* wb + refill */)) { new GrantAck } finish_q.io.enq.valid := (wb_done || refill_done) && conf.co.requiresAck(io.mem_rep.bits) finish_q.io.enq.bits.master_xact_id := io.mem_rep.bits.master_xact_id - io.wb_req.valid := Bool(false) when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid) { state := s_invalid @@ -250,28 +229,18 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { refill_count := refill_count + UFix(1) line_state := conf.co.newStateOnGrant(io.mem_rep.bits, io.mem_req.bits) } - when (abort) { state := s_refill_req } } when (state === s_refill_req) { - when (abort) { state := s_refill_req } - .elsewhen (io.mem_req.ready) { state := s_refill_resp } + when (io.mem_req.ready) { state := s_refill_resp } } when (state === s_meta_clear && io.meta_write.ready) { state := s_refill_req } when (state === 
s_wb_resp) { when (reply) { state := s_meta_clear } - when (abort) { state := s_wb_req } } - when (state === s_wb_req) { - io.wb_req.valid := Bool(true) - when (io.probe_writeback.valid && idx_match) { - io.wb_req.valid := Bool(false) - when (io.probe_writeback.bits) { state := s_refill_req } - }.elsewhen (io.wb_req.ready) { - sent_wb_req := Bool(true) - state := s_wb_resp - } + when (state === s_wb_req && io.wb_req.ready) { + state := s_wb_resp } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req @@ -283,7 +252,6 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { acquire_type := conf.co.getAcquireTypeOnPrimaryMiss(req_cmd, conf.co.newStateOnFlush()) release_type := conf.co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc req := io.req_bits - sent_wb_req := Bool(false) state := Mux(conf.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_refill_req) when (io.req_bits.tag_match) { @@ -297,7 +265,6 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { } io.idx_match := (state != s_invalid) && idx_match - io.probe_idx_match := (state != s_invalid) && probe_idx_match io.mem_resp := req io.mem_resp.addr := Cat(req_idx, refill_count) << conf.ramoffbits io.tag := req.addr >> conf.untagbits @@ -310,14 +277,15 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { io.meta_write.bits.data.tag := io.tag io.meta_write.bits.way_en := req.way_en + io.wb_req.valid := state === s_wb_req io.wb_req.bits.tag := req.old_meta.tag io.wb_req.bits.idx := req_idx io.wb_req.bits.way_en := req.way_en io.wb_req.bits.client_xact_id := Bits(id) io.wb_req.bits.r_type := conf.co.getReleaseTypeOnVoluntaryWriteback() - io.probe_writeback.ready := (state != s_wb_resp && state != s_meta_clear && state != s_drain_rpq) || !idx_match - io.probe_refill.ready := (state != s_refill_resp && state != s_drain_rpq) || !idx_match + io.probe_writeback.ready := (state != s_wb_req && state != s_wb_resp && state != 
s_meta_clear && state != s_drain_rpq) || !idx_match + io.probe_refill.ready := (state != s_refill_resp && state != s_meta_write_req && state != s_meta_write_resp && state != s_drain_rpq) || !idx_match io.mem_req.valid := state === s_refill_req io.mem_req.bits.a_type := acquire_type @@ -349,12 +317,10 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val meta_read = (new FIFOIO) { new MetaReadReq } val meta_write = (new FIFOIO) { new MetaWriteReq } val replay = (new FIFOIO) { new Replay } - val mem_abort = (new PipeIO) { new Abort }.flip val mem_rep = (new PipeIO) { new Grant }.flip val mem_finish = (new FIFOIO) { new GrantAck } val wb_req = (new FIFOIO) { new WritebackReq } - val mem_probe = (new FIFOIO) { new Probe }.flip - val self_probe = (new FIFOIO) { new InternalProbe } + val probe = (new FIFOIO) { new Bool() }.flip val fence_rdy = Bool(OUTPUT) } @@ -375,7 +341,6 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val mem_req_arb = (new Arbiter(conf.nmshr)) { new Acquire } val mem_finish_arb = (new Arbiter(conf.nmshr)) { new GrantAck } val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq } - val self_probe_arb = (new Arbiter(conf.nmshr+1)) { new InternalProbe } val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } @@ -383,18 +348,11 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.addr >> conf.untagbits var idx_match = Bool(false) - var probe_idx_match = Bool(false) var pri_rdy = Bool(false) var fence = Bool(false) var sec_rdy = Bool(false) var writeback_probe_rdy = Bool(true) var refill_probe_rdy = Bool(true) - var mem_probe_rdy = Bool(false) - - self_probe_arb.io.in(0).valid := io.mem_probe.valid && !probe_idx_match - self_probe_arb.io.in(0).bits := io.mem_probe.bits - self_probe_arb.io.in(0).bits.client_xact_id := UFix(0) // DNC - mem_probe_rdy = mem_probe_rdy || 
self_probe_arb.io.in(0).ready for (i <- 0 to conf.nmshr-1) { val mshr = new MSHR(i) @@ -410,19 +368,16 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { mshr.io.req_bits := io.req.bits mshr.io.req_sdq_id := sdq_alloc_id - mshr.io.mem_probe <> io.mem_probe mshr.io.meta_read <> meta_read_arb.io.in(i) mshr.io.meta_write <> meta_write_arb.io.in(i) mshr.io.mem_req <> mem_req_arb.io.in(i) mshr.io.mem_finish <> mem_finish_arb.io.in(i) mshr.io.wb_req <> wb_req_arb.io.in(i) - mshr.io.self_probe <> self_probe_arb.io.in(i+1) mshr.io.replay <> replay_arb.io.in(i) - mshr.io.probe_refill.valid := io.mem_probe.valid && tag_match - mshr.io.probe_writeback.valid := io.mem_probe.valid + mshr.io.probe_refill.valid := io.probe.valid && tag_match + mshr.io.probe_writeback.valid := io.probe.valid mshr.io.probe_writeback.bits := wb_probe_match - mshr.io.mem_abort <> io.mem_abort mshr.io.mem_rep <> io.mem_rep memRespMux(i) := mshr.io.mem_resp @@ -430,10 +385,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { sec_rdy = sec_rdy || mshr.io.req_sec_rdy fence = fence || !mshr.io.req_pri_rdy idx_match = idx_match || mshr.io.idx_match - probe_idx_match = probe_idx_match || mshr.io.probe_idx_match refill_probe_rdy = refill_probe_rdy && mshr.io.probe_refill.ready writeback_probe_rdy = writeback_probe_rdy && mshr.io.probe_writeback.ready - mem_probe_rdy = mem_probe_rdy || mshr.io.mem_probe_ready } alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match @@ -443,14 +396,12 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { mem_req_arb.io.out <> io.mem_req mem_finish_arb.io.out <> io.mem_finish wb_req_arb.io.out <> io.wb_req - self_probe_arb.io.out <> io.self_probe io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.secondary_miss := idx_match io.mem_resp := memRespMux(io.mem_rep.bits.client_xact_id) io.fence_rdy := !fence - io.mem_probe.ready := mem_probe_rdy - //io.probe.ready := (refill_probe_rdy || !tag_match) && 
(writeback_probe_rdy || !wb_probe_match) + io.probe.ready := (refill_probe_rdy || !tag_match) && (writeback_probe_rdy || !wb_probe_match) val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd) io.replay.bits.data := sdq(RegEn(replay_arb.io.out.bits.sdq_id, free_sdq)) @@ -573,7 +524,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { state := s_release line_state := io.line_state way_en := io.way_en - //when (!io.mshr_req.ready) { state := s_meta_read } + when (!io.mshr_req.ready) { state := s_meta_read } } when (state === s_meta_resp) { state := s_mshr_req @@ -599,7 +550,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { io.meta_write.bits.data.state := conf.co.newStateOnProbe(req, line_state) io.meta_write.bits.data.tag := req.addr >> UFix(conf.idxbits) - //io.mshr_req.valid := state === s_mshr_req + io.mshr_req.valid := state === s_mshr_req io.wb_req.valid := state === s_writeback_req io.wb_req.bits.way_en := way_en io.wb_req.bits.idx := req.addr @@ -962,10 +913,6 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio mshr.io.mem_rep.valid := io.mem.grant.fire() mshr.io.mem_rep.bits := io.mem.grant.bits.payload - mshr.io.mem_abort.valid := io.mem.abort.valid - mshr.io.mem_abort.bits := io.mem.abort.bits.payload - io.mem.abort.ready := Bool(true) - mshr.io.mem_probe <> FIFOedLogicalNetworkIOUnwrapper(io.mem.probe) when (mshr.io.req.fire()) { replacer.miss } io.mem.acquire.valid := mshr.io.mem_req.valid && prober.io.req.ready @@ -990,9 +937,9 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio val releaseArb = (new Arbiter(2)) { new Release } FIFOedLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release - prober.io.req <> mshr.io.self_probe + prober.io.req <> FIFOedLogicalNetworkIOUnwrapper(io.mem.probe) prober.io.rep <> releaseArb.io.in(1) - //prober.io.mshr_req <> mshr.io.probe + prober.io.mshr_req <> mshr.io.probe prober.io.wb_req <> wb.io.probe 
prober.io.way_en := s2_tag_match_way prober.io.line_state := s2_hit_state diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 9ac249b1..2cbfd24c 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -44,7 +44,6 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon io.tilelink.acquire <> arbiter.io.mem.acquire io.tilelink.acquire_data <> dcache.io.mem.acquire_data - arbiter.io.mem.abort <> io.tilelink.abort arbiter.io.mem.grant <> io.tilelink.grant io.tilelink.grant_ack <> arbiter.io.mem.grant_ack dcache.io.mem.probe <> io.tilelink.probe From 6d2541acedaeeb94fcce39dda671584e364b2c02 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 28 Feb 2013 21:03:37 -0800 Subject: [PATCH 0571/1087] nTiles -> nClients in LogicalNetworkConfig --- rocket/src/main/scala/core.scala | 2 +- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/htif.scala | 20 ++++++++++---------- rocket/src/main/scala/tile.scala | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 2c27e514..09fad98c 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -8,7 +8,7 @@ import Util._ class RocketIO(implicit conf: RocketConfiguration) extends Bundle { - val host = new HTIFIO(conf.lnConf.nTiles) + val host = new HTIFIO(conf.lnConf.nClients) val imem = new CPUFrontendIO()(conf.icache) val vimem = new CPUFrontendIO()(conf.icache) val dmem = new HellaCacheIO()(conf.dcache) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index e7387a27..ffe039c1 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -10,7 +10,7 @@ import hwacha._ class Datapath(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { - val host = new HTIFIO(conf.lnConf.nTiles) + 
val host = new HTIFIO(conf.lnConf.nClients) val ctrl = (new CtrlDpathIO).flip val dmem = new HellaCacheIO()(conf.dcache) val ptw = (new DatapathPTWIO).flip diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 131ae28c..bce859c9 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -104,7 +104,7 @@ object PCR class PCR(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { - val host = new HTIFIO(conf.lnConf.nTiles) + val host = new HTIFIO(conf.lnConf.nClients) val r = new ioReadPort(conf.nxpr, conf.xprlen) val w = new ioWritePort(conf.nxpr, conf.xprlen) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 6e81f99a..365380ae 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -41,7 +41,7 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo implicit val lnConf = conf.ln val io = new Bundle { val host = new HostIO(w) - val cpu = Vec(conf.ln.nTiles) { new HTIFIO(conf.ln.nTiles).flip } + val cpu = Vec(conf.ln.nClients) { new HTIFIO(conf.ln.nClients).flip } val mem = new TileLinkIO()(conf.ln) } @@ -82,7 +82,7 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.width-1, 0) - val pcr_coreid = if (conf.ln.nTiles == 1) UFix(0) else addr(20+log2Up(conf.ln.nTiles),20) + val pcr_coreid = if (conf.ln.nClients == 1) UFix(0) else addr(20+log2Up(conf.ln.nClients),20) val pcr_wdata = packet_ram(0) val bad_mem_packet = size(OFFSET_BITS-1-3,0).orR || addr(OFFSET_BITS-1-3,0).orR @@ -182,19 +182,19 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo io.mem.release.valid := Bool(false) io.mem.release_data.valid := Bool(false) - 
io.mem.acquire.bits.header.src := UFix(conf.ln.nTiles) + io.mem.acquire.bits.header.src := UFix(conf.ln.nClients) io.mem.acquire.bits.header.dst := UFix(0) - io.mem.acquire_data.bits.header.src := UFix(conf.ln.nTiles) + io.mem.acquire_data.bits.header.src := UFix(conf.ln.nClients) io.mem.acquire_data.bits.header.dst := UFix(0) - io.mem.release.bits.header.src := UFix(conf.ln.nTiles) + io.mem.release.bits.header.src := UFix(conf.ln.nClients) io.mem.release.bits.header.dst := UFix(0) - io.mem.release_data.bits.header.src := UFix(conf.ln.nTiles) + io.mem.release_data.bits.header.src := UFix(conf.ln.nClients) io.mem.release_data.bits.header.dst := UFix(0) - io.mem.grant_ack.bits.header.src := UFix(conf.ln.nTiles) + io.mem.grant_ack.bits.header.src := UFix(conf.ln.nClients) io.mem.grant_ack.bits.header.dst := UFix(0) - val pcrReadData = Vec(conf.ln.nTiles) { Reg() { Bits(width = io.cpu(0).pcr_rep.bits.getWidth) } } - for (i <- 0 until conf.ln.nTiles) { + val pcrReadData = Vec(conf.ln.nClients) { Reg() { Bits(width = io.cpu(0).pcr_rep.bits.getWidth) } } + for (i <- 0 until conf.ln.nClients) { val my_reset = Reg(resetVal = Bool(true)) val my_ipi = Reg(resetVal = Bool(false)) @@ -211,7 +211,7 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo } cpu.ipi_rep.valid := my_ipi cpu.ipi_req.ready := Bool(true) - for (j <- 0 until conf.ln.nTiles) { + for (j <- 0 until conf.ln.nClients) { when (io.cpu(j).ipi_req.valid && io.cpu(j).ipi_req.bits === UFix(i)) { my_ipi := Bool(true) } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 2cbfd24c..be141b77 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -31,7 +31,7 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon val io = new Bundle { val tilelink = new TileLinkIO - val host = new HTIFIO(lnConf.nTiles) + val host = new HTIFIO(lnConf.nClients) } val core = new Core From 
273bd34091288cd01fe7ad42ee2dc026a76f0696 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 20 Mar 2013 14:05:12 -0700 Subject: [PATCH 0572/1087] Generalized mem arbiter, moved to uncore. Support for multiple banks when acking grants. --- rocket/src/main/scala/arbiter.scala | 54 ---------------------------- rocket/src/main/scala/htif.scala | 18 +++------- rocket/src/main/scala/icache.scala | 5 +-- rocket/src/main/scala/nbdcache.scala | 39 ++++++++++---------- rocket/src/main/scala/tile.scala | 22 ++++++------ 5 files changed, 40 insertions(+), 98 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 871a8595..ac842d85 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -47,57 +47,3 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp resp.bits.replay := io.mem.resp.bits.replay && tag_hit } } - -class UncachedRequestorIO(implicit conf: LogicalNetworkConfiguration) extends Bundle { - val acquire = (new ClientSourcedIO){(new LogicalNetworkIO){new Acquire }} - val grant = (new MasterSourcedIO) {(new LogicalNetworkIO){new Grant }} - val grant_ack = (new ClientSourcedIO){(new LogicalNetworkIO){new GrantAck }} -} - -class MemArbiter(n: Int)(implicit conf: LogicalNetworkConfiguration) extends Component { - val io = new Bundle { - val mem = new UncachedRequestorIO - val requestor = Vec(n) { new UncachedRequestorIO }.flip - } - - var acq_bits = new Acquire - acq_bits := io.requestor(n-1).acquire.bits.payload - acq_bits.client_xact_id := Cat(io.requestor(n-1).acquire.bits.payload.client_xact_id, UFix(n-1, log2Up(n))) - for (i <- n-2 to 0 by -1) - { - var my_acq_bits = new Acquire - my_acq_bits := io.requestor(i).acquire.bits.payload - my_acq_bits.client_xact_id := Cat(io.requestor(i).acquire.bits.payload.client_xact_id, UFix(i, log2Up(n))) - - acq_bits = Mux(io.requestor(i).acquire.valid, my_acq_bits, acq_bits) - } - - io.mem.acquire.bits.payload := 
acq_bits - io.mem.acquire.valid := io.requestor.map(_.acquire.valid).reduce(_||_) - io.requestor(0).acquire.ready := io.mem.acquire.ready - for (i <- 1 until n) - io.requestor(i).acquire.ready := io.requestor(i-1).acquire.ready && !io.requestor(i-1).acquire.valid - - var ga_bits = io.requestor(n-1).grant_ack.bits - for (i <- n-2 to 0 by -1) - ga_bits = Mux(io.requestor(i).grant_ack.valid, io.requestor(i).grant_ack.bits, ga_bits) - - io.mem.grant_ack.bits := ga_bits - io.mem.grant_ack.valid := io.requestor.map(_.grant_ack.valid).reduce(_||_) - io.requestor(0).grant_ack.ready := io.mem.grant_ack.ready - for (i <- 1 until n) - io.requestor(i).grant_ack.ready := io.requestor(i-1).grant_ack.ready && !io.requestor(i-1).grant_ack.valid - - io.mem.grant.ready := Bool(false) - for (i <- 0 until n) - { - val tag = io.mem.grant.bits.payload.client_xact_id - io.requestor(i).grant.valid := Bool(false) - when (tag(log2Up(n)-1,0) === UFix(i)) { - io.requestor(i).grant.valid := io.mem.grant.valid - io.mem.grant.ready := io.requestor(i).grant.ready - } - io.requestor(i).grant.bits := io.mem.grant.bits - io.requestor(i).grant.bits.payload.client_xact_id := tag >> UFix(log2Up(n)) - } -} diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 365380ae..d7aef576 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -36,7 +36,7 @@ class HTIFIO(ntiles: Int) extends Bundle val ipi_rep = (new FIFOIO) { Bool() }.flip } -class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Component with ClientCoherenceAgent +class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component with ClientCoherenceAgent { implicit val lnConf = conf.ln val io = new Bundle { @@ -104,10 +104,12 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo val mem_acked = Reg(resetVal = Bool(false)) val mem_gxid = Reg() { Bits() } + val mem_gsrc = Reg() { UFix(width = conf.ln.idBits) } val 
mem_needs_ack = Reg() { Bool() } when (io.mem.grant.valid) { mem_acked := Bool(true) mem_gxid := io.mem.grant.bits.payload.master_xact_id + mem_gsrc := io.mem.grant.bits.header.src mem_needs_ack := conf.co.requiresAck(io.mem.grant.bits.payload) } io.mem.grant.ready := Bool(true) @@ -173,26 +175,16 @@ class rocketHTIF(w: Int)(implicit conf: CoherenceHubConfiguration) extends Compo val init_addr = addr.toUFix >> UFix(OFFSET_BITS-3) val co = conf.co.asInstanceOf[CoherencePolicyWithUncached] x_init.io.enq.bits := Mux(cmd === cmd_writemem, co.getUncachedWriteAcquire(init_addr, UFix(0)), co.getUncachedReadAcquire(init_addr, UFix(0))) - io.mem.acquire <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq) + io.mem.acquire <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq, UFix(conf.ln.nClients), UFix(0)) io.mem.acquire_data.valid:= state === state_mem_wdata io.mem.acquire_data.bits.payload.data := mem_req_data io.mem.grant_ack.valid := (state === state_mem_finish) && mem_needs_ack io.mem.grant_ack.bits.payload.master_xact_id := mem_gxid + io.mem.grant_ack.bits.header.dst := mem_gsrc io.mem.probe.ready := Bool(false) io.mem.release.valid := Bool(false) io.mem.release_data.valid := Bool(false) - io.mem.acquire.bits.header.src := UFix(conf.ln.nClients) - io.mem.acquire.bits.header.dst := UFix(0) - io.mem.acquire_data.bits.header.src := UFix(conf.ln.nClients) - io.mem.acquire_data.bits.header.dst := UFix(0) - io.mem.release.bits.header.src := UFix(conf.ln.nClients) - io.mem.release.bits.header.dst := UFix(0) - io.mem.release_data.bits.header.src := UFix(conf.ln.nClients) - io.mem.release_data.bits.header.dst := UFix(0) - io.mem.grant_ack.bits.header.src := UFix(conf.ln.nClients) - io.mem.grant_ack.bits.header.dst := UFix(0) - val pcrReadData = Vec(conf.ln.nClients) { Reg() { Bits(width = io.cpu(0).pcr_rep.bits.getWidth) } } for (i <- 0 until conf.ln.nClients) { val my_reset = Reg(resetVal = Bool(true)) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala 
index 8ddd9823..f72361af 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -54,7 +54,7 @@ class Frontend(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) ex { val io = new Bundle { val cpu = new CPUFrontendIO()(c).flip - val mem = new UncachedRequestorIO + val mem = new UncachedTileLinkIO } val btb = new rocketDpathBTB(c.nbtb) @@ -134,7 +134,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte val datablock = Bits(width = c.databits) }) val invalidate = Bool(INPUT) - val mem = new UncachedRequestorIO + val mem = new UncachedTileLinkIO } val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(4) { UFix() } @@ -246,6 +246,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte io.resp.valid := s2_hit io.mem.acquire.valid := (state === s_request) && finish_q.io.enq.ready io.mem.acquire.bits.payload := c.co.getUncachedReadAcquire(s2_addr >> UFix(c.offbits), UFix(0)) + io.mem.acquire_data.valid := Bool(false) io.mem.grant_ack <> FIFOedLogicalNetworkIOWrapper(finish_q.io.deq) io.mem.grant.ready := Bool(true) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index acdabd5a..0d6b562d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -124,7 +124,7 @@ class DataWriteReq(implicit conf: DCacheConfig) extends Bundle { } class InternalProbe(implicit conf: DCacheConfig) extends Probe { - val client_xact_id = Bits(width = CLIENT_XACT_ID_BITS) + val client_xact_id = Bits(width = CLIENT_XACT_ID_MAX_BITS) override def clone = new InternalProbe().asInstanceOf[this.type] } @@ -133,7 +133,7 @@ class WritebackReq(implicit conf: DCacheConfig) extends Bundle { val tag = Bits(width = conf.tagbits) val idx = Bits(width = conf.idxbits) val way_en = Bits(width = conf.ways) - val client_xact_id = Bits(width = CLIENT_XACT_ID_BITS) + val client_xact_id = Bits(width = 
CLIENT_XACT_ID_MAX_BITS) val r_type = UFix(width = RELEASE_TYPE_MAX_BITS) override def clone = new WritebackReq().asInstanceOf[this.type] @@ -160,7 +160,7 @@ class MetaWriteReq(implicit conf: DCacheConfig) extends Bundle { override def clone = new MetaWriteReq().asInstanceOf[this.type] } -class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { +class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguration) extends Component { val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) @@ -178,8 +178,8 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val meta_read = (new FIFOIO) { new MetaReadReq } val meta_write = (new FIFOIO) { new MetaWriteReq } val replay = (new FIFOIO) { new Replay() } - val mem_rep = (new PipeIO) { new Grant }.flip - val mem_finish = (new FIFOIO) { new GrantAck } + val mem_grant = (new PipeIO) { (new LogicalNetworkIO) {new Grant} }.flip + val mem_finish = (new FIFOIO) { (new LogicalNetworkIO) {new GrantAck} } val wb_req = (new FIFOIO) { new WritebackReq } val probe_writeback = (new FIFOIO) { Bool() }.flip val probe_refill = (new FIFOIO) { Bool() }.flip @@ -199,7 +199,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { val idx_match = req_idx === io.req_bits.addr(conf.untagbits-1,conf.offbits) val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) - val reply = io.mem_rep.valid && io.mem_rep.bits.client_xact_id === UFix(id) + val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UFix(id) val refill_done = reply && refill_count.andR val wb_done = reply && (state === s_wb_resp) @@ -209,9 +209,10 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { rpq.io.enq.bits.sdq_id := io.req_sdq_id rpq.io.deq.ready := io.replay.ready && state === 
s_drain_rpq || state === s_invalid - val finish_q = (new Queue(2 /* wb + refill */)) { new GrantAck } - finish_q.io.enq.valid := (wb_done || refill_done) && conf.co.requiresAck(io.mem_rep.bits) - finish_q.io.enq.bits.master_xact_id := io.mem_rep.bits.master_xact_id + val finish_q = (new Queue(2 /* wb + refill */)) { (new LogicalNetworkIO){new GrantAck} } + finish_q.io.enq.valid := (wb_done || refill_done) && conf.co.requiresAck(io.mem_grant.bits.payload) + finish_q.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id + finish_q.io.enq.bits.header.dst := io.mem_grant.bits.header.src when (state === s_drain_rpq && !rpq.io.deq.valid && !finish_q.io.deq.valid) { state := s_invalid @@ -227,7 +228,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { when (refill_done) { state := s_meta_write_req } when (reply) { refill_count := refill_count + UFix(1) - line_state := conf.co.newStateOnGrant(io.mem_rep.bits, io.mem_req.bits) + line_state := conf.co.newStateOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) } } when (state === s_refill_req) { @@ -307,7 +308,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Component { } } -class MSHRFile(implicit conf: DCacheConfig) extends Component { +class MSHRFile(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguration) extends Component { val io = new Bundle { val req = (new FIFOIO) { new MSHRReq }.flip val secondary_miss = Bool(OUTPUT) @@ -317,8 +318,8 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val meta_read = (new FIFOIO) { new MetaReadReq } val meta_write = (new FIFOIO) { new MetaWriteReq } val replay = (new FIFOIO) { new Replay } - val mem_rep = (new PipeIO) { new Grant }.flip - val mem_finish = (new FIFOIO) { new GrantAck } + val mem_grant = (new PipeIO) { (new LogicalNetworkIO){new Grant} }.flip + val mem_finish = (new FIFOIO) { (new LogicalNetworkIO){new GrantAck} } val wb_req = (new FIFOIO) { new WritebackReq } val probe = (new FIFOIO) 
{ new Bool() }.flip @@ -339,7 +340,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { val meta_read_arb = (new Arbiter(conf.nmshr)) { new MetaReadReq } val meta_write_arb = (new Arbiter(conf.nmshr)) { new MetaWriteReq } val mem_req_arb = (new Arbiter(conf.nmshr)) { new Acquire } - val mem_finish_arb = (new Arbiter(conf.nmshr)) { new GrantAck } + val mem_finish_arb = (new Arbiter(conf.nmshr)) { (new LogicalNetworkIO){new GrantAck} } val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq } val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } @@ -378,7 +379,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { mshr.io.probe_writeback.valid := io.probe.valid mshr.io.probe_writeback.bits := wb_probe_match - mshr.io.mem_rep <> io.mem_rep + mshr.io.mem_grant <> io.mem_grant memRespMux(i) := mshr.io.mem_resp pri_rdy = pri_rdy || mshr.io.req_pri_rdy @@ -399,7 +400,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Component { io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.secondary_miss := idx_match - io.mem_resp := memRespMux(io.mem_rep.bits.client_xact_id) + io.mem_resp := memRespMux(io.mem_grant.bits.payload.client_xact_id) io.fence_rdy := !fence io.probe.ready := (refill_probe_rdy || !tag_match) && (writeback_probe_rdy || !wb_probe_match) @@ -911,8 +912,8 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio mshr.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshr.io.req.bits.data := s2_req.data - mshr.io.mem_rep.valid := io.mem.grant.fire() - mshr.io.mem_rep.bits := io.mem.grant.bits.payload + mshr.io.mem_grant.valid := io.mem.grant.fire() + mshr.io.mem_grant.bits := io.mem.grant.bits when (mshr.io.req.fire()) { replacer.miss } io.mem.acquire.valid := mshr.io.mem_req.valid && prober.io.req.ready @@ -1023,5 +1024,5 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: 
LogicalNetworkConfiguratio io.cpu.resp.bits.data_subword := loadgen.byte io.cpu.resp.bits.store_data := s2_req.data - io.mem.grant_ack <> FIFOedLogicalNetworkIOWrapper(mshr.io.mem_finish) + io.mem.grant_ack <> mshr.io.mem_finish } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index be141b77..1689a66d 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -24,7 +24,9 @@ case class RocketConfiguration(lnConf: LogicalNetworkConfiguration, co: Coherenc class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) with ClientCoherenceAgent { val memPorts = 2 + confIn.vec - val dcachePortID = 0 + val dcachePortId = 0 + val icachePortId = 1 + val vicachePortId = 2 implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts), databits = confIn.xprlen) implicit val lnConf = confIn.lnConf implicit val conf = confIn.copy(dcache = dcConf) @@ -38,24 +40,24 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon val icache = new Frontend()(confIn.icache, lnConf) val dcache = new HellaCache - val arbiter = new MemArbiter(memPorts) - arbiter.io.requestor(dcachePortID) <> dcache.io.mem - arbiter.io.requestor(1) <> icache.io.mem + val arbiter = new UncachedTileLinkIOArbiter(memPorts) + arbiter.io.in(dcachePortId) <> dcache.io.mem + arbiter.io.in(icachePortId) <> icache.io.mem - io.tilelink.acquire <> arbiter.io.mem.acquire - io.tilelink.acquire_data <> dcache.io.mem.acquire_data - arbiter.io.mem.grant <> io.tilelink.grant - io.tilelink.grant_ack <> arbiter.io.mem.grant_ack + io.tilelink.acquire <> arbiter.io.out.acquire + io.tilelink.acquire_data <> arbiter.io.out.acquire_data + arbiter.io.out.grant <> io.tilelink.grant + io.tilelink.grant_ack <> arbiter.io.out.grant_ack dcache.io.mem.probe <> io.tilelink.probe io.tilelink.release_data <> dcache.io.mem.release_data io.tilelink.release.valid := 
dcache.io.mem.release.valid dcache.io.mem.release.ready := io.tilelink.release.ready io.tilelink.release.bits := dcache.io.mem.release.bits - io.tilelink.release.bits.payload.client_xact_id := Cat(dcache.io.mem.release.bits.payload.client_xact_id, UFix(dcachePortID, log2Up(memPorts))) // Mimic client id extension done by MemArbiter for Acquires from either cache) + io.tilelink.release.bits.payload.client_xact_id := Cat(dcache.io.mem.release.bits.payload.client_xact_id, UFix(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) if (conf.vec) { val vicache = new Frontend()(ICacheConfig(128, 1, conf.co), lnConf) // 128 sets x 1 ways (8KB) - arbiter.io.requestor(2) <> vicache.io.mem + arbiter.io.in(vicachePortId) <> vicache.io.mem core.io.vimem <> vicache.io.cpu } From 16113a96ba88760a0689adff144796b180e3bac4 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 25 Mar 2013 19:09:08 -0700 Subject: [PATCH 0573/1087] fixes after merge --- rocket/src/main/scala/htif.scala | 2 +- rocket/src/main/scala/tile.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 3c42a705..6a872715 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -92,7 +92,7 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component w val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.width-1, 0) - val pcr_coreid = if (conf.ln.nClients == 1) UFix(0) else addr(log2Up(conf.ln.nClients)-1+20,20) + val pcr_coreid = addr(log2Up(conf.ln.nClients)-1+20+1,20) val pcr_wdata = packet_ram(0) val bad_mem_packet = size(OFFSET_BITS-1-3,0).orR || addr(OFFSET_BITS-1-3,0).orR diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index c04feedb..b073f9be 100644 --- 
a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -55,13 +55,13 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon io.tilelink.release.bits := dcache.io.mem.release.bits io.tilelink.release.bits.payload.client_xact_id := Cat(dcache.io.mem.release.bits.payload.client_xact_id, UFix(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) - val ioSubBundles = io.tilelink.getClass.getMethods.filter( x => + /*val ioSubBundles = io.tilelink.getClass.getMethods.filter( x => classOf[ClientSourcedIO[Data]].isAssignableFrom(x.getReturnType)).map{ m => m.invoke(io.tilelink).asInstanceOf[ClientSourcedIO[LogicalNetworkIO[Data]]] } ioSubBundles.foreach{ m => m.bits.header.dst := UFix(0) m.bits.header.src := UFix(0) - } + }*/ if (conf.vec) { val vicache = new Frontend()(ICacheConfig(128, 1, conf.co), lnConf) // 128 sets x 1 ways (8KB) From f8aebcbf8ca440ced61b57ea9fff65bbf6828185 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 4 Apr 2013 15:50:29 -0700 Subject: [PATCH 0574/1087] fix for cache controller bug: failing to mux correct metadata into mshr.io.old_meta on tag match --- rocket/src/main/scala/nbdcache.scala | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 74aacb5b..d8977d25 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -139,6 +139,14 @@ class WritebackReq(implicit conf: DCacheConfig) extends Bundle { override def clone = new WritebackReq().asInstanceOf[this.type] } +object MetaData { + def apply(tag: Bits, state: UFix)(implicit conf: DCacheConfig) = { + val meta = new MetaData + meta.state := state + meta.tag := tag + meta + } +} class MetaData(implicit conf: DCacheConfig) extends Bundle { val state = UFix(width = conf.statebits) val tag = Bits(width = conf.tagbits) @@ -250,7 
+258,6 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura release_type := conf.co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc req := io.req_bits - state := Mux(conf.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_refill_req) when (io.req_bits.tag_match) { when (conf.co.isHit(req_cmd, io.req_bits.old_meta.state)) { // set dirty bit state := s_meta_write_req @@ -258,6 +265,8 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura }.otherwise { // upgrade permissions state := s_refill_req } + }.otherwise { // writback if necessary and refill + state := Mux(conf.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_refill_req) } } @@ -902,7 +911,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio mshr.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) mshr.io.req.bits := s2_req mshr.io.req.bits.tag_match := s2_tag_match - mshr.io.req.bits.old_meta := s2_repl_meta + mshr.io.req.bits.old_meta := Mux(s2_tag_match, MetaData(s2_repl_meta.tag, s2_hit_state), s2_repl_meta) mshr.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshr.io.req.bits.data := s2_req.data From d4a3351cfc6f4d9032d221ac3af3bf38aecfed60 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 25 Mar 2013 23:26:47 -0700 Subject: [PATCH 0575/1087] expose pending interrupts in status register --- rocket/src/main/scala/consts.scala | 2 -- rocket/src/main/scala/ctrl.scala | 8 ++------ rocket/src/main/scala/dpath_util.scala | 6 +++++- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 0c4e2fbe..b6c7fb82 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -54,8 +54,6 @@ trait ScalarOpConstants { trait InterruptConstants { val CAUSE_INTERRUPT = 32 - val IRQ_IPI = 5 - 
val IRQ_TIMER = 7 } abstract trait RocketDcacheConstants extends uncore.constants.CacheConstants with uncore.constants.AddressConstants { diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index ee90bbe0..664f476c 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -56,8 +56,6 @@ class CtrlDpathIO extends Bundle() val status = new Status().asInput val fp_sboard_clr = Bool(INPUT); val fp_sboard_clra = UFix(INPUT, 5); - val irq_timer = Bool(INPUT); - val irq_ipi = Bool(INPUT); val pcr_replay = Bool(INPUT) } @@ -407,10 +405,8 @@ class Control(implicit conf: RocketConfiguration) extends Component val ctrl_killx = Bool() val ctrl_killm = Bool() - val id_maskable_interrupts = List( - (io.dpath.irq_ipi, IRQ_IPI), - (io.dpath.irq_timer, IRQ_TIMER)) - var id_interrupts = id_maskable_interrupts.map(i => (io.dpath.status.im(i._2) && i._1, UFix(CAUSE_INTERRUPT+i._2))) + val sr = io.dpath.status + var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UFix(CAUSE_INTERRUPT+i))) val (vec_replay, vec_stalld) = if (conf.vec) { // vector control diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 6086a315..ce8fd4a8 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -55,6 +55,7 @@ class rocketDpathBTB(entries: Int) extends Component } class Status extends Bundle { + val ip = Bits(width = 8) val im = Bits(width = 8) val zero = Bits(width = 7) val vm = Bool() @@ -177,6 +178,8 @@ class PCR(implicit conf: RocketConfiguration) extends Component val wdata = Mux(io.w.en, io.w.data, host_pcr_bits.data) io.status := reg_status + io.status.ip := Cat(r_irq_timer, Bool(false), r_irq_ipi, Bool(false), + Bool(false), Bool(false), Bool(false), Bool(false)) io.ptbr_wen := wen && waddr === PTBR io.evec := Mux(io.exception, reg_ebase, reg_epc).toUFix io.ptbr := reg_ptbr @@ -229,7 +232,7 @@ class PCR(implicit conf: 
RocketConfiguration) extends Component val read_veccfg = if (conf.vec) Cat(io.vec_nfregs, io.vec_nxregs, io.vec_appvl) else Bits(0) val read_cause = reg_cause(reg_cause.getWidth-1) << conf.xprlen-1 | reg_cause(reg_cause.getWidth-2,0) rdata := AVec[Bits]( - reg_status.toBits, reg_epc, reg_badvaddr, reg_ebase, + io.status.toBits, reg_epc, reg_badvaddr, reg_ebase, reg_count, reg_compare, read_cause, read_ptbr, reg_coreid/*x*/, read_impl/*x*/, reg_coreid, read_impl, reg_k0, reg_k1, reg_k0/*x*/, reg_k1/*x*/, @@ -276,6 +279,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component reg_status.vm := false reg_status.zero := 0 reg_status.im := 0 + reg_status.ip := 0 } } From d43f484feb4bcc823435a3f79c9c7692c131b3ad Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 25 Mar 2013 23:27:23 -0700 Subject: [PATCH 0576/1087] take interrupts on nonzero fromhost values --- rocket/src/main/scala/dpath_util.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index ce8fd4a8..6d53ce04 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -178,7 +178,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component val wdata = Mux(io.w.en, io.w.data, host_pcr_bits.data) io.status := reg_status - io.status.ip := Cat(r_irq_timer, Bool(false), r_irq_ipi, Bool(false), + io.status.ip := Cat(r_irq_timer, reg_fromhost.orR, r_irq_ipi, Bool(false), Bool(false), Bool(false), Bool(false), Bool(false)) io.ptbr_wen := wen && waddr === PTBR io.evec := Mux(io.exception, reg_ebase, reg_epc).toUFix From 8b439ef20d96781f62cdb5e4d57b187408be604d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 2 Apr 2013 14:43:01 -0700 Subject: [PATCH 0577/1087] only support setpcr/clearpcr of SR the full PCR RMW support was wasted area/power --- rocket/src/main/scala/dpath.scala | 13 ++--- rocket/src/main/scala/dpath_util.scala | 70 
+++++++++++++------------- 2 files changed, 40 insertions(+), 43 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index ffe039c1..c649a593 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -194,9 +194,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // processor control regfile read val pcr = new PCR - pcr.io.r.en := io.ctrl.pcr != PCR.N - pcr.io.r.addr := wb_reg_inst(26,22).toUFix - pcr.io.host <> io.host pcr.io <> io.ctrl io.ctrl.pcr_replay := pcr.io.replay @@ -287,7 +284,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component wb_reg_wdata := mem_ll_wdata } wb_wdata := Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword, - Mux(io.ctrl.pcr != PCR.N, pcr.io.r.data, + Mux(io.ctrl.pcr != PCR.N, pcr.io.rw.rdata, wb_reg_wdata)) if (conf.vec) @@ -321,11 +318,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.ctrl.fp_sboard_clra := dmem_resp_waddr // processor control regfile write - pcr.io.w.addr := wb_reg_inst(26,22).toUFix - pcr.io.w.en := io.ctrl.pcr === PCR.T || io.ctrl.pcr === PCR.S || io.ctrl.pcr === PCR.C - pcr.io.w.data := Mux(io.ctrl.pcr === PCR.S, pcr.io.r.data | wb_reg_wdata, - Mux(io.ctrl.pcr === PCR.C, pcr.io.r.data & ~wb_reg_wdata, - wb_reg_wdata)) + pcr.io.rw.addr := wb_reg_inst(26,22).toUFix + pcr.io.rw.cmd := io.ctrl.pcr + pcr.io.rw.wdata := wb_reg_wdata // hook up I$ io.imem.req.bits.currentpc := ex_reg_pc diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 6d53ce04..a9d199e8 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -106,8 +106,12 @@ class PCR(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { val host = new HTIFIO(conf.lnConf.nClients) - val r = new ioReadPort(conf.nxpr, conf.xprlen) - val w = new ioWritePort(conf.nxpr, conf.xprlen) + val rw = new Bundle { + val addr 
= UFix(INPUT, log2Up(conf.nxpr)) + val cmd = Bits(INPUT, PCR.SZ) + val rdata = Bits(INPUT, conf.xprlen) + val wdata = Bits(INPUT, conf.xprlen) + } val status = new Status().asOutput val ptbr = UFix(OUTPUT, PADDR_BITS) @@ -151,11 +155,9 @@ class PCR(implicit conf: RocketConfiguration) extends Component val r_irq_timer = Reg(resetVal = Bool(false)) val r_irq_ipi = Reg(resetVal = Bool(true)) - - val rdata = Bits(); val host_pcr_req_valid = Reg{Bool()} // don't reset - val host_pcr_req_fire = host_pcr_req_valid && !io.r.en && !io.w.en + val host_pcr_req_fire = host_pcr_req_valid && io.rw.cmd === PCR.N val host_pcr_rep_valid = Reg{Bool()} // don't reset val host_pcr_bits = Reg{io.host.pcr_req.bits.clone} io.host.pcr_req.ready := !host_pcr_req_valid && !host_pcr_rep_valid @@ -168,23 +170,22 @@ class PCR(implicit conf: RocketConfiguration) extends Component when (host_pcr_req_fire) { host_pcr_req_valid := false host_pcr_rep_valid := true - host_pcr_bits.data := rdata + host_pcr_bits.data := io.rw.rdata } when (io.host.pcr_rep.fire()) { host_pcr_rep_valid := false } - val raddr = Mux(io.r.en, io.r.addr, host_pcr_bits.addr) - val wen = io.w.en || !io.r.en && host_pcr_req_valid && host_pcr_bits.rw - val waddr = Mux(io.w.en, io.w.addr, host_pcr_bits.addr) - val wdata = Mux(io.w.en, io.w.data, host_pcr_bits.data) + val addr = Mux(io.rw.cmd != PCR.N, io.rw.addr, host_pcr_bits.addr) + val wen = io.rw.cmd === PCR.T || io.rw.cmd === PCR.S || io.rw.cmd === PCR.C || + host_pcr_req_fire && host_pcr_bits.rw + val wdata = Mux(io.rw.cmd != PCR.N, io.rw.wdata, host_pcr_bits.data) io.status := reg_status io.status.ip := Cat(r_irq_timer, reg_fromhost.orR, r_irq_ipi, Bool(false), Bool(false), Bool(false), Bool(false), Bool(false)) - io.ptbr_wen := wen && waddr === PTBR + io.ptbr_wen := wen && addr === PTBR io.evec := Mux(io.exception, reg_ebase, reg_epc).toUFix io.ptbr := reg_ptbr io.host.debug.error_mode := reg_error_mode - io.r.data := rdata io.vecbank := reg_vecbank var cnt = UFix(0,4) 
@@ -193,7 +194,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component io.vecbankcnt := cnt(3,0) when (io.badvaddr_wen || io.vec_irq_aux_wen) { - val wdata = Mux(io.badvaddr_wen, io.w.data, io.vec_irq_aux) + val wdata = Mux(io.badvaddr_wen, io.rw.wdata, io.vec_irq_aux) val (upper, lower) = Split(wdata, VADDR_BITS) val sign = Mux(lower.toFix < Fix(0), upper.andR, upper.orR) reg_badvaddr := Cat(sign, lower).toFix @@ -221,8 +222,8 @@ class PCR(implicit conf: RocketConfiguration) extends Component io.irq_timer := r_irq_timer; io.irq_ipi := r_irq_ipi; - io.host.ipi_req.valid := io.w.en && io.w.addr === SEND_IPI - io.host.ipi_req.bits := io.w.data + io.host.ipi_req.valid := io.rw.cmd === PCR.T && io.rw.addr === SEND_IPI + io.host.ipi_req.bits := io.rw.wdata io.replay := io.host.ipi_req.valid && !io.host.ipi_req.ready when (host_pcr_req_fire && !host_pcr_bits.rw && host_pcr_bits.addr === TOHOST) { reg_tohost := UFix(0) } @@ -231,7 +232,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component val read_ptbr = reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS val read_veccfg = if (conf.vec) Cat(io.vec_nfregs, io.vec_nxregs, io.vec_appvl) else Bits(0) val read_cause = reg_cause(reg_cause.getWidth-1) << conf.xprlen-1 | reg_cause(reg_cause.getWidth-2,0) - rdata := AVec[Bits]( + io.rw.rdata := AVec[Bits]( io.status.toBits, reg_epc, reg_badvaddr, reg_ebase, reg_count, reg_compare, read_cause, read_ptbr, reg_coreid/*x*/, read_impl/*x*/, reg_coreid, read_impl, @@ -240,28 +241,32 @@ class PCR(implicit conf: RocketConfiguration) extends Component reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank/*x*/, read_veccfg/*x*/, reg_tohost/*x*/, reg_fromhost/*x*/, reg_vecbank/*x*/, read_veccfg/*x*/, reg_tohost, reg_fromhost - )(raddr) + )(addr) when (wen) { - when (waddr === STATUS) { - reg_status := new Status().fromBits(wdata) + when (addr === STATUS) { + val sr_wdata = Mux(io.rw.cmd === PCR.S, reg_status.toBits | wdata, + 
Mux(io.rw.cmd === PCR.C, reg_status.toBits & ~wdata, + wdata)) + reg_status := new Status().fromBits(sr_wdata) + reg_status.zero := 0 if (!conf.vec) reg_status.ev := false if (!conf.fpu) reg_status.ef := false if (!conf.rvc) reg_status.ec := false } - when (waddr === EPC) { reg_epc := wdata(VADDR_BITS,0).toFix } - when (waddr === EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toFix } - when (waddr === COUNT) { reg_count := wdata.toUFix } - when (waddr === COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } - when (waddr === COREID) { reg_coreid := wdata(15,0) } - when (waddr === FROMHOST) { when (reg_fromhost === UFix(0) || io.w.en) { reg_fromhost := wdata } } - when (waddr === TOHOST) { when (reg_tohost === UFix(0)) { reg_tohost := wdata } } - when (waddr === CLR_IPI) { r_irq_ipi := wdata(0) } - when (waddr === K0) { reg_k0 := wdata; } - when (waddr === K1) { reg_k1 := wdata; } - when (waddr === PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } - when (waddr === VECBANK) { reg_vecbank:= wdata(7,0) } + when (addr === EPC) { reg_epc := wdata(VADDR_BITS,0).toFix } + when (addr === EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toFix } + when (addr === COUNT) { reg_count := wdata.toUFix } + when (addr === COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } + when (addr === COREID) { reg_coreid := wdata(15,0) } + when (addr === FROMHOST) { when (reg_fromhost === UFix(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } + when (addr === TOHOST) { when (reg_tohost === UFix(0)) { reg_tohost := wdata } } + when (addr === CLR_IPI) { r_irq_ipi := wdata(0) } + when (addr === K0) { reg_k0 := wdata; } + when (addr === K1) { reg_k1 := wdata; } + when (addr === PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } + when (addr === VECBANK) { reg_vecbank:= wdata(7,0) } } io.host.ipi_rep.ready := Bool(true) @@ -285,9 +290,6 @@ class PCR(implicit conf: 
RocketConfiguration) extends Component class ioReadPort(d: Int, w: Int) extends Bundle { - val addr = UFix(INPUT, log2Up(d)) - val en = Bool(INPUT) - val data = Bits(OUTPUT, w) override def clone = new ioReadPort(d, w).asInstanceOf[this.type] } From fc46daecf6c53d6f0c2fe7c47f81383812c1cf68 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 2 Apr 2013 17:37:21 -0700 Subject: [PATCH 0578/1087] don't flush pipeline on writes to side-effect-free PCRs notably, K0, K1, and EPC --- rocket/src/main/scala/ctrl.scala | 34 +++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 664f476c..fcf80934 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -6,6 +6,7 @@ import Constants._ import Instructions._ import hwacha._ import ALU._ +import Util._ class CtrlDpathIO extends Bundle() { @@ -174,13 +175,13 @@ object XDecode extends DecodeConstants REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,Y,N,N), - SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.S,N,N,N,Y,Y), - CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.C,N,N,N,Y,Y), + SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.S,N,N,N,Y,N), + CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.C,N,N,N,Y,N), ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,Y,N,Y,N), FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR.N,Y,N,N,N,Y), - MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, 
FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.F,N,N,N,Y,Y), - MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.T,N,N,N,Y,Y), + MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.F,N,N,N,Y,N), + MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.T,N,N,N,Y,N), RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_TSC,PCR.N,N,N,N,N,N), RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_TSC,PCR.N,N,N,N,N,N), RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_IRT,PCR.N,N,N,N,N,N)) @@ -393,7 +394,6 @@ class Control(implicit conf: RocketConfiguration) extends Component val wb_reg_eret = Reg(resetVal = Bool(false)) val wb_reg_xcpt = Reg(resetVal = Bool(false)) val wb_reg_replay = Reg(resetVal = Bool(false)) - val wb_reg_replay_next = Reg(resetVal = Bool(false)) val wb_reg_cause = Reg(){UFix()} val wb_reg_fp_val = Reg(resetVal = Bool(false)) val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) @@ -453,6 +453,9 @@ class Control(implicit conf: RocketConfiguration) extends Component // executing ERET when traps are enabled causes an illegal instruction exception val illegal_inst = !id_int_val.toBool || (id_eret.toBool && io.dpath.status.et) + // flush pipeline on PCR writes that may have side effects + val id_pcr_flush = id_pcr != PCR.N && id_pcr != PCR.F && + id_raddr1 != PCR.K0 && id_raddr1 != PCR.K1 && id_raddr1 != PCR.EPC val (id_xcpt, id_cause) = checkExceptions(List( (id_interrupt, id_interrupt_cause), @@ -499,7 +502,7 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_flush_inst := id_fence_i ex_reg_fp_val := id_fp_val ex_reg_vec_val := id_vec_val.toBool - ex_reg_replay_next := id_replay_next + ex_reg_replay_next := id_replay_next || id_pcr_flush ex_reg_load_use := id_load_use; ex_reg_mem_cmd := id_mem_cmd ex_reg_mem_type := id_mem_type.toUFix @@ 
-567,7 +570,7 @@ class Control(implicit conf: RocketConfiguration) extends Component ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem wb_reg_replay := replay_mem && !take_pc_wb - wb_reg_xcpt := mem_xcpt && !take_pc_wb && !wb_reg_replay_next + wb_reg_xcpt := mem_xcpt && !take_pc_wb when (mem_xcpt) { wb_reg_cause := mem_cause } when (ctrl_killm) { @@ -580,7 +583,6 @@ class Control(implicit conf: RocketConfiguration) extends Component wb_reg_mem_val := Bool(false) wb_reg_div_mul_val := Bool(false); wb_reg_fp_val := Bool(false) - wb_reg_replay_next := Bool(false) } .otherwise { wb_reg_valid := mem_reg_valid @@ -592,7 +594,6 @@ class Control(implicit conf: RocketConfiguration) extends Component wb_reg_mem_val := mem_reg_mem_val wb_reg_div_mul_val := mem_reg_div_mul_val wb_reg_fp_val := mem_reg_fp_val - wb_reg_replay_next := mem_reg_replay_next } val replay_wb = io.dmem.resp.bits.nack || wb_reg_replay || vec_replay || io.dpath.pcr_replay @@ -651,7 +652,7 @@ class Control(implicit conf: RocketConfiguration) extends Component io.imem.req.bits.taken := !ex_reg_btb_hit || ex_reg_jalr io.imem.req.valid := take_pc - // stall for RAW/WAW hazards on loads, AMOs, and mul/div in execute stage. + // stall for RAW/WAW hazards on PCRs, loads, AMOs, and mul/div in execute stage. 
val data_hazard_ex = ex_reg_wen && (id_renx1.toBool && id_raddr1 === io.dpath.ex_waddr || id_renx2.toBool && id_raddr2 === io.dpath.ex_waddr || @@ -661,10 +662,10 @@ class Control(implicit conf: RocketConfiguration) extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr || io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) - val id_ex_hazard = data_hazard_ex && (ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val) || + val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val) || fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) - // stall for RAW/WAW hazards on LB/LH and mul/div in memory stage. + // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage. val mem_mem_cmd_bh = if (!conf.fastLoadWord) Bool(true) else if (conf.fastLoadByte) Bool(false) @@ -678,7 +679,7 @@ class Control(implicit conf: RocketConfiguration) extends Component io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = data_hazard_mem && (mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) || + val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) || fp_data_hazard_mem && mem_reg_fp_val id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) @@ -705,10 +706,11 @@ class Control(implicit conf: RocketConfiguration) extends Component id_fp_val && id_stall_fpu || id_mem_val && !io.dmem.req.ready || vec_stalld - ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || id_interrupt + val ctrl_draind = id_interrupt || ex_reg_replay_next + ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind - io.dpath.killd := take_pc || ctrl_stalld && !id_interrupt - io.imem.resp.ready := 
pc_taken || !ctrl_stalld + io.dpath.killd := take_pc || ctrl_stalld && !ctrl_draind + io.imem.resp.ready := pc_taken || !ctrl_stalld || ctrl_draind io.imem.invalidate := wb_reg_flush_inst io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen From 8cbdeb2abfde4a0417865b306b47d788c33db1d4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 3 Apr 2013 22:15:39 -0700 Subject: [PATCH 0579/1087] add LR/SC support --- rocket/src/main/scala/ctrl.scala | 17 +++++++++++------ rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/instructions.scala | 4 ++++ rocket/src/main/scala/nbdcache.scala | 15 +++++++++++++-- rocket/src/main/scala/ptw.scala | 3 +++ 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index fcf80934..113977d4 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -127,6 +127,11 @@ object XDecode extends DecodeConstants AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + + LR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + LR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SC_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + SC_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), @@ -383,7 +388,7 @@ class 
Control(implicit conf: RocketConfiguration) extends Component val mem_reg_replay_next = Reg(resetVal = Bool(false)) val mem_reg_pcr = Reg(resetVal = PCR.N) val mem_reg_cause = Reg(){UFix()} - val mem_reg_mem_type = Reg(){Bits()} + val mem_reg_slow_bypass = Reg(){Bool()} val wb_reg_valid = Reg(resetVal = Bool(false)) val wb_reg_pcr = Reg(resetVal = PCR.N) @@ -516,8 +521,9 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_div_mul_val && !io.dpath.div_mul_rdy || mem_reg_replay_next ctrl_killx := take_pc_wb || replay_ex - val take_pc_ex = !Mux(ex_reg_jalr, ex_reg_btb_hit && io.dpath.jalr_eq, ex_reg_btb_hit === io.dpath.ex_br_taken) + // detect 2-cycle load-use delay for LB/LH/SC + val ex_slow_bypass = ex_reg_mem_cmd === M_XSC || AVec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_reg_mem_type) val (ex_xcpt, ex_cause) = checkExceptions(List( (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), @@ -552,7 +558,7 @@ class Control(implicit conf: RocketConfiguration) extends Component mem_reg_fp_val := ex_reg_fp_val mem_reg_vec_val := ex_reg_vec_val mem_reg_replay_next := ex_reg_replay_next - mem_reg_mem_type := ex_reg_mem_type + mem_reg_slow_bypass := ex_slow_bypass mem_reg_xcpt := ex_xcpt } @@ -667,9 +673,8 @@ class Control(implicit conf: RocketConfiguration) extends Component // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage. 
val mem_mem_cmd_bh = - if (!conf.fastLoadWord) Bool(true) - else if (conf.fastLoadByte) Bool(false) - else AVec(MT_B, MT_BU, MT_H, MT_HU) contains mem_reg_mem_type + if (conf.fastLoadWord) Bool(!conf.fastLoadByte) && mem_reg_slow_bypass + else Bool(true) val data_hazard_mem = mem_reg_wen && (id_raddr1 != UFix(0) && id_renx1 && id_raddr1 === io.dpath.mem_waddr || id_raddr2 != UFix(0) && id_renx2 && id_raddr2 === io.dpath.mem_waddr || diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index c649a593..aad94b67 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -200,6 +200,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.ptw.ptbr := pcr.io.ptbr io.ptw.invalidate := pcr.io.ptbr_wen + io.ptw.eret := io.ctrl.eret io.ptw.status := pcr.io.status // branch resolution logic diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 377f2058..229d27b5 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -88,6 +88,10 @@ object Instructions val AMOMAX_D = Bits("b?????_?????_?????_0000101011_0101011",32); val AMOMINU_D = Bits("b?????_?????_?????_0000110011_0101011",32); val AMOMAXU_D = Bits("b?????_?????_?????_0000111011_0101011",32); + val LR_W = Bits("b?????_?????_00000_1000000010_0101011",32); + val LR_D = Bits("b?????_?????_00000_1000000011_0101011",32); + val SC_W = Bits("b?????_?????_?????_1000001010_0101011",32); + val SC_D = Bits("b?????_?????_?????_1000001011_0101011",32); val FENCE_I = Bits("b?????_?????_????????????_001_0101111",32); val FENCE = Bits("b?????_?????_????????????_010_0101111",32); val SYSCALL = Bits("b00000_00000_00000_0000000000_1110111",32); diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index d8977d25..b515cf7e 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -869,6 +869,17 
@@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en)){Bits()}) val s2_hit = s2_tag_match && conf.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) + // load-reserved/store-conditional + val s2_lr_valid = Reg(resetVal = Bool(false)) + val s2_lr_addr = Reg{UFix()} + val s2_lr_addr_match = s2_lr_addr === (s2_req.addr >> conf.offbits) + when (s2_valid_masked && s2_req.cmd === M_XLR) { + s2_lr_valid := true + s2_lr_addr := s2_req.addr >> conf.offbits + } + when (prober.io.mshr_req.valid && s2_lr_addr_match) { s2_lr_valid := false } + when (io.cpu.ptw.eret) { s2_lr_valid := false } + val s2_data = Vec(conf.ways){Bits(width = conf.bitsperrow)} for (w <- 0 until conf.ways) { val regs = Vec(conf.wordsperrow){Reg{Bits(width = conf.encdatabits)}} @@ -1015,13 +1026,13 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio io.cpu.req.ready := Bool(false) } - val s2_read = isRead(s2_req.cmd) + val s2_read = isRead(s2_req.cmd) || s2_req.cmd === M_XSC io.cpu.resp.valid := s2_read && (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable io.cpu.resp.bits.nack := s2_valid && s2_nack io.cpu.resp.bits := s2_req io.cpu.resp.bits.replay := s2_replay && s2_read io.cpu.resp.bits.data := loadgen.word - io.cpu.resp.bits.data_subword := loadgen.byte + io.cpu.resp.bits.data_subword := Mux(s2_req.cmd === M_XSC, !s2_lr_addr_match, loadgen.byte) io.cpu.resp.bits.store_data := s2_req.data io.mem.grant_ack <> mshr.io.mem_finish diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 7972dc3d..bfc898c4 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -15,11 +15,13 @@ class TLBPTWIO extends Bundle { val status = new Status().asInput val invalidate = Bool(INPUT) + val eret = Bool(INPUT) } class DatapathPTWIO 
extends Bundle { val ptbr = UFix(INPUT, PADDR_BITS) val invalidate = Bool(INPUT) + val eret = Bool(INPUT) val status = new Status().asInput } @@ -82,6 +84,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component io.requestor(i).resp.bits.perm := r_pte(9,4) io.requestor(i).resp.bits.ppn := resp_ppn.toUFix io.requestor(i).invalidate := io.dpath.invalidate + io.requestor(i).eret := io.dpath.eret io.requestor(i).status := io.dpath.status } From 1abb9277db13055b0fdd2035199c92eb36f912f8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 5 Apr 2013 19:13:38 -0700 Subject: [PATCH 0580/1087] fix LR/SC atomicity violation note, it's still not starvation-free. --- rocket/src/main/scala/nbdcache.scala | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index b515cf7e..2fba57f5 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -872,8 +872,10 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio // load-reserved/store-conditional val s2_lr_valid = Reg(resetVal = Bool(false)) val s2_lr_addr = Reg{UFix()} - val s2_lr_addr_match = s2_lr_addr === (s2_req.addr >> conf.offbits) - when (s2_valid_masked && s2_req.cmd === M_XLR) { + val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC) + val s2_lr_addr_match = s2_lr_valid && s2_lr_addr === (s2_req.addr >> conf.offbits) + val s2_sc_fail = s2_sc && !s2_lr_addr_match + when ((s2_valid_masked && s2_hit || s2_replay) && s2_lr) { s2_lr_valid := true s2_lr_addr := s2_req.addr >> conf.offbits } @@ -898,7 +900,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio val s2_data_correctable = AVec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) // store/amo hits - s3_valid := (s2_valid_masked && s2_hit || s2_replay) && isWrite(s2_req.cmd) + s3_valid := (s2_valid_masked && s2_hit && 
!s2_sc_fail || s2_replay) && isWrite(s2_req.cmd) val amoalu = new AMOALU when ((s2_valid || s2_replay) && (isWrite(s2_req.cmd) || s2_data_correctable)) { s3_req := s2_req @@ -978,7 +980,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio val s4_valid = Reg(s3_valid, resetVal = Bool(false)) val s4_req = RegEn(s3_req, s3_valid && metaReadArb.io.out.valid) val bypasses = List( - (s2_valid_masked || s2_replay, s2_req, amoalu.io.out), + (s2_valid_masked && !s2_sc_fail || s2_replay, s2_req, amoalu.io.out), (s3_valid, s3_req, s3_req.data), (s4_valid, s4_req, s4_req.data) ).map(r => (r._1 && (s1_addr >> conf.wordoffbits === r._2.addr >> conf.wordoffbits) && isWrite(r._2.cmd), r._3)) @@ -1026,13 +1028,13 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio io.cpu.req.ready := Bool(false) } - val s2_read = isRead(s2_req.cmd) || s2_req.cmd === M_XSC - io.cpu.resp.valid := s2_read && (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable + val s2_do_resp = isRead(s2_req.cmd) || s2_sc + io.cpu.resp.valid := s2_do_resp && (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable io.cpu.resp.bits.nack := s2_valid && s2_nack io.cpu.resp.bits := s2_req - io.cpu.resp.bits.replay := s2_replay && s2_read + io.cpu.resp.bits.replay := s2_replay && s2_do_resp io.cpu.resp.bits.data := loadgen.word - io.cpu.resp.bits.data_subword := Mux(s2_req.cmd === M_XSC, !s2_lr_addr_match, loadgen.byte) + io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte) io.cpu.resp.bits.store_data := s2_req.data io.mem.grant_ack <> mshr.io.mem_finish From e74e032c87fa8cbc95fb5c516542956bc165b363 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 6 Apr 2013 01:03:37 -0700 Subject: [PATCH 0581/1087] simplify MSHR memory response logic --- rocket/src/main/scala/nbdcache.scala | 35 ++++++++++++++-------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala 
b/rocket/src/main/scala/nbdcache.scala index 2fba57f5..2e19f741 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -216,8 +216,6 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura rpq.io.enq.bits.sdq_id := io.req_sdq_id rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid - io.probe_writeback.ready := (state != s_wb_req && state != s_wb_resp && state != s_meta_clear) || !idx_match //TODO != s_drain_rpq ? - when (state === s_drain_rpq && !rpq.io.deq.valid) { state := s_invalid } @@ -235,16 +233,16 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura line_state := conf.co.newStateOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) } } - when (state === s_refill_req) { - when (io.mem_req.ready) { state := s_refill_resp } + when (io.mem_req.fire()) { // s_refill_req + state := s_refill_resp } when (state === s_meta_clear && io.meta_write.ready) { state := s_refill_req } - when (state === s_wb_resp) { - when (reply) { state := s_meta_clear } + when (state === s_wb_resp && reply) { + state := s_meta_clear } - when (state === s_wb_req && io.wb_req.ready) { + when (io.wb_req.fire()) { // s_wb_req state := s_wb_resp } @@ -270,21 +268,22 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura } } - val finish_q = (new Queue(2 /* wb + refill */)) { (new LogicalNetworkIO){new GrantAck} } - finish_q.io.enq.valid := (wb_done || refill_done) && conf.co.requiresAck(io.mem_grant.bits.payload) - finish_q.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id - finish_q.io.enq.bits.header.dst := io.mem_grant.bits.header.src + val ackq = (new Queue(1)) { (new LogicalNetworkIO){new GrantAck} } + ackq.io.enq.valid := (wb_done || refill_done) && conf.co.requiresAck(io.mem_grant.bits.payload) + ackq.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id + 
ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp - io.mem_finish.valid := finish_q.io.deq.valid && can_finish - finish_q.io.deq.ready := io.mem_finish.ready && can_finish - io.mem_finish.bits := finish_q.io.deq.bits + io.mem_finish.valid := ackq.io.deq.valid && can_finish + ackq.io.deq.ready := io.mem_finish.ready && can_finish + io.mem_finish.bits := ackq.io.deq.bits io.idx_match := (state != s_invalid) && idx_match io.mem_resp := req io.mem_resp.addr := Cat(req_idx, refill_count) << conf.ramoffbits io.tag := req.addr >> conf.untagbits - io.req_pri_rdy := state === s_invalid && !finish_q.io.deq.valid + io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready + io.probe_writeback.ready := !idx_match || (state != s_wb_req && state != s_wb_resp && state != s_meta_clear) io.meta_write.valid := state === s_meta_write_req || state === s_meta_clear io.meta_write.bits.idx := req_idx @@ -292,18 +291,18 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura io.meta_write.bits.data.tag := io.tag io.meta_write.bits.way_en := req.way_en - io.wb_req.valid := state === s_wb_req + io.wb_req.valid := state === s_wb_req && ackq.io.enq.ready io.wb_req.bits.tag := req.old_meta.tag io.wb_req.bits.idx := req_idx io.wb_req.bits.way_en := req.way_en io.wb_req.bits.client_xact_id := Bits(id) io.wb_req.bits.r_type := conf.co.getReleaseTypeOnVoluntaryWriteback() - io.mem_req.valid := state === s_refill_req + io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready io.mem_req.bits.a_type := acquire_type io.mem_req.bits.addr := Cat(io.tag, req_idx).toUFix io.mem_req.bits.client_xact_id := Bits(id) - io.mem_finish <> finish_q.io.deq + io.mem_finish <> ackq.io.deq io.mem_req.bits.client_xact_id := Bits(id) io.meta_read.valid := state === s_drain_rpq From ae7720e28424f314654824cf02280523d1f1bdcb Mon Sep 17 00:00:00 2001 From: Andrew 
Waterman Date: Sun, 7 Apr 2013 19:27:21 -0700 Subject: [PATCH 0582/1087] guarantee LR/SC forward progress the mechanism is to block new probes for several cycles after a successful LR. this also cleans up the MSHR <-> ProbeUnit interface slightly. --- rocket/src/main/scala/nbdcache.scala | 69 +++++++++++++++------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2e19f741..8fb87254 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -36,6 +36,7 @@ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, val encmetabits = code.width(metabits) val wordsperrow = MEM_DATA_BITS/databits val bitsperrow = wordsperrow*encdatabits + val lrsc_cycles = 32 // ISA requires 16-insn LRSC sequences to succeed } abstract class ReplacementPolicy @@ -178,7 +179,6 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura val req_sdq_id = UFix(INPUT, log2Up(conf.nsdq)) val idx_match = Bool(OUTPUT) - val probe_idx_match = Bool(OUTPUT) val tag = Bits(OUTPUT, conf.tagbits) val mem_req = (new FIFOIO) { new Acquire } @@ -189,7 +189,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura val mem_grant = (new PipeIO) { (new LogicalNetworkIO) {new Grant} }.flip val mem_finish = (new FIFOIO) { (new LogicalNetworkIO) {new GrantAck} } val wb_req = (new FIFOIO) { new WritebackReq } - val probe_writeback = (new FIFOIO) { Bool() }.flip + val probe_rdy = Bool(OUTPUT) } val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(9) { UFix() } @@ -283,7 +283,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura io.tag := req.addr >> conf.untagbits io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready - io.probe_writeback.ready := 
!idx_match || (state != s_wb_req && state != s_wb_resp && state != s_meta_clear) + io.probe_rdy := !idx_match || (state != s_wb_req && state != s_wb_resp && state != s_meta_clear) io.meta_write.valid := state === s_meta_write_req || state === s_meta_clear io.meta_write.bits.idx := req_idx @@ -332,8 +332,8 @@ class MSHRFile(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguration) val mem_grant = (new PipeIO) { (new LogicalNetworkIO){new Grant} }.flip val mem_finish = (new FIFOIO) { (new LogicalNetworkIO){new GrantAck} } val wb_req = (new FIFOIO) { new WritebackReq } - val probe = (new FIFOIO) { new Bool() }.flip + val probe_rdy = Bool(OUTPUT) val fence_rdy = Bool(OUTPUT) } @@ -346,6 +346,8 @@ class MSHRFile(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguration) val idxMatch = Vec(conf.nmshr) { Bool() } val tagList = Vec(conf.nmshr) { Bits() } + val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> conf.untagbits + val wbTagList = Vec(conf.nmshr) { Bits() } val memRespMux = Vec(conf.nmshr) { new DataWriteReq } val meta_read_arb = (new Arbiter(conf.nmshr)) { new MetaReadReq } @@ -356,14 +358,12 @@ class MSHRFile(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguration) val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } - val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> conf.untagbits - val wb_probe_match = Mux1H(idxMatch, wbTagList) === io.req.bits.addr >> conf.untagbits - var idx_match = Bool(false) var pri_rdy = Bool(false) - var fence = Bool(false) var sec_rdy = Bool(false) - var writeback_probe_rdy = Bool(true) + + io.fence_rdy := true + io.probe_rdy := true for (i <- 0 to conf.nmshr-1) { val mshr = new MSHR(i) @@ -385,17 +385,16 @@ class MSHRFile(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguration) mshr.io.mem_finish <> mem_finish_arb.io.in(i) mshr.io.wb_req <> wb_req_arb.io.in(i) mshr.io.replay <> replay_arb.io.in(i) - 
mshr.io.probe_writeback.valid := io.probe.valid - mshr.io.probe_writeback.bits := wb_probe_match mshr.io.mem_grant <> io.mem_grant memRespMux(i) := mshr.io.mem_resp pri_rdy = pri_rdy || mshr.io.req_pri_rdy sec_rdy = sec_rdy || mshr.io.req_sec_rdy - fence = fence || !mshr.io.req_pri_rdy idx_match = idx_match || mshr.io.idx_match - writeback_probe_rdy = writeback_probe_rdy && mshr.io.probe_writeback.ready + + when (!mshr.io.req_pri_rdy) { io.fence_rdy := false } + when (!mshr.io.probe_rdy) { io.probe_rdy := false } } alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match @@ -409,8 +408,6 @@ class MSHRFile(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguration) io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.secondary_miss := idx_match io.mem_resp := memRespMux(io.mem_grant.bits.payload.client_xact_id) - io.fence_rdy := !fence - io.probe.ready := writeback_probe_rdy || !wb_probe_match val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd) io.replay.bits.data := sdq(RegEn(replay_arb.io.out.bits.sdq_id, free_sdq)) @@ -501,9 +498,9 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { val rep = (new FIFOIO) { new Release } val meta_read = (new FIFOIO) { new MetaReadReq } val meta_write = (new FIFOIO) { new MetaWriteReq } - val mshr_req = (new FIFOIO) { Bool() } val wb_req = (new FIFOIO) { new WritebackReq } val way_en = Bits(INPUT, conf.ways) + val mshr_rdy = Bool(INPUT) val line_state = UFix(INPUT, 2) } @@ -533,7 +530,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { state := s_release line_state := io.line_state way_en := io.way_en - when (!io.mshr_req.ready) { state := s_meta_read } + when (!io.mshr_rdy) { state := s_meta_read } } when (state === s_meta_resp) { state := s_mshr_req @@ -562,7 +559,6 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { io.meta_write.bits.data.state := conf.co.newStateOnProbe(req, line_state) io.meta_write.bits.data.tag := req.addr >> 
UFix(conf.idxbits) - io.mshr_req.valid := state === s_mshr_req io.wb_req.valid := state === s_writeback_req io.wb_req.bits.way_en := way_en io.wb_req.bits.idx := req.addr @@ -869,17 +865,23 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio val s2_hit = s2_tag_match && conf.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) // load-reserved/store-conditional - val s2_lr_valid = Reg(resetVal = Bool(false)) - val s2_lr_addr = Reg{UFix()} + val lrsc_count = Reg(resetVal = UFix(0)) + val lrsc_valid = lrsc_count.orR + val lrsc_addr = Reg{UFix()} val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC) - val s2_lr_addr_match = s2_lr_valid && s2_lr_addr === (s2_req.addr >> conf.offbits) - val s2_sc_fail = s2_sc && !s2_lr_addr_match - when ((s2_valid_masked && s2_hit || s2_replay) && s2_lr) { - s2_lr_valid := true - s2_lr_addr := s2_req.addr >> conf.offbits + val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> conf.offbits) + val s2_sc_fail = s2_sc && !s2_lrsc_addr_match + when (lrsc_valid) { lrsc_count := lrsc_count - 1 } + when (s2_valid_masked && s2_hit || s2_replay) { + when (s2_lr) { + when (!lrsc_valid) { lrsc_count := conf.lrsc_cycles-1 } + lrsc_addr := s2_req.addr >> conf.offbits + } + when (s2_sc) { + lrsc_count := 0 + } } - when (prober.io.mshr_req.valid && s2_lr_addr_match) { s2_lr_valid := false } - when (io.cpu.ptw.eret) { s2_lr_valid := false } + when (io.cpu.ptw.eret) { lrsc_count := 0 } val s2_data = Vec(conf.ways){Bits(width = conf.bitsperrow)} for (w <- 0 until conf.ways) { @@ -899,7 +901,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio val s2_data_correctable = AVec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) // store/amo hits - s3_valid := (s2_valid_masked && s2_hit && !s2_sc_fail || s2_replay) && isWrite(s2_req.cmd) + s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && 
isWrite(s2_req.cmd) val amoalu = new AMOALU when ((s2_valid || s2_replay) && (isWrite(s2_req.cmd) || s2_data_correctable)) { s3_req := s2_req @@ -950,14 +952,17 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio val releaseArb = (new Arbiter(2)) { new Release } FIFOedLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release - prober.io.req <> FIFOedLogicalNetworkIOUnwrapper(io.mem.probe) + val probe = FIFOedLogicalNetworkIOUnwrapper(io.mem.probe) + prober.io.req.valid := probe.valid && !lrsc_valid + probe.ready := prober.io.req.ready && !lrsc_valid + prober.io.req.bits := probe.bits prober.io.rep <> releaseArb.io.in(1) - prober.io.mshr_req <> mshr.io.probe prober.io.wb_req <> wb.io.probe prober.io.way_en := s2_tag_match_way prober.io.line_state := s2_hit_state prober.io.meta_read <> metaReadArb.io.in(2) prober.io.meta_write <> metaWriteArb.io.in(1) + prober.io.mshr_rdy := mshr.io.probe_rdy // refills val refill = conf.co.messageUpdatesDataArray(io.mem.grant.bits.payload) @@ -979,7 +984,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio val s4_valid = Reg(s3_valid, resetVal = Bool(false)) val s4_req = RegEn(s3_req, s3_valid && metaReadArb.io.out.valid) val bypasses = List( - (s2_valid_masked && !s2_sc_fail || s2_replay, s2_req, amoalu.io.out), + ((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out), (s3_valid, s3_req, s3_req.data), (s4_valid, s4_req, s4_req.data) ).map(r => (r._1 && (s1_addr >> conf.wordoffbits === r._2.addr >> conf.wordoffbits) && isWrite(r._2.cmd), r._3)) @@ -1035,6 +1040,6 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio io.cpu.resp.bits.data := loadgen.word io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte) io.cpu.resp.bits.store_data := s2_req.data - + io.mem.grant_ack <> mshr.io.mem_finish } From db5a060c7d62b205fa3b934ac58fe08ee9e6b57b Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 10 Apr 2013 
13:47:30 -0700 Subject: [PATCH 0583/1087] fix io dir --- rocket/src/main/scala/dpath_util.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index a9d199e8..25fd8438 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -109,7 +109,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component val rw = new Bundle { val addr = UFix(INPUT, log2Up(conf.nxpr)) val cmd = Bits(INPUT, PCR.SZ) - val rdata = Bits(INPUT, conf.xprlen) + val rdata = Bits(OUTPUT, conf.xprlen) val wdata = Bits(INPUT, conf.xprlen) } From 50ccc20bf317ee431c86640c0bb7234becd7563d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 22 Apr 2013 04:20:15 -0700 Subject: [PATCH 0584/1087] replace RDNPC with AUIPC --- rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/dpath.scala | 10 +++++----- rocket/src/main/scala/instructions.scala | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 113977d4..327d5e6b 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -97,7 +97,7 @@ object XDecode extends DecodeConstants JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), - RDNPC-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), + AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, 
N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index aad94b67..375f39e6 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -214,15 +214,15 @@ class Datapath(implicit conf: RocketConfiguration) extends Component Mux(io.ctrl.ex_br_type === BR_GEU, ex_rs1 >= ex_rs2, io.ctrl.ex_br_type === BR_J)))))) - val ex_pc_plus4 = ex_reg_pc + 4 - val ex_branch_target = (ex_reg_pc.toFix + (ex_imm << 1)).toUFix + val ex_pc_plus4 = ex_reg_pc.toFix + Mux(ex_reg_sel_alu2 === A2_LTYPE, ex_reg_inst(26,7).toFix << 12, Fix(4)) + val ex_branch_target = ex_reg_pc.toFix + (ex_imm << 1) val tsc_reg = WideCounter(64) val irt_reg = WideCounter(64, io.ctrl.wb_valid) // writeback select mux val ex_wdata = - Mux(ex_reg_ctrl_sel_wb === WB_PC, ex_pc_plus4.toFix, + Mux(ex_reg_ctrl_sel_wb === WB_PC, ex_pc_plus4, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg.value, Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg.value, alu.io.out))).toBits // WB_ALU @@ -328,8 +328,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.imem.req.bits.pc := Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), - Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec).toUFix, - wb_reg_pc))) // PC_WB + Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec), + wb_reg_pc))).toUFix // PC_WB // expose debug signals to testbench // XXX debug() doesn't right, so create a false dependence diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 229d27b5..edef2410 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -20,6 +20,7 @@ object Instructions val BLTU = Bits("b?????_?????_?????_???????_110_1100011",32); val BGEU = Bits("b?????_?????_?????_???????_111_1100011",32); val LUI = 
Bits("b?????_????????????????????_0110111",32); + val AUIPC = Bits("b?????_????????????????????_0010111",32); val ADDI = Bits("b?????_?????_????????????_000_0010011",32); val SLLI = Bits("b?????_?????_000000_??????_001_0010011",32); val SLTI = Bits("b?????_?????_????????????_010_0010011",32); From e8b20f3d38f71f1e67d44ea406ed9dee42e01992 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 25 Apr 2013 17:37:04 -0700 Subject: [PATCH 0585/1087] clear meta state of silently-dropped, clean evictee, so as to prevent a write race on meta array between probes on evictee and refill grant --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 8fb87254..e5ab5c58 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -264,7 +264,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura state := s_refill_req } }.otherwise { // writback if necessary and refill - state := Mux(conf.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_refill_req) + state := Mux(conf.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_meta_clear) } } From 1501e90c1f5f58996ebc36b1cf99eb5d5819c413 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 30 Apr 2013 00:37:51 -0700 Subject: [PATCH 0586/1087] interlock probe unit on tag RAW hazards --- rocket/src/main/scala/nbdcache.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index e5ab5c58..63f72357 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -283,7 +283,10 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura io.tag := req.addr >> conf.untagbits io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready - io.probe_rdy := 
!idx_match || (state != s_wb_req && state != s_wb_resp && state != s_meta_clear) + + val meta_hazard = Reg(resetVal = UFix(0,2)) + when (meta_hazard != 0 || io.meta_write.fire()) { meta_hazard := meta_hazard + 1 } + io.probe_rdy := !idx_match || (state != s_wb_req && state != s_wb_resp && state != s_meta_clear && meta_hazard === 0) io.meta_write.valid := state === s_meta_write_req || state === s_meta_clear io.meta_write.bits.idx := req_idx From 722bc917d32cacce1dd4cd8cf6f981d005565074 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 1 May 2013 10:05:54 -0700 Subject: [PATCH 0587/1087] broaden scope of s1_nack to include new probes accepted by the probe unit on that cycle --- rocket/src/main/scala/nbdcache.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 63f72357..66d906c2 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -190,6 +190,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura val mem_finish = (new FIFOIO) { (new LogicalNetworkIO) {new GrantAck} } val wb_req = (new FIFOIO) { new WritebackReq } val probe_rdy = Bool(OUTPUT) + val mshr_rdy = Bool(INPUT) } val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(9) { UFix() } @@ -1012,7 +1013,8 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio // nack it like it's hot val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || - s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready + s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready || + s1_req.addr(tagmsb, indexlsb) === io.mem.probe.bits.payload.addr && io.mem.probe.fire() val s2_nack_hit = RegEn(s1_nack, s1_valid || s1_replay) when (s2_nack_hit) { mshr.io.req.valid := 
Bool(false) } val s2_nack_victim = s2_hit && mshr.io.secondary_miss From b6945408cb9847e44ee7765b899a0627e73b4a19 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 1 May 2013 10:24:36 -0700 Subject: [PATCH 0588/1087] temp --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 66d906c2..5c3eb0c7 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1014,7 +1014,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio // nack it like it's hot val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready || - s1_req.addr(tagmsb, indexlsb) === io.mem.probe.bits.payload.addr && io.mem.probe.fire() + s1_req.addr >> conf.offbits === io.mem.probe.bits.payload.addr && io.mem.probe.fire() val s2_nack_hit = RegEn(s1_nack, s1_valid || s1_replay) when (s2_nack_hit) { mshr.io.req.valid := Bool(false) } val s2_nack_victim = s2_hit && mshr.io.secondary_miss From 63a38e79824a5cf66931f4f5fa0478786e977580 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 1 May 2013 16:34:33 -0700 Subject: [PATCH 0589/1087] Revert "temp" This reverts commit 73705e6ed8f98d08ce6b30fbe760de694c6563ae. 
--- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 5c3eb0c7..66d906c2 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1014,7 +1014,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio // nack it like it's hot val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready || - s1_req.addr >> conf.offbits === io.mem.probe.bits.payload.addr && io.mem.probe.fire() + s1_req.addr(tagmsb, indexlsb) === io.mem.probe.bits.payload.addr && io.mem.probe.fire() val s2_nack_hit = RegEn(s1_nack, s1_valid || s1_replay) when (s2_nack_hit) { mshr.io.req.valid := Bool(false) } val s2_nack_victim = s2_hit && mshr.io.secondary_miss From a6a88fce19f85b0dca1b1a36c3fa76038acd0787 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 1 May 2013 16:34:45 -0700 Subject: [PATCH 0590/1087] Revert "broaden scope of s1_nack to include new probes accepted by the probe unit on that cycle" This reverts commit b41e6bc50519631ba097ac1196737be7107295f9. 
--- rocket/src/main/scala/nbdcache.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 66d906c2..63f72357 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -190,7 +190,6 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura val mem_finish = (new FIFOIO) { (new LogicalNetworkIO) {new GrantAck} } val wb_req = (new FIFOIO) { new WritebackReq } val probe_rdy = Bool(OUTPUT) - val mshr_rdy = Bool(INPUT) } val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(9) { UFix() } @@ -1013,8 +1012,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio // nack it like it's hot val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || - s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready || - s1_req.addr(tagmsb, indexlsb) === io.mem.probe.bits.payload.addr && io.mem.probe.fire() + s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready val s2_nack_hit = RegEn(s1_nack, s1_valid || s1_replay) when (s2_nack_hit) { mshr.io.req.valid := Bool(false) } val s2_nack_victim = s2_hit && mshr.io.secondary_miss From 474d321cc720f0e4c6526c5c04e5a6a67a384e2e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 1 May 2013 16:35:24 -0700 Subject: [PATCH 0591/1087] fix meta hazard counter to reset on new meta writes --- rocket/src/main/scala/nbdcache.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 63f72357..647a96ec 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -285,7 +285,8 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura 
io.req_sec_rdy := sec_rdy && rpq.io.enq.ready val meta_hazard = Reg(resetVal = UFix(0,2)) - when (meta_hazard != 0 || io.meta_write.fire()) { meta_hazard := meta_hazard + 1 } + when (meta_hazard != 0) { meta_hazard := meta_hazard + 1 } + when (io.meta_write.fire()) { meta_hazard := 1 } io.probe_rdy := !idx_match || (state != s_wb_req && state != s_wb_resp && state != s_meta_clear && meta_hazard === 0) io.meta_write.valid := state === s_meta_write_req || state === s_meta_clear From d405ffa949dc556680387afffccb89ba9c1d8644 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 1 May 2013 21:01:20 -0700 Subject: [PATCH 0592/1087] assume all I$ grants bear data --- rocket/src/main/scala/icache.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 9e6ef285..d0437716 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -173,7 +173,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte rdy := state === s_ready && !s2_miss Assert(!c.co.isVoluntary(io.mem.grant.bits.payload) || !io.mem.grant.valid, "UncachedRequestors shouldn't get voluntary grants.") - val (rf_cnt, refill_done) = Counter(io.mem.grant.valid && !c.co.isVoluntary(io.mem.grant.bits.payload), REFILL_CYCLES) + val (rf_cnt, refill_done) = Counter(io.mem.grant.valid, REFILL_CYCLES) val repl_way = if (c.dm) UFix(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) val enc_tagbits = c.code.width(c.tagbits) @@ -225,7 +225,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte for (i <- 0 until c.assoc) { val data_array = Mem(c.sets*REFILL_CYCLES, seqRead = true){ Bits(width = c.code.width(c.databits)) } val s1_raddr = Reg{UFix()} - when (io.mem.grant.valid && c.co.messageHasData(io.mem.grant.bits.payload) && repl_way === UFix(i)) { + when (io.mem.grant.valid && repl_way === UFix(i)) { val d = io.mem.grant.bits.payload.data 
data_array(Cat(s2_idx,rf_cnt)) := c.code.encode(d) } From dfa7a03f73066725657a7a88df98dd128fc8c02e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 18 May 2013 00:45:13 -0700 Subject: [PATCH 0593/1087] use assert, not Assert --- rocket/src/main/scala/icache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index d0437716..318b2896 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -172,7 +172,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss - Assert(!c.co.isVoluntary(io.mem.grant.bits.payload) || !io.mem.grant.valid, "UncachedRequestors shouldn't get voluntary grants.") + assert(!c.co.isVoluntary(io.mem.grant.bits.payload) || !io.mem.grant.valid, "UncachedRequestors shouldn't get voluntary grants.") val (rf_cnt, refill_done) = Counter(io.mem.grant.valid, REFILL_CYCLES) val repl_way = if (c.dm) UFix(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) From 1dab9842313063b6d0ff65e36cedb8bbde7fb131 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 18 May 2013 00:45:29 -0700 Subject: [PATCH 0594/1087] use UFix instead of Bits for arithmetic --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 647a96ec..bfd9c83c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -669,7 +669,7 @@ class AMOALU(implicit conf: DCacheConfig) extends Component { val word = io.typ === MT_W || io.typ === MT_WU || io.typ === MT_B || io.typ === MT_BU val mask = Fix(-1,64) ^ (io.addr(2) << 31) - val adder_out = (io.lhs & mask) + (io.rhs & mask) + val adder_out = (io.lhs & mask).toUFix + (io.rhs & mask) val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63)) 
val cmp_rhs = Mux(word && !io.addr(2), io.rhs(31), io.rhs(63)) From 6eb4c2542a6a2cdc30451ce64d576524f6512e27 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 18 May 2013 18:09:23 -0700 Subject: [PATCH 0595/1087] comment out I$ assert for now --- rocket/src/main/scala/icache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 318b2896..e5ad20c9 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -172,7 +172,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss - assert(!c.co.isVoluntary(io.mem.grant.bits.payload) || !io.mem.grant.valid, "UncachedRequestors shouldn't get voluntary grants.") + //assert(!c.co.isVoluntary(io.mem.grant.bits.payload) || !io.mem.grant.valid, "UncachedRequestors shouldn't get voluntary grants.") val (rf_cnt, refill_done) = Counter(io.mem.grant.valid, REFILL_CYCLES) val repl_way = if (c.dm) UFix(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) From 3a1b5f01b20483a6a20bb363ab386057be121560 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 19 May 2013 23:27:47 -0700 Subject: [PATCH 0596/1087] don't take interrupts while they're disabled! a control bug allowed an interrupt to be taken on the instruction immediately following an interrupt-disabling instruction (but not thereafter). 
--- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 327d5e6b..584e9bce 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -530,7 +530,7 @@ class Control(implicit conf: RocketConfiguration) extends Component (ex_reg_fp_val && io.fpu.illegal_rm, UFix(2)))) mem_reg_replay := replay_ex && !take_pc_wb; - mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb + mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb && !mem_reg_replay_next when (ex_xcpt) { mem_reg_cause := ex_cause } mem_reg_div_mul_val := ex_reg_div_mul_val && io.dpath.div_mul_rdy From dcde377303e97e4b7d21bd5e488bc012b5075245 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 20 May 2013 15:22:58 -0700 Subject: [PATCH 0597/1087] Fix DM I$ deadlock BTB predictions were causing infinite miss loops --- rocket/src/main/scala/icache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index e5ad20c9..f4febe9d 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -110,7 +110,7 @@ class Frontend(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) ex icache.io.req.bits.idx := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) icache.io.invalidate := io.cpu.invalidate icache.io.req.bits.ppn := tlb.io.resp.ppn - icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss + icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss icache.io.resp.ready := !stall && !s1_same_block io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) From 28f914c3f24deaecd6c82a4fc5b7243d8f9eb909 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 21 May 2013 16:53:47 -0700 Subject: [PATCH 0598/1087] don't JALR to speculatively-bypassed addresses Technically not necessary, but 
probably improves performance. --- rocket/src/main/scala/ctrl.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 584e9bce..c08ec103 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -516,12 +516,14 @@ class Control(implicit conf: RocketConfiguration) extends Component // replay inst in ex stage val wb_dcache_miss = wb_reg_mem_val && !io.dmem.resp.valid - val replay_ex = wb_dcache_miss && ex_reg_load_use || mem_reg_flush_inst || - ex_reg_mem_val && !io.dmem.req.ready || - ex_reg_div_mul_val && !io.dpath.div_mul_rdy || - mem_reg_replay_next + val replay_ex_structural = ex_reg_mem_val && !io.dmem.req.ready || + ex_reg_div_mul_val && !io.dpath.div_mul_rdy + val replay_ex_other = wb_dcache_miss && ex_reg_load_use || mem_reg_replay_next + val replay_ex = replay_ex_structural || replay_ex_other ctrl_killx := take_pc_wb || replay_ex - val take_pc_ex = !Mux(ex_reg_jalr, ex_reg_btb_hit && io.dpath.jalr_eq, ex_reg_btb_hit === io.dpath.ex_br_taken) + val take_pc_ex = !Mux(ex_reg_jalr, + ex_reg_btb_hit && io.dpath.jalr_eq && !replay_ex_other, + ex_reg_btb_hit === io.dpath.ex_br_taken) // detect 2-cycle load-use delay for LB/LH/SC val ex_slow_bypass = ex_reg_mem_cmd === M_XSC || AVec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_reg_mem_type) From 69b508ff393a93260eeafdc78c4af8fbb6d3d447 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 21 May 2013 17:21:04 -0700 Subject: [PATCH 0599/1087] ported caches and htif to use new tilelink --- rocket/src/main/scala/htif.scala | 12 ++++++------ rocket/src/main/scala/icache.scala | 8 ++++---- rocket/src/main/scala/nbdcache.scala | 16 ++++++++-------- rocket/src/main/scala/tile.scala | 13 ++++++------- 4 files changed, 24 insertions(+), 25 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 6a872715..8a999419 100644 --- a/rocket/src/main/scala/htif.scala 
+++ b/rocket/src/main/scala/htif.scala @@ -139,7 +139,7 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component w when (state === state_mem_req && x_init.io.enq.ready) { state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) } - when (state === state_mem_wdata && io.mem.acquire_data.ready) { + when (state === state_mem_wdata && io.mem.acquire.data.ready) { when (mem_cnt.andR) { state := state_mem_wresp } @@ -185,15 +185,15 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component w val init_addr = addr.toUFix >> UFix(OFFSET_BITS-3) val co = conf.co.asInstanceOf[CoherencePolicyWithUncached] x_init.io.enq.bits := Mux(cmd === cmd_writemem, co.getUncachedWriteAcquire(init_addr, UFix(0)), co.getUncachedReadAcquire(init_addr, UFix(0))) - io.mem.acquire <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq, UFix(conf.ln.nClients), UFix(0)) - io.mem.acquire_data.valid := state === state_mem_wdata - io.mem.acquire_data.bits.payload.data := mem_req_data + io.mem.acquire.meta <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq, UFix(conf.ln.nClients), UFix(0)) + io.mem.acquire.data.valid := state === state_mem_wdata + io.mem.acquire.data.bits.payload.data := mem_req_data io.mem.grant_ack.valid := (state === state_mem_finish) && mem_needs_ack io.mem.grant_ack.bits.payload.master_xact_id := mem_gxid io.mem.grant_ack.bits.header.dst := mem_gsrc io.mem.probe.ready := Bool(false) - io.mem.release.valid := Bool(false) - io.mem.release_data.valid := Bool(false) + io.mem.release.meta.valid := Bool(false) + io.mem.release.data.valid := Bool(false) val pcrReadData = Reg{Bits(width = io.cpu(0).pcr_rep.bits.getWidth)} for (i <- 0 until conf.ln.nClients) { diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 9e6ef285..99c9926b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -246,9 +246,9 @@ class ICache(implicit c: ICacheConfig, lnconf: 
LogicalNetworkConfiguration) exte // output signals io.resp.valid := s2_hit - io.mem.acquire.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.acquire.bits.payload := c.co.getUncachedReadAcquire(s2_addr >> UFix(c.offbits), UFix(0)) - io.mem.acquire_data.valid := Bool(false) + io.mem.acquire.meta.valid := (state === s_request) && finish_q.io.enq.ready + io.mem.acquire.meta.bits.payload := c.co.getUncachedReadAcquire(s2_addr >> UFix(c.offbits), UFix(0)) + io.mem.acquire.data.valid := Bool(false) io.mem.grant_ack <> FIFOedLogicalNetworkIOWrapper(finish_q.io.deq) io.mem.grant.ready := Bool(true) @@ -259,7 +259,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte invalidated := Bool(false) } is (s_request) { - when (io.mem.acquire.ready && finish_q.io.enq.ready) { state := s_refill_wait } + when (io.mem.acquire.meta.ready && finish_q.io.enq.ready) { state := s_refill_wait } } is (s_refill_wait) { when (io.mem.grant.valid) { state := s_refill } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 66d906c2..bfe12ac9 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -937,11 +937,11 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio mshr.io.mem_grant.bits := io.mem.grant.bits when (mshr.io.req.fire()) { replacer.miss } - io.mem.acquire <> FIFOedLogicalNetworkIOWrapper(mshr.io.mem_req) - //TODO io.mem.acquire_data should be connected to uncached store data generator - //io.mem.acquire_data <> FIFOedLogicalNetworkIOWrapper(TODO) - io.mem.acquire_data.valid := Bool(false) - io.mem.acquire_data.bits.payload.data := UFix(0) + io.mem.acquire.meta <> FIFOedLogicalNetworkIOWrapper(mshr.io.mem_req) + //TODO io.mem.acquire.data should be connected to uncached store data generator + //io.mem.acquire.data <> FIFOedLogicalNetworkIOWrapper(TODO) + io.mem.acquire.data.valid := Bool(false) + 
io.mem.acquire.data.bits.payload.data := UFix(0) // replays readArb.io.in(1).valid := mshr.io.replay.valid @@ -954,7 +954,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio // probes val releaseArb = (new Arbiter(2)) { new Release } - FIFOedLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release + FIFOedLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release.meta val probe = FIFOedLogicalNetworkIOUnwrapper(io.mem.probe) prober.io.req.valid := probe.valid && !lrsc_valid @@ -982,7 +982,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio wb.io.data_req <> readArb.io.in(2) wb.io.data_resp := s2_data_corrected releaseArb.io.in(0) <> wb.io.release - FIFOedLogicalNetworkIOWrapper(wb.io.release_data) <> io.mem.release_data + FIFOedLogicalNetworkIOWrapper(wb.io.release_data) <> io.mem.release.data // store->load bypassing val s4_valid = Reg(s3_valid, resetVal = Bool(false)) @@ -1014,7 +1014,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio // nack it like it's hot val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready || - s1_req.addr(tagmsb, indexlsb) === io.mem.probe.bits.payload.addr && io.mem.probe.fire() + (s1_req.addr >> conf.offbits) === io.mem.probe.bits.payload.addr && io.mem.probe.fire() val s2_nack_hit = RegEn(s1_nack, s1_valid || s1_replay) when (s2_nack_hit) { mshr.io.req.valid := Bool(false) } val s2_nack_victim = s2_hit && mshr.io.secondary_miss diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index b073f9be..2c6cd558 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -40,20 +40,19 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon val icache = new Frontend()(confIn.icache, lnConf) val dcache = new HellaCache - val arbiter = new 
UncachedTileLinkIOArbiter(memPorts) + val arbiter = new UncachedTileLinkIOArbiter(memPorts, confIn.dcache.co) arbiter.io.in(dcachePortId) <> dcache.io.mem arbiter.io.in(icachePortId) <> icache.io.mem io.tilelink.acquire <> arbiter.io.out.acquire - io.tilelink.acquire_data <> arbiter.io.out.acquire_data arbiter.io.out.grant <> io.tilelink.grant io.tilelink.grant_ack <> arbiter.io.out.grant_ack dcache.io.mem.probe <> io.tilelink.probe - io.tilelink.release_data <> dcache.io.mem.release_data - io.tilelink.release.valid := dcache.io.mem.release.valid - dcache.io.mem.release.ready := io.tilelink.release.ready - io.tilelink.release.bits := dcache.io.mem.release.bits - io.tilelink.release.bits.payload.client_xact_id := Cat(dcache.io.mem.release.bits.payload.client_xact_id, UFix(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) + io.tilelink.release.data <> dcache.io.mem.release.data + io.tilelink.release.meta.valid := dcache.io.mem.release.meta.valid + dcache.io.mem.release.meta.ready := io.tilelink.release.meta.ready + io.tilelink.release.meta.bits := dcache.io.mem.release.meta.bits + io.tilelink.release.meta.bits.payload.client_xact_id := Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UFix(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) /*val ioSubBundles = io.tilelink.getClass.getMethods.filter( x => classOf[ClientSourcedIO[Data]].isAssignableFrom(x.getReturnType)).map{ m => From c837c1d80010f6fde214cd50cc65ea7f82d39013 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 21 May 2013 18:28:44 -0700 Subject: [PATCH 0600/1087] fix bug in previous JALR commit on commit tag 9a122c06d1bf11237d7fb0769d454a67bbb7400e --- rocket/src/main/scala/ctrl.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 
c08ec103..8273deed 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -521,9 +521,9 @@ class Control(implicit conf: RocketConfiguration) extends Component val replay_ex_other = wb_dcache_miss && ex_reg_load_use || mem_reg_replay_next val replay_ex = replay_ex_structural || replay_ex_other ctrl_killx := take_pc_wb || replay_ex - val take_pc_ex = !Mux(ex_reg_jalr, - ex_reg_btb_hit && io.dpath.jalr_eq && !replay_ex_other, - ex_reg_btb_hit === io.dpath.ex_br_taken) + val take_pc_ex = Mux(ex_reg_jalr, + !(ex_reg_btb_hit && io.dpath.jalr_eq) && !replay_ex_other, + ex_reg_btb_hit != io.dpath.ex_br_taken) // detect 2-cycle load-use delay for LB/LH/SC val ex_slow_bypass = ex_reg_mem_cmd === M_XSC || AVec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_reg_mem_type) From 11133d6d4c926f8aa2fcb555e812f18375ec829b Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 21 May 2013 18:59:21 -0700 Subject: [PATCH 0601/1087] clock gate s2 registers in the frontend --- rocket/src/main/scala/icache.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index f4febe9d..374bae6b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -81,9 +81,11 @@ class Frontend(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) ex s1_same_block := s0_same_block && !tlb.io.resp.miss s1_pc := npc s2_valid := !icmiss - s2_pc := s1_pc - s2_btb_hit := btb.io.hit - s2_xcpt_if := tlb.io.resp.xcpt_if + when (!icmiss) { + s2_pc := s1_pc + s2_btb_hit := btb.io.hit + s2_xcpt_if := tlb.io.resp.xcpt_if + } } when (io.cpu.req.valid) { s1_same_block := Bool(false) From fe9adfe71b90046d7d6b84d0c9fe033b06c9229d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 21 May 2013 19:35:08 -0700 Subject: [PATCH 0602/1087] Simplify and correct integer multiplier --- rocket/src/main/scala/divider.scala | 43 +++++++++++++++-------------- 1 file changed, 
22 insertions(+), 21 deletions(-) diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index cb9215e9..f76d38ed 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -9,7 +9,7 @@ import Util._ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { val io = new MultiplierIO val w = io.req.bits.in1.getWidth - val mulw = (w+1+mulUnroll-1)/mulUnroll*mulUnroll + val mulw = (w+mulUnroll-1)/mulUnroll*mulUnroll val s_ready :: s_neg_inputs :: s_mul_busy :: s_div_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(7) { UFix() }; val state = Reg(resetVal = s_ready); @@ -19,7 +19,7 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke val divby0 = Reg{Bool()} val neg_out = Reg{Bool()} val divisor = Reg{Bits(width = w+1)} // div only needs w bits - val remainder = Reg{Bits(width = 2*mulw+1)} // div only needs 2*w+1 bits + val remainder = Reg{Bits(width = 2*mulw+2)} // div only needs 2*w+1 bits def sext(x: Bits, cmds: Vec[Bits]) = { val sign = Mux(io.req.bits.dw === DW_64, x(w-1), x(w/2-1)) && cmds.contains(io.req.bits.fn) @@ -29,19 +29,21 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke val (lhs_in, lhs_sign) = sext(io.req.bits.in1, AVec(FN_DIV, FN_REM, FN_MULH, FN_MULHSU)) val (rhs_in, rhs_sign) = sext(io.req.bits.in2, AVec(FN_DIV, FN_REM, FN_MULH)) - val subtractor = remainder(2*w,w) - divisor(w-1,0) + val subtractor = remainder(2*w,w) - divisor(w,0) + val negated_remainder = -remainder(w-1,0) when (state === s_neg_inputs) { - state := s_div_busy - when (remainder(w-1)) { - remainder := -remainder(w-1,0) + val isMul = AVec(FN_MUL, FN_MULH, FN_MULHU, FN_MULHSU).contains(req.fn) + state := Mux(isMul, s_mul_busy, s_div_busy) + when (remainder(w-1) || isMul) { + remainder := negated_remainder } - when (divisor(w-1) && !AVec(FN_MULHU, FN_MULHSU).contains(req.fn)) { - divisor := 
subtractor(w-1,0) + when (divisor(w-1) || isMul) { + divisor := subtractor } } when (state === s_neg_output) { - remainder := -remainder(w-1,0) + remainder := negated_remainder state := s_done } when (state === s_move_rem) { @@ -49,16 +51,13 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke state := Mux(neg_out, s_neg_output, s_done) } when (state === s_mul_busy) { - val carryIn = remainder(w) - val mplier = Cat(remainder(2*mulw,w+1),remainder(w-1,0)).toFix + val mulReg = Cat(remainder(2*mulw+1,w+1),remainder(w-1,0)) + val mplier = mulReg(mulw-1,0) + val accum = mulReg(2*mulw,mulw).toFix val mpcand = divisor.toFix - val prod0 = mplier(2*mulw-1,mulw) + - (if (mulUnroll == 1) Mux(mplier(0), -Cat(mpcand < Fix(0), mpcand).toFix, Mux(carryIn, mpcand, Fix(0))) - else (mplier(mulUnroll-1,0) + carryIn.toUFix).toFix * mpcand) - val prod = Mux(mplier(mulUnroll-1,0).andR && carryIn, mplier(2*mulw-1,mulw), prod0) - val sum = Cat(prod, mplier(mulw-1,mulUnroll)) - val carryOut = mplier(mulUnroll-1) - remainder := Cat(sum(sum.getWidth-1,w), carryOut, sum(w-1,0)).toFix + val prod = mplier(mulUnroll-1,0) * mpcand + accum + val nextMulReg = Cat(prod, mplier(mulw-1,mulUnroll)) + remainder := Cat(nextMulReg >> w, Bool(false), nextMulReg(w-1,0)).toFix count := count + 1 when (count === mulw/mulUnroll-1) { @@ -81,7 +80,7 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke divby0 := divby0 && !msb remainder := Cat(Mux(msb, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !msb) - val divisorMSB = Log2(divisor, w) + val divisorMSB = Log2(divisor(w-1,0), w) val dividendMSB = Log2(remainder(w-1,0), w) val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB - dividendMSB val eOut = count === UFix(0) && eOutPos > 0 && (divisorMSB != UFix(0) || divisor(0)) @@ -101,12 +100,14 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke when (io.req.fire()) { val isMul = AVec(FN_MUL, FN_MULH, FN_MULHU, 
FN_MULHSU).contains(io.req.bits.fn) val isRem = AVec(FN_REM, FN_REMU).contains(io.req.bits.fn) - state := Mux(isMul, s_mul_busy, Mux(lhs_sign || rhs_sign, s_neg_inputs, s_div_busy)) + val mulState = Mux(lhs_sign, s_neg_inputs, s_mul_busy) + val divState = Mux(lhs_sign || rhs_sign, s_neg_inputs, s_div_busy) + state := Mux(isMul, mulState, divState) count := UFix(0) neg_out := !isMul && Mux(isRem, lhs_sign, lhs_sign != rhs_sign) divby0 := true divisor := Cat(rhs_sign, rhs_in) - remainder := Cat(Fill(mulw-w, isMul && lhs_sign), Bool(false), lhs_in) + remainder := lhs_in req := io.req.bits } From 12205b9684817c438af354a5bee3e652c8a7bc35 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 23 May 2013 14:09:03 -0700 Subject: [PATCH 0603/1087] remove obsolete config file reader prototype --- rocket/src/main/scala/config.scala | 56 ------------------------------ 1 file changed, 56 deletions(-) delete mode 100644 rocket/src/main/scala/config.scala diff --git a/rocket/src/main/scala/config.scala b/rocket/src/main/scala/config.scala deleted file mode 100644 index 13d7344a..00000000 --- a/rocket/src/main/scala/config.scala +++ /dev/null @@ -1,56 +0,0 @@ -package rocket -package config - -import java.io.File -import java.io.FileInputStream -import java.util.Properties -import scala.util.{Properties => SProperties} - -class Config(props: Properties) { - private val msg = "Configuration is missing requested parameter " - def getInt(name: String): Int = Option(props.getProperty(name).toInt).getOrElse(sys.error(msg+name)) - def getString(name: String): String = Option(props.getProperty(name)).getOrElse(sys.error(msg+name)) - def getBoolean(name: String): Boolean = Option(props.getProperty(name).toBoolean).getOrElse(sys.error(msg+name)) - def apply(name: String): Int = getInt(name) -} - -object Config { - - lazy val internal_config = getConfig() - - def apply(name: String) = internal_config(name) - - private def getConfig(): Config = { - - val filePath0 = - SProperties - 
.envOrNone("ROCKET_CONFIG") - .orElse(SProperties.propOrNone("rocket.config")) - if (filePath0.isEmpty) - Console.err.println(""" - | WARNING: Could not find configuration file to load. - | Options are: - | (1) Set environmental variable ROCKET_CONFIG to the config file path - | (2) Set system property rocket.config to the config file path - | Using default values for config. - """.stripMargin) - - val filePath = - filePath0.flatMap(fp => { - val f = new File(fp) - if (!f.isFile) { - Console.err.println(""" - | WARNING: File '%s' is not a valid file path - | Using default values for config - """.format(fp).stripMargin) - None - } else Some(fp) - }) - - val props = new Properties() - filePath.map(fp => props.load(new FileInputStream(fp))) - new Config(props) - } - -} - From 95c5147dc5f5ca9bfcc86b3608f00f3ff67248a9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 13 Jun 2013 10:31:04 -0700 Subject: [PATCH 0604/1087] Add RISC-V instruction disassembler --- rocket/src/main/scala/dpath.scala | 12 +- rocket/src/main/scala/instructions.scala | 816 ++++++++++++++++------- rocket/src/main/scala/util.scala | 72 ++ 3 files changed, 644 insertions(+), 256 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 375f39e6..8c0d0036 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -196,6 +196,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val pcr = new PCR pcr.io.host <> io.host pcr.io <> io.ctrl + pcr.io.pc := wb_reg_pc io.ctrl.pcr_replay := pcr.io.replay io.ptw.ptbr := pcr.io.ptbr @@ -331,9 +332,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Component Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec), wb_reg_pc))).toUFix // PC_WB - // expose debug signals to testbench - // XXX debug() doesn't right, so create a false dependence - val debugList = List(wb_reg_pc, wb_reg_inst, wb_wen, wb_reg_waddr, wb_wdata, 
wb_reg_rs1, wb_reg_rs2) - pcr.io.pc := wb_reg_pc | (debugList.map(d => d^d).reduce(_|_)).toUFix - debugList.foreach(debug _) + printf("C: %d [%d] pc=[%x] W[r%d=%x] R[r%d=%x] R[r%d=%x] inst=[%x] %s\n", + tsc_reg(32,0), io.ctrl.wb_valid, wb_reg_pc, + Mux(wb_wen, wb_reg_waddr, UFix(0)), wb_wdata, + wb_reg_inst(26,22), wb_reg_rs1, + wb_reg_inst(21,17), wb_reg_rs2, + wb_reg_inst, Disassemble(wb_reg_inst)) } diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index edef2410..33446f3c 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -7,259 +7,573 @@ import Constants._ object Instructions { // automatically generated by parse-opcodes - val J = Bits("b?????????????????????????_1100111",32); - val JAL = Bits("b?????????????????????????_1101111",32); - val JALR_C = Bits("b?????_?????_????????????_000_1101011",32); - val JALR_R = Bits("b?????_?????_????????????_001_1101011",32); - val JALR_J = Bits("b?????_?????_????????????_010_1101011",32); - val RDNPC = Bits("b?????_00000_000000000000_100_1101011",32); - val BEQ = Bits("b?????_?????_?????_???????_000_1100011",32); - val BNE = Bits("b?????_?????_?????_???????_001_1100011",32); - val BLT = Bits("b?????_?????_?????_???????_100_1100011",32); - val BGE = Bits("b?????_?????_?????_???????_101_1100011",32); - val BLTU = Bits("b?????_?????_?????_???????_110_1100011",32); - val BGEU = Bits("b?????_?????_?????_???????_111_1100011",32); - val LUI = Bits("b?????_????????????????????_0110111",32); - val AUIPC = Bits("b?????_????????????????????_0010111",32); - val ADDI = Bits("b?????_?????_????????????_000_0010011",32); - val SLLI = Bits("b?????_?????_000000_??????_001_0010011",32); - val SLTI = Bits("b?????_?????_????????????_010_0010011",32); - val SLTIU = Bits("b?????_?????_????????????_011_0010011",32); - val XORI = Bits("b?????_?????_????????????_100_0010011",32); - val SRLI = Bits("b?????_?????_000000_??????_101_0010011",32); - val 
SRAI = Bits("b?????_?????_000001_??????_101_0010011",32); - val ORI = Bits("b?????_?????_????????????_110_0010011",32); - val ANDI = Bits("b?????_?????_????????????_111_0010011",32); - val ADD = Bits("b?????_?????_?????_0000000000_0110011",32); - val SUB = Bits("b?????_?????_?????_1000000000_0110011",32); - val SLL = Bits("b?????_?????_?????_0000000001_0110011",32); - val SLT = Bits("b?????_?????_?????_0000000010_0110011",32); - val SLTU = Bits("b?????_?????_?????_0000000011_0110011",32); - val riscvXOR = Bits("b?????_?????_?????_0000000100_0110011",32); - val SRL = Bits("b?????_?????_?????_0000000101_0110011",32); - val SRA = Bits("b?????_?????_?????_1000000101_0110011",32); - val riscvOR = Bits("b?????_?????_?????_0000000110_0110011",32); - val riscvAND = Bits("b?????_?????_?????_0000000111_0110011",32); - val MUL = Bits("b?????_?????_?????_0000001000_0110011",32); - val MULH = Bits("b?????_?????_?????_0000001001_0110011",32); - val MULHSU = Bits("b?????_?????_?????_0000001010_0110011",32); - val MULHU = Bits("b?????_?????_?????_0000001011_0110011",32); - val DIV = Bits("b?????_?????_?????_0000001100_0110011",32); - val DIVU = Bits("b?????_?????_?????_0000001101_0110011",32); - val REM = Bits("b?????_?????_?????_0000001110_0110011",32); - val REMU = Bits("b?????_?????_?????_0000001111_0110011",32); - val ADDIW = Bits("b?????_?????_????????????_000_0011011",32); - val SLLIW = Bits("b?????_?????_000000_0_?????_001_0011011",32); - val SRLIW = Bits("b?????_?????_000000_0_?????_101_0011011",32); - val SRAIW = Bits("b?????_?????_000001_0_?????_101_0011011",32); - val ADDW = Bits("b?????_?????_?????_0000000000_0111011",32); - val SUBW = Bits("b?????_?????_?????_1000000000_0111011",32); - val SLLW = Bits("b?????_?????_?????_0000000001_0111011",32); - val SRLW = Bits("b?????_?????_?????_0000000101_0111011",32); - val SRAW = Bits("b?????_?????_?????_1000000101_0111011",32); - val MULW = Bits("b?????_?????_?????_0000001000_0111011",32); - val DIVW = 
Bits("b?????_?????_?????_0000001100_0111011",32); - val DIVUW = Bits("b?????_?????_?????_0000001101_0111011",32); - val REMW = Bits("b?????_?????_?????_0000001110_0111011",32); - val REMUW = Bits("b?????_?????_?????_0000001111_0111011",32); - val LB = Bits("b?????_?????_????????????_000_0000011",32); - val LH = Bits("b?????_?????_????????????_001_0000011",32); - val LW = Bits("b?????_?????_????????????_010_0000011",32); - val LD = Bits("b?????_?????_????????????_011_0000011",32); - val LBU = Bits("b?????_?????_????????????_100_0000011",32); - val LHU = Bits("b?????_?????_????????????_101_0000011",32); - val LWU = Bits("b?????_?????_????????????_110_0000011",32); - val SB = Bits("b?????_?????_?????_???????_000_0100011",32); - val SH = Bits("b?????_?????_?????_???????_001_0100011",32); - val SW = Bits("b?????_?????_?????_???????_010_0100011",32); - val SD = Bits("b?????_?????_?????_???????_011_0100011",32); - val AMOADD_W = Bits("b?????_?????_?????_0000000010_0101011",32); - val AMOSWAP_W = Bits("b?????_?????_?????_0000001010_0101011",32); - val AMOAND_W = Bits("b?????_?????_?????_0000010010_0101011",32); - val AMOOR_W = Bits("b?????_?????_?????_0000011010_0101011",32); - val AMOMIN_W = Bits("b?????_?????_?????_0000100010_0101011",32); - val AMOMAX_W = Bits("b?????_?????_?????_0000101010_0101011",32); - val AMOMINU_W = Bits("b?????_?????_?????_0000110010_0101011",32); - val AMOMAXU_W = Bits("b?????_?????_?????_0000111010_0101011",32); - val AMOADD_D = Bits("b?????_?????_?????_0000000011_0101011",32); - val AMOSWAP_D = Bits("b?????_?????_?????_0000001011_0101011",32); - val AMOAND_D = Bits("b?????_?????_?????_0000010011_0101011",32); - val AMOOR_D = Bits("b?????_?????_?????_0000011011_0101011",32); - val AMOMIN_D = Bits("b?????_?????_?????_0000100011_0101011",32); - val AMOMAX_D = Bits("b?????_?????_?????_0000101011_0101011",32); - val AMOMINU_D = Bits("b?????_?????_?????_0000110011_0101011",32); - val AMOMAXU_D = Bits("b?????_?????_?????_0000111011_0101011",32); - 
val LR_W = Bits("b?????_?????_00000_1000000010_0101011",32); - val LR_D = Bits("b?????_?????_00000_1000000011_0101011",32); - val SC_W = Bits("b?????_?????_?????_1000001010_0101011",32); - val SC_D = Bits("b?????_?????_?????_1000001011_0101011",32); - val FENCE_I = Bits("b?????_?????_????????????_001_0101111",32); - val FENCE = Bits("b?????_?????_????????????_010_0101111",32); - val SYSCALL = Bits("b00000_00000_00000_0000000000_1110111",32); - val BREAK = Bits("b00000_00000_00000_0000000001_1110111",32); - val RDCYCLE = Bits("b?????_00000_00000_0000000100_1110111",32); - val RDTIME = Bits("b?????_00000_00000_0000001100_1110111",32); - val RDINSTRET = Bits("b?????_00000_00000_0000010100_1110111",32); - val CLEARPCR = Bits("b?????_?????_????????????_000_1111011",32); - val SETPCR = Bits("b?????_?????_????????????_001_1111011",32); - val MFPCR = Bits("b?????_?????_00000_0000000010_1111011",32); - val MTPCR = Bits("b?????_?????_?????_0000000011_1111011",32); - val ERET = Bits("b00000_00000_00000_0000000100_1111011",32); - val CFLUSH = Bits("b00000_00000_00000_0000000101_1111011",32); + def J = Bits("b?????????????????????????_1100111",32); + def JAL = Bits("b?????????????????????????_1101111",32); + def JALR_C = Bits("b?????_?????_????????????_000_1101011",32); + def JALR_R = Bits("b?????_?????_????????????_001_1101011",32); + def JALR_J = Bits("b?????_?????_????????????_010_1101011",32); + def RDNPC = Bits("b?????_00000_000000000000_100_1101011",32); + def BEQ = Bits("b?????_?????_?????_???????_000_1100011",32); + def BNE = Bits("b?????_?????_?????_???????_001_1100011",32); + def BLT = Bits("b?????_?????_?????_???????_100_1100011",32); + def BGE = Bits("b?????_?????_?????_???????_101_1100011",32); + def BLTU = Bits("b?????_?????_?????_???????_110_1100011",32); + def BGEU = Bits("b?????_?????_?????_???????_111_1100011",32); + def LUI = Bits("b?????_????????????????????_0110111",32); + def AUIPC = Bits("b?????_????????????????????_0010111",32); + def ADDI = 
Bits("b?????_?????_????????????_000_0010011",32); + def SLLI = Bits("b?????_?????_000000_??????_001_0010011",32); + def SLTI = Bits("b?????_?????_????????????_010_0010011",32); + def SLTIU = Bits("b?????_?????_????????????_011_0010011",32); + def XORI = Bits("b?????_?????_????????????_100_0010011",32); + def SRLI = Bits("b?????_?????_000000_??????_101_0010011",32); + def SRAI = Bits("b?????_?????_000001_??????_101_0010011",32); + def ORI = Bits("b?????_?????_????????????_110_0010011",32); + def ANDI = Bits("b?????_?????_????????????_111_0010011",32); + def ADD = Bits("b?????_?????_?????_0000000000_0110011",32); + def SUB = Bits("b?????_?????_?????_1000000000_0110011",32); + def SLL = Bits("b?????_?????_?????_0000000001_0110011",32); + def SLT = Bits("b?????_?????_?????_0000000010_0110011",32); + def SLTU = Bits("b?????_?????_?????_0000000011_0110011",32); + def riscvXOR = Bits("b?????_?????_?????_0000000100_0110011",32); + def SRL = Bits("b?????_?????_?????_0000000101_0110011",32); + def SRA = Bits("b?????_?????_?????_1000000101_0110011",32); + def riscvOR = Bits("b?????_?????_?????_0000000110_0110011",32); + def riscvAND = Bits("b?????_?????_?????_0000000111_0110011",32); + def MUL = Bits("b?????_?????_?????_0000001000_0110011",32); + def MULH = Bits("b?????_?????_?????_0000001001_0110011",32); + def MULHSU = Bits("b?????_?????_?????_0000001010_0110011",32); + def MULHU = Bits("b?????_?????_?????_0000001011_0110011",32); + def DIV = Bits("b?????_?????_?????_0000001100_0110011",32); + def DIVU = Bits("b?????_?????_?????_0000001101_0110011",32); + def REM = Bits("b?????_?????_?????_0000001110_0110011",32); + def REMU = Bits("b?????_?????_?????_0000001111_0110011",32); + def ADDIW = Bits("b?????_?????_????????????_000_0011011",32); + def SLLIW = Bits("b?????_?????_000000_0_?????_001_0011011",32); + def SRLIW = Bits("b?????_?????_000000_0_?????_101_0011011",32); + def SRAIW = Bits("b?????_?????_000001_0_?????_101_0011011",32); + def ADDW = 
Bits("b?????_?????_?????_0000000000_0111011",32); + def SUBW = Bits("b?????_?????_?????_1000000000_0111011",32); + def SLLW = Bits("b?????_?????_?????_0000000001_0111011",32); + def SRLW = Bits("b?????_?????_?????_0000000101_0111011",32); + def SRAW = Bits("b?????_?????_?????_1000000101_0111011",32); + def MULW = Bits("b?????_?????_?????_0000001000_0111011",32); + def DIVW = Bits("b?????_?????_?????_0000001100_0111011",32); + def DIVUW = Bits("b?????_?????_?????_0000001101_0111011",32); + def REMW = Bits("b?????_?????_?????_0000001110_0111011",32); + def REMUW = Bits("b?????_?????_?????_0000001111_0111011",32); + def LB = Bits("b?????_?????_????????????_000_0000011",32); + def LH = Bits("b?????_?????_????????????_001_0000011",32); + def LW = Bits("b?????_?????_????????????_010_0000011",32); + def LD = Bits("b?????_?????_????????????_011_0000011",32); + def LBU = Bits("b?????_?????_????????????_100_0000011",32); + def LHU = Bits("b?????_?????_????????????_101_0000011",32); + def LWU = Bits("b?????_?????_????????????_110_0000011",32); + def SB = Bits("b?????_?????_?????_???????_000_0100011",32); + def SH = Bits("b?????_?????_?????_???????_001_0100011",32); + def SW = Bits("b?????_?????_?????_???????_010_0100011",32); + def SD = Bits("b?????_?????_?????_???????_011_0100011",32); + def AMOADD_W = Bits("b?????_?????_?????_0000000010_0101011",32); + def AMOSWAP_W = Bits("b?????_?????_?????_0000001010_0101011",32); + def AMOAND_W = Bits("b?????_?????_?????_0000010010_0101011",32); + def AMOOR_W = Bits("b?????_?????_?????_0000011010_0101011",32); + def AMOMIN_W = Bits("b?????_?????_?????_0000100010_0101011",32); + def AMOMAX_W = Bits("b?????_?????_?????_0000101010_0101011",32); + def AMOMINU_W = Bits("b?????_?????_?????_0000110010_0101011",32); + def AMOMAXU_W = Bits("b?????_?????_?????_0000111010_0101011",32); + def AMOADD_D = Bits("b?????_?????_?????_0000000011_0101011",32); + def AMOSWAP_D = Bits("b?????_?????_?????_0000001011_0101011",32); + def AMOAND_D = 
Bits("b?????_?????_?????_0000010011_0101011",32); + def AMOOR_D = Bits("b?????_?????_?????_0000011011_0101011",32); + def AMOMIN_D = Bits("b?????_?????_?????_0000100011_0101011",32); + def AMOMAX_D = Bits("b?????_?????_?????_0000101011_0101011",32); + def AMOMINU_D = Bits("b?????_?????_?????_0000110011_0101011",32); + def AMOMAXU_D = Bits("b?????_?????_?????_0000111011_0101011",32); + def LR_W = Bits("b?????_?????_00000_1000000010_0101011",32); + def LR_D = Bits("b?????_?????_00000_1000000011_0101011",32); + def SC_W = Bits("b?????_?????_?????_1000001010_0101011",32); + def SC_D = Bits("b?????_?????_?????_1000001011_0101011",32); + def FENCE_I = Bits("b?????_?????_????????????_001_0101111",32); + def FENCE = Bits("b?????_?????_????????????_010_0101111",32); + def SYSCALL = Bits("b00000_00000_00000_0000000000_1110111",32); + def BREAK = Bits("b00000_00000_00000_0000000001_1110111",32); + def RDCYCLE = Bits("b?????_00000_00000_0000000100_1110111",32); + def RDTIME = Bits("b?????_00000_00000_0000001100_1110111",32); + def RDINSTRET = Bits("b?????_00000_00000_0000010100_1110111",32); + def CLEARPCR = Bits("b?????_?????_????????????_000_1111011",32); + def SETPCR = Bits("b?????_?????_????????????_001_1111011",32); + def MFPCR = Bits("b?????_?????_00000_0000000010_1111011",32); + def MTPCR = Bits("b?????_?????_?????_0000000011_1111011",32); + def ERET = Bits("b00000_00000_00000_0000000100_1111011",32); + def CFLUSH = Bits("b00000_00000_00000_0000000101_1111011",32); // floating point instructions - val FMOVZ = Bits("b?????_?????_?????_0000010101_1110111",32); - val FMOVN = Bits("b?????_?????_?????_0000011101_1110111",32); - val FADD_S = Bits("b?????_?????_?????_00000_???_00_1010011",32); - val FSUB_S = Bits("b?????_?????_?????_00001_???_00_1010011",32); - val FMUL_S = Bits("b?????_?????_?????_00010_???_00_1010011",32); - val FDIV_S = Bits("b?????_?????_?????_00011_???_00_1010011",32); - val FSQRT_S = Bits("b?????_?????_00000_00100_???_00_1010011",32); - val FSGNJ_S = 
Bits("b?????_?????_?????_00101_000_00_1010011",32); - val FSGNJN_S = Bits("b?????_?????_?????_00110_000_00_1010011",32); - val FSGNJX_S = Bits("b?????_?????_?????_00111_000_00_1010011",32); - val FADD_D = Bits("b?????_?????_?????_00000_???_01_1010011",32); - val FSUB_D = Bits("b?????_?????_?????_00001_???_01_1010011",32); - val FMUL_D = Bits("b?????_?????_?????_00010_???_01_1010011",32); - val FDIV_D = Bits("b?????_?????_?????_00011_???_01_1010011",32); - val FSQRT_D = Bits("b?????_?????_00000_00100_???_01_1010011",32); - val FSGNJ_D = Bits("b?????_?????_?????_00101_000_01_1010011",32); - val FSGNJN_D = Bits("b?????_?????_?????_00110_000_01_1010011",32); - val FSGNJX_D = Bits("b?????_?????_?????_00111_000_01_1010011",32); - val FCVT_L_S = Bits("b?????_?????_00000_01000_???_00_1010011",32); - val FCVT_LU_S = Bits("b?????_?????_00000_01001_???_00_1010011",32); - val FCVT_W_S = Bits("b?????_?????_00000_01010_???_00_1010011",32); - val FCVT_WU_S = Bits("b?????_?????_00000_01011_???_00_1010011",32); - val FCVT_L_D = Bits("b?????_?????_00000_01000_???_01_1010011",32); - val FCVT_LU_D = Bits("b?????_?????_00000_01001_???_01_1010011",32); - val FCVT_W_D = Bits("b?????_?????_00000_01010_???_01_1010011",32); - val FCVT_WU_D = Bits("b?????_?????_00000_01011_???_01_1010011",32); - val FCVT_S_L = Bits("b?????_?????_00000_01100_???_00_1010011",32); - val FCVT_S_LU = Bits("b?????_?????_00000_01101_???_00_1010011",32); - val FCVT_S_W = Bits("b?????_?????_00000_01110_???_00_1010011",32); - val FCVT_S_WU = Bits("b?????_?????_00000_01111_???_00_1010011",32); - val FCVT_D_L = Bits("b?????_?????_00000_01100_???_01_1010011",32); - val FCVT_D_LU = Bits("b?????_?????_00000_01101_???_01_1010011",32); - val FCVT_D_W = Bits("b?????_?????_00000_01110_???_01_1010011",32); - val FCVT_D_WU = Bits("b?????_?????_00000_01111_???_01_1010011",32); - val FCVT_S_D = Bits("b?????_?????_00000_10001_???_00_1010011",32); - val FCVT_D_S = Bits("b?????_?????_00000_10000_???_01_1010011",32); - val FEQ_S = 
Bits("b?????_?????_?????_10101_000_00_1010011",32); - val FLT_S = Bits("b?????_?????_?????_10110_000_00_1010011",32); - val FLE_S = Bits("b?????_?????_?????_10111_000_00_1010011",32); - val FEQ_D = Bits("b?????_?????_?????_10101_000_01_1010011",32); - val FLT_D = Bits("b?????_?????_?????_10110_000_01_1010011",32); - val FLE_D = Bits("b?????_?????_?????_10111_000_01_1010011",32); - val FMIN_S = Bits("b?????_?????_?????_11000_000_00_1010011",32); - val FMAX_S = Bits("b?????_?????_?????_11001_000_00_1010011",32); - val FMIN_D = Bits("b?????_?????_?????_11000_000_01_1010011",32); - val FMAX_D = Bits("b?????_?????_?????_11001_000_01_1010011",32); - val MFTX_S = Bits("b?????_?????_00000_11100_000_00_1010011",32); - val MFTX_D = Bits("b?????_?????_00000_11100_000_01_1010011",32); - val MFFSR = Bits("b?????_00000_00000_11101_000_00_1010011",32); - val MXTF_S = Bits("b?????_?????_00000_11110_000_00_1010011",32); - val MXTF_D = Bits("b?????_?????_00000_11110_000_01_1010011",32); - val MTFSR = Bits("b?????_?????_00000_11111_000_00_1010011",32); - val FLW = Bits("b?????_?????_????????????_010_0000111",32); - val FLD = Bits("b?????_?????_????????????_011_0000111",32); - val FSW = Bits("b?????_?????_?????_???????_010_0100111",32); - val FSD = Bits("b?????_?????_?????_???????_011_0100111",32); - val FMADD_S = Bits("b?????_?????_?????_?????_???_00_1000011",32); - val FMSUB_S = Bits("b?????_?????_?????_?????_???_00_1000111",32); - val FNMSUB_S = Bits("b?????_?????_?????_?????_???_00_1001011",32); - val FNMADD_S = Bits("b?????_?????_?????_?????_???_00_1001111",32); - val FMADD_D = Bits("b?????_?????_?????_?????_???_01_1000011",32); - val FMSUB_D = Bits("b?????_?????_?????_?????_???_01_1000111",32); - val FNMSUB_D = Bits("b?????_?????_?????_?????_???_01_1001011",32); - val FNMADD_D = Bits("b?????_?????_?????_?????_???_01_1001111",32); + def FMOVZ = Bits("b?????_?????_?????_0000010101_1110111",32); + def FMOVN = Bits("b?????_?????_?????_0000011101_1110111",32); + def FADD_S = 
Bits("b?????_?????_?????_00000_???_00_1010011",32); + def FSUB_S = Bits("b?????_?????_?????_00001_???_00_1010011",32); + def FMUL_S = Bits("b?????_?????_?????_00010_???_00_1010011",32); + def FDIV_S = Bits("b?????_?????_?????_00011_???_00_1010011",32); + def FSQRT_S = Bits("b?????_?????_00000_00100_???_00_1010011",32); + def FSGNJ_S = Bits("b?????_?????_?????_00101_000_00_1010011",32); + def FSGNJN_S = Bits("b?????_?????_?????_00110_000_00_1010011",32); + def FSGNJX_S = Bits("b?????_?????_?????_00111_000_00_1010011",32); + def FADD_D = Bits("b?????_?????_?????_00000_???_01_1010011",32); + def FSUB_D = Bits("b?????_?????_?????_00001_???_01_1010011",32); + def FMUL_D = Bits("b?????_?????_?????_00010_???_01_1010011",32); + def FDIV_D = Bits("b?????_?????_?????_00011_???_01_1010011",32); + def FSQRT_D = Bits("b?????_?????_00000_00100_???_01_1010011",32); + def FSGNJ_D = Bits("b?????_?????_?????_00101_000_01_1010011",32); + def FSGNJN_D = Bits("b?????_?????_?????_00110_000_01_1010011",32); + def FSGNJX_D = Bits("b?????_?????_?????_00111_000_01_1010011",32); + def FCVT_L_S = Bits("b?????_?????_00000_01000_???_00_1010011",32); + def FCVT_LU_S = Bits("b?????_?????_00000_01001_???_00_1010011",32); + def FCVT_W_S = Bits("b?????_?????_00000_01010_???_00_1010011",32); + def FCVT_WU_S = Bits("b?????_?????_00000_01011_???_00_1010011",32); + def FCVT_L_D = Bits("b?????_?????_00000_01000_???_01_1010011",32); + def FCVT_LU_D = Bits("b?????_?????_00000_01001_???_01_1010011",32); + def FCVT_W_D = Bits("b?????_?????_00000_01010_???_01_1010011",32); + def FCVT_WU_D = Bits("b?????_?????_00000_01011_???_01_1010011",32); + def FCVT_S_L = Bits("b?????_?????_00000_01100_???_00_1010011",32); + def FCVT_S_LU = Bits("b?????_?????_00000_01101_???_00_1010011",32); + def FCVT_S_W = Bits("b?????_?????_00000_01110_???_00_1010011",32); + def FCVT_S_WU = Bits("b?????_?????_00000_01111_???_00_1010011",32); + def FCVT_D_L = Bits("b?????_?????_00000_01100_???_01_1010011",32); + def FCVT_D_LU = 
Bits("b?????_?????_00000_01101_???_01_1010011",32); + def FCVT_D_W = Bits("b?????_?????_00000_01110_???_01_1010011",32); + def FCVT_D_WU = Bits("b?????_?????_00000_01111_???_01_1010011",32); + def FCVT_S_D = Bits("b?????_?????_00000_10001_???_00_1010011",32); + def FCVT_D_S = Bits("b?????_?????_00000_10000_???_01_1010011",32); + def FEQ_S = Bits("b?????_?????_?????_10101_000_00_1010011",32); + def FLT_S = Bits("b?????_?????_?????_10110_000_00_1010011",32); + def FLE_S = Bits("b?????_?????_?????_10111_000_00_1010011",32); + def FEQ_D = Bits("b?????_?????_?????_10101_000_01_1010011",32); + def FLT_D = Bits("b?????_?????_?????_10110_000_01_1010011",32); + def FLE_D = Bits("b?????_?????_?????_10111_000_01_1010011",32); + def FMIN_S = Bits("b?????_?????_?????_11000_000_00_1010011",32); + def FMAX_S = Bits("b?????_?????_?????_11001_000_00_1010011",32); + def FMIN_D = Bits("b?????_?????_?????_11000_000_01_1010011",32); + def FMAX_D = Bits("b?????_?????_?????_11001_000_01_1010011",32); + def MFTX_S = Bits("b?????_?????_00000_11100_000_00_1010011",32); + def MFTX_D = Bits("b?????_?????_00000_11100_000_01_1010011",32); + def MFFSR = Bits("b?????_00000_00000_11101_000_00_1010011",32); + def MXTF_S = Bits("b?????_?????_00000_11110_000_00_1010011",32); + def MXTF_D = Bits("b?????_?????_00000_11110_000_01_1010011",32); + def MTFSR = Bits("b?????_?????_00000_11111_000_00_1010011",32); + def FLW = Bits("b?????_?????_????????????_010_0000111",32); + def FLD = Bits("b?????_?????_????????????_011_0000111",32); + def FSW = Bits("b?????_?????_?????_???????_010_0100111",32); + def FSD = Bits("b?????_?????_?????_???????_011_0100111",32); + def FMADD_S = Bits("b?????_?????_?????_?????_???_00_1000011",32); + def FMSUB_S = Bits("b?????_?????_?????_?????_???_00_1000111",32); + def FNMSUB_S = Bits("b?????_?????_?????_?????_???_00_1001011",32); + def FNMADD_S = Bits("b?????_?????_?????_?????_???_00_1001111",32); + def FMADD_D = Bits("b?????_?????_?????_?????_???_01_1000011",32); + def FMSUB_D 
= Bits("b?????_?????_?????_?????_???_01_1000111",32); + def FNMSUB_D = Bits("b?????_?????_?????_?????_???_01_1001011",32); + def FNMADD_D = Bits("b?????_?????_?????_?????_???_01_1001111",32); // vector instructions - val FENCE_V_L = Bits("b?????_?????_????????????_100_0101111",32); - val FENCE_V_G = Bits("b?????_?????_????????????_101_0101111",32); - val MOVZ = Bits("b?????_?????_?????_0000000101_1110111",32); - val MOVN = Bits("b?????_?????_?????_0000001101_1110111",32); - val STOP = Bits("b00000_00000_00000_0000000010_1110111",32); - val UTIDX = Bits("b?????_00000_00000_0000000011_1110111",32); - val VLD = Bits("b?????_?????_00000_0000000011_0001011",32); - val VLW = Bits("b?????_?????_00000_0000000010_0001011",32); - val VLWU = Bits("b?????_?????_00000_0000000110_0001011",32); - val VLH = Bits("b?????_?????_00000_0000000001_0001011",32); - val VLHU = Bits("b?????_?????_00000_0000000101_0001011",32); - val VLB = Bits("b?????_?????_00000_0000000000_0001011",32); - val VLBU = Bits("b?????_?????_00000_0000000100_0001011",32); - val VFLD = Bits("b?????_?????_00000_0000001011_0001011",32); - val VFLW = Bits("b?????_?????_00000_0000001010_0001011",32); - val VLSTD = Bits("b?????_?????_?????_0000100011_0001011",32); - val VLSTW = Bits("b?????_?????_?????_0000100010_0001011",32); - val VLSTWU = Bits("b?????_?????_?????_0000100110_0001011",32); - val VLSTH = Bits("b?????_?????_?????_0000100001_0001011",32); - val VLSTHU = Bits("b?????_?????_?????_0000100101_0001011",32); - val VLSTB = Bits("b?????_?????_?????_0000100000_0001011",32); - val VLSTBU = Bits("b?????_?????_?????_0000100100_0001011",32); - val VFLSTD = Bits("b?????_?????_?????_0000101011_0001011",32); - val VFLSTW = Bits("b?????_?????_?????_0000101010_0001011",32); - val VLSEGD = Bits("b?????_?????_?????_0001000011_0001011",32); - val VLSEGW = Bits("b?????_?????_?????_0001000010_0001011",32); - val VLSEGWU = Bits("b?????_?????_?????_0001000110_0001011",32); - val VLSEGH = 
Bits("b?????_?????_?????_0001000001_0001011",32); - val VLSEGHU = Bits("b?????_?????_?????_0001000101_0001011",32); - val VLSEGB = Bits("b?????_?????_?????_0001000000_0001011",32); - val VLSEGBU = Bits("b?????_?????_?????_0001000100_0001011",32); - val VFLSEGD = Bits("b?????_?????_?????_0001001011_0001011",32); - val VFLSEGW = Bits("b?????_?????_?????_0001001010_0001011",32); - val VLSEGSTD = Bits("b?????_?????_?????_?????_100_11_0001011",32); - val VLSEGSTW = Bits("b?????_?????_?????_?????_100_10_0001011",32); - val VLSEGSTWU = Bits("b?????_?????_?????_?????_101_10_0001011",32); - val VLSEGSTH = Bits("b?????_?????_?????_?????_100_01_0001011",32); - val VLSEGSTHU = Bits("b?????_?????_?????_?????_101_01_0001011",32); - val VLSEGSTB = Bits("b?????_?????_?????_?????_100_00_0001011",32); - val VLSEGSTBU = Bits("b?????_?????_?????_?????_101_00_0001011",32); - val VFLSEGSTD = Bits("b?????_?????_?????_?????_110_11_0001011",32); - val VFLSEGSTW = Bits("b?????_?????_?????_?????_110_10_0001011",32); - val VSD = Bits("b?????_?????_00000_0000000011_0001111",32); - val VSW = Bits("b?????_?????_00000_0000000010_0001111",32); - val VSH = Bits("b?????_?????_00000_0000000001_0001111",32); - val VSB = Bits("b?????_?????_00000_0000000000_0001111",32); - val VFSD = Bits("b?????_?????_00000_0000001011_0001111",32); - val VFSW = Bits("b?????_?????_00000_0000001010_0001111",32); - val VSSTD = Bits("b?????_?????_?????_0000100011_0001111",32); - val VSSTW = Bits("b?????_?????_?????_0000100010_0001111",32); - val VSSTH = Bits("b?????_?????_?????_0000100001_0001111",32); - val VSSTB = Bits("b?????_?????_?????_0000100000_0001111",32); - val VFSSTD = Bits("b?????_?????_?????_0000101011_0001111",32); - val VFSSTW = Bits("b?????_?????_?????_0000101010_0001111",32); - val VSSEGD = Bits("b?????_?????_?????_0001000011_0001111",32); - val VSSEGW = Bits("b?????_?????_?????_0001000010_0001111",32); - val VSSEGH = Bits("b?????_?????_?????_0001000001_0001111",32); - val VSSEGB = 
Bits("b?????_?????_?????_0001000000_0001111",32); - val VFSSEGD = Bits("b?????_?????_?????_0001001011_0001111",32); - val VFSSEGW = Bits("b?????_?????_?????_0001001010_0001111",32); - val VSSEGSTD = Bits("b?????_?????_?????_?????_100_11_0001111",32); - val VSSEGSTW = Bits("b?????_?????_?????_?????_100_10_0001111",32); - val VSSEGSTH = Bits("b?????_?????_?????_?????_100_01_0001111",32); - val VSSEGSTB = Bits("b?????_?????_?????_?????_100_00_0001111",32); - val VFSSEGSTD = Bits("b?????_?????_?????_?????_110_11_0001111",32); - val VFSSEGSTW = Bits("b?????_?????_?????_?????_110_10_0001111",32); - val VMVV = Bits("b?????_?????_00000_0000000000_1110011",32); - val VMSV = Bits("b?????_?????_00000_0000010000_1110011",32); - val VMST = Bits("b?????_?????_?????_0000100000_1110011",32); - val VMTS = Bits("b?????_?????_?????_0000110000_1110011",32); - val VFMVV = Bits("b?????_?????_00000_0000000010_1110011",32); - val VFMSV = Bits("b?????_?????_00000_0000010010_1110011",32); - val VFMST = Bits("b?????_?????_?????_0000100010_1110011",32); - val VFMTS = Bits("b?????_?????_?????_0000110010_1110011",32); - val VVCFGIVL = Bits("b?????_?????_????????????_001_1110011",32); - val VTCFGIVL = Bits("b?????_?????_????????????_011_1110011",32); - val VVCFG = Bits("b00000_?????_?????_0000001000_1110011",32); - val VTCFG = Bits("b00000_?????_?????_0000011000_1110011",32); - val VSETVL = Bits("b?????_?????_000000000000_101_1110011",32); - val VF = Bits("b00000_?????_????????????_111_1110011",32); + def FENCE_V_L = Bits("b?????_?????_????????????_100_0101111",32); + def FENCE_V_G = Bits("b?????_?????_????????????_101_0101111",32); + def MOVZ = Bits("b?????_?????_?????_0000000101_1110111",32); + def MOVN = Bits("b?????_?????_?????_0000001101_1110111",32); + def STOP = Bits("b00000_00000_00000_0000000010_1110111",32); + def UTIDX = Bits("b?????_00000_00000_0000000011_1110111",32); + def VLD = Bits("b?????_?????_00000_0000000011_0001011",32); + def VLW = 
Bits("b?????_?????_00000_0000000010_0001011",32); + def VLWU = Bits("b?????_?????_00000_0000000110_0001011",32); + def VLH = Bits("b?????_?????_00000_0000000001_0001011",32); + def VLHU = Bits("b?????_?????_00000_0000000101_0001011",32); + def VLB = Bits("b?????_?????_00000_0000000000_0001011",32); + def VLBU = Bits("b?????_?????_00000_0000000100_0001011",32); + def VFLD = Bits("b?????_?????_00000_0000001011_0001011",32); + def VFLW = Bits("b?????_?????_00000_0000001010_0001011",32); + def VLSTD = Bits("b?????_?????_?????_0000100011_0001011",32); + def VLSTW = Bits("b?????_?????_?????_0000100010_0001011",32); + def VLSTWU = Bits("b?????_?????_?????_0000100110_0001011",32); + def VLSTH = Bits("b?????_?????_?????_0000100001_0001011",32); + def VLSTHU = Bits("b?????_?????_?????_0000100101_0001011",32); + def VLSTB = Bits("b?????_?????_?????_0000100000_0001011",32); + def VLSTBU = Bits("b?????_?????_?????_0000100100_0001011",32); + def VFLSTD = Bits("b?????_?????_?????_0000101011_0001011",32); + def VFLSTW = Bits("b?????_?????_?????_0000101010_0001011",32); + def VLSEGD = Bits("b?????_?????_?????_0001000011_0001011",32); + def VLSEGW = Bits("b?????_?????_?????_0001000010_0001011",32); + def VLSEGWU = Bits("b?????_?????_?????_0001000110_0001011",32); + def VLSEGH = Bits("b?????_?????_?????_0001000001_0001011",32); + def VLSEGHU = Bits("b?????_?????_?????_0001000101_0001011",32); + def VLSEGB = Bits("b?????_?????_?????_0001000000_0001011",32); + def VLSEGBU = Bits("b?????_?????_?????_0001000100_0001011",32); + def VFLSEGD = Bits("b?????_?????_?????_0001001011_0001011",32); + def VFLSEGW = Bits("b?????_?????_?????_0001001010_0001011",32); + def VLSEGSTD = Bits("b?????_?????_?????_?????_100_11_0001011",32); + def VLSEGSTW = Bits("b?????_?????_?????_?????_100_10_0001011",32); + def VLSEGSTWU = Bits("b?????_?????_?????_?????_101_10_0001011",32); + def VLSEGSTH = Bits("b?????_?????_?????_?????_100_01_0001011",32); + def VLSEGSTHU = 
Bits("b?????_?????_?????_?????_101_01_0001011",32); + def VLSEGSTB = Bits("b?????_?????_?????_?????_100_00_0001011",32); + def VLSEGSTBU = Bits("b?????_?????_?????_?????_101_00_0001011",32); + def VFLSEGSTD = Bits("b?????_?????_?????_?????_110_11_0001011",32); + def VFLSEGSTW = Bits("b?????_?????_?????_?????_110_10_0001011",32); + def VSD = Bits("b?????_?????_00000_0000000011_0001111",32); + def VSW = Bits("b?????_?????_00000_0000000010_0001111",32); + def VSH = Bits("b?????_?????_00000_0000000001_0001111",32); + def VSB = Bits("b?????_?????_00000_0000000000_0001111",32); + def VFSD = Bits("b?????_?????_00000_0000001011_0001111",32); + def VFSW = Bits("b?????_?????_00000_0000001010_0001111",32); + def VSSTD = Bits("b?????_?????_?????_0000100011_0001111",32); + def VSSTW = Bits("b?????_?????_?????_0000100010_0001111",32); + def VSSTH = Bits("b?????_?????_?????_0000100001_0001111",32); + def VSSTB = Bits("b?????_?????_?????_0000100000_0001111",32); + def VFSSTD = Bits("b?????_?????_?????_0000101011_0001111",32); + def VFSSTW = Bits("b?????_?????_?????_0000101010_0001111",32); + def VSSEGD = Bits("b?????_?????_?????_0001000011_0001111",32); + def VSSEGW = Bits("b?????_?????_?????_0001000010_0001111",32); + def VSSEGH = Bits("b?????_?????_?????_0001000001_0001111",32); + def VSSEGB = Bits("b?????_?????_?????_0001000000_0001111",32); + def VFSSEGD = Bits("b?????_?????_?????_0001001011_0001111",32); + def VFSSEGW = Bits("b?????_?????_?????_0001001010_0001111",32); + def VSSEGSTD = Bits("b?????_?????_?????_?????_100_11_0001111",32); + def VSSEGSTW = Bits("b?????_?????_?????_?????_100_10_0001111",32); + def VSSEGSTH = Bits("b?????_?????_?????_?????_100_01_0001111",32); + def VSSEGSTB = Bits("b?????_?????_?????_?????_100_00_0001111",32); + def VFSSEGSTD = Bits("b?????_?????_?????_?????_110_11_0001111",32); + def VFSSEGSTW = Bits("b?????_?????_?????_?????_110_10_0001111",32); + def VMVV = Bits("b?????_?????_00000_0000000000_1110011",32); + def VMSV = 
Bits("b?????_?????_00000_0000010000_1110011",32); + def VMST = Bits("b?????_?????_?????_0000100000_1110011",32); + def VMTS = Bits("b?????_?????_?????_0000110000_1110011",32); + def VFMVV = Bits("b?????_?????_00000_0000000010_1110011",32); + def VFMSV = Bits("b?????_?????_00000_0000010010_1110011",32); + def VFMST = Bits("b?????_?????_?????_0000100010_1110011",32); + def VFMTS = Bits("b?????_?????_?????_0000110010_1110011",32); + def VVCFGIVL = Bits("b?????_?????_????????????_001_1110011",32); + def VTCFGIVL = Bits("b?????_?????_????????????_011_1110011",32); + def VVCFG = Bits("b00000_?????_?????_0000001000_1110011",32); + def VTCFG = Bits("b00000_?????_?????_0000011000_1110011",32); + def VSETVL = Bits("b?????_?????_000000000000_101_1110011",32); + def VF = Bits("b00000_?????_????????????_111_1110011",32); // vector supervisor instructions - val VENQCMD = Bits("b00000_?????_?????_0001010110_1111011",32) - val VENQIMM1 = Bits("b00000_?????_?????_0001011110_1111011",32) - val VENQIMM2 = Bits("b00000_?????_?????_0001100110_1111011",32) - val VENQCNT = Bits("b00000_?????_?????_0001101110_1111011",32) - val VXCPTKILL = Bits("b00000_00000_00000_0000010110_1111011",32) - val VXCPTEVAC = Bits("b00000_?????_00000_0001000110_1111011",32) - val VXCPTHOLD = Bits("b00000_00000_00000_0001001110_1111011",32) + def VENQCMD = Bits("b00000_?????_?????_0001010110_1111011",32) + def VENQIMM1 = Bits("b00000_?????_?????_0001011110_1111011",32) + def VENQIMM2 = Bits("b00000_?????_?????_0001100110_1111011",32) + def VENQCNT = Bits("b00000_?????_?????_0001101110_1111011",32) + def VXCPTKILL = Bits("b00000_00000_00000_0000010110_1111011",32) + def VXCPTEVAC = Bits("b00000_?????_00000_0001000110_1111011",32) + def VXCPTHOLD = Bits("b00000_00000_00000_0001001110_1111011",32) - val NOP = Bits("b00000_00000_000000000000_000_0010011",32); + def NOP = Bits("b00000_00000_000000000000_000_0010011",32); +} + +object Disassemble +{ + def apply(insn: Bits) = { + val name :: fmt :: Nil = 
ListLookup(insn, default, table) + sprintf("%s %s", name, operands(insn, fmt)) + } + + private def operands(insn: Bits, fmt: Bits): Bits = { + val x = AVec(Str(" x0"), Str(" ra"), Str(" s0"), Str(" s1"), + Str(" s2"), Str(" s3"), Str(" s4"), Str(" s5"), + Str(" s6"), Str(" s7"), Str(" s8"), Str(" s9"), + Str("s10"), Str("s11"), Str(" sp"), Str(" tp"), + Str(" v0"), Str(" v1"), Str(" a0"), Str(" a1"), + Str(" a2"), Str(" a3"), Str(" a4"), Str(" a5"), + Str(" a6"), Str(" a7"), Str(" a8"), Str(" a9"), + Str("a10"), Str("a11"), Str("a12"), Str("a13")) + val f = AVec(Str(" fs0"), Str(" fs1"), Str(" fs2"), Str(" fs3"), + Str(" fs4"), Str(" fs5"), Str(" fs6"), Str(" fs7"), + Str(" fs8"), Str(" fs9"), Str("fs10"), Str("fs11"), + Str("fs12"), Str("fs13"), Str("fs14"), Str("fs15"), + Str(" fv0"), Str(" fv1"), Str(" fa0"), Str(" fa1"), + Str(" fa2"), Str(" fa3"), Str(" fa4"), Str(" fa5"), + Str(" fa6"), Str(" fa7"), Str(" fa8"), Str(" fa9"), + Str("fa10"), Str("fa11"), Str("fa12"), Str("fa13")) + + def hex(x: Fix, plus: Char = ' ') = + Cat(Mux(x < Fix(0), Str("-0x"), Str(plus + "0x")), Str(x.abs, 16)) + + val comma = Str(',') + val lparen = Str('(') + val rparen = Str(')') + + val rd = insn(31,27) + val rs1 = insn(26,22) + val rs2 = insn(21,17) + val rs3 = insn(16,12) + val immv = insn(21,10).toFix + val bmmv = Cat(insn(31,27), insn(16,10)).toFix + val jmmv = insn(31,7).toFix + + val imm = hex(immv) + val bmm = hex(bmmv << UFix(1)) + val jmm = hex(jmmv << UFix(1)) + val lmm = Cat(Str("0x"), Str(insn(26,7).toUFix, 16)) + + val laddr = Cat(Str(immv), lparen, x(rs1), rparen) + val saddr = Cat(Str(bmmv), lparen, x(rs1), rparen) + + val r0 = x(rd) + val r1 = Cat(r0, comma, x(rs1)) + val r2 = Cat(r1, comma, x(rs2)) + val f1 = Cat(f(rd), comma, f(rs1)) + val f2 = Cat(f1, comma, f(rs2)) + val f3 = Cat(f2, comma, f(rs3)) + val fx = Cat(f(rd), comma, x(rs1)) + val xf1 = Cat(x(rd), comma, f(rs1)) + val xf2 = Cat(xf1, comma, f(rs2)) + val z = Str(' ') + val i = Cat(r1, comma, imm) + val 
b = Cat(x(rs1), comma, x(rs2), comma, bmm) + val j = jmm + val l = Cat(x(rd), comma, lmm) + val ld = Cat(x(rd), comma, laddr) + val st = Cat(x(rs2), comma, saddr) + val fld = Cat(f(rd), comma, laddr) + val fst = Cat(f(rs2), comma, saddr) + val amo = r2 + + val opts = Seq(r0, r1, r2, f1, f2, f3, fx, xf1, xf2, z, i, b, j, l, ld, st, + fld, fst, amo) + val maxLen = opts.map(_.getWidth).reduce(_ max _) + val padded = opts.map(x => x << UFix(maxLen - x.getWidth)) + AVec(padded)(fmt) + } + + private def FMT_R0 = Bits(0, 5) + private def FMT_R1 = Bits(1, 5) + private def FMT_R2 = Bits(2, 5) + private def FMT_F1 = Bits(3, 5) + private def FMT_F2 = Bits(4, 5) + private def FMT_F3 = Bits(5, 5) + private def FMT_FX = Bits(6, 5) + private def FMT_XF1 = Bits(7, 5) + private def FMT_XF2 = Bits(8, 5) + private def FMT_0 = Bits(9, 5) + private def FMT_I = Bits(10, 5) + private def FMT_B = Bits(11, 5) + private def FMT_J = Bits(12, 5) + private def FMT_L = Bits(13, 5) + private def FMT_LD = Bits(14, 5) + private def FMT_ST = Bits(15, 5) + private def FMT_FLD = Bits(16, 5) + private def FMT_FST = Bits(17, 5) + private def FMT_AMO = Bits(18, 5) + + private def default = List(Str("unknown "), FMT_0) + + import Instructions._ + private def table = Array( + BNE-> List(Str("bne "), FMT_B), + BEQ-> List(Str("beq "), FMT_B), + BLT-> List(Str("blt "), FMT_B), + BLTU-> List(Str("bltu "), FMT_B), + BGE-> List(Str("bge "), FMT_B), + BGEU-> List(Str("bgeu "), FMT_B), + + J-> List(Str("j "), FMT_J), + JAL-> List(Str("jal "), FMT_J), + JALR_C-> List(Str("jalr.c "), FMT_LD), + JALR_J-> List(Str("jalr.j "), FMT_LD), + JALR_R-> List(Str("jalr.r "), FMT_LD), + AUIPC-> List(Str("auipc "), FMT_L), + + LB-> List(Str("lb "), FMT_LD), + LH-> List(Str("lh "), FMT_LD), + LW-> List(Str("lw "), FMT_LD), + LD-> List(Str("ld "), FMT_LD), + LBU-> List(Str("lbu "), FMT_LD), + LHU-> List(Str("lhu "), FMT_LD), + LWU-> List(Str("lwu "), FMT_LD), + SB-> List(Str("sb "), FMT_ST), + SH-> List(Str("sh "), FMT_ST), + 
SW-> List(Str("sw "), FMT_ST), + SD-> List(Str("sd "), FMT_ST), + + AMOADD_W-> List(Str("amoadd.w "), FMT_AMO), + AMOSWAP_W-> List(Str("amoswap.w "), FMT_AMO), + AMOAND_W-> List(Str("amoand.w "), FMT_AMO), + AMOOR_W-> List(Str("amoor.w "), FMT_AMO), + AMOMIN_W-> List(Str("amomin.w "), FMT_AMO), + AMOMINU_W-> List(Str("amominu.w "), FMT_AMO), + AMOMAX_W-> List(Str("amomax.w "), FMT_AMO), + AMOMAXU_W-> List(Str("amomaxu.w "), FMT_AMO), + AMOADD_D-> List(Str("amoadd.d "), FMT_AMO), + AMOSWAP_D-> List(Str("amoswap.d "), FMT_AMO), + AMOAND_D-> List(Str("amoand.d "), FMT_AMO), + AMOOR_D-> List(Str("amoor.d "), FMT_AMO), + AMOMIN_D-> List(Str("amomin.d "), FMT_AMO), + AMOMINU_D-> List(Str("amominu.d "), FMT_AMO), + AMOMAX_D-> List(Str("amomax.d "), FMT_AMO), + AMOMAXU_D-> List(Str("amomaxu.d "), FMT_AMO), + + LR_W-> List(Str("lr.w "), FMT_AMO), + LR_D-> List(Str("lr.d "), FMT_AMO), + SC_W-> List(Str("sc.w "), FMT_AMO), + SC_D-> List(Str("sc.d "), FMT_AMO), + + LUI-> List(Str("lui "), FMT_L), + ADDI-> List(Str("addi "), FMT_I), + SLTI -> List(Str("slti "), FMT_I), + SLTIU-> List(Str("sltiu "), FMT_I), + ANDI-> List(Str("andi "), FMT_I), + ORI-> List(Str("ori "), FMT_I), + XORI-> List(Str("xori "), FMT_I), + SLLI-> List(Str("slli "), FMT_I), + SRLI-> List(Str("srli "), FMT_I), + SRAI-> List(Str("srai "), FMT_I), + ADD-> List(Str("add "), FMT_R2), + SUB-> List(Str("sub "), FMT_R2), + SLT-> List(Str("slt "), FMT_R2), + SLTU-> List(Str("sltu "), FMT_R2), + riscvAND-> List(Str("and "), FMT_R2), + riscvOR-> List(Str("or "), FMT_R2), + riscvXOR-> List(Str("xor "), FMT_R2), + SLL-> List(Str("sll "), FMT_R2), + SRL-> List(Str("srl "), FMT_R2), + SRA-> List(Str("sra "), FMT_R2), + + ADDIW-> List(Str("addiw "), FMT_I), + SLLIW-> List(Str("slliw "), FMT_I), + SRLIW-> List(Str("srliw "), FMT_I), + SRAIW-> List(Str("sraiw "), FMT_I), + ADDW-> List(Str("addw "), FMT_R2), + SUBW-> List(Str("subw "), FMT_R2), + SLLW-> List(Str("sllw "), FMT_R2), + SRLW-> List(Str("srlw "), FMT_R2), + 
SRAW-> List(Str("sraw "), FMT_R2), + + MUL-> List(Str("mul "), FMT_R2), + MULH-> List(Str("mulh "), FMT_R2), + MULHU-> List(Str("mulhu "), FMT_R2), + MULHSU-> List(Str("mulhsu "), FMT_R2), + MULW-> List(Str("mulw "), FMT_R2), + + DIV-> List(Str("div "), FMT_R2), + DIVU-> List(Str("divu "), FMT_R2), + REM-> List(Str("rem "), FMT_R2), + REMU-> List(Str("remu "), FMT_R2), + DIVW-> List(Str("divw "), FMT_R2), + DIVUW-> List(Str("divuw "), FMT_R2), + REMW-> List(Str("remw "), FMT_R2), + REMUW-> List(Str("remuw "), FMT_R2), + + SYSCALL-> List(Str("syscall "), FMT_0), + SETPCR-> List(Str("setpcr "), FMT_I), + CLEARPCR-> List(Str("clearpcr "), FMT_I), + ERET-> List(Str("eret "), FMT_0), + FENCE-> List(Str("fence "), FMT_0), + FENCE_I-> List(Str("fence.i "), FMT_0), + MFPCR-> List(Str("mfpcr "), FMT_R2), + MTPCR-> List(Str("mtpcr "), FMT_R2), + RDTIME-> List(Str("rdtime "), FMT_R0), + RDCYCLE-> List(Str("rdcycle "), FMT_R0), + RDINSTRET-> List(Str("rdinstret "), FMT_R0), + + FCVT_S_D-> List(Str("fcvt.s.d "), FMT_F1), + FCVT_D_S-> List(Str("fcvt.d.s "), FMT_F1), + FSGNJ_S-> List(Str("fsgnj.s "), FMT_F2), + FSGNJ_D-> List(Str("fsgnj.d "), FMT_F2), + FSGNJX_S-> List(Str("fsgnx.s "), FMT_F2), + FSGNJX_D-> List(Str("fsgnx.d "), FMT_F2), + FSGNJN_S-> List(Str("fsgnjn.s "), FMT_F2), + FSGNJN_D-> List(Str("fsgnjn.d "), FMT_F2), + FMIN_S-> List(Str("fmin.s "), FMT_F2), + FMIN_D-> List(Str("fmin.d "), FMT_F2), + FMAX_S-> List(Str("fmax.s "), FMT_F2), + FMAX_D-> List(Str("fmax.d "), FMT_F2), + FADD_S-> List(Str("fadd.s "), FMT_F2), + FADD_D-> List(Str("fadd.d "), FMT_F2), + FSUB_S-> List(Str("fsub.s "), FMT_F2), + FSUB_D-> List(Str("fsub.d "), FMT_F2), + FMUL_S-> List(Str("fmul.s "), FMT_F2), + FMUL_D-> List(Str("fmul.d "), FMT_F2), + FMADD_S-> List(Str("fmadd.s "), FMT_F3), + FMADD_D-> List(Str("fmadd.d "), FMT_F3), + FMSUB_S-> List(Str("fmsub.s "), FMT_F3), + FMSUB_D-> List(Str("fmsub.d "), FMT_F3), + FNMADD_S-> List(Str("fnmadd.s "), FMT_F3), + FNMADD_D-> List(Str("fnmadd.d "), 
FMT_F3), + FNMSUB_S-> List(Str("fnmsub.s "), FMT_F3), + FNMSUB_D-> List(Str("fnmsub.d "), FMT_F3), + MFTX_S-> List(Str("mftx.s "), FMT_XF1), + MFTX_D-> List(Str("mftx.d "), FMT_XF1), + FCVT_W_S-> List(Str("fcvt.w.s "), FMT_XF1), + FCVT_W_D-> List(Str("fcvt.w.d "), FMT_XF1), + FCVT_WU_S-> List(Str("fcvt.wu.s "), FMT_XF1), + FCVT_WU_D-> List(Str("fcvt.wu.d "), FMT_XF1), + FCVT_L_S-> List(Str("fcvt.l.s "), FMT_XF1), + FCVT_L_D-> List(Str("fcvt.l.d "), FMT_XF1), + FCVT_LU_S-> List(Str("fcvt.lu.s "), FMT_XF1), + FCVT_LU_D-> List(Str("fcvt.lu.d "), FMT_XF1), + FEQ_S-> List(Str("feq.s "), FMT_XF2), + FEQ_D-> List(Str("feq.d "), FMT_XF2), + FLT_S-> List(Str("flt.s "), FMT_XF2), + FLT_D-> List(Str("flt.d "), FMT_XF2), + FLE_S-> List(Str("fle.s "), FMT_XF2), + FLE_D-> List(Str("fle.d "), FMT_XF2), + MXTF_S-> List(Str("mxtf.s "), FMT_FX), + MXTF_D-> List(Str("mxtf.d "), FMT_FX), + FCVT_S_W-> List(Str("fcvt.s.w "), FMT_FX), + FCVT_D_W-> List(Str("fcvt.d.w "), FMT_FX), + FCVT_S_WU-> List(Str("fcvt.s.wu "), FMT_FX), + FCVT_D_WU-> List(Str("fcvt.d.wu "), FMT_FX), + FCVT_S_L-> List(Str("fcvt.s.l "), FMT_FX), + FCVT_D_L-> List(Str("fcvt.d.l "), FMT_FX), + FCVT_S_LU-> List(Str("fcvt.s.lu "), FMT_FX), + FCVT_D_LU-> List(Str("fcvt.d.lu "), FMT_FX), + MFFSR-> List(Str("mffsr "), FMT_R0), + MTFSR-> List(Str("mtfsr "), FMT_R1), + FLW-> List(Str("flw "), FMT_FLD), + FLD-> List(Str("fld "), FMT_FLD), + FSW-> List(Str("fsw "), FMT_FST), + FSD-> List(Str("fsd "), FMT_FST), + + VVCFGIVL-> List(Str("vecInst "), FMT_0), + VVCFG-> List(Str("vecInst "), FMT_0), + VSETVL-> List(Str("vecInst "), FMT_0), + VF-> List(Str("vecInst "), FMT_0), + VMVV-> List(Str("vecInst "), FMT_0), + VMSV-> List(Str("vecInst "), FMT_0), + VFMVV-> List(Str("vecInst "), FMT_0), + FENCE_V_L-> List(Str("vecInst "), FMT_0), + FENCE_V_G-> List(Str("vecInst "), FMT_0), + VLD-> List(Str("vecInst "), FMT_0), + VLW-> List(Str("vecInst "), FMT_0), + VLWU-> List(Str("vecInst "), FMT_0), + VLH-> List(Str("vecInst "), FMT_0), + 
VLHU-> List(Str("vecInst "), FMT_0), + VLB-> List(Str("vecInst "), FMT_0), + VLBU-> List(Str("vecInst "), FMT_0), + VSD-> List(Str("vecInst "), FMT_0), + VSW-> List(Str("vecInst "), FMT_0), + VSH-> List(Str("vecInst "), FMT_0), + VSB-> List(Str("vecInst "), FMT_0), + VFLD-> List(Str("vecInst "), FMT_0), + VFLW-> List(Str("vecInst "), FMT_0), + VFSD-> List(Str("vecInst "), FMT_0), + VFSW-> List(Str("vecInst "), FMT_0), + VLSTD-> List(Str("vecInst "), FMT_0), + VLSTW-> List(Str("vecInst "), FMT_0), + VLSTWU-> List(Str("vecInst "), FMT_0), + VLSTH-> List(Str("vecInst "), FMT_0), + VLSTHU-> List(Str("vecInst "), FMT_0), + VLSTB-> List(Str("vecInst "), FMT_0), + VLSTBU-> List(Str("vecInst "), FMT_0), + VSSTD-> List(Str("vecInst "), FMT_0), + VSSTW-> List(Str("vecInst "), FMT_0), + VSSTH-> List(Str("vecInst "), FMT_0), + VSSTB-> List(Str("vecInst "), FMT_0), + VFLSTD-> List(Str("vecInst "), FMT_0), + VFLSTW-> List(Str("vecInst "), FMT_0), + VFSSTD-> List(Str("vecInst "), FMT_0), + VFSSTW-> List(Str("vecInst "), FMT_0), + + VENQCMD-> List(Str("vecInst "), FMT_0), + VENQIMM1-> List(Str("vecInst "), FMT_0), + VENQIMM2-> List(Str("vecInst "), FMT_0), + VENQCNT-> List(Str("vecInst "), FMT_0), + VXCPTEVAC-> List(Str("vecInst "), FMT_0), + VXCPTKILL-> List(Str("vecInst "), FMT_0), + VXCPTHOLD-> List(Str("vecInst "), FMT_0) + ) } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 183621a6..98e7d5bf 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -1,6 +1,7 @@ package rocket import Chisel._ +import scala.math._ object Util { @@ -24,6 +25,77 @@ object AVec tabulate(n1)(i1 => tabulate(n2)(f(i1, _))) } +object Str +{ + def apply(s: String): Bits = { + var i = BigInt(0) + require(s.forall(validChar _)) + for (c <- s) + i = (i << 8) | c + Lit(i, s.length*8){Bits()} + } + def apply(x: Char): Bits = { + require(validChar(x)) + Lit(x, 8){Bits()} + } + def apply(x: UFix): Bits = apply(x, 10) + def apply(x: UFix, 
radix: Int): Bits = { + val rad = UFix(radix) + val digs = digits(radix) + val w = x.getWidth + require(w > 0) + + var q = x + var s = digs(q % rad) + for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) { + q = q / rad + s = Cat(Mux(Bool(radix == 10) && q === UFix(0), Str(' '), digs(q % rad)), s) + } + s + } + def apply(x: Fix): Bits = apply(x, 10) + def apply(x: Fix, radix: Int): Bits = { + val neg = x < Fix(0) + val abs = Mux(neg, -x, x).toUFix + if (radix != 10) { + Cat(Mux(neg, Str('-'), Str(' ')), Str(abs, radix)) + } else { + val rad = UFix(radix) + val digs = digits(radix) + val w = abs.getWidth + require(w > 0) + + var q = abs + var s = digs(q % rad) + var needSign = neg + for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) { + q = q / rad + val placeSpace = q === UFix(0) + val space = Mux(needSign, Str('-'), Str(' ')) + needSign = needSign && !placeSpace + s = Cat(Mux(placeSpace, space, digs(q % rad)), s) + } + Cat(Mux(needSign, Str('-'), Str(' ')), s) + } + } + + def bigIntToString(x: BigInt): String = { + val s = new StringBuilder + var b = x + while (b != 0) { + s += (x & 0xFF).toChar + b = b >> 8 + } + s.toString + } + + private def digit(d: Int): Char = (if (d < 10) '0'+d else 'a'-10+d).toChar + private def digits(radix: Int): Vec[Bits] = + AVec((0 until radix).map(i => Str(digit(i)))) + + private def validChar(x: Char) = x == (x & 0xFF) +} + object Split { // is there a better way to do do this? 
From 7cc53c772531d066b3331159882c82b8de9e5838 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 15 Jun 2013 00:45:53 -0700 Subject: [PATCH 0605/1087] clean up Str --- rocket/src/main/scala/util.scala | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 98e7d5bf..1ec4b72f 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -56,7 +56,7 @@ object Str def apply(x: Fix): Bits = apply(x, 10) def apply(x: Fix, radix: Int): Bits = { val neg = x < Fix(0) - val abs = Mux(neg, -x, x).toUFix + val abs = x.abs if (radix != 10) { Cat(Mux(neg, Str('-'), Str(' ')), Str(abs, radix)) } else { @@ -79,16 +79,6 @@ object Str } } - def bigIntToString(x: BigInt): String = { - val s = new StringBuilder - var b = x - while (b != 0) { - s += (x & 0xFF).toChar - b = b >> 8 - } - s.toString - } - private def digit(d: Int): Char = (if (d < 10) '0'+d else 'a'-10+d).toChar private def digits(radix: Int): Vec[Bits] = AVec((0 until radix).map(i => Str(digit(i)))) From 5c00d0a030ea829e94e5a439db0673a824929450 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 9 Jul 2013 15:31:39 -0700 Subject: [PATCH 0606/1087] new tilelink arbiter type --- rocket/src/main/scala/tile.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 2c6cd558..60cdc764 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -40,7 +40,7 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon val icache = new Frontend()(confIn.icache, lnConf) val dcache = new HellaCache - val arbiter = new UncachedTileLinkIOArbiter(memPorts, confIn.dcache.co) + val arbiter = new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts, confIn.dcache.co) arbiter.io.in(dcachePortId) <> dcache.io.mem arbiter.io.in(icachePortId) <> icache.io.mem From 
9abdf4e154f57636169ba3d6cd84e8848f458081 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 23 Jul 2013 20:26:17 -0700 Subject: [PATCH 0607/1087] Make compatible with scala 2.10. List.sort deprecated. Refactor constants into package object. --- rocket/src/main/scala/arbiter.scala | 2 -- rocket/src/main/scala/consts.scala | 6 ------ rocket/src/main/scala/core.scala | 3 +-- rocket/src/main/scala/ctrl.scala | 3 +-- rocket/src/main/scala/ctrl_vec.scala | 2 -- rocket/src/main/scala/decode.scala | 6 +++--- rocket/src/main/scala/divider.scala | 2 -- rocket/src/main/scala/dpath.scala | 3 +-- rocket/src/main/scala/dpath_alu.scala | 1 - rocket/src/main/scala/dpath_util.scala | 6 +++--- rocket/src/main/scala/dpath_vec.scala | 1 - rocket/src/main/scala/ecc.scala | 1 - rocket/src/main/scala/fpu.scala | 3 +-- rocket/src/main/scala/htif.scala | 1 - rocket/src/main/scala/icache.scala | 2 -- rocket/src/main/scala/instructions.scala | 1 - rocket/src/main/scala/multiplier.scala | 2 -- rocket/src/main/scala/nbdcache.scala | 5 +++-- rocket/src/main/scala/package.scala | 19 ++++--------------- rocket/src/main/scala/ptw.scala | 4 ++-- rocket/src/main/scala/tile.scala | 2 -- rocket/src/main/scala/tlb.scala | 7 +++---- 22 files changed, 22 insertions(+), 60 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index ac842d85..44c3581a 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -1,8 +1,6 @@ package rocket import Chisel._ -import Node._ -import Constants._ import uncore._ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Component diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index b6c7fb82..d1ce166e 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -56,12 +56,6 @@ trait InterruptConstants { val CAUSE_INTERRUPT = 32 } -abstract trait RocketDcacheConstants extends 
uncore.constants.CacheConstants with uncore.constants.AddressConstants { - require(OFFSET_BITS == log2Up(uncore.Constants.CACHE_DATA_SIZE_IN_BYTES)) - require(OFFSET_BITS <= uncore.Constants.ACQUIRE_WRITE_MASK_BITS) - require(log2Up(OFFSET_BITS) <= uncore.Constants.ACQUIRE_SUBWORD_ADDR_BITS) -} - trait VectorOpConstants { val VEC_X = Bits("b??", 2).toUFix val VEC_FN_N = UFix(0, 2) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 39209a62..81df73c4 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -1,9 +1,8 @@ package rocket import Chisel._ -import Node._ -import Constants._ import hwacha._ +import uncore.constants.MemoryOpConstants._ import Util._ class RocketIO(implicit conf: RocketConfiguration) extends Bundle diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 8273deed..682af625 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -1,10 +1,9 @@ package rocket import Chisel._ -import Node._ -import Constants._ import Instructions._ import hwacha._ +import uncore.constants.MemoryOpConstants._ import ALU._ import Util._ diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 35974ec9..a3179993 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -1,8 +1,6 @@ package rocket import Chisel._ -import Node._ -import Constants._ import Instructions._ import hwacha.Constants._ diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index a63c5c3d..bfea1102 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -88,7 +88,7 @@ object Simplify for (p <- r; if p.prime) prime = p :: prime } - prime.sort(_<_) + prime.sortWith(_<_) } def getEssentialPrimeImplicants(prime: Seq[Term], minterms: Seq[Term]): (Seq[Term],Seq[Term],Seq[Term]) = { for (i <- 0 until prime.size) { @@ -116,7 +116,7 @@ 
object Simplify val ca = getCost(a, bits) val cb = getCost(b, bits) def listLess(a: List[Term], b: List[Term]): Boolean = !b.isEmpty && (a.isEmpty || a.head < b.head || a.head == b.head && listLess(a.tail, b.tail)) - ca < cb || ca == cb && listLess(a.sort(_<_), b.sort(_<_)) + ca < cb || ca == cb && listLess(a.sortWith(_<_), b.sortWith(_<_)) } def getCover(implicants: Seq[Term], minterms: Seq[Term], bits: Int) = { if (minterms.nonEmpty) { @@ -179,7 +179,7 @@ object SimplifyDC for (p <- r; if p.prime) prime = p :: prime } - prime.sort(_<_) + prime.sortWith(_<_) } def verify(cover: Seq[Term], minterms: Seq[Term], maxterms: Seq[Term]) = { diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index f76d38ed..91ce2122 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -1,8 +1,6 @@ package rocket import Chisel._ -import Node._ -import Constants._ import ALU._ import Util._ diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 8c0d0036..3f6a2677 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -1,11 +1,10 @@ package rocket import Chisel._ -import Node._ -import Constants._ import Instructions._ import Util._ import hwacha._ +import uncore.constants.AddressConstants._ class Datapath(implicit conf: RocketConfiguration) extends Component { diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 5a29ab20..3c751fe0 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -2,7 +2,6 @@ package rocket import Chisel._ import Node._ -import Constants._ import Instructions._ object ALU diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 25fd8438..a3d3572e 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -1,10 +1,10 @@ package rocket import Chisel._ -import 
Node._ -import Constants._ -import scala.math._ import Util._ +import Node._ +import uncore.constants.AddressConstants._ +import scala.math._ class DpathBTBIO extends Bundle { diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index caceae02..9300b012 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -2,7 +2,6 @@ package rocket import Chisel._ import Node._ -import Constants._ import Instructions._ import hwacha.Constants._ diff --git a/rocket/src/main/scala/ecc.scala b/rocket/src/main/scala/ecc.scala index 179315d6..d122e3a6 100644 --- a/rocket/src/main/scala/ecc.scala +++ b/rocket/src/main/scala/ecc.scala @@ -1,7 +1,6 @@ package rocket import Chisel._ -import Constants._ import uncore._ import Util._ diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 41fd4cb4..f1eee8cb 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -1,11 +1,10 @@ package rocket import Chisel._ -import Node._ -import Constants._ import Instructions._ import Util._ import FPConstants._ +import uncore.constants.MemoryOpConstants._ object FPConstants { diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 8a999419..6a21ed21 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -2,7 +2,6 @@ package rocket import Chisel._ import Node._ -import Constants._ import uncore._ import Util._ diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 31810b60..23f27c9b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -1,8 +1,6 @@ package rocket import Chisel._ -import Node._ -import Constants._ import uncore._ import Util._ diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 33446f3c..4c10f6b2 100644 --- a/rocket/src/main/scala/instructions.scala +++ 
b/rocket/src/main/scala/instructions.scala @@ -2,7 +2,6 @@ package rocket import Chisel._ import Node._ -import Constants._ object Instructions { diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 79905b4e..06d35d8a 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -1,8 +1,6 @@ package rocket import Chisel._ -import Node._ -import Constants._ import ALU._ class MultiplierReq(implicit conf: RocketConfiguration) extends Bundle { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 78dbb61e..f4c26ed8 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1,8 +1,6 @@ package rocket import Chisel._ -import Node._ -import Constants._ import uncore._ import Util._ @@ -12,6 +10,9 @@ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, narrowRead: Boolean = true, reqtagbits: Int = -1, databits: Int = -1) { + require(OFFSET_BITS == log2Up(CACHE_DATA_SIZE_IN_BYTES)) + require(OFFSET_BITS <= ACQUIRE_WRITE_MASK_BITS) + require(log2Up(OFFSET_BITS) <= ACQUIRE_SUBWORD_ADDR_BITS) require(isPow2(sets)) require(isPow2(ways)) // TODO: relax this def lines = sets*ways diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index dfc32dd4..ca2926fc 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -1,18 +1,7 @@ -package rocket -import rocket.constants._ - -import Chisel._ -import scala.math._ - -//TODO: When compiler bug SI-5604 is fixed in 2.10, change object Constants to -// package object rocket and remove import Constants._'s from other files -object Constants extends - ScalarOpConstants with - uncore.constants.MemoryOpConstants with - InterruptConstants with - RocketDcacheConstants with - VectorOpConstants with - uncore.constants.MemoryInterfaceConstants +package object rocket extends + 
rocket.constants.ScalarOpConstants with + rocket.constants.InterruptConstants with + rocket.constants.VectorOpConstants { val START_ADDR = 0x2000 } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index bfc898c4..bcf9ce1b 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -1,8 +1,8 @@ package rocket import Chisel._ -import Node._ -import Constants._ +import uncore.constants.AddressConstants._ +import uncore.constants.MemoryOpConstants._ import Util._ class TLBPTWIO extends Bundle { diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 60cdc764..10ba3184 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -1,8 +1,6 @@ package rocket import Chisel._ -import Node._ -import Constants._ import uncore._ import Util._ diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 33c4377d..a4dc13f6 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -1,9 +1,8 @@ package rocket -import Chisel._; -import Node._; -import Constants._; -import scala.math._; +import Chisel._ +import uncore.constants.AddressConstants._ +import scala.math._ class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { val clear = Bool(INPUT); From 3132db4f90cda99e4175088350f1e076deea32df Mon Sep 17 00:00:00 2001 From: Stephen Twigg Date: Tue, 30 Jul 2013 16:36:28 -0700 Subject: [PATCH 0608/1087] Add stats PCR (cr28) to be used to flag whether a core is doing 'interesting' activity. 
--- rocket/src/main/scala/dpath_util.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index a3d3572e..d379feeb 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -97,6 +97,7 @@ object PCR val K1 = 13 val VECBANK = 18 val VECCFG = 19 + val STATS = 28 val RESET = 29 val TOHOST = 30 val FROMHOST = 31 @@ -131,6 +132,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component val replay = Bool(OUTPUT) val vecbank = Bits(OUTPUT, 8) val vecbankcnt = UFix(OUTPUT, 4) + val stats = Bool(OUTPUT) val vec_appvl = UFix(INPUT, 12) val vec_nxregs = UFix(INPUT, 6) val vec_nfregs = UFix(INPUT, 6) @@ -150,6 +152,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component val reg_k1 = Reg{Bits(width = conf.xprlen)} val reg_ptbr = Reg{UFix(width = PADDR_BITS)} val reg_vecbank = Reg(resetVal = Fix(-1,8).toBits) + val reg_stats = Reg(resetVal = Bool(false)) val reg_error_mode = Reg(resetVal = Bool(false)) val reg_status = Reg{new Status} // reset down below @@ -193,6 +196,8 @@ class PCR(implicit conf: RocketConfiguration) extends Component cnt = cnt + reg_vecbank(i) io.vecbankcnt := cnt(3,0) + io.stats := reg_stats + when (io.badvaddr_wen || io.vec_irq_aux_wen) { val wdata = Mux(io.badvaddr_wen, io.rw.wdata, io.vec_irq_aux) val (upper, lower) = Split(wdata, VADDR_BITS) @@ -240,7 +245,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank, read_veccfg, reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank/*x*/, read_veccfg/*x*/, reg_tohost/*x*/, reg_fromhost/*x*/, - reg_vecbank/*x*/, read_veccfg/*x*/, reg_tohost, reg_fromhost + reg_stats/*x*/, read_veccfg/*x*/, reg_tohost, reg_fromhost )(addr) when (wen) { @@ -267,6 +272,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component when (addr === K1) { reg_k1 := 
wdata; } when (addr === PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } when (addr === VECBANK) { reg_vecbank:= wdata(7,0) } + when (addr === STATS) { reg_stats := wdata(0) } } io.host.ipi_rep.ready := Bool(true) From bef6c1db353d18774aacc611edc34bd65c712568 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 2 Aug 2013 10:06:01 -0700 Subject: [PATCH 0609/1087] minor nbdcache cleanup --- rocket/src/main/scala/nbdcache.scala | 66 ++++++++++++++-------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index f4c26ed8..de48538d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -21,6 +21,7 @@ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, def vpnbits = VADDR_BITS - PGIDX_BITS def pgidxbits = PGIDX_BITS def offbits = OFFSET_BITS + def maxaddrbits = ppnbits.max(vpnbits+1) + pgidxbits def paddrbits = ppnbits + pgidxbits def lineaddrbits = paddrbits - offbits def idxbits = log2Up(sets) @@ -694,7 +695,7 @@ class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { val kill = Bool() val typ = Bits(width = 3) val phys = Bool() - val addr = UFix(width = conf.ppnbits.max(conf.vpnbits+1) + conf.pgidxbits) + val addr = UFix(width = conf.maxaddrbits) val data = Bits(width = conf.databits) val tag = Bits(width = conf.reqtagbits) val cmd = Bits(width = 4) @@ -710,7 +711,7 @@ class HellaCacheResp(implicit conf: DCacheConfig) extends Bundle { val data_subword = Bits(width = conf.databits) val tag = Bits(width = conf.reqtagbits) val cmd = Bits(width = 4) - val addr = UFix(width = conf.ppnbits.max(conf.vpnbits+1) + conf.pgidxbits) + val addr = UFix(width = conf.maxaddrbits) val store_data = Bits(width = conf.databits) override def clone = new HellaCacheResp().asInstanceOf[this.type] @@ -747,7 +748,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio 
val wb = new WritebackUnit val prober = new ProbeUnit - val mshr = new MSHRFile + val mshrs = new MSHRFile io.cpu.req.ready := Bool(true) val s1_valid = Reg(io.cpu.req.fire(), resetVal = Bool(false)) @@ -791,8 +792,8 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio s1_req := prober.io.meta_read.bits s1_req.phys := Bool(true) } - when (mshr.io.replay.valid) { - s1_req := mshr.io.replay.bits + when (mshrs.io.replay.valid) { + s1_req := mshrs.io.replay.bits } when (s2_recycle) { s1_req := s2_req @@ -805,7 +806,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio s2_req.cmd := s1_req.cmd s2_req.tag := s1_req.tag when (s1_write) { - s2_req.data := Mux(s1_replay, mshr.io.replay.bits.data, io.cpu.req.bits.data) + s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.req.bits.data) } when (s1_recycled) { s2_req.data := s1_req.data } } @@ -927,32 +928,31 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))){new MetaData}) // miss handling - mshr.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) - mshr.io.req.bits := s2_req - mshr.io.req.bits.tag_match := s2_tag_match - mshr.io.req.bits.old_meta := Mux(s2_tag_match, MetaData(s2_repl_meta.tag, s2_hit_state), s2_repl_meta) - mshr.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) - mshr.io.req.bits.data := s2_req.data + mshrs.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) + mshrs.io.req.bits := s2_req + mshrs.io.req.bits.tag_match := s2_tag_match + mshrs.io.req.bits.old_meta := Mux(s2_tag_match, MetaData(s2_repl_meta.tag, s2_hit_state), s2_repl_meta) + mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) + 
mshrs.io.req.bits.data := s2_req.data - mshr.io.mem_grant.valid := io.mem.grant.fire() - mshr.io.mem_grant.bits := io.mem.grant.bits - when (mshr.io.req.fire()) { replacer.miss } + mshrs.io.mem_grant.valid := io.mem.grant.fire() + mshrs.io.mem_grant.bits := io.mem.grant.bits + when (mshrs.io.req.fire()) { replacer.miss } - io.mem.acquire.meta <> FIFOedLogicalNetworkIOWrapper(mshr.io.mem_req) + io.mem.acquire.meta <> FIFOedLogicalNetworkIOWrapper(mshrs.io.mem_req) //TODO io.mem.acquire.data should be connected to uncached store data generator //io.mem.acquire.data <> FIFOedLogicalNetworkIOWrapper(TODO) io.mem.acquire.data.valid := Bool(false) io.mem.acquire.data.bits.payload.data := UFix(0) // replays - readArb.io.in(1).valid := mshr.io.replay.valid - readArb.io.in(1).bits := mshr.io.replay.bits + readArb.io.in(1).valid := mshrs.io.replay.valid + readArb.io.in(1).bits := mshrs.io.replay.bits readArb.io.in(1).bits.way_en := Fix(-1) - mshr.io.replay.ready := readArb.io.in(1).ready - s1_replay := mshr.io.replay.valid && readArb.io.in(1).ready - metaReadArb.io.in(1) <> mshr.io.meta_read - metaWriteArb.io.in(0) <> mshr.io.meta_write - + mshrs.io.replay.ready := readArb.io.in(1).ready + s1_replay := mshrs.io.replay.valid && readArb.io.in(1).ready + metaReadArb.io.in(1) <> mshrs.io.meta_read + metaWriteArb.io.in(0) <> mshrs.io.meta_write // probes val releaseArb = (new Arbiter(2)) { new Release } FIFOedLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release.meta @@ -967,18 +967,18 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio prober.io.line_state := s2_hit_state prober.io.meta_read <> metaReadArb.io.in(2) prober.io.meta_write <> metaWriteArb.io.in(1) - prober.io.mshr_rdy := mshr.io.probe_rdy + prober.io.mshr_rdy := mshrs.io.probe_rdy // refills - val refill = conf.co.messageUpdatesDataArray(io.mem.grant.bits.payload) + val refill = tl.co.messageUpdatesDataArray(io.mem.grant.bits.payload) writeArb.io.in(1).valid := 
io.mem.grant.valid && refill io.mem.grant.ready := writeArb.io.in(1).ready || !refill - writeArb.io.in(1).bits := mshr.io.mem_resp + writeArb.io.in(1).bits := mshrs.io.mem_resp writeArb.io.in(1).bits.wmask := Fix(-1) writeArb.io.in(1).bits.data := io.mem.grant.bits.payload.data // writebacks - wb.io.req <> mshr.io.wb_req + wb.io.req <> mshrs.io.wb_req wb.io.meta_read <> metaReadArb.io.in(3) wb.io.data_req <> readArb.io.in(2) wb.io.data_resp := s2_data_corrected @@ -1016,10 +1016,10 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready val s2_nack_hit = RegEn(s1_nack, s1_valid || s1_replay) - when (s2_nack_hit) { mshr.io.req.valid := Bool(false) } - val s2_nack_victim = s2_hit && mshr.io.secondary_miss - val s2_nack_miss = !s2_hit && !mshr.io.req.ready - val s2_nack_fence = s2_req.cmd === M_FENCE && !mshr.io.fence_rdy + when (s2_nack_hit) { mshrs.io.req.valid := Bool(false) } + val s2_nack_victim = s2_hit && mshrs.io.secondary_miss + val s2_nack_miss = !s2_hit && !mshrs.io.req.ready + val s2_nack_fence = s2_req.cmd === M_FENCE && !mshrs.io.fence_rdy val s2_nack = s2_nack_hit || s2_nack_victim || s2_nack_miss || s2_nack_fence s2_valid_masked := s2_valid && !s2_nack @@ -1030,7 +1030,7 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio // after a nack, block until nack condition resolves to save energy val block_fence = Reg(resetVal = Bool(false)) - block_fence := (s2_valid && s2_req.cmd === M_FENCE || block_fence) && !mshr.io.fence_rdy + block_fence := (s2_valid && s2_req.cmd === M_FENCE || block_fence) && !mshrs.io.fence_rdy val block_miss = Reg(resetVal = Bool(false)) block_miss := (s2_valid || block_miss) && s2_nack_miss when (block_fence || block_miss) { @@ -1046,5 +1046,5 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio 
io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte) io.cpu.resp.bits.store_data := s2_req.data - io.mem.grant_ack <> mshr.io.mem_finish + io.mem.grant_ack <> mshrs.io.mem_finish } From 4eaab214d2cdd191905014a0635bb5228d99105b Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 2 Aug 2013 14:54:16 -0700 Subject: [PATCH 0610/1087] Fold uncore constants into TileLinkConfiguration, update coherence API --- rocket/src/main/scala/core.scala | 2 +- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/htif.scala | 26 +++++----- rocket/src/main/scala/icache.scala | 13 ++--- rocket/src/main/scala/nbdcache.scala | 67 ++++++++++++++------------ rocket/src/main/scala/tile.scala | 12 +++-- 7 files changed, 67 insertions(+), 57 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 81df73c4..1f89a54a 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -7,7 +7,7 @@ import Util._ class RocketIO(implicit conf: RocketConfiguration) extends Bundle { - val host = new HTIFIO(conf.lnConf.nClients) + val host = new HTIFIO(conf.tl.ln.nClients) val imem = new CPUFrontendIO()(conf.icache) val vimem = new CPUFrontendIO()(conf.icache) val dmem = new HellaCacheIO()(conf.dcache) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 3f6a2677..0c71889e 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -9,7 +9,7 @@ import uncore.constants.AddressConstants._ class Datapath(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { - val host = new HTIFIO(conf.lnConf.nClients) + val host = new HTIFIO(conf.tl.ln.nClients) val ctrl = (new CtrlDpathIO).flip val dmem = new HellaCacheIO()(conf.dcache) val ptw = (new DatapathPTWIO).flip diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index a3d3572e..7afde659 100644 
--- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -105,7 +105,7 @@ object PCR class PCR(implicit conf: RocketConfiguration) extends Component { val io = new Bundle { - val host = new HTIFIO(conf.lnConf.nClients) + val host = new HTIFIO(conf.tl.ln.nClients) val rw = new Bundle { val addr = UFix(INPUT, log2Up(conf.nxpr)) val cmd = Bits(INPUT, PCR.SZ) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 6a21ed21..583e90ce 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -44,13 +44,14 @@ class SCRIO extends Bundle val wdata = Bits(OUTPUT, 64) } -class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component with ClientCoherenceAgent +class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component with ClientCoherenceAgent { - implicit val lnConf = conf.ln + implicit val (ln, co) = (conf.ln, conf.co) + val nTiles = ln.nClients-1 // This HTIF is itself a TileLink client val io = new Bundle { val host = new HostIO(w) - val cpu = Vec(conf.ln.nClients) { new HTIFIO(conf.ln.nClients).flip } - val mem = new TileLinkIO()(conf.ln) + val cpu = Vec(nTiles) { new HTIFIO(nTiles).flip } + val mem = new TileLinkIO val scr = new SCRIO } @@ -91,7 +92,7 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component w val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.width-1, 0) - val pcr_coreid = addr(log2Up(conf.ln.nClients)-1+20+1,20) + val pcr_coreid = addr(log2Up(nTiles)-1+20+1,20) val pcr_wdata = packet_ram(0) val bad_mem_packet = size(OFFSET_BITS-1-3,0).orR || addr(OFFSET_BITS-1-3,0).orR @@ -126,8 +127,8 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component w val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata 
:: state_mem_finish :: state_tx :: Nil = Enum(9) { UFix() } val state = Reg(resetVal = state_rx) + val rx_cmd = Mux(rx_word_count === UFix(0), next_cmd, cmd) when (state === state_rx && rx_done) { - val rx_cmd = Mux(rx_word_count === UFix(0), next_cmd, cmd) state := Mux(rx_cmd === cmd_readmem || rx_cmd === cmd_writemem, state_mem_req, Mux(rx_cmd === cmd_readcr || rx_cmd === cmd_writecr, state_pcr_req, state_tx)) @@ -182,9 +183,10 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component w } x_init.io.enq.valid := state === state_mem_req val init_addr = addr.toUFix >> UFix(OFFSET_BITS-3) - val co = conf.co.asInstanceOf[CoherencePolicyWithUncached] - x_init.io.enq.bits := Mux(cmd === cmd_writemem, co.getUncachedWriteAcquire(init_addr, UFix(0)), co.getUncachedReadAcquire(init_addr, UFix(0))) - io.mem.acquire.meta <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq, UFix(conf.ln.nClients), UFix(0)) + x_init.io.enq.bits := Mux(cmd === cmd_writemem, + Acquire(co.getUncachedWriteAcquireType, init_addr, UFix(0)), + Acquire(co.getUncachedReadAcquireType, init_addr, UFix(0))) + io.mem.acquire.meta <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq, UFix(conf.ln.nClients), UFix(0)) // By convention HTIF is the client with the largest id io.mem.acquire.data.valid := state === state_mem_wdata io.mem.acquire.data.bits.payload.data := mem_req_data io.mem.grant_ack.valid := (state === state_mem_finish) && mem_needs_ack @@ -195,7 +197,7 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component w io.mem.release.data.valid := Bool(false) val pcrReadData = Reg{Bits(width = io.cpu(0).pcr_rep.bits.getWidth)} - for (i <- 0 until conf.ln.nClients) { + for (i <- 0 until nTiles) { val my_reset = Reg(resetVal = Bool(true)) val my_ipi = Reg(resetVal = Bool(false)) @@ -212,7 +214,7 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component w } cpu.ipi_rep.valid := my_ipi cpu.ipi_req.ready := Bool(true) - for (j <- 0 until 
conf.ln.nClients) { + for (j <- 0 until nTiles) { when (io.cpu(j).ipi_req.valid && io.cpu(j).ipi_req.bits === UFix(i)) { my_ipi := Bool(true) } @@ -239,7 +241,7 @@ class rocketHTIF(w: Int)(implicit conf: UncoreConfiguration) extends Component w val scr_rdata = Vec(io.scr.rdata.size){Bits(width = 64)} for (i <- 0 until scr_rdata.size) scr_rdata(i) := io.scr.rdata(i) - scr_rdata(0) := conf.ln.nClients + scr_rdata(0) := nTiles scr_rdata(1) := (UFix(REFILL_CYCLES*MEM_DATA_BITS/8) << x_init.io.enq.bits.addr.getWidth) >> 20 io.scr.wen := false diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 23f27c9b..4f80d72b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -4,7 +4,7 @@ import Chisel._ import uncore._ import Util._ -case class ICacheConfig(sets: Int, assoc: Int, co: CoherencePolicyWithUncached, +case class ICacheConfig(sets: Int, assoc: Int, ntlb: Int = 8, nbtb: Int = 8, code: Code = new IdentityCode) { @@ -48,7 +48,7 @@ class CPUFrontendIO(implicit conf: ICacheConfig) extends Bundle { val invalidate = Bool(OUTPUT) } -class Frontend(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) extends Component +class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Component { val io = new Bundle { val cpu = new CPUFrontendIO()(c).flip @@ -121,8 +121,9 @@ class Frontend(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) ex io.cpu.resp.bits.xcpt_if := s2_xcpt_if } -class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) extends Component +class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Component { + implicit val lnConf = tl.ln val io = new Bundle { val req = new PipeIO()(new Bundle { val idx = UFix(width = PGIDX_BITS) @@ -172,7 +173,7 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss - 
//assert(!c.co.isVoluntary(io.mem.grant.bits.payload) || !io.mem.grant.valid, "UncachedRequestors shouldn't get voluntary grants.") + //assert(!co.isVoluntary(io.mem.grant.bits.payload) || !io.mem.grant.valid, "UncachedRequestors shouldn't get voluntary grants.") val (rf_cnt, refill_done) = Counter(io.mem.grant.valid, REFILL_CYCLES) val repl_way = if (c.dm) UFix(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) @@ -241,13 +242,13 @@ class ICache(implicit c: ICacheConfig, lnconf: LogicalNetworkConfiguration) exte io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) val finish_q = (new Queue(1)) { new GrantAck } - finish_q.io.enq.valid := refill_done && c.co.requiresAck(io.mem.grant.bits.payload) + finish_q.io.enq.valid := refill_done && tl.co.requiresAck(io.mem.grant.bits.payload) finish_q.io.enq.bits.master_xact_id := io.mem.grant.bits.payload.master_xact_id // output signals io.resp.valid := s2_hit io.mem.acquire.meta.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.acquire.meta.bits.payload := c.co.getUncachedReadAcquire(s2_addr >> UFix(c.offbits), UFix(0)) + io.mem.acquire.meta.bits.payload := Acquire(tl.co.getUncachedReadAcquireType, s2_addr >> UFix(c.offbits), UFix(0)) io.mem.acquire.data.valid := Bool(false) io.mem.grant_ack <> FIFOedLogicalNetworkIOWrapper(finish_q.io.deq) io.mem.grant.ready := Bool(true) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index de48538d..ddb3bdf5 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -4,12 +4,14 @@ import Chisel._ import uncore._ import Util._ -case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, +case class DCacheConfig(sets: Int, ways: Int, nmshr: Int, nrpq: Int, nsdq: Int, ntlb: Int, + states: Int = 2, code: Code = new IdentityCode, narrowRead: Boolean = true, reqtagbits: Int = -1, databits: Int = -1) { + require(states > 0) require(OFFSET_BITS == log2Up(CACHE_DATA_SIZE_IN_BYTES)) require(OFFSET_BITS <= 
ACQUIRE_WRITE_MASK_BITS) require(log2Up(OFFSET_BITS) <= ACQUIRE_SUBWORD_ADDR_BITS) @@ -32,7 +34,7 @@ case class DCacheConfig(sets: Int, ways: Int, co: CoherencePolicy, def databytes = databits/8 def wordoffbits = log2Up(databytes) def isNarrowRead = narrowRead && databits*ways % MEM_DATA_BITS == 0 - val statebits = 2 // TODO: obtain from coherence policy + val statebits = log2Up(states) val metabits = statebits + tagbits val encdatabits = code.width(databits) val encmetabits = code.width(metabits) @@ -126,18 +128,18 @@ class DataWriteReq(implicit conf: DCacheConfig) extends Bundle { override def clone = new DataWriteReq().asInstanceOf[this.type] } -class InternalProbe(implicit conf: DCacheConfig) extends Probe { - val client_xact_id = Bits(width = CLIENT_XACT_ID_MAX_BITS) +class InternalProbe(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Probe { + val client_xact_id = Bits(width = tl.clientXactIdBits) override def clone = new InternalProbe().asInstanceOf[this.type] } -class WritebackReq(implicit conf: DCacheConfig) extends Bundle { +class WritebackReq(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Bundle { val tag = Bits(width = conf.tagbits) val idx = Bits(width = conf.idxbits) val way_en = Bits(width = conf.ways) - val client_xact_id = Bits(width = CLIENT_XACT_ID_MAX_BITS) - val r_type = UFix(width = RELEASE_TYPE_MAX_BITS) + val client_xact_id = Bits(width = tl.clientXactIdBits) + val r_type = UFix(width = tl.co.releaseTypeBits) override def clone = new WritebackReq().asInstanceOf[this.type] } @@ -171,7 +173,8 @@ class MetaWriteReq(implicit conf: DCacheConfig) extends Bundle { override def clone = new MetaWriteReq().asInstanceOf[this.type] } -class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguration) extends Component { +class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component { + implicit val ln = tl.ln val io = new Bundle { val req_pri_val = Bool(INPUT) val 
req_pri_rdy = Bool(OUTPUT) @@ -206,7 +209,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura val req_cmd = io.req_bits.cmd val req_idx = req.addr(conf.untagbits-1,conf.offbits) val idx_match = req_idx === io.req_bits.addr(conf.untagbits-1,conf.offbits) - val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !conf.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) + val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !tl.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UFix(id) val refill_done = reply && refill_count.andR @@ -232,7 +235,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura when (refill_done) { state := s_meta_write_req } when (reply) { refill_count := refill_count + UFix(1) - line_state := conf.co.newStateOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) + line_state := tl.co.newStateOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) } } when (io.mem_req.fire()) { // s_refill_req @@ -249,29 +252,29 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req - acquire_type := conf.co.getAcquireTypeOnSecondaryMiss(req_cmd, conf.co.newStateOnFlush(), io.mem_req.bits) + acquire_type := tl.co.getAcquireTypeOnSecondaryMiss(req_cmd, tl.co.newStateOnFlush(), io.mem_req.bits) } when (io.req_pri_val && io.req_pri_rdy) { - line_state := conf.co.newStateOnFlush() + line_state := tl.co.newStateOnFlush() refill_count := UFix(0) - acquire_type := conf.co.getAcquireTypeOnPrimaryMiss(req_cmd, conf.co.newStateOnFlush()) - release_type := conf.co.getReleaseTypeOnVoluntaryWriteback() //TODO 
downgrades etc + acquire_type := tl.co.getAcquireTypeOnPrimaryMiss(req_cmd, tl.co.newStateOnFlush()) + release_type := tl.co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc req := io.req_bits when (io.req_bits.tag_match) { - when (conf.co.isHit(req_cmd, io.req_bits.old_meta.state)) { // set dirty bit + when (tl.co.isHit(req_cmd, io.req_bits.old_meta.state)) { // set dirty bit state := s_meta_write_req - line_state := conf.co.newStateOnHit(req_cmd, io.req_bits.old_meta.state) + line_state := tl.co.newStateOnHit(req_cmd, io.req_bits.old_meta.state) }.otherwise { // upgrade permissions state := s_refill_req } }.otherwise { // writback if necessary and refill - state := Mux(conf.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_meta_clear) + state := Mux(tl.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_meta_clear) } } val ackq = (new Queue(1)) { (new LogicalNetworkIO){new GrantAck} } - ackq.io.enq.valid := (wb_done || refill_done) && conf.co.requiresAck(io.mem_grant.bits.payload) + ackq.io.enq.valid := (wb_done || refill_done) && tl.co.requiresAck(io.mem_grant.bits.payload) ackq.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp @@ -293,7 +296,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura io.meta_write.valid := state === s_meta_write_req || state === s_meta_clear io.meta_write.bits.idx := req_idx - io.meta_write.bits.data.state := Mux(state === s_meta_clear, conf.co.newStateOnFlush(), line_state) + io.meta_write.bits.data.state := Mux(state === s_meta_clear, tl.co.newStateOnFlush(), line_state) io.meta_write.bits.data.tag := io.tag io.meta_write.bits.way_en := req.way_en @@ -302,7 +305,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura io.wb_req.bits.idx := req_idx io.wb_req.bits.way_en 
:= req.way_en io.wb_req.bits.client_xact_id := Bits(id) - io.wb_req.bits.r_type := conf.co.getReleaseTypeOnVoluntaryWriteback() + io.wb_req.bits.r_type := tl.co.getReleaseTypeOnVoluntaryWriteback() io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready io.mem_req.bits.a_type := acquire_type @@ -325,7 +328,8 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfigura } } -class MSHRFile(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguration) extends Component { +class MSHRFile(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component { + implicit val ln = tl.ln val io = new Bundle { val req = (new FIFOIO) { new MSHRReq }.flip val secondary_miss = Bool(OUTPUT) @@ -426,7 +430,7 @@ class MSHRFile(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguration) } -class WritebackUnit(implicit conf: DCacheConfig) extends Component { +class WritebackUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component { val io = new Bundle { val req = (new FIFOIO) { new WritebackReq() }.flip val probe = (new FIFOIO) { new WritebackReq() }.flip @@ -498,7 +502,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Component { io.meta_read.bits.addr := io.release.bits.addr << conf.offbits } -class ProbeUnit(implicit conf: DCacheConfig) extends Component { +class ProbeUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component { val io = new Bundle { val req = (new FIFOIO) { new InternalProbe }.flip val rep = (new FIFOIO) { new Release } @@ -529,7 +533,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { when (state === s_release && io.rep.ready) { state := s_invalid when (hit) { - state := Mux(conf.co.needsWriteback(line_state), s_writeback_req, s_meta_write) + state := Mux(tl.co.needsWriteback(line_state), s_writeback_req, s_meta_write) } } when (state === s_mshr_req) { @@ -554,7 +558,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { 
io.req.ready := state === s_invalid io.rep.valid := state === s_release - io.rep.bits := conf.co.newRelease(req, Mux(hit, line_state, conf.co.newStateOnFlush), req.client_xact_id) + io.rep.bits := Release(tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.newStateOnFlush)), req.addr, req.client_xact_id, req.master_xact_id) io.meta_read.valid := state === s_meta_read io.meta_read.bits.addr := req.addr << UFix(conf.offbits) @@ -562,7 +566,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { io.meta_write.valid := state === s_meta_write io.meta_write.bits.way_en := way_en io.meta_write.bits.idx := req.addr - io.meta_write.bits.data.state := conf.co.newStateOnProbe(req, line_state) + io.meta_write.bits.data.state := tl.co.newStateOnProbe(req, line_state) io.meta_write.bits.data.tag := req.addr >> UFix(conf.idxbits) io.wb_req.valid := state === s_writeback_req @@ -573,7 +577,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Component { io.wb_req.bits.client_xact_id := UFix(0) // DNC } -class MetaDataArray(implicit conf: DCacheConfig) extends Component { +class MetaDataArray(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component { val io = new Bundle { val read = (new FIFOIO) { new MetaReadReq }.flip val write = (new FIFOIO) { new MetaWriteReq }.flip @@ -589,7 +593,7 @@ class MetaDataArray(implicit conf: DCacheConfig) extends Component { when (rst || io.write.valid) { val addr = Mux(rst, rst_cnt, io.write.bits.idx) - val data = Cat(Mux(rst, conf.co.newStateOnFlush, io.write.bits.data.state), io.write.bits.data.tag) + val data = Cat(Mux(rst, tl.co.newStateOnFlush, io.write.bits.data.state), io.write.bits.data.tag) val mask = Mux(rst, Fix(-1), io.write.bits.way_en) tags.write(addr, Fill(conf.ways, data), FillInterleaved(metabits, mask)) } @@ -735,7 +739,8 @@ class HellaCacheIO(implicit conf: DCacheConfig) extends Bundle { val ptw = (new TLBPTWIO).flip } -class HellaCache(implicit conf: DCacheConfig, lnconf: 
LogicalNetworkConfiguration) extends Component { +class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component { + implicit val ln = tl.ln val io = new Bundle { val cpu = (new HellaCacheIO).flip val mem = new TileLinkIO @@ -862,13 +867,13 @@ class HellaCache(implicit conf: DCacheConfig, lnconf: LogicalNetworkConfiguratio // tag check and way muxing def wayMap[T <: Data](f: Int => T)(gen: => T) = Vec((0 until conf.ways).map(f)){gen} val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)){Bits()}.toBits - val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && conf.co.isValid(meta.io.resp(w).state)){Bits()}.toBits + val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && tl.co.isValid(meta.io.resp(w).state)){Bits()}.toBits s1_clk_en := metaReadArb.io.out.valid val s1_writeback = s1_clk_en && !s1_valid && !s1_replay val s2_tag_match_way = RegEn(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en)){Bits()}) - val s2_hit = s2_tag_match && conf.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === conf.co.newStateOnHit(s2_req.cmd, s2_hit_state) + val s2_hit = s2_tag_match && tl.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === tl.co.newStateOnHit(s2_req.cmd, s2_hit_state) // load-reserved/store-conditional val lrsc_count = Reg(resetVal = UFix(0)) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 10ba3184..84b13e3f 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -4,7 +4,7 @@ import Chisel._ import uncore._ import Util._ -case class RocketConfiguration(lnConf: LogicalNetworkConfiguration, co: CoherencePolicyWithUncached, +case class RocketConfiguration(tl: TileLinkConfiguration, icache: ICacheConfig, dcache: DCacheConfig, fpu: Boolean, vec: Boolean, fastLoadWord: Boolean = true, @@ -26,7 +26,9 @@ 
class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon val icachePortId = 1 val vicachePortId = 2 implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts), databits = confIn.xprlen) - implicit val lnConf = confIn.lnConf + implicit val icConf = confIn.icache + implicit val tlConf = confIn.tl + implicit val lnConf = confIn.tl.ln implicit val conf = confIn.copy(dcache = dcConf) val io = new Bundle { @@ -35,10 +37,10 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon } val core = new Core - val icache = new Frontend()(confIn.icache, lnConf) + val icache = new Frontend val dcache = new HellaCache - val arbiter = new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts, confIn.dcache.co) + val arbiter = new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts) arbiter.io.in(dcachePortId) <> dcache.io.mem arbiter.io.in(icachePortId) <> icache.io.mem @@ -61,7 +63,7 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon }*/ if (conf.vec) { - val vicache = new Frontend()(ICacheConfig(128, 1, conf.co), lnConf) // 128 sets x 1 ways (8KB) + val vicache = new Frontend()(ICacheConfig(128, 1), tlConf) // 128 sets x 1 ways (8KB) arbiter.io.in(vicachePortId) <> vicache.io.mem core.io.vimem <> vicache.io.cpu } From 1a9e43aa11807d5192e83925a353877473b99eaa Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 12 Aug 2013 10:39:11 -0700 Subject: [PATCH 0611/1087] initial attempt at upgrade --- rocket/src/main/scala/arbiter.scala | 14 +- rocket/src/main/scala/consts.scala | 80 +++--- rocket/src/main/scala/core.scala | 20 +- rocket/src/main/scala/ctrl.scala | 216 +++++++-------- rocket/src/main/scala/ctrl_vec.scala | 28 +- rocket/src/main/scala/decode.scala | 2 +- rocket/src/main/scala/divider.scala | 64 ++--- rocket/src/main/scala/dpath.scala | 160 +++++------ rocket/src/main/scala/dpath_alu.scala | 18 +- rocket/src/main/scala/dpath_util.scala | 112 ++++---- 
rocket/src/main/scala/dpath_vec.scala | 160 +++++------ rocket/src/main/scala/ecc.scala | 12 +- rocket/src/main/scala/fpu.scala | 130 ++++----- rocket/src/main/scala/htif.scala | 124 ++++----- rocket/src/main/scala/icache.scala | 121 +++++---- rocket/src/main/scala/instructions.scala | 22 +- rocket/src/main/scala/multiplier.scala | 48 ++-- rocket/src/main/scala/nbdcache.scala | 324 ++++++++++++----------- rocket/src/main/scala/ptw.scala | 44 +-- rocket/src/main/scala/tile.scala | 22 +- rocket/src/main/scala/tlb.scala | 72 ++--- rocket/src/main/scala/util.scala | 36 +-- 22 files changed, 921 insertions(+), 908 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 44c3581a..78c3b47e 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -3,14 +3,14 @@ package rocket import Chisel._ import uncore._ -class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Component +class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Module { val io = new Bundle { - val requestor = Vec(n) { new HellaCacheIO()(conf.dcache) }.flip + val requestor = Vec.fill(n){new HellaCacheIO()(conf.dcache)}.flip val mem = new HellaCacheIO()(conf.dcache) } - val r_valid = io.requestor.map(r => Reg(r.req.valid)) + val r_valid = io.requestor.map(r => RegUpdate(r.req.valid)) io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_) io.requestor(0).req.ready := io.mem.req.ready @@ -18,7 +18,7 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid io.mem.req.bits := io.requestor(n-1).req.bits - io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UFix(n-1, log2Up(n))) + io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UInt(n-1, log2Up(n))) for (i <- n-2 to 0 by -1) { val req = io.requestor(i).req when (req.valid) { @@ -26,7 +26,7 @@ class 
HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp io.mem.req.bits.typ := req.bits.typ io.mem.req.bits.addr := req.bits.addr io.mem.req.bits.phys := req.bits.phys - io.mem.req.bits.tag := Cat(req.bits.tag, UFix(i, log2Up(n))) + io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n))) } when (r_valid(i)) { io.mem.req.bits.kill := req.bits.kill @@ -36,11 +36,11 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Comp for (i <- 0 until n) { val resp = io.requestor(i).resp - val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UFix(i) + val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i) resp.valid := io.mem.resp.valid && tag_hit io.requestor(i).xcpt := io.mem.xcpt resp.bits := io.mem.resp.bits - resp.bits.tag := io.mem.resp.bits.tag >> UFix(log2Up(n)) + resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n)) resp.bits.nack := io.mem.resp.bits.nack && tag_hit resp.bits.replay := io.mem.resp.bits.replay && tag_hit } diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index d1ce166e..d8a16383 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -16,32 +16,32 @@ trait ScalarOpConstants { val BR_LTU = Bits(6, 3) val BR_GEU = Bits(7, 3) - val PC_EX4 = UFix(0, 2) - val PC_EX = UFix(1, 2) - val PC_WB = UFix(2, 2) - val PC_PCR = UFix(3, 2) + val PC_EX4 = UInt(0, 2) + val PC_EX = UInt(1, 2) + val PC_WB = UInt(2, 2) + val PC_PCR = UInt(3, 2) val A2_X = Bits("b???", 3) - val A2_BTYPE = UFix(0, 3); - val A2_LTYPE = UFix(1, 3); - val A2_ITYPE = UFix(2, 3); - val A2_ZERO = UFix(4, 3); - val A2_JTYPE = UFix(5, 3); - val A2_RTYPE = UFix(6, 3); + val A2_BTYPE = UInt(0, 3); + val A2_LTYPE = UInt(1, 3); + val A2_ITYPE = UInt(2, 3); + val A2_ZERO = UInt(4, 3); + val A2_JTYPE = UInt(5, 3); + val A2_RTYPE = UInt(6, 3); val X = Bits("b?", 1) - val N = Bits(0, 1); - val Y = Bits(1, 1); + val N = Bits(0, 1) + val Y = Bits(1, 1) - val WA_X = X - val WA_RD = 
N - val WA_RA = Y + val WA_X = UInt("b?", 1) + val WA_RD = UInt(0, 1) + val WA_RA = UInt(1, 1) - val WB_X = Bits("b???", 3) - val WB_PC = UFix(0, 3); - val WB_ALU = UFix(2, 3); - val WB_TSC = UFix(4, 3); - val WB_IRT = UFix(5, 3); + val WB_X = UInt("b???", 3) + val WB_PC = UInt(0, 3); + val WB_ALU = UInt(2, 3); + val WB_TSC = UInt(4, 3); + val WB_IRT = UInt(5, 3); val SZ_DW = 1 val DW_X = X @@ -49,7 +49,7 @@ trait ScalarOpConstants { val DW_64 = Y val DW_XPR = Y - val RA = UFix(1, 5); + val RA = UInt(1, 5); } trait InterruptConstants { @@ -57,26 +57,26 @@ trait InterruptConstants { } trait VectorOpConstants { - val VEC_X = Bits("b??", 2).toUFix - val VEC_FN_N = UFix(0, 2) - val VEC_VL = UFix(1, 2) - val VEC_CFG = UFix(2, 2) - val VEC_CFGVL = UFix(3, 2) + val VEC_X = Bits("b??", 2).toUInt + val VEC_FN_N = UInt(0, 2) + val VEC_VL = UInt(1, 2) + val VEC_CFG = UInt(2, 2) + val VEC_CFGVL = UInt(3, 2) - val VCMD_I = UFix(0, 3) - val VCMD_F = UFix(1, 3) - val VCMD_TX = UFix(2, 3) - val VCMD_TF = UFix(3, 3) - val VCMD_MX = UFix(4, 3) - val VCMD_MF = UFix(5, 3) - val VCMD_A = UFix(6, 3) - val VCMD_X = UFix(0, 3) + val VCMD_I = UInt(0, 3) + val VCMD_F = UInt(1, 3) + val VCMD_TX = UInt(2, 3) + val VCMD_TF = UInt(3, 3) + val VCMD_MX = UInt(4, 3) + val VCMD_MF = UInt(5, 3) + val VCMD_A = UInt(6, 3) + val VCMD_X = UInt(0, 3) - val VIMM_VLEN = UFix(0, 1) - val VIMM_ALU = UFix(1, 1) - val VIMM_X = UFix(0, 1) + val VIMM_VLEN = UInt(0, 1) + val VIMM_ALU = UInt(1, 1) + val VIMM_X = UInt(0, 1) - val VIMM2_RS2 = UFix(0, 1) - val VIMM2_ALU = UFix(1, 1) - val VIMM2_X = UFix(0, 1) + val VIMM2_RS2 = UInt(0, 1) + val VIMM2_ALU = UInt(1, 1) + val VIMM2_X = UInt(0, 1) } diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 1f89a54a..d755443f 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -13,12 +13,12 @@ class RocketIO(implicit conf: RocketConfiguration) extends Bundle val dmem = new HellaCacheIO()(conf.dcache) } -class 
Core(implicit conf: RocketConfiguration) extends Component +class Core(implicit conf: RocketConfiguration) extends Module { val io = new RocketIO - val ctrl = new Control - val dpath = new Datapath + val ctrl = Module(new Control) + val dpath = Module(new Datapath) ctrl.io.dpath <> dpath.io.ctrl dpath.io.host <> io.host @@ -26,7 +26,7 @@ class Core(implicit conf: RocketConfiguration) extends Component ctrl.io.imem <> io.imem dpath.io.imem <> io.imem - val dmemArb = new HellaCacheArbiter(2 + conf.vec) + val dmemArb = Module(new HellaCacheArbiter(2 + conf.vec)) dmemArb.io.mem <> io.dmem val dmem = dmemArb.io.requestor dmem(1) <> ctrl.io.dmem @@ -35,20 +35,20 @@ class Core(implicit conf: RocketConfiguration) extends Component val ptw = collection.mutable.ArrayBuffer(io.imem.ptw, io.dmem.ptw) val fpu: FPU = if (conf.fpu) { - val fpu = new FPU(4,6) + val fpu = Module(new FPU(4,6)) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl fpu } else null if (conf.vec) { - val vu = new vu(Reg(reset)) + val vu = Module(new vu(RegUpdate(this.getReset))) - val vdtlb = new TLB(8) + val vdtlb = Module(new TLB(8)) ptw += vdtlb.io.ptw vdtlb.io <> vu.io.vtlb - val pftlb = new TLB(2) + val pftlb = Module(new TLB(2)) pftlb.io <> vu.io.vpftlb ptw += pftlb.io.ptw @@ -104,7 +104,7 @@ class Core(implicit conf: RocketConfiguration) extends Component // exceptions vu.io.xcpt.exception := ctrl.io.vec_iface.exception vu.io.xcpt.evac := ctrl.io.vec_iface.evac - vu.io.xcpt.evac_addr := dpath.io.vec_iface.evac_addr.toUFix + vu.io.xcpt.evac_addr := dpath.io.vec_iface.evac_addr.toUInt vu.io.xcpt.kill := ctrl.io.vec_iface.kill vu.io.xcpt.hold := ctrl.io.vec_iface.hold @@ -125,7 +125,7 @@ class Core(implicit conf: RocketConfiguration) extends Component fpu.io.dfma.valid := Bool(false) } - val thePTW = new PTW(ptw.length) + val thePTW = Module(new PTW(ptw.length)) ptw zip thePTW.io.requestor map { case (a, b) => a <> b } thePTW.io.dpath <> dpath.io.ptw dmem(0) <> thePTW.io.mem diff --git 
a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 682af625..09d7c1df 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -10,20 +10,20 @@ import Util._ class CtrlDpathIO extends Bundle() { // outputs to datapath - val sel_pc = UFix(OUTPUT, 3); + val sel_pc = UInt(OUTPUT, 3); val killd = Bool(OUTPUT); val ren2 = Bool(OUTPUT); val ren1 = Bool(OUTPUT); - val sel_alu2 = UFix(OUTPUT, 3); + val sel_alu2 = UInt(OUTPUT, 3); val fn_dw = Bool(OUTPUT); - val fn_alu = UFix(OUTPUT, SZ_ALU_FN); + val fn_alu = UInt(OUTPUT, SZ_ALU_FN); val div_mul_val = Bool(OUTPUT) val div_mul_kill = Bool(OUTPUT) val div_val = Bool(OUTPUT); val div_kill = Bool(OUTPUT) val sel_wa = Bool(OUTPUT); - val sel_wb = UFix(OUTPUT, 3); - val pcr = UFix(OUTPUT, 3) + val sel_wb = UInt(OUTPUT, 3); + val pcr = UInt(OUTPUT, 3) val eret = Bool(OUTPUT); val mem_load = Bool(OUTPUT); val wb_load = Bool(OUTPUT) @@ -39,7 +39,7 @@ class CtrlDpathIO extends Bundle() val mem_rs2_val = Bool(OUTPUT) // exception handling val exception = Bool(OUTPUT); - val cause = UFix(OUTPUT, 6); + val cause = UInt(OUTPUT, 6); val badvaddr_wen = Bool(OUTPUT); // high for a load/store access fault val vec_irq_aux_wen = Bool(OUTPUT) // inputs from datapath @@ -49,13 +49,13 @@ class CtrlDpathIO extends Bundle() val ex_br_taken = Bool(INPUT) val div_mul_rdy = Bool(INPUT) val mem_ll_wb = Bool(INPUT) - val mem_ll_waddr = UFix(INPUT, 5) - val ex_waddr = UFix(INPUT, 5); // write addr from execute stage - val mem_waddr = UFix(INPUT, 5); // write addr from memory stage - val wb_waddr = UFix(INPUT, 5); // write addr from writeback stage + val mem_ll_waddr = UInt(INPUT, 5) + val ex_waddr = UInt(INPUT, 5); // write addr from execute stage + val mem_waddr = UInt(INPUT, 5); // write addr from memory stage + val wb_waddr = UInt(INPUT, 5); // write addr from writeback stage val status = new Status().asInput val fp_sboard_clr = Bool(INPUT); - val fp_sboard_clra = UFix(INPUT, 5); + val 
fp_sboard_clra = UInt(INPUT, 5); val pcr_replay = Bool(INPUT) } @@ -72,7 +72,7 @@ abstract trait DecodeConstants // | | | | | | | | | | | | | | | | | | | | | | | | List(N, X,X,BR_X, X,X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,WA_X, WB_X, PCR.X,N,X,X,X,X) - val table: Array[(Bits, List[Bits])] + val table: Array[(UInt, List[UInt])] } object XDecode extends DecodeConstants @@ -318,7 +318,7 @@ object VDecode extends DecodeConstants VXCPTHOLD-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,Y,N)) } -class Control(implicit conf: RocketConfiguration) extends Component +class Control(implicit conf: RocketConfiguration) extends Module { val io = new Bundle { val dpath = new CtrlDpathIO @@ -339,11 +339,15 @@ class Control(implicit conf: RocketConfiguration) extends Component if (conf.fpu) decode_table ++= FDecode.table if (conf.vec) decode_table ++= VDecode.table - val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) - - val id_int_val :: id_fp_val :: id_vec_val :: id_br_type :: id_jalr :: id_renx2 :: id_renx1 :: id_sel_alu2 :: id_fn_dw :: id_fn_alu :: cs0 = cs - val id_mem_val :: id_mem_cmd :: id_mem_type :: id_mul_val :: id_div_val :: id_wen :: id_sel_wa :: id_sel_wb :: cs1 = cs0 - val id_pcr :: id_fence_i :: id_eret :: id_syscall :: id_privileged :: id_replay_next :: Nil = cs1 + val logic = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) + val cs = logic.map { + case b if b.inputs.head.getClass == classOf[Bool] => b.toBool + case u => u + } + + val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_vec_val: Bool) :: id_br_type :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: id_sel_alu2 :: (id_fn_dw: Bool) :: id_fn_alu :: cs0 = cs + val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: id_sel_wa :: id_sel_wb :: cs1 = cs0 + val id_pcr :: (id_fence_i: Bool) :: (id_eret: Bool) :: (id_syscall: Bool) :: (id_privileged: Bool) :: 
(id_replay_next: Bool) :: Nil = cs1 val id_raddr3 = io.dpath.inst(16,12); val id_raddr2 = io.dpath.inst(21,17); @@ -351,70 +355,70 @@ class Control(implicit conf: RocketConfiguration) extends Component val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27)); val id_load_use = Bool(); - val ex_reg_xcpt_interrupt = Reg(resetVal = Bool(false)) - val ex_reg_valid = Reg(resetVal = Bool(false)) - val ex_reg_eret = Reg(resetVal = Bool(false)) - val ex_reg_wen = Reg(resetVal = Bool(false)) - val ex_reg_fp_wen = Reg(resetVal = Bool(false)) - val ex_reg_flush_inst = Reg(resetVal = Bool(false)) - val ex_reg_jalr = Reg(resetVal = Bool(false)) - val ex_reg_btb_hit = Reg(resetVal = Bool(false)) - val ex_reg_div_mul_val = Reg(resetVal = Bool(false)) - val ex_reg_mem_val = Reg(resetVal = Bool(false)) - val ex_reg_xcpt = Reg(resetVal = Bool(false)) - val ex_reg_fp_val = Reg(resetVal = Bool(false)) - val ex_reg_vec_val = Reg(resetVal = Bool(false)) - val ex_reg_replay_next = Reg(resetVal = Bool(false)) - val ex_reg_load_use = Reg(resetVal = Bool(false)) - val ex_reg_pcr = Reg(resetVal = PCR.N) - val ex_reg_br_type = Reg(resetVal = BR_N) - val ex_reg_mem_cmd = Reg(){Bits()} - val ex_reg_mem_type = Reg(){Bits()} - val ex_reg_cause = Reg(){UFix()} + val ex_reg_xcpt_interrupt = RegReset(Bool(false)) + val ex_reg_valid = RegReset(Bool(false)) + val ex_reg_eret = RegReset(Bool(false)) + val ex_reg_wen = RegReset(Bool(false)) + val ex_reg_fp_wen = RegReset(Bool(false)) + val ex_reg_flush_inst = RegReset(Bool(false)) + val ex_reg_jalr = RegReset(Bool(false)) + val ex_reg_btb_hit = RegReset(Bool(false)) + val ex_reg_div_mul_val = RegReset(Bool(false)) + val ex_reg_mem_val = RegReset(Bool(false)) + val ex_reg_xcpt = RegReset(Bool(false)) + val ex_reg_fp_val = RegReset(Bool(false)) + val ex_reg_vec_val = RegReset(Bool(false)) + val ex_reg_replay_next = RegReset(Bool(false)) + val ex_reg_load_use = RegReset(Bool(false)) + val ex_reg_pcr = RegReset(PCR.N) + val ex_reg_br_type = 
RegReset(BR_N) + val ex_reg_mem_cmd = Reg(Bits()) + val ex_reg_mem_type = Reg(Bits()) + val ex_reg_cause = Reg(UInt()) - val mem_reg_xcpt_interrupt = Reg(resetVal = Bool(false)) - val mem_reg_valid = Reg(resetVal = Bool(false)) - val mem_reg_eret = Reg(resetVal = Bool(false)) - val mem_reg_wen = Reg(resetVal = Bool(false)) - val mem_reg_fp_wen = Reg(resetVal = Bool(false)) - val mem_reg_flush_inst = Reg(resetVal = Bool(false)) - val mem_reg_div_mul_val = Reg(resetVal = Bool(false)) - val mem_reg_mem_val = Reg(resetVal = Bool(false)) - val mem_reg_xcpt = Reg(resetVal = Bool(false)) - val mem_reg_fp_val = Reg(resetVal = Bool(false)) - val mem_reg_vec_val = Reg(resetVal = Bool(false)) - val mem_reg_replay = Reg(resetVal = Bool(false)) - val mem_reg_replay_next = Reg(resetVal = Bool(false)) - val mem_reg_pcr = Reg(resetVal = PCR.N) - val mem_reg_cause = Reg(){UFix()} - val mem_reg_slow_bypass = Reg(){Bool()} + val mem_reg_xcpt_interrupt = RegReset(Bool(false)) + val mem_reg_valid = RegReset(Bool(false)) + val mem_reg_eret = RegReset(Bool(false)) + val mem_reg_wen = RegReset(Bool(false)) + val mem_reg_fp_wen = RegReset(Bool(false)) + val mem_reg_flush_inst = RegReset(Bool(false)) + val mem_reg_div_mul_val = RegReset(Bool(false)) + val mem_reg_mem_val = RegReset(Bool(false)) + val mem_reg_xcpt = RegReset(Bool(false)) + val mem_reg_fp_val = RegReset(Bool(false)) + val mem_reg_vec_val = RegReset(Bool(false)) + val mem_reg_replay = RegReset(Bool(false)) + val mem_reg_replay_next = RegReset(Bool(false)) + val mem_reg_pcr = RegReset(PCR.N) + val mem_reg_cause = Reg(UInt()) + val mem_reg_slow_bypass = Reg(Bool()) - val wb_reg_valid = Reg(resetVal = Bool(false)) - val wb_reg_pcr = Reg(resetVal = PCR.N) - val wb_reg_wen = Reg(resetVal = Bool(false)) - val wb_reg_fp_wen = Reg(resetVal = Bool(false)) - val wb_reg_flush_inst = Reg(resetVal = Bool(false)) - val wb_reg_mem_val = Reg(resetVal = Bool(false)) - val wb_reg_eret = Reg(resetVal = Bool(false)) - val wb_reg_xcpt = 
Reg(resetVal = Bool(false)) - val wb_reg_replay = Reg(resetVal = Bool(false)) - val wb_reg_cause = Reg(){UFix()} - val wb_reg_fp_val = Reg(resetVal = Bool(false)) - val wb_reg_div_mul_val = Reg(resetVal = Bool(false)) + val wb_reg_valid = RegReset(Bool(false)) + val wb_reg_pcr = RegReset(PCR.N) + val wb_reg_wen = RegReset(Bool(false)) + val wb_reg_fp_wen = RegReset(Bool(false)) + val wb_reg_flush_inst = RegReset(Bool(false)) + val wb_reg_mem_val = RegReset(Bool(false)) + val wb_reg_eret = RegReset(Bool(false)) + val wb_reg_xcpt = RegReset(Bool(false)) + val wb_reg_replay = RegReset(Bool(false)) + val wb_reg_cause = Reg(UInt()) + val wb_reg_fp_val = RegReset(Bool(false)) + val wb_reg_div_mul_val = RegReset(Bool(false)) val take_pc = Bool() - val pc_taken = Reg(take_pc, resetVal = Bool(false)) + val pc_taken = Reg(update = take_pc, reset = Bool(false)) val take_pc_wb = Bool() val ctrl_killd = Bool() val ctrl_killx = Bool() val ctrl_killm = Bool() val sr = io.dpath.status - var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UFix(CAUSE_INTERRUPT+i))) + var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(CAUSE_INTERRUPT+i))) val (vec_replay, vec_stalld) = if (conf.vec) { // vector control - val vec = new rocketCtrlVec() + val vec = Module(new rocketCtrlVec) io.vec_dpath <> vec.io.dpath io.vec_iface <> vec.io.iface @@ -425,7 +429,7 @@ class Control(implicit conf: RocketConfiguration) extends Component vec.io.exception := wb_reg_xcpt vec.io.eret := wb_reg_eret - val vec_dec = new rocketCtrlVecDecoder() + val vec_dec = Module(new rocketCtrlVecDecoder) vec_dec.io.inst := io.dpath.inst val s = io.dpath.status.s @@ -452,7 +456,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val (id_interrupt_unmasked, id_interrupt_cause) = checkExceptions(id_interrupts) val id_interrupt = io.dpath.status.et && id_interrupt_unmasked - def checkExceptions(x: Seq[(Bits, UFix)]) = + def checkExceptions(x: Seq[(Bool, 
UInt)]) = (x.map(_._1).reduce(_||_), PriorityMux(x)) // executing ERET when traps are enabled causes an illegal instruction exception @@ -463,13 +467,13 @@ class Control(implicit conf: RocketConfiguration) extends Component val (id_xcpt, id_cause) = checkExceptions(List( (id_interrupt, id_interrupt_cause), - (io.imem.resp.bits.xcpt_ma, UFix(0)), - (io.imem.resp.bits.xcpt_if, UFix(1)), - (illegal_inst, UFix(2)), - (id_privileged && !io.dpath.status.s, UFix(3)), - (id_fp_val && !io.dpath.status.ef, UFix(4)), - (id_syscall, UFix(6)), - (id_vec_val && !io.dpath.status.ev, UFix(12)))) + (io.imem.resp.bits.xcpt_ma, UInt(0)), + (io.imem.resp.bits.xcpt_if, UInt(1)), + (illegal_inst, UInt(2)), + (id_privileged && !io.dpath.status.s, UInt(3)), + (id_fp_val && !io.dpath.status.ef, UInt(4)), + (id_syscall, UInt(6)), + (id_vec_val && !io.dpath.status.ev, UInt(12)))) ex_reg_xcpt_interrupt := id_interrupt && !take_pc && io.imem.resp.valid when (id_xcpt) { ex_reg_cause := id_cause } @@ -500,7 +504,7 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_mem_val := id_mem_val.toBool; ex_reg_valid := Bool(true) ex_reg_pcr := id_pcr - ex_reg_wen := id_wen && id_waddr != UFix(0) + ex_reg_wen := id_wen && id_waddr != UInt(0) ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen ex_reg_eret := id_eret.toBool; ex_reg_flush_inst := id_fence_i @@ -509,7 +513,7 @@ class Control(implicit conf: RocketConfiguration) extends Component ex_reg_replay_next := id_replay_next || id_pcr_flush ex_reg_load_use := id_load_use; ex_reg_mem_cmd := id_mem_cmd - ex_reg_mem_type := id_mem_type.toUFix + ex_reg_mem_type := id_mem_type.toUInt ex_reg_xcpt := id_xcpt } @@ -528,7 +532,7 @@ class Control(implicit conf: RocketConfiguration) extends Component val (ex_xcpt, ex_cause) = checkExceptions(List( (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), - (ex_reg_fp_val && io.fpu.illegal_rm, UFix(2)))) + (ex_reg_fp_val && io.fpu.illegal_rm, UInt(2)))) mem_reg_replay := replay_ex && !take_pc_wb; 
mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb && !mem_reg_replay_next @@ -565,10 +569,10 @@ class Control(implicit conf: RocketConfiguration) extends Component val (mem_xcpt, mem_cause) = checkExceptions(List( (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), - (mem_reg_mem_val && io.dmem.xcpt.ma.ld, UFix( 8)), - (mem_reg_mem_val && io.dmem.xcpt.ma.st, UFix( 9)), - (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UFix(10)), - (mem_reg_mem_val && io.dmem.xcpt.pf.st, UFix(11)))) + (mem_reg_mem_val && io.dmem.xcpt.ma.ld, UInt( 8)), + (mem_reg_mem_val && io.dmem.xcpt.ma.st, UInt( 9)), + (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UInt(10)), + (mem_reg_mem_val && io.dmem.xcpt.pf.st, UInt(11)))) val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem val ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen || mem_reg_vec_val || mem_reg_pcr != PCR.N) @@ -607,14 +611,14 @@ class Control(implicit conf: RocketConfiguration) extends Component class Scoreboard(n: Int) { - val r = Reg(resetVal = Bits(0, n)) + val r = RegReset(Bits(0, n)) var next = r var ens = Bool(false) - def apply(addr: UFix) = r(addr) - def set(en: Bool, addr: UFix): Unit = update(en, next | mask(en, addr)) - def clear(en: Bool, addr: UFix): Unit = update(en, next & ~mask(en, addr)) - private def mask(en: Bool, addr: UFix) = Mux(en, UFix(1) << addr, UFix(0)) - private def update(en: Bool, update: Bits) = { + def apply(addr: UInt) = r(addr) + def set(en: Bool, addr: UInt): Unit = update(en, next | mask(en, addr)) + def clear(en: Bool, addr: UInt): Unit = update(en, next & ~mask(en, addr)) + private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0)) + private def update(en: Bool, update: UInt) = { next = update ens = ens || en when (ens) { r := next } @@ -640,8 +644,8 @@ class Control(implicit conf: RocketConfiguration) extends Component // write cause to PCR on an exception io.dpath.exception := wb_reg_xcpt io.dpath.cause := wb_reg_cause - io.dpath.badvaddr_wen := 
wb_reg_xcpt && (wb_reg_cause === UFix(10) || wb_reg_cause === UFix(11)) - io.dpath.vec_irq_aux_wen := wb_reg_xcpt && wb_reg_cause >= UFix(24) && wb_reg_cause < UFix(32) + io.dpath.badvaddr_wen := wb_reg_xcpt && (wb_reg_cause === UInt(10) || wb_reg_cause === UInt(11)) + io.dpath.vec_irq_aux_wen := wb_reg_xcpt && wb_reg_cause >= UInt(24) && wb_reg_cause < UInt(32) // control transfer from ex/wb take_pc_wb := replay_wb || wb_reg_xcpt || wb_reg_eret @@ -677,9 +681,9 @@ class Control(implicit conf: RocketConfiguration) extends Component if (conf.fastLoadWord) Bool(!conf.fastLoadByte) && mem_reg_slow_bypass else Bool(true) val data_hazard_mem = mem_reg_wen && - (id_raddr1 != UFix(0) && id_renx1 && id_raddr1 === io.dpath.mem_waddr || - id_raddr2 != UFix(0) && id_renx2 && id_raddr2 === io.dpath.mem_waddr || - id_waddr != UFix(0) && id_wen && id_waddr === io.dpath.mem_waddr) + (id_raddr1 != UInt(0) && id_renx1 && id_raddr1 === io.dpath.mem_waddr || + id_raddr2 != UInt(0) && id_renx2 && id_raddr2 === io.dpath.mem_waddr || + id_waddr != UInt(0) && id_wen && id_waddr === io.dpath.mem_waddr) val fp_data_hazard_mem = mem_reg_fp_wen && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.mem_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || @@ -691,9 +695,9 @@ class Control(implicit conf: RocketConfiguration) extends Component // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
val data_hazard_wb = wb_reg_wen && - (id_raddr1 != UFix(0) && id_renx1 && (id_raddr1 === io.dpath.wb_waddr) || - id_raddr2 != UFix(0) && id_renx2 && (id_raddr2 === io.dpath.wb_waddr) || - id_waddr != UFix(0) && id_wen && (id_waddr === io.dpath.wb_waddr)) + (id_raddr1 != UInt(0) && id_renx1 && (id_raddr1 === io.dpath.wb_waddr) || + id_raddr2 != UInt(0) && id_renx2 && (id_raddr2 === io.dpath.wb_waddr) || + id_waddr != UInt(0) && id_wen && (id_waddr === io.dpath.wb_waddr)) val fp_data_hazard_wb = wb_reg_fp_wen && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.wb_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr || @@ -703,9 +707,9 @@ class Control(implicit conf: RocketConfiguration) extends Component fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) val id_sboard_hazard = - (id_raddr1 != UFix(0) && id_renx1 && sboard(id_raddr1) || - id_raddr2 != UFix(0) && id_renx2 && sboard(id_raddr2) || - id_waddr != UFix(0) && id_wen && sboard(id_waddr)) + (id_raddr1 != UInt(0) && id_renx1 && sboard(id_raddr1) || + id_raddr2 != UInt(0) && id_renx2 && sboard(id_raddr2) || + id_waddr != UInt(0) && id_wen && sboard(id_waddr)) val ctrl_stalld = id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || @@ -723,9 +727,9 @@ class Control(implicit conf: RocketConfiguration) extends Component io.dpath.wb_load := wb_reg_mem_val && wb_reg_wen io.dpath.ren2 := id_renx2.toBool; io.dpath.ren1 := id_renx1.toBool; - io.dpath.sel_alu2 := id_sel_alu2.toUFix + io.dpath.sel_alu2 := id_sel_alu2.toUInt io.dpath.fn_dw := id_fn_dw.toBool; - io.dpath.fn_alu := id_fn_alu.toUFix + io.dpath.fn_alu := id_fn_alu.toUInt io.dpath.div_mul_val := ex_reg_div_mul_val io.dpath.div_mul_kill := mem_reg_div_mul_val && killm_common io.dpath.ex_fp_val:= ex_reg_fp_val; @@ -736,8 +740,8 @@ class Control(implicit conf: RocketConfiguration) extends Component io.dpath.wb_wen := wb_reg_wen && !replay_wb io.dpath.wb_valid := wb_reg_valid && !replay_wb io.dpath.sel_wa := id_sel_wa.toBool; - io.dpath.sel_wb := 
id_sel_wb.toUFix - io.dpath.pcr := wb_reg_pcr.toUFix + io.dpath.sel_wb := id_sel_wb.toUInt + io.dpath.pcr := wb_reg_pcr.toUInt io.dpath.eret := wb_reg_eret io.dpath.ex_mem_type := ex_reg_mem_type io.dpath.ex_br_type := ex_reg_br_type diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index a3179993..85ef702b 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -18,15 +18,15 @@ class CtrlDpathVecIO extends Bundle class CtrlVecInterfaceIO extends Bundle { - val vcmdq = new FIFOIO()(Bits(width = SZ_VCMD)) - val vximm1q = new FIFOIO()(Bits(width = SZ_VIMM)) - val vximm2q = new FIFOIO()(Bits(width = SZ_VSTRIDE)) - val vcntq = new FIFOIO()(Bits(width = SZ_VLEN+1)) + val vcmdq = Decoupled(Bits(width = SZ_VCMD)) + val vximm1q = Decoupled(Bits(width = SZ_VIMM)) + val vximm2q = Decoupled(Bits(width = SZ_VSTRIDE)) + val vcntq = Decoupled(Bits(width = SZ_VLEN+1)) - val vpfcmdq = new FIFOIO()(Bits(width = SZ_VCMD)) - val vpfximm1q = new FIFOIO()(Bits(width = SZ_VIMM)) - val vpfximm2q = new FIFOIO()(Bits(width = SZ_VSTRIDE)) - val vpfcntq = new FIFOIO()(Bits(width = SZ_VLEN)) + val vpfcmdq = Decoupled(Bits(width = SZ_VCMD)) + val vpfximm1q = Decoupled(Bits(width = SZ_VIMM)) + val vpfximm2q = Decoupled(Bits(width = SZ_VSTRIDE)) + val vpfcntq = Decoupled(Bits(width = SZ_VLEN)) val vcmdq_user_ready = Bool(INPUT) val vximm1q_user_ready = Bool(INPUT) @@ -34,7 +34,7 @@ class CtrlVecInterfaceIO extends Bundle val vfence_ready = Bool(INPUT) val irq = Bool(INPUT) - val irq_cause = UFix(INPUT, 5) + val irq_cause = UInt(INPUT, 5) val exception = Bool(OUTPUT) @@ -55,7 +55,7 @@ class CtrlVecIO extends Bundle val replay = Bool(OUTPUT) val vfence_ready = Bool(OUTPUT) val irq = Bool(OUTPUT) - val irq_cause = UFix(OUTPUT, 5) + val irq_cause = UInt(OUTPUT, 5) } class rocketCtrlVecSigs extends Bundle @@ -82,7 +82,7 @@ class rocketCtrlVecSigs extends Bundle val xcpthold = Bool() } -class rocketCtrlVecDecoder extends 
Component +class rocketCtrlVecDecoder extends Module { val io = new Bundle { @@ -184,11 +184,11 @@ class rocketCtrlVecDecoder extends Component io.sigs.xcpthold := xcpthold.toBool } -class rocketCtrlVec extends Component +class rocketCtrlVec extends Module { val io = new CtrlVecIO - val dec = new rocketCtrlVecDecoder() + val dec = Module(new rocketCtrlVecDecoder) dec.io.inst := io.dpath.inst val valid_common = io.valid && io.sr_ev && dec.io.sigs.valid && !(dec.io.sigs.appvlmask && io.dpath.appvl0) @@ -261,7 +261,7 @@ class rocketCtrlVec extends Component io.iface.exception := io.exception && io.sr_ev - val reg_hold = Reg(resetVal = Bool(false)) + val reg_hold = RegReset(Bool(false)) when (valid_common && dec.io.sigs.xcpthold) { reg_hold := Bool(true) } when (io.eret) { reg_hold := Bool(false) } diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index bfea1102..0a390da2 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -17,7 +17,7 @@ object DecodeLogic terms.map { t => if (!cache.contains(t)) cache += t -> ((if (t.mask == 0) addr else addr & Lit(BigInt(2).pow(addr.width)-(t.mask+1), addr.width){Bits()}) === Lit(t.value, addr.width){Bits()}) - cache(t) + cache(t).toBool }.foldLeft(Bool(false))(_||_) } def apply(addr: Bits, default: Iterable[Bits], mapping: Iterable[(Bits, Iterable[Bits])]) = { diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 91ce2122..08e35766 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -4,20 +4,20 @@ import Chisel._ import ALU._ import Util._ -class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { +class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Module { val io = new MultiplierIO val w = io.req.bits.in1.getWidth val mulw = (w+mulUnroll-1)/mulUnroll*mulUnroll - val 
s_ready :: s_neg_inputs :: s_mul_busy :: s_div_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(7) { UFix() }; - val state = Reg(resetVal = s_ready); + val s_ready :: s_neg_inputs :: s_mul_busy :: s_div_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(7) { UInt() }; + val state = RegReset(s_ready) - val req = Reg{io.req.bits.clone} - val count = Reg{UFix(width = log2Up(w+1))} - val divby0 = Reg{Bool()} - val neg_out = Reg{Bool()} - val divisor = Reg{Bits(width = w+1)} // div only needs w bits - val remainder = Reg{Bits(width = 2*mulw+2)} // div only needs 2*w+1 bits + val req = Reg(io.req.bits.clone) + val count = Reg(UInt(width = log2Up(w+1))) + val divby0 = Reg(Bool()) + val neg_out = Reg(Bool()) + val divisor = Reg(Bits(width = w+1)) // div only needs w bits + val remainder = Reg(Bits(width = 2*mulw+2)) // div only needs 2*w+1 bits def sext(x: Bits, cmds: Vec[Bits]) = { val sign = Mux(io.req.bits.dw === DW_64, x(w-1), x(w/2-1)) && cmds.contains(io.req.bits.fn) @@ -51,11 +51,11 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke when (state === s_mul_busy) { val mulReg = Cat(remainder(2*mulw+1,w+1),remainder(w-1,0)) val mplier = mulReg(mulw-1,0) - val accum = mulReg(2*mulw,mulw).toFix - val mpcand = divisor.toFix + val accum = mulReg(2*mulw,mulw).toSInt + val mpcand = divisor.toSInt val prod = mplier(mulUnroll-1,0) * mpcand + accum - val nextMulReg = Cat(prod, mplier(mulw-1,mulUnroll)) - remainder := Cat(nextMulReg >> w, Bool(false), nextMulReg(w-1,0)).toFix + val nextMulReg = Cat(prod, mplier(mulw-1,mulUnroll)).toUInt + remainder := Cat(nextMulReg >> w, Bool(false), nextMulReg(w-1,0)).toSInt count := count + 1 when (count === mulw/mulUnroll-1) { @@ -66,13 +66,13 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke } } when (state === s_div_busy) { - when (count === UFix(w)) { + when (count === UInt(w)) { state := Mux(neg_out && !divby0, s_neg_output, s_done) when (AVec(FN_REM, 
FN_REMU) contains req.fn) { state := s_move_rem } } - count := count + UFix(1) + count := count + UInt(1) val msb = subtractor(w) divby0 := divby0 && !msb @@ -80,8 +80,8 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke val divisorMSB = Log2(divisor(w-1,0), w) val dividendMSB = Log2(remainder(w-1,0), w) - val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB - dividendMSB - val eOut = count === UFix(0) && eOutPos > 0 && (divisorMSB != UFix(0) || divisor(0)) + val eOutPos = UInt(w-1, log2Up(2*w)) + divisorMSB - dividendMSB + val eOut = count === UInt(0) && eOutPos > 0 && (divisorMSB != UInt(0) || divisor(0)) when (Bool(earlyOut) && eOut) { val shift = eOutPos(log2Up(w)-1,0) remainder := remainder(w-1,0) << shift @@ -101,7 +101,7 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke val mulState = Mux(lhs_sign, s_neg_inputs, s_mul_busy) val divState = Mux(lhs_sign || rhs_sign, s_neg_inputs, s_div_busy) state := Mux(isMul, mulState, divState) - count := UFix(0) + count := UInt(0) neg_out := !isMul && Mux(isRem, lhs_sign, lhs_sign != rhs_sign) divby0 := true divisor := Cat(rhs_sign, rhs_in) @@ -115,20 +115,20 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke io.req.ready := state === s_ready } -class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { +class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Module { val io = new MultiplierIO val w = io.req.bits.in1.getWidth - val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(6) { UFix() }; - val state = Reg(resetVal = s_ready); + val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(6) { UInt() }; + val state = RegReset(s_ready) - val count = Reg() { UFix(width = log2Up(w+1)) } - val divby0 = Reg() { Bool() }; - val neg_out = Reg() { Bool() }; - val r_req = Reg{io.req.bits.clone} + 
val count = Reg(UInt(width = log2Up(w+1))) + val divby0 = Reg(Bool()) + val neg_out = Reg(Bool()) + val r_req = Reg(io.req.bits) - val divisor = Reg() { Bits() } - val remainder = Reg() { Bits(width = 2*w+1) } + val divisor = Reg(Bits()) + val remainder = Reg(Bits(width = 2*w+1)) val subtractor = remainder(2*w,w) - divisor def sext(x: Bits, cmds: Vec[Bits]) = { @@ -159,10 +159,10 @@ class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) ext state := Mux(neg_out, s_neg_output, s_done) } when (state === s_busy) { - when (count === UFix(w)) { + when (count === UInt(w)) { state := Mux(r_isRem, s_move_rem, Mux(neg_out && !divby0, s_neg_output, s_done)) } - count := count + UFix(1) + count := count + UInt(1) val msb = subtractor(w) divby0 := divby0 && !msb @@ -170,8 +170,8 @@ class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) ext val divisorMSB = Log2(divisor, w) val dividendMSB = Log2(remainder(w-1,0), w) - val eOutPos = UFix(w-1, log2Up(2*w)) + divisorMSB - dividendMSB - val eOut = count === UFix(0) && eOutPos > 0 && (divisorMSB != UFix(0) || divisor(0)) + val eOutPos = UInt(w-1, log2Up(2*w)) + divisorMSB - dividendMSB + val eOut = count === UInt(0) && eOutPos > 0 && (divisorMSB != UInt(0) || divisor(0)) when (Bool(earlyOut) && eOut) { val shift = eOutPos(log2Up(w)-1,0) remainder := remainder(w-1,0) << shift @@ -187,7 +187,7 @@ class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) ext } when (io.req.fire()) { state := Mux(lhs_sign || rhs_sign, s_neg_inputs, s_busy) - count := UFix(0) + count := UInt(0) neg_out := Mux(AVec(FN_REM, FN_REMU).contains(io.req.bits.fn), lhs_sign, lhs_sign != rhs_sign) divby0 := true divisor := rhs_in diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 0c71889e..a22d0803 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -6,7 +6,7 @@ import Util._ import hwacha._ import 
uncore.constants.AddressConstants._ -class Datapath(implicit conf: RocketConfiguration) extends Component +class Datapath(implicit conf: RocketConfiguration) extends Module { val io = new Bundle { val host = new HTIFIO(conf.tl.ln.nClients) @@ -20,71 +20,71 @@ class Datapath(implicit conf: RocketConfiguration) extends Component } // execute definitions - val ex_reg_pc = Reg() { UFix() }; - val ex_reg_inst = Reg() { Bits() }; - val ex_reg_waddr = Reg() { UFix() }; - val ex_reg_ctrl_fn_dw = Reg() { UFix() }; - val ex_reg_ctrl_fn_alu = Reg() { UFix() }; - val ex_reg_sel_alu2 = Reg() { UFix() }; - val ex_reg_ctrl_sel_wb = Reg() { UFix() }; - val ex_reg_kill = Reg{Bool()} - val ex_reg_rs1_bypass = Reg{Bool()} - val ex_reg_rs1_lsb = Reg{Bits()} - val ex_reg_rs1_msb = Reg{Bits()} - val ex_reg_rs2_bypass = Reg{Bool()} - val ex_reg_rs2_lsb = Reg{Bits()} - val ex_reg_rs2_msb = Reg{Bits()} + val ex_reg_pc = Reg(UInt()) + val ex_reg_inst = Reg(Bits()) + val ex_reg_waddr = Reg(UInt()) + val ex_reg_ctrl_fn_dw = Reg(UInt()) + val ex_reg_ctrl_fn_alu = Reg(UInt()) + val ex_reg_sel_alu2 = Reg(UInt()) + val ex_reg_ctrl_sel_wb = Reg(UInt()) + val ex_reg_kill = Reg(Bool()) + val ex_reg_rs1_bypass = Reg(Bool()) + val ex_reg_rs1_lsb = Reg(Bits()) + val ex_reg_rs1_msb = Reg(Bits()) + val ex_reg_rs2_bypass = Reg(Bool()) + val ex_reg_rs2_lsb = Reg(Bits()) + val ex_reg_rs2_msb = Reg(Bits()) // memory definitions - val mem_reg_pc = Reg() { UFix() }; - val mem_reg_inst = Reg() { Bits() }; - val mem_reg_waddr = Reg() { UFix() }; - val mem_reg_wdata = Reg() { Bits() }; - val mem_reg_kill = Reg() { Bool() } - val mem_reg_store_data = Reg{Bits()} - val mem_reg_rs1 = Reg{Bits()} - val mem_reg_rs2 = Reg{Bits()} + val mem_reg_pc = Reg(UInt()) + val mem_reg_inst = Reg(Bits()) + val mem_reg_waddr = Reg(UInt()) + val mem_reg_wdata = Reg(Bits()) + val mem_reg_kill = Reg(Bool()) + val mem_reg_store_data = Reg(Bits()) + val mem_reg_rs1 = Reg(Bits()) + val mem_reg_rs2 = Reg(Bits()) // writeback definitions - 
val wb_reg_pc = Reg() { UFix() }; - val wb_reg_inst = Reg() { Bits() }; - val wb_reg_waddr = Reg() { UFix() } - val wb_reg_wdata = Reg() { Bits() } - val wb_reg_ll_wb = Reg(resetVal = Bool(false)); - val wb_wdata = Bits(); - val wb_reg_store_data = Reg{Bits()} - val wb_reg_rs1 = Reg{Bits()} - val wb_reg_rs2 = Reg{Bits()} + val wb_reg_pc = Reg(UInt()) + val wb_reg_inst = Reg(Bits()) + val wb_reg_waddr = Reg(UInt()) + val wb_reg_wdata = Reg(Bits()) + val wb_reg_ll_wb = RegReset(Bool(false)) + val wb_wdata = Bits() + val wb_reg_store_data = Reg(Bits()) + val wb_reg_rs1 = Reg(Bits()) + val wb_reg_rs2 = Reg(Bits()) val wb_wen = io.ctrl.wb_wen && io.ctrl.wb_valid || wb_reg_ll_wb // instruction decode stage val id_inst = io.imem.resp.bits.data val id_pc = io.imem.resp.bits.pc - val regfile_ = Mem(31){Bits(width = 64)} - def readRF(a: UFix) = regfile_(~a) - def writeRF(a: UFix, d: Bits) = regfile_(~a) := d + val regfile_ = Mem(Bits(width = 64), 31) + def readRF(a: UInt) = regfile_(~a) + def writeRF(a: UInt, d: Bits) = regfile_(~a) := d - val id_raddr1 = id_inst(26,22).toUFix; - val id_raddr2 = id_inst(21,17).toUFix; + val id_raddr1 = id_inst(26,22).toUInt; + val id_raddr2 = id_inst(21,17).toUInt; // bypass muxes - val id_rs1_zero = id_raddr1 === UFix(0) + val id_rs1_zero = id_raddr1 === UInt(0) val id_rs1_ex_bypass = io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr val id_rs1_mem_bypass = io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr val id_rs1_bypass = id_rs1_zero || id_rs1_ex_bypass || id_rs1_mem_bypass - val id_rs1_bypass_src = Mux(id_rs1_zero, UFix(0), Mux(id_rs1_ex_bypass, UFix(1), UFix(2) | io.ctrl.mem_load)) + val id_rs1_bypass_src = Mux(id_rs1_zero, UInt(0), Mux(id_rs1_ex_bypass, UInt(1), UInt(2) | io.ctrl.mem_load)) val id_rs1 = - Mux(id_raddr1 === UFix(0), UFix(0), + Mux(id_raddr1 === UInt(0), UInt(0), Mux(wb_wen && id_raddr1 === wb_reg_waddr, wb_wdata, readRF(id_raddr1))) - val id_rs2_zero = id_raddr2 === UFix(0) + val id_rs2_zero = id_raddr2 === UInt(0) val 
id_rs2_ex_bypass = io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr val id_rs2_mem_bypass = io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr val id_rs2_bypass = id_rs2_zero || id_rs2_ex_bypass || id_rs2_mem_bypass - val id_rs2_bypass_src = Mux(id_rs2_zero, UFix(0), Mux(id_rs2_ex_bypass, UFix(1), UFix(2) | io.ctrl.mem_load)) - val id_rs2 = Mux(id_raddr2 === UFix(0), UFix(0), + val id_rs2_bypass_src = Mux(id_rs2_zero, UInt(0), Mux(id_rs2_ex_bypass, UInt(1), UInt(2) | io.ctrl.mem_load)) + val id_rs2 = Mux(id_raddr2 === UInt(0), UInt(0), Mux(wb_wen && id_raddr2 === wb_reg_waddr, wb_wdata, readRF(id_raddr2))) @@ -94,11 +94,11 @@ class Datapath(implicit conf: RocketConfiguration) extends Component Mux(sel === A2_BTYPE, Cat(inst(31,27), inst(16,10)), Mux(sel === A2_JTYPE, inst(18,7), inst(21,10)))) - val msbs = Mux(sel === A2_ZERO, Bits(0), - Mux(sel === A2_LTYPE, inst(26,7).toFix, - Mux(sel === A2_JTYPE, inst(31,19).toFix, - Mux(sel === A2_ITYPE, inst(21), inst(31)).toFix))) - Cat(msbs, lsbs).toFix + val msbs = Mux(sel === A2_ZERO, SInt(0), + Mux(sel === A2_LTYPE, inst(26,7).toSInt, + Mux(sel === A2_JTYPE, inst(31,19).toSInt, + Mux(sel === A2_ITYPE, inst(21), inst(31)).toSInt))) + Cat(msbs, lsbs).toSInt } io.ctrl.inst := id_inst @@ -109,8 +109,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Component when (!io.ctrl.killd) { ex_reg_pc := id_pc ex_reg_inst := id_inst - ex_reg_waddr := Mux(io.ctrl.sel_wa === WA_RD, id_inst(31,27).toUFix, RA) - ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUFix + ex_reg_waddr := Mux(io.ctrl.sel_wa === WA_RD, id_inst(31,27).toUInt, RA) + ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUInt ex_reg_ctrl_fn_alu := io.ctrl.fn_alu ex_reg_sel_alu2 := io.ctrl.sel_alu2 ex_reg_ctrl_sel_wb := io.ctrl.sel_wb @@ -137,29 +137,29 @@ class Datapath(implicit conf: RocketConfiguration) extends Component val dmem_resp_data = if (conf.fastLoadByte) io.dmem.resp.bits.data_subword else io.dmem.resp.bits.data val ex_rs1 = - Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === 
UFix(3) && Bool(conf.fastLoadWord), dmem_resp_data, - Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(2), wb_reg_wdata, - Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(1), mem_reg_wdata, - Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UFix(0), Bits(0), + Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(3) && Bool(conf.fastLoadWord), dmem_resp_data, + Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(2), wb_reg_wdata, + Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(1), mem_reg_wdata, + Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(0), Bits(0), Cat(ex_reg_rs1_msb, ex_reg_rs1_lsb))))) val ex_rs2 = - Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(3) && Bool(conf.fastLoadWord), dmem_resp_data, - Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(2), wb_reg_wdata, - Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(1), mem_reg_wdata, - Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UFix(0), Bits(0), + Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(3) && Bool(conf.fastLoadWord), dmem_resp_data, + Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(2), wb_reg_wdata, + Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(1), mem_reg_wdata, + Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(0), Bits(0), Cat(ex_reg_rs2_msb, ex_reg_rs2_lsb))))) val ex_imm = imm(ex_reg_sel_alu2, ex_reg_inst) val ex_op2 = Mux(ex_reg_sel_alu2 != A2_RTYPE, ex_imm, ex_rs2) - val alu = new ALU + val alu = Module(new ALU) alu.io.dw := ex_reg_ctrl_fn_dw; alu.io.fn := ex_reg_ctrl_fn_alu; - alu.io.in2 := ex_op2.toUFix - alu.io.in1 := ex_rs1.toUFix + alu.io.in2 := ex_op2.toUInt + alu.io.in1 := ex_rs1.toUInt // multiplier and divider - val div = new MulDiv(mulUnroll = if (conf.fastMulDiv) 8 else 1, - earlyOut = conf.fastMulDiv) + val div = Module(new MulDiv(mulUnroll = if (conf.fastMulDiv) 8 else 1, + earlyOut = conf.fastMulDiv)) div.io.req.valid := io.ctrl.div_mul_val div.io.req.bits.dw := ex_reg_ctrl_fn_dw div.io.req.bits.fn := ex_reg_ctrl_fn_alu @@ -173,16 +173,16 @@ class Datapath(implicit conf: 
RocketConfiguration) extends Component io.fpu.fromint_data := ex_rs1 io.ctrl.ex_waddr := ex_reg_waddr - def vaSign(a0: Bits, ea: Bits) = { + def vaSign(a0: UInt, ea: Bits) = { // efficient means to compress 64-bit VA into VADDR_BITS+1 bits // (VA is bad if VA(VADDR_BITS) != VA(VADDR_BITS-1)) val a = a0 >> VADDR_BITS-1 val e = ea(VADDR_BITS,VADDR_BITS-1) - Mux(a === UFix(0) || a === UFix(1), e != UFix(0), - Mux(a === Fix(-1) || a === Fix(-2), e === Fix(-1), + Mux(a === UInt(0) || a === UInt(1), e != UInt(0), + Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1), e(0))) } - val ex_effective_address = Cat(vaSign(ex_rs1, alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUFix + val ex_effective_address = Cat(vaSign(ex_rs1, alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUInt // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module @@ -192,7 +192,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component require(io.dmem.req.bits.tag.getWidth >= 6) // processor control regfile read - val pcr = new PCR + val pcr = Module(new PCR) pcr.io.host <> io.host pcr.io <> io.ctrl pcr.io.pc := wb_reg_pc @@ -204,18 +204,18 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.ptw.status := pcr.io.status // branch resolution logic - io.ctrl.jalr_eq := ex_rs1 === id_pc.toFix && ex_reg_inst(21,10) === UFix(0) + io.ctrl.jalr_eq := ex_rs1 === id_pc.toSInt && ex_reg_inst(21,10) === UInt(0) io.ctrl.ex_br_taken := Mux(io.ctrl.ex_br_type === BR_EQ, ex_rs1 === ex_rs2, Mux(io.ctrl.ex_br_type === BR_NE, ex_rs1 != ex_rs2, - Mux(io.ctrl.ex_br_type === BR_LT, ex_rs1.toFix < ex_rs2.toFix, - Mux(io.ctrl.ex_br_type === BR_GE, ex_rs1.toFix >= ex_rs2.toFix, + Mux(io.ctrl.ex_br_type === BR_LT, ex_rs1.toSInt < ex_rs2.toSInt, + Mux(io.ctrl.ex_br_type === BR_GE, ex_rs1.toSInt >= ex_rs2.toSInt, Mux(io.ctrl.ex_br_type === BR_LTU, ex_rs1 < ex_rs2, Mux(io.ctrl.ex_br_type === BR_GEU, ex_rs1 >= ex_rs2, 
io.ctrl.ex_br_type === BR_J)))))) - val ex_pc_plus4 = ex_reg_pc.toFix + Mux(ex_reg_sel_alu2 === A2_LTYPE, ex_reg_inst(26,7).toFix << 12, Fix(4)) - val ex_branch_target = ex_reg_pc.toFix + (ex_imm << 1) + val ex_pc_plus4 = ex_reg_pc.toSInt + Mux(ex_reg_sel_alu2 === A2_LTYPE, ex_reg_inst(26,7).toSInt << 12, SInt(4)) + val ex_branch_target = ex_reg_pc.toSInt + (ex_imm << 1) val tsc_reg = WideCounter(64) val irt_reg = WideCounter(64, io.ctrl.wb_valid) @@ -247,7 +247,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool - val dmem_resp_waddr = io.dmem.resp.bits.tag.toUFix >> UFix(1) + val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1) val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu val mem_ll_wdata = Bits() @@ -260,7 +260,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.ctrl.mem_ll_waddr := dmem_resp_waddr io.ctrl.mem_ll_wb := Bool(true) } - when (io.ctrl.mem_ll_waddr === UFix(0)) { io.ctrl.mem_ll_wb := Bool(false) } + when (io.ctrl.mem_ll_waddr === UInt(0)) { io.ctrl.mem_ll_wb := Bool(false) } io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu io.fpu.dmem_resp_data := io.dmem.resp.bits.data @@ -291,7 +291,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component if (conf.vec) { // vector datapath - val vec = new rocketDpathVec() + val vec = Module(new rocketDpathVec) vec.io.ctrl <> io.vec_ctrl io.vec_iface <> vec.io.iface @@ -319,7 +319,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Component io.ctrl.fp_sboard_clra := dmem_resp_waddr // processor control regfile write - pcr.io.rw.addr := wb_reg_inst(26,22).toUFix + pcr.io.rw.addr := wb_reg_inst(26,22).toUInt pcr.io.rw.cmd := io.ctrl.pcr pcr.io.rw.wdata := wb_reg_wdata @@ -327,13 +327,13 @@ class Datapath(implicit conf: RocketConfiguration) extends Component 
io.imem.req.bits.currentpc := ex_reg_pc io.imem.req.bits.pc := Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, - Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address, ex_branch_target), + Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address.toSInt, ex_branch_target), Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec), - wb_reg_pc))).toUFix // PC_WB + wb_reg_pc))).toUInt // PC_WB printf("C: %d [%d] pc=[%x] W[r%d=%x] R[r%d=%x] R[r%d=%x] inst=[%x] %s\n", tsc_reg(32,0), io.ctrl.wb_valid, wb_reg_pc, - Mux(wb_wen, wb_reg_waddr, UFix(0)), wb_wdata, + Mux(wb_wen, wb_reg_waddr, UInt(0)), wb_wdata, wb_reg_inst(26,22), wb_reg_rs1, wb_reg_inst(21,17), wb_reg_rs2, wb_reg_inst, Disassemble(wb_reg_inst)) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 3c751fe0..4aeed0c6 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -39,13 +39,13 @@ import ALU._ class ALUIO(implicit conf: RocketConfiguration) extends Bundle { val dw = Bits(INPUT, SZ_DW) val fn = Bits(INPUT, SZ_ALU_FN) - val in2 = UFix(INPUT, conf.xprlen) - val in1 = UFix(INPUT, conf.xprlen) - val out = UFix(OUTPUT, conf.xprlen) - val adder_out = UFix(OUTPUT, conf.xprlen) + val in2 = UInt(INPUT, conf.xprlen) + val in1 = UInt(INPUT, conf.xprlen) + val out = UInt(OUTPUT, conf.xprlen) + val adder_out = UInt(OUTPUT, conf.xprlen) } -class ALU(implicit conf: RocketConfiguration) extends Component +class ALU(implicit conf: RocketConfiguration) extends Module { val io = new ALUIO @@ -57,12 +57,12 @@ class ALU(implicit conf: RocketConfiguration) extends Component Mux(isSLTU(io.fn), io.in2(63), io.in1(63))) // SLL, SRL, SRA - val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0)).toUFix - val shin_hi_32 = Mux(isSub(io.fn), Fill(32, io.in1(31)), UFix(0,32)) + val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0)).toUInt + val shin_hi_32 = Mux(isSub(io.fn), Fill(32, 
io.in1(31)), UInt(0,32)) val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) val shin_r = Cat(shin_hi, io.in1(31,0)) val shin = Mux(io.fn === FN_SR || io.fn === FN_SRA, shin_r, Reverse(shin_r)) - val shout_r = (Cat(isSub(io.fn) & shin(63), shin).toFix >> shamt)(63,0) + val shout_r = (Cat(isSub(io.fn) & shin(63), shin).toSInt >> shamt)(63,0) val shout_l = Reverse(shout_r) val bitwise_logic = @@ -79,6 +79,6 @@ class ALU(implicit conf: RocketConfiguration) extends Component bitwise_logic)))) val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) - io.out := Cat(out_hi, out64(31,0)).toUFix + io.out := Cat(out_hi, out64(31,0)).toUInt io.adder_out := sum } diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 65f377b2..762f1b30 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -8,18 +8,18 @@ import scala.math._ class DpathBTBIO extends Bundle { - val current_pc = UFix(INPUT, VADDR_BITS); + val current_pc = UInt(INPUT, VADDR_BITS); val hit = Bool(OUTPUT); - val target = UFix(OUTPUT, VADDR_BITS); + val target = UInt(OUTPUT, VADDR_BITS); val wen = Bool(INPUT); val clr = Bool(INPUT); val invalidate = Bool(INPUT); - val correct_pc = UFix(INPUT, VADDR_BITS); - val correct_target = UFix(INPUT, VADDR_BITS); + val correct_pc = UInt(INPUT, VADDR_BITS); + val correct_target = UInt(INPUT, VADDR_BITS); } // fully-associative branch target buffer -class rocketDpathBTB(entries: Int) extends Component +class rocketDpathBTB(entries: Int) extends Module { val io = new DpathBTBIO @@ -29,18 +29,18 @@ class rocketDpathBTB(entries: Int) extends Component val hit = Bool() val update = Bool() var update_reduction = Bool(false) - val hits = Vec(entries) { Bool() } - val updates = Vec(entries) { Bool() } - val targets = Vec(entries) { Reg() { UFix() } } + val hits = Vec.fill(entries){Bool()} + val updates = Vec.fill(entries){Bool()} + val targets = Vec.fill(entries){Reg(UInt())} 
val anyUpdate = updates.toBits.orR for (i <- 0 until entries) { - val tag = Reg() { UFix() } - val valid = Reg(resetVal = Bool(false)) + val tag = Reg(UInt()) + val valid = RegReset(Bool(false)) hits(i) := valid && tag === io.current_pc updates(i) := valid && tag === io.correct_pc - when (io.wen && (updates(i) || !anyUpdate && UFix(i) === repl_way)) { + when (io.wen && (updates(i) || !anyUpdate && UInt(i) === repl_way)) { valid := Bool(false) when (!io.clr) { valid := Bool(true) @@ -103,26 +103,26 @@ object PCR val FROMHOST = 31 } -class PCR(implicit conf: RocketConfiguration) extends Component +class PCR(implicit conf: RocketConfiguration) extends Module { val io = new Bundle { val host = new HTIFIO(conf.tl.ln.nClients) val rw = new Bundle { - val addr = UFix(INPUT, log2Up(conf.nxpr)) + val addr = UInt(INPUT, log2Up(conf.nxpr)) val cmd = Bits(INPUT, PCR.SZ) val rdata = Bits(OUTPUT, conf.xprlen) val wdata = Bits(INPUT, conf.xprlen) } val status = new Status().asOutput - val ptbr = UFix(OUTPUT, PADDR_BITS) - val evec = UFix(OUTPUT, VADDR_BITS) + val ptbr = UInt(OUTPUT, PADDR_BITS) + val evec = UInt(OUTPUT, VADDR_BITS) val exception = Bool(INPUT) - val cause = UFix(INPUT, 6) + val cause = UInt(INPUT, 6) val badvaddr_wen = Bool(INPUT) val vec_irq_aux = Bits(INPUT, conf.xprlen) val vec_irq_aux_wen = Bool(INPUT) - val pc = UFix(INPUT, VADDR_BITS+1) + val pc = UInt(INPUT, VADDR_BITS+1) val eret = Bool(INPUT) val ei = Bool(INPUT) val di = Bool(INPUT) @@ -131,38 +131,38 @@ class PCR(implicit conf: RocketConfiguration) extends Component val irq_ipi = Bool(OUTPUT) val replay = Bool(OUTPUT) val vecbank = Bits(OUTPUT, 8) - val vecbankcnt = UFix(OUTPUT, 4) + val vecbankcnt = UInt(OUTPUT, 4) val stats = Bool(OUTPUT) - val vec_appvl = UFix(INPUT, 12) - val vec_nxregs = UFix(INPUT, 6) - val vec_nfregs = UFix(INPUT, 6) + val vec_appvl = UInt(INPUT, 12) + val vec_nxregs = UInt(INPUT, 6) + val vec_nfregs = UInt(INPUT, 6) } import PCR._ - val reg_epc = Reg{Bits(width = conf.xprlen)} - 
val reg_badvaddr = Reg{Bits(width = conf.xprlen)} - val reg_ebase = Reg{Bits(width = conf.xprlen)} + val reg_epc = Reg(Bits(width = conf.xprlen)) + val reg_badvaddr = Reg(Bits(width = conf.xprlen)) + val reg_ebase = Reg(Bits(width = conf.xprlen)) val reg_count = WideCounter(32) - val reg_compare = Reg{Bits(width = 32)} - val reg_cause = Reg{Bits(width = io.cause.getWidth)} - val reg_tohost = Reg(resetVal = Bits(0, conf.xprlen)) - val reg_fromhost = Reg(resetVal = Bits(0, conf.xprlen)) - val reg_coreid = Reg{Bits(width = 16)} - val reg_k0 = Reg{Bits(width = conf.xprlen)} - val reg_k1 = Reg{Bits(width = conf.xprlen)} - val reg_ptbr = Reg{UFix(width = PADDR_BITS)} - val reg_vecbank = Reg(resetVal = Fix(-1,8).toBits) - val reg_stats = Reg(resetVal = Bool(false)) - val reg_error_mode = Reg(resetVal = Bool(false)) - val reg_status = Reg{new Status} // reset down below + val reg_compare = Reg(Bits(width = 32)) + val reg_cause = Reg(Bits(width = io.cause.getWidth)) + val reg_tohost = RegReset(Bits(0, conf.xprlen)) + val reg_fromhost = RegReset(Bits(0, conf.xprlen)) + val reg_coreid = Reg(Bits(width = 16)) + val reg_k0 = Reg(Bits(width = conf.xprlen)) + val reg_k1 = Reg(Bits(width = conf.xprlen)) + val reg_ptbr = Reg(UInt(width = PADDR_BITS)) + val reg_vecbank = RegReset(SInt(-1,8).toBits) + val reg_stats = RegReset(Bool(false)) + val reg_error_mode = RegReset(Bool(false)) + val reg_status = Reg(new Status) // reset down below - val r_irq_timer = Reg(resetVal = Bool(false)) - val r_irq_ipi = Reg(resetVal = Bool(true)) + val r_irq_timer = RegReset(Bool(false)) + val r_irq_ipi = RegReset(Bool(true)) - val host_pcr_req_valid = Reg{Bool()} // don't reset + val host_pcr_req_valid = Reg(Bool()) // don't reset val host_pcr_req_fire = host_pcr_req_valid && io.rw.cmd === PCR.N - val host_pcr_rep_valid = Reg{Bool()} // don't reset - val host_pcr_bits = Reg{io.host.pcr_req.bits.clone} + val host_pcr_rep_valid = Reg(Bool()) // don't reset + val host_pcr_bits = Reg(io.host.pcr_req.bits) 
io.host.pcr_req.ready := !host_pcr_req_valid && !host_pcr_rep_valid io.host.pcr_rep.valid := host_pcr_rep_valid io.host.pcr_rep.bits := host_pcr_bits.data @@ -186,12 +186,12 @@ class PCR(implicit conf: RocketConfiguration) extends Component io.status.ip := Cat(r_irq_timer, reg_fromhost.orR, r_irq_ipi, Bool(false), Bool(false), Bool(false), Bool(false), Bool(false)) io.ptbr_wen := wen && addr === PTBR - io.evec := Mux(io.exception, reg_ebase, reg_epc).toUFix + io.evec := Mux(io.exception, reg_ebase, reg_epc).toUInt io.ptbr := reg_ptbr io.host.debug.error_mode := reg_error_mode io.vecbank := reg_vecbank - var cnt = UFix(0,4) + var cnt = UInt(0,4) for (i <- 0 until 8) cnt = cnt + reg_vecbank(i) io.vecbankcnt := cnt(3,0) @@ -201,8 +201,8 @@ class PCR(implicit conf: RocketConfiguration) extends Component when (io.badvaddr_wen || io.vec_irq_aux_wen) { val wdata = Mux(io.badvaddr_wen, io.rw.wdata, io.vec_irq_aux) val (upper, lower) = Split(wdata, VADDR_BITS) - val sign = Mux(lower.toFix < Fix(0), upper.andR, upper.orR) - reg_badvaddr := Cat(sign, lower).toFix + val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) + reg_badvaddr := Cat(sign, lower).toSInt } when (io.exception) { @@ -212,7 +212,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component reg_status.s := true reg_status.ps := reg_status.s reg_status.et := false - reg_epc := io.pc.toFix + reg_epc := io.pc.toSInt reg_cause := io.cause } @@ -231,7 +231,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component io.host.ipi_req.bits := io.rw.wdata io.replay := io.host.ipi_req.valid && !io.host.ipi_req.ready - when (host_pcr_req_fire && !host_pcr_bits.rw && host_pcr_bits.addr === TOHOST) { reg_tohost := UFix(0) } + when (host_pcr_req_fire && !host_pcr_bits.rw && host_pcr_bits.addr === TOHOST) { reg_tohost := UInt(0) } val read_impl = Bits(2) val read_ptbr = reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS @@ -260,17 +260,17 @@ class PCR(implicit conf: RocketConfiguration) extends 
Component if (!conf.fpu) reg_status.ef := false if (!conf.rvc) reg_status.ec := false } - when (addr === EPC) { reg_epc := wdata(VADDR_BITS,0).toFix } - when (addr === EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toFix } - when (addr === COUNT) { reg_count := wdata.toUFix } - when (addr === COMPARE) { reg_compare := wdata(31,0).toUFix; r_irq_timer := Bool(false); } + when (addr === EPC) { reg_epc := wdata(VADDR_BITS,0).toSInt } + when (addr === EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toSInt } + when (addr === COUNT) { reg_count := wdata.toUInt } + when (addr === COMPARE) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false); } when (addr === COREID) { reg_coreid := wdata(15,0) } - when (addr === FROMHOST) { when (reg_fromhost === UFix(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } - when (addr === TOHOST) { when (reg_tohost === UFix(0)) { reg_tohost := wdata } } + when (addr === FROMHOST) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } + when (addr === TOHOST) { when (reg_tohost === UInt(0)) { reg_tohost := wdata } } when (addr === CLR_IPI) { r_irq_ipi := wdata(0) } when (addr === K0) { reg_k0 := wdata; } when (addr === K1) { reg_k1 := wdata; } - when (addr === PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUFix; } + when (addr === PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUInt; } when (addr === VECBANK) { reg_vecbank:= wdata(7,0) } when (addr === STATS) { reg_stats := wdata(0) } } @@ -278,7 +278,7 @@ class PCR(implicit conf: RocketConfiguration) extends Component io.host.ipi_rep.ready := Bool(true) when (io.host.ipi_rep.valid) { r_irq_ipi := Bool(true) } - when (reset) { + when(this.getReset) { reg_status.et := false reg_status.ef := false reg_status.ev := false @@ -301,7 +301,7 @@ class ioReadPort(d: Int, w: Int) extends Bundle class ioWritePort(d: Int, w: Int) extends Bundle { - val addr = UFix(INPUT, log2Up(d)) + val addr = UInt(INPUT, 
log2Up(d)) val en = Bool(INPUT) val data = Bits(INPUT, w) override def clone = new ioWritePort(d, w).asInstanceOf[this.type] diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 9300b012..01449e53 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -7,15 +7,15 @@ import hwacha.Constants._ class DpathVecInterfaceIO extends Bundle { - val vcmdq = new FIFOIO()(Bits(width = SZ_VCMD)) - val vximm1q = new FIFOIO()(Bits(width = SZ_VIMM)) - val vximm2q = new FIFOIO()(Bits(width = SZ_VSTRIDE)) - val vcntq = new FIFOIO()(Bits(width = SZ_VLEN+1)) + val vcmdq = Decoupled(Bits(width = SZ_VCMD)) + val vximm1q = Decoupled(Bits(width = SZ_VIMM)) + val vximm2q = Decoupled(Bits(width = SZ_VSTRIDE)) + val vcntq = Decoupled(Bits(width = SZ_VLEN+1)) - val vpfcmdq = new FIFOIO()(Bits(width = SZ_VCMD)) - val vpfximm1q = new FIFOIO()(Bits(width = SZ_VIMM)) - val vpfximm2q = new FIFOIO()(Bits(width = SZ_VSTRIDE)) - val vpfcntq = new FIFOIO()(Bits(width = SZ_VLEN)) + val vpfcmdq = Decoupled(Bits(width = SZ_VCMD)) + val vpfximm1q = Decoupled(Bits(width = SZ_VIMM)) + val vpfximm2q = Decoupled(Bits(width = SZ_VSTRIDE)) + val vpfcntq = Decoupled(Bits(width = SZ_VLEN)) val evac_addr = Bits(OUTPUT, 64) val irq_aux = Bits(INPUT, 64) @@ -28,90 +28,90 @@ class DpathVecIO extends Bundle val valid = Bool(INPUT) val inst = Bits(INPUT, 32) val vecbank = Bits(INPUT, 8) - val vecbankcnt = UFix(INPUT, 4) + val vecbankcnt = UInt(INPUT, 4) val wdata = Bits(INPUT, 64) val rs2 = Bits(INPUT, 64) val wen = Bool(OUTPUT) val irq_aux = Bits(OUTPUT, 64) - val appvl = UFix(OUTPUT, 12) - val nxregs = UFix(OUTPUT, 6) - val nfregs = UFix(OUTPUT, 6) + val appvl = UInt(OUTPUT, 12) + val nxregs = UInt(OUTPUT, 6) + val nfregs = UInt(OUTPUT, 6) } -class rocketDpathVec extends Component +class rocketDpathVec extends Module { val io = new DpathVecIO val nxregs_stage = Mux(io.ctrl.fn === VEC_CFG, io.wdata(5,0), io.inst(15,10)) val 
nfregs_stage = Mux(io.ctrl.fn === VEC_CFG, io.rs2(5,0), io.inst(21,16)) - val nxregs = Mux(nxregs_stage(5), Bits(32), Mux(nxregs_stage === Bits(0), Bits(1), nxregs_stage)) + UFix(0,7) - val nfregs = Mux(nfregs_stage(5), Bits(32), nfregs_stage) + UFix(0,7) + val nxregs = Mux(nxregs_stage(5), Bits(32), Mux(nxregs_stage === Bits(0), Bits(1), nxregs_stage)) + UInt(0,7) + val nfregs = Mux(nfregs_stage(5), Bits(32), nfregs_stage) + UInt(0,7) val nregs = nxregs + nfregs - //val uts_per_bank = UFix(4,9) + //val uts_per_bank = UInt(4,9) val nreg_mod_bank = MuxLookup( - nregs, UFix(4,9), Array( - UFix(0,7) -> UFix(256,9), - UFix(1,7) -> UFix(256,9), - UFix(2,7) -> UFix(256,9), - UFix(3,7) -> UFix(128,9), - UFix(4,7) -> UFix(85,9), - UFix(5,7) -> UFix(64,9), - UFix(6,7) -> UFix(51,9), - UFix(7,7) -> UFix(42,9), - UFix(8,7) -> UFix(36,9), - UFix(9,7) -> UFix(32,9), - UFix(10,7) -> UFix(28,9), - UFix(11,7) -> UFix(25,9), - UFix(12,7) -> UFix(23,9), - UFix(13,7) -> UFix(21,9), - UFix(14,7) -> UFix(19,9), - UFix(15,7) -> UFix(18,9), - UFix(16,7) -> UFix(17,9), - UFix(17,7) -> UFix(16,9), - UFix(18,7) -> UFix(15,9), - UFix(19,7) -> UFix(14,9), - UFix(20,7) -> UFix(13,9), - UFix(21,7) -> UFix(12,9), - UFix(22,7) -> UFix(12,9), - UFix(23,7) -> UFix(11,9), - UFix(24,7) -> UFix(11,9), - UFix(25,7) -> UFix(10,9), - UFix(26,7) -> UFix(10,9), - UFix(27,7) -> UFix(9,9), - UFix(28,7) -> UFix(9,9), - UFix(29,7) -> UFix(9,9), - UFix(30,7) -> UFix(8,9), - UFix(31,7) -> UFix(8,9), - UFix(32,7) -> UFix(8,9), - UFix(33,7) -> UFix(8,9), - UFix(34,7) -> UFix(7,9), - UFix(35,7) -> UFix(7,9), - UFix(36,7) -> UFix(7,9), - UFix(37,7) -> UFix(7,9), - UFix(38,7) -> UFix(6,9), - UFix(39,7) -> UFix(6,9), - UFix(40,7) -> UFix(6,9), - UFix(41,7) -> UFix(6,9), - UFix(42,7) -> UFix(6,9), - UFix(43,7) -> UFix(6,9), - UFix(44,7) -> UFix(5,9), - UFix(45,7) -> UFix(5,9), - UFix(46,7) -> UFix(5,9), - UFix(47,7) -> UFix(5,9), - UFix(48,7) -> UFix(5,9), - UFix(49,7) -> UFix(5,9), - UFix(50,7) -> UFix(5,9), - 
UFix(51,7) -> UFix(5,9), - UFix(52,7) -> UFix(5,9) + nregs, UInt(4,9), Array( + UInt(0,7) -> UInt(256,9), + UInt(1,7) -> UInt(256,9), + UInt(2,7) -> UInt(256,9), + UInt(3,7) -> UInt(128,9), + UInt(4,7) -> UInt(85,9), + UInt(5,7) -> UInt(64,9), + UInt(6,7) -> UInt(51,9), + UInt(7,7) -> UInt(42,9), + UInt(8,7) -> UInt(36,9), + UInt(9,7) -> UInt(32,9), + UInt(10,7) -> UInt(28,9), + UInt(11,7) -> UInt(25,9), + UInt(12,7) -> UInt(23,9), + UInt(13,7) -> UInt(21,9), + UInt(14,7) -> UInt(19,9), + UInt(15,7) -> UInt(18,9), + UInt(16,7) -> UInt(17,9), + UInt(17,7) -> UInt(16,9), + UInt(18,7) -> UInt(15,9), + UInt(19,7) -> UInt(14,9), + UInt(20,7) -> UInt(13,9), + UInt(21,7) -> UInt(12,9), + UInt(22,7) -> UInt(12,9), + UInt(23,7) -> UInt(11,9), + UInt(24,7) -> UInt(11,9), + UInt(25,7) -> UInt(10,9), + UInt(26,7) -> UInt(10,9), + UInt(27,7) -> UInt(9,9), + UInt(28,7) -> UInt(9,9), + UInt(29,7) -> UInt(9,9), + UInt(30,7) -> UInt(8,9), + UInt(31,7) -> UInt(8,9), + UInt(32,7) -> UInt(8,9), + UInt(33,7) -> UInt(8,9), + UInt(34,7) -> UInt(7,9), + UInt(35,7) -> UInt(7,9), + UInt(36,7) -> UInt(7,9), + UInt(37,7) -> UInt(7,9), + UInt(38,7) -> UInt(6,9), + UInt(39,7) -> UInt(6,9), + UInt(40,7) -> UInt(6,9), + UInt(41,7) -> UInt(6,9), + UInt(42,7) -> UInt(6,9), + UInt(43,7) -> UInt(6,9), + UInt(44,7) -> UInt(5,9), + UInt(45,7) -> UInt(5,9), + UInt(46,7) -> UInt(5,9), + UInt(47,7) -> UInt(5,9), + UInt(48,7) -> UInt(5,9), + UInt(49,7) -> UInt(5,9), + UInt(50,7) -> UInt(5,9), + UInt(51,7) -> UInt(5,9), + UInt(52,7) -> UInt(5,9) )) - val max_threads = UFix(WIDTH_BMASK) + val max_threads = UInt(WIDTH_BMASK) val uts_per_bank = Mux(Bool(HAVE_PVFB) & nreg_mod_bank > max_threads, max_threads, nreg_mod_bank) - val reg_hwvl = Reg(resetVal = UFix(32, 12)) - val reg_appvl0 = Reg(resetVal = Bool(true)) + val reg_hwvl = RegReset(UInt(32, 12)) + val reg_appvl0 = RegReset(Bool(true)) val hwvl_vcfg = (uts_per_bank * io.vecbankcnt)(11,0) val hwvl = @@ -119,13 +119,13 @@ class rocketDpathVec extends 
Component reg_hwvl) val appvl = - Mux(io.ctrl.fn === VEC_CFG, UFix(0), - Mux(io.wdata(11,0) < hwvl, io.wdata(11,0).toUFix, - hwvl.toUFix)) + Mux(io.ctrl.fn === VEC_CFG, UInt(0), + Mux(io.wdata(11,0) < hwvl, io.wdata(11,0).toUInt, + hwvl.toUInt)) - val reg_nxregs = Reg(resetVal = UFix(32, 6)) - val reg_nfregs = Reg(resetVal = UFix(32, 6)) - val reg_appvl = Reg(resetVal = UFix(0, 12)) + val reg_nxregs = RegReset(UInt(32, 6)) + val reg_nfregs = RegReset(UInt(32, 6)) + val reg_appvl = RegReset(UInt(0, 12)) when (io.valid) { @@ -148,7 +148,7 @@ class rocketDpathVec extends Component io.nxregs := reg_nxregs io.nfregs := reg_nfregs - val appvlm1 = appvl - UFix(1) + val appvlm1 = appvl - UInt(1) val waddr = io.inst(31,27) val raddr1 = io.inst(26,22) diff --git a/rocket/src/main/scala/ecc.scala b/rocket/src/main/scala/ecc.scala index d122e3a6..fc5e8773 100644 --- a/rocket/src/main/scala/ecc.scala +++ b/rocket/src/main/scala/ecc.scala @@ -63,7 +63,7 @@ class SECCode extends Code } else x(mapping(i)) } - Vec(y){Bool()}.toBits + Vec(y).toBits } def decode(y: Bits) = new Decoding { val n = y.getWidth @@ -75,11 +75,11 @@ class SECCode extends Code yield y(j-1) r reduce (_^_) } - val s = Vec(syndrome){Bool()}.toBits + val s = Vec(syndrome).toBits - private def swizzle(z: Bits) = Vec((1 to n).filter(i => !isPow2(i)).map(i => z(i-1))){Bool()}.toBits + private def swizzle(z: Bits) = Vec((1 to n).filter(i => !isPow2(i)).map(i => z(i-1))).toBits def uncorrected = swizzle(y) - def corrected = swizzle(((y << 1) ^ UFixToOH(s)) >> 1) + def corrected = swizzle(((y.toUInt << 1) ^ UIntToOH(s)) >> 1) def correctable = s.orR def uncorrectable = Bool(false) } @@ -109,12 +109,12 @@ object ErrGen // generate a 1-bit error with approximate probability 2^-f def apply(width: Int, f: Int): Bits = { require(width > 0 && f >= 0 && log2Up(width) + f <= 16) - UFixToOH(LFSR16()(log2Up(width)+f-1,0))(width-1,0) + UIntToOH(LFSR16()(log2Up(width)+f-1,0))(width-1,0) } def apply(x: Bits, f: Int): Bits = x ^ 
apply(x.getWidth, f) } -class SECDEDTest extends Component +class SECDEDTest extends Module { val code = new SECDEDCode val k = 4 diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index f1eee8cb..05aa270b 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -63,7 +63,7 @@ class FPUCtrlSigs extends Bundle val wrfsr = Bool() } -class FPUDecoder extends Component +class FPUDecoder extends Module { val io = new Bundle { val inst = Bits(INPUT, 32) @@ -160,7 +160,7 @@ class DpathFPUIO extends Bundle { val dmem_resp_val = Bool(OUTPUT) val dmem_resp_type = Bits(OUTPUT, 3) - val dmem_resp_tag = UFix(OUTPUT, 5) + val dmem_resp_tag = UInt(OUTPUT, 5) val dmem_resp_data = Bits(OUTPUT, 64) } @@ -173,24 +173,24 @@ class CtrlFPUIO extends Bundle { val dec = new FPUCtrlSigs().asInput val sboard_set = Bool(INPUT) val sboard_clr = Bool(INPUT) - val sboard_clra = UFix(INPUT, 5) + val sboard_clra = UInt(INPUT, 5) } object RegEn { def apply[T <: Data](data: T, en: Bool) = { - val r = Reg() { data.clone } + val r = Reg(data) when (en) { r := data } r } def apply[T <: Bits](data: T, en: Bool, resetVal: T) = { - val r = Reg(resetVal = resetVal) { data.clone } + val r = RegReset(resetVal) when (en) { r := data } r } } -class FPToInt extends Component +class FPToInt extends Module { class Input extends Bundle { val single = Bool() @@ -202,8 +202,8 @@ class FPToInt extends Component override def clone = new Input().asInstanceOf[this.type] } val io = new Bundle { - val in = new PipeIO()(new Input).flip - val out = new PipeIO()(new Bundle { + val in = Valid(new Input).flip + val out = Valid(new Bundle { val lt = Bool() val store = Bits(width = 64) val toint = Bits(width = 64) @@ -211,10 +211,10 @@ class FPToInt extends Component }) } - val in = Reg() { new Input } - val valid = Reg(io.in.valid) + val in = Reg(new Input) + val valid = RegUpdate(io.in.valid) when (io.in.valid) { - def upconvert(x: Bits) = 
hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1 + def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1 when (io.in.bits.cmd === FCMD_STORE) { in.in1 := io.in.bits.in2 }.otherwise { @@ -231,11 +231,11 @@ class FPToInt extends Component val unrec_s = hardfloat.recodedFloatNToFloatN(in.in1, 23, 9) val unrec_d = hardfloat.recodedFloatNToFloatN(in.in1, 52, 12) - val dcmp = new hardfloat.recodedFloatNCompare(52, 12) + val dcmp = Module(new hardfloat.recodedFloatNCompare(52, 12)) dcmp.io.a := in.in1 dcmp.io.b := in.in2 val dcmp_out = (in.cmd & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR - val dcmp_exc = (in.cmd & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UFix(4) + val dcmp_exc = (in.cmd & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UInt(4) val d2i = hardfloat.recodedFloatNToAny(in.in1, in.rm, ~in.cmd(1,0), 52, 12, 64) @@ -269,7 +269,7 @@ class FPResult extends Bundle val exc = Bits(width = 5) } -class IntToFP(val latency: Int) extends Component +class IntToFP(val latency: Int) extends Module { class Input extends Bundle { val single = Bool() @@ -279,8 +279,8 @@ class IntToFP(val latency: Int) extends Component override def clone = new Input().asInstanceOf[this.type] } val io = new Bundle { - val in = new PipeIO()(new Input).flip - val out = new PipeIO()(new FPResult) + val in = Valid(new Input).flip + val out = Valid(new FPResult) } val in = Pipe(io.in) @@ -296,7 +296,7 @@ class IntToFP(val latency: Int) extends Component in.bits.cmd === FCMD_CVT_FMT_L || in.bits.cmd === FCMD_CVT_FMT_LU) { when (in.bits.single) { val u = hardfloat.anyToRecodedFloatN(in.bits.data, in.bits.rm, ~in.bits.cmd(1,0), 23, 9, 64) - mux.data := Cat(Fix(-1, 32), u._1) + mux.data := Cat(SInt(-1, 32), u._1) mux.exc := u._2 }.otherwise { val u = hardfloat.anyToRecodedFloatN(in.bits.data, in.bits.rm, ~in.bits.cmd(1,0), 52, 12, 64) @@ -308,7 +308,7 @@ class IntToFP(val latency: Int) extends Component io.out <> 
Pipe(in.valid, mux, latency-1) } -class FPToFP(val latency: Int) extends Component +class FPToFP(val latency: Int) extends Module { class Input extends Bundle { val single = Bool() @@ -319,8 +319,8 @@ class FPToFP(val latency: Int) extends Component override def clone = new Input().asInstanceOf[this.type] } val io = new Bundle { - val in = new PipeIO()(new Input).flip - val out = new PipeIO()(new FPResult) + val in = Valid(new Input).flip + val out = Valid(new FPResult) val lt = Bool(INPUT) // from FPToInt } @@ -356,7 +356,7 @@ class FPToFP(val latency: Int) extends Component } when (in.bits.cmd === FCMD_CVT_FMT_S || in.bits.cmd === FCMD_CVT_FMT_D) { when (in.bits.single) { - mux.data := Cat(Fix(-1, 32), d2s._1) + mux.data := Cat(SInt(-1, 32), d2s._1) mux.exc := d2s._2 }.otherwise { mux.data := s2d._1 @@ -378,15 +378,15 @@ class ioFMA(width: Int) extends Bundle { val exc = Bits(OUTPUT, 5) } -class FPUSFMAPipe(val latency: Int) extends Component +class FPUSFMAPipe(val latency: Int) extends Module { val io = new ioFMA(33) - val cmd = Reg() { Bits() } - val rm = Reg() { Bits() } - val in1 = Reg() { Bits() } - val in2 = Reg() { Bits() } - val in3 = Reg() { Bits() } + val cmd = Reg(Bits()) + val rm = Reg(Bits()) + val in1 = Reg(Bits()) + val in2 = Reg(Bits()) + val in3 = Reg(Bits()) val cmd_fma = io.cmd === FCMD_MADD || io.cmd === FCMD_MSUB || io.cmd === FCMD_NMADD || io.cmd === FCMD_NMSUB @@ -395,7 +395,7 @@ class FPUSFMAPipe(val latency: Int) extends Component val one = Bits("h80000000") val zero = Cat(io.in1(32) ^ io.in2(32), Bits(0, 32)) - val valid = Reg(io.valid) + val valid = RegUpdate(io.valid) when (io.valid) { cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) rm := io.rm @@ -404,7 +404,7 @@ class FPUSFMAPipe(val latency: Int) extends Component in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, zero)) } - val fma = new hardfloat.mulAddSubRecodedFloatN(23, 9) + val fma = Module(new hardfloat.mulAddSubRecodedFloatN(23, 9)) fma.io.op := cmd 
fma.io.roundingMode := rm fma.io.a := in1 @@ -415,15 +415,15 @@ class FPUSFMAPipe(val latency: Int) extends Component io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits } -class FPUDFMAPipe(val latency: Int) extends Component +class FPUDFMAPipe(val latency: Int) extends Module { val io = new ioFMA(65) - val cmd = Reg() { Bits() } - val rm = Reg() { Bits() } - val in1 = Reg() { Bits() } - val in2 = Reg() { Bits() } - val in3 = Reg() { Bits() } + val cmd = Reg(Bits()) + val rm = Reg(Bits()) + val in1 = Reg(Bits()) + val in2 = Reg(Bits()) + val in3 = Reg(Bits()) val cmd_fma = io.cmd === FCMD_MADD || io.cmd === FCMD_MSUB || io.cmd === FCMD_NMADD || io.cmd === FCMD_NMSUB @@ -432,7 +432,7 @@ class FPUDFMAPipe(val latency: Int) extends Component val one = Bits("h8000000000000000") val zero = Cat(io.in1(64) ^ io.in2(64), Bits(0, 64)) - val valid = Reg(io.valid) + val valid = RegUpdate(io.valid) when (io.valid) { cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) rm := io.rm @@ -441,7 +441,7 @@ class FPUDFMAPipe(val latency: Int) extends Component in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, zero)) } - val fma = new hardfloat.mulAddSubRecodedFloatN(52, 12) + val fma = Module(new hardfloat.mulAddSubRecodedFloatN(52, 12)) fma.io.op := cmd fma.io.roundingMode := rm fma.io.a := in1 @@ -452,7 +452,7 @@ class FPUDFMAPipe(val latency: Int) extends Component io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits } -class FPU(sfma_latency: Int, dfma_latency: Int) extends Component +class FPU(sfma_latency: Int, dfma_latency: Int) extends Module { val io = new Bundle { val ctrl = (new CtrlFPUIO).flip @@ -461,16 +461,16 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component val dfma = new ioFMA(65) } - val ex_reg_inst = Reg() { Bits() } + val ex_reg_inst = Reg(Bits()) when (io.ctrl.valid) { ex_reg_inst := io.dpath.inst } - val ex_reg_valid = Reg(io.ctrl.valid, Bool(false)) - val mem_reg_valid = Reg(ex_reg_valid && !io.ctrl.killx, resetVal = 
Bool(false)) + val ex_reg_valid = Reg(update=io.ctrl.valid, reset=Bool(false)) + val mem_reg_valid = Reg(update=ex_reg_valid && !io.ctrl.killx, reset=Bool(false)) val killm = io.ctrl.killm || io.ctrl.nack_mem - val wb_reg_valid = Reg(mem_reg_valid && !killm, resetVal = Bool(false)) + val wb_reg_valid = Reg(update=mem_reg_valid && !killm, reset=Bool(false)) - val fp_decoder = new FPUDecoder + val fp_decoder = Module(new FPUDecoder) fp_decoder.io.inst := io.dpath.inst val ctrl = RegEn(fp_decoder.io.sigs, io.ctrl.valid) @@ -478,19 +478,19 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component val wb_ctrl = RegEn(mem_ctrl, mem_reg_valid) // load response - val load_wb = Reg(io.dpath.dmem_resp_val) + val load_wb = RegUpdate(io.dpath.dmem_resp_val) val load_wb_single = RegEn(io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU, io.dpath.dmem_resp_val) val load_wb_data = RegEn(io.dpath.dmem_resp_data, io.dpath.dmem_resp_val) val load_wb_tag = RegEn(io.dpath.dmem_resp_tag, io.dpath.dmem_resp_val) val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) - val load_wb_data_recoded = Mux(load_wb_single, Cat(Fix(-1), rec_s), rec_d) + val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1), rec_s), rec_d) - val fsr_rm = Reg() { Bits(width = 3) } - val fsr_exc = Reg() { Bits(width = 5) } + val fsr_rm = Reg(Bits(width = 3)) + val fsr_exc = Reg(Bits(width = 5)) // regfile - val regfile = Mem(32) { Bits(width = 65) } + val regfile = Mem(Bits(width = 65), 32) when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded } val ex_rs1 = regfile(ex_reg_inst(26,22)) @@ -498,7 +498,7 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component val ex_rs3 = regfile(ex_reg_inst(16,12)) val ex_rm = Mux(ex_reg_inst(11,9) === Bits(7), fsr_rm, ex_reg_inst(11,9)) - val fpiu = new FPToInt + val fpiu = Module(new FPToInt) fpiu.io.in.valid := ex_reg_valid && ctrl.toint 
fpiu.io.in.bits := ctrl fpiu.io.in.bits.rm := ex_rm @@ -509,12 +509,12 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component io.dpath.store_data := fpiu.io.out.bits.store io.dpath.toint_data := fpiu.io.out.bits.toint - val ifpu = new IntToFP(3) + val ifpu = Module(new IntToFP(3)) ifpu.io.in.valid := ex_reg_valid && ctrl.fromint ifpu.io.in.bits := ctrl ifpu.io.in.bits.rm := ex_rm ifpu.io.in.bits.data := io.dpath.fromint_data - val fpmu = new FPToFP(2) + val fpmu = Module(new FPToFP(2)) fpmu.io.in.valid := ex_reg_valid && ctrl.fastpipe fpmu.io.in.bits := ctrl fpmu.io.in.bits.rm := ex_rm @@ -525,7 +525,7 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component val cmd_fma = mem_ctrl.cmd === FCMD_MADD || mem_ctrl.cmd === FCMD_MSUB || mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB - val sfma = new FPUSFMAPipe(sfma_latency) + val sfma = Module(new FPUSFMAPipe(sfma_latency)) sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1) sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2) @@ -535,7 +535,7 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component io.sfma.out := sfma.io.out io.sfma.exc := sfma.io.exc - val dfma = new FPUDFMAPipe(dfma_latency) + val dfma = Module(new FPUDFMAPipe(dfma_latency)) dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1) dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2) @@ -546,7 +546,7 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component io.dfma.exc := dfma.io.exc // writeback arbitration - case class Pipe(p: Component, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: Bits, wexc: Bits) + case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: Bits, wexc: Bits) val pipes = List( Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, 
fpmu.io.out.bits.data, fpmu.io.out.bits.exc), Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits.data, ifpu.io.out.bits.exc), @@ -554,16 +554,16 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out, dfma.io.exc)) def latencyMask(c: FPUCtrlSigs, offset: Int) = { require(pipes.forall(_.lat >= offset)) - pipes.map(p => Mux(p.cond(c), UFix(1 << p.lat-offset), UFix(0))).reduce(_|_) + pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_) } - def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), UFix(p._2), UFix(0))).reduce(_|_) + def pipeid(c: FPUCtrlSigs) = pipes.zipWithIndex.map(p => Mux(p._1.cond(c), UInt(p._2), UInt(0))).reduce(_|_) val maxLatency = pipes.map(_.lat).max val memLatencyMask = latencyMask(mem_ctrl, 2) - val wen = Reg(resetVal = Bits(0, maxLatency-1)) - val winfo = Vec(maxLatency-1) { Reg() { Bits() } } + val wen = RegReset(Bits(0, maxLatency-1)) + val winfo = Vec.fill(maxLatency-1){Reg(Bits())} val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) - val (write_port_busy, mem_winfo) = (Reg{Bool()}, Reg{Bits()}) + val (write_port_busy, mem_winfo) = (Reg(Bool()), Reg(Bits())) when (ex_reg_valid) { write_port_busy := mem_wen && (memLatencyMask & latencyMask(ctrl, 1)).orR || (wen & latencyMask(ctrl, 0)).orR mem_winfo := Cat(pipeid(ctrl), ex_reg_inst(31,27)) @@ -584,10 +584,10 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component } } - val waddr = winfo(0)(4,0).toUFix + val waddr = winfo(0)(4,0).toUInt val wsrc = winfo(0) >> waddr.getWidth - val wdata = (Vec(pipes.map(_.wdata)){Bits()})(wsrc) - val wexc = (Vec(pipes.map(_.wexc)){Bits()})(wsrc) + val wdata = Vec(pipes.map(_.wdata))(wsrc) + val wexc = Vec(pipes.map(_.wexc))(wsrc) when (wen(0)) { regfile(waddr(4,0)) := wdata } val wb_toint_exc = RegEn(fpiu.io.out.bits.exc, mem_ctrl.toint) @@ -606,12 +606,12 
@@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Component val fp_inflight = wb_reg_valid && wb_ctrl.toint || wen.orR val fsr_busy = mem_ctrl.rdfsr && fp_inflight || wb_reg_valid && wb_ctrl.wrfsr - val units_busy = mem_reg_valid && mem_ctrl.fma && Reg(Mux(ctrl.single, io.sfma.valid, io.dfma.valid)) + val units_busy = mem_reg_valid && mem_ctrl.fma && RegUpdate(Mux(ctrl.single, io.sfma.valid, io.dfma.valid)) io.ctrl.nack_mem := fsr_busy || units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_) - io.ctrl.sboard_set := wb_reg_valid && Reg(useScoreboard(_._1.cond(mem_ctrl))) - io.ctrl.sboard_clr := wen(0) && useScoreboard(x => wsrc === UFix(x._2)) + io.ctrl.sboard_set := wb_reg_valid && RegUpdate(useScoreboard(_._1.cond(mem_ctrl))) + io.ctrl.sboard_clr := wen(0) && useScoreboard(x => wsrc === UInt(x._2)) io.ctrl.sboard_clra := waddr // we don't currently support round-max-magnitude (rm=4) io.ctrl.illegal_rm := ex_rm(2) && ctrl.round diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 583e90ce..eb4f536a 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -14,8 +14,8 @@ class HostIO(val w: Int) extends Bundle { val clk = Bool(OUTPUT) val clk_edge = Bool(OUTPUT) - val in = new FIFOIO()(Bits(width = w)).flip - val out = new FIFOIO()(Bits(width = w)) + val in = Decoupled(Bits(width = w)).flip + val out = Decoupled(Bits(width = w)) } class PCRReq extends Bundle @@ -29,28 +29,28 @@ class HTIFIO(ntiles: Int) extends Bundle { val reset = Bool(INPUT) val debug = new DebugIO - val pcr_req = (new FIFOIO) { new PCRReq }.flip - val pcr_rep = (new FIFOIO) { Bits(width = 64) } - val ipi_req = (new FIFOIO) { Bits(width = log2Up(ntiles)) } - val ipi_rep = (new FIFOIO) { Bool() }.flip + val pcr_req = Decoupled(new PCRReq).flip + val pcr_rep = Decoupled(Bits(width = 64)) + val 
ipi_req = Decoupled(Bits(width = log2Up(ntiles))) + val ipi_rep = Decoupled(Bool()).flip } class SCRIO extends Bundle { val n = 64 - val rdata = Vec(n) { Bits(INPUT, 64) } + val rdata = Vec.fill(n){Bits(INPUT, 64)} val wen = Bool(OUTPUT) - val waddr = UFix(OUTPUT, log2Up(n)) + val waddr = UInt(OUTPUT, log2Up(n)) val wdata = Bits(OUTPUT, 64) } -class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component with ClientCoherenceAgent +class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Module with ClientCoherenceAgent { implicit val (ln, co) = (conf.ln, conf.co) val nTiles = ln.nClients-1 // This HTIF is itself a TileLink client val io = new Bundle { val host = new HostIO(w) - val cpu = Vec(nTiles) { new HTIFIO(nTiles).flip } + val cpu = Vec.fill(nTiles){new HTIFIO(nTiles).flip} val mem = new TileLinkIO val scr = new SCRIO } @@ -60,19 +60,19 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component require(short_request_bits % w == 0) val rx_count_w = 13 + log2Up(64) - log2Up(w) // data size field is 12 bits - val rx_count = Reg(resetVal = UFix(0,rx_count_w)) - val rx_shifter = Reg() { Bits(width = short_request_bits) } + val rx_count = RegReset(UInt(0,rx_count_w)) + val rx_shifter = Reg(Bits(width = short_request_bits)) val rx_shifter_in = Cat(io.host.in.bits, rx_shifter(short_request_bits-1,w)) val next_cmd = rx_shifter_in(3,0) - val cmd = Reg() { Bits() } - val size = Reg() { Bits() } - val pos = Reg() { Bits() } - val seqno = Reg() { Bits() } - val addr = Reg() { Bits() } + val cmd = Reg(Bits()) + val size = Reg(Bits()) + val pos = Reg(Bits()) + val seqno = Reg(Bits()) + val addr = Reg(Bits()) when (io.host.in.valid && io.host.in.ready) { rx_shifter := rx_shifter_in - rx_count := rx_count + UFix(1) - when (rx_count === UFix(short_request_bits/w-1)) { + rx_count := rx_count + UInt(1) + when (rx_count === UInt(short_request_bits/w-1)) { cmd := next_cmd size := rx_shifter_in(15,4) pos := 
rx_shifter_in(15,4+OFFSET_BITS-3) @@ -81,15 +81,15 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component } } - val rx_word_count = (rx_count >> UFix(log2Up(short_request_bits/w))) + val rx_word_count = (rx_count >> UInt(log2Up(short_request_bits/w))) val rx_word_done = io.host.in.valid && rx_count(log2Up(short_request_bits/w)-1,0).andR val packet_ram_depth = long_request_bits/short_request_bits-1 - val packet_ram = Vec(packet_ram_depth) { Reg() { Bits(width = short_request_bits) } } + val packet_ram = Vec.fill(packet_ram_depth){Reg(Bits(width = short_request_bits))} when (rx_word_done && io.host.in.ready) { - packet_ram(rx_word_count(log2Up(packet_ram_depth)-1,0) - UFix(1)) := rx_shifter_in + packet_ram(rx_word_count(log2Up(packet_ram_depth)-1,0) - UInt(1)) := rx_shifter_in } - val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UFix() } + val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UInt() } val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.width-1, 0) val pcr_coreid = addr(log2Up(nTiles)-1+20+1,20) @@ -97,25 +97,25 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component val bad_mem_packet = size(OFFSET_BITS-1-3,0).orR || addr(OFFSET_BITS-1-3,0).orR val nack = Mux(cmd === cmd_readmem || cmd === cmd_writemem, bad_mem_packet, - Mux(cmd === cmd_readcr || cmd === cmd_writecr, size != UFix(1), + Mux(cmd === cmd_readcr || cmd === cmd_writecr, size != UInt(1), Bool(true))) - val tx_count = Reg(resetVal = UFix(0, rx_count_w)) + val tx_count = RegReset(UInt(0, rx_count_w)) val tx_subword_count = tx_count(log2Up(short_request_bits/w)-1,0) val tx_word_count = tx_count(rx_count_w-1, log2Up(short_request_bits/w)) - val packet_ram_raddr = tx_word_count(log2Up(packet_ram_depth)-1,0) - UFix(1) + val packet_ram_raddr = tx_word_count(log2Up(packet_ram_depth)-1,0) - UInt(1) when (io.host.out.valid && io.host.out.ready) { 
- tx_count := tx_count + UFix(1) + tx_count := tx_count + UInt(1) } - val rx_done = rx_word_done && Mux(rx_word_count === UFix(0), next_cmd != cmd_writemem && next_cmd != cmd_writecr, rx_word_count === size || rx_word_count(log2Up(packet_ram_depth)-1,0) === UFix(0)) - val tx_size = Mux(!nack && (cmd === cmd_readmem || cmd === cmd_readcr || cmd === cmd_writecr), size, UFix(0)) - val tx_done = io.host.out.ready && tx_subword_count.andR && (tx_word_count === tx_size || tx_word_count > UFix(0) && packet_ram_raddr.andR) + val rx_done = rx_word_done && Mux(rx_word_count === UInt(0), next_cmd != cmd_writemem && next_cmd != cmd_writecr, rx_word_count === size || rx_word_count(log2Up(packet_ram_depth)-1,0) === UInt(0)) + val tx_size = Mux(!nack && (cmd === cmd_readmem || cmd === cmd_readcr || cmd === cmd_writecr), size, UInt(0)) + val tx_done = io.host.out.ready && tx_subword_count.andR && (tx_word_count === tx_size || tx_word_count > UInt(0) && packet_ram_raddr.andR) - val mem_acked = Reg(resetVal = Bool(false)) - val mem_gxid = Reg() { Bits() } - val mem_gsrc = Reg() { UFix(width = conf.ln.idBits) } - val mem_needs_ack = Reg() { Bool() } + val mem_acked = RegReset(Bool(false)) + val mem_gxid = Reg(Bits()) + val mem_gsrc = Reg(UInt(width = conf.ln.idBits)) + val mem_needs_ack = Reg(Bool()) when (io.mem.grant.valid) { mem_acked := Bool(true) mem_gxid := io.mem.grant.bits.payload.master_xact_id @@ -124,18 +124,18 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component } io.mem.grant.ready := Bool(true) - val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(9) { UFix() } - val state = Reg(resetVal = state_rx) + val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(9) { UInt() } + val state = RegReset(state_rx) - val rx_cmd 
= Mux(rx_word_count === UFix(0), next_cmd, cmd) + val rx_cmd = Mux(rx_word_count === UInt(0), next_cmd, cmd) when (state === state_rx && rx_done) { state := Mux(rx_cmd === cmd_readmem || rx_cmd === cmd_writemem, state_mem_req, Mux(rx_cmd === cmd_readcr || rx_cmd === cmd_writecr, state_pcr_req, state_tx)) } - val mem_cnt = Reg(resetVal = UFix(0, log2Up(REFILL_CYCLES))) - val x_init = new Queue(1)(new Acquire) + val mem_cnt = RegReset(UInt(0, log2Up(REFILL_CYCLES))) + val x_init = Module(new Queue(new Acquire, 1)) when (state === state_mem_req && x_init.io.enq.ready) { state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) } @@ -143,7 +143,7 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component when (mem_cnt.andR) { state := state_mem_wresp } - mem_cnt := mem_cnt + UFix(1) + mem_cnt := mem_cnt + UInt(1) } when (state === state_mem_wresp) { when (mem_acked) { @@ -156,37 +156,37 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component when (mem_cnt.andR) { state := state_mem_finish } - mem_cnt := mem_cnt + UFix(1) + mem_cnt := mem_cnt + UInt(1) } mem_acked := Bool(false) } when (state === state_mem_finish && io.mem.grant_ack.ready) { - state := Mux(cmd === cmd_readmem || pos === UFix(1), state_tx, state_rx) - pos := pos - UFix(1) - addr := addr + UFix(1 << OFFSET_BITS-3) + state := Mux(cmd === cmd_readmem || pos === UInt(1), state_tx, state_rx) + pos := pos - UInt(1) + addr := addr + UInt(1 << OFFSET_BITS-3) } when (state === state_tx && tx_done) { when (tx_word_count === tx_size) { - rx_count := UFix(0) - tx_count := UFix(0) + rx_count := UInt(0) + tx_count := UInt(0) } - state := Mux(cmd === cmd_readmem && pos != UFix(0), state_mem_req, state_rx) + state := Mux(cmd === cmd_readmem && pos != UInt(0), state_mem_req, state_rx) } var mem_req_data: Bits = null for (i <- 0 until MEM_DATA_BITS/short_request_bits) { - val idx = Cat(mem_cnt, UFix(i, log2Up(MEM_DATA_BITS/short_request_bits))) + val idx = 
Cat(mem_cnt, UInt(i, log2Up(MEM_DATA_BITS/short_request_bits))) when (state === state_mem_rdata && io.mem.grant.valid) { packet_ram(idx) := io.mem.grant.bits.payload.data((i+1)*short_request_bits-1, i*short_request_bits) } mem_req_data = Cat(packet_ram(idx), mem_req_data) } x_init.io.enq.valid := state === state_mem_req - val init_addr = addr.toUFix >> UFix(OFFSET_BITS-3) + val init_addr = addr.toUInt >> UInt(OFFSET_BITS-3) x_init.io.enq.bits := Mux(cmd === cmd_writemem, - Acquire(co.getUncachedWriteAcquireType, init_addr, UFix(0)), - Acquire(co.getUncachedReadAcquireType, init_addr, UFix(0))) - io.mem.acquire.meta <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq, UFix(conf.ln.nClients), UFix(0)) // By convention HTIF is the client with the largest id + Acquire(co.getUncachedWriteAcquireType, init_addr, UInt(0)), + Acquire(co.getUncachedReadAcquireType, init_addr, UInt(0))) + io.mem.acquire.meta <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq, UInt(conf.ln.nClients), UInt(0)) // By convention HTIF is the client with the largest id io.mem.acquire.data.valid := state === state_mem_wdata io.mem.acquire.data.bits.payload.data := mem_req_data io.mem.grant_ack.valid := (state === state_mem_finish) && mem_needs_ack @@ -196,13 +196,13 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component io.mem.release.meta.valid := Bool(false) io.mem.release.data.valid := Bool(false) - val pcrReadData = Reg{Bits(width = io.cpu(0).pcr_rep.bits.getWidth)} + val pcrReadData = Reg(Bits(width = io.cpu(0).pcr_rep.bits.getWidth)) for (i <- 0 until nTiles) { - val my_reset = Reg(resetVal = Bool(true)) - val my_ipi = Reg(resetVal = Bool(false)) + val my_reset = RegReset(Bool(true)) + val my_ipi = RegReset(Bool(false)) val cpu = io.cpu(i) - val me = pcr_coreid === UFix(i) + val me = pcr_coreid === UInt(i) cpu.pcr_req.valid := state === state_pcr_req && me && pcr_addr != PCR.RESET cpu.pcr_req.bits.rw := cmd === cmd_writecr cpu.pcr_req.bits.addr := pcr_addr @@ -215,7 +215,7 
@@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component cpu.ipi_rep.valid := my_ipi cpu.ipi_req.ready := Bool(true) for (j <- 0 until nTiles) { - when (io.cpu(j).ipi_req.valid && io.cpu(j).ipi_req.bits === UFix(i)) { + when (io.cpu(j).ipi_req.valid && io.cpu(j).ipi_req.bits === UInt(i)) { my_ipi := Bool(true) } } @@ -238,16 +238,16 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component } } - val scr_rdata = Vec(io.scr.rdata.size){Bits(width = 64)} + val scr_rdata = Vec.fill(io.scr.rdata.size){Bits(width = 64)} for (i <- 0 until scr_rdata.size) scr_rdata(i) := io.scr.rdata(i) scr_rdata(0) := nTiles - scr_rdata(1) := (UFix(REFILL_CYCLES*MEM_DATA_BITS/8) << x_init.io.enq.bits.addr.getWidth) >> 20 + scr_rdata(1) := (UInt(REFILL_CYCLES*MEM_DATA_BITS/8) << x_init.io.enq.bits.addr.getWidth) >> 20 io.scr.wen := false io.scr.wdata := pcr_wdata - io.scr.waddr := pcr_addr.toUFix - when (state === state_pcr_req && pcr_coreid === Fix(-1)) { + io.scr.waddr := pcr_addr.toUInt + when (state === state_pcr_req && pcr_coreid === SInt(-1)) { io.scr.wen := cmd === cmd_writecr pcrReadData := scr_rdata(pcr_addr) state := state_tx @@ -256,7 +256,7 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component val tx_cmd = Mux(nack, cmd_nack, cmd_ack) val tx_cmd_ext = Cat(Bits(0, 4-tx_cmd.getWidth), tx_cmd) val tx_header = Cat(addr, seqno, tx_size, tx_cmd_ext) - val tx_data = Mux(tx_word_count === UFix(0), tx_header, + val tx_data = Mux(tx_word_count === UInt(0), tx_header, Mux(cmd === cmd_readcr || cmd === cmd_writecr, pcrReadData, packet_ram(packet_ram_raddr))) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 4f80d72b..262f3841 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -25,14 +25,14 @@ case class ICacheConfig(sets: Int, assoc: Int, } class FrontendReq extends Bundle { - val pc = UFix(width = VADDR_BITS+1) + val pc = 
UInt(width = VADDR_BITS+1) val mispredict = Bool() val taken = Bool() - val currentpc = UFix(width = VADDR_BITS+1) + val currentpc = UInt(width = VADDR_BITS+1) } class FrontendResp(implicit conf: ICacheConfig) extends Bundle { - val pc = UFix(width = VADDR_BITS+1) // ID stage PC + val pc = UInt(width = VADDR_BITS+1) // ID stage PC val data = Bits(width = conf.ibytes*8) val taken = Bool() val xcpt_ma = Bool() @@ -42,36 +42,36 @@ class FrontendResp(implicit conf: ICacheConfig) extends Bundle { } class CPUFrontendIO(implicit conf: ICacheConfig) extends Bundle { - val req = new PipeIO()(new FrontendReq) - val resp = new FIFOIO()(new FrontendResp).flip + val req = Valid(new FrontendReq) + val resp = Decoupled(new FrontendResp).flip val ptw = new TLBPTWIO().flip val invalidate = Bool(OUTPUT) } -class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Component +class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module { val io = new Bundle { val cpu = new CPUFrontendIO()(c).flip val mem = new UncachedTileLinkIO } - val btb = new rocketDpathBTB(c.nbtb) - val icache = new ICache - val tlb = new TLB(c.ntlb) + val btb = Module(new rocketDpathBTB(c.nbtb)) + val icache = Module(new ICache) + val tlb = Module(new TLB(c.ntlb)) - val s1_pc = Reg() { UFix() } - val s1_same_block = Reg() { Bool() } - val s2_valid = Reg(resetVal = Bool(true)) - val s2_pc = Reg(resetVal = UFix(START_ADDR)) - val s2_btb_hit = Reg(resetVal = Bool(false)) - val s2_xcpt_if = Reg(resetVal = Bool(false)) + val s1_pc = Reg(UInt()) + val s1_same_block = Reg(Bool()) + val s2_valid = RegReset(Bool(true)) + val s2_pc = RegReset(UInt(START_ADDR)) + val s2_btb_hit = RegReset(Bool(false)) + val s2_xcpt_if = RegReset(Bool(false)) val btbTarget = Cat(btb.io.target(VADDR_BITS-1), btb.io.target) - val pcp4_0 = s1_pc + UFix(c.ibytes) + val pcp4_0 = s1_pc + UInt(c.ibytes) val pcp4 = Cat(s1_pc(VADDR_BITS-1) & pcp4_0(VADDR_BITS-1), pcp4_0(VADDR_BITS-1,0)) val icmiss = s2_valid 
&& !icache.io.resp.valid val predicted_npc = Mux(btb.io.hit, btbTarget, pcp4) - val npc = Mux(icmiss, s2_pc, predicted_npc).toUFix + val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt val s0_same_block = !icmiss && !io.cpu.req.valid && (predicted_npc >> log2Up(c.databits/8)) === (s1_pc >> log2Up(c.databits/8)) val stall = io.cpu.resp.valid && !io.cpu.resp.ready @@ -100,8 +100,8 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Comp tlb.io.ptw <> io.cpu.ptw tlb.io.req.valid := !stall && !icmiss - tlb.io.req.bits.vpn := s1_pc >> UFix(PGIDX_BITS) - tlb.io.req.bits.asid := UFix(0) + tlb.io.req.bits.vpn := s1_pc >> UInt(PGIDX_BITS) + tlb.io.req.bits.asid := UInt(0) tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.instruction := Bool(true) @@ -117,40 +117,45 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Comp io.cpu.resp.bits.pc := s2_pc io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(c.databits/8)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)) io.cpu.resp.bits.taken := s2_btb_hit - io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(c.ibytes)-1,0) != UFix(0) + io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(c.ibytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if } -class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Component +class ICacheReq extends Bundle { + val idx = UInt(width = PGIDX_BITS) + val ppn = UInt(width = PPN_BITS) // delayed one cycle + val kill = Bool() // delayed one cycle +} + +class ICacheResp(implicit c: ICacheConfig) extends Bundle { + val data = Bits(width = c.ibytes*8) + val datablock = Bits(width = c.databits) + override def clone = new ICacheResp().asInstanceOf[this.type] +} + +class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module { implicit val lnConf = tl.ln val io = new Bundle { - val req = new PipeIO()(new Bundle { - val idx = UFix(width = PGIDX_BITS) - val ppn = UFix(width = PPN_BITS) // delayed one cycle - val 
kill = Bool() // delayed one cycle - }).flip - val resp = new FIFOIO()(new Bundle { - val data = Bits(width = c.ibytes*8) - val datablock = Bits(width = c.databits) - }) + val req = Valid(new ICacheReq).flip + val resp = Decoupled(new ICacheResp) val invalidate = Bool(INPUT) val mem = new UncachedTileLinkIO } - val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(4) { UFix() } - val state = Reg(resetVal = s_ready) - val invalidated = Reg() { Bool() } + val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(4) { UInt() } + val state = RegReset(s_ready) + val invalidated = Reg(Bool()) val stall = !io.resp.ready val rdy = Bool() - val s2_valid = Reg(resetVal = Bool(false)) - val s2_addr = Reg { UFix(width = PADDR_BITS) } + val s2_valid = RegReset(Bool(false)) + val s2_addr = Reg(UInt(width = PADDR_BITS)) val s2_any_tag_hit = Bool() - val s1_valid = Reg(resetVal = Bool(false)) - val s1_pgoff = Reg() { UFix(width = PGIDX_BITS) } - val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUFix + val s1_valid = RegReset(Bool(false)) + val s1_pgoff = Reg(UInt(width = PGIDX_BITS)) + val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUInt val s1_tag = s1_addr(c.tagbits+c.untagbits-1,c.untagbits) val s0_valid = io.req.valid || s1_valid && stall @@ -175,14 +180,14 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Compon //assert(!co.isVoluntary(io.mem.grant.bits.payload) || !io.mem.grant.valid, "UncachedRequestors shouldn't get voluntary grants.") val (rf_cnt, refill_done) = Counter(io.mem.grant.valid, REFILL_CYCLES) - val repl_way = if (c.dm) UFix(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) + val repl_way = if (c.dm) UInt(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) val enc_tagbits = c.code.width(c.tagbits) - val tag_array = Mem(c.sets, seqRead = true) { Bits(width = enc_tagbits*c.assoc) } - val tag_raddr = Reg{UFix()} + val tag_array = Mem(Bits(width = enc_tagbits*c.assoc), c.sets, seqRead = true) + val tag_raddr = Reg(UInt()) when 
(refill_done) { - val wmask = FillInterleaved(enc_tagbits, if (c.dm) Bits(1) else UFixToOH(repl_way)) - val tag = c.code.encode(s2_tag) + val wmask = FillInterleaved(enc_tagbits, if (c.dm) Bits(1) else UIntToOH(repl_way)) + val tag = c.code.encode(s2_tag).toUInt tag_array.write(s2_idx, Fill(c.assoc, tag), wmask) } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM @@ -190,7 +195,7 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Compon tag_raddr := s0_pgoff(c.untagbits-1,c.offbits) } - val vb_array = Reg(resetVal = Bits(0, c.lines)) + val vb_array = RegReset(Bits(0, c.lines)) when (refill_done && !invalidated) { vb_array := vb_array.bitSet(Cat(repl_way, s2_idx), Bool(true)) } @@ -198,19 +203,19 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Compon vb_array := Bits(0) invalidated := Bool(true) } - val s2_disparity = Vec(c.assoc) { Bool() } + val s2_disparity = Vec.fill(c.assoc){Bool()} for (i <- 0 until c.assoc) - when (s2_valid && s2_disparity(i)) { vb_array := vb_array.bitSet(Cat(UFix(i), s2_idx), Bool(false)) } + when (s2_valid && s2_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s2_idx), Bool(false)) } - val s1_tag_match = Vec(c.assoc) { Bool() } - val s2_tag_hit = Vec(c.assoc) { Bool() } - val s2_dout = Vec(c.assoc){Reg{Bits()}} + val s1_tag_match = Vec.fill(c.assoc){Bool()} + val s2_tag_hit = Vec.fill(c.assoc){Bool()} + val s2_dout = Vec.fill(c.assoc){Reg(Bits())} for (i <- 0 until c.assoc) { - val s1_vb = vb_array(Cat(UFix(i), s1_pgoff(c.untagbits-1,c.offbits))).toBool - val s2_vb = Reg() { Bool() } - val s2_tag_disparity = Reg() { Bool() } - val s2_tag_match = Reg() { Bool() } + val s1_vb = vb_array(Cat(UInt(i), s1_pgoff(c.untagbits-1,c.offbits))).toBool + val s2_vb = Reg(Bool()) + val s2_tag_disparity = Reg(Bool()) + val s2_tag_match = Reg(Bool()) val tag_out = tag_array(tag_raddr)(enc_tagbits*(i+1)-1, enc_tagbits*i) when (s1_valid && rdy && !stall) { s2_vb := s1_vb @@ 
-224,9 +229,9 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Compon s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_) for (i <- 0 until c.assoc) { - val data_array = Mem(c.sets*REFILL_CYCLES, seqRead = true){ Bits(width = c.code.width(c.databits)) } - val s1_raddr = Reg{UFix()} - when (io.mem.grant.valid && repl_way === UFix(i)) { + val data_array = Mem(Bits(width = c.code.width(c.databits)), c.sets*REFILL_CYCLES, seqRead = true) + val s1_raddr = Reg(UInt()) + when (io.mem.grant.valid && repl_way === UInt(i)) { val d = io.mem.grant.bits.payload.data data_array(Cat(s2_idx,rf_cnt)) := c.code.encode(d) } @@ -241,14 +246,14 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Compon io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) - val finish_q = (new Queue(1)) { new GrantAck } + val finish_q = Module(new Queue(new GrantAck, 1)) finish_q.io.enq.valid := refill_done && tl.co.requiresAck(io.mem.grant.bits.payload) finish_q.io.enq.bits.master_xact_id := io.mem.grant.bits.payload.master_xact_id // output signals io.resp.valid := s2_hit io.mem.acquire.meta.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.acquire.meta.bits.payload := Acquire(tl.co.getUncachedReadAcquireType, s2_addr >> UFix(c.offbits), UFix(0)) + io.mem.acquire.meta.bits.payload := Acquire(tl.co.getUncachedReadAcquireType, s2_addr >> UInt(c.offbits), UInt(0)) io.mem.acquire.data.valid := Bool(false) io.mem.grant_ack <> FIFOedLogicalNetworkIOWrapper(finish_q.io.deq) io.mem.grant.ready := Bool(true) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 4c10f6b2..3a2b94d5 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -265,7 +265,7 @@ object Instructions object Disassemble { - def apply(insn: Bits) = { + def apply(insn: UInt) = { val name :: fmt :: Nil 
= ListLookup(insn, default, table) sprintf("%s %s", name, operands(insn, fmt)) } @@ -288,8 +288,8 @@ object Disassemble Str(" fa6"), Str(" fa7"), Str(" fa8"), Str(" fa9"), Str("fa10"), Str("fa11"), Str("fa12"), Str("fa13")) - def hex(x: Fix, plus: Char = ' ') = - Cat(Mux(x < Fix(0), Str("-0x"), Str(plus + "0x")), Str(x.abs, 16)) + def hex(x: SInt, plus: Char = ' ') = + Cat(Mux(x < SInt(0), Str("-0x"), Str(plus + "0x")), Str(x.abs, 16)) val comma = Str(',') val lparen = Str('(') @@ -299,14 +299,14 @@ object Disassemble val rs1 = insn(26,22) val rs2 = insn(21,17) val rs3 = insn(16,12) - val immv = insn(21,10).toFix - val bmmv = Cat(insn(31,27), insn(16,10)).toFix - val jmmv = insn(31,7).toFix + val immv = insn(21,10).toSInt + val bmmv = Cat(insn(31,27), insn(16,10)).toSInt + val jmmv = insn(31,7).toSInt val imm = hex(immv) - val bmm = hex(bmmv << UFix(1)) - val jmm = hex(jmmv << UFix(1)) - val lmm = Cat(Str("0x"), Str(insn(26,7).toUFix, 16)) + val bmm = hex(bmmv << UInt(1)) + val jmm = hex(jmmv << UInt(1)) + val lmm = Cat(Str("0x"), Str(insn(26,7).toUInt, 16)) val laddr = Cat(Str(immv), lparen, x(rs1), rparen) val saddr = Cat(Str(bmmv), lparen, x(rs1), rparen) @@ -334,8 +334,8 @@ object Disassemble val opts = Seq(r0, r1, r2, f1, f2, f3, fx, xf1, xf2, z, i, b, j, l, ld, st, fld, fst, amo) val maxLen = opts.map(_.getWidth).reduce(_ max _) - val padded = opts.map(x => x << UFix(maxLen - x.getWidth)) - AVec(padded)(fmt) + val padded = opts.map(x => x.toUInt << UInt(maxLen - x.getWidth)) + AVec(padded)(fmt.toUInt) } private def FMT_R0 = Bits(0, 5) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 06d35d8a..7d329de7 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -8,37 +8,37 @@ class MultiplierReq(implicit conf: RocketConfiguration) extends Bundle { val dw = Bits(width = SZ_DW) val in1 = Bits(width = conf.xprlen) val in2 = Bits(width = conf.xprlen) - val tag = UFix(width = 
conf.nxprbits) + val tag = UInt(width = conf.nxprbits) override def clone = new MultiplierReq().asInstanceOf[this.type] } class MultiplierResp(implicit conf: RocketConfiguration) extends Bundle { val data = Bits(width = conf.xprlen) - val tag = UFix(width = conf.nxprbits) + val tag = UInt(width = conf.nxprbits) override def clone = new MultiplierResp().asInstanceOf[this.type] } class MultiplierIO(implicit conf: RocketConfiguration) extends Bundle { - val req = new FIFOIO()(new MultiplierReq).flip + val req = Decoupled(new MultiplierReq).flip val kill = Bool(INPUT) - val resp = new FIFOIO()(new MultiplierResp) + val resp = Decoupled(new MultiplierResp) } -class Multiplier(unroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Component { +class Multiplier(unroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Module { val io = new MultiplierIO val w0 = io.req.bits.in1.getWidth val w = (w0+1+unroll-1)/unroll*unroll val cycles = w/unroll - val r_val = Reg(resetVal = Bool(false)); - val r_prod= Reg { Bits(width = w*2) } - val r_lsb = Reg { Bits() } - val r_cnt = Reg { UFix(width = log2Up(cycles+1)) } - val r_req = Reg{new MultiplierReq} - val r_lhs = Reg{Bits(width = w0+1)} + val r_val = RegReset(Bool(false)) + val r_prod = Reg(Bits(width = w*2)) + val r_lsb = Reg(Bits()) + val r_cnt = Reg(UInt(width = log2Up(cycles+1))) + val r_req = Reg(new MultiplierReq) + val r_lhs = Reg(Bits(width = w0+1)) val dw = io.req.bits.dw val fn = io.req.bits.fn @@ -55,7 +55,7 @@ class Multiplier(unroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rock when (io.req.fire()) { r_val := Bool(true) - r_cnt := UFix(0, log2Up(cycles+1)) + r_cnt := UInt(0, log2Up(cycles+1)) r_req := io.req.bits r_lhs := lhs_in r_prod:= rhs_in @@ -65,21 +65,21 @@ class Multiplier(unroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rock r_val := Bool(false) } - val eOutDist = (UFix(cycles)-r_cnt)*UFix(unroll) - val outShift = 
Mux(isMulFN(r_req.fn, FN_MUL), UFix(0), Mux(r_req.dw === DW_64, UFix(64), UFix(32))) - val shiftDist = Mux(r_cnt === UFix(cycles), outShift, eOutDist) - val eOutMask = (UFix(1) << eOutDist) - UFix(1) - val eOut = r_cnt != UFix(0) && Bool(earlyOut) && !((r_prod(w-1,0) ^ r_lsb.toFix) & eOutMask).orR - val shift = r_prod.toFix >> shiftDist + val eOutDist = (UInt(cycles)-r_cnt)*UInt(unroll) + val outShift = Mux(isMulFN(r_req.fn, FN_MUL), UInt(0), Mux(r_req.dw === DW_64, UInt(64), UInt(32))) + val shiftDist = Mux(r_cnt === UInt(cycles), outShift, eOutDist) + val eOutMask = (UInt(1) << eOutDist) - UInt(1) + val eOut = r_cnt != UInt(0) && Bool(earlyOut) && !((r_prod(w-1,0) ^ r_lsb.toSInt) & eOutMask).orR + val shift = r_prod.toSInt >> shiftDist - val sum = r_prod(2*w-1,w).toFix + r_prod(unroll-1,0).toFix * r_lhs.toFix + Mux(r_lsb, r_lhs.toFix, Fix(0)) - when (r_val && (r_cnt != UFix(cycles))) { + val sum = r_prod(2*w-1,w).toSInt + r_prod(unroll-1,0).toSInt * r_lhs.toSInt + Mux(r_lsb.toBool, r_lhs.toSInt, SInt(0)) + when (r_val && (r_cnt != UInt(cycles))) { r_lsb := r_prod(unroll-1) - r_prod := Cat(sum, r_prod(w-1,unroll)).toFix - r_cnt := r_cnt + UFix(1) + r_prod := Cat(sum, r_prod(w-1,unroll)).toSInt + r_cnt := r_cnt + UInt(1) when (eOut) { r_prod := shift - r_cnt := UFix(cycles) + r_cnt := UInt(cycles) } } @@ -89,5 +89,5 @@ class Multiplier(unroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rock io.req.ready := !r_val io.resp.bits := r_req io.resp.bits.data := Mux(r_req.dw === DW_64, out64, out32) - io.resp.valid := r_val && (r_cnt === UFix(cycles)) + io.resp.valid := r_val && (r_cnt === UInt(cycles)) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index ddb3bdf5..92a8adad 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -45,7 +45,7 @@ case class DCacheConfig(sets: Int, ways: Int, abstract class ReplacementPolicy { - def way: UFix + def way: UInt def miss: Unit def hit: 
Unit } @@ -56,7 +56,7 @@ class RandomReplacement(implicit conf: DCacheConfig) extends ReplacementPolicy replace := Bool(false) val lfsr = LFSR16(replace) - def way = if (conf.dm) UFix(0) else lfsr(conf.waybits-1,0) + def way = if (conf.dm) UInt(0) else lfsr(conf.waybits-1,0) def miss = replace := Bool(true) def hit = {} } @@ -107,7 +107,7 @@ class MSHRReq(implicit conf: DCacheConfig) extends HellaCacheReq { } class Replay(implicit conf: DCacheConfig) extends HellaCacheReq { - val sdq_id = UFix(width = log2Up(conf.nsdq)) + val sdq_id = UInt(width = log2Up(conf.nsdq)) override def clone = new Replay().asInstanceOf[this.type] } @@ -139,13 +139,13 @@ class WritebackReq(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exten val idx = Bits(width = conf.idxbits) val way_en = Bits(width = conf.ways) val client_xact_id = Bits(width = tl.clientXactIdBits) - val r_type = UFix(width = tl.co.releaseTypeBits) + val r_type = UInt(width = tl.co.releaseTypeWidth) override def clone = new WritebackReq().asInstanceOf[this.type] } object MetaData { - def apply(tag: Bits, state: UFix)(implicit conf: DCacheConfig) = { + def apply(tag: Bits, state: UInt)(implicit conf: DCacheConfig) = { val meta = new MetaData meta.state := state meta.tag := tag @@ -153,14 +153,14 @@ object MetaData { } } class MetaData(implicit conf: DCacheConfig) extends Bundle { - val state = UFix(width = conf.statebits) + val state = UInt(width = conf.statebits) val tag = Bits(width = conf.tagbits) override def clone = new MetaData().asInstanceOf[this.type] } class MetaReadReq(implicit conf: DCacheConfig) extends Bundle { - val addr = UFix(width = conf.paddrbits) + val addr = UInt(width = conf.paddrbits) override def clone = new MetaReadReq().asInstanceOf[this.type] } @@ -173,7 +173,7 @@ class MetaWriteReq(implicit conf: DCacheConfig) extends Bundle { override def clone = new MetaWriteReq().asInstanceOf[this.type] } -class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component 
{ +class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { implicit val ln = tl.ln val io = new Bundle { val req_pri_val = Bool(INPUT) @@ -181,41 +181,41 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte val req_sec_val = Bool(INPUT) val req_sec_rdy = Bool(OUTPUT) val req_bits = new MSHRReq().asInput - val req_sdq_id = UFix(INPUT, log2Up(conf.nsdq)) + val req_sdq_id = UInt(INPUT, log2Up(conf.nsdq)) val idx_match = Bool(OUTPUT) val tag = Bits(OUTPUT, conf.tagbits) - val mem_req = (new FIFOIO) { new Acquire } + val mem_req = Decoupled(new Acquire) val mem_resp = new DataWriteReq().asOutput - val meta_read = (new FIFOIO) { new MetaReadReq } - val meta_write = (new FIFOIO) { new MetaWriteReq } - val replay = (new FIFOIO) { new Replay() } - val mem_grant = (new PipeIO) { (new LogicalNetworkIO) {new Grant} }.flip - val mem_finish = (new FIFOIO) { (new LogicalNetworkIO) {new GrantAck} } - val wb_req = (new FIFOIO) { new WritebackReq } + val meta_read = Decoupled(new MetaReadReq) + val meta_write = Decoupled(new MetaWriteReq) + val replay = Decoupled(new Replay()) + val mem_grant = Valid((new LogicalNetworkIO) {new Grant} ).flip + val mem_finish = Decoupled((new LogicalNetworkIO) {new GrantAck} ) + val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) } - val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(9) { UFix() } - val state = Reg(resetVal = s_invalid) + val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(9) { UInt() } + val state = RegReset(s_invalid) - val acquire_type = Reg { UFix() } - val release_type = Reg { UFix() } - val line_state = Reg { UFix() } - val refill_count = Reg { UFix(width = log2Up(REFILL_CYCLES)) } - val req = Reg { new MSHRReq() } + val acquire_type 
= Reg(UInt()) + val release_type = Reg(UInt()) + val line_state = Reg(UInt()) + val refill_count = Reg(UInt(width = log2Up(REFILL_CYCLES))) + val req = Reg(new MSHRReq()) val req_cmd = io.req_bits.cmd val req_idx = req.addr(conf.untagbits-1,conf.offbits) val idx_match = req_idx === io.req_bits.addr(conf.untagbits-1,conf.offbits) val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !tl.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) - val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UFix(id) + val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UInt(id) val refill_done = reply && refill_count.andR val wb_done = reply && (state === s_wb_resp) - val rpq = (new Queue(conf.nrpq)) { new Replay } + val rpq = Module(new Queue(new Replay, conf.nrpq)) rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(req_cmd) rpq.io.enq.bits := io.req_bits rpq.io.enq.bits.sdq_id := io.req_sdq_id @@ -234,7 +234,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte when (state === s_refill_resp) { when (refill_done) { state := s_meta_write_req } when (reply) { - refill_count := refill_count + UFix(1) + refill_count := refill_count + UInt(1) line_state := tl.co.newStateOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) } } @@ -256,7 +256,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte } when (io.req_pri_val && io.req_pri_rdy) { line_state := tl.co.newStateOnFlush() - refill_count := UFix(0) + refill_count := UInt(0) acquire_type := tl.co.getAcquireTypeOnPrimaryMiss(req_cmd, tl.co.newStateOnFlush()) release_type := tl.co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc req := io.req_bits @@ -273,7 +273,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte } } - val 
ackq = (new Queue(1)) { (new LogicalNetworkIO){new GrantAck} } + val ackq = Module(new Queue((new LogicalNetworkIO){new GrantAck}, 1)) ackq.io.enq.valid := (wb_done || refill_done) && tl.co.requiresAck(io.mem_grant.bits.payload) ackq.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src @@ -289,7 +289,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready - val meta_hazard = Reg(resetVal = UFix(0,2)) + val meta_hazard = RegReset(UInt(0,2)) when (meta_hazard != 0) { meta_hazard := meta_hazard + 1 } when (io.meta_write.fire()) { meta_hazard := 1 } io.probe_rdy := !idx_match || (state != s_wb_req && state != s_wb_resp && state != s_meta_clear && meta_hazard === 0) @@ -309,7 +309,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready io.mem_req.bits.a_type := acquire_type - io.mem_req.bits.addr := Cat(io.tag, req_idx).toUFix + io.mem_req.bits.addr := Cat(io.tag, req_idx).toUInt io.mem_req.bits.client_xact_id := Bits(id) io.mem_finish <> ackq.io.deq io.mem_req.bits.client_xact_id := Bits(id) @@ -320,7 +320,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte io.replay.valid := state === s_drain_rpq && rpq.io.deq.valid io.replay.bits := rpq.io.deq.bits io.replay.bits.phys := Bool(true) - io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(conf.offbits-1,0)).toUFix + io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(conf.offbits-1,0)).toUInt when (!io.meta_read.ready) { rpq.io.deq.ready := Bool(false) @@ -328,45 +328,45 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte } } -class MSHRFile(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component { +class MSHRFile(implicit 
conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { implicit val ln = tl.ln val io = new Bundle { - val req = (new FIFOIO) { new MSHRReq }.flip + val req = Decoupled(new MSHRReq).flip val secondary_miss = Bool(OUTPUT) - val mem_req = (new FIFOIO) { new Acquire } + val mem_req = Decoupled(new Acquire) val mem_resp = new DataWriteReq().asOutput - val meta_read = (new FIFOIO) { new MetaReadReq } - val meta_write = (new FIFOIO) { new MetaWriteReq } - val replay = (new FIFOIO) { new Replay } - val mem_grant = (new PipeIO) { (new LogicalNetworkIO){new Grant} }.flip - val mem_finish = (new FIFOIO) { (new LogicalNetworkIO){new GrantAck} } - val wb_req = (new FIFOIO) { new WritebackReq } + val meta_read = Decoupled(new MetaReadReq) + val meta_write = Decoupled(new MetaWriteReq) + val replay = Decoupled(new Replay) + val mem_grant = Valid((new LogicalNetworkIO){new Grant}).flip + val mem_finish = Decoupled((new LogicalNetworkIO){new GrantAck}) + val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) val fence_rdy = Bool(OUTPUT) } - val sdq_val = Reg(resetVal = Bits(0, conf.nsdq)) + val sdq_val = RegReset(Bits(0, conf.nsdq)) val sdq_alloc_id = PriorityEncoder(~sdq_val(conf.nsdq-1,0)) val sdq_rdy = !sdq_val.andR val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd) - val sdq = Mem(conf.nsdq) { io.req.bits.data.clone } + val sdq = Mem(io.req.bits.data, conf.nsdq) when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } - val idxMatch = Vec(conf.nmshr) { Bool() } - val tagList = Vec(conf.nmshr) { Bits() } + val idxMatch = Vec.fill(conf.nmshr){Bool()} + val tagList = Vec.fill(conf.nmshr){Bits()} val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> conf.untagbits - val wbTagList = Vec(conf.nmshr) { Bits() } - val memRespMux = Vec(conf.nmshr) { new DataWriteReq } - val meta_read_arb = (new Arbiter(conf.nmshr)) { new MetaReadReq } - val meta_write_arb = (new Arbiter(conf.nmshr)) { new MetaWriteReq } - val mem_req_arb = (new 
Arbiter(conf.nmshr)) { new Acquire } - val mem_finish_arb = (new Arbiter(conf.nmshr)) { (new LogicalNetworkIO){new GrantAck} } - val wb_req_arb = (new Arbiter(conf.nmshr)) { new WritebackReq } - val replay_arb = (new Arbiter(conf.nmshr)) { new Replay() } - val alloc_arb = (new Arbiter(conf.nmshr)) { Bool() } + val wbTagList = Vec.fill(conf.nmshr){Bits()} + val memRespMux = Vec.fill(conf.nmshr){new DataWriteReq} + val meta_read_arb = Module(new Arbiter(new MetaReadReq, conf.nmshr)) + val meta_write_arb = Module(new Arbiter(new MetaWriteReq, conf.nmshr)) + val mem_req_arb = Module(new Arbiter(new Acquire, conf.nmshr)) + val mem_finish_arb = Module(new Arbiter((new LogicalNetworkIO){new GrantAck}, conf.nmshr)) + val wb_req_arb = Module(new Arbiter(new WritebackReq, conf.nmshr)) + val replay_arb = Module(new Arbiter(new Replay, conf.nmshr)) + val alloc_arb = Module(new Arbiter(Bool(), conf.nmshr)) var idx_match = Bool(false) var pri_rdy = Bool(false) @@ -376,7 +376,7 @@ class MSHRFile(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends C io.probe_rdy := true for (i <- 0 to conf.nmshr-1) { - val mshr = new MSHR(i) + val mshr = Module(new MSHR(i)) idxMatch(i) := mshr.io.idx_match tagList(i) := mshr.io.tag @@ -424,29 +424,29 @@ class MSHRFile(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends C io.replay <> replay_arb.io.out when (io.replay.valid || sdq_enq) { - sdq_val := sdq_val & ~(UFixToOH(io.replay.bits.sdq_id) & Fill(conf.nsdq, free_sdq)) | + sdq_val := sdq_val & ~(UIntToOH(io.replay.bits.sdq_id) & Fill(conf.nsdq, free_sdq)) | PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq) } } -class WritebackUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component { +class WritebackUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { val io = new Bundle { - val req = (new FIFOIO) { new WritebackReq() }.flip - val probe = (new FIFOIO) { new WritebackReq() }.flip - val meta_read = (new 
FIFOIO) { new MetaReadReq } - val data_req = (new FIFOIO) { new DataReadReq() } + val req = Decoupled(new WritebackReq()).flip + val probe = Decoupled(new WritebackReq()).flip + val meta_read = Decoupled(new MetaReadReq) + val data_req = Decoupled(new DataReadReq()) val data_resp = Bits(INPUT, conf.bitsperrow) - val release = (new FIFOIO) { new Release } - val release_data = (new FIFOIO) { new ReleaseData } + val release = Decoupled(new Release) + val release_data = Decoupled(new ReleaseData) } - val valid = Reg(resetVal = Bool(false)) - val r1_data_req_fired = Reg(resetVal = Bool(false)) - val r2_data_req_fired = Reg(resetVal = Bool(false)) - val cmd_sent = Reg{Bool()} - val cnt = Reg{UFix(width = log2Up(REFILL_CYCLES+1))} - val req = Reg{new WritebackReq} + val valid = RegReset(Bool(false)) + val r1_data_req_fired = RegReset(Bool(false)) + val r2_data_req_fired = RegReset(Bool(false)) + val cmd_sent = Reg(Bool()) + val cnt = Reg(UInt(width = log2Up(REFILL_CYCLES+1))) + val req = Reg(new WritebackReq) when (valid) { r1_data_req_fired := false @@ -459,7 +459,7 @@ class WritebackUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte when (r2_data_req_fired && !io.release_data.ready) { r1_data_req_fired := false r2_data_req_fired := false - cnt := cnt - Mux[UFix](r1_data_req_fired, 2, 1) + cnt := cnt - Mux[UInt](r1_data_req_fired, 2, 1) } when (!r1_data_req_fired && !r2_data_req_fired && cmd_sent && cnt === REFILL_CYCLES) { @@ -483,7 +483,7 @@ class WritebackUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte req := io.req.bits } - val fire = valid && cnt < UFix(REFILL_CYCLES) + val fire = valid && cnt < UInt(REFILL_CYCLES) io.req.ready := !valid && !io.probe.valid io.probe.ready := !valid io.data_req.valid := fire @@ -492,9 +492,9 @@ class WritebackUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte io.release.valid := valid && !cmd_sent io.release.bits.r_type := req.r_type - io.release.bits.addr := Cat(req.tag, req.idx).toUFix 
+ io.release.bits.addr := Cat(req.tag, req.idx).toUInt io.release.bits.client_xact_id := req.client_xact_id - io.release.bits.master_xact_id := UFix(0) + io.release.bits.master_xact_id := UInt(0) io.release_data.valid := r2_data_req_fired io.release_data.bits.data := io.data_resp @@ -502,23 +502,23 @@ class WritebackUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte io.meta_read.bits.addr := io.release.bits.addr << conf.offbits } -class ProbeUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component { +class ProbeUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { val io = new Bundle { - val req = (new FIFOIO) { new InternalProbe }.flip - val rep = (new FIFOIO) { new Release } - val meta_read = (new FIFOIO) { new MetaReadReq } - val meta_write = (new FIFOIO) { new MetaWriteReq } - val wb_req = (new FIFOIO) { new WritebackReq } + val req = Decoupled(new InternalProbe).flip + val rep = Decoupled(new Release) + val meta_read = Decoupled(new MetaReadReq) + val meta_write = Decoupled(new MetaWriteReq) + val wb_req = Decoupled(new WritebackReq) val way_en = Bits(INPUT, conf.ways) val mshr_rdy = Bool(INPUT) - val line_state = UFix(INPUT, 2) + val line_state = UInt(INPUT, 2) } - val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(9) { UFix() } - val state = Reg(resetVal = s_invalid) - val line_state = Reg() { UFix() } - val way_en = Reg() { Bits() } - val req = Reg() { new InternalProbe } + val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(9) { UInt() } + val state = RegReset(s_invalid) + val line_state = Reg(UInt()) + val way_en = Reg(Bits()) + val req = Reg(new InternalProbe) val hit = way_en.orR when (state === s_meta_write && io.meta_write.ready) { @@ -561,40 +561,40 @@ class ProbeUnit(implicit conf: 
DCacheConfig, tl: TileLinkConfiguration) extends io.rep.bits := Release(tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.newStateOnFlush)), req.addr, req.client_xact_id, req.master_xact_id) io.meta_read.valid := state === s_meta_read - io.meta_read.bits.addr := req.addr << UFix(conf.offbits) + io.meta_read.bits.addr := req.addr << UInt(conf.offbits) io.meta_write.valid := state === s_meta_write io.meta_write.bits.way_en := way_en io.meta_write.bits.idx := req.addr io.meta_write.bits.data.state := tl.co.newStateOnProbe(req, line_state) - io.meta_write.bits.data.tag := req.addr >> UFix(conf.idxbits) + io.meta_write.bits.data.tag := req.addr >> UInt(conf.idxbits) io.wb_req.valid := state === s_writeback_req io.wb_req.bits.way_en := way_en io.wb_req.bits.idx := req.addr - io.wb_req.bits.tag := req.addr >> UFix(conf.idxbits) - io.wb_req.bits.r_type := UFix(0) // DNC - io.wb_req.bits.client_xact_id := UFix(0) // DNC + io.wb_req.bits.tag := req.addr >> UInt(conf.idxbits) + io.wb_req.bits.r_type := UInt(0) // DNC + io.wb_req.bits.client_xact_id := UInt(0) // DNC } -class MetaDataArray(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component { +class MetaDataArray(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { val io = new Bundle { - val read = (new FIFOIO) { new MetaReadReq }.flip - val write = (new FIFOIO) { new MetaWriteReq }.flip - val resp = Vec(conf.ways){ (new MetaData).asOutput } + val read = Decoupled(new MetaReadReq).flip + val write = Decoupled(new MetaWriteReq).flip + val resp = Vec.fill(conf.ways){(new MetaData).asOutput} } - val rst_cnt = Reg(resetVal = UFix(0, log2Up(conf.sets+1))) + val rst_cnt = RegReset(UInt(0, log2Up(conf.sets+1))) val rst = rst_cnt < conf.sets when (rst) { rst_cnt := rst_cnt+1 } val metabits = io.write.bits.data.state.width + conf.tagbits - val tags = Mem(conf.sets, seqRead = true) { UFix(width = metabits*conf.ways) } + val tags = Mem(UInt(width = metabits*conf.ways), conf.sets, 
seqRead = true) when (rst || io.write.valid) { val addr = Mux(rst, rst_cnt, io.write.bits.idx) val data = Cat(Mux(rst, tl.co.newStateOnFlush, io.write.bits.data.state), io.write.bits.data.tag) - val mask = Mux(rst, Fix(-1), io.write.bits.way_en) + val mask = Mux(rst, SInt(-1), io.write.bits.way_en) tags.write(addr, Fill(conf.ways, data), FillInterleaved(metabits, mask)) } val tag = tags(RegEn(io.read.bits.addr >> conf.offbits, io.read.valid)) @@ -609,11 +609,11 @@ class MetaDataArray(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte io.write.ready := !rst } -class DataArray(implicit conf: DCacheConfig) extends Component { +class DataArray(implicit conf: DCacheConfig) extends Module { val io = new Bundle { - val read = new FIFOIO()(new DataReadReq).flip - val write = new FIFOIO()(new DataWriteReq).flip - val resp = Vec(conf.ways){ Bits(OUTPUT, conf.bitsperrow) } + val read = Decoupled(new DataReadReq).flip + val write = Decoupled(new DataWriteReq).flip + val resp = Vec.fill(conf.ways){Bits(OUTPUT, conf.bitsperrow)} } val waddr = io.write.bits.addr >> conf.ramoffbits @@ -623,10 +623,10 @@ class DataArray(implicit conf: DCacheConfig) extends Component { for (w <- 0 until conf.ways by conf.wordsperrow) { val wway_en = io.write.bits.way_en(w+conf.wordsperrow-1,w) val rway_en = io.read.bits.way_en(w+conf.wordsperrow-1,w) - val resp = Vec(conf.wordsperrow){Bits(width = conf.bitsperrow)} + val resp = Vec.fill(conf.wordsperrow){Bits(width = conf.bitsperrow)} val r_raddr = RegEn(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { - val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=conf.bitsperrow) } + val array = Mem(Bits(width=conf.bitsperrow), conf.sets*REFILL_CYCLES, seqRead = true) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { val data = Fill(conf.wordsperrow, io.write.bits.data(conf.encdatabits*(p+1)-1,conf.encdatabits*p)) val mask = FillInterleaved(conf.encdatabits, wway_en) @@ -645,7 +645,7 @@ class 
DataArray(implicit conf: DCacheConfig) extends Component { } else { val wmask = FillInterleaved(conf.encdatabits, io.write.bits.wmask) for (w <- 0 until conf.ways) { - val array = Mem(conf.sets*REFILL_CYCLES, seqRead = true){ Bits(width=conf.bitsperrow) } + val array = Mem(Bits(width=conf.bitsperrow), conf.sets*REFILL_CYCLES, seqRead = true) when (io.write.bits.way_en(w) && io.write.valid) { array.write(waddr, io.write.bits.data, wmask) } @@ -657,7 +657,7 @@ class DataArray(implicit conf: DCacheConfig) extends Component { io.write.ready := Bool(true) } -class AMOALU(implicit conf: DCacheConfig) extends Component { +class AMOALU(implicit conf: DCacheConfig) extends Module { val io = new Bundle { val addr = Bits(INPUT, conf.offbits) val cmd = Bits(INPUT, 4) @@ -674,8 +674,8 @@ class AMOALU(implicit conf: DCacheConfig) extends Component { val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU val word = io.typ === MT_W || io.typ === MT_WU || io.typ === MT_B || io.typ === MT_BU - val mask = Fix(-1,64) ^ (io.addr(2) << 31) - val adder_out = (io.lhs & mask).toUFix + (io.rhs & mask) + val mask = SInt(-1,64) ^ (io.addr(2) << 31) + val adder_out = (io.lhs & mask).toUInt + (io.rhs & mask) val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63)) val cmp_rhs = Mux(word && !io.addr(2), io.rhs(31), io.rhs(63)) @@ -699,7 +699,7 @@ class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { val kill = Bool() val typ = Bits(width = 3) val phys = Bool() - val addr = UFix(width = conf.maxaddrbits) + val addr = UInt(width = conf.maxaddrbits) val data = Bits(width = conf.databits) val tag = Bits(width = conf.reqtagbits) val cmd = Bits(width = 4) @@ -715,7 +715,7 @@ class HellaCacheResp(implicit conf: DCacheConfig) extends Bundle { val data_subword = Bits(width = conf.databits) val tag = Bits(width = conf.reqtagbits) val cmd = Bits(width = 4) - val addr = UFix(width = conf.maxaddrbits) + val addr = UInt(width = conf.maxaddrbits) val store_data = Bits(width = conf.databits) 
override def clone = new HellaCacheResp().asInstanceOf[this.type] @@ -733,13 +733,13 @@ class HellaCacheExceptions extends Bundle { // interface between D$ and processor/DTLB class HellaCacheIO(implicit conf: DCacheConfig) extends Bundle { - val req = (new FIFOIO){ new HellaCacheReq } - val resp = (new PipeIO){ new HellaCacheResp }.flip + val req = Decoupled(new HellaCacheReq) + val resp = Valid(new HellaCacheResp).flip val xcpt = (new HellaCacheExceptions).asInput val ptw = (new TLBPTWIO).flip } -class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Component { +class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { implicit val ln = tl.ln val io = new Bundle { val cpu = (new HellaCacheIO).flip @@ -751,37 +751,37 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val offsetmsb = indexlsb-1 val offsetlsb = log2Up(conf.databytes) - val wb = new WritebackUnit - val prober = new ProbeUnit - val mshrs = new MSHRFile + val wb = Module(new WritebackUnit) + val prober = Module(new ProbeUnit) + val mshrs = Module(new MSHRFile) io.cpu.req.ready := Bool(true) - val s1_valid = Reg(io.cpu.req.fire(), resetVal = Bool(false)) - val s1_req = Reg{io.cpu.req.bits.clone} + val s1_valid = Reg(update=io.cpu.req.fire(), reset=Bool(false)) + val s1_req = Reg(io.cpu.req.bits.clone) val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill - val s1_replay = Reg(resetVal = Bool(false)) - val s1_clk_en = Reg{Bool()} + val s1_replay = RegReset(Bool(false)) + val s1_clk_en = Reg(Bool()) - val s2_valid = Reg(s1_valid_masked, resetVal = Bool(false)) - val s2_req = Reg{io.cpu.req.bits.clone} - val s2_replay = Reg(s1_replay, resetVal = Bool(false)) + val s2_valid = Reg(update=s1_valid_masked, reset=Bool(false)) + val s2_req = Reg(io.cpu.req.bits.clone) + val s2_replay = Reg(update=s1_replay, reset=Bool(false)) val s2_recycle = Bool() val s2_valid_masked = Bool() - val s3_valid = Reg(resetVal = 
Bool(false)) - val s3_req = Reg{io.cpu.req.bits.clone} - val s3_way = Reg{Bits()} + val s3_valid = RegReset(Bool(false)) + val s3_req = Reg(io.cpu.req.bits.clone) + val s3_way = Reg(Bits()) val s1_recycled = RegEn(s2_recycle, s1_clk_en) val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) - val dtlb = new TLB(8) + val dtlb = Module(new TLB(8)) dtlb.io.ptw <> io.cpu.ptw dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys dtlb.io.req.bits.passthrough := s1_req.phys - dtlb.io.req.bits.asid := UFix(0) + dtlb.io.req.bits.asid := UInt(0) dtlb.io.req.bits.vpn := s1_req.addr >> conf.pgidxbits dtlb.io.req.bits.instruction := Bool(false) when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) } @@ -806,14 +806,16 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(conf.pgidxbits-1,0)) when (s1_clk_en) { - s2_req.addr := s1_addr + s2_req.kill := s1_req.kill s2_req.typ := s1_req.typ - s2_req.cmd := s1_req.cmd - s2_req.tag := s1_req.tag + s2_req.phys := s1_req.phys + s2_req.addr := s1_addr when (s1_write) { s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.req.bits.data) } when (s1_recycled) { s2_req.data := s1_req.data } + s2_req.tag := s1_req.tag + s2_req.cmd := s1_req.cmd } val misaligned = @@ -827,19 +829,19 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st // tags - val meta = new MetaDataArray - val metaReadArb = (new Arbiter(5)) { new MetaReadReq } - val metaWriteArb = (new Arbiter(2)) { new MetaWriteReq } + val meta = Module(new MetaDataArray) + val metaReadArb = Module(new Arbiter(new MetaReadReq, 5)) + val metaWriteArb = Module(new Arbiter(new MetaWriteReq, 2)) metaReadArb.io.out <> meta.io.read metaWriteArb.io.out <> meta.io.write // data - val data = new DataArray 
- val readArb = new Arbiter(4)(new DataReadReq) + val data = Module(new DataArray) + val readArb = Module(new Arbiter(new DataReadReq, 4)) readArb.io.out.ready := !io.mem.grant.valid || io.mem.grant.ready // insert bubble if refill gets blocked readArb.io.out <> data.io.read - val writeArb = new Arbiter(2)(new DataWriteReq) + val writeArb = Module(new Arbiter(new DataWriteReq, 2)) data.io.write.valid := writeArb.io.out.valid writeArb.io.out.ready := data.io.write.ready data.io.write.bits := writeArb.io.out.bits @@ -854,7 +856,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends // data read for new requests readArb.io.in(3).bits.addr := io.cpu.req.bits.addr readArb.io.in(3).valid := io.cpu.req.valid - readArb.io.in(3).bits.way_en := Fix(-1) + readArb.io.in(3).bits.way_en := SInt(-1) when (!readArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) } // recycled requests @@ -862,23 +864,23 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends metaReadArb.io.in(0).bits.addr := s2_req.addr readArb.io.in(0).valid := s2_recycle readArb.io.in(0).bits.addr := s2_req.addr - readArb.io.in(0).bits.way_en := Fix(-1) + readArb.io.in(0).bits.way_en := SInt(-1) // tag check and way muxing - def wayMap[T <: Data](f: Int => T)(gen: => T) = Vec((0 until conf.ways).map(f)){gen} - val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)){Bits()}.toBits - val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && tl.co.isValid(meta.io.resp(w).state)){Bits()}.toBits + def wayMap[T <: Data](f: Int => T) = Vec((0 until conf.ways).map(f)) + val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)).toBits + val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && tl.co.isValid(meta.io.resp(w).state)).toBits s1_clk_en := metaReadArb.io.out.valid val s1_writeback = s1_clk_en && !s1_valid && !s1_replay val s2_tag_match_way = RegEn(s1_tag_match_way, 
s1_clk_en) val s2_tag_match = s2_tag_match_way.orR - val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en)){Bits()}) + val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en))) val s2_hit = s2_tag_match && tl.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === tl.co.newStateOnHit(s2_req.cmd, s2_hit_state) // load-reserved/store-conditional - val lrsc_count = Reg(resetVal = UFix(0)) + val lrsc_count = RegReset(UInt(0)) val lrsc_valid = lrsc_count.orR - val lrsc_addr = Reg{UFix()} + val lrsc_addr = Reg(UInt()) val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC) val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> conf.offbits) val s2_sc_fail = s2_sc && !s2_lrsc_addr_match @@ -894,9 +896,9 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends } when (io.cpu.ptw.eret) { lrsc_count := 0 } - val s2_data = Vec(conf.ways){Bits(width = conf.bitsperrow)} + val s2_data = Vec.fill(conf.ways){Bits(width = conf.bitsperrow)} for (w <- 0 until conf.ways) { - val regs = Vec(conf.wordsperrow){Reg{Bits(width = conf.encdatabits)}} + val regs = Vec.fill(conf.wordsperrow){Reg(Bits(width = conf.encdatabits))} val en1 = s1_clk_en && s1_tag_eq_way(w) for (i <- 0 until regs.size) { val en = en1 && (Bool(i == 0 || !conf.isNarrowRead) || s1_writeback) @@ -908,12 +910,12 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val s2_data_decoded = (0 until conf.wordsperrow).map(i => conf.code.decode(s2_data_muxed(conf.encdatabits*(i+1)-1,conf.encdatabits*i))) val s2_data_corrected = AVec(s2_data_decoded.map(_.corrected)).toBits val s2_data_uncorrected = AVec(s2_data_decoded.map(_.uncorrected)).toBits - val s2_word_idx = if (conf.isNarrowRead) UFix(0) else s2_req.addr(log2Up(conf.wordsperrow*conf.databytes)-1,3) + val s2_word_idx = if (conf.isNarrowRead) UInt(0) else 
s2_req.addr(log2Up(conf.wordsperrow*conf.databytes)-1,3) val s2_data_correctable = AVec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) // store/amo hits s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && isWrite(s2_req.cmd) - val amoalu = new AMOALU + val amoalu = Module(new AMOALU) when ((s2_valid || s2_replay) && (isWrite(s2_req.cmd) || s2_data_correctable)) { s3_req := s2_req s3_req.data := Mux(s2_data_correctable, s2_data_corrected, amoalu.io.out) @@ -921,16 +923,16 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends } writeArb.io.in(0).bits.addr := s3_req.addr - writeArb.io.in(0).bits.wmask := UFix(1) << s3_req.addr(conf.ramoffbits-1,offsetlsb).toUFix + writeArb.io.in(0).bits.wmask := UInt(1) << s3_req.addr(conf.ramoffbits-1,offsetlsb).toUInt writeArb.io.in(0).bits.data := Fill(conf.wordsperrow, s3_req.data) writeArb.io.in(0).valid := s3_valid writeArb.io.in(0).bits.way_en := s3_way // replacement policy val replacer = new RandomReplacement - val s1_replaced_way_en = UFixToOH(replacer.way) - val s2_replaced_way_en = UFixToOH(RegEn(replacer.way, s1_clk_en)) - val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))){new MetaData}) + val s1_replaced_way_en = UIntToOH(replacer.way) + val s2_replaced_way_en = UIntToOH(RegEn(replacer.way, s1_clk_en)) + val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq) // miss handling mshrs.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) @@ -948,18 +950,18 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends //TODO io.mem.acquire.data should be connected to uncached store data generator //io.mem.acquire.data <> FIFOedLogicalNetworkIOWrapper(TODO) io.mem.acquire.data.valid := Bool(false) - io.mem.acquire.data.bits.payload.data := 
UFix(0) + io.mem.acquire.data.bits.payload.data := UInt(0) // replays readArb.io.in(1).valid := mshrs.io.replay.valid readArb.io.in(1).bits := mshrs.io.replay.bits - readArb.io.in(1).bits.way_en := Fix(-1) + readArb.io.in(1).bits.way_en := SInt(-1) mshrs.io.replay.ready := readArb.io.in(1).ready s1_replay := mshrs.io.replay.valid && readArb.io.in(1).ready metaReadArb.io.in(1) <> mshrs.io.meta_read metaWriteArb.io.in(0) <> mshrs.io.meta_write // probes - val releaseArb = (new Arbiter(2)) { new Release } + val releaseArb = Module(new Arbiter(new Release, 2)) FIFOedLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release.meta val probe = FIFOedLogicalNetworkIOUnwrapper(io.mem.probe) @@ -979,7 +981,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends writeArb.io.in(1).valid := io.mem.grant.valid && refill io.mem.grant.ready := writeArb.io.in(1).ready || !refill writeArb.io.in(1).bits := mshrs.io.mem_resp - writeArb.io.in(1).bits.wmask := Fix(-1) + writeArb.io.in(1).bits.wmask := SInt(-1) writeArb.io.in(1).bits.data := io.mem.grant.bits.payload.data // writebacks @@ -991,15 +993,15 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends FIFOedLogicalNetworkIOWrapper(wb.io.release_data) <> io.mem.release.data // store->load bypassing - val s4_valid = Reg(s3_valid, resetVal = Bool(false)) + val s4_valid = Reg(update=s3_valid, reset=Bool(false)) val s4_req = RegEn(s3_req, s3_valid && metaReadArb.io.out.valid) val bypasses = List( ((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out), (s3_valid, s3_req, s3_req.data), (s4_valid, s4_req, s4_req.data) ).map(r => (r._1 && (s1_addr >> conf.wordoffbits === r._2.addr >> conf.wordoffbits) && isWrite(r._2.cmd), r._3)) - val s2_store_bypass_data = Reg{Bits(width = conf.databits)} - val s2_store_bypass = Reg{Bool()} + val s2_store_bypass_data = Reg(Bits(width = conf.databits)) + val s2_store_bypass = Reg(Bool()) when (s1_clk_en) { s2_store_bypass := 
false when (bypasses.map(_._1).reduce(_||_)) { @@ -1029,14 +1031,14 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends s2_valid_masked := s2_valid && !s2_nack val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable - val s2_recycle_next = Reg(resetVal = Bool(false)) + val s2_recycle_next = RegReset(Bool(false)) when (s1_valid || s1_replay) { s2_recycle_next := (s1_valid || s1_replay) && s2_recycle_ecc } s2_recycle := s2_recycle_ecc || s2_recycle_next // after a nack, block until nack condition resolves to save energy - val block_fence = Reg(resetVal = Bool(false)) + val block_fence = RegReset(Bool(false)) block_fence := (s2_valid && s2_req.cmd === M_FENCE || block_fence) && !mshrs.io.fence_rdy - val block_miss = Reg(resetVal = Bool(false)) + val block_miss = RegReset(Bool(false)) block_miss := (s2_valid || block_miss) && s2_nack_miss when (block_fence || block_miss) { io.cpu.req.ready := Bool(false) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index bcf9ce1b..806ac261 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -5,13 +5,15 @@ import uncore.constants.AddressConstants._ import uncore.constants.MemoryOpConstants._ import Util._ -class TLBPTWIO extends Bundle { - val req = new FIFOIO()(UFix(width = VPN_BITS)) - val resp = new PipeIO()(new Bundle { +class PTWResp extends Bundle { val error = Bool() - val ppn = UFix(width = PPN_BITS) + val ppn = UInt(width = PPN_BITS) val perm = Bits(width = PERM_BITS) - }).flip +} + +class TLBPTWIO extends Bundle { + val req = Decoupled(UInt(width = VPN_BITS)) + val resp = Valid(new PTWResp).flip val status = new Status().asInput val invalidate = Bool(INPUT) @@ -19,16 +21,16 @@ class TLBPTWIO extends Bundle { } class DatapathPTWIO extends Bundle { - val ptbr = UFix(INPUT, PADDR_BITS) + val ptbr = UInt(INPUT, PADDR_BITS) val invalidate = Bool(INPUT) val eret = Bool(INPUT) val status = new Status().asInput } 
-class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component +class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module { val io = new Bundle { - val requestor = Vec(n) { new TLBPTWIO }.flip + val requestor = Vec.fill(n){new TLBPTWIO}.flip val mem = new HellaCacheIO()(conf.dcache) val dpath = new DatapathPTWIO } @@ -37,17 +39,17 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component val bitsPerLevel = VPN_BITS/levels require(VPN_BITS == levels * bitsPerLevel) - val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(5) { UFix() }; - val state = Reg(resetVal = s_ready) - val count = Reg{UFix(width = log2Up(levels))} + val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(5) { UInt() }; + val state = RegReset(s_ready) + val count = Reg(UInt(width = log2Up(levels))) - val r_req_vpn = Reg{Bits()} - val r_req_dest = Reg{Bits()} - val r_pte = Reg{Bits()} + val r_req_vpn = Reg(Bits()) + val r_req_dest = Reg(Bits()) + val r_pte = Reg(Bits()) val vpn_idx = AVec((0 until levels).map(i => (r_req_vpn >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count) - val arb = new RRArbiter(n)(UFix(width = VPN_BITS)) + val arb = Module(new RRArbiter(UInt(width = VPN_BITS), n)) arb.io.in <> io.requestor.map(_.req) arb.io.out.ready := state === s_ready @@ -65,7 +67,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component io.mem.req.bits.phys := Bool(true) io.mem.req.bits.cmd := M_XRD io.mem.req.bits.typ := MT_D - io.mem.req.bits.addr := Cat(r_pte(PADDR_BITS-1,PGIDX_BITS), vpn_idx).toUFix << log2Up(conf.xprlen/8) + io.mem.req.bits.addr := Cat(r_pte(PADDR_BITS-1,PGIDX_BITS), vpn_idx).toUInt << log2Up(conf.xprlen/8) io.mem.req.bits.kill := Bool(false) val resp_val = state === s_done || state === s_error @@ -78,11 +80,11 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component val resp_ppn = AVec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), 
r_req_vpn(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) for (i <- 0 until io.requestor.size) { - val me = r_req_dest === UFix(i) + val me = r_req_dest === UInt(i) io.requestor(i).resp.valid := resp_val && me io.requestor(i).resp.bits.error := resp_err io.requestor(i).resp.bits.perm := r_pte(9,4) - io.requestor(i).resp.bits.ppn := resp_ppn.toUFix + io.requestor(i).resp.bits.ppn := resp_ppn.toUInt io.requestor(i).invalidate := io.dpath.invalidate io.requestor(i).eret := io.dpath.eret io.requestor(i).status := io.dpath.status @@ -94,7 +96,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component when (arb.io.out.valid) { state := s_req; } - count := UFix(0) + count := UInt(0) } is (s_req) { when (io.mem.req.ready) { @@ -110,8 +112,8 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Component state := s_done } .otherwise { - count := count + UFix(1) - when (resp_ptd && count < UFix(levels-1)) { + count := count + UInt(1) + when (resp_ptd && count < UInt(levels-1)) { state := s_req } .otherwise { diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 84b13e3f..937eaba7 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -19,16 +19,16 @@ case class RocketConfiguration(tl: TileLinkConfiguration, if (fastLoadByte) require(fastLoadWord) } -class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Component(resetSignal) with ClientCoherenceAgent +class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module(reset = resetSignal) with ClientCoherenceAgent { val memPorts = 2 + confIn.vec val dcachePortId = 0 val icachePortId = 1 val vicachePortId = 2 - implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts), databits = confIn.xprlen) - implicit val icConf = confIn.icache implicit val tlConf = confIn.tl implicit val lnConf = confIn.tl.ln + implicit val icConf = confIn.icache + implicit val 
dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts), databits = confIn.xprlen) implicit val conf = confIn.copy(dcache = dcConf) val io = new Bundle { @@ -36,11 +36,11 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon val host = new HTIFIO(lnConf.nClients) } - val core = new Core - val icache = new Frontend - val dcache = new HellaCache + val core = Module(new Core) + val icache = Module(new Frontend) + val dcache = Module(new HellaCache) - val arbiter = new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts) + val arbiter = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts)) arbiter.io.in(dcachePortId) <> dcache.io.mem arbiter.io.in(icachePortId) <> icache.io.mem @@ -52,18 +52,18 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Compon io.tilelink.release.meta.valid := dcache.io.mem.release.meta.valid dcache.io.mem.release.meta.ready := io.tilelink.release.meta.ready io.tilelink.release.meta.bits := dcache.io.mem.release.meta.bits - io.tilelink.release.meta.bits.payload.client_xact_id := Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UFix(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) + io.tilelink.release.meta.bits.payload.client_xact_id := Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) /*val ioSubBundles = io.tilelink.getClass.getMethods.filter( x => classOf[ClientSourcedIO[Data]].isAssignableFrom(x.getReturnType)).map{ m => m.invoke(io.tilelink).asInstanceOf[ClientSourcedIO[LogicalNetworkIO[Data]]] } ioSubBundles.foreach{ m => - m.bits.header.dst := UFix(0) - m.bits.header.src := UFix(0) + m.bits.header.dst := UInt(0) + m.bits.header.src := UInt(0) }*/ if (conf.vec) { - val vicache = new 
Frontend()(ICacheConfig(128, 1), tlConf) // 128 sets x 1 ways (8KB) + val vicache = Module(new Frontend()(ICacheConfig(128, 1), tlConf)) // 128 sets x 1 ways (8KB) arbiter.io.in(vicachePortId) <> vicache.io.mem core.io.vimem <> vicache.io.cpu } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index a4dc13f6..99997b39 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -4,25 +4,25 @@ import Chisel._ import uncore.constants.AddressConstants._ import scala.math._ -class ioCAM(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { +class CAMIO(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { val clear = Bool(INPUT); val clear_hit = Bool(INPUT) val tag = Bits(INPUT, tag_bits); val hit = Bool(OUTPUT); - val hits = UFix(OUTPUT, entries); + val hits = UInt(OUTPUT, entries); val valid_bits = Bits(OUTPUT, entries); val write = Bool(INPUT); val write_tag = Bits(INPUT, tag_bits); - val write_addr = UFix(INPUT, addr_bits); + val write_addr = UInt(INPUT, addr_bits); } -class rocketCAM(entries: Int, tag_bits: Int) extends Component { +class RocketCAM(entries: Int, tag_bits: Int) extends Module { val addr_bits = ceil(log(entries)/log(2)).toInt; - val io = new ioCAM(entries, addr_bits, tag_bits); - val cam_tags = Vec(entries) { Reg() { Bits(width = tag_bits) } } + val io = new CAMIO(entries, addr_bits, tag_bits); + val cam_tags = Vec.fill(entries){Reg(Bits(width = tag_bits))} - val vb_array = Reg(resetVal = Bits(0, entries)); + val vb_array = RegReset(Bits(0, entries)) when (io.write) { vb_array := vb_array.bitSet(io.write_addr, Bool(true)); cam_tags(io.write_addr) := io.write_tag @@ -37,27 +37,27 @@ class rocketCAM(entries: Int, tag_bits: Int) extends Component { val hits = (0 until entries).map(i => vb_array(i) && cam_tags(i) === io.tag) io.valid_bits := vb_array; - io.hits := Vec(hits){Bool()}.toBits.toUFix + io.hits := Vec(hits).toBits io.hit := io.hits.orR } class PseudoLRU(n: Int) { - val 
state = Reg() { Bits(width = n) } - def access(way: UFix) = { + val state = Reg(Bits(width = n)) + def access(way: UInt) = { var next_state = state - var idx = UFix(1,1) + var idx = UInt(1,1) for (i <- log2Up(n)-1 to 0 by -1) { val bit = way(i) - val mask = (UFix(1,n) << idx)(n-1,0) - next_state = next_state & ~mask | Mux(bit, UFix(0), mask) + val mask = (UInt(1,n) << idx)(n-1,0) + next_state = next_state & ~mask | Mux(bit, UInt(0), mask) //next_state.bitSet(idx, !bit) idx = Cat(idx, bit) } state := next_state } def replace = { - var idx = UFix(1,1) + var idx = UInt(1,1) for (i <- 0 until log2Up(n)) idx = Cat(idx, state(idx)) idx(log2Up(n)-1,0) @@ -66,8 +66,8 @@ class PseudoLRU(n: Int) class TLBReq extends Bundle { - val asid = UFix(width = ASID_BITS) - val vpn = UFix(width = VPN_BITS+1) + val asid = UInt(width = ASID_BITS) + val vpn = UInt(width = VPN_BITS+1) val passthrough = Bool() val instruction = Bool() } @@ -76,8 +76,8 @@ class TLBResp(entries: Int) extends Bundle { // lookup responses val miss = Bool(OUTPUT) - val hit_idx = UFix(OUTPUT, entries) - val ppn = UFix(OUTPUT, PPN_BITS) + val hit_idx = UInt(OUTPUT, entries) + val ppn = UInt(OUTPUT, PPN_BITS) val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) val xcpt_if = Bool(OUTPUT) @@ -85,23 +85,23 @@ class TLBResp(entries: Int) extends Bundle override def clone = new TLBResp(entries).asInstanceOf[this.type] } -class TLB(entries: Int) extends Component +class TLB(entries: Int) extends Module { val io = new Bundle { - val req = new FIFOIO()(new TLBReq).flip + val req = Decoupled(new TLBReq).flip val resp = new TLBResp(entries) val ptw = new TLBPTWIO } - val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4) { UFix() } - val state = Reg(resetVal = s_ready) - val r_refill_tag = Reg() { UFix() } - val r_refill_waddr = Reg() { UFix() } + val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4) { UInt() } + val state = RegReset(s_ready) + val r_refill_tag = Reg(UInt()) + val 
r_refill_waddr = Reg(UInt()) - val tag_cam = new rocketCAM(entries, ASID_BITS+VPN_BITS); - val tag_ram = Vec(entries) { Reg() { io.ptw.resp.bits.ppn.clone } } + val tag_cam = Module(new RocketCAM(entries, ASID_BITS+VPN_BITS)) + val tag_ram = Vec.fill(entries){Reg(io.ptw.resp.bits.ppn.clone)} - val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUFix + val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt tag_cam.io.clear := io.ptw.invalidate tag_cam.io.clear_hit := io.req.fire() && Mux(io.req.bits.instruction, io.resp.xcpt_if, io.resp.xcpt_ld && io.resp.xcpt_st) tag_cam.io.tag := lookup_tag @@ -109,18 +109,18 @@ class TLB(entries: Int) extends Component tag_cam.io.write_tag := r_refill_tag tag_cam.io.write_addr := r_refill_waddr val tag_hit = tag_cam.io.hit - val tag_hit_addr = OHToUFix(tag_cam.io.hits) + val tag_hit_addr = OHToUInt(tag_cam.io.hits) // permission bit arrays - val ur_array = Reg{Bits()} // user read permission - val uw_array = Reg{Bits()} // user write permission - val ux_array = Reg{Bits()} // user execute permission - val sr_array = Reg{Bits()} // supervisor read permission - val sw_array = Reg{Bits()} // supervisor write permission - val sx_array = Reg{Bits()} // supervisor execute permission + val ur_array = Reg(Bits()) // user read permission + val uw_array = Reg(Bits()) // user write permission + val ux_array = Reg(Bits()) // user execute permission + val sr_array = Reg(Bits()) // supervisor read permission + val sw_array = Reg(Bits()) // supervisor write permission + val sx_array = Reg(Bits()) // supervisor execute permission when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn - val perm = (!io.ptw.resp.bits.error).toFix & io.ptw.resp.bits.perm(5,0) + val perm = (!io.ptw.resp.bits.error).toSInt & io.ptw.resp.bits.perm(5,0) ur_array := ur_array.bitSet(r_refill_waddr, perm(2)) uw_array := uw_array.bitSet(r_refill_waddr, perm(1)) ux_array := ux_array.bitSet(r_refill_waddr, perm(0)) @@ -140,7 +140,7 @@ class 
TLB(entries: Int) extends Component val tlb_miss = io.ptw.status.vm && !tag_hit && !bad_va when (io.req.valid && tlb_hit) { - plru.access(OHToUFix(tag_cam.io.hits)) + plru.access(OHToUInt(tag_cam.io.hits)) } io.req.ready := state === s_ready diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 1ec4b72f..b656d16e 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -5,17 +5,17 @@ import scala.math._ object Util { - implicit def intToUFix(x: Int): UFix = UFix(x) + implicit def intToUInt(x: Int): UInt = UInt(x) implicit def intToBoolean(x: Int): Boolean = if (x != 0) true else false implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0 implicit def booleanToBool(x: Boolean): Bits = Bool(x) - implicit def wcToUFix(c: WideCounter): UFix = c.value + implicit def wcToUInt(c: WideCounter): UInt = c.value } object AVec { - def apply[T <: Data](elts: Seq[T]): Vec[T] = Vec(elts) { elts.head.clone } + def apply[T <: Data](elts: Seq[T]): Vec[T] = Vec(elts) def apply[T <: Data](elts: Vec[T]): Vec[T] = apply(elts.toSeq) def apply[T <: Data](elt0: T, elts: T*): Vec[T] = apply(elt0 :: elts.toList) @@ -38,9 +38,9 @@ object Str require(validChar(x)) Lit(x, 8){Bits()} } - def apply(x: UFix): Bits = apply(x, 10) - def apply(x: UFix, radix: Int): Bits = { - val rad = UFix(radix) + def apply(x: UInt): Bits = apply(x, 10) + def apply(x: UInt, radix: Int): Bits = { + val rad = UInt(radix) val digs = digits(radix) val w = x.getWidth require(w > 0) @@ -49,18 +49,18 @@ object Str var s = digs(q % rad) for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) { q = q / rad - s = Cat(Mux(Bool(radix == 10) && q === UFix(0), Str(' '), digs(q % rad)), s) + s = Cat(Mux(Bool(radix == 10) && q === UInt(0), Str(' '), digs(q % rad)), s) } s } - def apply(x: Fix): Bits = apply(x, 10) - def apply(x: Fix, radix: Int): Bits = { - val neg = x < Fix(0) + def apply(x: SInt): Bits = apply(x, 10) + def apply(x: SInt, radix: Int): Bits = { + 
val neg = x < SInt(0) val abs = x.abs if (radix != 10) { Cat(Mux(neg, Str('-'), Str(' ')), Str(abs, radix)) } else { - val rad = UFix(radix) + val rad = UInt(radix) val digs = digits(radix) val w = abs.getWidth require(w > 0) @@ -70,7 +70,7 @@ object Str var needSign = neg for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) { q = q / rad - val placeSpace = q === UFix(0) + val placeSpace = q === UInt(0) val space = Mux(needSign, Str('-'), Str(' ')) needSign = needSign && !placeSpace s = Cat(Mux(placeSpace, space, digs(q % rad)), s) @@ -117,21 +117,21 @@ case class WideCounter(width: Int, inc: Bool = Bool(true)) { private val isWide = width >= 4 private val smallWidth = if (isWide) log2Up(width) else width - private val small = Reg(resetVal = UFix(0, smallWidth)) - private val nextSmall = small + UFix(1, smallWidth+1) + private val small = RegReset(UInt(0, smallWidth)) + private val nextSmall = small + UInt(1, smallWidth+1) when (inc) { small := nextSmall(smallWidth-1,0) } private val large = if (isWide) { - val r = Reg(resetVal = UFix(0, width - smallWidth)) - when (inc && nextSmall(smallWidth)) { r := r + UFix(1) } + val r = RegReset(UInt(0, width - smallWidth)) + when (inc && nextSmall(smallWidth)) { r := r + UInt(1) } r } else null val value = Cat(large, small) - def := (x: UFix) = { + def := (x: UInt) = { val w = x.getWidth small := x(w.min(smallWidth)-1,0) - if (isWide) large := (if (w < smallWidth) UFix(0) else x(w.min(width)-1,smallWidth)) + if (isWide) large := (if (w < smallWidth) UInt(0) else x(w.min(width)-1,smallWidth)) } } From 387cf0ebe08a671e85d9fdc839c6b1ab3e74fe64 Mon Sep 17 00:00:00 2001 From: Huy Vo Date: Mon, 12 Aug 2013 20:51:54 -0700 Subject: [PATCH 0612/1087] reset -> resetVal, getReset -> reset --- rocket/src/main/scala/core.scala | 2 +- rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/fpu.scala | 6 +++--- rocket/src/main/scala/nbdcache.scala | 8 ++++---- 
rocket/src/main/scala/tile.scala | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index d755443f..dd3629b8 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -42,7 +42,7 @@ class Core(implicit conf: RocketConfiguration) extends Module } else null if (conf.vec) { - val vu = Module(new vu(RegUpdate(this.getReset))) + val vu = Module(new vu(RegUpdate(this.reset))) val vdtlb = Module(new TLB(8)) ptw += vdtlb.io.ptw diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 09d7c1df..84415f75 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -407,7 +407,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val wb_reg_div_mul_val = RegReset(Bool(false)) val take_pc = Bool() - val pc_taken = Reg(update = take_pc, reset = Bool(false)) + val pc_taken = Reg(update = take_pc, resetVal = Bool(false)) val take_pc_wb = Bool() val ctrl_killd = Bool() val ctrl_killx = Bool() diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 762f1b30..adfd1dee 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -278,7 +278,7 @@ class PCR(implicit conf: RocketConfiguration) extends Module io.host.ipi_rep.ready := Bool(true) when (io.host.ipi_rep.valid) { r_irq_ipi := Bool(true) } - when(this.getReset) { + when(this.reset) { reg_status.et := false reg_status.ef := false reg_status.ev := false diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 05aa270b..ab70cc8d 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -465,10 +465,10 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module when (io.ctrl.valid) { ex_reg_inst := io.dpath.inst } - val ex_reg_valid = Reg(update=io.ctrl.valid, reset=Bool(false)) - val mem_reg_valid = 
Reg(update=ex_reg_valid && !io.ctrl.killx, reset=Bool(false)) + val ex_reg_valid = Reg(update=io.ctrl.valid, resetVal=Bool(false)) + val mem_reg_valid = Reg(update=ex_reg_valid && !io.ctrl.killx, resetVal=Bool(false)) val killm = io.ctrl.killm || io.ctrl.nack_mem - val wb_reg_valid = Reg(update=mem_reg_valid && !killm, reset=Bool(false)) + val wb_reg_valid = Reg(update=mem_reg_valid && !killm, resetVal=Bool(false)) val fp_decoder = Module(new FPUDecoder) fp_decoder.io.inst := io.dpath.inst diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 92a8adad..5852e2d2 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -756,15 +756,15 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val mshrs = Module(new MSHRFile) io.cpu.req.ready := Bool(true) - val s1_valid = Reg(update=io.cpu.req.fire(), reset=Bool(false)) + val s1_valid = Reg(update=io.cpu.req.fire(), resetVal=Bool(false)) val s1_req = Reg(io.cpu.req.bits.clone) val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill val s1_replay = RegReset(Bool(false)) val s1_clk_en = Reg(Bool()) - val s2_valid = Reg(update=s1_valid_masked, reset=Bool(false)) + val s2_valid = Reg(update=s1_valid_masked, resetVal=Bool(false)) val s2_req = Reg(io.cpu.req.bits.clone) - val s2_replay = Reg(update=s1_replay, reset=Bool(false)) + val s2_replay = Reg(update=s1_replay, resetVal=Bool(false)) val s2_recycle = Bool() val s2_valid_masked = Bool() @@ -993,7 +993,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends FIFOedLogicalNetworkIOWrapper(wb.io.release_data) <> io.mem.release.data // store->load bypassing - val s4_valid = Reg(update=s3_valid, reset=Bool(false)) + val s4_valid = Reg(update=s3_valid, resetVal=Bool(false)) val s4_req = RegEn(s3_req, s3_valid && metaReadArb.io.out.valid) val bypasses = List( ((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out), diff --git 
a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 937eaba7..d0d7c8b8 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -19,7 +19,7 @@ case class RocketConfiguration(tl: TileLinkConfiguration, if (fastLoadByte) require(fastLoadWord) } -class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module(reset = resetSignal) with ClientCoherenceAgent +class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module(_reset = resetSignal) with ClientCoherenceAgent { val memPorts = 2 + confIn.vec val dcachePortId = 0 From d9b3c7cfc8bba51ab7038c38ca8e2e63e3dff9ed Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 12 Aug 2013 22:18:25 -0700 Subject: [PATCH 0613/1087] Moved RegEn to ChiselUtil --- rocket/src/main/scala/fpu.scala | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 05aa270b..bf18a52c 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -176,20 +176,6 @@ class CtrlFPUIO extends Bundle { val sboard_clra = UInt(INPUT, 5) } -object RegEn -{ - def apply[T <: Data](data: T, en: Bool) = { - val r = Reg(data) - when (en) { r := data } - r - } - def apply[T <: Bits](data: T, en: Bool, resetVal: T) = { - val r = RegReset(resetVal) - when (en) { r := data } - r - } -} - class FPToInt extends Module { class Input extends Bundle { From 858169917e83150464ed7ab2c7c5eaa1336ee6c5 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 12 Aug 2013 22:34:46 -0700 Subject: [PATCH 0614/1087] removed dummy DNCs handled by pruning --- rocket/src/main/scala/tile.scala | 8 -------- 1 file changed, 8 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 937eaba7..cc0ec4a8 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -54,14 +54,6 @@ class Tile(resetSignal: Bool = null)(confIn:
RocketConfiguration) extends Module io.tilelink.release.meta.bits := dcache.io.mem.release.meta.bits io.tilelink.release.meta.bits.payload.client_xact_id := Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) - /*val ioSubBundles = io.tilelink.getClass.getMethods.filter( x => - classOf[ClientSourcedIO[Data]].isAssignableFrom(x.getReturnType)).map{ m => - m.invoke(io.tilelink).asInstanceOf[ClientSourcedIO[LogicalNetworkIO[Data]]] } - ioSubBundles.foreach{ m => - m.bits.header.dst := UInt(0) - m.bits.header.src := UInt(0) - }*/ - if (conf.vec) { val vicache = Module(new Frontend()(ICacheConfig(128, 1), tlConf)) // 128 sets x 1 ways (8KB) arbiter.io.in(vicachePortId) <> vicache.io.mem From b57043584776e49471a79bd046d6dc382d39e9d0 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 13 Aug 2013 17:50:02 -0700 Subject: [PATCH 0615/1087] Reg standardization --- rocket/src/main/scala/core.scala | 4 +- rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/fpu.scala | 24 ++++----- rocket/src/main/scala/nbdcache.scala | 69 +++++++++++--------------- rocket/src/main/scala/tile.scala | 2 +- 6 files changed, 45 insertions(+), 58 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index d755443f..f0872dcc 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -42,7 +42,7 @@ class Core(implicit conf: RocketConfiguration) extends Module } else null if (conf.vec) { - val vu = Module(new vu(RegUpdate(this.getReset))) + val vu = Module(new vu(RegUpdate(reset))) val vdtlb = Module(new TLB(8)) ptw += vdtlb.io.ptw @@ -109,7 +109,7 @@ class Core(implicit conf: RocketConfiguration) extends Module vu.io.xcpt.hold := ctrl.io.vec_iface.hold // hooking up vector memory interface - dmem(2).req.bits.data := 
RegEn(StoreGen(vu.io.dmem_req.bits).data, vu.io.dmem_req.valid && isWrite(vu.io.dmem_req.bits.cmd)) + dmem(2).req.bits.data := RegEnable(StoreGen(vu.io.dmem_req.bits).data, vu.io.dmem_req.valid && isWrite(vu.io.dmem_req.bits.cmd)) dmem(2).req <> vu.io.dmem_req dmem(2).resp <> vu.io.dmem_resp diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 09d7c1df..20b59d9b 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -407,7 +407,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val wb_reg_div_mul_val = RegReset(Bool(false)) val take_pc = Bool() - val pc_taken = Reg(update = take_pc, reset = Bool(false)) + val pc_taken = RegUpdate(take_pc, Bool(false)) val take_pc_wb = Bool() val ctrl_killd = Bool() val ctrl_killx = Bool() diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 762f1b30..7e193a35 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -278,7 +278,7 @@ class PCR(implicit conf: RocketConfiguration) extends Module io.host.ipi_rep.ready := Bool(true) when (io.host.ipi_rep.valid) { r_irq_ipi := Bool(true) } - when(this.getReset) { + when(reset) { reg_status.et := false reg_status.ef := false reg_status.ev := false diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index bf18a52c..ac5f80e6 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -451,23 +451,23 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module when (io.ctrl.valid) { ex_reg_inst := io.dpath.inst } - val ex_reg_valid = Reg(update=io.ctrl.valid, reset=Bool(false)) - val mem_reg_valid = Reg(update=ex_reg_valid && !io.ctrl.killx, reset=Bool(false)) + val ex_reg_valid = Reg(updateData=io.ctrl.valid, resetData=Bool(false)) + val mem_reg_valid = Reg(updateData=ex_reg_valid && !io.ctrl.killx, resetData=Bool(false)) val killm = io.ctrl.killm || 
io.ctrl.nack_mem - val wb_reg_valid = Reg(update=mem_reg_valid && !killm, reset=Bool(false)) + val wb_reg_valid = Reg(updateData=mem_reg_valid && !killm, resetData=Bool(false)) val fp_decoder = Module(new FPUDecoder) fp_decoder.io.inst := io.dpath.inst - val ctrl = RegEn(fp_decoder.io.sigs, io.ctrl.valid) - val mem_ctrl = RegEn(ctrl, ex_reg_valid) - val wb_ctrl = RegEn(mem_ctrl, mem_reg_valid) + val ctrl = RegEnable(fp_decoder.io.sigs, io.ctrl.valid) + val mem_ctrl = RegEnable(ctrl, ex_reg_valid) + val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid) // load response val load_wb = RegUpdate(io.dpath.dmem_resp_val) - val load_wb_single = RegEn(io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU, io.dpath.dmem_resp_val) - val load_wb_data = RegEn(io.dpath.dmem_resp_data, io.dpath.dmem_resp_val) - val load_wb_tag = RegEn(io.dpath.dmem_resp_tag, io.dpath.dmem_resp_val) + val load_wb_single = RegEnable(io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU, io.dpath.dmem_resp_val) + val load_wb_data = RegEnable(io.dpath.dmem_resp_data, io.dpath.dmem_resp_val) + val load_wb_tag = RegEnable(io.dpath.dmem_resp_tag, io.dpath.dmem_resp_val) val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1), rec_s), rec_d) @@ -576,15 +576,15 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module val wexc = Vec(pipes.map(_.wexc))(wsrc) when (wen(0)) { regfile(waddr(4,0)) := wdata } - val wb_toint_exc = RegEn(fpiu.io.out.bits.exc, mem_ctrl.toint) + val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint) when (wb_reg_valid && wb_ctrl.toint || wen(0)) { fsr_exc := fsr_exc | Fill(fsr_exc.getWidth, wb_reg_valid && wb_ctrl.toint) & wb_toint_exc | Fill(fsr_exc.getWidth, wen(0)) & wexc } - val mem_fsr_wdata = RegEn(io.dpath.fromint_data(FSR_WIDTH-1,0), ex_reg_valid && ctrl.wrfsr) - val wb_fsr_wdata = 
RegEn(mem_fsr_wdata, mem_reg_valid && mem_ctrl.wrfsr) + val mem_fsr_wdata = RegEnable(io.dpath.fromint_data(FSR_WIDTH-1,0), ex_reg_valid && ctrl.wrfsr) + val wb_fsr_wdata = RegEnable(mem_fsr_wdata, mem_reg_valid && mem_ctrl.wrfsr) when (wb_reg_valid && wb_ctrl.wrfsr) { fsr_exc := wb_fsr_wdata fsr_rm := wb_fsr_wdata >> fsr_exc.getWidth diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 92a8adad..87c35d95 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -43,6 +43,11 @@ case class DCacheConfig(sets: Int, ways: Int, val lrsc_cycles = 32 // ISA requires 16-insn LRSC sequences to succeed } +abstract trait DCacheBundle extends Bundle { + implicit val conf: DCacheConfig + override def clone = this.getClass.getConstructors.head.newInstance(conf).asInstanceOf[this.type] +} + abstract class ReplacementPolicy { def way: UInt @@ -102,30 +107,22 @@ class MSHRReq(implicit conf: DCacheConfig) extends HellaCacheReq { val tag_match = Bool() val old_meta = new MetaData val way_en = Bits(width = conf.ways) - - override def clone = new MSHRReq().asInstanceOf[this.type] } class Replay(implicit conf: DCacheConfig) extends HellaCacheReq { val sdq_id = UInt(width = log2Up(conf.nsdq)) - - override def clone = new Replay().asInstanceOf[this.type] } -class DataReadReq(implicit conf: DCacheConfig) extends Bundle { +class DataReadReq(implicit val conf: DCacheConfig) extends DCacheBundle { val way_en = Bits(width = conf.ways) val addr = Bits(width = conf.untagbits) - - override def clone = new DataReadReq().asInstanceOf[this.type] } -class DataWriteReq(implicit conf: DCacheConfig) extends Bundle { +class DataWriteReq(implicit val conf: DCacheConfig) extends DCacheBundle { val way_en = Bits(width = conf.ways) val addr = Bits(width = conf.untagbits) val wmask = Bits(width = conf.wordsperrow) val data = Bits(width = conf.bitsperrow) - - override def clone = new DataWriteReq().asInstanceOf[this.type] } class 
InternalProbe(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Probe { @@ -152,25 +149,19 @@ object MetaData { meta } } -class MetaData(implicit conf: DCacheConfig) extends Bundle { +class MetaData(implicit val conf: DCacheConfig) extends DCacheBundle { val state = UInt(width = conf.statebits) val tag = Bits(width = conf.tagbits) - - override def clone = new MetaData().asInstanceOf[this.type] } -class MetaReadReq(implicit conf: DCacheConfig) extends Bundle { +class MetaReadReq(implicit val conf: DCacheConfig) extends DCacheBundle { val addr = UInt(width = conf.paddrbits) - - override def clone = new MetaReadReq().asInstanceOf[this.type] } -class MetaWriteReq(implicit conf: DCacheConfig) extends Bundle { +class MetaWriteReq(implicit val conf: DCacheConfig) extends DCacheBundle { val way_en = Bits(width = conf.ways) val idx = Bits(width = conf.idxbits) val data = new MetaData() - - override def clone = new MetaWriteReq().asInstanceOf[this.type] } class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { @@ -420,7 +411,7 @@ class MSHRFile(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends M io.mem_resp := memRespMux(io.mem_grant.bits.payload.client_xact_id) val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd) - io.replay.bits.data := sdq(RegEn(replay_arb.io.out.bits.sdq_id, free_sdq)) + io.replay.bits.data := sdq(RegEnable(replay_arb.io.out.bits.sdq_id, free_sdq)) io.replay <> replay_arb.io.out when (io.replay.valid || sdq_enq) { @@ -597,7 +588,7 @@ class MetaDataArray(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte val mask = Mux(rst, SInt(-1), io.write.bits.way_en) tags.write(addr, Fill(conf.ways, data), FillInterleaved(metabits, mask)) } - val tag = tags(RegEn(io.read.bits.addr >> conf.offbits, io.read.valid)) + val tag = tags(RegEnable(io.read.bits.addr >> conf.offbits, io.read.valid)) for (w <- 0 until conf.ways) { val m = tag(metabits*(w+1)-1, metabits*w) @@ -624,7 +615,7 @@ 
class DataArray(implicit conf: DCacheConfig) extends Module { val wway_en = io.write.bits.way_en(w+conf.wordsperrow-1,w) val rway_en = io.read.bits.way_en(w+conf.wordsperrow-1,w) val resp = Vec.fill(conf.wordsperrow){Bits(width = conf.bitsperrow)} - val r_raddr = RegEn(io.read.bits.addr, io.read.valid) + val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { val array = Mem(Bits(width=conf.bitsperrow), conf.sets*REFILL_CYCLES, seqRead = true) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { @@ -632,7 +623,7 @@ class DataArray(implicit conf: DCacheConfig) extends Module { val mask = FillInterleaved(conf.encdatabits, wway_en) array.write(waddr, data, mask) } - resp(p) := array(RegEn(raddr, rway_en.orR && io.read.valid)) + resp(p) := array(RegEnable(raddr, rway_en.orR && io.read.valid)) } for (dw <- 0 until conf.wordsperrow) { val r = AVec(resp.map(_(conf.encdatabits*(dw+1)-1,conf.encdatabits*dw))) @@ -649,7 +640,7 @@ class DataArray(implicit conf: DCacheConfig) extends Module { when (io.write.bits.way_en(w) && io.write.valid) { array.write(waddr, io.write.bits.data, wmask) } - io.resp(w) := array(RegEn(raddr, io.read.bits.way_en(w) && io.read.valid)) + io.resp(w) := array(RegEnable(raddr, io.read.bits.way_en(w) && io.read.valid)) } } @@ -695,7 +686,7 @@ class AMOALU(implicit conf: DCacheConfig) extends Module { io.out := wmask & out | ~wmask & io.lhs } -class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { +class HellaCacheReq(implicit val conf: DCacheConfig) extends DCacheBundle { val kill = Bool() val typ = Bits(width = 3) val phys = Bool() @@ -703,11 +694,9 @@ class HellaCacheReq(implicit conf: DCacheConfig) extends Bundle { val data = Bits(width = conf.databits) val tag = Bits(width = conf.reqtagbits) val cmd = Bits(width = 4) - - override def clone = new HellaCacheReq().asInstanceOf[this.type] } -class HellaCacheResp(implicit conf: DCacheConfig) extends Bundle { +class HellaCacheResp(implicit val 
conf: DCacheConfig) extends DCacheBundle { val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() val typ = Bits(width = 3) @@ -717,8 +706,6 @@ class HellaCacheResp(implicit conf: DCacheConfig) extends Bundle { val cmd = Bits(width = 4) val addr = UInt(width = conf.maxaddrbits) val store_data = Bits(width = conf.databits) - - override def clone = new HellaCacheResp().asInstanceOf[this.type] } class AlignmentExceptions extends Bundle { @@ -756,15 +743,15 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val mshrs = Module(new MSHRFile) io.cpu.req.ready := Bool(true) - val s1_valid = Reg(update=io.cpu.req.fire(), reset=Bool(false)) + val s1_valid = Reg(updateData=io.cpu.req.fire(), resetData=Bool(false)) val s1_req = Reg(io.cpu.req.bits.clone) val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill val s1_replay = RegReset(Bool(false)) val s1_clk_en = Reg(Bool()) - val s2_valid = Reg(update=s1_valid_masked, reset=Bool(false)) + val s2_valid = Reg(updateData=s1_valid_masked, resetData=Bool(false)) val s2_req = Reg(io.cpu.req.bits.clone) - val s2_replay = Reg(update=s1_replay, reset=Bool(false)) + val s2_replay = Reg(updateData=s1_replay, resetData=Bool(false)) val s2_recycle = Bool() val s2_valid_masked = Bool() @@ -772,7 +759,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val s3_req = Reg(io.cpu.req.bits.clone) val s3_way = Reg(Bits()) - val s1_recycled = RegEn(s2_recycle, s1_clk_en) + val s1_recycled = RegEnable(s2_recycle, s1_clk_en) val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) @@ -872,9 +859,9 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && tl.co.isValid(meta.io.resp(w).state)).toBits s1_clk_en := metaReadArb.io.out.valid val s1_writeback = s1_clk_en && !s1_valid && !s1_replay - val 
s2_tag_match_way = RegEn(s1_tag_match_way, s1_clk_en) + val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR - val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEn(meta.io.resp(w).state, s1_clk_en))) + val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).state, s1_clk_en))) val s2_hit = s2_tag_match && tl.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === tl.co.newStateOnHit(s2_req.cmd, s2_hit_state) // load-reserved/store-conditional @@ -931,8 +918,8 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends // replacement policy val replacer = new RandomReplacement val s1_replaced_way_en = UIntToOH(replacer.way) - val s2_replaced_way_en = UIntToOH(RegEn(replacer.way, s1_clk_en)) - val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEn(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq) + val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en)) + val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq) // miss handling mshrs.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) @@ -993,8 +980,8 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends FIFOedLogicalNetworkIOWrapper(wb.io.release_data) <> io.mem.release.data // store->load bypassing - val s4_valid = Reg(update=s3_valid, reset=Bool(false)) - val s4_req = RegEn(s3_req, s3_valid && metaReadArb.io.out.valid) + val s4_valid = Reg(updateData=s3_valid, resetData=Bool(false)) + val s4_req = RegEnable(s3_req, s3_valid && metaReadArb.io.out.valid) val bypasses = List( ((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out), (s3_valid, s3_req, s3_req.data), @@ -1022,7 +1009,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends // nack it 
like it's hot val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready - val s2_nack_hit = RegEn(s1_nack, s1_valid || s1_replay) + val s2_nack_hit = RegEnable(s1_nack, s1_valid || s1_replay) when (s2_nack_hit) { mshrs.io.req.valid := Bool(false) } val s2_nack_victim = s2_hit && mshrs.io.secondary_miss val s2_nack_miss = !s2_hit && !mshrs.io.req.ready diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index cc0ec4a8..b998fc6e 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -19,7 +19,7 @@ case class RocketConfiguration(tl: TileLinkConfiguration, if (fastLoadByte) require(fastLoadWord) } -class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module(reset = resetSignal) with ClientCoherenceAgent +class Tile(_reset: Bool = null)(confIn: RocketConfiguration) extends Module(_reset = _reset) with ClientCoherenceAgent { val memPorts = 2 + confIn.vec val dcachePortId = 0 From 3a266cbbfaa6a60d73b0c2f50347ce2d43ae62d5 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 15 Aug 2013 15:28:15 -0700 Subject: [PATCH 0616/1087] final Reg changes --- rocket/src/main/scala/arbiter.scala | 2 +- rocket/src/main/scala/core.scala | 2 +- rocket/src/main/scala/ctrl.scala | 98 +++++++++++++------------- rocket/src/main/scala/ctrl_vec.scala | 2 +- rocket/src/main/scala/divider.scala | 4 +- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/dpath_util.scala | 16 ++--- rocket/src/main/scala/dpath_vec.scala | 10 +-- rocket/src/main/scala/fpu.scala | 20 +++--- rocket/src/main/scala/htif.scala | 14 ++-- rocket/src/main/scala/icache.scala | 16 ++--- rocket/src/main/scala/multiplier.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 36 +++++----- rocket/src/main/scala/ptw.scala | 2 +- rocket/src/main/scala/tlb.scala | 4 +- rocket/src/main/scala/util.scala | 4 +- 16 files changed, 117 insertions(+), 117 
deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 78c3b47e..3ad0e750 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -10,7 +10,7 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Modu val mem = new HellaCacheIO()(conf.dcache) } - val r_valid = io.requestor.map(r => RegUpdate(r.req.valid)) + val r_valid = io.requestor.map(r => Reg(next=r.req.valid)) io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_) io.requestor(0).req.ready := io.mem.req.ready diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index f0872dcc..788e18dc 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -42,7 +42,7 @@ class Core(implicit conf: RocketConfiguration) extends Module } else null if (conf.vec) { - val vu = Module(new vu(RegUpdate(reset))) + val vu = Module(new vu(Reg(next=reset))) val vdtlb = Module(new TLB(8)) ptw += vdtlb.io.ptw diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 20b59d9b..6831b144 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -355,59 +355,59 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27)); val id_load_use = Bool(); - val ex_reg_xcpt_interrupt = RegReset(Bool(false)) - val ex_reg_valid = RegReset(Bool(false)) - val ex_reg_eret = RegReset(Bool(false)) - val ex_reg_wen = RegReset(Bool(false)) - val ex_reg_fp_wen = RegReset(Bool(false)) - val ex_reg_flush_inst = RegReset(Bool(false)) - val ex_reg_jalr = RegReset(Bool(false)) - val ex_reg_btb_hit = RegReset(Bool(false)) - val ex_reg_div_mul_val = RegReset(Bool(false)) - val ex_reg_mem_val = RegReset(Bool(false)) - val ex_reg_xcpt = RegReset(Bool(false)) - val ex_reg_fp_val = RegReset(Bool(false)) - val ex_reg_vec_val = RegReset(Bool(false)) - val 
ex_reg_replay_next = RegReset(Bool(false)) - val ex_reg_load_use = RegReset(Bool(false)) - val ex_reg_pcr = RegReset(PCR.N) - val ex_reg_br_type = RegReset(BR_N) + val ex_reg_xcpt_interrupt = Reg(init=Bool(false)) + val ex_reg_valid = Reg(init=Bool(false)) + val ex_reg_eret = Reg(init=Bool(false)) + val ex_reg_wen = Reg(init=Bool(false)) + val ex_reg_fp_wen = Reg(init=Bool(false)) + val ex_reg_flush_inst = Reg(init=Bool(false)) + val ex_reg_jalr = Reg(init=Bool(false)) + val ex_reg_btb_hit = Reg(init=Bool(false)) + val ex_reg_div_mul_val = Reg(init=Bool(false)) + val ex_reg_mem_val = Reg(init=Bool(false)) + val ex_reg_xcpt = Reg(init=Bool(false)) + val ex_reg_fp_val = Reg(init=Bool(false)) + val ex_reg_vec_val = Reg(init=Bool(false)) + val ex_reg_replay_next = Reg(init=Bool(false)) + val ex_reg_load_use = Reg(init=Bool(false)) + val ex_reg_pcr = Reg(init=PCR.N) + val ex_reg_br_type = Reg(init=BR_N) val ex_reg_mem_cmd = Reg(Bits()) val ex_reg_mem_type = Reg(Bits()) val ex_reg_cause = Reg(UInt()) - val mem_reg_xcpt_interrupt = RegReset(Bool(false)) - val mem_reg_valid = RegReset(Bool(false)) - val mem_reg_eret = RegReset(Bool(false)) - val mem_reg_wen = RegReset(Bool(false)) - val mem_reg_fp_wen = RegReset(Bool(false)) - val mem_reg_flush_inst = RegReset(Bool(false)) - val mem_reg_div_mul_val = RegReset(Bool(false)) - val mem_reg_mem_val = RegReset(Bool(false)) - val mem_reg_xcpt = RegReset(Bool(false)) - val mem_reg_fp_val = RegReset(Bool(false)) - val mem_reg_vec_val = RegReset(Bool(false)) - val mem_reg_replay = RegReset(Bool(false)) - val mem_reg_replay_next = RegReset(Bool(false)) - val mem_reg_pcr = RegReset(PCR.N) + val mem_reg_xcpt_interrupt = Reg(init=Bool(false)) + val mem_reg_valid = Reg(init=Bool(false)) + val mem_reg_eret = Reg(init=Bool(false)) + val mem_reg_wen = Reg(init=Bool(false)) + val mem_reg_fp_wen = Reg(init=Bool(false)) + val mem_reg_flush_inst = Reg(init=Bool(false)) + val mem_reg_div_mul_val = Reg(init=Bool(false)) + val mem_reg_mem_val = 
Reg(init=Bool(false)) + val mem_reg_xcpt = Reg(init=Bool(false)) + val mem_reg_fp_val = Reg(init=Bool(false)) + val mem_reg_vec_val = Reg(init=Bool(false)) + val mem_reg_replay = Reg(init=Bool(false)) + val mem_reg_replay_next = Reg(init=Bool(false)) + val mem_reg_pcr = Reg(init=PCR.N) val mem_reg_cause = Reg(UInt()) val mem_reg_slow_bypass = Reg(Bool()) - val wb_reg_valid = RegReset(Bool(false)) - val wb_reg_pcr = RegReset(PCR.N) - val wb_reg_wen = RegReset(Bool(false)) - val wb_reg_fp_wen = RegReset(Bool(false)) - val wb_reg_flush_inst = RegReset(Bool(false)) - val wb_reg_mem_val = RegReset(Bool(false)) - val wb_reg_eret = RegReset(Bool(false)) - val wb_reg_xcpt = RegReset(Bool(false)) - val wb_reg_replay = RegReset(Bool(false)) + val wb_reg_valid = Reg(init=Bool(false)) + val wb_reg_pcr = Reg(init=PCR.N) + val wb_reg_wen = Reg(init=Bool(false)) + val wb_reg_fp_wen = Reg(init=Bool(false)) + val wb_reg_flush_inst = Reg(init=Bool(false)) + val wb_reg_mem_val = Reg(init=Bool(false)) + val wb_reg_eret = Reg(init=Bool(false)) + val wb_reg_xcpt = Reg(init=Bool(false)) + val wb_reg_replay = Reg(init=Bool(false)) val wb_reg_cause = Reg(UInt()) - val wb_reg_fp_val = RegReset(Bool(false)) - val wb_reg_div_mul_val = RegReset(Bool(false)) + val wb_reg_fp_val = Reg(init=Bool(false)) + val wb_reg_div_mul_val = Reg(init=Bool(false)) val take_pc = Bool() - val pc_taken = RegUpdate(take_pc, Bool(false)) + val pc_taken = Reg(next=take_pc, init=Bool(false)) val take_pc_wb = Bool() val ctrl_killd = Bool() val ctrl_killx = Bool() @@ -611,17 +611,17 @@ class Control(implicit conf: RocketConfiguration) extends Module class Scoreboard(n: Int) { - val r = RegReset(Bits(0, n)) - var next = r + val r = Reg(init=Bits(0, n)) + var _next = r var ens = Bool(false) def apply(addr: UInt) = r(addr) - def set(en: Bool, addr: UInt): Unit = update(en, next | mask(en, addr)) - def clear(en: Bool, addr: UInt): Unit = update(en, next & ~mask(en, addr)) + def set(en: Bool, addr: UInt): Unit = update(en, 
_next | mask(en, addr)) + def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr)) private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0)) private def update(en: Bool, update: UInt) = { - next = update + _next = update ens = ens || en - when (ens) { r := next } + when (ens) { r := _next } } } diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 85ef702b..2fa4c6f9 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -261,7 +261,7 @@ class rocketCtrlVec extends Module io.iface.exception := io.exception && io.sr_ev - val reg_hold = RegReset(Bool(false)) + val reg_hold = Reg(init=Bool(false)) when (valid_common && dec.io.sigs.xcpthold) { reg_hold := Bool(true) } when (io.eret) { reg_hold := Bool(false) } diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 08e35766..6c6f0242 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -10,7 +10,7 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke val mulw = (w+mulUnroll-1)/mulUnroll*mulUnroll val s_ready :: s_neg_inputs :: s_mul_busy :: s_div_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(7) { UInt() }; - val state = RegReset(s_ready) + val state = Reg(init=s_ready) val req = Reg(io.req.bits.clone) val count = Reg(UInt(width = log2Up(w+1))) @@ -120,7 +120,7 @@ class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) ext val w = io.req.bits.in1.getWidth val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(6) { UInt() }; - val state = RegReset(s_ready) + val state = Reg(init=s_ready) val count = Reg(UInt(width = log2Up(w+1))) val divby0 = Reg(Bool()) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a22d0803..4e32be4c 100644 --- a/rocket/src/main/scala/dpath.scala +++ 
b/rocket/src/main/scala/dpath.scala @@ -50,7 +50,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val wb_reg_inst = Reg(Bits()) val wb_reg_waddr = Reg(UInt()) val wb_reg_wdata = Reg(Bits()) - val wb_reg_ll_wb = RegReset(Bool(false)) + val wb_reg_ll_wb = Reg(init=Bool(false)) val wb_wdata = Bits() val wb_reg_store_data = Reg(Bits()) val wb_reg_rs1 = Reg(Bits()) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 7e193a35..40ae0e14 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -36,7 +36,7 @@ class rocketDpathBTB(entries: Int) extends Module for (i <- 0 until entries) { val tag = Reg(UInt()) - val valid = RegReset(Bool(false)) + val valid = Reg(init=Bool(false)) hits(i) := valid && tag === io.current_pc updates(i) := valid && tag === io.correct_pc @@ -145,19 +145,19 @@ class PCR(implicit conf: RocketConfiguration) extends Module val reg_count = WideCounter(32) val reg_compare = Reg(Bits(width = 32)) val reg_cause = Reg(Bits(width = io.cause.getWidth)) - val reg_tohost = RegReset(Bits(0, conf.xprlen)) - val reg_fromhost = RegReset(Bits(0, conf.xprlen)) + val reg_tohost = Reg(init=Bits(0, conf.xprlen)) + val reg_fromhost = Reg(init=Bits(0, conf.xprlen)) val reg_coreid = Reg(Bits(width = 16)) val reg_k0 = Reg(Bits(width = conf.xprlen)) val reg_k1 = Reg(Bits(width = conf.xprlen)) val reg_ptbr = Reg(UInt(width = PADDR_BITS)) - val reg_vecbank = RegReset(SInt(-1,8).toBits) - val reg_stats = RegReset(Bool(false)) - val reg_error_mode = RegReset(Bool(false)) + val reg_vecbank = Reg(init=SInt(-1,8).toBits) + val reg_stats = Reg(init=Bool(false)) + val reg_error_mode = Reg(init=Bool(false)) val reg_status = Reg(new Status) // reset down below - val r_irq_timer = RegReset(Bool(false)) - val r_irq_ipi = RegReset(Bool(true)) + val r_irq_timer = Reg(init=Bool(false)) + val r_irq_ipi = Reg(init=Bool(true)) val host_pcr_req_valid = Reg(Bool()) // don't reset val 
host_pcr_req_fire = host_pcr_req_valid && io.rw.cmd === PCR.N diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala index 01449e53..a49cd2be 100644 --- a/rocket/src/main/scala/dpath_vec.scala +++ b/rocket/src/main/scala/dpath_vec.scala @@ -110,8 +110,8 @@ class rocketDpathVec extends Module val max_threads = UInt(WIDTH_BMASK) val uts_per_bank = Mux(Bool(HAVE_PVFB) & nreg_mod_bank > max_threads, max_threads, nreg_mod_bank) - val reg_hwvl = RegReset(UInt(32, 12)) - val reg_appvl0 = RegReset(Bool(true)) + val reg_hwvl = Reg(init=UInt(32, 12)) + val reg_appvl0 = Reg(init=Bool(true)) val hwvl_vcfg = (uts_per_bank * io.vecbankcnt)(11,0) val hwvl = @@ -123,9 +123,9 @@ class rocketDpathVec extends Module Mux(io.wdata(11,0) < hwvl, io.wdata(11,0).toUInt, hwvl.toUInt)) - val reg_nxregs = RegReset(UInt(32, 6)) - val reg_nfregs = RegReset(UInt(32, 6)) - val reg_appvl = RegReset(UInt(0, 12)) + val reg_nxregs = Reg(init=UInt(32, 6)) + val reg_nfregs = Reg(init=UInt(32, 6)) + val reg_appvl = Reg(init=UInt(0, 12)) when (io.valid) { diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index ac5f80e6..c6803a67 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -198,7 +198,7 @@ class FPToInt extends Module } val in = Reg(new Input) - val valid = RegUpdate(io.in.valid) + val valid = Reg(next=io.in.valid) when (io.in.valid) { def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1 when (io.in.bits.cmd === FCMD_STORE) { @@ -381,7 +381,7 @@ class FPUSFMAPipe(val latency: Int) extends Module val one = Bits("h80000000") val zero = Cat(io.in1(32) ^ io.in2(32), Bits(0, 32)) - val valid = RegUpdate(io.valid) + val valid = Reg(next=io.valid) when (io.valid) { cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) rm := io.rm @@ -418,7 +418,7 @@ class FPUDFMAPipe(val latency: Int) extends Module val one = Bits("h8000000000000000") val zero = Cat(io.in1(64) 
^ io.in2(64), Bits(0, 64)) - val valid = RegUpdate(io.valid) + val valid = Reg(next=io.valid) when (io.valid) { cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) rm := io.rm @@ -451,10 +451,10 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module when (io.ctrl.valid) { ex_reg_inst := io.dpath.inst } - val ex_reg_valid = Reg(updateData=io.ctrl.valid, resetData=Bool(false)) - val mem_reg_valid = Reg(updateData=ex_reg_valid && !io.ctrl.killx, resetData=Bool(false)) + val ex_reg_valid = Reg(next=io.ctrl.valid, init=Bool(false)) + val mem_reg_valid = Reg(next=ex_reg_valid && !io.ctrl.killx, init=Bool(false)) val killm = io.ctrl.killm || io.ctrl.nack_mem - val wb_reg_valid = Reg(updateData=mem_reg_valid && !killm, resetData=Bool(false)) + val wb_reg_valid = Reg(next=mem_reg_valid && !killm, init=Bool(false)) val fp_decoder = Module(new FPUDecoder) fp_decoder.io.inst := io.dpath.inst @@ -464,7 +464,7 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid) // load response - val load_wb = RegUpdate(io.dpath.dmem_resp_val) + val load_wb = Reg(next=io.dpath.dmem_resp_val) val load_wb_single = RegEnable(io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU, io.dpath.dmem_resp_val) val load_wb_data = RegEnable(io.dpath.dmem_resp_data, io.dpath.dmem_resp_val) val load_wb_tag = RegEnable(io.dpath.dmem_resp_tag, io.dpath.dmem_resp_val) @@ -546,7 +546,7 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module val maxLatency = pipes.map(_.lat).max val memLatencyMask = latencyMask(mem_ctrl, 2) - val wen = RegReset(Bits(0, maxLatency-1)) + val wen = Reg(init=Bits(0, maxLatency-1)) val winfo = Vec.fill(maxLatency-1){Reg(Bits())} val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) val (write_port_busy, mem_winfo) = (Reg(Bool()), Reg(Bits())) @@ -592,11 +592,11 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module val fp_inflight = 
wb_reg_valid && wb_ctrl.toint || wen.orR val fsr_busy = mem_ctrl.rdfsr && fp_inflight || wb_reg_valid && wb_ctrl.wrfsr - val units_busy = mem_reg_valid && mem_ctrl.fma && RegUpdate(Mux(ctrl.single, io.sfma.valid, io.dfma.valid)) + val units_busy = mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ctrl.single, io.sfma.valid, io.dfma.valid)) io.ctrl.nack_mem := fsr_busy || units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_) - io.ctrl.sboard_set := wb_reg_valid && RegUpdate(useScoreboard(_._1.cond(mem_ctrl))) + io.ctrl.sboard_set := wb_reg_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl))) io.ctrl.sboard_clr := wen(0) && useScoreboard(x => wsrc === UInt(x._2)) io.ctrl.sboard_clra := waddr // we don't currently support round-max-magnitude (rm=4) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index eb4f536a..f327dddd 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -60,7 +60,7 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Module wi require(short_request_bits % w == 0) val rx_count_w = 13 + log2Up(64) - log2Up(w) // data size field is 12 bits - val rx_count = RegReset(UInt(0,rx_count_w)) + val rx_count = Reg(init=UInt(0,rx_count_w)) val rx_shifter = Reg(Bits(width = short_request_bits)) val rx_shifter_in = Cat(io.host.in.bits, rx_shifter(short_request_bits-1,w)) val next_cmd = rx_shifter_in(3,0) @@ -100,7 +100,7 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Module wi Mux(cmd === cmd_readcr || cmd === cmd_writecr, size != UInt(1), Bool(true))) - val tx_count = RegReset(UInt(0, rx_count_w)) + val tx_count = Reg(init=UInt(0, rx_count_w)) val tx_subword_count = tx_count(log2Up(short_request_bits/w)-1,0) val tx_word_count = tx_count(rx_count_w-1, log2Up(short_request_bits/w)) val packet_ram_raddr = 
tx_word_count(log2Up(packet_ram_depth)-1,0) - UInt(1) @@ -112,7 +112,7 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Module wi val tx_size = Mux(!nack && (cmd === cmd_readmem || cmd === cmd_readcr || cmd === cmd_writecr), size, UInt(0)) val tx_done = io.host.out.ready && tx_subword_count.andR && (tx_word_count === tx_size || tx_word_count > UInt(0) && packet_ram_raddr.andR) - val mem_acked = RegReset(Bool(false)) + val mem_acked = Reg(init=Bool(false)) val mem_gxid = Reg(Bits()) val mem_gsrc = Reg(UInt(width = conf.ln.idBits)) val mem_needs_ack = Reg(Bool()) @@ -125,7 +125,7 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Module wi io.mem.grant.ready := Bool(true) val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(9) { UInt() } - val state = RegReset(state_rx) + val state = Reg(init=state_rx) val rx_cmd = Mux(rx_word_count === UInt(0), next_cmd, cmd) when (state === state_rx && rx_done) { @@ -134,7 +134,7 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Module wi state_tx)) } - val mem_cnt = RegReset(UInt(0, log2Up(REFILL_CYCLES))) + val mem_cnt = Reg(init=UInt(0, log2Up(REFILL_CYCLES))) val x_init = Module(new Queue(new Acquire, 1)) when (state === state_mem_req && x_init.io.enq.ready) { state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) @@ -198,8 +198,8 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Module wi val pcrReadData = Reg(Bits(width = io.cpu(0).pcr_rep.bits.getWidth)) for (i <- 0 until nTiles) { - val my_reset = RegReset(Bool(true)) - val my_ipi = RegReset(Bool(false)) + val my_reset = Reg(init=Bool(true)) + val my_ipi = Reg(init=Bool(false)) val cpu = io.cpu(i) val me = pcr_coreid === UInt(i) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 262f3841..bae95113 100644 --- 
a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -61,10 +61,10 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu val s1_pc = Reg(UInt()) val s1_same_block = Reg(Bool()) - val s2_valid = RegReset(Bool(true)) - val s2_pc = RegReset(UInt(START_ADDR)) - val s2_btb_hit = RegReset(Bool(false)) - val s2_xcpt_if = RegReset(Bool(false)) + val s2_valid = Reg(init=Bool(true)) + val s2_pc = Reg(init=UInt(START_ADDR)) + val s2_btb_hit = Reg(init=Bool(false)) + val s2_xcpt_if = Reg(init=Bool(false)) val btbTarget = Cat(btb.io.target(VADDR_BITS-1), btb.io.target) val pcp4_0 = s1_pc + UInt(c.ibytes) @@ -144,16 +144,16 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module } val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(4) { UInt() } - val state = RegReset(s_ready) + val state = Reg(init=s_ready) val invalidated = Reg(Bool()) val stall = !io.resp.ready val rdy = Bool() - val s2_valid = RegReset(Bool(false)) + val s2_valid = Reg(init=Bool(false)) val s2_addr = Reg(UInt(width = PADDR_BITS)) val s2_any_tag_hit = Bool() - val s1_valid = RegReset(Bool(false)) + val s1_valid = Reg(init=Bool(false)) val s1_pgoff = Reg(UInt(width = PGIDX_BITS)) val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUInt val s1_tag = s1_addr(c.tagbits+c.untagbits-1,c.untagbits) @@ -195,7 +195,7 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module tag_raddr := s0_pgoff(c.untagbits-1,c.offbits) } - val vb_array = RegReset(Bits(0, c.lines)) + val vb_array = Reg(init=Bits(0, c.lines)) when (refill_done && !invalidated) { vb_array := vb_array.bitSet(Cat(repl_way, s2_idx), Bool(true)) } diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 7d329de7..95d1218b 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -33,7 +33,7 @@ class Multiplier(unroll: Int = 1, earlyOut: Boolean = 
false)(implicit conf: Rock val w = (w0+1+unroll-1)/unroll*unroll val cycles = w/unroll - val r_val = RegReset(Bool(false)) + val r_val = Reg(init=Bool(false)) val r_prod = Reg(Bits(width = w*2)) val r_lsb = Reg(Bits()) val r_cnt = Reg(UInt(width = log2Up(cycles+1))) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 87c35d95..5e8b4c96 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -189,7 +189,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte } val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(9) { UInt() } - val state = RegReset(s_invalid) + val state = Reg(init=s_invalid) val acquire_type = Reg(UInt()) val release_type = Reg(UInt()) @@ -280,7 +280,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready - val meta_hazard = RegReset(UInt(0,2)) + val meta_hazard = Reg(init=UInt(0,2)) when (meta_hazard != 0) { meta_hazard := meta_hazard + 1 } when (io.meta_write.fire()) { meta_hazard := 1 } io.probe_rdy := !idx_match || (state != s_wb_req && state != s_wb_resp && state != s_meta_clear && meta_hazard === 0) @@ -338,7 +338,7 @@ class MSHRFile(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends M val fence_rdy = Bool(OUTPUT) } - val sdq_val = RegReset(Bits(0, conf.nsdq)) + val sdq_val = Reg(init=Bits(0, conf.nsdq)) val sdq_alloc_id = PriorityEncoder(~sdq_val(conf.nsdq-1,0)) val sdq_rdy = !sdq_val.andR val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd) @@ -432,9 +432,9 @@ class WritebackUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte val release_data = Decoupled(new ReleaseData) } - val valid = RegReset(Bool(false)) - val r1_data_req_fired = RegReset(Bool(false)) - val 
r2_data_req_fired = RegReset(Bool(false)) + val valid = Reg(init=Bool(false)) + val r1_data_req_fired = Reg(init=Bool(false)) + val r2_data_req_fired = Reg(init=Bool(false)) val cmd_sent = Reg(Bool()) val cnt = Reg(UInt(width = log2Up(REFILL_CYCLES+1))) val req = Reg(new WritebackReq) @@ -506,7 +506,7 @@ class ProbeUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends } val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(9) { UInt() } - val state = RegReset(s_invalid) + val state = Reg(init=s_invalid) val line_state = Reg(UInt()) val way_en = Reg(Bits()) val req = Reg(new InternalProbe) @@ -575,7 +575,7 @@ class MetaDataArray(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte val resp = Vec.fill(conf.ways){(new MetaData).asOutput} } - val rst_cnt = RegReset(UInt(0, log2Up(conf.sets+1))) + val rst_cnt = Reg(init=UInt(0, log2Up(conf.sets+1))) val rst = rst_cnt < conf.sets when (rst) { rst_cnt := rst_cnt+1 } @@ -743,19 +743,19 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val mshrs = Module(new MSHRFile) io.cpu.req.ready := Bool(true) - val s1_valid = Reg(updateData=io.cpu.req.fire(), resetData=Bool(false)) + val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) val s1_req = Reg(io.cpu.req.bits.clone) val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill - val s1_replay = RegReset(Bool(false)) + val s1_replay = Reg(init=Bool(false)) val s1_clk_en = Reg(Bool()) - val s2_valid = Reg(updateData=s1_valid_masked, resetData=Bool(false)) + val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) val s2_req = Reg(io.cpu.req.bits.clone) - val s2_replay = Reg(updateData=s1_replay, resetData=Bool(false)) + val s2_replay = Reg(next=s1_replay, init=Bool(false)) val s2_recycle = Bool() val s2_valid_masked = Bool() - val s3_valid = RegReset(Bool(false)) + val s3_valid = Reg(init=Bool(false)) val s3_req = 
Reg(io.cpu.req.bits.clone) val s3_way = Reg(Bits()) @@ -865,7 +865,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val s2_hit = s2_tag_match && tl.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === tl.co.newStateOnHit(s2_req.cmd, s2_hit_state) // load-reserved/store-conditional - val lrsc_count = RegReset(UInt(0)) + val lrsc_count = Reg(init=UInt(0)) val lrsc_valid = lrsc_count.orR val lrsc_addr = Reg(UInt()) val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC) @@ -980,7 +980,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends FIFOedLogicalNetworkIOWrapper(wb.io.release_data) <> io.mem.release.data // store->load bypassing - val s4_valid = Reg(updateData=s3_valid, resetData=Bool(false)) + val s4_valid = Reg(next=s3_valid, init=Bool(false)) val s4_req = RegEnable(s3_req, s3_valid && metaReadArb.io.out.valid) val bypasses = List( ((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out), @@ -1018,14 +1018,14 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends s2_valid_masked := s2_valid && !s2_nack val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable - val s2_recycle_next = RegReset(Bool(false)) + val s2_recycle_next = Reg(init=Bool(false)) when (s1_valid || s1_replay) { s2_recycle_next := (s1_valid || s1_replay) && s2_recycle_ecc } s2_recycle := s2_recycle_ecc || s2_recycle_next // after a nack, block until nack condition resolves to save energy - val block_fence = RegReset(Bool(false)) + val block_fence = Reg(init=Bool(false)) block_fence := (s2_valid && s2_req.cmd === M_FENCE || block_fence) && !mshrs.io.fence_rdy - val block_miss = RegReset(Bool(false)) + val block_miss = Reg(init=Bool(false)) block_miss := (s2_valid || block_miss) && s2_nack_miss when (block_fence || block_miss) { io.cpu.req.ready := Bool(false) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 
806ac261..3681586c 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -40,7 +40,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module require(VPN_BITS == levels * bitsPerLevel) val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(5) { UInt() }; - val state = RegReset(s_ready) + val state = Reg(init=s_ready) val count = Reg(UInt(width = log2Up(levels))) val r_req_vpn = Reg(Bits()) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 99997b39..1c3af620 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -22,7 +22,7 @@ class RocketCAM(entries: Int, tag_bits: Int) extends Module { val io = new CAMIO(entries, addr_bits, tag_bits); val cam_tags = Vec.fill(entries){Reg(Bits(width = tag_bits))} - val vb_array = RegReset(Bits(0, entries)) + val vb_array = Reg(init=Bits(0, entries)) when (io.write) { vb_array := vb_array.bitSet(io.write_addr, Bool(true)); cam_tags(io.write_addr) := io.write_tag @@ -94,7 +94,7 @@ class TLB(entries: Int) extends Module } val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4) { UInt() } - val state = RegReset(s_ready) + val state = Reg(init=s_ready) val r_refill_tag = Reg(UInt()) val r_refill_waddr = Reg(UInt()) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index b656d16e..ed008423 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -117,12 +117,12 @@ case class WideCounter(width: Int, inc: Bool = Bool(true)) { private val isWide = width >= 4 private val smallWidth = if (isWide) log2Up(width) else width - private val small = RegReset(UInt(0, smallWidth)) + private val small = Reg(init=UInt(0, smallWidth)) private val nextSmall = small + UInt(1, smallWidth+1) when (inc) { small := nextSmall(smallWidth-1,0) } private val large = if (isWide) { - val r = RegReset(UInt(0, width - smallWidth)) + val r = Reg(init=UInt(0, width - 
smallWidth)) when (inc && nextSmall(smallWidth)) { r := r + UInt(1) } r } else null From ff7b4860061633fb04c9a818409f53e2243c68b5 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 15 Aug 2013 18:13:19 -0700 Subject: [PATCH 0617/1087] standardized sbt build --- rocket/src/main/scala/rocket.config | 1 - 1 file changed, 1 deletion(-) delete mode 100644 rocket/src/main/scala/rocket.config diff --git a/rocket/src/main/scala/rocket.config b/rocket/src/main/scala/rocket.config deleted file mode 100644 index 65b60ff0..00000000 --- a/rocket/src/main/scala/rocket.config +++ /dev/null @@ -1 +0,0 @@ -NWAYS 4 From d4a0db45752d4e9c1e14affb8b1117da893ae2b9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 23 Aug 2013 21:16:28 -0700 Subject: [PATCH 0618/1087] Reflect ISA changes --- rocket/src/main/scala/ctrl.scala | 35 +- rocket/src/main/scala/ctrl_vec.scala | 5 +- rocket/src/main/scala/decode.scala | 31 +- rocket/src/main/scala/dpath.scala | 7 +- rocket/src/main/scala/dpath_util.scala | 91 ++- rocket/src/main/scala/fpu.scala | 12 +- rocket/src/main/scala/htif.scala | 7 +- rocket/src/main/scala/instructions.scala | 923 +++++++++++------------ rocket/src/main/scala/ptw.scala | 20 +- rocket/src/main/scala/tile.scala | 1 - rocket/src/main/scala/tlb.scala | 8 +- rocket/src/main/scala/util.scala | 15 +- 12 files changed, 558 insertions(+), 597 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6831b144..46f4ad3b 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -93,9 +93,7 @@ object XDecode extends DecodeConstants J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,WA_RA,WB_PC, PCR.N,N,N,N,N,N), - JALR_C-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), - JALR_J-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, 
MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), - JALR_R-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), + JALR-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), @@ -146,9 +144,9 @@ object XDecode extends DecodeConstants SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - riscvAND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - riscvOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - riscvXOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + OR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + XOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), @@ -226,8 +224,8 @@ object FDecode extends DecodeConstants FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), FNMSUB_S-> List(Y, Y,N,BR_N, 
N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - MFTX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - MFTX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMV_X_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMV_X_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), FCVT_W_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), FCVT_WU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), @@ -242,8 +240,8 @@ object FDecode extends DecodeConstants FLT_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), FLE_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), FLE_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - MXTF_S-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - MXTF_D-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMV_S_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FMV_D_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), FCVT_S_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), FCVT_D_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), FCVT_S_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), @@ -252,8 +250,8 @@ object FDecode 
extends DecodeConstants FCVT_D_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), FCVT_S_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), FCVT_D_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - MFFSR-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - MTFSR-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FRSR-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + FSSR-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), FLW-> List(Y, Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), FLD-> List(Y, Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), FSW-> List(Y, Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), @@ -269,13 +267,10 @@ object VDecode extends DecodeConstants // | vec_val | | renx1 mem_val | | wen pcr | | | privileged // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next // | | | | | | | | | | | | | | | | | | | | | | | | - VVCFGIVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,Y), - VVCFG-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,Y), + VSETCFGVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,Y), VSETVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,Y), VF-> List(Y, N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), VMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - VMSV-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, 
DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VFMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), FENCE_V_L-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), FENCE_V_G-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), VLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), @@ -454,22 +449,20 @@ class Control(implicit conf: RocketConfiguration) extends Module } else (Bool(false), Bool(false)) val (id_interrupt_unmasked, id_interrupt_cause) = checkExceptions(id_interrupts) - val id_interrupt = io.dpath.status.et && id_interrupt_unmasked + val id_interrupt = io.dpath.status.ei && id_interrupt_unmasked def checkExceptions(x: Seq[(Bool, UInt)]) = (x.map(_._1).reduce(_||_), PriorityMux(x)) - // executing ERET when traps are enabled causes an illegal instruction exception - val illegal_inst = !id_int_val.toBool || (id_eret.toBool && io.dpath.status.et) // flush pipeline on PCR writes that may have side effects val id_pcr_flush = id_pcr != PCR.N && id_pcr != PCR.F && - id_raddr1 != PCR.K0 && id_raddr1 != PCR.K1 && id_raddr1 != PCR.EPC + id_raddr1 != PCR.SUP0 && id_raddr1 != PCR.SUP1 && id_raddr1 != PCR.EPC val (id_xcpt, id_cause) = checkExceptions(List( (id_interrupt, id_interrupt_cause), (io.imem.resp.bits.xcpt_ma, UInt(0)), (io.imem.resp.bits.xcpt_if, UInt(1)), - (illegal_inst, UInt(2)), + (!id_int_val.toBool, UInt(2)), (id_privileged && !io.dpath.status.s, UInt(3)), (id_fp_val && !io.dpath.status.ef, UInt(4)), (id_syscall, UInt(6)), diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala index 2fa4c6f9..5e048742 100644 --- a/rocket/src/main/scala/ctrl_vec.scala +++ b/rocket/src/main/scala/ctrl_vec.scala @@ -109,13 +109,10 @@ class rocketCtrlVecDecoder extends Module // val vcmd vimm vimm2 | fn | | | | | | | | | | | | | | // | | | | | | 
| | | | | | | | | | | | | | List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,N,N),Array( - VVCFGIVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFGVL,N,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,N), - VVCFG-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, N,VEC_CFG, N,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,N), + VSETCFGVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFGVL,N,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,N), VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N,N,N), VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N), - VMSV-> List(Y,VCMD_TX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N,N,N), - VFMVV-> List(Y,VCMD_TF,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N), FENCE_V_L-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N), FENCE_V_G-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N), VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 0a390da2..502c1fee 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -13,10 +13,10 @@ object DecodeLogic new Term(b.value) } } - def logic(addr: Bits, cache: scala.collection.mutable.Map[Term,Bits], terms: Seq[Term]) = { + def logic(addr: Bits, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bits], terms: Seq[Term]) = { terms.map { t => if (!cache.contains(t)) - cache += t -> ((if (t.mask == 0) addr else addr & Lit(BigInt(2).pow(addr.width)-(t.mask+1), addr.width){Bits()}) === Lit(t.value, addr.width){Bits()}) + cache += t -> ((if (t.mask == 0) addr else addr & Lit(BigInt(2).pow(addrWidth)-(t.mask+1), addrWidth){Bits()}) === Lit(t.value, addrWidth){Bits()}) cache(t).toBool }.foldLeft(Bool(false))(_||_) } @@ -27,19 +27,26 @@ object DecodeLogic val dlit = d.litOf val dterm = term(dlit) val (keys, values) = 
map.unzip - val keysterms = keys.toList.map(k => term(k.litOf)) zip values.toList.map(v => term(v.head.litOf)) + val addrWidth = keys.map(_.getWidth).max + val terms = keys.toList.map(k => term(k.litOf)) + val termvalues = terms zip values.toList.map(v => term(v.head.litOf)) + + for (t <- terms.tails; if !t.isEmpty) + for (u <- t.tail) + assert(!t.head.intersects(u), "DecodeLogic: keys " + t + " and " + u + " overlap") val result = (0 until math.max(dlit.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) => + val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1) + val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1) + val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1) + if (((dterm.mask >> i) & 1) != 0) { - var mint = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1) - var maxt = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1) - logic(addr, cache, SimplifyDC(mint, maxt, addr.width)).toBits + logic(addr, addrWidth, cache, SimplifyDC(mint, maxt, addrWidth)).toBits } else { - val want = 1 - ((dterm.value.toInt >> i) & 1) - val mint = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == want }.map(_._1) - val dc = keysterms.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1) - val bit = logic(addr, cache, Simplify(mint, dc, addr.width)).toBits - if (want == 1) bit else ~bit + val defbit = (dterm.value.toInt >> i) & 1 + val t = if (defbit == 0) mint else maxt + val bit = logic(addr, addrWidth, cache, Simplify(t, dc, addrWidth)).toBits + if (defbit == 0) bit else ~bit } }).reverse.reduceRight(Cat(_,_)) map = map map { case (x,y) => (x, y.tail) } @@ -71,7 +78,7 @@ class Term(val value: BigInt, val mask: BigInt = 0) new Term(value &~ bit, mask | bit) } - override def toString = 
value.toString + "-" + mask + (if (prime) "p" else "") + override def toString = value.toString(16) + "-" + mask.toString(16) + (if (prime) "p" else "") } object Simplify diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 4e32be4c..f62e3327 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -199,7 +199,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ctrl.pcr_replay := pcr.io.replay io.ptw.ptbr := pcr.io.ptbr - io.ptw.invalidate := pcr.io.ptbr_wen + io.ptw.invalidate := pcr.io.fatc io.ptw.eret := io.ctrl.eret io.ptw.status := pcr.io.status @@ -216,6 +216,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val ex_pc_plus4 = ex_reg_pc.toSInt + Mux(ex_reg_sel_alu2 === A2_LTYPE, ex_reg_inst(26,7).toSInt << 12, SInt(4)) val ex_branch_target = ex_reg_pc.toSInt + (ex_imm << 1) + val ex_jalr_target = (ex_effective_address >> 1 << 1).toSInt val tsc_reg = WideCounter(64) val irt_reg = WideCounter(64, io.ctrl.wb_valid) @@ -327,8 +328,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.imem.req.bits.currentpc := ex_reg_pc io.imem.req.bits.pc := Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, - Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_effective_address.toSInt, ex_branch_target), - Mux(io.ctrl.sel_pc === PC_PCR, Cat(pcr.io.evec(VADDR_BITS-1), pcr.io.evec), + Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_jalr_target, ex_branch_target), + Mux(io.ctrl.sel_pc === PC_PCR, pcr.io.evec, wb_reg_pc))).toUInt // PC_WB printf("C: %d [%d] pc=[%x] W[r%d=%x] R[r%d=%x] R[r%d=%x] inst=[%x] %s\n", diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 4aae4679..6e3d00a9 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -58,15 +58,15 @@ class Status extends Bundle { val ip = Bits(width = 8) val im = Bits(width = 8) val zero = Bits(width = 7) + val 
ev = Bool() val vm = Bool() val s64 = Bool() val u64 = Bool() - val s = Bool() - val ps = Bool() - val ec = Bool() - val ev = Bool() val ef = Bool() - val et = Bool() + val pei = Bool() + val ei = Bool() + val ps = Bool() + val s = Bool() } object PCR @@ -81,20 +81,22 @@ object PCR val S = Bits(7,3) // setpcr // regs - val STATUS = 0 - val EPC = 1 - val BADVADDR = 2 - val EVEC = 3 - val COUNT = 4 - val COMPARE = 5 - val CAUSE = 6 - val PTBR = 7 - val SEND_IPI = 8 - val CLR_IPI = 9 - val COREID = 10 - val IMPL = 11 - val K0 = 12 - val K1 = 13 + val SUP0 = 0 + val SUP1 = 1 + val EPC = 2 + val BADVADDR = 3 + val PTBR = 4 + val ASID = 5 + val COUNT = 6 + val COMPARE = 7 + val EVEC = 8 + val CAUSE = 9 + val STATUS = 10 + val HARTID = 11 + val IMPL = 12 + val FATC = 13 + val SEND_IPI = 14 + val CLR_IPI = 15 val VECBANK = 18 val VECCFG = 19 val STATS = 28 @@ -116,7 +118,7 @@ class PCR(implicit conf: RocketConfiguration) extends Module val status = new Status().asOutput val ptbr = UInt(OUTPUT, PADDR_BITS) - val evec = UInt(OUTPUT, VADDR_BITS) + val evec = UInt(OUTPUT, VADDR_BITS+1) val exception = Bool(INPUT) val cause = UInt(INPUT, 6) val badvaddr_wen = Bool(INPUT) @@ -126,7 +128,7 @@ class PCR(implicit conf: RocketConfiguration) extends Module val eret = Bool(INPUT) val ei = Bool(INPUT) val di = Bool(INPUT) - val ptbr_wen = Bool(OUTPUT) + val fatc = Bool(OUTPUT) val irq_timer = Bool(OUTPUT) val irq_ipi = Bool(OUTPUT) val replay = Bool(OUTPUT) @@ -139,21 +141,19 @@ class PCR(implicit conf: RocketConfiguration) extends Module } import PCR._ - val reg_epc = Reg(Bits(width = conf.xprlen)) - val reg_badvaddr = Reg(Bits(width = conf.xprlen)) - val reg_ebase = Reg(Bits(width = conf.xprlen)) + val reg_epc = Reg(Bits(width = VADDR_BITS+1)) + val reg_badvaddr = Reg(Bits(width = VADDR_BITS)) + val reg_evec = Reg(Bits(width = VADDR_BITS)) val reg_count = WideCounter(32) val reg_compare = Reg(Bits(width = 32)) val reg_cause = Reg(Bits(width = io.cause.getWidth)) val reg_tohost = 
Reg(init=Bits(0, conf.xprlen)) val reg_fromhost = Reg(init=Bits(0, conf.xprlen)) - val reg_coreid = Reg(Bits(width = 16)) - val reg_k0 = Reg(Bits(width = conf.xprlen)) - val reg_k1 = Reg(Bits(width = conf.xprlen)) + val reg_sup0 = Reg(Bits(width = conf.xprlen)) + val reg_sup1 = Reg(Bits(width = conf.xprlen)) val reg_ptbr = Reg(UInt(width = PADDR_BITS)) val reg_vecbank = Reg(init=SInt(-1,8).toBits) val reg_stats = Reg(init=Bool(false)) - val reg_error_mode = Reg(init=Bool(false)) val reg_status = Reg(new Status) // reset down below val r_irq_timer = Reg(init=Bool(false)) @@ -185,10 +185,9 @@ class PCR(implicit conf: RocketConfiguration) extends Module io.status := reg_status io.status.ip := Cat(r_irq_timer, reg_fromhost.orR, r_irq_ipi, Bool(false), Bool(false), Bool(false), Bool(false), Bool(false)) - io.ptbr_wen := wen && addr === PTBR - io.evec := Mux(io.exception, reg_ebase, reg_epc).toUInt + io.fatc := wen && addr === FATC + io.evec := Mux(io.exception, reg_evec.toSInt, reg_epc).toUInt io.ptbr := reg_ptbr - io.host.debug.error_mode := reg_error_mode io.vecbank := reg_vecbank var cnt = UInt(0,4) @@ -206,19 +205,17 @@ class PCR(implicit conf: RocketConfiguration) extends Module } when (io.exception) { - when (!reg_status.et) { - reg_error_mode := true - } reg_status.s := true reg_status.ps := reg_status.s - reg_status.et := false + reg_status.ei := false + reg_status.pei := reg_status.ei reg_epc := io.pc.toSInt reg_cause := io.cause } when (io.eret) { reg_status.s := reg_status.ps - reg_status.et := true + reg_status.ei := reg_status.pei } when (reg_count === reg_compare) { @@ -238,10 +235,10 @@ class PCR(implicit conf: RocketConfiguration) extends Module val read_veccfg = if (conf.vec) Cat(io.vec_nfregs, io.vec_nxregs, io.vec_appvl) else Bits(0) val read_cause = reg_cause(reg_cause.getWidth-1) << conf.xprlen-1 | reg_cause(reg_cause.getWidth-2,0) io.rw.rdata := AVec[Bits]( - io.status.toBits, reg_epc, reg_badvaddr, reg_ebase, - reg_count, reg_compare, read_cause, 
read_ptbr, - reg_coreid/*x*/, read_impl/*x*/, reg_coreid, read_impl, - reg_k0, reg_k1, reg_k0/*x*/, reg_k1/*x*/, + reg_sup0, reg_sup1, reg_epc, reg_badvaddr, + reg_ptbr, Bits(0)/*asid*/, reg_count, reg_compare, + reg_evec, reg_cause, io.status.toBits, io.host.id, + read_impl, read_impl/*x*/, read_impl/*x*/, read_impl/*x*/, reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank, read_veccfg, reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank/*x*/, read_veccfg/*x*/, reg_tohost/*x*/, reg_fromhost/*x*/, @@ -255,21 +252,21 @@ class PCR(implicit conf: RocketConfiguration) extends Module wdata)) reg_status := new Status().fromBits(sr_wdata) + reg_status.s64 := true + reg_status.u64 := true reg_status.zero := 0 if (!conf.vec) reg_status.ev := false if (!conf.fpu) reg_status.ef := false - if (!conf.rvc) reg_status.ec := false } when (addr === EPC) { reg_epc := wdata(VADDR_BITS,0).toSInt } - when (addr === EVEC) { reg_ebase := wdata(VADDR_BITS-1,0).toSInt } + when (addr === EVEC) { reg_evec := wdata(VADDR_BITS-1,0).toSInt } when (addr === COUNT) { reg_count := wdata.toUInt } when (addr === COMPARE) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false); } - when (addr === COREID) { reg_coreid := wdata(15,0) } when (addr === FROMHOST) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } when (addr === TOHOST) { when (reg_tohost === UInt(0)) { reg_tohost := wdata } } when (addr === CLR_IPI) { r_irq_ipi := wdata(0) } - when (addr === K0) { reg_k0 := wdata; } - when (addr === K1) { reg_k1 := wdata; } + when (addr === SUP0) { reg_sup0 := wdata; } + when (addr === SUP1) { reg_sup1 := wdata; } when (addr === PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUInt; } when (addr === VECBANK) { reg_vecbank:= wdata(7,0) } when (addr === STATS) { reg_stats := wdata(0) } @@ -279,10 +276,10 @@ class PCR(implicit conf: RocketConfiguration) extends Module when (io.host.ipi_rep.valid) { r_irq_ipi 
:= Bool(true) } when(this.reset) { - reg_status.et := false + reg_status.ei := false + reg_status.pei := false reg_status.ef := false reg_status.ev := false - reg_status.ec := false reg_status.ps := false reg_status.s := true reg_status.u64 := true diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index c6803a67..7e564800 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -79,8 +79,8 @@ class FPUDecoder extends Module FLD -> List(FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N,N,N), FSW -> List(FCMD_STORE, N,N,Y,N,Y,N,Y,N,N,N,N,N), FSD -> List(FCMD_STORE, N,N,Y,N,N,N,Y,N,N,N,N,N), - MXTF_S -> List(FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N,Y,N,N), - MXTF_D -> List(FCMD_MXTF, Y,N,N,N,N,Y,N,N,N,Y,N,N), + FMV_S_X -> List(FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N,Y,N,N), + FMV_D_X -> List(FCMD_MXTF, Y,N,N,N,N,Y,N,N,N,Y,N,N), FCVT_S_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,Y,Y,N,N,N,Y,N,N), FCVT_S_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,Y,Y,N,N,N,Y,N,N), FCVT_S_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,Y,Y,N,N,N,Y,N,N), @@ -89,8 +89,8 @@ class FPUDecoder extends Module FCVT_D_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,N,N,N,Y,N,N), FCVT_D_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,N,Y,N,N,N,Y,N,N), FCVT_D_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,N,N,N,Y,N,N), - MFTX_S -> List(FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N,Y,N,N), - MFTX_D -> List(FCMD_MFTX, N,Y,N,N,N,N,Y,N,N,Y,N,N), + FMV_X_S -> List(FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N,Y,N,N), + FMV_X_D -> List(FCMD_MFTX, N,Y,N,N,N,N,Y,N,N,Y,N,N), FCVT_W_S -> List(FCMD_CVT_W_FMT, N,Y,N,N,Y,N,Y,N,N,Y,N,N), FCVT_WU_S-> List(FCMD_CVT_WU_FMT,N,Y,N,N,Y,N,Y,N,N,Y,N,N), FCVT_L_S -> List(FCMD_CVT_L_FMT, N,Y,N,N,Y,N,Y,N,N,Y,N,N), @@ -107,8 +107,8 @@ class FPUDecoder extends Module FEQ_D -> List(FCMD_EQ, N,Y,Y,N,N,N,Y,N,N,Y,N,N), FLT_D -> List(FCMD_LT, N,Y,Y,N,N,N,Y,N,N,Y,N,N), FLE_D -> List(FCMD_LE, N,Y,Y,N,N,N,Y,N,N,Y,N,N), - MTFSR -> List(FCMD_MTFSR, N,N,N,N,Y,N,Y,N,N,Y,Y,Y), - MFFSR -> List(FCMD_MFFSR, N,N,N,N,Y,N,Y,N,N,Y,Y,N), + FSSR -> List(FCMD_MTFSR, 
N,N,N,N,Y,N,Y,N,N,Y,Y,Y), + FRSR -> List(FCMD_MFFSR, N,N,N,N,Y,N,Y,N,N,Y,Y,N), FSGNJ_S -> List(FCMD_SGNJ, Y,Y,Y,N,Y,N,N,Y,N,Y,N,N), FSGNJN_S -> List(FCMD_SGNJN, Y,Y,Y,N,Y,N,N,Y,N,Y,N,N), FSGNJX_S -> List(FCMD_SGNJX, Y,Y,Y,N,Y,N,N,Y,N,Y,N,N), diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index f327dddd..2452883b 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -5,11 +5,6 @@ import Node._ import uncore._ import Util._ -class DebugIO extends Bundle -{ - val error_mode = Bool(OUTPUT); -} - class HostIO(val w: Int) extends Bundle { val clk = Bool(OUTPUT) @@ -28,7 +23,7 @@ class PCRReq extends Bundle class HTIFIO(ntiles: Int) extends Bundle { val reset = Bool(INPUT) - val debug = new DebugIO + val id = UInt(INPUT, log2Up(ntiles)) val pcr_req = Decoupled(new PCRReq).flip val pcr_rep = Decoupled(Bits(width = 64)) val ipi_req = Decoupled(Bits(width = log2Up(ntiles))) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 3a2b94d5..a5d15e2d 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -5,262 +5,248 @@ import Node._ object Instructions { - // automatically generated by parse-opcodes - def J = Bits("b?????????????????????????_1100111",32); - def JAL = Bits("b?????????????????????????_1101111",32); - def JALR_C = Bits("b?????_?????_????????????_000_1101011",32); - def JALR_R = Bits("b?????_?????_????????????_001_1101011",32); - def JALR_J = Bits("b?????_?????_????????????_010_1101011",32); - def RDNPC = Bits("b?????_00000_000000000000_100_1101011",32); - def BEQ = Bits("b?????_?????_?????_???????_000_1100011",32); - def BNE = Bits("b?????_?????_?????_???????_001_1100011",32); - def BLT = Bits("b?????_?????_?????_???????_100_1100011",32); - def BGE = Bits("b?????_?????_?????_???????_101_1100011",32); - def BLTU = Bits("b?????_?????_?????_???????_110_1100011",32); - def BGEU = 
Bits("b?????_?????_?????_???????_111_1100011",32); - def LUI = Bits("b?????_????????????????????_0110111",32); - def AUIPC = Bits("b?????_????????????????????_0010111",32); - def ADDI = Bits("b?????_?????_????????????_000_0010011",32); - def SLLI = Bits("b?????_?????_000000_??????_001_0010011",32); - def SLTI = Bits("b?????_?????_????????????_010_0010011",32); - def SLTIU = Bits("b?????_?????_????????????_011_0010011",32); - def XORI = Bits("b?????_?????_????????????_100_0010011",32); - def SRLI = Bits("b?????_?????_000000_??????_101_0010011",32); - def SRAI = Bits("b?????_?????_000001_??????_101_0010011",32); - def ORI = Bits("b?????_?????_????????????_110_0010011",32); - def ANDI = Bits("b?????_?????_????????????_111_0010011",32); - def ADD = Bits("b?????_?????_?????_0000000000_0110011",32); - def SUB = Bits("b?????_?????_?????_1000000000_0110011",32); - def SLL = Bits("b?????_?????_?????_0000000001_0110011",32); - def SLT = Bits("b?????_?????_?????_0000000010_0110011",32); - def SLTU = Bits("b?????_?????_?????_0000000011_0110011",32); - def riscvXOR = Bits("b?????_?????_?????_0000000100_0110011",32); - def SRL = Bits("b?????_?????_?????_0000000101_0110011",32); - def SRA = Bits("b?????_?????_?????_1000000101_0110011",32); - def riscvOR = Bits("b?????_?????_?????_0000000110_0110011",32); - def riscvAND = Bits("b?????_?????_?????_0000000111_0110011",32); - def MUL = Bits("b?????_?????_?????_0000001000_0110011",32); - def MULH = Bits("b?????_?????_?????_0000001001_0110011",32); - def MULHSU = Bits("b?????_?????_?????_0000001010_0110011",32); - def MULHU = Bits("b?????_?????_?????_0000001011_0110011",32); - def DIV = Bits("b?????_?????_?????_0000001100_0110011",32); - def DIVU = Bits("b?????_?????_?????_0000001101_0110011",32); - def REM = Bits("b?????_?????_?????_0000001110_0110011",32); - def REMU = Bits("b?????_?????_?????_0000001111_0110011",32); - def ADDIW = Bits("b?????_?????_????????????_000_0011011",32); - def SLLIW = 
Bits("b?????_?????_000000_0_?????_001_0011011",32); - def SRLIW = Bits("b?????_?????_000000_0_?????_101_0011011",32); - def SRAIW = Bits("b?????_?????_000001_0_?????_101_0011011",32); - def ADDW = Bits("b?????_?????_?????_0000000000_0111011",32); - def SUBW = Bits("b?????_?????_?????_1000000000_0111011",32); - def SLLW = Bits("b?????_?????_?????_0000000001_0111011",32); - def SRLW = Bits("b?????_?????_?????_0000000101_0111011",32); - def SRAW = Bits("b?????_?????_?????_1000000101_0111011",32); - def MULW = Bits("b?????_?????_?????_0000001000_0111011",32); - def DIVW = Bits("b?????_?????_?????_0000001100_0111011",32); - def DIVUW = Bits("b?????_?????_?????_0000001101_0111011",32); - def REMW = Bits("b?????_?????_?????_0000001110_0111011",32); - def REMUW = Bits("b?????_?????_?????_0000001111_0111011",32); - def LB = Bits("b?????_?????_????????????_000_0000011",32); - def LH = Bits("b?????_?????_????????????_001_0000011",32); - def LW = Bits("b?????_?????_????????????_010_0000011",32); - def LD = Bits("b?????_?????_????????????_011_0000011",32); - def LBU = Bits("b?????_?????_????????????_100_0000011",32); - def LHU = Bits("b?????_?????_????????????_101_0000011",32); - def LWU = Bits("b?????_?????_????????????_110_0000011",32); - def SB = Bits("b?????_?????_?????_???????_000_0100011",32); - def SH = Bits("b?????_?????_?????_???????_001_0100011",32); - def SW = Bits("b?????_?????_?????_???????_010_0100011",32); - def SD = Bits("b?????_?????_?????_???????_011_0100011",32); - def AMOADD_W = Bits("b?????_?????_?????_0000000010_0101011",32); - def AMOSWAP_W = Bits("b?????_?????_?????_0000001010_0101011",32); - def AMOAND_W = Bits("b?????_?????_?????_0000010010_0101011",32); - def AMOOR_W = Bits("b?????_?????_?????_0000011010_0101011",32); - def AMOMIN_W = Bits("b?????_?????_?????_0000100010_0101011",32); - def AMOMAX_W = Bits("b?????_?????_?????_0000101010_0101011",32); - def AMOMINU_W = Bits("b?????_?????_?????_0000110010_0101011",32); - def AMOMAXU_W = 
Bits("b?????_?????_?????_0000111010_0101011",32); - def AMOADD_D = Bits("b?????_?????_?????_0000000011_0101011",32); - def AMOSWAP_D = Bits("b?????_?????_?????_0000001011_0101011",32); - def AMOAND_D = Bits("b?????_?????_?????_0000010011_0101011",32); - def AMOOR_D = Bits("b?????_?????_?????_0000011011_0101011",32); - def AMOMIN_D = Bits("b?????_?????_?????_0000100011_0101011",32); - def AMOMAX_D = Bits("b?????_?????_?????_0000101011_0101011",32); - def AMOMINU_D = Bits("b?????_?????_?????_0000110011_0101011",32); - def AMOMAXU_D = Bits("b?????_?????_?????_0000111011_0101011",32); - def LR_W = Bits("b?????_?????_00000_1000000010_0101011",32); - def LR_D = Bits("b?????_?????_00000_1000000011_0101011",32); - def SC_W = Bits("b?????_?????_?????_1000001010_0101011",32); - def SC_D = Bits("b?????_?????_?????_1000001011_0101011",32); - def FENCE_I = Bits("b?????_?????_????????????_001_0101111",32); - def FENCE = Bits("b?????_?????_????????????_010_0101111",32); - def SYSCALL = Bits("b00000_00000_00000_0000000000_1110111",32); - def BREAK = Bits("b00000_00000_00000_0000000001_1110111",32); - def RDCYCLE = Bits("b?????_00000_00000_0000000100_1110111",32); - def RDTIME = Bits("b?????_00000_00000_0000001100_1110111",32); - def RDINSTRET = Bits("b?????_00000_00000_0000010100_1110111",32); - def CLEARPCR = Bits("b?????_?????_????????????_000_1111011",32); - def SETPCR = Bits("b?????_?????_????????????_001_1111011",32); - def MFPCR = Bits("b?????_?????_00000_0000000010_1111011",32); - def MTPCR = Bits("b?????_?????_?????_0000000011_1111011",32); - def ERET = Bits("b00000_00000_00000_0000000100_1111011",32); - def CFLUSH = Bits("b00000_00000_00000_0000000101_1111011",32); - // floating point instructions - def FMOVZ = Bits("b?????_?????_?????_0000010101_1110111",32); - def FMOVN = Bits("b?????_?????_?????_0000011101_1110111",32); - def FADD_S = Bits("b?????_?????_?????_00000_???_00_1010011",32); - def FSUB_S = Bits("b?????_?????_?????_00001_???_00_1010011",32); - def FMUL_S = 
Bits("b?????_?????_?????_00010_???_00_1010011",32); - def FDIV_S = Bits("b?????_?????_?????_00011_???_00_1010011",32); - def FSQRT_S = Bits("b?????_?????_00000_00100_???_00_1010011",32); - def FSGNJ_S = Bits("b?????_?????_?????_00101_000_00_1010011",32); - def FSGNJN_S = Bits("b?????_?????_?????_00110_000_00_1010011",32); - def FSGNJX_S = Bits("b?????_?????_?????_00111_000_00_1010011",32); - def FADD_D = Bits("b?????_?????_?????_00000_???_01_1010011",32); - def FSUB_D = Bits("b?????_?????_?????_00001_???_01_1010011",32); - def FMUL_D = Bits("b?????_?????_?????_00010_???_01_1010011",32); - def FDIV_D = Bits("b?????_?????_?????_00011_???_01_1010011",32); - def FSQRT_D = Bits("b?????_?????_00000_00100_???_01_1010011",32); - def FSGNJ_D = Bits("b?????_?????_?????_00101_000_01_1010011",32); - def FSGNJN_D = Bits("b?????_?????_?????_00110_000_01_1010011",32); - def FSGNJX_D = Bits("b?????_?????_?????_00111_000_01_1010011",32); - def FCVT_L_S = Bits("b?????_?????_00000_01000_???_00_1010011",32); - def FCVT_LU_S = Bits("b?????_?????_00000_01001_???_00_1010011",32); - def FCVT_W_S = Bits("b?????_?????_00000_01010_???_00_1010011",32); - def FCVT_WU_S = Bits("b?????_?????_00000_01011_???_00_1010011",32); - def FCVT_L_D = Bits("b?????_?????_00000_01000_???_01_1010011",32); - def FCVT_LU_D = Bits("b?????_?????_00000_01001_???_01_1010011",32); - def FCVT_W_D = Bits("b?????_?????_00000_01010_???_01_1010011",32); - def FCVT_WU_D = Bits("b?????_?????_00000_01011_???_01_1010011",32); - def FCVT_S_L = Bits("b?????_?????_00000_01100_???_00_1010011",32); - def FCVT_S_LU = Bits("b?????_?????_00000_01101_???_00_1010011",32); - def FCVT_S_W = Bits("b?????_?????_00000_01110_???_00_1010011",32); - def FCVT_S_WU = Bits("b?????_?????_00000_01111_???_00_1010011",32); - def FCVT_D_L = Bits("b?????_?????_00000_01100_???_01_1010011",32); - def FCVT_D_LU = Bits("b?????_?????_00000_01101_???_01_1010011",32); - def FCVT_D_W = Bits("b?????_?????_00000_01110_???_01_1010011",32); - def FCVT_D_WU = 
Bits("b?????_?????_00000_01111_???_01_1010011",32); - def FCVT_S_D = Bits("b?????_?????_00000_10001_???_00_1010011",32); - def FCVT_D_S = Bits("b?????_?????_00000_10000_???_01_1010011",32); - def FEQ_S = Bits("b?????_?????_?????_10101_000_00_1010011",32); - def FLT_S = Bits("b?????_?????_?????_10110_000_00_1010011",32); - def FLE_S = Bits("b?????_?????_?????_10111_000_00_1010011",32); - def FEQ_D = Bits("b?????_?????_?????_10101_000_01_1010011",32); - def FLT_D = Bits("b?????_?????_?????_10110_000_01_1010011",32); - def FLE_D = Bits("b?????_?????_?????_10111_000_01_1010011",32); - def FMIN_S = Bits("b?????_?????_?????_11000_000_00_1010011",32); - def FMAX_S = Bits("b?????_?????_?????_11001_000_00_1010011",32); - def FMIN_D = Bits("b?????_?????_?????_11000_000_01_1010011",32); - def FMAX_D = Bits("b?????_?????_?????_11001_000_01_1010011",32); - def MFTX_S = Bits("b?????_?????_00000_11100_000_00_1010011",32); - def MFTX_D = Bits("b?????_?????_00000_11100_000_01_1010011",32); - def MFFSR = Bits("b?????_00000_00000_11101_000_00_1010011",32); - def MXTF_S = Bits("b?????_?????_00000_11110_000_00_1010011",32); - def MXTF_D = Bits("b?????_?????_00000_11110_000_01_1010011",32); - def MTFSR = Bits("b?????_?????_00000_11111_000_00_1010011",32); - def FLW = Bits("b?????_?????_????????????_010_0000111",32); - def FLD = Bits("b?????_?????_????????????_011_0000111",32); - def FSW = Bits("b?????_?????_?????_???????_010_0100111",32); - def FSD = Bits("b?????_?????_?????_???????_011_0100111",32); - def FMADD_S = Bits("b?????_?????_?????_?????_???_00_1000011",32); - def FMSUB_S = Bits("b?????_?????_?????_?????_???_00_1000111",32); - def FNMSUB_S = Bits("b?????_?????_?????_?????_???_00_1001011",32); - def FNMADD_S = Bits("b?????_?????_?????_?????_???_00_1001111",32); - def FMADD_D = Bits("b?????_?????_?????_?????_???_01_1000011",32); - def FMSUB_D = Bits("b?????_?????_?????_?????_???_01_1000111",32); - def FNMSUB_D = Bits("b?????_?????_?????_?????_???_01_1001011",32); - def FNMADD_D = 
Bits("b?????_?????_?????_?????_???_01_1001111",32); - // vector instructions - def FENCE_V_L = Bits("b?????_?????_????????????_100_0101111",32); - def FENCE_V_G = Bits("b?????_?????_????????????_101_0101111",32); - def MOVZ = Bits("b?????_?????_?????_0000000101_1110111",32); - def MOVN = Bits("b?????_?????_?????_0000001101_1110111",32); - def STOP = Bits("b00000_00000_00000_0000000010_1110111",32); - def UTIDX = Bits("b?????_00000_00000_0000000011_1110111",32); - def VLD = Bits("b?????_?????_00000_0000000011_0001011",32); - def VLW = Bits("b?????_?????_00000_0000000010_0001011",32); - def VLWU = Bits("b?????_?????_00000_0000000110_0001011",32); - def VLH = Bits("b?????_?????_00000_0000000001_0001011",32); - def VLHU = Bits("b?????_?????_00000_0000000101_0001011",32); - def VLB = Bits("b?????_?????_00000_0000000000_0001011",32); - def VLBU = Bits("b?????_?????_00000_0000000100_0001011",32); - def VFLD = Bits("b?????_?????_00000_0000001011_0001011",32); - def VFLW = Bits("b?????_?????_00000_0000001010_0001011",32); - def VLSTD = Bits("b?????_?????_?????_0000100011_0001011",32); - def VLSTW = Bits("b?????_?????_?????_0000100010_0001011",32); - def VLSTWU = Bits("b?????_?????_?????_0000100110_0001011",32); - def VLSTH = Bits("b?????_?????_?????_0000100001_0001011",32); - def VLSTHU = Bits("b?????_?????_?????_0000100101_0001011",32); - def VLSTB = Bits("b?????_?????_?????_0000100000_0001011",32); - def VLSTBU = Bits("b?????_?????_?????_0000100100_0001011",32); - def VFLSTD = Bits("b?????_?????_?????_0000101011_0001011",32); - def VFLSTW = Bits("b?????_?????_?????_0000101010_0001011",32); - def VLSEGD = Bits("b?????_?????_?????_0001000011_0001011",32); - def VLSEGW = Bits("b?????_?????_?????_0001000010_0001011",32); - def VLSEGWU = Bits("b?????_?????_?????_0001000110_0001011",32); - def VLSEGH = Bits("b?????_?????_?????_0001000001_0001011",32); - def VLSEGHU = Bits("b?????_?????_?????_0001000101_0001011",32); - def VLSEGB = 
Bits("b?????_?????_?????_0001000000_0001011",32); - def VLSEGBU = Bits("b?????_?????_?????_0001000100_0001011",32); - def VFLSEGD = Bits("b?????_?????_?????_0001001011_0001011",32); - def VFLSEGW = Bits("b?????_?????_?????_0001001010_0001011",32); - def VLSEGSTD = Bits("b?????_?????_?????_?????_100_11_0001011",32); - def VLSEGSTW = Bits("b?????_?????_?????_?????_100_10_0001011",32); - def VLSEGSTWU = Bits("b?????_?????_?????_?????_101_10_0001011",32); - def VLSEGSTH = Bits("b?????_?????_?????_?????_100_01_0001011",32); - def VLSEGSTHU = Bits("b?????_?????_?????_?????_101_01_0001011",32); - def VLSEGSTB = Bits("b?????_?????_?????_?????_100_00_0001011",32); - def VLSEGSTBU = Bits("b?????_?????_?????_?????_101_00_0001011",32); - def VFLSEGSTD = Bits("b?????_?????_?????_?????_110_11_0001011",32); - def VFLSEGSTW = Bits("b?????_?????_?????_?????_110_10_0001011",32); - def VSD = Bits("b?????_?????_00000_0000000011_0001111",32); - def VSW = Bits("b?????_?????_00000_0000000010_0001111",32); - def VSH = Bits("b?????_?????_00000_0000000001_0001111",32); - def VSB = Bits("b?????_?????_00000_0000000000_0001111",32); - def VFSD = Bits("b?????_?????_00000_0000001011_0001111",32); - def VFSW = Bits("b?????_?????_00000_0000001010_0001111",32); - def VSSTD = Bits("b?????_?????_?????_0000100011_0001111",32); - def VSSTW = Bits("b?????_?????_?????_0000100010_0001111",32); - def VSSTH = Bits("b?????_?????_?????_0000100001_0001111",32); - def VSSTB = Bits("b?????_?????_?????_0000100000_0001111",32); - def VFSSTD = Bits("b?????_?????_?????_0000101011_0001111",32); - def VFSSTW = Bits("b?????_?????_?????_0000101010_0001111",32); - def VSSEGD = Bits("b?????_?????_?????_0001000011_0001111",32); - def VSSEGW = Bits("b?????_?????_?????_0001000010_0001111",32); - def VSSEGH = Bits("b?????_?????_?????_0001000001_0001111",32); - def VSSEGB = Bits("b?????_?????_?????_0001000000_0001111",32); - def VFSSEGD = Bits("b?????_?????_?????_0001001011_0001111",32); - def VFSSEGW = 
Bits("b?????_?????_?????_0001001010_0001111",32); - def VSSEGSTD = Bits("b?????_?????_?????_?????_100_11_0001111",32); - def VSSEGSTW = Bits("b?????_?????_?????_?????_100_10_0001111",32); - def VSSEGSTH = Bits("b?????_?????_?????_?????_100_01_0001111",32); - def VSSEGSTB = Bits("b?????_?????_?????_?????_100_00_0001111",32); - def VFSSEGSTD = Bits("b?????_?????_?????_?????_110_11_0001111",32); - def VFSSEGSTW = Bits("b?????_?????_?????_?????_110_10_0001111",32); - def VMVV = Bits("b?????_?????_00000_0000000000_1110011",32); - def VMSV = Bits("b?????_?????_00000_0000010000_1110011",32); - def VMST = Bits("b?????_?????_?????_0000100000_1110011",32); - def VMTS = Bits("b?????_?????_?????_0000110000_1110011",32); - def VFMVV = Bits("b?????_?????_00000_0000000010_1110011",32); - def VFMSV = Bits("b?????_?????_00000_0000010010_1110011",32); - def VFMST = Bits("b?????_?????_?????_0000100010_1110011",32); - def VFMTS = Bits("b?????_?????_?????_0000110010_1110011",32); - def VVCFGIVL = Bits("b?????_?????_????????????_001_1110011",32); - def VTCFGIVL = Bits("b?????_?????_????????????_011_1110011",32); - def VVCFG = Bits("b00000_?????_?????_0000001000_1110011",32); - def VTCFG = Bits("b00000_?????_?????_0000011000_1110011",32); - def VSETVL = Bits("b?????_?????_000000000000_101_1110011",32); - def VF = Bits("b00000_?????_????????????_111_1110011",32); - // vector supervisor instructions - def VENQCMD = Bits("b00000_?????_?????_0001010110_1111011",32) - def VENQIMM1 = Bits("b00000_?????_?????_0001011110_1111011",32) - def VENQIMM2 = Bits("b00000_?????_?????_0001100110_1111011",32) - def VENQCNT = Bits("b00000_?????_?????_0001101110_1111011",32) - def VXCPTKILL = Bits("b00000_00000_00000_0000010110_1111011",32) - def VXCPTEVAC = Bits("b00000_?????_00000_0001000110_1111011",32) - def VXCPTHOLD = Bits("b00000_00000_00000_0001001110_1111011",32) - - def NOP = Bits("b00000_00000_000000000000_000_0010011",32); + /* Automatically generated by parse-opcodes */ + def J = 
Bits("b?????????????????????????1101011") + def JAL = Bits("b?????????????????????????1101111") + def JALR = Bits("b??????????????????????0001100111") + def BEQ = Bits("b??????????????????????0001100011") + def BNE = Bits("b??????????????????????0011100011") + def BLT = Bits("b??????????????????????1001100011") + def BGE = Bits("b??????????????????????1011100011") + def BLTU = Bits("b??????????????????????1101100011") + def BGEU = Bits("b??????????????????????1111100011") + def LUI = Bits("b?????????????????????????0110111") + def AUIPC = Bits("b?????????????????????????0010111") + def ADDI = Bits("b??????????????????????0000010011") + def SLLI = Bits("b??????????000000??????0010010011") + def SLTI = Bits("b??????????????????????0100010011") + def SLTIU = Bits("b??????????????????????0110010011") + def XORI = Bits("b??????????????????????1000010011") + def SRLI = Bits("b??????????000000??????1010010011") + def SRAI = Bits("b??????????000001??????1010010011") + def ORI = Bits("b??????????????????????1100010011") + def ANDI = Bits("b??????????????????????1110010011") + def ADD = Bits("b???????????????00000000000110011") + def SUB = Bits("b???????????????10000000000110011") + def SLL = Bits("b???????????????00000000010110011") + def SLT = Bits("b???????????????00000000100110011") + def SLTU = Bits("b???????????????00000000110110011") + def XOR = Bits("b???????????????00000001000110011") + def SRL = Bits("b???????????????00000001010110011") + def SRA = Bits("b???????????????10000001010110011") + def OR = Bits("b???????????????00000001100110011") + def AND = Bits("b???????????????00000001110110011") + def MUL = Bits("b???????????????00000010000110011") + def MULH = Bits("b???????????????00000010010110011") + def MULHSU = Bits("b???????????????00000010100110011") + def MULHU = Bits("b???????????????00000010110110011") + def DIV = Bits("b???????????????00000011000110011") + def DIVU = Bits("b???????????????00000011010110011") + def REM = 
Bits("b???????????????00000011100110011") + def REMU = Bits("b???????????????00000011110110011") + def ADDIW = Bits("b??????????????????????0000011011") + def SLLIW = Bits("b??????????0000000?????0010011011") + def SRLIW = Bits("b??????????0000000?????1010011011") + def SRAIW = Bits("b??????????0000010?????1010011011") + def ADDW = Bits("b???????????????00000000000111011") + def SUBW = Bits("b???????????????10000000000111011") + def SLLW = Bits("b???????????????00000000010111011") + def SRLW = Bits("b???????????????00000001010111011") + def SRAW = Bits("b???????????????10000001010111011") + def MULW = Bits("b???????????????00000010000111011") + def DIVW = Bits("b???????????????00000011000111011") + def DIVUW = Bits("b???????????????00000011010111011") + def REMW = Bits("b???????????????00000011100111011") + def REMUW = Bits("b???????????????00000011110111011") + def LB = Bits("b??????????????????????0000000011") + def LH = Bits("b??????????????????????0010000011") + def LW = Bits("b??????????????????????0100000011") + def LD = Bits("b??????????????????????0110000011") + def LBU = Bits("b??????????????????????1000000011") + def LHU = Bits("b??????????????????????1010000011") + def LWU = Bits("b??????????????????????1100000011") + def SB = Bits("b??????????????????????0000100011") + def SH = Bits("b??????????????????????0010100011") + def SW = Bits("b??????????????????????0100100011") + def SD = Bits("b??????????????????????0110100011") + def AMOADD_W = Bits("b???????????????00000000100101011") + def AMOSWAP_W = Bits("b???????????????00000010100101011") + def AMOAND_W = Bits("b???????????????00000100100101011") + def AMOOR_W = Bits("b???????????????00000110100101011") + def AMOMIN_W = Bits("b???????????????00001000100101011") + def AMOMAX_W = Bits("b???????????????00001010100101011") + def AMOMINU_W = Bits("b???????????????00001100100101011") + def AMOMAXU_W = Bits("b???????????????00001110100101011") + def AMOADD_D = Bits("b???????????????00000000110101011") + def 
AMOSWAP_D = Bits("b???????????????00000010110101011") + def AMOAND_D = Bits("b???????????????00000100110101011") + def AMOOR_D = Bits("b???????????????00000110110101011") + def AMOMIN_D = Bits("b???????????????00001000110101011") + def AMOMAX_D = Bits("b???????????????00001010110101011") + def AMOMINU_D = Bits("b???????????????00001100110101011") + def AMOMAXU_D = Bits("b???????????????00001110110101011") + def LR_W = Bits("b??????????0000010000000100101011") + def LR_D = Bits("b??????????0000010000000110101011") + def SC_W = Bits("b???????????????10000010100101011") + def SC_D = Bits("b???????????????10000010110101011") + def FENCE_I = Bits("b??????????????????????0010101111") + def FENCE = Bits("b??????????????????????0100101111") + def FENCE_V_L = Bits("b??????????????????????1000101111") + def FENCE_V_G = Bits("b??????????????????????1010101111") + def SYSCALL = Bits("b00000000000000000000000001110111") + def BREAK = Bits("b00000000000000000000000011110111") + def RDCYCLE = Bits("b?????000000000000000001001110111") + def RDTIME = Bits("b?????000000000000000011001110111") + def RDINSTRET = Bits("b?????000000000000000101001110111") + def MTPCR = Bits("b???????????????00000000001110011") + def MFPCR = Bits("b??????????0000000000000011110011") + def SETPCR = Bits("b??????????????????????0101110011") + def CLEARPCR = Bits("b??????????????????????0111110011") + def ERET = Bits("b00000000000000000000001001110011") + def FADD_S = Bits("b???????????????00000???001010011") + def FSUB_S = Bits("b???????????????00001???001010011") + def FMUL_S = Bits("b???????????????00010???001010011") + def FDIV_S = Bits("b???????????????00011???001010011") + def FSQRT_S = Bits("b??????????0000000100???001010011") + def FSGNJ_S = Bits("b???????????????00101000001010011") + def FSGNJN_S = Bits("b???????????????00110000001010011") + def FSGNJX_S = Bits("b???????????????00111000001010011") + def FADD_D = Bits("b???????????????00000???011010011") + def FSUB_D = 
Bits("b???????????????00001???011010011") + def FMUL_D = Bits("b???????????????00010???011010011") + def FDIV_D = Bits("b???????????????00011???011010011") + def FSQRT_D = Bits("b??????????0000000100???011010011") + def FSGNJ_D = Bits("b???????????????00101000011010011") + def FSGNJN_D = Bits("b???????????????00110000011010011") + def FSGNJX_D = Bits("b???????????????00111000011010011") + def FCVT_L_S = Bits("b??????????0000001000???001010011") + def FCVT_LU_S = Bits("b??????????0000001001???001010011") + def FCVT_W_S = Bits("b??????????0000001010???001010011") + def FCVT_WU_S = Bits("b??????????0000001011???001010011") + def FCVT_L_D = Bits("b??????????0000001000???011010011") + def FCVT_LU_D = Bits("b??????????0000001001???011010011") + def FCVT_W_D = Bits("b??????????0000001010???011010011") + def FCVT_WU_D = Bits("b??????????0000001011???011010011") + def FCVT_S_L = Bits("b??????????0000001100???001010011") + def FCVT_S_LU = Bits("b??????????0000001101???001010011") + def FCVT_S_W = Bits("b??????????0000001110???001010011") + def FCVT_S_WU = Bits("b??????????0000001111???001010011") + def FCVT_D_L = Bits("b??????????0000001100???011010011") + def FCVT_D_LU = Bits("b??????????0000001101???011010011") + def FCVT_D_W = Bits("b??????????0000001110???011010011") + def FCVT_D_WU = Bits("b??????????0000001111???011010011") + def FCVT_S_D = Bits("b??????????0000010001???001010011") + def FCVT_D_S = Bits("b??????????0000010000???011010011") + def FEQ_S = Bits("b???????????????10101000001010011") + def FLT_S = Bits("b???????????????10110000001010011") + def FLE_S = Bits("b???????????????10111000001010011") + def FEQ_D = Bits("b???????????????10101000011010011") + def FLT_D = Bits("b???????????????10110000011010011") + def FLE_D = Bits("b???????????????10111000011010011") + def FMIN_S = Bits("b???????????????11000000001010011") + def FMAX_S = Bits("b???????????????11001000001010011") + def FMIN_D = Bits("b???????????????11000000011010011") + def FMAX_D = 
Bits("b???????????????11001000011010011") + def FMV_X_S = Bits("b??????????0000011100000001010011") + def FMV_X_D = Bits("b??????????0000011100000011010011") + def FRSR = Bits("b?????000000000011101000001010011") + def FMV_S_X = Bits("b??????????0000011110000001010011") + def FMV_D_X = Bits("b??????????0000011110000011010011") + def FSSR = Bits("b??????????0000011111000001010011") + def FLW = Bits("b??????????????????????0100000111") + def FLD = Bits("b??????????????????????0110000111") + def FSW = Bits("b??????????????????????0100100111") + def FSD = Bits("b??????????????????????0110100111") + def FMADD_S = Bits("b???????????????????????001000011") + def FMSUB_S = Bits("b???????????????????????001000111") + def FNMSUB_S = Bits("b???????????????????????001001011") + def FNMADD_S = Bits("b???????????????????????001001111") + def FMADD_D = Bits("b???????????????????????011000011") + def FMSUB_D = Bits("b???????????????????????011000111") + def FNMSUB_D = Bits("b???????????????????????011001011") + def FNMADD_D = Bits("b???????????????????????011001111") + /* Automatically generated by parse-opcodes */ + def STOP = Bits("b00000000000000000000001011110111") + def UTIDX = Bits("b?????000000000000000001101110111") + def MOVZ = Bits("b???????????????00000001111110111") + def MOVN = Bits("b???????????????00000011111110111") + def FMOVZ = Bits("b???????????????00000101111110111") + def FMOVN = Bits("b???????????????00000111111110111") + def VSETCFGVL = Bits("b??????????????????????1110001011") + def VSETVL = Bits("b??????????0000000000001100001011") + def VMVV = Bits("b??????????0000000000011100001011") + def VGETCFG = Bits("b?????000000000000000001000001011") + def VGETVL = Bits("b?????000000000000000011000001011") + def VF = Bits("b??????????00000???????0100001011") + def VXCPTSAVE = Bits("b00000?????0000000000000110001011") + def VXCPTRESTORE = Bits("b00000?????0000000000010110001011") + def VXCPTKILL = Bits("b00000000000000000000100110001011") + def VXCPTEVAC = 
Bits("b00000?????0000000000110110001011") + def VXCPTHOLD = Bits("b00000000000000000001000110001011") + def VENQCMD = Bits("b00000??????????00001010110001011") + def VENQIMM1 = Bits("b00000??????????00001100110001011") + def VENQIMM2 = Bits("b00000??????????00001110110001011") + def VENQCNT = Bits("b00000??????????00010000110001011") + def VLD = Bits("b??????????0000000000110100001111") + def VLW = Bits("b??????????0000000000100100001111") + def VLWU = Bits("b??????????0000000001100100001111") + def VLH = Bits("b??????????0000000000010100001111") + def VLHU = Bits("b??????????0000000001010100001111") + def VLB = Bits("b??????????0000000000000100001111") + def VLBU = Bits("b??????????0000000001000100001111") + def VFLD = Bits("b??????????0000000010110100001111") + def VFLW = Bits("b??????????0000000010100100001111") + def VLSTD = Bits("b???????????????00000110110001111") + def VLSTW = Bits("b???????????????00000100110001111") + def VLSTWU = Bits("b???????????????00001100110001111") + def VLSTH = Bits("b???????????????00000010110001111") + def VLSTHU = Bits("b???????????????00001010110001111") + def VLSTB = Bits("b???????????????00000000110001111") + def VLSTBU = Bits("b???????????????00001000110001111") + def VFLSTD = Bits("b???????????????00010110110001111") + def VFLSTW = Bits("b???????????????00010100110001111") + def VLSEGD = Bits("b??????????00000???00110101011011") + def VLSEGW = Bits("b??????????00000???00100101011011") + def VLSEGWU = Bits("b??????????00000???01100101011011") + def VLSEGH = Bits("b??????????00000???00010101011011") + def VLSEGHU = Bits("b??????????00000???01010101011011") + def VLSEGB = Bits("b??????????00000???00000101011011") + def VLSEGBU = Bits("b??????????00000???01000101011011") + def VFLSEGD = Bits("b??????????00000???10110101011011") + def VFLSEGW = Bits("b??????????00000???10100101011011") + def VLSEGSTD = Bits("b??????????????????00110111011011") + def VLSEGSTW = Bits("b??????????????????00100111011011") + def VLSEGSTWU = 
Bits("b??????????????????01100111011011") + def VLSEGSTH = Bits("b??????????????????00010111011011") + def VLSEGSTHU = Bits("b??????????????????01010111011011") + def VLSEGSTB = Bits("b??????????????????00000111011011") + def VLSEGSTBU = Bits("b??????????????????01000111011011") + def VFLSEGSTD = Bits("b??????????00000???10110111011011") + def VFLSEGSTW = Bits("b??????????00000???10100111011011") + def VSD = Bits("b??????????0000010000110100001111") + def VSW = Bits("b??????????0000010000100100001111") + def VSH = Bits("b??????????0000010000010100001111") + def VSB = Bits("b??????????0000010000000100001111") + def VFSD = Bits("b??????????0000010010110100001111") + def VFSW = Bits("b??????????0000010010100100001111") + def VSSTD = Bits("b???????????????10000110110001111") + def VSSTW = Bits("b???????????????10000100110001111") + def VSSTH = Bits("b???????????????10000010110001111") + def VSSTB = Bits("b???????????????10000000110001111") + def VFSSTD = Bits("b???????????????10010110110001111") + def VFSSTW = Bits("b???????????????10010100110001111") + def VSSEGD = Bits("b??????????00000???00110101111011") + def VSSEGW = Bits("b??????????00000???00100101111011") + def VSSEGH = Bits("b??????????00000???00010101111011") + def VSSEGB = Bits("b??????????00000???00000101111011") + def VFSSEGD = Bits("b??????????00000???10110101111011") + def VFSSEGW = Bits("b??????????00000???10100101111011") + def VSSEGSTD = Bits("b??????????????????00110111111011") + def VSSEGSTW = Bits("b??????????????????00100111111011") + def VSSEGSTH = Bits("b??????????????????00010111111011") + def VSSEGSTB = Bits("b??????????????????00000111111011") + def VFSSEGSTD = Bits("b??????????00000???10110111111011") + def VFSSEGSTW = Bits("b??????????00000???10100111111011") } object Disassemble @@ -277,16 +263,16 @@ object Disassemble Str("s10"), Str("s11"), Str(" sp"), Str(" tp"), Str(" v0"), Str(" v1"), Str(" a0"), Str(" a1"), Str(" a2"), Str(" a3"), Str(" a4"), Str(" a5"), - Str(" a6"), Str(" a7"), 
Str(" a8"), Str(" a9"), - Str("a10"), Str("a11"), Str("a12"), Str("a13")) + Str(" a6"), Str(" a7"), Str(" t0"), Str(" t1"), + Str(" t2"), Str(" t3"), Str(" t4"), Str(" t5")) val f = AVec(Str(" fs0"), Str(" fs1"), Str(" fs2"), Str(" fs3"), Str(" fs4"), Str(" fs5"), Str(" fs6"), Str(" fs7"), Str(" fs8"), Str(" fs9"), Str("fs10"), Str("fs11"), Str("fs12"), Str("fs13"), Str("fs14"), Str("fs15"), Str(" fv0"), Str(" fv1"), Str(" fa0"), Str(" fa1"), Str(" fa2"), Str(" fa3"), Str(" fa4"), Str(" fa5"), - Str(" fa6"), Str(" fa7"), Str(" fa8"), Str(" fa9"), - Str("fa10"), Str("fa11"), Str("fa12"), Str("fa13")) + Str(" fa6"), Str(" fa7"), Str(" ft0"), Str(" ft1"), + Str(" ft2"), Str(" ft3"), Str(" ft4"), Str(" ft5")) def hex(x: SInt, plus: Char = ' ') = Cat(Mux(x < SInt(0), Str("-0x"), Str(plus + "0x")), Str(x.abs, 16)) @@ -303,13 +289,14 @@ object Disassemble val bmmv = Cat(insn(31,27), insn(16,10)).toSInt val jmmv = insn(31,7).toSInt - val imm = hex(immv) - val bmm = hex(bmmv << UInt(1)) + val imm = hex(Mux(fmt === FMT_B, bmmv << UInt(1), + Mux(fmt === FMT_I || fmt === FMT_LD || fmt === FMT_FLD, immv, + Mux(fmt === FMT_ST || fmt === FMT_FST, bmmv, + SInt(0))))) val jmm = hex(jmmv << UInt(1)) val lmm = Cat(Str("0x"), Str(insn(26,7).toUInt, 16)) - val laddr = Cat(Str(immv), lparen, x(rs1), rparen) - val saddr = Cat(Str(bmmv), lparen, x(rs1), rparen) + val addr = Cat(comma, imm, lparen, x(rs1), rparen) val r0 = x(rd) val r1 = Cat(r0, comma, x(rs1)) @@ -322,13 +309,13 @@ object Disassemble val xf2 = Cat(xf1, comma, f(rs2)) val z = Str(' ') val i = Cat(r1, comma, imm) - val b = Cat(x(rs1), comma, x(rs2), comma, bmm) + val b = Cat(x(rs1), comma, x(rs2), comma, imm) val j = jmm val l = Cat(x(rd), comma, lmm) - val ld = Cat(x(rd), comma, laddr) - val st = Cat(x(rs2), comma, saddr) - val fld = Cat(f(rd), comma, laddr) - val fst = Cat(f(rs2), comma, saddr) + val ld = Cat(x(rd), addr) + val st = Cat(x(rs2), addr) + val fld = Cat(f(rd), addr) + val fst = Cat(f(rs2), addr) val amo = r2 
val opts = Seq(r0, r1, r2, f1, f2, f3, fx, xf1, xf2, z, i, b, j, l, ld, st, @@ -338,6 +325,7 @@ object Disassemble AVec(padded)(fmt.toUInt) } + private def FMT_X = Bits("b?????", 5) private def FMT_R0 = Bits(0, 5) private def FMT_R1 = Bits(1, 5) private def FMT_R2 = Bits(2, 5) @@ -358,221 +346,216 @@ object Disassemble private def FMT_FST = Bits(17, 5) private def FMT_AMO = Bits(18, 5) - private def default = List(Str("unknown "), FMT_0) + private def default = List(Str("unknown "), FMT_0) import Instructions._ private def table = Array( - BNE-> List(Str("bne "), FMT_B), - BEQ-> List(Str("beq "), FMT_B), - BLT-> List(Str("blt "), FMT_B), - BLTU-> List(Str("bltu "), FMT_B), - BGE-> List(Str("bge "), FMT_B), - BGEU-> List(Str("bgeu "), FMT_B), + BNE-> List(Str("bne "), FMT_B), + BEQ-> List(Str("beq "), FMT_B), + BLT-> List(Str("blt "), FMT_B), + BLTU-> List(Str("bltu "), FMT_B), + BGE-> List(Str("bge "), FMT_B), + BGEU-> List(Str("bgeu "), FMT_B), - J-> List(Str("j "), FMT_J), - JAL-> List(Str("jal "), FMT_J), - JALR_C-> List(Str("jalr.c "), FMT_LD), - JALR_J-> List(Str("jalr.j "), FMT_LD), - JALR_R-> List(Str("jalr.r "), FMT_LD), - AUIPC-> List(Str("auipc "), FMT_L), + J-> List(Str("j "), FMT_J), + JAL-> List(Str("jal "), FMT_J), + JALR-> List(Str("jalr "), FMT_LD), + AUIPC-> List(Str("auipc "), FMT_L), - LB-> List(Str("lb "), FMT_LD), - LH-> List(Str("lh "), FMT_LD), - LW-> List(Str("lw "), FMT_LD), - LD-> List(Str("ld "), FMT_LD), - LBU-> List(Str("lbu "), FMT_LD), - LHU-> List(Str("lhu "), FMT_LD), - LWU-> List(Str("lwu "), FMT_LD), - SB-> List(Str("sb "), FMT_ST), - SH-> List(Str("sh "), FMT_ST), - SW-> List(Str("sw "), FMT_ST), - SD-> List(Str("sd "), FMT_ST), + LB-> List(Str("lb "), FMT_LD), + LH-> List(Str("lh "), FMT_LD), + LW-> List(Str("lw "), FMT_LD), + LD-> List(Str("ld "), FMT_LD), + LBU-> List(Str("lbu "), FMT_LD), + LHU-> List(Str("lhu "), FMT_LD), + LWU-> List(Str("lwu "), FMT_LD), + SB-> List(Str("sb "), FMT_ST), + SH-> List(Str("sh "), FMT_ST), + 
SW-> List(Str("sw "), FMT_ST), + SD-> List(Str("sd "), FMT_ST), - AMOADD_W-> List(Str("amoadd.w "), FMT_AMO), - AMOSWAP_W-> List(Str("amoswap.w "), FMT_AMO), - AMOAND_W-> List(Str("amoand.w "), FMT_AMO), - AMOOR_W-> List(Str("amoor.w "), FMT_AMO), - AMOMIN_W-> List(Str("amomin.w "), FMT_AMO), - AMOMINU_W-> List(Str("amominu.w "), FMT_AMO), - AMOMAX_W-> List(Str("amomax.w "), FMT_AMO), - AMOMAXU_W-> List(Str("amomaxu.w "), FMT_AMO), - AMOADD_D-> List(Str("amoadd.d "), FMT_AMO), - AMOSWAP_D-> List(Str("amoswap.d "), FMT_AMO), - AMOAND_D-> List(Str("amoand.d "), FMT_AMO), - AMOOR_D-> List(Str("amoor.d "), FMT_AMO), - AMOMIN_D-> List(Str("amomin.d "), FMT_AMO), - AMOMINU_D-> List(Str("amominu.d "), FMT_AMO), - AMOMAX_D-> List(Str("amomax.d "), FMT_AMO), - AMOMAXU_D-> List(Str("amomaxu.d "), FMT_AMO), + AMOADD_W-> List(Str("amoaddw "), FMT_AMO), + AMOSWAP_W-> List(Str("amoswapw"), FMT_AMO), + AMOAND_W-> List(Str("amoandw "), FMT_AMO), + AMOOR_W-> List(Str("amoorw "), FMT_AMO), + AMOMIN_W-> List(Str("amominw "), FMT_AMO), + AMOMINU_W-> List(Str("amominuw"), FMT_AMO), + AMOMAX_W-> List(Str("amomaxw "), FMT_AMO), + AMOMAXU_W-> List(Str("amomaxuw"), FMT_AMO), + AMOADD_D-> List(Str("amoaddd "), FMT_AMO), + AMOSWAP_D-> List(Str("amoswapd"), FMT_AMO), + AMOAND_D-> List(Str("amoandd "), FMT_AMO), + AMOOR_D-> List(Str("amoord "), FMT_AMO), + AMOMIN_D-> List(Str("amomind "), FMT_AMO), + AMOMINU_D-> List(Str("amominud"), FMT_AMO), + AMOMAX_D-> List(Str("amomaxd "), FMT_AMO), + AMOMAXU_D-> List(Str("amomaxud"), FMT_AMO), - LR_W-> List(Str("lr.w "), FMT_AMO), - LR_D-> List(Str("lr.d "), FMT_AMO), - SC_W-> List(Str("sc.w "), FMT_AMO), - SC_D-> List(Str("sc.d "), FMT_AMO), + LR_W-> List(Str("lr.w "), FMT_AMO), + LR_D-> List(Str("lr.d "), FMT_AMO), + SC_W-> List(Str("sc.w "), FMT_AMO), + SC_D-> List(Str("sc.d "), FMT_AMO), - LUI-> List(Str("lui "), FMT_L), - ADDI-> List(Str("addi "), FMT_I), - SLTI -> List(Str("slti "), FMT_I), - SLTIU-> List(Str("sltiu "), FMT_I), - ANDI-> 
List(Str("andi "), FMT_I), - ORI-> List(Str("ori "), FMT_I), - XORI-> List(Str("xori "), FMT_I), - SLLI-> List(Str("slli "), FMT_I), - SRLI-> List(Str("srli "), FMT_I), - SRAI-> List(Str("srai "), FMT_I), - ADD-> List(Str("add "), FMT_R2), - SUB-> List(Str("sub "), FMT_R2), - SLT-> List(Str("slt "), FMT_R2), - SLTU-> List(Str("sltu "), FMT_R2), - riscvAND-> List(Str("and "), FMT_R2), - riscvOR-> List(Str("or "), FMT_R2), - riscvXOR-> List(Str("xor "), FMT_R2), - SLL-> List(Str("sll "), FMT_R2), - SRL-> List(Str("srl "), FMT_R2), - SRA-> List(Str("sra "), FMT_R2), + LUI-> List(Str("lui "), FMT_L), + ADDI-> List(Str("addi "), FMT_I), + SLTI -> List(Str("slti "), FMT_I), + SLTIU-> List(Str("sltiu "), FMT_I), + ANDI-> List(Str("andi "), FMT_I), + ORI-> List(Str("ori "), FMT_I), + XORI-> List(Str("xori "), FMT_I), + SLLI-> List(Str("slli "), FMT_I), + SRLI-> List(Str("srli "), FMT_I), + SRAI-> List(Str("srai "), FMT_I), + ADD-> List(Str("add "), FMT_R2), + SUB-> List(Str("sub "), FMT_R2), + SLT-> List(Str("slt "), FMT_R2), + SLTU-> List(Str("sltu "), FMT_R2), + AND-> List(Str("and "), FMT_R2), + OR-> List(Str("or "), FMT_R2), + XOR-> List(Str("xor "), FMT_R2), + SLL-> List(Str("sll "), FMT_R2), + SRL-> List(Str("srl "), FMT_R2), + SRA-> List(Str("sra "), FMT_R2), - ADDIW-> List(Str("addiw "), FMT_I), - SLLIW-> List(Str("slliw "), FMT_I), - SRLIW-> List(Str("srliw "), FMT_I), - SRAIW-> List(Str("sraiw "), FMT_I), - ADDW-> List(Str("addw "), FMT_R2), - SUBW-> List(Str("subw "), FMT_R2), - SLLW-> List(Str("sllw "), FMT_R2), - SRLW-> List(Str("srlw "), FMT_R2), - SRAW-> List(Str("sraw "), FMT_R2), + ADDIW-> List(Str("addiw "), FMT_I), + SLLIW-> List(Str("slliw "), FMT_I), + SRLIW-> List(Str("srliw "), FMT_I), + SRAIW-> List(Str("sraiw "), FMT_I), + ADDW-> List(Str("addw "), FMT_R2), + SUBW-> List(Str("subw "), FMT_R2), + SLLW-> List(Str("sllw "), FMT_R2), + SRLW-> List(Str("srlw "), FMT_R2), + SRAW-> List(Str("sraw "), FMT_R2), - MUL-> List(Str("mul "), FMT_R2), - MULH-> 
List(Str("mulh "), FMT_R2), - MULHU-> List(Str("mulhu "), FMT_R2), - MULHSU-> List(Str("mulhsu "), FMT_R2), - MULW-> List(Str("mulw "), FMT_R2), + MUL-> List(Str("mul "), FMT_R2), + MULH-> List(Str("mulh "), FMT_R2), + MULHU-> List(Str("mulhu "), FMT_R2), + MULHSU-> List(Str("mulhsu "), FMT_R2), + MULW-> List(Str("mulw "), FMT_R2), - DIV-> List(Str("div "), FMT_R2), - DIVU-> List(Str("divu "), FMT_R2), - REM-> List(Str("rem "), FMT_R2), - REMU-> List(Str("remu "), FMT_R2), - DIVW-> List(Str("divw "), FMT_R2), - DIVUW-> List(Str("divuw "), FMT_R2), - REMW-> List(Str("remw "), FMT_R2), - REMUW-> List(Str("remuw "), FMT_R2), + DIV-> List(Str("div "), FMT_R2), + DIVU-> List(Str("divu "), FMT_R2), + REM-> List(Str("rem "), FMT_R2), + REMU-> List(Str("remu "), FMT_R2), + DIVW-> List(Str("divw "), FMT_R2), + DIVUW-> List(Str("divuw "), FMT_R2), + REMW-> List(Str("remw "), FMT_R2), + REMUW-> List(Str("remuw "), FMT_R2), - SYSCALL-> List(Str("syscall "), FMT_0), - SETPCR-> List(Str("setpcr "), FMT_I), - CLEARPCR-> List(Str("clearpcr "), FMT_I), - ERET-> List(Str("eret "), FMT_0), - FENCE-> List(Str("fence "), FMT_0), - FENCE_I-> List(Str("fence.i "), FMT_0), - MFPCR-> List(Str("mfpcr "), FMT_R2), - MTPCR-> List(Str("mtpcr "), FMT_R2), - RDTIME-> List(Str("rdtime "), FMT_R0), - RDCYCLE-> List(Str("rdcycle "), FMT_R0), - RDINSTRET-> List(Str("rdinstret "), FMT_R0), + SYSCALL-> List(Str("syscall "), FMT_0), + SETPCR-> List(Str("setpcr "), FMT_I), + CLEARPCR-> List(Str("clearpcr"), FMT_I), + ERET-> List(Str("eret "), FMT_0), + FENCE-> List(Str("fence "), FMT_0), + FENCE_I-> List(Str("fence.i "), FMT_0), + MFPCR-> List(Str("mfpcr "), FMT_R2), + MTPCR-> List(Str("mtpcr "), FMT_R2), + RDTIME-> List(Str("rdtime "), FMT_R0), + RDCYCLE-> List(Str("rdcycle "), FMT_R0), + RDINSTRET-> List(Str("rdinstrt"), FMT_R0), - FCVT_S_D-> List(Str("fcvt.s.d "), FMT_F1), - FCVT_D_S-> List(Str("fcvt.d.s "), FMT_F1), - FSGNJ_S-> List(Str("fsgnj.s "), FMT_F2), - FSGNJ_D-> List(Str("fsgnj.d "), 
FMT_F2), - FSGNJX_S-> List(Str("fsgnx.s "), FMT_F2), - FSGNJX_D-> List(Str("fsgnx.d "), FMT_F2), - FSGNJN_S-> List(Str("fsgnjn.s "), FMT_F2), - FSGNJN_D-> List(Str("fsgnjn.d "), FMT_F2), - FMIN_S-> List(Str("fmin.s "), FMT_F2), - FMIN_D-> List(Str("fmin.d "), FMT_F2), - FMAX_S-> List(Str("fmax.s "), FMT_F2), - FMAX_D-> List(Str("fmax.d "), FMT_F2), - FADD_S-> List(Str("fadd.s "), FMT_F2), - FADD_D-> List(Str("fadd.d "), FMT_F2), - FSUB_S-> List(Str("fsub.s "), FMT_F2), - FSUB_D-> List(Str("fsub.d "), FMT_F2), - FMUL_S-> List(Str("fmul.s "), FMT_F2), - FMUL_D-> List(Str("fmul.d "), FMT_F2), - FMADD_S-> List(Str("fmadd.s "), FMT_F3), - FMADD_D-> List(Str("fmadd.d "), FMT_F3), - FMSUB_S-> List(Str("fmsub.s "), FMT_F3), - FMSUB_D-> List(Str("fmsub.d "), FMT_F3), - FNMADD_S-> List(Str("fnmadd.s "), FMT_F3), - FNMADD_D-> List(Str("fnmadd.d "), FMT_F3), - FNMSUB_S-> List(Str("fnmsub.s "), FMT_F3), - FNMSUB_D-> List(Str("fnmsub.d "), FMT_F3), - MFTX_S-> List(Str("mftx.s "), FMT_XF1), - MFTX_D-> List(Str("mftx.d "), FMT_XF1), - FCVT_W_S-> List(Str("fcvt.w.s "), FMT_XF1), - FCVT_W_D-> List(Str("fcvt.w.d "), FMT_XF1), - FCVT_WU_S-> List(Str("fcvt.wu.s "), FMT_XF1), - FCVT_WU_D-> List(Str("fcvt.wu.d "), FMT_XF1), - FCVT_L_S-> List(Str("fcvt.l.s "), FMT_XF1), - FCVT_L_D-> List(Str("fcvt.l.d "), FMT_XF1), - FCVT_LU_S-> List(Str("fcvt.lu.s "), FMT_XF1), - FCVT_LU_D-> List(Str("fcvt.lu.d "), FMT_XF1), - FEQ_S-> List(Str("feq.s "), FMT_XF2), - FEQ_D-> List(Str("feq.d "), FMT_XF2), - FLT_S-> List(Str("flt.s "), FMT_XF2), - FLT_D-> List(Str("flt.d "), FMT_XF2), - FLE_S-> List(Str("fle.s "), FMT_XF2), - FLE_D-> List(Str("fle.d "), FMT_XF2), - MXTF_S-> List(Str("mxtf.s "), FMT_FX), - MXTF_D-> List(Str("mxtf.d "), FMT_FX), - FCVT_S_W-> List(Str("fcvt.s.w "), FMT_FX), - FCVT_D_W-> List(Str("fcvt.d.w "), FMT_FX), - FCVT_S_WU-> List(Str("fcvt.s.wu "), FMT_FX), - FCVT_D_WU-> List(Str("fcvt.d.wu "), FMT_FX), - FCVT_S_L-> List(Str("fcvt.s.l "), FMT_FX), - FCVT_D_L-> List(Str("fcvt.d.l "), 
FMT_FX), - FCVT_S_LU-> List(Str("fcvt.s.lu "), FMT_FX), - FCVT_D_LU-> List(Str("fcvt.d.lu "), FMT_FX), - MFFSR-> List(Str("mffsr "), FMT_R0), - MTFSR-> List(Str("mtfsr "), FMT_R1), - FLW-> List(Str("flw "), FMT_FLD), - FLD-> List(Str("fld "), FMT_FLD), - FSW-> List(Str("fsw "), FMT_FST), - FSD-> List(Str("fsd "), FMT_FST), + FCVT_S_D-> List(Str("fcvt.sd "), FMT_F1), + FCVT_D_S-> List(Str("fcvt.ds "), FMT_F1), + FSGNJ_S-> List(Str("fsgnj.s "), FMT_F2), + FSGNJ_D-> List(Str("fsgnj.d "), FMT_F2), + FSGNJX_S-> List(Str("fsgnx.s "), FMT_F2), + FSGNJX_D-> List(Str("fsgnx.d "), FMT_F2), + FSGNJN_S-> List(Str("fsgnjn.s"), FMT_F2), + FSGNJN_D-> List(Str("fsgnjn.d"), FMT_F2), + FMIN_S-> List(Str("fmin.s "), FMT_F2), + FMIN_D-> List(Str("fmin.d "), FMT_F2), + FMAX_S-> List(Str("fmax.s "), FMT_F2), + FMAX_D-> List(Str("fmax.d "), FMT_F2), + FADD_S-> List(Str("fadd.s "), FMT_F2), + FADD_D-> List(Str("fadd.d "), FMT_F2), + FSUB_S-> List(Str("fsub.s "), FMT_F2), + FSUB_D-> List(Str("fsub.d "), FMT_F2), + FMUL_S-> List(Str("fmul.s "), FMT_F2), + FMUL_D-> List(Str("fmul.d "), FMT_F2), + FMADD_S-> List(Str("fmadd.s "), FMT_F3), + FMADD_D-> List(Str("fmadd.d "), FMT_F3), + FMSUB_S-> List(Str("fmsub.s "), FMT_F3), + FMSUB_D-> List(Str("fmsub.d "), FMT_F3), + FNMADD_S-> List(Str("fnmadd.s"), FMT_F3), + FNMADD_D-> List(Str("fnmadd.d"), FMT_F3), + FNMSUB_S-> List(Str("fnmsub.s"), FMT_F3), + FNMSUB_D-> List(Str("fnmsub.d"), FMT_F3), + FMV_X_S-> List(Str("fmv.x.s "), FMT_XF1), + FMV_X_D-> List(Str("fmv.x.d "), FMT_XF1), + FCVT_W_S-> List(Str("fcvt.ws "), FMT_XF1), + FCVT_W_D-> List(Str("fcvt.wd "), FMT_XF1), + FCVT_WU_S-> List(Str("fcvt.wus"), FMT_XF1), + FCVT_WU_D-> List(Str("fcvt.wud"), FMT_XF1), + FCVT_L_S-> List(Str("fcvt.ls "), FMT_XF1), + FCVT_L_D-> List(Str("fcvt.ld "), FMT_XF1), + FCVT_LU_S-> List(Str("fcvt.lus"), FMT_XF1), + FCVT_LU_D-> List(Str("fcvt.lud"), FMT_XF1), + FEQ_S-> List(Str("feq.s "), FMT_XF2), + FEQ_D-> List(Str("feq.d "), FMT_XF2), + FLT_S-> List(Str("flt.s "), 
FMT_XF2), + FLT_D-> List(Str("flt.d "), FMT_XF2), + FLE_S-> List(Str("fle.s "), FMT_XF2), + FLE_D-> List(Str("fle.d "), FMT_XF2), + FMV_S_X-> List(Str("fmv.s.x "), FMT_FX), + FMV_D_X-> List(Str("fmv.d.x "), FMT_FX), + FCVT_S_W-> List(Str("fcvt.sw "), FMT_FX), + FCVT_D_W-> List(Str("fcvt.dw "), FMT_FX), + FCVT_S_WU-> List(Str("fcvt.swu"), FMT_FX), + FCVT_D_WU-> List(Str("fcvt.dwu"), FMT_FX), + FCVT_S_L-> List(Str("fcvt.sl "), FMT_FX), + FCVT_D_L-> List(Str("fcvt.dl "), FMT_FX), + FCVT_S_LU-> List(Str("fcvt.slu"), FMT_FX), + FCVT_D_LU-> List(Str("fcvt.dlu"), FMT_FX), + FRSR-> List(Str("frsr "), FMT_R0), + FSSR-> List(Str("fssr "), FMT_R1), + FLW-> List(Str("flw "), FMT_FLD), + FLD-> List(Str("fld "), FMT_FLD), + FSW-> List(Str("fsw "), FMT_FST), + FSD-> List(Str("fsd "), FMT_FST), - VVCFGIVL-> List(Str("vecInst "), FMT_0), - VVCFG-> List(Str("vecInst "), FMT_0), - VSETVL-> List(Str("vecInst "), FMT_0), - VF-> List(Str("vecInst "), FMT_0), - VMVV-> List(Str("vecInst "), FMT_0), - VMSV-> List(Str("vecInst "), FMT_0), - VFMVV-> List(Str("vecInst "), FMT_0), - FENCE_V_L-> List(Str("vecInst "), FMT_0), - FENCE_V_G-> List(Str("vecInst "), FMT_0), - VLD-> List(Str("vecInst "), FMT_0), - VLW-> List(Str("vecInst "), FMT_0), - VLWU-> List(Str("vecInst "), FMT_0), - VLH-> List(Str("vecInst "), FMT_0), - VLHU-> List(Str("vecInst "), FMT_0), - VLB-> List(Str("vecInst "), FMT_0), - VLBU-> List(Str("vecInst "), FMT_0), - VSD-> List(Str("vecInst "), FMT_0), - VSW-> List(Str("vecInst "), FMT_0), - VSH-> List(Str("vecInst "), FMT_0), - VSB-> List(Str("vecInst "), FMT_0), - VFLD-> List(Str("vecInst "), FMT_0), - VFLW-> List(Str("vecInst "), FMT_0), - VFSD-> List(Str("vecInst "), FMT_0), - VFSW-> List(Str("vecInst "), FMT_0), - VLSTD-> List(Str("vecInst "), FMT_0), - VLSTW-> List(Str("vecInst "), FMT_0), - VLSTWU-> List(Str("vecInst "), FMT_0), - VLSTH-> List(Str("vecInst "), FMT_0), - VLSTHU-> List(Str("vecInst "), FMT_0), - VLSTB-> List(Str("vecInst "), FMT_0), - VLSTBU-> 
List(Str("vecInst "), FMT_0), - VSSTD-> List(Str("vecInst "), FMT_0), - VSSTW-> List(Str("vecInst "), FMT_0), - VSSTH-> List(Str("vecInst "), FMT_0), - VSSTB-> List(Str("vecInst "), FMT_0), - VFLSTD-> List(Str("vecInst "), FMT_0), - VFLSTW-> List(Str("vecInst "), FMT_0), - VFSSTD-> List(Str("vecInst "), FMT_0), - VFSSTW-> List(Str("vecInst "), FMT_0), + VSETCFGVL-> List(Str("vecInst "), FMT_0), + VSETVL-> List(Str("vecInst "), FMT_0), + VF-> List(Str("vecInst "), FMT_0), + VMVV-> List(Str("vecInst "), FMT_0), + FENCE_V_L-> List(Str("vecInst "), FMT_0), + FENCE_V_G-> List(Str("vecInst "), FMT_0), + VLD-> List(Str("vecInst "), FMT_0), + VLW-> List(Str("vecInst "), FMT_0), + VLWU-> List(Str("vecInst "), FMT_0), + VLH-> List(Str("vecInst "), FMT_0), + VLHU-> List(Str("vecInst "), FMT_0), + VLB-> List(Str("vecInst "), FMT_0), + VLBU-> List(Str("vecInst "), FMT_0), + VSD-> List(Str("vecInst "), FMT_0), + VSW-> List(Str("vecInst "), FMT_0), + VSH-> List(Str("vecInst "), FMT_0), + VSB-> List(Str("vecInst "), FMT_0), + VFLD-> List(Str("vecInst "), FMT_0), + VFLW-> List(Str("vecInst "), FMT_0), + VFSD-> List(Str("vecInst "), FMT_0), + VFSW-> List(Str("vecInst "), FMT_0), + VLSTD-> List(Str("vecInst "), FMT_0), + VLSTW-> List(Str("vecInst "), FMT_0), + VLSTWU-> List(Str("vecInst "), FMT_0), + VLSTH-> List(Str("vecInst "), FMT_0), + VLSTHU-> List(Str("vecInst "), FMT_0), + VLSTB-> List(Str("vecInst "), FMT_0), + VLSTBU-> List(Str("vecInst "), FMT_0), + VSSTD-> List(Str("vecInst "), FMT_0), + VSSTW-> List(Str("vecInst "), FMT_0), + VSSTH-> List(Str("vecInst "), FMT_0), + VSSTB-> List(Str("vecInst "), FMT_0), + VFLSTD-> List(Str("vecInst "), FMT_0), + VFLSTW-> List(Str("vecInst "), FMT_0), + VFSSTD-> List(Str("vecInst "), FMT_0), + VFSSTW-> List(Str("vecInst "), FMT_0), - VENQCMD-> List(Str("vecInst "), FMT_0), - VENQIMM1-> List(Str("vecInst "), FMT_0), - VENQIMM2-> List(Str("vecInst "), FMT_0), - VENQCNT-> List(Str("vecInst "), FMT_0), - VXCPTEVAC-> List(Str("vecInst "), 
FMT_0), - VXCPTKILL-> List(Str("vecInst "), FMT_0), - VXCPTHOLD-> List(Str("vecInst "), FMT_0) + VENQCMD-> List(Str("vecInst "), FMT_0), + VENQIMM1-> List(Str("vecInst "), FMT_0), + VENQIMM2-> List(Str("vecInst "), FMT_0), + VENQCNT-> List(Str("vecInst "), FMT_0), + VXCPTEVAC-> List(Str("vecInst "), FMT_0), + VXCPTKILL-> List(Str("vecInst "), FMT_0), + VXCPTHOLD-> List(Str("vecInst "), FMT_0) ) } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 3681586c..f4acb1c5 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -72,9 +72,6 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module val resp_val = state === s_done || state === s_error val resp_err = state === s_error || state === s_wait - - val resp_ptd = io.mem.resp.bits.data(1,0) === Bits(1) - val resp_pte = io.mem.resp.bits.data(1,0) === Bits(2) val r_resp_ppn = io.mem.req.bits.addr >> PGIDX_BITS val resp_ppn = AVec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), r_req_vpn(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) @@ -83,7 +80,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module val me = r_req_dest === UInt(i) io.requestor(i).resp.valid := resp_val && me io.requestor(i).resp.bits.error := resp_err - io.requestor(i).resp.bits.perm := r_pte(9,4) + io.requestor(i).resp.bits.perm := r_pte(8,3) io.requestor(i).resp.bits.ppn := resp_ppn.toUInt io.requestor(i).invalidate := io.dpath.invalidate io.requestor(i).eret := io.dpath.eret @@ -108,16 +105,13 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module state := s_req } when (io.mem.resp.valid) { - when (resp_pte) { // page table entry - state := s_done - } - .otherwise { - count := count + UInt(1) - when (resp_ptd && count < UInt(levels-1)) { + state := s_error + when (io.mem.resp.bits.data(0)) { + when (!io.mem.resp.bits.data(1)) { + state := s_done + }.elsewhen (count < levels-1) { state := s_req - } - 
.otherwise { - state := s_error + count := count + 1 } } } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index c83be474..2bf49a68 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -15,7 +15,6 @@ case class RocketConfiguration(tl: TileLinkConfiguration, val xprlen = 64 val nxpr = 32 val nxprbits = log2Up(nxpr) - val rvc = false if (fastLoadByte) require(fastLoadWord) } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 1c3af620..586e646c 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -121,12 +121,12 @@ class TLB(entries: Int) extends Module when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn val perm = (!io.ptw.resp.bits.error).toSInt & io.ptw.resp.bits.perm(5,0) - ur_array := ur_array.bitSet(r_refill_waddr, perm(2)) + ur_array := ur_array.bitSet(r_refill_waddr, perm(0)) uw_array := uw_array.bitSet(r_refill_waddr, perm(1)) - ux_array := ux_array.bitSet(r_refill_waddr, perm(0)) - sr_array := sr_array.bitSet(r_refill_waddr, perm(5)) + ux_array := ux_array.bitSet(r_refill_waddr, perm(2)) + sr_array := sr_array.bitSet(r_refill_waddr, perm(3)) sw_array := sw_array.bitSet(r_refill_waddr, perm(4)) - sx_array := sx_array.bitSet(r_refill_waddr, perm(3)) + sx_array := sx_array.bitSet(r_refill_waddr, perm(5)) } // high if there are any unused (invalid) entries in the TLB diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index ed008423..9b735f1d 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -41,15 +41,14 @@ object Str def apply(x: UInt): Bits = apply(x, 10) def apply(x: UInt, radix: Int): Bits = { val rad = UInt(radix) - val digs = digits(radix) val w = x.getWidth require(w > 0) var q = x - var s = digs(q % rad) + var s = digit(q % rad) for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) { q = q / rad - s = Cat(Mux(Bool(radix == 10) && q === 
UInt(0), Str(' '), digs(q % rad)), s) + s = Cat(Mux(Bool(radix == 10) && q === UInt(0), Str(' '), digit(q % rad)), s) } s } @@ -61,28 +60,24 @@ object Str Cat(Mux(neg, Str('-'), Str(' ')), Str(abs, radix)) } else { val rad = UInt(radix) - val digs = digits(radix) val w = abs.getWidth require(w > 0) var q = abs - var s = digs(q % rad) + var s = digit(q % rad) var needSign = neg for (i <- 1 until ceil(log(2)/log(radix)*w).toInt) { q = q / rad val placeSpace = q === UInt(0) val space = Mux(needSign, Str('-'), Str(' ')) needSign = needSign && !placeSpace - s = Cat(Mux(placeSpace, space, digs(q % rad)), s) + s = Cat(Mux(placeSpace, space, digit(q % rad)), s) } Cat(Mux(needSign, Str('-'), Str(' ')), s) } } - private def digit(d: Int): Char = (if (d < 10) '0'+d else 'a'-10+d).toChar - private def digits(radix: Int): Vec[Bits] = - AVec((0 until radix).map(i => Str(digit(i)))) - + private def digit(d: UInt): Bits = Mux(d < UInt(10), Str('0')+d, Str(('a'-10).toChar)+d)(7,0) private def validChar(x: Char) = x == (x & 0xFF) } From 52e31f3298c4d10f5cea6fcea48cf05ad774bae7 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Aug 2013 14:39:23 -0700 Subject: [PATCH 0619/1087] Bypass scoreboard updates This reduces div/mul/D$ miss latency by 1 cycle. 
--- rocket/src/main/scala/ctrl.scala | 30 ++++++++++++++++++++---------- rocket/src/main/scala/dpath.scala | 19 +++++++------------ rocket/src/main/scala/tlb.scala | 2 +- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 46f4ad3b..d899bbf3 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -37,6 +37,8 @@ class CtrlDpathIO extends Bundle() val ex_mem_type = Bits(OUTPUT, 3) val ex_rs2_val = Bool(OUTPUT) val mem_rs2_val = Bool(OUTPUT) + val mem_ll_bypass_rs1 = Bool(OUTPUT) + val mem_ll_bypass_rs2 = Bool(OUTPUT) // exception handling val exception = Bool(OUTPUT); val cause = UInt(OUTPUT, 6); @@ -605,11 +607,17 @@ class Control(implicit conf: RocketConfiguration) extends Module class Scoreboard(n: Int) { val r = Reg(init=Bits(0, n)) - var _next = r + private var _next = r + private var cur = r var ens = Bool(false) - def apply(addr: UInt) = r(addr) def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr)) - def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr)) + def clear(en: Bool, addr: UInt): Unit = { + val m = ~mask(en, addr) + update(en, _next & m) + //cur = cur & m + } + def read(addr: UInt) = r(addr) + def readBypassed(addr: UInt) = cur(addr) private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0)) private def update(en: Bool, update: UInt) = { _next = update @@ -628,10 +636,10 @@ class Control(implicit conf: RocketConfiguration) extends Module fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) - io.fpu.dec.ren1 && fp_sboard(id_raddr1) || - io.fpu.dec.ren2 && fp_sboard(id_raddr2) || - io.fpu.dec.ren3 && fp_sboard(id_raddr3) || - io.fpu.dec.wen && fp_sboard(id_waddr) + io.fpu.dec.ren1 && fp_sboard.readBypassed(id_raddr1) || + io.fpu.dec.ren2 && fp_sboard.readBypassed(id_raddr2) || + io.fpu.dec.ren3 && 
fp_sboard.readBypassed(id_raddr3) || + io.fpu.dec.wen && fp_sboard.readBypassed(id_waddr) } else Bool(false) // write cause to PCR on an exception @@ -699,10 +707,12 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val) || fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) + io.dpath.mem_ll_bypass_rs1 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr1 + io.dpath.mem_ll_bypass_rs2 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr2 val id_sboard_hazard = - (id_raddr1 != UInt(0) && id_renx1 && sboard(id_raddr1) || - id_raddr2 != UInt(0) && id_renx2 && sboard(id_raddr2) || - id_waddr != UInt(0) && id_wen && sboard(id_waddr)) + (id_raddr1 != UInt(0) && id_renx1 && sboard.read(id_raddr1) && !io.dpath.mem_ll_bypass_rs1 || + id_raddr2 != UInt(0) && id_renx2 && sboard.read(id_raddr2) && !io.dpath.mem_ll_bypass_rs2 || + id_waddr != UInt(0) && id_wen && sboard.read(id_waddr)) val ctrl_stalld = id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f62e3327..f71f0d99 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -71,22 +71,17 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // bypass muxes val id_rs1_zero = id_raddr1 === UInt(0) val id_rs1_ex_bypass = io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr - val id_rs1_mem_bypass = io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr - val id_rs1_bypass = id_rs1_zero || id_rs1_ex_bypass || id_rs1_mem_bypass - val id_rs1_bypass_src = Mux(id_rs1_zero, UInt(0), Mux(id_rs1_ex_bypass, UInt(1), UInt(2) | io.ctrl.mem_load)) - val id_rs1 = - Mux(id_raddr1 === UInt(0), UInt(0), - Mux(wb_wen && id_raddr1 === wb_reg_waddr, wb_wdata, - readRF(id_raddr1))) + val id_rs1_mem_bypass = io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr + val id_rs1_bypass = id_rs1_zero || id_rs1_ex_bypass 
|| id_rs1_mem_bypass || io.ctrl.mem_ll_bypass_rs1 + val id_rs1_bypass_src = Mux(id_rs1_zero, UInt(0), Mux(id_rs1_ex_bypass, UInt(1), Mux(io.ctrl.mem_load, UInt(3), UInt(2)))) + val id_rs1 = Mux(wb_wen && id_raddr1 === wb_reg_waddr, wb_wdata, readRF(id_raddr1)) val id_rs2_zero = id_raddr2 === UInt(0) val id_rs2_ex_bypass = io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr val id_rs2_mem_bypass = io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr - val id_rs2_bypass = id_rs2_zero || id_rs2_ex_bypass || id_rs2_mem_bypass - val id_rs2_bypass_src = Mux(id_rs2_zero, UInt(0), Mux(id_rs2_ex_bypass, UInt(1), UInt(2) | io.ctrl.mem_load)) - val id_rs2 = Mux(id_raddr2 === UInt(0), UInt(0), - Mux(wb_wen && id_raddr2 === wb_reg_waddr, wb_wdata, - readRF(id_raddr2))) + val id_rs2_bypass = id_rs2_zero || id_rs2_ex_bypass || id_rs2_mem_bypass || io.ctrl.mem_ll_bypass_rs2 + val id_rs2_bypass_src = Mux(id_rs2_zero, UInt(0), Mux(id_rs2_ex_bypass, UInt(1), Mux(io.ctrl.mem_load, UInt(3), UInt(2)))) + val id_rs2 = Mux(wb_wen && id_raddr2 === wb_reg_waddr, wb_wdata, readRF(id_raddr2)) // immediate generation def imm(sel: Bits, inst: Bits) = { diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 586e646c..0e9d6112 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -120,7 +120,7 @@ class TLB(entries: Int) extends Module val sx_array = Reg(Bits()) // supervisor execute permission when (io.ptw.resp.valid) { tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn - val perm = (!io.ptw.resp.bits.error).toSInt & io.ptw.resp.bits.perm(5,0) + val perm = (!io.ptw.resp.bits.error).toSInt & io.ptw.resp.bits.perm ur_array := ur_array.bitSet(r_refill_waddr, perm(0)) uw_array := uw_array.bitSet(r_refill_waddr, perm(1)) ux_array := ux_array.bitSet(r_refill_waddr, perm(2)) From d1b5076fee683fc80db803bafb0d586d1357269c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Aug 2013 14:40:13 -0700 Subject: [PATCH 0620/1087] Don't update BTB when 
garbage was fetched --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index d899bbf3..13e8fd55 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -660,7 +660,7 @@ class Control(implicit conf: RocketConfiguration) extends Module Mux(!ex_reg_btb_hit, PC_EX, // mispredicted taken branch PC_EX4))))) // mispredicted not taken branch - io.imem.req.bits.mispredict := !take_pc_wb && take_pc_ex + io.imem.req.bits.mispredict := !take_pc_wb && take_pc_ex && !ex_reg_xcpt io.imem.req.bits.taken := !ex_reg_btb_hit || ex_reg_jalr io.imem.req.valid := take_pc From 67f80ba4b2027aa29f60bb3eb86968432b3552b3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Aug 2013 14:40:57 -0700 Subject: [PATCH 0621/1087] Stall div/mul writeback until WB slot is free --- rocket/src/main/scala/dpath.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f71f0d99..f66caab9 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -162,7 +162,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module div.io.req.bits.in2 := ex_rs2 div.io.req.bits.tag := ex_reg_waddr div.io.kill := io.ctrl.div_mul_kill - div.io.resp.ready := Bool(true) + div.io.resp.ready := !io.ctrl.mem_wen io.ctrl.div_mul_rdy := div.io.req.ready io.fpu.fromint_data := ex_rs1 @@ -249,7 +249,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val mem_ll_wdata = Bits() mem_ll_wdata := div.io.resp.bits.data io.ctrl.mem_ll_waddr := div.io.resp.bits.tag - io.ctrl.mem_ll_wb := div.io.resp.valid + io.ctrl.mem_ll_wb := div.io.resp.valid && !io.ctrl.mem_wen when (dmem_resp_replay) { div.io.resp.ready := Bool(false) mem_ll_wdata := io.dmem.resp.bits.data_subword From daf23b8f79cc08ee7b2e0d9409483084aff7a0e0 Mon Sep 17 
00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Aug 2013 14:42:50 -0700 Subject: [PATCH 0622/1087] Add early out to multiplier --- rocket/src/main/scala/divider.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index 6c6f0242..bfdfcb2e 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -54,11 +54,14 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke val accum = mulReg(2*mulw,mulw).toSInt val mpcand = divisor.toSInt val prod = mplier(mulUnroll-1,0) * mpcand + accum - val nextMulReg = Cat(prod, mplier(mulw-1,mulUnroll)).toUInt + val eOut = Bool(earlyOut) && count > 0 && + (0 until mulw/mulUnroll).map(i => i > mulw/mulUnroll-1-count || mplier((i+1)*mulUnroll-1,i*mulUnroll) === 0).reduce(_&&_) + val eOutValue = mulReg >> (mulw/mulUnroll-count)(log2Up(mulw/mulUnroll)-1,0)*mulUnroll + val nextMulReg = Mux(eOut, eOutValue, Cat(prod, mplier(mulw-1,mulUnroll))) remainder := Cat(nextMulReg >> w, Bool(false), nextMulReg(w-1,0)).toSInt count := count + 1 - when (count === mulw/mulUnroll-1) { + when (count === mulw/mulUnroll-1 || eOut) { state := s_done when (AVec(FN_MULH, FN_MULHU, FN_MULHSU) contains req.fn) { state := s_move_rem From 2ca5127785f66cf18aceff7f58511983e0ca2878 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 24 Aug 2013 15:47:14 -0700 Subject: [PATCH 0623/1087] parameterize number of SCRs --- rocket/src/main/scala/htif.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 583e90ce..27a79063 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -35,16 +35,15 @@ class HTIFIO(ntiles: Int) extends Bundle val ipi_rep = (new FIFOIO) { Bool() }.flip } -class SCRIO extends Bundle +class SCRIO(n: Int) extends Bundle { - val n = 64 val rdata = Vec(n) { 
Bits(INPUT, 64) } val wen = Bool(OUTPUT) val waddr = UFix(OUTPUT, log2Up(n)) val wdata = Bits(OUTPUT, 64) } -class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component with ClientCoherenceAgent +class RocketHTIF(w: Int, nSCR: Int)(implicit conf: TileLinkConfiguration) extends Component with ClientCoherenceAgent { implicit val (ln, co) = (conf.ln, conf.co) val nTiles = ln.nClients-1 // This HTIF is itself a TileLink client @@ -52,7 +51,7 @@ class RocketHTIF(w: Int)(implicit conf: TileLinkConfiguration) extends Component val host = new HostIO(w) val cpu = Vec(nTiles) { new HTIFIO(nTiles).flip } val mem = new TileLinkIO - val scr = new SCRIO + val scr = new SCRIO(nSCR) } val short_request_bits = 64 From 3895b75a560f6a337b4822e0b283115b9aed53b1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Aug 2013 17:33:11 -0700 Subject: [PATCH 0624/1087] Support non-power-of-2 BTBs; prefer invalid entries --- rocket/src/main/scala/dpath_util.scala | 16 +++++++++------- rocket/src/main/scala/util.scala | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 6e3d00a9..127ac851 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -23,27 +23,29 @@ class rocketDpathBTB(entries: Int) extends Module { val io = new DpathBTBIO - val repl_way = LFSR16(io.wen)(log2Up(entries)-1,0) // TODO: pseudo-LRU - var hit_reduction = Bool(false) val hit = Bool() val update = Bool() var update_reduction = Bool(false) + val valid = Vec.fill(entries){Reg(init=Bool(false))} val hits = Vec.fill(entries){Bool()} val updates = Vec.fill(entries){Bool()} val targets = Vec.fill(entries){Reg(UInt())} val anyUpdate = updates.toBits.orR + val random_way = Random(entries, io.wen) + val invalid_way = valid.indexWhere((x: Bool) => !x) + val repl_way = Mux(valid.contains(Bool(false)), invalid_way, random_way) + for (i <- 0 
until entries) { val tag = Reg(UInt()) - val valid = Reg(init=Bool(false)) - hits(i) := valid && tag === io.current_pc - updates(i) := valid && tag === io.correct_pc + hits(i) := valid(i) && tag === io.current_pc + updates(i) := valid(i) && tag === io.correct_pc when (io.wen && (updates(i) || !anyUpdate && UInt(i) === repl_way)) { - valid := Bool(false) + valid(i) := Bool(false) when (!io.clr) { - valid := Bool(true) + valid(i) := Bool(true) tag := io.correct_pc targets(i) := io.correct_target } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 9b735f1d..b99d3d43 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -130,3 +130,21 @@ case class WideCounter(width: Int, inc: Bool = Bool(true)) if (isWide) large := (if (w < smallWidth) UInt(0) else x(w.min(width)-1,smallWidth)) } } + +object Random +{ + def apply(mod: Int, inc: Bool = Bool(true)): UInt = { + if (isPow2(mod)) { + require(mod <= 65536) + LFSR16(inc)(log2Up(mod)-1,0).toUInt + } else { + val max = 1 << log2Up(mod*8) + val rand_pow2 = apply(max, inc) + + var res = UInt(mod-1) + for (i <- mod-1 to 1 by -1) + res = Mux(rand_pow2 < UInt(i*max/mod), UInt(i-1), res) + res + } + } +} From 44e92edf923577fd733fd75b95908d1cc3b664cb Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 24 Aug 2013 22:42:51 -0700 Subject: [PATCH 0625/1087] fix scr parameterization bug --- rocket/src/main/scala/dpath_util.scala | 3 +++ rocket/src/main/scala/htif.scala | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 65f377b2..9a861bca 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -113,6 +113,9 @@ class PCR(implicit conf: RocketConfiguration) extends Component val rdata = Bits(OUTPUT, conf.xprlen) val wdata = Bits(INPUT, conf.xprlen) } + + // there is a fixed constant related to this in PCRReq.addr + 
require(log2Up(conf.nxpr) == 5) val status = new Status().asOutput val ptbr = UFix(OUTPUT, PADDR_BITS) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 27a79063..93e54cc2 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -21,7 +21,7 @@ class HostIO(val w: Int) extends Bundle class PCRReq extends Bundle { val rw = Bool() - val addr = Bits(width = 6) + val addr = Bits(width = 5) val data = Bits(width = 64) } @@ -237,6 +237,7 @@ class RocketHTIF(w: Int, nSCR: Int)(implicit conf: TileLinkConfiguration) extend } } + val scr_addr = addr(log2Up(nSCR)-1, 0) val scr_rdata = Vec(io.scr.rdata.size){Bits(width = 64)} for (i <- 0 until scr_rdata.size) scr_rdata(i) := io.scr.rdata(i) @@ -245,10 +246,10 @@ class RocketHTIF(w: Int, nSCR: Int)(implicit conf: TileLinkConfiguration) extend io.scr.wen := false io.scr.wdata := pcr_wdata - io.scr.waddr := pcr_addr.toUFix + io.scr.waddr := scr_addr.toUFix when (state === state_pcr_req && pcr_coreid === Fix(-1)) { io.scr.wen := cmd === cmd_writecr - pcrReadData := scr_rdata(pcr_addr) + pcrReadData := scr_rdata(scr_addr) state := state_tx } From b9f6e1a7ecbf32fba1943aee34ee92a1be6c863c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 24 Aug 2013 14:40:13 -0700 Subject: [PATCH 0626/1087] Don't update BTB when garbage was fetched --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 682af625..649a8f80 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -655,7 +655,7 @@ class Control(implicit conf: RocketConfiguration) extends Component Mux(!ex_reg_btb_hit, PC_EX, // mispredicted taken branch PC_EX4))))) // mispredicted not taken branch - io.imem.req.bits.mispredict := !take_pc_wb && take_pc_ex + io.imem.req.bits.mispredict := !take_pc_wb && take_pc_ex && !ex_reg_xcpt io.imem.req.bits.taken := 
!ex_reg_btb_hit || ex_reg_jalr io.imem.req.valid := take_pc From cfbfa6b8952b1621ce264fd342ee1b0cfba68cf2 Mon Sep 17 00:00:00 2001 From: Stephen Twigg Date: Thu, 5 Sep 2013 19:22:34 -0700 Subject: [PATCH 0627/1087] Add errors due to merge issues. Note, DebugIO re-introduced here but slated for possible removal in later commits. --- rocket/src/main/scala/htif.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index f27b8f13..6df4d28c 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -5,6 +5,11 @@ import Node._ import uncore._ import Util._ +class DebugIO extends Bundle +{ + val error_mode = Bool(OUTPUT) +} + class HostIO(val w: Int) extends Bundle { val clk = Bool(OUTPUT) @@ -23,6 +28,7 @@ class PCRReq extends Bundle class HTIFIO(ntiles: Int) extends Bundle { val reset = Bool(INPUT) + val debug = new DebugIO val id = UInt(INPUT, log2Up(ntiles)) val pcr_req = Decoupled(new PCRReq).flip val pcr_rep = Decoupled(Bits(width = 64)) @@ -32,14 +38,13 @@ class HTIFIO(ntiles: Int) extends Bundle class SCRIO(n: Int) extends Bundle { - val n = 64 val rdata = Vec.fill(n){Bits(INPUT, 64)} val wen = Bool(OUTPUT) val waddr = UInt(OUTPUT, log2Up(n)) val wdata = Bits(OUTPUT, 64) } -class RocketHTIF(w: Int, nSCR: Int)(implicit conf: TileLinkConfiguration) extends Component with ClientCoherenceAgent +class RocketHTIF(w: Int, nSCR: Int)(implicit conf: TileLinkConfiguration) extends Module with ClientCoherenceAgent { implicit val (ln, co) = (conf.ln, conf.co) val nTiles = ln.nClients-1 // This HTIF is itself a TileLink client From d06e24ac243d6b519cba88cb66e15cb63792914c Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 10 Sep 2013 10:51:35 -0700 Subject: [PATCH 0628/1087] new enum syntax --- rocket/src/main/scala/divider.scala | 4 ++-- rocket/src/main/scala/htif.scala | 4 ++-- rocket/src/main/scala/icache.scala | 2 +- 
rocket/src/main/scala/nbdcache.scala | 4 ++-- rocket/src/main/scala/ptw.scala | 2 +- rocket/src/main/scala/tlb.scala | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index bfdfcb2e..c18e47d6 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -9,7 +9,7 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke val w = io.req.bits.in1.getWidth val mulw = (w+mulUnroll-1)/mulUnroll*mulUnroll - val s_ready :: s_neg_inputs :: s_mul_busy :: s_div_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(7) { UInt() }; + val s_ready :: s_neg_inputs :: s_mul_busy :: s_div_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 7) val state = Reg(init=s_ready) val req = Reg(io.req.bits.clone) @@ -122,7 +122,7 @@ class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) ext val io = new MultiplierIO val w = io.req.bits.in1.getWidth - val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(6) { UInt() }; + val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6) val state = Reg(init=s_ready) val count = Reg(UInt(width = log2Up(w+1))) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 6df4d28c..2392f3c6 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -89,7 +89,7 @@ class RocketHTIF(w: Int, nSCR: Int)(implicit conf: TileLinkConfiguration) extend packet_ram(rx_word_count(log2Up(packet_ram_depth)-1,0) - UInt(1)) := rx_shifter_in } - val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(6) { UInt() } + val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(UInt(), 6) val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.width-1, 0) val pcr_coreid = 
addr(log2Up(nTiles)-1+20+1,20) @@ -124,7 +124,7 @@ class RocketHTIF(w: Int, nSCR: Int)(implicit conf: TileLinkConfiguration) extend } io.mem.grant.ready := Bool(true) - val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(9) { UInt() } + val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(UInt(), 9) val state = Reg(init=state_rx) val rx_cmd = Mux(rx_word_count === UInt(0), next_cmd, cmd) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index bae95113..17c6e88b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -143,7 +143,7 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module val mem = new UncachedTileLinkIO } - val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(4) { UInt() } + val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(UInt(), 4) val state = Reg(init=s_ready) val invalidated = Reg(Bool()) val stall = !io.resp.ready diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 5e8b4c96..89c772a4 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -188,7 +188,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte val probe_rdy = Bool(OUTPUT) } - val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(9) { UInt() } + val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(UInt(), 9) val state = Reg(init=s_invalid) val acquire_type = Reg(UInt()) @@ -505,7 +505,7 @@ class 
ProbeUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val line_state = UInt(INPUT, 2) } - val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(9) { UInt() } + val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(UInt(), 9) val state = Reg(init=s_invalid) val line_state = Reg(UInt()) val way_en = Reg(Bits()) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index f4acb1c5..af5787e6 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -39,7 +39,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module val bitsPerLevel = VPN_BITS/levels require(VPN_BITS == levels * bitsPerLevel) - val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(5) { UInt() }; + val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(UInt(), 5) val state = Reg(init=s_ready) val count = Reg(UInt(width = log2Up(levels))) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 0e9d6112..45d5078a 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -93,7 +93,7 @@ class TLB(entries: Int) extends Module val ptw = new TLBPTWIO } - val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4) { UInt() } + val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4) val state = Reg(init=s_ready) val r_refill_tag = Reg(UInt()) val r_refill_waddr = Reg(UInt()) From f9b85d8158432768e0d1c0093fe714065c5da5cd Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 10 Sep 2013 16:15:19 -0700 Subject: [PATCH 0629/1087] NetworkIOs no longer use thunks --- rocket/src/main/scala/nbdcache.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala 
b/rocket/src/main/scala/nbdcache.scala index 89c772a4..2f81e406 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -181,9 +181,9 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte val mem_resp = new DataWriteReq().asOutput val meta_read = Decoupled(new MetaReadReq) val meta_write = Decoupled(new MetaWriteReq) - val replay = Decoupled(new Replay()) - val mem_grant = Valid((new LogicalNetworkIO) {new Grant} ).flip - val mem_finish = Decoupled((new LogicalNetworkIO) {new GrantAck} ) + val replay = Decoupled(new Replay) + val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip + val mem_finish = Decoupled(new LogicalNetworkIO(new GrantAck)) val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) } @@ -264,7 +264,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte } } - val ackq = Module(new Queue((new LogicalNetworkIO){new GrantAck}, 1)) + val ackq = Module(new Queue(new LogicalNetworkIO(new GrantAck), 1)) ackq.io.enq.valid := (wb_done || refill_done) && tl.co.requiresAck(io.mem_grant.bits.payload) ackq.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src @@ -330,8 +330,8 @@ class MSHRFile(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends M val meta_read = Decoupled(new MetaReadReq) val meta_write = Decoupled(new MetaWriteReq) val replay = Decoupled(new Replay) - val mem_grant = Valid((new LogicalNetworkIO){new Grant}).flip - val mem_finish = Decoupled((new LogicalNetworkIO){new GrantAck}) + val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip + val mem_finish = Decoupled(new LogicalNetworkIO(new GrantAck)) val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) @@ -354,7 +354,7 @@ class MSHRFile(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends M val meta_read_arb = Module(new Arbiter(new MetaReadReq, 
conf.nmshr)) val meta_write_arb = Module(new Arbiter(new MetaWriteReq, conf.nmshr)) val mem_req_arb = Module(new Arbiter(new Acquire, conf.nmshr)) - val mem_finish_arb = Module(new Arbiter((new LogicalNetworkIO){new GrantAck}, conf.nmshr)) + val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new GrantAck), conf.nmshr)) val wb_req_arb = Module(new Arbiter(new WritebackReq, conf.nmshr)) val replay_arb = Module(new Arbiter(new Replay, conf.nmshr)) val alloc_arb = Module(new Arbiter(Bool(), conf.nmshr)) From 95dd0d8be13184adf4f1bdfaac1c281571ee09c0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 11 Sep 2013 20:15:21 -0700 Subject: [PATCH 0630/1087] Remove DebugIO/error mode --- rocket/src/main/scala/htif.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 2392f3c6..05a9bef6 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -5,11 +5,6 @@ import Node._ import uncore._ import Util._ -class DebugIO extends Bundle -{ - val error_mode = Bool(OUTPUT) -} - class HostIO(val w: Int) extends Bundle { val clk = Bool(OUTPUT) @@ -28,7 +23,6 @@ class PCRReq extends Bundle class HTIFIO(ntiles: Int) extends Bundle { val reset = Bool(INPUT) - val debug = new DebugIO val id = UInt(INPUT, log2Up(ntiles)) val pcr_req = Decoupled(new PCRReq).flip val pcr_rep = Decoupled(Bits(width = 64)) From 243c4ae3422f1aee46f8adc313e20042c1b7c9b0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 12 Sep 2013 03:44:38 -0700 Subject: [PATCH 0631/1087] sync up rocket with new isa --- rocket/src/main/scala/consts.scala | 40 +- rocket/src/main/scala/ctrl.scala | 474 ++++++++++++----------- rocket/src/main/scala/dpath.scala | 85 ++-- rocket/src/main/scala/dpath_alu.scala | 11 +- rocket/src/main/scala/instructions.scala | 64 +-- 5 files changed, 341 insertions(+), 333 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala 
index d8a16383..3309bab0 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -16,32 +16,36 @@ trait ScalarOpConstants { val BR_LTU = Bits(6, 3) val BR_GEU = Bits(7, 3) - val PC_EX4 = UInt(0, 2) - val PC_EX = UInt(1, 2) + val PC_EX = UInt(0, 2) val PC_WB = UInt(2, 2) val PC_PCR = UInt(3, 2) - val A2_X = Bits("b???", 3) - val A2_BTYPE = UInt(0, 3); - val A2_LTYPE = UInt(1, 3); - val A2_ITYPE = UInt(2, 3); - val A2_ZERO = UInt(4, 3); - val A2_JTYPE = UInt(5, 3); - val A2_RTYPE = UInt(6, 3); + val A1_X = Bits("b??", 2) + val A1_RS1 = UInt(0, 2) + val A1_PC = UInt(1, 2) + val A1_ZERO = UInt(2, 2) + + val IMM_X = Bits("b???", 3) + val IMM_S = UInt(0, 3); + val IMM_SB = UInt(1, 3); + val IMM_U = UInt(2, 3); + val IMM_UJ = UInt(3, 3); + val IMM_I = UInt(4, 3); + + val A2_X = Bits("b??", 2) + val A2_RS2 = UInt(0, 2) + val A2_IMM = UInt(1, 2) + val A2_ZERO = UInt(2, 3) + val A2_FOUR = UInt(3, 3) val X = Bits("b?", 1) val N = Bits(0, 1) val Y = Bits(1, 1) - val WA_X = UInt("b?", 1) - val WA_RD = UInt(0, 1) - val WA_RA = UInt(1, 1) - - val WB_X = UInt("b???", 3) - val WB_PC = UInt(0, 3); - val WB_ALU = UInt(2, 3); - val WB_TSC = UInt(4, 3); - val WB_IRT = UInt(5, 3); + val WB_X = UInt("b??", 2) + val WB_ALU = UInt(0, 3); + val WB_TSC = UInt(2, 3); + val WB_IRT = UInt(3, 3); val SZ_DW = 1 val DW_X = X diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 13e8fd55..53bb7dfa 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -14,15 +14,16 @@ class CtrlDpathIO extends Bundle() val killd = Bool(OUTPUT); val ren2 = Bool(OUTPUT); val ren1 = Bool(OUTPUT); - val sel_alu2 = UInt(OUTPUT, 3); - val fn_dw = Bool(OUTPUT); - val fn_alu = UInt(OUTPUT, SZ_ALU_FN); + val sel_alu2 = UInt(OUTPUT, 3) + val sel_alu1 = UInt(OUTPUT, 2) + val sel_imm = UInt(OUTPUT, 3) + val fn_dw = Bool(OUTPUT) + val fn_alu = UInt(OUTPUT, SZ_ALU_FN) val div_mul_val = Bool(OUTPUT) val div_mul_kill = 
Bool(OUTPUT) val div_val = Bool(OUTPUT); val div_kill = Bool(OUTPUT) - val sel_wa = Bool(OUTPUT); - val sel_wb = UInt(OUTPUT, 3); + val sel_wb = UInt(OUTPUT, 2) val pcr = UInt(OUTPUT, 3) val eret = Bool(OUTPUT); val mem_load = Bool(OUTPUT); @@ -31,6 +32,7 @@ class CtrlDpathIO extends Bundle() val mem_fp_val= Bool(OUTPUT); val ex_wen = Bool(OUTPUT); val ex_jalr = Bool(OUTPUT) + val ex_predicted_taken = Bool(OUTPUT) val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); val wb_valid = Bool(OUTPUT) @@ -66,13 +68,13 @@ abstract trait DecodeConstants val xpr64 = Y; val decode_default = - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | vec_val | | renx1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,BR_X, X,X,X,A2_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,WA_X, WB_X, PCR.X,N,X,X,X,X) + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,BR_X, X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,WB_X, PCR.X,N,X,X,X,X) val table: Array[(UInt, List[UInt])] } @@ -80,239 +82,238 @@ abstract trait DecodeConstants object XDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | vec_val | | renx1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), - BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, 
WB_X, PCR.N,N,N,N,N,N), - BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), - BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), - BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), - BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_BTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - J-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), - JAL-> List(Y, N,N,BR_J, N,N,N,A2_JTYPE,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,WA_RA,WB_PC, PCR.N,N,N,N,N,N), - JALR-> List(Y, N,N,BR_N, Y,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), - AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_PC, PCR.N,N,N,N,N,N), + JAL-> List(Y, N,N,BR_J, N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + JALR-> List(Y, N,N,BR_N, 
Y,N,Y,A2_FOUR,A1_PC, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - LB-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - LH-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - LW-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - LBU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - LHU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SB-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), - SH-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), - SW-> List(Y, N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), - SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), + LB-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + LH-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + LW-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + LBU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + LHU-> List(Y, N,N,BR_N, 
N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SB-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + SH-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + SW-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOOR_D-> List(xpr64,N,N,BR_N, 
N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, 
DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - LR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - LR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SC_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SC_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + LR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + LR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SC_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SC_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - LUI-> List(Y, N,N,BR_N, N,N,N,A2_LTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_AND, N,M_X, MT_X, 
N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - ORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - XORI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - AND-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - OR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - XOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + LUI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SLTIU-> List(Y, N,N,BR_N, 
N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + ORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + XORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AND-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + OR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + XOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_ADD, N,M_X, MT_X, 
N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_ITYPE,DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - MUL-> List(Y, N,N,BR_N, 
N,Y,Y,A2_RTYPE,DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N), + MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N), + MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N), + MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N), + MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N), - DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RTYPE,DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RTYPE,DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), + DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, 
A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), + DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), + REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), + REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), + DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), + DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), + REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), + REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), - SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,Y,N,N), - SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.S,N,N,N,Y,N), - CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_ITYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.C,N,N,N,Y,N), - ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,Y,N,Y,N), - FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), - FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR.N,Y,N,N,N,Y), - MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.F,N,N,N,Y,N), - MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RTYPE,DW_XPR,FN_OP2, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.T,N,N,N,Y,N), - RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_TSC,PCR.N,N,N,N,N,N), - RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_TSC,PCR.N,N,N,N,N,N), - RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, DW_XPR,FN_X, 
N,M_X, MT_X, N,N,Y,WA_RD,WB_IRT,PCR.N,N,N,N,N,N)) + SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,Y,N,N), + SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.S,N,N,N,Y,N), + CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.C,N,N,N,Y,N), + ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,Y,N,Y,N), + FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WB_X, PCR.N,Y,N,N,N,Y), + MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.F,N,N,N,Y,N), + MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RS2, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.T,N,N,N,Y,N), + RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_TSC,PCR.N,N,N,N,N,N), + RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_TSC,PCR.N,N,N,N,N,N), + RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_IRT,PCR.N,N,N,N,N,N)) } object FDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | vec_val | | renx1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_D_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FSGNJ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FSGNJ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, 
DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FSGNJX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FSGNJX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FSGNJN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FSGNJN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMIN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMIN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMAX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMAX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMUL_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMUL_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FNMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, 
PCR.N,N,N,N,N,N), - FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FNMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMV_X_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMV_X_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_W_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_WU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_WU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_L_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_L_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_LU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_LU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FEQ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FEQ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FLT_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FLT_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FLE_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FLE_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMV_S_X-> List(Y, 
Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FMV_D_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_S_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_D_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_S_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_D_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_S_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_D_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_S_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FCVT_D_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FRSR-> List(Y, Y,N,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FSSR-> List(Y, Y,N,BR_N, N,N,Y,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FLW-> List(Y, Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - FLD-> List(Y, Y,N,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - FSW-> List(Y, Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), - FSD-> List(Y, Y,N,BR_N, N,N,Y,A2_BTYPE,DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N)) + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, 
A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FCVT_D_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FSGNJ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FSGNJ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FSGNJX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FSGNJX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FSGNJN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FSGNJN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMIN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMIN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMAX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMAX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMUL_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMUL_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, 
N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FNMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FNMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMV_X_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FMV_X_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FCVT_W_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FCVT_WU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FCVT_WU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FCVT_L_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FCVT_L_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FCVT_LU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FCVT_LU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FEQ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, 
N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FEQ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FLT_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FLT_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FLE_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FLE_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FMV_S_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FMV_D_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FCVT_S_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FCVT_D_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FCVT_S_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FCVT_D_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FCVT_S_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FCVT_D_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FCVT_S_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FCVT_D_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FRSR-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FSSR-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), + FLW-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, 
DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + FLD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + FSW-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + FSD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N)) } object VDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | vec_val | | renx1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 dw alu | mem_cmd mem_type| | | s_wa s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | - VSETCFGVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,Y), - VSETVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WA_RD,WB_ALU,PCR.N,N,N,N,N,Y), - VF-> List(Y, N,Y,BR_N, N,N,Y,A2_ITYPE,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_X, WB_ALU,PCR.N,N,N,N,N,N), - VMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_RD,WB_X, PCR.N,N,N,N,N,N), - FENCE_V_L-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), - FENCE_V_G-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,N,N), - VLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLWU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLHU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, 
MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLBU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VSH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VSB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VFLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VFLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VFSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VFSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLSTWU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLSTHU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VLSTBU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VSSTH-> List(Y, 
N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VSSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VFLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VFLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VFSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), - VFSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,N,N), + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | + VSETCFGVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,Y), + VSETVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,Y), + VF-> List(Y, N,Y,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FENCE_V_L-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + FENCE_V_G-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + VLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLWU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLH-> List(Y, N,Y,BR_N, 
N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLHU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLBU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VSH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VSB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VFLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VFLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VFSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VFSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLSTWU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLSTHU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, 
DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VLSTBU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VSSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VSSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VFLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VFLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VFSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + VFSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VENQCMD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,Y,N), - VENQIMM1-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,Y,N), - VENQIMM2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,Y,N), - VENQCNT-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,Y,N), - VXCPTEVAC-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WA_RD,WB_ALU,PCR.N,N,N,N,Y,N), - VXCPTKILL-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,Y,N), - VXCPTHOLD-> List(Y, N,Y,BR_N, N,N,N,A2_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WA_X, WB_X, PCR.N,N,N,N,Y,N)) + VENQCMD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N), + 
VENQIMM1-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N), + VENQIMM2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N), + VENQCNT-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N), + VXCPTEVAC-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N), + VXCPTKILL-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,Y,N), + VXCPTHOLD-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,Y,N)) } class Control(implicit conf: RocketConfiguration) extends Module @@ -342,14 +343,15 @@ class Control(implicit conf: RocketConfiguration) extends Module case u => u } - val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_vec_val: Bool) :: id_br_type :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: id_sel_alu2 :: (id_fn_dw: Bool) :: id_fn_alu :: cs0 = cs - val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: id_sel_wa :: id_sel_wb :: cs1 = cs0 - val id_pcr :: (id_fence_i: Bool) :: (id_eret: Bool) :: (id_syscall: Bool) :: (id_privileged: Bool) :: (id_replay_next: Bool) :: Nil = cs1 + val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_vec_val: Bool) :: id_br_type :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs + val id_sel_alu2 :: id_sel_alu1 :: id_sel_imm :: (id_fn_dw: Bool) :: id_fn_alu :: cs1 = cs0 + val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: id_sel_wb :: cs2 = cs1 + val id_pcr :: (id_fence_i: Bool) :: (id_eret: Bool) :: (id_syscall: Bool) :: (id_privileged: Bool) :: (id_replay_next: Bool) :: Nil = cs2 - val id_raddr3 = io.dpath.inst(16,12); - val id_raddr2 = io.dpath.inst(21,17); - val id_raddr1 = 
io.dpath.inst(26,22); - val id_waddr = Mux(id_sel_wa === WA_RA, RA, io.dpath.inst(31,27)); + val id_raddr3 = io.dpath.inst(16,12) + val id_raddr2 = io.dpath.inst(21,17) + val id_raddr1 = io.dpath.inst(26,22) + val id_waddr = io.dpath.inst(31,27) val id_load_use = Bool(); val ex_reg_xcpt_interrupt = Reg(init=Bool(false)) @@ -656,9 +658,7 @@ class Control(implicit conf: RocketConfiguration) extends Module Mux(wb_reg_xcpt, PC_PCR, // exception Mux(wb_reg_eret, PC_PCR, // eret instruction Mux(replay_wb, PC_WB, // replay - Mux(ex_reg_jalr, PC_EX, // JALR - Mux(!ex_reg_btb_hit, PC_EX, // mispredicted taken branch - PC_EX4))))) // mispredicted not taken branch + PC_EX)))// branch/jal[r] io.imem.req.bits.mispredict := !take_pc_wb && take_pc_ex && !ex_reg_xcpt io.imem.req.bits.taken := !ex_reg_btb_hit || ex_reg_jalr @@ -731,6 +731,8 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.ren2 := id_renx2.toBool; io.dpath.ren1 := id_renx1.toBool; io.dpath.sel_alu2 := id_sel_alu2.toUInt + io.dpath.sel_alu1 := id_sel_alu1.toUInt + io.dpath.sel_imm := id_sel_imm.toUInt io.dpath.fn_dw := id_fn_dw.toBool; io.dpath.fn_alu := id_fn_alu.toUInt io.dpath.div_mul_val := ex_reg_div_mul_val @@ -738,11 +740,11 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.ex_fp_val:= ex_reg_fp_val; io.dpath.mem_fp_val:= mem_reg_fp_val; io.dpath.ex_jalr := ex_reg_jalr + io.dpath.ex_predicted_taken := ex_reg_btb_hit io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; io.dpath.wb_wen := wb_reg_wen && !replay_wb io.dpath.wb_valid := wb_reg_valid && !replay_wb - io.dpath.sel_wa := id_sel_wa.toBool; io.dpath.sel_wb := id_sel_wb.toUInt io.dpath.pcr := wb_reg_pcr.toUInt io.dpath.eret := wb_reg_eret diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f66caab9..77111f2b 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -22,10 +22,11 @@ class Datapath(implicit conf: 
RocketConfiguration) extends Module // execute definitions val ex_reg_pc = Reg(UInt()) val ex_reg_inst = Reg(Bits()) - val ex_reg_waddr = Reg(UInt()) val ex_reg_ctrl_fn_dw = Reg(UInt()) val ex_reg_ctrl_fn_alu = Reg(UInt()) val ex_reg_sel_alu2 = Reg(UInt()) + val ex_reg_sel_alu1 = Reg(UInt()) + val ex_reg_sel_imm = Reg(UInt()) val ex_reg_ctrl_sel_wb = Reg(UInt()) val ex_reg_kill = Reg(Bool()) val ex_reg_rs1_bypass = Reg(Bool()) @@ -38,7 +39,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // memory definitions val mem_reg_pc = Reg(UInt()) val mem_reg_inst = Reg(Bits()) - val mem_reg_waddr = Reg(UInt()) val mem_reg_wdata = Reg(Bits()) val mem_reg_kill = Reg(Bool()) val mem_reg_store_data = Reg(Bits()) @@ -70,30 +70,33 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // bypass muxes val id_rs1_zero = id_raddr1 === UInt(0) - val id_rs1_ex_bypass = io.ctrl.ex_wen && id_raddr1 === ex_reg_waddr - val id_rs1_mem_bypass = io.ctrl.mem_wen && id_raddr1 === mem_reg_waddr + val id_rs1_ex_bypass = io.ctrl.ex_wen && id_raddr1 === io.ctrl.ex_waddr + val id_rs1_mem_bypass = io.ctrl.mem_wen && id_raddr1 === io.ctrl.mem_waddr val id_rs1_bypass = id_rs1_zero || id_rs1_ex_bypass || id_rs1_mem_bypass || io.ctrl.mem_ll_bypass_rs1 val id_rs1_bypass_src = Mux(id_rs1_zero, UInt(0), Mux(id_rs1_ex_bypass, UInt(1), Mux(io.ctrl.mem_load, UInt(3), UInt(2)))) val id_rs1 = Mux(wb_wen && id_raddr1 === wb_reg_waddr, wb_wdata, readRF(id_raddr1)) val id_rs2_zero = id_raddr2 === UInt(0) - val id_rs2_ex_bypass = io.ctrl.ex_wen && id_raddr2 === ex_reg_waddr - val id_rs2_mem_bypass = io.ctrl.mem_wen && id_raddr2 === mem_reg_waddr + val id_rs2_ex_bypass = io.ctrl.ex_wen && id_raddr2 === io.ctrl.ex_waddr + val id_rs2_mem_bypass = io.ctrl.mem_wen && id_raddr2 === io.ctrl.mem_waddr val id_rs2_bypass = id_rs2_zero || id_rs2_ex_bypass || id_rs2_mem_bypass || io.ctrl.mem_ll_bypass_rs2 val id_rs2_bypass_src = Mux(id_rs2_zero, UInt(0), Mux(id_rs2_ex_bypass, UInt(1), 
Mux(io.ctrl.mem_load, UInt(3), UInt(2)))) val id_rs2 = Mux(wb_wen && id_raddr2 === wb_reg_waddr, wb_wdata, readRF(id_raddr2)) // immediate generation def imm(sel: Bits, inst: Bits) = { - val lsbs = Mux(sel === A2_LTYPE || sel === A2_ZERO, Bits(0), - Mux(sel === A2_BTYPE, Cat(inst(31,27), inst(16,10)), - Mux(sel === A2_JTYPE, inst(18,7), - inst(21,10)))) - val msbs = Mux(sel === A2_ZERO, SInt(0), - Mux(sel === A2_LTYPE, inst(26,7).toSInt, - Mux(sel === A2_JTYPE, inst(31,19).toSInt, - Mux(sel === A2_ITYPE, inst(21), inst(31)).toSInt))) - Cat(msbs, lsbs).toSInt + val sign = inst(10).toSInt + val b30_20 = Mux(sel === IMM_U, inst(21,11).toSInt, sign) + val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, + Cat(inst(9,7), inst(26,22)).toSInt) + val b11 = Mux(sel === IMM_U, SInt(0), + Mux(sel === IMM_SB || sel === IMM_UJ, inst(11).toSInt, sign)) + val b10_6 = Mux(sel === IMM_S || sel === IMM_SB, inst(31,27), + Mux(sel === IMM_U, Bits(0), inst(21,17))) + val b5_1 = Mux(sel === IMM_U, Bits(0), inst(16,12)) + val b0 = Mux(sel === IMM_I || sel === IMM_S, inst(11), Bits(0)) + + Cat(sign, b30_20, b19_12, b11, b10_6, b5_1, b0).toSInt } io.ctrl.inst := id_inst @@ -104,13 +107,15 @@ class Datapath(implicit conf: RocketConfiguration) extends Module when (!io.ctrl.killd) { ex_reg_pc := id_pc ex_reg_inst := id_inst - ex_reg_waddr := Mux(io.ctrl.sel_wa === WA_RD, id_inst(31,27).toUInt, RA) ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUInt ex_reg_ctrl_fn_alu := io.ctrl.fn_alu ex_reg_sel_alu2 := io.ctrl.sel_alu2 + ex_reg_sel_alu1 := io.ctrl.sel_alu1 + ex_reg_sel_imm := io.ctrl.sel_imm ex_reg_ctrl_sel_wb := io.ctrl.sel_wb + ex_reg_rs1_bypass := id_rs1_bypass && io.ctrl.ren1 + ex_reg_rs2_bypass := id_rs2_bypass && io.ctrl.ren2 when (io.ctrl.ren1) { - ex_reg_rs1_bypass := id_rs1_bypass ex_reg_rs1_lsb := id_rs1_bypass_src when (!id_rs1_bypass) { ex_reg_rs1_lsb := id_rs1(id_rs1_bypass_src.getWidth-1,0) @@ -118,7 +123,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Module } } when 
(io.ctrl.ren2) { - ex_reg_rs2_bypass := id_rs2_bypass ex_reg_rs2_lsb := id_rs2_bypass_src when (!id_rs2_bypass) { ex_reg_rs2_lsb := id_rs2(id_rs2_bypass_src.getWidth-1,0) @@ -136,21 +140,27 @@ class Datapath(implicit conf: RocketConfiguration) extends Module Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(2), wb_reg_wdata, Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(1), mem_reg_wdata, Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(0), Bits(0), - Cat(ex_reg_rs1_msb, ex_reg_rs1_lsb))))) + Mux(ex_reg_sel_alu1 === A1_ZERO, Bits(0), + Cat(ex_reg_rs1_msb, ex_reg_rs1_lsb)))))) val ex_rs2 = Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(3) && Bool(conf.fastLoadWord), dmem_resp_data, Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(2), wb_reg_wdata, Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(1), mem_reg_wdata, Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(0), Bits(0), Cat(ex_reg_rs2_msb, ex_reg_rs2_lsb))))) - val ex_imm = imm(ex_reg_sel_alu2, ex_reg_inst) - val ex_op2 = Mux(ex_reg_sel_alu2 != A2_RTYPE, ex_imm, ex_rs2) + + val ex_imm = imm(ex_reg_sel_imm, ex_reg_inst) + val ex_op1 = Mux(ex_reg_sel_alu1 === A1_PC, ex_reg_pc.toSInt, ex_rs1) + val ex_op2 = Mux(ex_reg_sel_alu2 === A2_RS2, ex_rs2.toSInt, + Mux(ex_reg_sel_alu2 === A2_IMM, ex_imm, + Mux(ex_reg_sel_alu2 === A2_ZERO, SInt(0), + SInt(4)))) val alu = Module(new ALU) alu.io.dw := ex_reg_ctrl_fn_dw; alu.io.fn := ex_reg_ctrl_fn_alu; alu.io.in2 := ex_op2.toUInt - alu.io.in1 := ex_rs1.toUInt + alu.io.in1 := ex_op1.toUInt // multiplier and divider val div = Module(new MulDiv(mulUnroll = if (conf.fastMulDiv) 8 else 1, @@ -160,13 +170,13 @@ class Datapath(implicit conf: RocketConfiguration) extends Module div.io.req.bits.fn := ex_reg_ctrl_fn_alu div.io.req.bits.in1 := ex_rs1 div.io.req.bits.in2 := ex_rs2 - div.io.req.bits.tag := ex_reg_waddr + div.io.req.bits.tag := io.ctrl.ex_waddr div.io.kill := io.ctrl.div_mul_kill div.io.resp.ready := !io.ctrl.mem_wen io.ctrl.div_mul_rdy := div.io.req.ready 
io.fpu.fromint_data := ex_rs1 - io.ctrl.ex_waddr := ex_reg_waddr + io.ctrl.ex_waddr := ex_reg_inst(31,27) def vaSign(a0: UInt, ea: Bits) = { // efficient means to compress 64-bit VA into VADDR_BITS+1 bits @@ -177,13 +187,17 @@ class Datapath(implicit conf: RocketConfiguration) extends Module Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1), e(0))) } - val ex_effective_address = Cat(vaSign(ex_rs1, alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUInt + val ex_br_base = Mux(io.ctrl.ex_jalr, ex_rs1, ex_reg_pc) + val ex_br_offset = Mux(io.ctrl.ex_predicted_taken && !io.ctrl.ex_jalr, SInt(4), ex_imm) + val ex_br64 = ex_br_base + ex_br_offset + val ex_br_msb = Mux(io.ctrl.ex_jalr, vaSign(ex_rs1, ex_br64), vaSign(ex_reg_pc, ex_br64)) + val ex_br_addr = Cat(ex_br_msb, ex_br64(VADDR_BITS-1,0)) // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req.bits.addr := ex_effective_address + io.dmem.req.bits.addr := Cat(vaSign(ex_rs1, alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUInt io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_store_data) - io.dmem.req.bits.tag := Cat(ex_reg_waddr, io.ctrl.ex_fp_val) + io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_fp_val) require(io.dmem.req.bits.tag.getWidth >= 6) // processor control regfile read @@ -209,26 +223,20 @@ class Datapath(implicit conf: RocketConfiguration) extends Module Mux(io.ctrl.ex_br_type === BR_GEU, ex_rs1 >= ex_rs2, io.ctrl.ex_br_type === BR_J)))))) - val ex_pc_plus4 = ex_reg_pc.toSInt + Mux(ex_reg_sel_alu2 === A2_LTYPE, ex_reg_inst(26,7).toSInt << 12, SInt(4)) - val ex_branch_target = ex_reg_pc.toSInt + (ex_imm << 1) - val ex_jalr_target = (ex_effective_address >> 1 << 1).toSInt - val tsc_reg = WideCounter(64) val irt_reg = WideCounter(64, io.ctrl.wb_valid) // writeback select mux val ex_wdata = - Mux(ex_reg_ctrl_sel_wb === WB_PC, ex_pc_plus4, Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg.value, 
Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg.value, - alu.io.out))).toBits // WB_ALU + alu.io.out)).toBits // WB_ALU // memory stage mem_reg_kill := ex_reg_kill when (!ex_reg_kill) { mem_reg_pc := ex_reg_pc mem_reg_inst := ex_reg_inst - mem_reg_waddr := ex_reg_waddr mem_reg_wdata := ex_wdata mem_reg_rs1 := ex_rs1 mem_reg_rs2 := ex_rs2 @@ -238,7 +246,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module } // for load/use hazard detection (load byte/halfword) - io.ctrl.mem_waddr := mem_reg_waddr; + io.ctrl.mem_waddr := mem_reg_inst(31,27) // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool @@ -266,8 +274,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // writeback stage when (!mem_reg_kill) { wb_reg_pc := mem_reg_pc + wb_reg_waddr := io.ctrl.mem_waddr wb_reg_inst := mem_reg_inst - wb_reg_waddr := mem_reg_waddr wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata) wb_reg_rs1 := mem_reg_rs1 wb_reg_rs2 := mem_reg_rs2 @@ -322,10 +330,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // hook up I$ io.imem.req.bits.currentpc := ex_reg_pc io.imem.req.bits.pc := - Mux(io.ctrl.sel_pc === PC_EX4, ex_pc_plus4, - Mux(io.ctrl.sel_pc === PC_EX, Mux(io.ctrl.ex_jalr, ex_jalr_target, ex_branch_target), + Mux(io.ctrl.sel_pc === PC_EX, ex_br_addr, Mux(io.ctrl.sel_pc === PC_PCR, pcr.io.evec, - wb_reg_pc))).toUInt // PC_WB + wb_reg_pc)).toUInt // PC_WB printf("C: %d [%d] pc=[%x] W[r%d=%x] R[r%d=%x] R[r%d=%x] inst=[%x] %s\n", tsc_reg(32,0), io.ctrl.wb_valid, wb_reg_pc, diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 4aeed0c6..e4cbcd8c 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -18,7 +18,6 @@ object ALU val FN_SLT = Bits(10) val FN_SLTU = Bits(11) val FN_SRA = Bits(13) - val FN_OP2 = Bits(15) val FN_DIV = FN_XOR val FN_DIVU = FN_SR @@ -65,18 +64,14 @@ class 
ALU(implicit conf: RocketConfiguration) extends Module val shout_r = (Cat(isSub(io.fn) & shin(63), shin).toSInt >> shamt)(63,0) val shout_l = Reverse(shout_r) - val bitwise_logic = - Mux(io.fn === FN_AND, io.in1 & io.in2, - Mux(io.fn === FN_OR, io.in1 | io.in2, - Mux(io.fn === FN_XOR, io.in1 ^ io.in2, - io.in2))) // FN_OP2 - val out64 = Mux(io.fn === FN_ADD || io.fn === FN_SUB, sum, Mux(io.fn === FN_SLT || io.fn === FN_SLTU, less, Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, Mux(io.fn === FN_SL, shout_l, - bitwise_logic)))) + Mux(io.fn === FN_AND, io.in1 & io.in2, + Mux(io.fn === FN_OR, io.in1 | io.in2, + /*FN_XOR*/ io.in1 ^ io.in2)))))) val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) io.out := Cat(out_hi, out64(31,0)).toUInt diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index a5d15e2d..66d2d90d 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -6,7 +6,6 @@ import Node._ object Instructions { /* Automatically generated by parse-opcodes */ - def J = Bits("b?????????????????????????1101011") def JAL = Bits("b?????????????????????????1101111") def JALR = Bits("b??????????????????????0001100111") def BEQ = Bits("b??????????????????????0001100011") @@ -18,12 +17,12 @@ object Instructions def LUI = Bits("b?????????????????????????0110111") def AUIPC = Bits("b?????????????????????????0010111") def ADDI = Bits("b??????????????????????0000010011") - def SLLI = Bits("b??????????000000??????0010010011") + def SLLI = Bits("b??????????00000??????00010010011") def SLTI = Bits("b??????????????????????0100010011") def SLTIU = Bits("b??????????????????????0110010011") def XORI = Bits("b??????????????????????1000010011") - def SRLI = Bits("b??????????000000??????1010010011") - def SRAI = Bits("b??????????000001??????1010010011") + def SRLI = Bits("b??????????00000??????01010010011") + def SRAI = Bits("b??????????00000??????11010010011") def ORI = 
Bits("b??????????????????????1100010011") def ANDI = Bits("b??????????????????????1110010011") def ADD = Bits("b???????????????00000000000110011") @@ -45,9 +44,9 @@ object Instructions def REM = Bits("b???????????????00000011100110011") def REMU = Bits("b???????????????00000011110110011") def ADDIW = Bits("b??????????????????????0000011011") - def SLLIW = Bits("b??????????0000000?????0010011011") - def SRLIW = Bits("b??????????0000000?????1010011011") - def SRAIW = Bits("b??????????0000010?????1010011011") + def SLLIW = Bits("b??????????000000?????00010011011") + def SRLIW = Bits("b??????????000000?????01010011011") + def SRAIW = Bits("b??????????000000?????11010011011") def ADDW = Bits("b???????????????00000000000111011") def SUBW = Bits("b???????????????10000000000111011") def SLLW = Bits("b???????????????00000000010111011") @@ -69,30 +68,32 @@ object Instructions def SH = Bits("b??????????????????????0010100011") def SW = Bits("b??????????????????????0100100011") def SD = Bits("b??????????????????????0110100011") - def AMOADD_W = Bits("b???????????????00000000100101011") - def AMOSWAP_W = Bits("b???????????????00000010100101011") - def AMOAND_W = Bits("b???????????????00000100100101011") - def AMOOR_W = Bits("b???????????????00000110100101011") - def AMOMIN_W = Bits("b???????????????00001000100101011") - def AMOMAX_W = Bits("b???????????????00001010100101011") - def AMOMINU_W = Bits("b???????????????00001100100101011") - def AMOMAXU_W = Bits("b???????????????00001110100101011") - def AMOADD_D = Bits("b???????????????00000000110101011") - def AMOSWAP_D = Bits("b???????????????00000010110101011") - def AMOAND_D = Bits("b???????????????00000100110101011") - def AMOOR_D = Bits("b???????????????00000110110101011") - def AMOMIN_D = Bits("b???????????????00001000110101011") - def AMOMAX_D = Bits("b???????????????00001010110101011") - def AMOMINU_D = Bits("b???????????????00001100110101011") - def AMOMAXU_D = Bits("b???????????????00001110110101011") - def LR_W = 
Bits("b??????????0000010000000100101011") - def LR_D = Bits("b??????????0000010000000110101011") - def SC_W = Bits("b???????????????10000010100101011") - def SC_D = Bits("b???????????????10000010110101011") - def FENCE_I = Bits("b??????????????????????0010101111") - def FENCE = Bits("b??????????????????????0100101111") - def FENCE_V_L = Bits("b??????????????????????1000101111") - def FENCE_V_G = Bits("b??????????????????????1010101111") + def AMOADD_W = Bits("b?????????????????000000100101011") + def AMOXOR_W = Bits("b?????????????????001000100101011") + def AMOOR_W = Bits("b?????????????????010000100101011") + def AMOAND_W = Bits("b?????????????????011000100101011") + def AMOMIN_W = Bits("b?????????????????100000100101011") + def AMOMAX_W = Bits("b?????????????????101000100101011") + def AMOMINU_W = Bits("b?????????????????110000100101011") + def AMOMAXU_W = Bits("b?????????????????111000100101011") + def AMOSWAP_W = Bits("b?????????????????000010100101011") + def LR_W = Bits("b??????????00000??000100100101011") + def SC_W = Bits("b?????????????????000110100101011") + def AMOADD_D = Bits("b?????????????????000000110101011") + def AMOXOR_D = Bits("b?????????????????001000110101011") + def AMOOR_D = Bits("b?????????????????010000110101011") + def AMOAND_D = Bits("b?????????????????011000110101011") + def AMOMIN_D = Bits("b?????????????????100000110101011") + def AMOMAX_D = Bits("b?????????????????101000110101011") + def AMOMINU_D = Bits("b?????????????????110000110101011") + def AMOMAXU_D = Bits("b?????????????????111000110101011") + def AMOSWAP_D = Bits("b?????????????????000010110101011") + def LR_D = Bits("b??????????00000??000100110101011") + def SC_D = Bits("b?????????????????000110110101011") + def FENCE = Bits("b???????????????????????000101111") + def FENCE_I = Bits("b???????????????????????010101111") + def FENCE_V_L = Bits("b???????????????????????100101111") + def FENCE_V_G = Bits("b???????????????????????110101111") def SYSCALL = 
Bits("b00000000000000000000000001110111") def BREAK = Bits("b00000000000000000000000011110111") def RDCYCLE = Bits("b?????000000000000000001001110111") @@ -357,7 +358,6 @@ object Disassemble BGE-> List(Str("bge "), FMT_B), BGEU-> List(Str("bgeu "), FMT_B), - J-> List(Str("j "), FMT_J), JAL-> List(Str("jal "), FMT_J), JALR-> List(Str("jalr "), FMT_LD), AUIPC-> List(Str("auipc "), FMT_L), From 59f5358435f1f52b18d32f2ee3807ca0b38a55fc Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 12 Sep 2013 16:07:30 -0700 Subject: [PATCH 0632/1087] Implement AQ/RL; move fence logic out of cache --- rocket/src/main/scala/ctrl.scala | 492 ++++++++++++++------------- rocket/src/main/scala/nbdcache.scala | 12 +- 2 files changed, 262 insertions(+), 242 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 53bb7dfa..273abf88 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -68,13 +68,15 @@ abstract trait DecodeConstants val xpr64 = Y; val decode_default = - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,BR_X, X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,WB_X, PCR.X,N,X,X,X,X) + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,BR_X, X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,WB_X, PCR.X,N,X,X,X,X,X,X) val table: Array[(UInt, 
List[UInt])] } @@ -82,238 +84,246 @@ abstract trait DecodeConstants object XDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, 
PCR.N,N,N,N,N,N,N,N), + BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - JAL-> List(Y, N,N,BR_J, N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - JALR-> List(Y, N,N,BR_N, Y,N,Y,A2_FOUR,A1_PC, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + JAL-> List(Y, N,N,BR_J, N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + JALR-> List(Y, N,N,BR_N, Y,N,Y,A2_FOUR,A1_PC, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - LB-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - LH-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - LW-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - LBU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - LHU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SB-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - SH-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - 
SW-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + LB-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + LH-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + LW-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + LBU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + LHU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SB-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SH-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SW-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, 
Y,M_XA_OR, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOXOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOAND_W-> List(Y, N,N,BR_N, 
N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOXOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - LR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, 
MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - LR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SC_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SC_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + LR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + LR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + SC_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + SC_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - LUI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - ORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - XORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, 
MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - AND-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - OR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - XOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + LUI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + ORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + XORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, 
N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + AND-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + OR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + XOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, 
N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), - SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N), + ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N), - MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N), - MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, 
IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N), - MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N), - MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N), + MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), - DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), - REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), - REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), - DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), - DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), - REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), - REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N), + DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, 
IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,Y,N,N), - SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.S,N,N,N,Y,N), - CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.C,N,N,N,Y,N), - ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,Y,N,Y,N), - FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WB_X, PCR.N,Y,N,N,N,Y), - MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.F,N,N,N,Y,N), - MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RS2, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.T,N,N,N,Y,N), - RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_TSC,PCR.N,N,N,N,N,N), - RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_TSC,PCR.N,N,N,N,N,N), - RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, 
MT_X, N,N,Y,WB_IRT,PCR.N,N,N,N,N,N)) + SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,Y,N,N,N,N), + SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.S,N,N,N,Y,N,N,N), + CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.C,N,N,N,Y,N,N,N), + ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,Y,N,Y,N,N,N), + FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,Y,N), + FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,Y,N,N,N,Y,Y,N), + MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.F,N,N,N,Y,N,N,N), + MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RS2, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.T,N,N,N,Y,N,N,N), + RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_TSC,PCR.N,N,N,N,N,N,N,N), + RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_TSC,PCR.N,N,N,N,N,N,N,N), + RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_IRT,PCR.N,N,N,N,N,N,N,N)) } object FDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FCVT_D_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FSGNJ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, 
PCR.N,N,N,N,N,N), - FSGNJ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FSGNJX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FSGNJX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FSGNJN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FSGNJN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FMIN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FMIN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FMAX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FMAX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FMUL_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FMUL_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - 
FMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FNMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FNMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FMV_X_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FMV_X_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FCVT_W_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FCVT_WU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FCVT_WU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FCVT_L_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FCVT_L_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FCVT_LU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FCVT_LU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FEQ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FEQ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FLT_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FLT_D-> 
List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FLE_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FLE_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FMV_S_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FMV_D_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FCVT_S_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FCVT_D_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FCVT_S_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FCVT_D_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FCVT_S_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FCVT_D_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FCVT_S_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FCVT_D_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FRSR-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FSSR-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N), - FLW-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - FLD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - FSW-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, 
N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - FSD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N)) + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_D_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FSGNJ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FSGNJ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FSGNJX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FSGNJX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FSGNJN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FSGNJN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMIN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMIN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMAX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMAX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, 
DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMUL_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMUL_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FNMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FNMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMV_X_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FMV_X_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_W_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + 
FCVT_WU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_WU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_L_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_L_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_LU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_LU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FEQ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FEQ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FLT_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FLT_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FLE_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FLE_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FMV_S_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FMV_D_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_S_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_D_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_S_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_D_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, 
IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_S_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_D_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_S_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FCVT_D_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FRSR-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FSSR-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + FLW-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + FLD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + FSW-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + FSD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N)) } object VDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | - VSETCFGVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,Y), - VSETVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,Y), - VF-> List(Y, N,Y,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, 
IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FENCE_V_L-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - FENCE_V_G-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FENCE, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N), - VLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLWU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLHU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLBU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VSH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VSB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VFLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VFLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VFSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VFSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLSTWU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLSTHU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VLSTBU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VSSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VSSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VFLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VFLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VFSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), - VFSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N), + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | 
s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | + VSETCFGVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,Y,N,N), + VSETVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,Y,N,N), + VF-> List(Y, N,Y,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FENCE_V_L-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + FENCE_V_G-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + VLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLWU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLHU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLBU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VSH-> List(Y, 
N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VSB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VFLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VFLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VFSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VFSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLSTWU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLSTHU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VLSTBU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VSSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VSSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, 
N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VFLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VFLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VFSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + VFSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VENQCMD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N), - VENQIMM1-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N), - VENQIMM2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N), - VENQCNT-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N), - VXCPTEVAC-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N), - VXCPTKILL-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,Y,N), - VXCPTHOLD-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,Y,N)) + VENQCMD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N,N,N), + VENQIMM1-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N,N,N), + VENQIMM2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N,N,N), + VENQCNT-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N,N,N), + VXCPTEVAC-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N,N,N), + VXCPTKILL-> List(Y, N,Y,BR_N, N,N,N,A2_X, 
A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,Y,N,N,N), + VXCPTHOLD-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,Y,N,N,N)) } class Control(implicit conf: RocketConfiguration) extends Module @@ -346,13 +356,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_vec_val: Bool) :: id_br_type :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs val id_sel_alu2 :: id_sel_alu1 :: id_sel_imm :: (id_fn_dw: Bool) :: id_fn_alu :: cs1 = cs0 val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: id_sel_wb :: cs2 = cs1 - val id_pcr :: (id_fence_i: Bool) :: (id_eret: Bool) :: (id_syscall: Bool) :: (id_privileged: Bool) :: (id_replay_next: Bool) :: Nil = cs2 - - val id_raddr3 = io.dpath.inst(16,12) - val id_raddr2 = io.dpath.inst(21,17) - val id_raddr1 = io.dpath.inst(26,22) - val id_waddr = io.dpath.inst(31,27) - val id_load_use = Bool(); + val id_pcr :: (id_fence_i: Bool) :: (id_eret: Bool) :: (id_syscall: Bool) :: (id_privileged: Bool) :: (id_replay_next: Bool) :: (id_fence: Bool) :: (id_amo: Bool) :: Nil = cs2 val ex_reg_xcpt_interrupt = Reg(init=Bool(false)) val ex_reg_valid = Reg(init=Bool(false)) @@ -412,6 +416,13 @@ class Control(implicit conf: RocketConfiguration) extends Module val ctrl_killx = Bool() val ctrl_killm = Bool() + val id_raddr3 = io.dpath.inst(16,12) + val id_raddr2 = io.dpath.inst(21,17) + val id_raddr1 = io.dpath.inst(26,22) + val id_waddr = io.dpath.inst(31,27) + val id_load_use = Bool(); + val id_reg_fence = Reg(init=Bool(false)) + val sr = io.dpath.status var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(CAUSE_INTERRUPT+i))) @@ -462,6 +473,14 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_pcr_flush = id_pcr != PCR.N && id_pcr != PCR.F && id_raddr1 != PCR.SUP0 && id_raddr1 != PCR.SUP1 && 
id_raddr1 != PCR.EPC + // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) + val id_amo_aq = io.dpath.inst(16) + val id_amo_rl = io.dpath.inst(15) + val id_fence_next = id_fence || id_amo && id_amo_rl + val id_fence_ok = io.dmem.ordered && !ex_reg_mem_val + id_reg_fence := id_fence_next || id_reg_fence && !id_fence_ok + val id_do_fence = id_amo && id_amo_aq || id_reg_fence && id_mem_val || id_pcr_flush + val (id_xcpt, id_cause) = checkExceptions(List( (id_interrupt, id_interrupt_cause), (io.imem.resp.bits.xcpt_ma, UInt(0)), @@ -485,12 +504,12 @@ class Control(implicit conf: RocketConfiguration) extends Module ex_reg_fp_wen := Bool(false); ex_reg_eret := Bool(false); ex_reg_flush_inst := Bool(false); - ex_reg_fp_val := Bool(false); - ex_reg_vec_val := Bool(false); - ex_reg_replay_next := Bool(false); - ex_reg_load_use := Bool(false); - ex_reg_pcr := PCR.N - ex_reg_br_type := BR_N + ex_reg_fp_val := Bool(false) + ex_reg_vec_val := Bool(false) + ex_reg_replay_next := Bool(false) + ex_reg_load_use := Bool(false) + ex_reg_pcr := PCR.N + ex_reg_br_type := BR_N ex_reg_xcpt := Bool(false) } .otherwise { @@ -505,10 +524,10 @@ class Control(implicit conf: RocketConfiguration) extends Module ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen ex_reg_eret := id_eret.toBool; ex_reg_flush_inst := id_fence_i - ex_reg_fp_val := id_fp_val - ex_reg_vec_val := id_vec_val.toBool - ex_reg_replay_next := id_replay_next || id_pcr_flush - ex_reg_load_use := id_load_use; + ex_reg_fp_val := id_fp_val + ex_reg_vec_val := id_vec_val.toBool + ex_reg_replay_next := id_replay_next || id_pcr_flush + ex_reg_load_use := id_load_use ex_reg_mem_cmd := id_mem_cmd ex_reg_mem_type := id_mem_type.toUInt ex_reg_xcpt := id_xcpt @@ -718,6 +737,7 @@ class Control(implicit conf: RocketConfiguration) extends Module id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || id_fp_val && id_stall_fpu || id_mem_val && !io.dmem.req.ready || + id_do_fence && !id_fence_ok || vec_stalld val 
ctrl_draind = id_interrupt || ex_reg_replay_next ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2f81e406..3d0dcb7a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -679,8 +679,9 @@ class AMOALU(implicit conf: DCacheConfig) extends Module { val out = Mux(io.cmd === M_XA_ADD, adder_out, Mux(io.cmd === M_XA_AND, io.lhs & io.rhs, Mux(io.cmd === M_XA_OR, io.lhs | io.rhs, + Mux(io.cmd === M_XA_XOR, io.lhs ^ io.rhs, Mux(Mux(less, min, max), io.lhs, - io.rhs)))) + io.rhs))))) val wmask = FillInterleaved(8, StoreGen(io.typ, io.addr).mask) io.out := wmask & out | ~wmask & io.lhs @@ -724,6 +725,7 @@ class HellaCacheIO(implicit conf: DCacheConfig) extends Bundle { val resp = Valid(new HellaCacheResp).flip val xcpt = (new HellaCacheExceptions).asInput val ptw = (new TLBPTWIO).flip + val ordered = Bool(INPUT) } class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { @@ -1013,8 +1015,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends when (s2_nack_hit) { mshrs.io.req.valid := Bool(false) } val s2_nack_victim = s2_hit && mshrs.io.secondary_miss val s2_nack_miss = !s2_hit && !mshrs.io.req.ready - val s2_nack_fence = s2_req.cmd === M_FENCE && !mshrs.io.fence_rdy - val s2_nack = s2_nack_hit || s2_nack_victim || s2_nack_miss || s2_nack_fence + val s2_nack = s2_nack_hit || s2_nack_victim || s2_nack_miss s2_valid_masked := s2_valid && !s2_nack val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable @@ -1023,11 +1024,9 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends s2_recycle := s2_recycle_ecc || s2_recycle_next // after a nack, block until nack condition resolves to save energy - val block_fence = Reg(init=Bool(false)) - block_fence := (s2_valid && s2_req.cmd === M_FENCE || block_fence) && 
!mshrs.io.fence_rdy val block_miss = Reg(init=Bool(false)) block_miss := (s2_valid || block_miss) && s2_nack_miss - when (block_fence || block_miss) { + when (block_miss) { io.cpu.req.ready := Bool(false) } @@ -1039,6 +1038,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends io.cpu.resp.bits.data := loadgen.word io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte) io.cpu.resp.bits.store_data := s2_req.data + io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid io.mem.grant_ack <> mshrs.io.mem_finish } From 1edb1e2a0aa73d94a5df35e90bdc1906d1fd8458 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 12 Sep 2013 17:55:58 -0700 Subject: [PATCH 0633/1087] Ignore LSB of PC --- rocket/src/main/scala/icache.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 17c6e88b..180caf32 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -59,7 +59,8 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu val icache = Module(new ICache) val tlb = Module(new TLB(c.ntlb)) - val s1_pc = Reg(UInt()) + val s1_pc_ = Reg(UInt()) + val s1_pc = s1_pc_ & SInt(-2) // discard LSB of PC (throughout the pipeline) val s1_same_block = Reg(Bool()) val s2_valid = Reg(init=Bool(true)) val s2_pc = Reg(init=UInt(START_ADDR)) @@ -77,7 +78,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu val stall = io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { s1_same_block := s0_same_block && !tlb.io.resp.miss - s1_pc := npc + s1_pc_ := npc s2_valid := !icmiss when (!icmiss) { s2_pc := s1_pc @@ -87,7 +88,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu } when (io.cpu.req.valid) { s1_same_block := Bool(false) - s1_pc := io.cpu.req.bits.pc + s1_pc_ := io.cpu.req.bits.pc s2_valid := Bool(false) } From 
d053bdc89f755a79d4759e4ff8a2a6ee9bb35cb4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 12 Sep 2013 22:34:38 -0700 Subject: [PATCH 0634/1087] Remove Hwacha from Rocket Soon it will use the coprocessor interface. --- rocket/src/main/scala/core.scala | 109 +-------- rocket/src/main/scala/ctrl.scala | 163 +++++--------- rocket/src/main/scala/ctrl_vec.scala | 273 ----------------------- rocket/src/main/scala/dpath.scala | 26 --- rocket/src/main/scala/dpath_util.scala | 45 ++-- rocket/src/main/scala/dpath_vec.scala | 186 --------------- rocket/src/main/scala/instructions.scala | 152 +++---------- rocket/src/main/scala/nbdcache.scala | 1 - rocket/src/main/scala/rocc.scala | 6 + rocket/src/main/scala/tile.scala | 33 +-- 10 files changed, 121 insertions(+), 873 deletions(-) delete mode 100644 rocket/src/main/scala/ctrl_vec.scala delete mode 100644 rocket/src/main/scala/dpath_vec.scala create mode 100644 rocket/src/main/scala/rocc.scala diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 56e4f0aa..8067f3c9 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -1,16 +1,15 @@ package rocket import Chisel._ -import hwacha._ import uncore.constants.MemoryOpConstants._ import Util._ class RocketIO(implicit conf: RocketConfiguration) extends Bundle { - val host = new HTIFIO(conf.tl.ln.nClients) - val imem = new CPUFrontendIO()(conf.icache) - val vimem = new CPUFrontendIO()(conf.icache) - val dmem = new HellaCacheIO()(conf.dcache) + val host = new HTIFIO(conf.tl.ln.nClients) + val imem = new CPUFrontendIO()(conf.icache) + val dmem = new HellaCacheIO()(conf.dcache) + val ptw = new DatapathPTWIO().flip } class Core(implicit conf: RocketConfiguration) extends Module @@ -26,107 +25,17 @@ class Core(implicit conf: RocketConfiguration) extends Module ctrl.io.imem <> io.imem dpath.io.imem <> io.imem - val dmemArb = Module(new HellaCacheArbiter(2 + conf.vec)) - dmemArb.io.mem <> io.dmem - val dmem = 
dmemArb.io.requestor - dmem(1) <> ctrl.io.dmem - dmem(1) <> dpath.io.dmem + ctrl.io.dmem <> io.dmem + dpath.io.dmem <> io.dmem - val ptw = collection.mutable.ArrayBuffer(io.imem.ptw, io.dmem.ptw) + dpath.io.ptw <> io.ptw val fpu: FPU = if (conf.fpu) { val fpu = Module(new FPU(4,6)) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl + fpu.io.sfma.valid := Bool(false) // hook these up to coprocessor? + fpu.io.dfma.valid := Bool(false) fpu } else null - - if (conf.vec) { - val vu = Module(new vu(Reg(next=this.reset))) - - val vdtlb = Module(new TLB(8)) - ptw += vdtlb.io.ptw - vdtlb.io <> vu.io.vtlb - - val pftlb = Module(new TLB(2)) - pftlb.io <> vu.io.vpftlb - ptw += pftlb.io.ptw - - dpath.io.vec_ctrl <> ctrl.io.vec_dpath - - // hooking up vector I$ - ptw += io.vimem.ptw - io.vimem.req.bits.pc := vu.io.imem_req.bits - io.vimem.req.valid := vu.io.imem_req.valid - io.vimem.invalidate := ctrl.io.imem.invalidate - vu.io.imem_resp.valid := io.vimem.resp.valid - vu.io.imem_resp.bits.pc := io.vimem.resp.bits.pc - vu.io.imem_resp.bits.data := io.vimem.resp.bits.data - vu.io.imem_resp.bits.xcpt_ma := io.vimem.resp.bits.xcpt_ma - vu.io.imem_resp.bits.xcpt_if := io.vimem.resp.bits.xcpt_if - io.vimem.resp.ready := vu.io.imem_resp.ready - io.vimem.req.bits.mispredict := Bool(false) - io.vimem.req.bits.taken := Bool(false) - - ctrl.io.vec_iface.vcmdq <> vu.io.vcmdq - ctrl.io.vec_iface.vximm1q <> vu.io.vximm1q - ctrl.io.vec_iface.vximm2q <> vu.io.vximm2q - ctrl.io.vec_iface.vcntq <> vu.io.vcntq - - dpath.io.vec_iface.vcmdq <> vu.io.vcmdq - dpath.io.vec_iface.vximm1q <> vu.io.vximm1q - dpath.io.vec_iface.vximm2q <> vu.io.vximm2q - dpath.io.vec_iface.vcntq <> vu.io.vcntq - - ctrl.io.vec_iface.vpfcmdq <> vu.io.vpfcmdq - ctrl.io.vec_iface.vpfximm1q <> vu.io.vpfximm1q - ctrl.io.vec_iface.vpfximm2q <> vu.io.vpfximm2q - ctrl.io.vec_iface.vpfcntq <> vu.io.vpfcntq - - dpath.io.vec_iface.vpfcmdq <> vu.io.vpfcmdq - dpath.io.vec_iface.vpfximm1q <> vu.io.vpfximm1q - 
dpath.io.vec_iface.vpfximm2q <> vu.io.vpfximm2q - dpath.io.vec_iface.vpfcntq <> vu.io.vpfcntq - - // user level vector command queue ready signals - ctrl.io.vec_iface.vcmdq_user_ready := vu.io.vcmdq_user_ready - ctrl.io.vec_iface.vximm1q_user_ready := vu.io.vximm1q_user_ready - ctrl.io.vec_iface.vximm2q_user_ready := vu.io.vximm2q_user_ready - - // fences - ctrl.io.vec_iface.vfence_ready := vu.io.vfence_ready - - // irqs - ctrl.io.vec_iface.irq := vu.io.irq - ctrl.io.vec_iface.irq_cause := vu.io.irq_cause - dpath.io.vec_iface.irq_aux := vu.io.irq_aux - - // exceptions - vu.io.xcpt.exception := ctrl.io.vec_iface.exception - vu.io.xcpt.evac := ctrl.io.vec_iface.evac - vu.io.xcpt.evac_addr := dpath.io.vec_iface.evac_addr.toUInt - vu.io.xcpt.kill := ctrl.io.vec_iface.kill - vu.io.xcpt.hold := ctrl.io.vec_iface.hold - - // hooking up vector memory interface - dmem(2).req.bits.data := RegEnable(StoreGen(vu.io.dmem_req.bits).data, vu.io.dmem_req.valid && isWrite(vu.io.dmem_req.bits.cmd)) - dmem(2).req <> vu.io.dmem_req - dmem(2).resp <> vu.io.dmem_resp - - // DON'T share vector integer multiplier with rocket - vu.io.cp_imul_req.valid := Bool(false) - - // share sfma and dfma pipelines with rocket - require(conf.fpu) - fpu.io.sfma <> vu.io.cp_sfma - fpu.io.dfma <> vu.io.cp_dfma - } else if (conf.fpu) { - fpu.io.sfma.valid := Bool(false) - fpu.io.dfma.valid := Bool(false) - } - - val thePTW = Module(new PTW(ptw.length)) - ptw zip thePTW.io.requestor map { case (a, b) => a <> b } - thePTW.io.dpath <> dpath.io.ptw - dmem(0) <> thePTW.io.mem } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 273abf88..e1adea1a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -2,7 +2,6 @@ package rocket import Chisel._ import Instructions._ -import hwacha._ import uncore.constants.MemoryOpConstants._ import ALU._ import Util._ @@ -45,7 +44,6 @@ class CtrlDpathIO extends Bundle() val exception = Bool(OUTPUT); val cause 
= UInt(OUTPUT, 6); val badvaddr_wen = Bool(OUTPUT); // high for a load/store access fault - val vec_irq_aux_wen = Bool(OUTPUT) // inputs from datapath val inst = Bits(INPUT, 32); val jalr_eq = Bool(INPUT) @@ -71,7 +69,7 @@ abstract trait DecodeConstants // fence.i // jalr mul_val | eret // fp_val | renx2 | div_val | | syscall - // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // | rocc_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next // | | | | | | | | | | | | | | | | | | | | | | | | | fence // | | | | | | | | | | | | | | | | | | | | | | | | | | amo @@ -87,7 +85,7 @@ object XDecode extends DecodeConstants // fence.i // jalr mul_val | eret // fp_val | renx2 | div_val | | syscall - // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // | rocc_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next // | | | | | | | | | | | | | | | | | | | | | | | | | fence // | | | | | | | | | | | | | | | | | | | | | | | | | | amo @@ -204,7 +202,7 @@ object FDecode extends DecodeConstants // fence.i // jalr mul_val | eret // fp_val | renx2 | div_val | | syscall - // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // | rocc_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next // | | | | | | | | | | | | | | | | | | | | | | | | | fence // | | | | | | | | | | | | | | | | | | | | | | | | | | amo @@ -269,61 +267,41 @@ object FDecode extends DecodeConstants FSD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N)) } -object VDecode extends DecodeConstants +object RoCCDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | 
div_val | | syscall - // | vec_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | - VSETCFGVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,Y,N,N), - VSETVL-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,Y,N,N), - VF-> List(Y, N,Y,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VMVV-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FENCE_V_L-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FENCE_V_G-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - VLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLWU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLHU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLBU-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VSH-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VSB-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VFLD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VFLW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VFSD-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VFSW-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLSTWU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLSTHU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VLSTBU-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VSSTH-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, 
IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VSSTB-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VFLSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VFLSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VFSSTD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - VFSSTW-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - - VENQCMD-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N,N,N), - VENQIMM1-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N,N,N), - VENQIMM2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N,N,N), - VENQCNT-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N,N,N), - VXCPTEVAC-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,Y,N,N,N), - VXCPTKILL-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,Y,N,N,N), - VXCPTHOLD-> List(Y, N,Y,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,Y,N,N,N)) + // fence.i + // jalr mul_val | eret + // fp_val | renx2 | div_val | | syscall + // | rocc_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | + CUSTOM0-> List(Y, N,Y,BR_N, 
N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM0_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM0_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM0-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM0_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM0_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM1_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM1_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM1_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM1_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM2_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM2_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + 
CUSTOM2_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM2_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM3_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM3_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM3_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM3_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N)) } class Control(implicit conf: RocketConfiguration) extends Module @@ -339,13 +317,11 @@ class Control(implicit conf: RocketConfiguration) extends Module val xcpt_dtlb_ld = Bool(INPUT) val xcpt_dtlb_st = Bool(INPUT) val fpu = new CtrlFPUIO - val vec_dpath = new CtrlDpathVecIO - val vec_iface = new CtrlVecInterfaceIO } var decode_table = XDecode.table if (conf.fpu) decode_table ++= FDecode.table - if (conf.vec) decode_table ++= VDecode.table + if (!conf.rocc.isEmpty) decode_table ++= RoCCDecode.table val logic = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) val cs = logic.map { @@ -353,7 +329,7 @@ class Control(implicit conf: RocketConfiguration) extends Module case u => u } - val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_vec_val: Bool) :: id_br_type :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs + val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_rocc_val: Bool) :: id_br_type :: (id_jalr: Bool) :: (id_renx2: Bool) :: 
(id_renx1: Bool) :: cs0 = cs val id_sel_alu2 :: id_sel_alu1 :: id_sel_imm :: (id_fn_dw: Bool) :: id_fn_alu :: cs1 = cs0 val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: id_sel_wb :: cs2 = cs1 val id_pcr :: (id_fence_i: Bool) :: (id_eret: Bool) :: (id_syscall: Bool) :: (id_privileged: Bool) :: (id_replay_next: Bool) :: (id_fence: Bool) :: (id_amo: Bool) :: Nil = cs2 @@ -370,7 +346,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val ex_reg_mem_val = Reg(init=Bool(false)) val ex_reg_xcpt = Reg(init=Bool(false)) val ex_reg_fp_val = Reg(init=Bool(false)) - val ex_reg_vec_val = Reg(init=Bool(false)) + val ex_reg_rocc_val = Reg(init=Bool(false)) val ex_reg_replay_next = Reg(init=Bool(false)) val ex_reg_load_use = Reg(init=Bool(false)) val ex_reg_pcr = Reg(init=PCR.N) @@ -389,7 +365,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val mem_reg_mem_val = Reg(init=Bool(false)) val mem_reg_xcpt = Reg(init=Bool(false)) val mem_reg_fp_val = Reg(init=Bool(false)) - val mem_reg_vec_val = Reg(init=Bool(false)) + val mem_reg_rocc_val = Reg(init=Bool(false)) val mem_reg_replay = Reg(init=Bool(false)) val mem_reg_replay_next = Reg(init=Bool(false)) val mem_reg_pcr = Reg(init=PCR.N) @@ -426,43 +402,6 @@ class Control(implicit conf: RocketConfiguration) extends Module val sr = io.dpath.status var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(CAUSE_INTERRUPT+i))) - val (vec_replay, vec_stalld) = if (conf.vec) { - // vector control - val vec = Module(new rocketCtrlVec) - - io.vec_dpath <> vec.io.dpath - io.vec_iface <> vec.io.iface - - vec.io.valid := wb_reg_valid - vec.io.s := io.dpath.status.s - vec.io.sr_ev := io.dpath.status.ev - vec.io.exception := wb_reg_xcpt - vec.io.eret := wb_reg_eret - - val vec_dec = Module(new rocketCtrlVecDecoder) - vec_dec.io.inst := io.dpath.inst - - val s = io.dpath.status.s - val mask_cmdq_ready = 
!vec_dec.io.sigs.enq_cmdq || s && io.vec_iface.vcmdq.ready || !s && io.vec_iface.vcmdq_user_ready - val mask_ximm1q_ready = !vec_dec.io.sigs.enq_ximm1q || s && io.vec_iface.vximm1q.ready || !s && io.vec_iface.vximm1q_user_ready - val mask_ximm2q_ready = !vec_dec.io.sigs.enq_ximm2q || s && io.vec_iface.vximm2q.ready || !s && io.vec_iface.vximm2q_user_ready - val mask_cntq_ready = !vec_dec.io.sigs.enq_cntq || io.vec_iface.vcntq.ready - val mask_pfcmdq_ready = !vec_dec.io.sigs.enq_pfcmdq || io.vec_iface.vpfcmdq.ready - val mask_pfximm1q_ready = !vec_dec.io.sigs.enq_pfximm1q || io.vec_iface.vpfximm1q.ready - val mask_pfximm2q_ready = !vec_dec.io.sigs.enq_pfximm2q || io.vec_iface.vpfximm2q.ready - val mask_pfcntq_ready = !vec_dec.io.sigs.enq_pfcntq || io.vec_iface.vpfcntq.ready - - id_interrupts = id_interrupts :+ (vec.io.irq, vec.io.irq_cause) - - val stalld = - id_vec_val && ( - !mask_cmdq_ready || !mask_ximm1q_ready || !mask_ximm2q_ready || !mask_cntq_ready || - !mask_pfcmdq_ready || !mask_pfximm1q_ready || !mask_pfximm2q_ready || !mask_pfcntq_ready || - vec_dec.io.sigs.vfence && !vec.io.vfence_ready) - - (vec.io.replay, stalld) - } else (Bool(false), Bool(false)) - val (id_interrupt_unmasked, id_interrupt_cause) = checkExceptions(id_interrupts) val id_interrupt = io.dpath.status.ei && id_interrupt_unmasked @@ -489,7 +428,7 @@ class Control(implicit conf: RocketConfiguration) extends Module (id_privileged && !io.dpath.status.s, UInt(3)), (id_fp_val && !io.dpath.status.ef, UInt(4)), (id_syscall, UInt(6)), - (id_vec_val && !io.dpath.status.ev, UInt(12)))) + (id_rocc_val && !io.dpath.status.er, UInt(12)))) ex_reg_xcpt_interrupt := id_interrupt && !take_pc && io.imem.resp.valid when (id_xcpt) { ex_reg_cause := id_cause } @@ -505,7 +444,7 @@ class Control(implicit conf: RocketConfiguration) extends Module ex_reg_eret := Bool(false); ex_reg_flush_inst := Bool(false); ex_reg_fp_val := Bool(false) - ex_reg_vec_val := Bool(false) + ex_reg_rocc_val := Bool(false) 
ex_reg_replay_next := Bool(false) ex_reg_load_use := Bool(false) ex_reg_pcr := PCR.N @@ -525,7 +464,7 @@ class Control(implicit conf: RocketConfiguration) extends Module ex_reg_eret := id_eret.toBool; ex_reg_flush_inst := id_fence_i ex_reg_fp_val := id_fp_val - ex_reg_vec_val := id_vec_val.toBool + ex_reg_rocc_val := id_rocc_val.toBool ex_reg_replay_next := id_replay_next || id_pcr_flush ex_reg_load_use := id_load_use ex_reg_mem_cmd := id_mem_cmd @@ -564,7 +503,7 @@ class Control(implicit conf: RocketConfiguration) extends Module mem_reg_mem_val := Bool(false); mem_reg_flush_inst := Bool(false); mem_reg_fp_val := Bool(false) - mem_reg_vec_val := Bool(false) + mem_reg_rocc_val := Bool(false) mem_reg_replay_next := Bool(false) mem_reg_xcpt := Bool(false) } @@ -577,7 +516,7 @@ class Control(implicit conf: RocketConfiguration) extends Module mem_reg_mem_val := ex_reg_mem_val; mem_reg_flush_inst := ex_reg_flush_inst; mem_reg_fp_val := ex_reg_fp_val - mem_reg_vec_val := ex_reg_vec_val + mem_reg_rocc_val := ex_reg_rocc_val mem_reg_replay_next := ex_reg_replay_next mem_reg_slow_bypass := ex_slow_bypass mem_reg_xcpt := ex_xcpt @@ -591,7 +530,7 @@ class Control(implicit conf: RocketConfiguration) extends Module (mem_reg_mem_val && io.dmem.xcpt.pf.st, UInt(11)))) val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem - val ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen || mem_reg_vec_val || mem_reg_pcr != PCR.N) + val ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen || mem_reg_rocc_val || mem_reg_pcr != PCR.N) val replay_mem = ll_wb_kill_mem || mem_reg_replay || fpu_kill_mem val killm_common = ll_wb_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem @@ -623,7 +562,7 @@ class Control(implicit conf: RocketConfiguration) extends Module wb_reg_fp_val := mem_reg_fp_val } - val replay_wb = io.dmem.resp.bits.nack || wb_reg_replay || vec_replay || io.dpath.pcr_replay + val replay_wb 
= io.dmem.resp.bits.nack || wb_reg_replay || io.dpath.pcr_replay class Scoreboard(n: Int) { @@ -667,7 +606,6 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.exception := wb_reg_xcpt io.dpath.cause := wb_reg_cause io.dpath.badvaddr_wen := wb_reg_xcpt && (wb_reg_cause === UInt(10) || wb_reg_cause === UInt(11)) - io.dpath.vec_irq_aux_wen := wb_reg_xcpt && wb_reg_cause >= UInt(24) && wb_reg_cause < UInt(32) // control transfer from ex/wb take_pc_wb := replay_wb || wb_reg_xcpt || wb_reg_eret @@ -737,8 +675,7 @@ class Control(implicit conf: RocketConfiguration) extends Module id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || id_fp_val && id_stall_fpu || id_mem_val && !io.dmem.req.ready || - id_do_fence && !id_fence_ok || - vec_stalld + id_do_fence && !id_fence_ok val ctrl_draind = id_interrupt || ex_reg_replay_next ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind @@ -770,8 +707,8 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.eret := wb_reg_eret io.dpath.ex_mem_type := ex_reg_mem_type io.dpath.ex_br_type := ex_reg_br_type - io.dpath.ex_rs2_val := ex_reg_mem_val && isWrite(ex_reg_mem_cmd) || ex_reg_vec_val - io.dpath.mem_rs2_val := mem_reg_vec_val + io.dpath.ex_rs2_val := ex_reg_mem_val && isWrite(ex_reg_mem_cmd) || ex_reg_rocc_val + io.dpath.mem_rs2_val := mem_reg_rocc_val io.fpu.valid := !ctrl_killd && id_fp_val io.fpu.killx := ctrl_killx diff --git a/rocket/src/main/scala/ctrl_vec.scala b/rocket/src/main/scala/ctrl_vec.scala deleted file mode 100644 index 5e048742..00000000 --- a/rocket/src/main/scala/ctrl_vec.scala +++ /dev/null @@ -1,273 +0,0 @@ -package rocket - -import Chisel._ -import Instructions._ -import hwacha.Constants._ - -class CtrlDpathVecIO extends Bundle -{ - val inst = Bits(INPUT, 32) - val appvl0 = Bool(INPUT) - val pfq = Bool(INPUT) - val wen = Bool(OUTPUT) - val fn = Bits(OUTPUT, 2) - val sel_vcmd = Bits(OUTPUT, 3) - val sel_vimm = Bits(OUTPUT, 1) 
- val sel_vimm2 = Bits(OUTPUT, 1) -} - -class CtrlVecInterfaceIO extends Bundle -{ - val vcmdq = Decoupled(Bits(width = SZ_VCMD)) - val vximm1q = Decoupled(Bits(width = SZ_VIMM)) - val vximm2q = Decoupled(Bits(width = SZ_VSTRIDE)) - val vcntq = Decoupled(Bits(width = SZ_VLEN+1)) - - val vpfcmdq = Decoupled(Bits(width = SZ_VCMD)) - val vpfximm1q = Decoupled(Bits(width = SZ_VIMM)) - val vpfximm2q = Decoupled(Bits(width = SZ_VSTRIDE)) - val vpfcntq = Decoupled(Bits(width = SZ_VLEN)) - - val vcmdq_user_ready = Bool(INPUT) - val vximm1q_user_ready = Bool(INPUT) - val vximm2q_user_ready = Bool(INPUT) - val vfence_ready = Bool(INPUT) - - val irq = Bool(INPUT) - val irq_cause = UInt(INPUT, 5) - - val exception = Bool(OUTPUT) - - val evac = Bool(OUTPUT) - val kill = Bool(OUTPUT) - val hold = Bool(OUTPUT) -} - -class CtrlVecIO extends Bundle -{ - val dpath = new CtrlDpathVecIO - val iface = new CtrlVecInterfaceIO - val valid = Bool(INPUT) - val s = Bool(INPUT) - val sr_ev = Bool(INPUT) - val exception = Bool(INPUT) - val eret = Bool(INPUT) - val replay = Bool(OUTPUT) - val vfence_ready = Bool(OUTPUT) - val irq = Bool(OUTPUT) - val irq_cause = UInt(OUTPUT, 5) -} - -class rocketCtrlVecSigs extends Bundle -{ - val valid = Bool() - val sel_vcmd = Bits(width = 3) - val sel_vimm = Bits(width = 1) - val sel_vimm2 = Bits(width = 1) - val wen = Bool() - val fn = Bits(width = 2) - val appvlmask = Bool() - val enq_cmdq = Bool() - val enq_ximm1q = Bool() - val enq_ximm2q = Bool() - val enq_cntq = Bool() - val enq_pfcmdq = Bool() - val enq_pfximm1q = Bool() - val enq_pfximm2q = Bool() - val enq_pfcntq = Bool() - val pfaq = Bool() - val vfence = Bool() - val xcptevac = Bool() - val xcptkill = Bool() - val xcpthold = Bool() -} - -class rocketCtrlVecDecoder extends Module -{ - val io = new Bundle - { - val inst = Bits(INPUT, 32) - val sigs = new rocketCtrlVecSigs().asOutput - } - - val veccs = - ListLookup(io.inst, - // appvlmask - // | vcmdq - // | | vximm1q - // | | | vximm2q - // | | | | 
vcntq - // | | | | | vpfcmdq - // | | | | | | vpfximm1q - // | | | | | | | vpfximm2q - // | | | | | | | | vpfcntq - // | | | | | | | | | pfq - // | | | | | | | | | | vfence - // | | | | | | | | | | | xcptevac - // | | | | | | | | | | | | xcptkill - // wen | | | | | | | | | | | | | xcpthold - // val vcmd vimm vimm2 | fn | | | | | | | | | | | | | | - // | | | | | | | | | | | | | | | | | | | | - List(N,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,N,N),Array( - VSETCFGVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_CFGVL,N,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,N), - VSETVL-> List(Y,VCMD_I, VIMM_VLEN,VIMM2_X, Y,VEC_VL, N,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VF-> List(Y,VCMD_I, VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,N,N,N,N,N,N,N,N,N), - VMVV-> List(Y,VCMD_TX,VIMM_X, VIMM2_X, N,VEC_FN_N, Y,Y,N,N,N,N,N,N,N,N,N,N,N,N), - FENCE_V_L-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N), - FENCE_V_G-> List(Y,VCMD_F, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,Y,N,N,N), - VLD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VLW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VLWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VLH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VLHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VLB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VLBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VSD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VSW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VSH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VSB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VFLD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, 
Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VFLW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VFSD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VFSW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_X, N,VEC_FN_N, Y,Y,Y,N,N,Y,Y,N,N,N,N,N,N,N), - VLSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VLSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VLSTWU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VLSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VLSTHU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VLSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VLSTBU-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VSSTD-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VSSTW-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VSSTH-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VSSTB-> List(Y,VCMD_MX,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VFLSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VFLSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VFSSTD-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VFSSTW-> List(Y,VCMD_MF,VIMM_ALU, VIMM2_RS2,N,VEC_FN_N, Y,Y,Y,Y,N,Y,Y,Y,N,N,N,N,N,N), - VENQCMD-> List(Y,VCMD_A, VIMM_X, VIMM2_X, N,VEC_FN_N, N,Y,N,N,N,Y,N,N,N,Y,N,N,N,N), - VENQIMM1-> List(Y,VCMD_X, VIMM_ALU, VIMM2_X, N,VEC_FN_N, N,N,Y,N,N,N,Y,N,N,Y,N,N,N,N), - VENQIMM2-> List(Y,VCMD_X, VIMM_X, VIMM2_ALU,N,VEC_FN_N, N,N,N,Y,N,N,N,Y,N,Y,N,N,N,N), - VENQCNT-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,Y,N,N,N,Y,Y,N,N,N,N), - VXCPTEVAC-> List(Y,VCMD_X, VIMM_X, VIMM2_X, 
N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,Y,N,N), - VXCPTKILL-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,Y,N), - VXCPTHOLD-> List(Y,VCMD_X, VIMM_X, VIMM2_X, N,VEC_FN_N, N,N,N,N,N,N,N,N,N,N,N,N,N,Y) - )) - - val valid :: sel_vcmd :: sel_vimm :: sel_vimm2 :: wen :: fn :: appvlmask :: veccs0 = veccs - val enq_cmdq :: enq_ximm1q :: enq_ximm2q :: enq_cntq :: veccs1 = veccs0 - val enq_pfcmdq :: enq_pfximm1q :: enq_pfximm2q :: enq_pfcntq :: veccs2 = veccs1 - val pfaq :: vfence :: xcptevac :: xcptkill :: xcpthold :: Nil = veccs2 - - io.sigs.valid := valid.toBool - io.sigs.sel_vcmd := sel_vcmd - io.sigs.sel_vimm := sel_vimm - io.sigs.sel_vimm2 := sel_vimm2 - io.sigs.wen := wen.toBool - io.sigs.fn := fn - io.sigs.appvlmask := appvlmask.toBool - io.sigs.enq_cmdq := enq_cmdq.toBool - io.sigs.enq_ximm1q := enq_ximm1q.toBool - io.sigs.enq_ximm2q := enq_ximm2q.toBool - io.sigs.enq_cntq := enq_cntq.toBool - io.sigs.enq_pfcmdq := enq_pfcmdq.toBool - io.sigs.enq_pfximm1q := enq_pfximm1q.toBool - io.sigs.enq_pfximm2q := enq_pfximm2q.toBool - io.sigs.enq_pfcntq := enq_pfcntq.toBool - io.sigs.pfaq := pfaq.toBool - io.sigs.vfence := vfence.toBool - io.sigs.xcptevac := xcptevac.toBool - io.sigs.xcptkill := xcptkill.toBool - io.sigs.xcpthold := xcpthold.toBool -} - -class rocketCtrlVec extends Module -{ - val io = new CtrlVecIO - - val dec = Module(new rocketCtrlVecDecoder) - dec.io.inst := io.dpath.inst - - val valid_common = io.valid && io.sr_ev && dec.io.sigs.valid && !(dec.io.sigs.appvlmask && io.dpath.appvl0) - - val enq_pfcmdq_mask_pfq = dec.io.sigs.enq_pfcmdq && (!dec.io.sigs.pfaq || io.dpath.pfq) - val enq_pfximm1q_mask_pfq = dec.io.sigs.enq_pfximm1q && (!dec.io.sigs.pfaq || io.dpath.pfq) - val enq_pfximm2q_mask_pfq = dec.io.sigs.enq_pfximm2q && (!dec.io.sigs.pfaq || io.dpath.pfq) - val enq_pfcntq_mask_pfq = dec.io.sigs.enq_pfcntq && (!dec.io.sigs.pfaq || io.dpath.pfq) - - val mask_cmdq_ready = !dec.io.sigs.enq_cmdq || io.s && io.iface.vcmdq.ready || !io.s && 
io.iface.vcmdq_user_ready - val mask_ximm1q_ready = !dec.io.sigs.enq_ximm1q || io.s && io.iface.vximm1q.ready || !io.s && io.iface.vximm1q_user_ready - val mask_ximm2q_ready = !dec.io.sigs.enq_ximm2q || io.s && io.iface.vximm2q.ready || !io.s && io.iface.vximm2q_user_ready - val mask_cntq_ready = !dec.io.sigs.enq_cntq || io.iface.vcntq.ready - val mask_pfcmdq_ready = !enq_pfcmdq_mask_pfq || io.iface.vpfcmdq.ready - val mask_pfximm1q_ready = !enq_pfximm1q_mask_pfq || io.iface.vpfximm1q.ready - val mask_pfximm2q_ready = !enq_pfximm2q_mask_pfq || io.iface.vpfximm2q.ready - val mask_pfcntq_ready = !enq_pfcntq_mask_pfq || io.iface.vpfcntq.ready - - io.dpath.wen := dec.io.sigs.wen - io.dpath.fn := dec.io.sigs.fn - io.dpath.sel_vcmd := dec.io.sigs.sel_vcmd - io.dpath.sel_vimm := dec.io.sigs.sel_vimm - io.dpath.sel_vimm2 := dec.io.sigs.sel_vimm2 - - io.iface.vcmdq.valid := - valid_common && - dec.io.sigs.enq_cmdq && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && - mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready - - io.iface.vximm1q.valid := - valid_common && - mask_cmdq_ready && dec.io.sigs.enq_ximm1q && mask_ximm2q_ready && mask_cntq_ready && - mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready - - io.iface.vximm2q.valid := - valid_common && - mask_cmdq_ready && mask_ximm1q_ready && dec.io.sigs.enq_ximm2q && mask_cntq_ready && - mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready - - io.iface.vcntq.valid := - valid_common && - mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && dec.io.sigs.enq_cntq && - mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready - - io.iface.vpfcmdq.valid := - valid_common && - mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && - enq_pfcmdq_mask_pfq && mask_pfximm1q_ready && mask_pfximm2q_ready && mask_pfcntq_ready - - io.iface.vpfximm1q.valid := - valid_common 
&& - mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && - mask_pfcmdq_ready && enq_pfximm1q_mask_pfq && mask_pfximm2q_ready && mask_pfcntq_ready - - io.iface.vpfximm2q.valid := - valid_common && - mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && - mask_pfcmdq_ready && mask_pfximm1q_ready && enq_pfximm2q_mask_pfq && mask_pfcntq_ready - - io.iface.vpfcntq.valid := - valid_common && - mask_cmdq_ready && mask_ximm1q_ready && mask_ximm2q_ready && mask_cntq_ready && - mask_pfcmdq_ready && mask_pfximm1q_ready && mask_pfximm2q_ready && enq_pfcntq_mask_pfq - - io.replay := valid_common && ( - !mask_cmdq_ready || !mask_ximm1q_ready || !mask_ximm2q_ready || !mask_cntq_ready || - !mask_pfcmdq_ready || !mask_pfximm1q_ready || !mask_pfximm2q_ready || !mask_pfcntq_ready || - dec.io.sigs.vfence && !io.iface.vfence_ready - ) - - io.iface.exception := io.exception && io.sr_ev - - val reg_hold = Reg(init=Bool(false)) - - when (valid_common && dec.io.sigs.xcpthold) { reg_hold := Bool(true) } - when (io.eret) { reg_hold := Bool(false) } - - io.iface.evac := valid_common && dec.io.sigs.xcptevac - io.iface.kill := valid_common && dec.io.sigs.xcptkill - io.iface.hold := reg_hold - - io.vfence_ready := !io.sr_ev || io.iface.vfence_ready - io.irq := io.iface.irq - io.irq_cause := io.iface.irq_cause -} diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 77111f2b..7f2e9701 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -3,7 +3,6 @@ package rocket import Chisel._ import Instructions._ import Util._ -import hwacha._ import uncore.constants.AddressConstants._ class Datapath(implicit conf: RocketConfiguration) extends Module @@ -15,8 +14,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val ptw = (new DatapathPTWIO).flip val imem = new CPUFrontendIO()(conf.icache) val fpu = new DpathFPUIO - val vec_ctrl = (new CtrlDpathVecIO).flip - val 
vec_iface = new DpathVecInterfaceIO } // execute definitions @@ -292,29 +289,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Module Mux(io.ctrl.pcr != PCR.N, pcr.io.rw.rdata, wb_reg_wdata)) - if (conf.vec) - { - // vector datapath - val vec = Module(new rocketDpathVec) - - vec.io.ctrl <> io.vec_ctrl - io.vec_iface <> vec.io.iface - - vec.io.valid := io.ctrl.wb_valid && pcr.io.status.ev - vec.io.inst := wb_reg_inst - vec.io.vecbank := pcr.io.vecbank - vec.io.vecbankcnt := pcr.io.vecbankcnt - vec.io.wdata := wb_reg_wdata - vec.io.rs2 := wb_reg_store_data - - pcr.io.vec_irq_aux := vec.io.irq_aux - pcr.io.vec_appvl := vec.io.appvl - pcr.io.vec_nxregs := vec.io.nxregs - pcr.io.vec_nfregs := vec.io.nfregs - - when (vec.io.wen) { wb_wdata := vec.io.appvl } - } - when (wb_wen) { writeRF(wb_reg_waddr, wb_wdata) } io.ctrl.wb_waddr := wb_reg_waddr diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 2645ccae..6f357c0d 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -60,7 +60,7 @@ class Status extends Bundle { val ip = Bits(width = 8) val im = Bits(width = 8) val zero = Bits(width = 7) - val ev = Bool() + val er = Bool() val vm = Bool() val s64 = Bool() val u64 = Bool() @@ -99,8 +99,6 @@ object PCR val FATC = 13 val SEND_IPI = 14 val CLR_IPI = 15 - val VECBANK = 18 - val VECCFG = 19 val STATS = 28 val RESET = 29 val TOHOST = 30 @@ -127,8 +125,6 @@ class PCR(implicit conf: RocketConfiguration) extends Module val exception = Bool(INPUT) val cause = UInt(INPUT, 6) val badvaddr_wen = Bool(INPUT) - val vec_irq_aux = Bits(INPUT, conf.xprlen) - val vec_irq_aux_wen = Bool(INPUT) val pc = UInt(INPUT, VADDR_BITS+1) val eret = Bool(INPUT) val ei = Bool(INPUT) @@ -137,12 +133,7 @@ class PCR(implicit conf: RocketConfiguration) extends Module val irq_timer = Bool(OUTPUT) val irq_ipi = Bool(OUTPUT) val replay = Bool(OUTPUT) - val vecbank = Bits(OUTPUT, 8) - val vecbankcnt = 
UInt(OUTPUT, 4) val stats = Bool(OUTPUT) - val vec_appvl = UInt(INPUT, 12) - val vec_nxregs = UInt(INPUT, 6) - val vec_nfregs = UInt(INPUT, 6) } import PCR._ @@ -157,7 +148,6 @@ class PCR(implicit conf: RocketConfiguration) extends Module val reg_sup0 = Reg(Bits(width = conf.xprlen)) val reg_sup1 = Reg(Bits(width = conf.xprlen)) val reg_ptbr = Reg(UInt(width = PADDR_BITS)) - val reg_vecbank = Reg(init=SInt(-1,8).toBits) val reg_stats = Reg(init=Bool(false)) val reg_status = Reg(new Status) // reset down below @@ -193,17 +183,10 @@ class PCR(implicit conf: RocketConfiguration) extends Module io.fatc := wen && addr === FATC io.evec := Mux(io.exception, reg_evec.toSInt, reg_epc).toUInt io.ptbr := reg_ptbr - - io.vecbank := reg_vecbank - var cnt = UInt(0,4) - for (i <- 0 until 8) - cnt = cnt + reg_vecbank(i) - io.vecbankcnt := cnt(3,0) - io.stats := reg_stats - when (io.badvaddr_wen || io.vec_irq_aux_wen) { - val wdata = Mux(io.badvaddr_wen, io.rw.wdata, io.vec_irq_aux) + when (io.badvaddr_wen) { + val wdata = io.rw.wdata val (upper, lower) = Split(wdata, VADDR_BITS) val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) reg_badvaddr := Cat(sign, lower).toSInt @@ -237,17 +220,16 @@ class PCR(implicit conf: RocketConfiguration) extends Module val read_impl = Bits(2) val read_ptbr = reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS - val read_veccfg = if (conf.vec) Cat(io.vec_nfregs, io.vec_nxregs, io.vec_appvl) else Bits(0) val read_cause = reg_cause(reg_cause.getWidth-1) << conf.xprlen-1 | reg_cause(reg_cause.getWidth-2,0) io.rw.rdata := AVec[Bits]( - reg_sup0, reg_sup1, reg_epc, reg_badvaddr, - reg_ptbr, Bits(0)/*asid*/, reg_count, reg_compare, - reg_evec, reg_cause, io.status.toBits, io.host.id, - read_impl, read_impl/*x*/, read_impl/*x*/, read_impl/*x*/, - reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank, read_veccfg, - reg_vecbank/*x*/, read_veccfg/*x*/, reg_vecbank/*x*/, read_veccfg/*x*/, - reg_vecbank/*x*/, read_veccfg/*x*/, reg_tohost/*x*/, 
reg_fromhost/*x*/, - reg_stats/*x*/, read_veccfg/*x*/, reg_tohost, reg_fromhost + reg_sup0, reg_sup1, reg_epc, reg_badvaddr, + reg_ptbr, Bits(0)/*asid*/, reg_count, reg_compare, + reg_evec, reg_cause, io.status.toBits, io.host.id, + read_impl, read_impl/*x*/, read_impl/*x*/, read_impl/*x*/, + reg_stats/*x*/, reg_fromhost/*x*/, reg_tohost/*x*/, reg_fromhost/*x*/, + reg_stats/*x*/, reg_fromhost/*x*/, reg_tohost/*x*/, reg_fromhost/*x*/, + reg_stats/*x*/, reg_fromhost/*x*/, reg_tohost/*x*/, reg_fromhost/*x*/, + reg_stats, reg_fromhost/*x*/, reg_tohost, reg_fromhost )(addr) when (wen) { @@ -260,7 +242,7 @@ class PCR(implicit conf: RocketConfiguration) extends Module reg_status.s64 := true reg_status.u64 := true reg_status.zero := 0 - if (!conf.vec) reg_status.ev := false + if (conf.rocc.isEmpty) reg_status.er := false if (!conf.fpu) reg_status.ef := false } when (addr === EPC) { reg_epc := wdata(VADDR_BITS,0).toSInt } @@ -273,7 +255,6 @@ class PCR(implicit conf: RocketConfiguration) extends Module when (addr === SUP0) { reg_sup0 := wdata; } when (addr === SUP1) { reg_sup1 := wdata; } when (addr === PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUInt; } - when (addr === VECBANK) { reg_vecbank:= wdata(7,0) } when (addr === STATS) { reg_stats := wdata(0) } } @@ -284,7 +265,7 @@ class PCR(implicit conf: RocketConfiguration) extends Module reg_status.ei := false reg_status.pei := false reg_status.ef := false - reg_status.ev := false + reg_status.er := false reg_status.ps := false reg_status.s := true reg_status.u64 := true diff --git a/rocket/src/main/scala/dpath_vec.scala b/rocket/src/main/scala/dpath_vec.scala deleted file mode 100644 index a49cd2be..00000000 --- a/rocket/src/main/scala/dpath_vec.scala +++ /dev/null @@ -1,186 +0,0 @@ -package rocket - -import Chisel._ -import Node._ -import Instructions._ -import hwacha.Constants._ - -class DpathVecInterfaceIO extends Bundle -{ - val vcmdq = Decoupled(Bits(width = SZ_VCMD)) - val vximm1q = 
Decoupled(Bits(width = SZ_VIMM)) - val vximm2q = Decoupled(Bits(width = SZ_VSTRIDE)) - val vcntq = Decoupled(Bits(width = SZ_VLEN+1)) - - val vpfcmdq = Decoupled(Bits(width = SZ_VCMD)) - val vpfximm1q = Decoupled(Bits(width = SZ_VIMM)) - val vpfximm2q = Decoupled(Bits(width = SZ_VSTRIDE)) - val vpfcntq = Decoupled(Bits(width = SZ_VLEN)) - - val evac_addr = Bits(OUTPUT, 64) - val irq_aux = Bits(INPUT, 64) -} - -class DpathVecIO extends Bundle -{ - val ctrl = (new CtrlDpathVecIO).flip - val iface = new DpathVecInterfaceIO - val valid = Bool(INPUT) - val inst = Bits(INPUT, 32) - val vecbank = Bits(INPUT, 8) - val vecbankcnt = UInt(INPUT, 4) - val wdata = Bits(INPUT, 64) - val rs2 = Bits(INPUT, 64) - val wen = Bool(OUTPUT) - val irq_aux = Bits(OUTPUT, 64) - val appvl = UInt(OUTPUT, 12) - val nxregs = UInt(OUTPUT, 6) - val nfregs = UInt(OUTPUT, 6) -} - -class rocketDpathVec extends Module -{ - val io = new DpathVecIO - - val nxregs_stage = Mux(io.ctrl.fn === VEC_CFG, io.wdata(5,0), io.inst(15,10)) - val nfregs_stage = Mux(io.ctrl.fn === VEC_CFG, io.rs2(5,0), io.inst(21,16)) - val nxregs = Mux(nxregs_stage(5), Bits(32), Mux(nxregs_stage === Bits(0), Bits(1), nxregs_stage)) + UInt(0,7) - val nfregs = Mux(nfregs_stage(5), Bits(32), nfregs_stage) + UInt(0,7) - val nregs = nxregs + nfregs - - //val uts_per_bank = UInt(4,9) - - val nreg_mod_bank = MuxLookup( - nregs, UInt(4,9), Array( - UInt(0,7) -> UInt(256,9), - UInt(1,7) -> UInt(256,9), - UInt(2,7) -> UInt(256,9), - UInt(3,7) -> UInt(128,9), - UInt(4,7) -> UInt(85,9), - UInt(5,7) -> UInt(64,9), - UInt(6,7) -> UInt(51,9), - UInt(7,7) -> UInt(42,9), - UInt(8,7) -> UInt(36,9), - UInt(9,7) -> UInt(32,9), - UInt(10,7) -> UInt(28,9), - UInt(11,7) -> UInt(25,9), - UInt(12,7) -> UInt(23,9), - UInt(13,7) -> UInt(21,9), - UInt(14,7) -> UInt(19,9), - UInt(15,7) -> UInt(18,9), - UInt(16,7) -> UInt(17,9), - UInt(17,7) -> UInt(16,9), - UInt(18,7) -> UInt(15,9), - UInt(19,7) -> UInt(14,9), - UInt(20,7) -> UInt(13,9), - UInt(21,7) -> 
UInt(12,9), - UInt(22,7) -> UInt(12,9), - UInt(23,7) -> UInt(11,9), - UInt(24,7) -> UInt(11,9), - UInt(25,7) -> UInt(10,9), - UInt(26,7) -> UInt(10,9), - UInt(27,7) -> UInt(9,9), - UInt(28,7) -> UInt(9,9), - UInt(29,7) -> UInt(9,9), - UInt(30,7) -> UInt(8,9), - UInt(31,7) -> UInt(8,9), - UInt(32,7) -> UInt(8,9), - UInt(33,7) -> UInt(8,9), - UInt(34,7) -> UInt(7,9), - UInt(35,7) -> UInt(7,9), - UInt(36,7) -> UInt(7,9), - UInt(37,7) -> UInt(7,9), - UInt(38,7) -> UInt(6,9), - UInt(39,7) -> UInt(6,9), - UInt(40,7) -> UInt(6,9), - UInt(41,7) -> UInt(6,9), - UInt(42,7) -> UInt(6,9), - UInt(43,7) -> UInt(6,9), - UInt(44,7) -> UInt(5,9), - UInt(45,7) -> UInt(5,9), - UInt(46,7) -> UInt(5,9), - UInt(47,7) -> UInt(5,9), - UInt(48,7) -> UInt(5,9), - UInt(49,7) -> UInt(5,9), - UInt(50,7) -> UInt(5,9), - UInt(51,7) -> UInt(5,9), - UInt(52,7) -> UInt(5,9) - )) - - val max_threads = UInt(WIDTH_BMASK) - val uts_per_bank = Mux(Bool(HAVE_PVFB) & nreg_mod_bank > max_threads, max_threads, nreg_mod_bank) - - val reg_hwvl = Reg(init=UInt(32, 12)) - val reg_appvl0 = Reg(init=Bool(true)) - val hwvl_vcfg = (uts_per_bank * io.vecbankcnt)(11,0) - - val hwvl = - Mux(io.ctrl.fn === VEC_CFG || io.ctrl.fn === VEC_CFGVL, hwvl_vcfg, - reg_hwvl) - - val appvl = - Mux(io.ctrl.fn === VEC_CFG, UInt(0), - Mux(io.wdata(11,0) < hwvl, io.wdata(11,0).toUInt, - hwvl.toUInt)) - - val reg_nxregs = Reg(init=UInt(32, 6)) - val reg_nfregs = Reg(init=UInt(32, 6)) - val reg_appvl = Reg(init=UInt(0, 12)) - - when (io.valid) - { - when (io.ctrl.fn === VEC_CFG || io.ctrl.fn === VEC_CFGVL) - { - reg_hwvl := hwvl_vcfg - reg_nxregs := nxregs - reg_nfregs := nfregs - } - when (io.ctrl.fn === VEC_VL || io.ctrl.fn === VEC_CFGVL) - { - reg_appvl0 := !(appvl.orR()) - reg_appvl := appvl - } - } - - io.wen := io.valid && io.ctrl.wen - io.irq_aux := io.iface.irq_aux - io.appvl := Mux(io.ctrl.fn === VEC_VL || io.ctrl.fn === VEC_CFGVL, appvl, reg_appvl) - io.nxregs := reg_nxregs - io.nfregs := reg_nfregs - - val appvlm1 = appvl - 
UInt(1) - val waddr = io.inst(31,27) - val raddr1 = io.inst(26,22) - - io.iface.vcmdq.bits := - Mux(io.ctrl.sel_vcmd === VCMD_I, Cat(Bits(0,2), Bits(0,4), io.inst(9,8), Bits(0,6), Bits(0,6)), - Mux(io.ctrl.sel_vcmd === VCMD_F, Cat(Bits(0,2), Bits(1,3), io.inst(9,7), Bits(0,6), Bits(0,6)), - Mux(io.ctrl.sel_vcmd === VCMD_TX, Cat(Bits(1,2), io.inst(13,8), Bits(0,1), waddr, Bits(0,1), raddr1), - Mux(io.ctrl.sel_vcmd === VCMD_TF, Cat(Bits(1,2), io.inst(13,8), Bits(1,1), waddr, Bits(1,1), raddr1), - Mux(io.ctrl.sel_vcmd === VCMD_MX, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(0,1), waddr, Bits(0,1), waddr), - Mux(io.ctrl.sel_vcmd === VCMD_MF, Cat(Bits(1,1), io.inst(13,12), io.inst(2), io.inst(10,7), Bits(1,1), waddr, Bits(1,1), waddr), - Mux(io.ctrl.sel_vcmd === VCMD_A, io.wdata(SZ_VCMD-1, 0), - Bits(0,20)))))))) - - io.iface.vximm1q.bits := - Mux(io.ctrl.sel_vimm === VIMM_VLEN, Cat(Bits(0,29), io.vecbankcnt, io.vecbank, nfregs(5,0), nxregs(5,0), appvlm1(10,0)), - io.wdata) // VIMM_ALU - - io.iface.vximm2q.bits := - Mux(io.ctrl.sel_vimm2 === VIMM2_RS2, io.rs2, - io.wdata) // VIMM2_ALU - - val last = io.rs2(1) - io.iface.vcntq.bits := Cat(last, io.iface.vpfcntq.bits) - - io.iface.vpfcmdq.bits := io.iface.vcmdq.bits - io.iface.vpfximm1q.bits := io.iface.vximm1q.bits - io.iface.vpfximm2q.bits := io.iface.vximm2q.bits - io.iface.vpfcntq.bits := io.wdata(SZ_VLEN-1, 0) - - io.iface.evac_addr := io.wdata - - io.ctrl.inst := io.inst - io.ctrl.appvl0 := reg_appvl0 - io.ctrl.pfq := io.rs2(0) -} diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 66d2d90d..9a7dfd05 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -167,87 +167,30 @@ object Instructions def FNMSUB_D = Bits("b???????????????????????011001011") def FNMADD_D = Bits("b???????????????????????011001111") /* Automatically generated by parse-opcodes */ - def STOP = 
Bits("b00000000000000000000001011110111") - def UTIDX = Bits("b?????000000000000000001101110111") - def MOVZ = Bits("b???????????????00000001111110111") - def MOVN = Bits("b???????????????00000011111110111") - def FMOVZ = Bits("b???????????????00000101111110111") - def FMOVN = Bits("b???????????????00000111111110111") - def VSETCFGVL = Bits("b??????????????????????1110001011") - def VSETVL = Bits("b??????????0000000000001100001011") - def VMVV = Bits("b??????????0000000000011100001011") - def VGETCFG = Bits("b?????000000000000000001000001011") - def VGETVL = Bits("b?????000000000000000011000001011") - def VF = Bits("b??????????00000???????0100001011") - def VXCPTSAVE = Bits("b00000?????0000000000000110001011") - def VXCPTRESTORE = Bits("b00000?????0000000000010110001011") - def VXCPTKILL = Bits("b00000000000000000000100110001011") - def VXCPTEVAC = Bits("b00000?????0000000000110110001011") - def VXCPTHOLD = Bits("b00000000000000000001000110001011") - def VENQCMD = Bits("b00000??????????00001010110001011") - def VENQIMM1 = Bits("b00000??????????00001100110001011") - def VENQIMM2 = Bits("b00000??????????00001110110001011") - def VENQCNT = Bits("b00000??????????00010000110001011") - def VLD = Bits("b??????????0000000000110100001111") - def VLW = Bits("b??????????0000000000100100001111") - def VLWU = Bits("b??????????0000000001100100001111") - def VLH = Bits("b??????????0000000000010100001111") - def VLHU = Bits("b??????????0000000001010100001111") - def VLB = Bits("b??????????0000000000000100001111") - def VLBU = Bits("b??????????0000000001000100001111") - def VFLD = Bits("b??????????0000000010110100001111") - def VFLW = Bits("b??????????0000000010100100001111") - def VLSTD = Bits("b???????????????00000110110001111") - def VLSTW = Bits("b???????????????00000100110001111") - def VLSTWU = Bits("b???????????????00001100110001111") - def VLSTH = Bits("b???????????????00000010110001111") - def VLSTHU = Bits("b???????????????00001010110001111") - def VLSTB = 
Bits("b???????????????00000000110001111") - def VLSTBU = Bits("b???????????????00001000110001111") - def VFLSTD = Bits("b???????????????00010110110001111") - def VFLSTW = Bits("b???????????????00010100110001111") - def VLSEGD = Bits("b??????????00000???00110101011011") - def VLSEGW = Bits("b??????????00000???00100101011011") - def VLSEGWU = Bits("b??????????00000???01100101011011") - def VLSEGH = Bits("b??????????00000???00010101011011") - def VLSEGHU = Bits("b??????????00000???01010101011011") - def VLSEGB = Bits("b??????????00000???00000101011011") - def VLSEGBU = Bits("b??????????00000???01000101011011") - def VFLSEGD = Bits("b??????????00000???10110101011011") - def VFLSEGW = Bits("b??????????00000???10100101011011") - def VLSEGSTD = Bits("b??????????????????00110111011011") - def VLSEGSTW = Bits("b??????????????????00100111011011") - def VLSEGSTWU = Bits("b??????????????????01100111011011") - def VLSEGSTH = Bits("b??????????????????00010111011011") - def VLSEGSTHU = Bits("b??????????????????01010111011011") - def VLSEGSTB = Bits("b??????????????????00000111011011") - def VLSEGSTBU = Bits("b??????????????????01000111011011") - def VFLSEGSTD = Bits("b??????????00000???10110111011011") - def VFLSEGSTW = Bits("b??????????00000???10100111011011") - def VSD = Bits("b??????????0000010000110100001111") - def VSW = Bits("b??????????0000010000100100001111") - def VSH = Bits("b??????????0000010000010100001111") - def VSB = Bits("b??????????0000010000000100001111") - def VFSD = Bits("b??????????0000010010110100001111") - def VFSW = Bits("b??????????0000010010100100001111") - def VSSTD = Bits("b???????????????10000110110001111") - def VSSTW = Bits("b???????????????10000100110001111") - def VSSTH = Bits("b???????????????10000010110001111") - def VSSTB = Bits("b???????????????10000000110001111") - def VFSSTD = Bits("b???????????????10010110110001111") - def VFSSTW = Bits("b???????????????10010100110001111") - def VSSEGD = Bits("b??????????00000???00110101111011") - def 
VSSEGW = Bits("b??????????00000???00100101111011") - def VSSEGH = Bits("b??????????00000???00010101111011") - def VSSEGB = Bits("b??????????00000???00000101111011") - def VFSSEGD = Bits("b??????????00000???10110101111011") - def VFSSEGW = Bits("b??????????00000???10100101111011") - def VSSEGSTD = Bits("b??????????????????00110111111011") - def VSSEGSTW = Bits("b??????????????????00100111111011") - def VSSEGSTH = Bits("b??????????????????00010111111011") - def VSSEGSTB = Bits("b??????????????????00000111111011") - def VFSSEGSTD = Bits("b??????????00000???10110111111011") - def VFSSEGSTW = Bits("b??????????00000???10100111111011") + def CUSTOM0 = Bits("b??????????????????????0000001011") + def CUSTOM0_RS1 = Bits("b??????????????????????0100001011") + def CUSTOM0_RS1_RS2 = Bits("b??????????????????????0110001011") + def CUSTOM0_RD = Bits("b??????????????????????1000001011") + def CUSTOM0_RD_RS1 = Bits("b??????????????????????1100001011") + def CUSTOM0_RD_RS1_RS2 = Bits("b??????????????????????1110001011") + def CUSTOM1 = Bits("b??????????????????????0000001111") + def CUSTOM1_RS1 = Bits("b??????????????????????0100001111") + def CUSTOM1_RS1_RS2 = Bits("b??????????????????????0110001111") + def CUSTOM1_RD = Bits("b??????????????????????1000001111") + def CUSTOM1_RD_RS1 = Bits("b??????????????????????1100001111") + def CUSTOM1_RD_RS1_RS2 = Bits("b??????????????????????1110001111") + def CUSTOM2 = Bits("b??????????????????????0001010111") + def CUSTOM2_RS1 = Bits("b??????????????????????0101010111") + def CUSTOM2_RS1_RS2 = Bits("b??????????????????????0111010111") + def CUSTOM2_RD = Bits("b??????????????????????1001010111") + def CUSTOM2_RD_RS1 = Bits("b??????????????????????1101010111") + def CUSTOM2_RD_RS1_RS2 = Bits("b??????????????????????1111010111") + def CUSTOM3 = Bits("b??????????????????????0001111011") + def CUSTOM3_RS1 = Bits("b??????????????????????0101111011") + def CUSTOM3_RS1_RS2 = Bits("b??????????????????????0111111011") + def CUSTOM3_RD = 
Bits("b??????????????????????1001111011") + def CUSTOM3_RD_RS1 = Bits("b??????????????????????1101111011") + def CUSTOM3_RD_RS1_RS2 = Bits("b??????????????????????1111111011") } object Disassemble @@ -511,51 +454,6 @@ object Disassemble FLW-> List(Str("flw "), FMT_FLD), FLD-> List(Str("fld "), FMT_FLD), FSW-> List(Str("fsw "), FMT_FST), - FSD-> List(Str("fsd "), FMT_FST), - - VSETCFGVL-> List(Str("vecInst "), FMT_0), - VSETVL-> List(Str("vecInst "), FMT_0), - VF-> List(Str("vecInst "), FMT_0), - VMVV-> List(Str("vecInst "), FMT_0), - FENCE_V_L-> List(Str("vecInst "), FMT_0), - FENCE_V_G-> List(Str("vecInst "), FMT_0), - VLD-> List(Str("vecInst "), FMT_0), - VLW-> List(Str("vecInst "), FMT_0), - VLWU-> List(Str("vecInst "), FMT_0), - VLH-> List(Str("vecInst "), FMT_0), - VLHU-> List(Str("vecInst "), FMT_0), - VLB-> List(Str("vecInst "), FMT_0), - VLBU-> List(Str("vecInst "), FMT_0), - VSD-> List(Str("vecInst "), FMT_0), - VSW-> List(Str("vecInst "), FMT_0), - VSH-> List(Str("vecInst "), FMT_0), - VSB-> List(Str("vecInst "), FMT_0), - VFLD-> List(Str("vecInst "), FMT_0), - VFLW-> List(Str("vecInst "), FMT_0), - VFSD-> List(Str("vecInst "), FMT_0), - VFSW-> List(Str("vecInst "), FMT_0), - VLSTD-> List(Str("vecInst "), FMT_0), - VLSTW-> List(Str("vecInst "), FMT_0), - VLSTWU-> List(Str("vecInst "), FMT_0), - VLSTH-> List(Str("vecInst "), FMT_0), - VLSTHU-> List(Str("vecInst "), FMT_0), - VLSTB-> List(Str("vecInst "), FMT_0), - VLSTBU-> List(Str("vecInst "), FMT_0), - VSSTD-> List(Str("vecInst "), FMT_0), - VSSTW-> List(Str("vecInst "), FMT_0), - VSSTH-> List(Str("vecInst "), FMT_0), - VSSTB-> List(Str("vecInst "), FMT_0), - VFLSTD-> List(Str("vecInst "), FMT_0), - VFLSTW-> List(Str("vecInst "), FMT_0), - VFSSTD-> List(Str("vecInst "), FMT_0), - VFSSTW-> List(Str("vecInst "), FMT_0), - - VENQCMD-> List(Str("vecInst "), FMT_0), - VENQIMM1-> List(Str("vecInst "), FMT_0), - VENQIMM2-> List(Str("vecInst "), FMT_0), - VENQCNT-> List(Str("vecInst "), FMT_0), - VXCPTEVAC-> 
List(Str("vecInst "), FMT_0), - VXCPTKILL-> List(Str("vecInst "), FMT_0), - VXCPTHOLD-> List(Str("vecInst "), FMT_0) + FSD-> List(Str("fsd "), FMT_FST) ) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3d0dcb7a..b28a5802 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -69,7 +69,6 @@ class RandomReplacement(implicit conf: DCacheConfig) extends ReplacementPolicy object StoreGen { def apply(r: HellaCacheReq) = new StoreGen(r.typ, r.addr, r.data) - def apply(r: hwacha.io_dmem_req_bundle) = new StoreGen(r.typ, r.addr, r.data) def apply(typ: Bits, addr: Bits, data: Bits = Bits(0)) = new StoreGen(typ, addr, data) } diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala new file mode 100644 index 00000000..6207582f --- /dev/null +++ b/rocket/src/main/scala/rocc.scala @@ -0,0 +1,6 @@ +package rocket + +import Chisel._ +import Node._ + +abstract class RoCC extends Module diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 2bf49a68..6be26fc7 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -6,7 +6,7 @@ import Util._ case class RocketConfiguration(tl: TileLinkConfiguration, icache: ICacheConfig, dcache: DCacheConfig, - fpu: Boolean, vec: Boolean, + fpu: Boolean, rocc: Option[RoCC] = None, fastLoadWord: Boolean = true, fastLoadByte: Boolean = false, fastMulDiv: Boolean = true) @@ -20,7 +20,7 @@ case class RocketConfiguration(tl: TileLinkConfiguration, class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module(_reset = resetSignal) with ClientCoherenceAgent { - val memPorts = 2 + confIn.vec + val memPorts = 2 val dcachePortId = 0 val icachePortId = 1 val vicachePortId = 2 @@ -38,14 +38,23 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module val core = Module(new Core) val icache = Module(new Frontend) val dcache = Module(new 
HellaCache) + val ptw = Module(new PTW(2)) - val arbiter = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts)) - arbiter.io.in(dcachePortId) <> dcache.io.mem - arbiter.io.in(icachePortId) <> icache.io.mem + val dcacheArb = Module(new HellaCacheArbiter(2 + !conf.rocc.isEmpty)) + dcacheArb.io.requestor(0) <> ptw.io.mem + dcacheArb.io.requestor(1) <> core.io.dmem + dcache.io.cpu <> dcacheArb.io.mem - io.tilelink.acquire <> arbiter.io.out.acquire - arbiter.io.out.grant <> io.tilelink.grant - io.tilelink.grant_ack <> arbiter.io.out.grant_ack + ptw.io.requestor(0) <> icache.io.cpu.ptw + ptw.io.requestor(1) <> dcache.io.cpu.ptw + + val memArb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts)) + memArb.io.in(dcachePortId) <> dcache.io.mem + memArb.io.in(icachePortId) <> icache.io.mem + + io.tilelink.acquire <> memArb.io.out.acquire + memArb.io.out.grant <> io.tilelink.grant + io.tilelink.grant_ack <> memArb.io.out.grant_ack dcache.io.mem.probe <> io.tilelink.probe io.tilelink.release.data <> dcache.io.mem.release.data io.tilelink.release.meta.valid := dcache.io.mem.release.meta.valid @@ -53,13 +62,7 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module io.tilelink.release.meta.bits := dcache.io.mem.release.meta.bits io.tilelink.release.meta.bits.payload.client_xact_id := Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) - if (conf.vec) { - val vicache = Module(new Frontend()(ICacheConfig(128, 1), tlConf)) // 128 sets x 1 ways (8KB) - arbiter.io.in(vicachePortId) <> vicache.io.mem - core.io.vimem <> vicache.io.cpu - } - core.io.host <> io.host core.io.imem <> icache.io.cpu - core.io.dmem <> dcache.io.cpu + core.io.ptw <> ptw.io.dpath } From a0cb711451bc9f34963dacecfab1db949e8c090a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 14 Sep 2013 15:31:50 -0700 
Subject: [PATCH 0635/1087] Start adding RoCC --- rocket/src/main/scala/core.scala | 20 ++++++---- rocket/src/main/scala/ctrl.scala | 21 +++++++--- rocket/src/main/scala/dpath.scala | 34 ++++++++++------ rocket/src/main/scala/rocc.scala | 66 ++++++++++++++++++++++++++++++- 4 files changed, 114 insertions(+), 27 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 8067f3c9..0bf3e561 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -10,6 +10,7 @@ class RocketIO(implicit conf: RocketConfiguration) extends Bundle val imem = new CPUFrontendIO()(conf.icache) val dmem = new HellaCacheIO()(conf.dcache) val ptw = new DatapathPTWIO().flip + val rocc = new RoCCInterface().flip } class Core(implicit conf: RocketConfiguration) extends Module @@ -19,6 +20,15 @@ class Core(implicit conf: RocketConfiguration) extends Module val ctrl = Module(new Control) val dpath = Module(new Datapath) + val fpu: FPU = if (conf.fpu) { + val fpu = Module(new FPU(4,6)) + dpath.io.fpu <> fpu.io.dpath + ctrl.io.fpu <> fpu.io.ctrl + fpu.io.sfma.valid := Bool(false) // hook these up to coprocessor? + fpu.io.dfma.valid := Bool(false) + fpu + } else null + ctrl.io.dpath <> dpath.io.ctrl dpath.io.host <> io.host @@ -30,12 +40,6 @@ class Core(implicit conf: RocketConfiguration) extends Module dpath.io.ptw <> io.ptw - val fpu: FPU = if (conf.fpu) { - val fpu = Module(new FPU(4,6)) - dpath.io.fpu <> fpu.io.dpath - ctrl.io.fpu <> fpu.io.ctrl - fpu.io.sfma.valid := Bool(false) // hook these up to coprocessor? 
- fpu.io.dfma.valid := Bool(false) - fpu - } else null + ctrl.io.rocc <> io.rocc + dpath.io.rocc <> io.rocc } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index e1adea1a..89769f71 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -37,7 +37,8 @@ class CtrlDpathIO extends Bundle() val wb_valid = Bool(OUTPUT) val ex_mem_type = Bits(OUTPUT, 3) val ex_rs2_val = Bool(OUTPUT) - val mem_rs2_val = Bool(OUTPUT) + val ex_rocc_val = Bool(OUTPUT) + val mem_rocc_val = Bool(OUTPUT) val mem_ll_bypass_rs1 = Bool(OUTPUT) val mem_ll_bypass_rs2 = Bool(OUTPUT) // exception handling @@ -317,6 +318,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val xcpt_dtlb_ld = Bool(INPUT) val xcpt_dtlb_st = Bool(INPUT) val fpu = new CtrlFPUIO + val rocc = new RoCCInterface().flip } var decode_table = XDecode.table @@ -376,6 +378,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val wb_reg_pcr = Reg(init=PCR.N) val wb_reg_wen = Reg(init=Bool(false)) val wb_reg_fp_wen = Reg(init=Bool(false)) + val wb_reg_rocc_val = Reg(init=Bool(false)) val wb_reg_flush_inst = Reg(init=Bool(false)) val wb_reg_mem_val = Reg(init=Bool(false)) val wb_reg_eret = Reg(init=Bool(false)) @@ -416,9 +419,11 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_amo_aq = io.dpath.inst(16) val id_amo_rl = io.dpath.inst(15) val id_fence_next = id_fence || id_amo && id_amo_rl - val id_fence_ok = io.dmem.ordered && !ex_reg_mem_val + val id_rocc_busy = io.rocc.busy || ex_reg_rocc_val || mem_reg_rocc_val || wb_reg_rocc_val + val id_fence_ok = io.dmem.ordered && !ex_reg_mem_val && + (Bool(conf.rocc.isEmpty) || !id_rocc_busy) id_reg_fence := id_fence_next || id_reg_fence && !id_fence_ok - val id_do_fence = id_amo && id_amo_aq || id_reg_fence && id_mem_val || id_pcr_flush + val id_do_fence = id_amo && id_amo_aq || id_reg_fence && (id_mem_val || id_rocc_val) || id_pcr_flush val (id_xcpt, id_cause) = 
checkExceptions(List( (id_interrupt, id_interrupt_cause), @@ -549,6 +554,7 @@ class Control(implicit conf: RocketConfiguration) extends Module wb_reg_mem_val := Bool(false) wb_reg_div_mul_val := Bool(false); wb_reg_fp_val := Bool(false) + wb_reg_rocc_val := Bool(false) } .otherwise { wb_reg_valid := mem_reg_valid @@ -560,9 +566,11 @@ class Control(implicit conf: RocketConfiguration) extends Module wb_reg_mem_val := mem_reg_mem_val wb_reg_div_mul_val := mem_reg_div_mul_val wb_reg_fp_val := mem_reg_fp_val + wb_reg_rocc_val := mem_reg_rocc_val } - val replay_wb = io.dmem.resp.bits.nack || wb_reg_replay || io.dpath.pcr_replay + val replay_wb = io.dmem.resp.bits.nack || wb_reg_replay || + io.dpath.pcr_replay || Bool(!conf.rocc.isEmpty) && wb_reg_rocc_val && !io.rocc.cmd.ready class Scoreboard(n: Int) { @@ -708,7 +716,8 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.ex_mem_type := ex_reg_mem_type io.dpath.ex_br_type := ex_reg_br_type io.dpath.ex_rs2_val := ex_reg_mem_val && isWrite(ex_reg_mem_cmd) || ex_reg_rocc_val - io.dpath.mem_rs2_val := mem_reg_rocc_val + io.dpath.ex_rocc_val := ex_reg_rocc_val + io.dpath.mem_rocc_val := mem_reg_rocc_val io.fpu.valid := !ctrl_killd && id_fp_val io.fpu.killx := ctrl_killx @@ -719,4 +728,6 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dmem.req.bits.cmd := ex_reg_mem_cmd io.dmem.req.bits.typ := ex_reg_mem_type io.dmem.req.bits.phys := Bool(false) + + io.rocc.cmd.valid := wb_reg_rocc_val } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 7f2e9701..00c3d5d2 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -14,6 +14,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val ptw = (new DatapathPTWIO).flip val imem = new CPUFrontendIO()(conf.icache) val fpu = new DpathFPUIO + val rocc = new RoCCInterface().flip } // execute definitions @@ -39,8 +40,6 @@ class Datapath(implicit conf: 
RocketConfiguration) extends Module val mem_reg_wdata = Reg(Bits()) val mem_reg_kill = Reg(Bool()) val mem_reg_store_data = Reg(Bits()) - val mem_reg_rs1 = Reg(Bits()) - val mem_reg_rs2 = Reg(Bits()) // writeback definitions val wb_reg_pc = Reg(UInt()) @@ -49,8 +48,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val wb_reg_wdata = Reg(Bits()) val wb_reg_ll_wb = Reg(init=Bool(false)) val wb_wdata = Bits() - val wb_reg_store_data = Reg(Bits()) - val wb_reg_rs1 = Reg(Bits()) val wb_reg_rs2 = Reg(Bits()) val wb_wen = io.ctrl.wb_wen && io.ctrl.wb_valid || wb_reg_ll_wb @@ -235,8 +232,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Module mem_reg_pc := ex_reg_pc mem_reg_inst := ex_reg_inst mem_reg_wdata := ex_wdata - mem_reg_rs1 := ex_rs1 - mem_reg_rs2 := ex_rs2 when (io.ctrl.ex_rs2_val) { mem_reg_store_data := StoreGen(io.ctrl.ex_mem_type, Bits(0), ex_rs2).data } @@ -255,8 +250,19 @@ class Datapath(implicit conf: RocketConfiguration) extends Module mem_ll_wdata := div.io.resp.bits.data io.ctrl.mem_ll_waddr := div.io.resp.bits.tag io.ctrl.mem_ll_wb := div.io.resp.valid && !io.ctrl.mem_wen + if (!conf.rocc.isEmpty) { + io.rocc.resp.ready := !io.ctrl.mem_wen && !io.ctrl.mem_rocc_val + when (io.rocc.resp.fire()) { + div.io.resp.ready := Bool(false) + mem_ll_wdata := io.rocc.resp.bits.data + io.ctrl.mem_ll_waddr := io.rocc.resp.bits.rd + io.ctrl.mem_ll_wb := Bool(true) + } + } when (dmem_resp_replay) { div.io.resp.ready := Bool(false) + if (!conf.rocc.isEmpty) + io.rocc.resp.ready := Bool(false) mem_ll_wdata := io.dmem.resp.bits.data_subword io.ctrl.mem_ll_waddr := dmem_resp_waddr io.ctrl.mem_ll_wb := Bool(true) @@ -274,11 +280,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Module wb_reg_waddr := io.ctrl.mem_waddr wb_reg_inst := mem_reg_inst wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata) - wb_reg_rs1 := mem_reg_rs1 - wb_reg_rs2 := mem_reg_rs2 - when (io.ctrl.mem_rs2_val) { - 
wb_reg_store_data := mem_reg_store_data - } + } + when (io.ctrl.mem_rocc_val) { + wb_reg_rs2 := Bits(0)//mem_reg_rs2 } wb_reg_ll_wb := io.ctrl.mem_ll_wb when (io.ctrl.mem_ll_wb) { @@ -301,6 +305,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Module pcr.io.rw.cmd := io.ctrl.pcr pcr.io.rw.wdata := wb_reg_wdata + io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst) + io.rocc.cmd.bits.rs1 := wb_reg_wdata + io.rocc.cmd.bits.rs2 := wb_reg_rs2 + // hook up I$ io.imem.req.bits.currentpc := ex_reg_pc io.imem.req.bits.pc := @@ -311,7 +319,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module printf("C: %d [%d] pc=[%x] W[r%d=%x] R[r%d=%x] R[r%d=%x] inst=[%x] %s\n", tsc_reg(32,0), io.ctrl.wb_valid, wb_reg_pc, Mux(wb_wen, wb_reg_waddr, UInt(0)), wb_wdata, - wb_reg_inst(26,22), wb_reg_rs1, - wb_reg_inst(21,17), wb_reg_rs2, + wb_reg_inst(26,22), Reg(next=Reg(next=ex_rs1)), + wb_reg_inst(21,17), Reg(next=Reg(next=ex_rs2)), wb_reg_inst, Disassemble(wb_reg_inst)) } diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 6207582f..cbff596d 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -3,4 +3,68 @@ package rocket import Chisel._ import Node._ -abstract class RoCC extends Module +class RoCCInstruction extends Bundle +{ + val rd = Bits(width = 5) + val rs1 = Bits(width = 5) + val rs2 = Bits(width = 5) + val funct = Bits(width = 7) + val xd = Bool() + val xs1 = Bool() + val xs2 = Bool() + val opcode = Bits(width = 7) +} + +class RoCCCommand(implicit conf: RocketConfiguration) extends Bundle +{ + val inst = new RoCCInstruction + val rs1 = Bits(width = conf.xprlen) + val rs2 = Bits(width = conf.xprlen) + + override def clone = new RoCCCommand().asInstanceOf[this.type] +} + +class RoCCResponse(implicit conf: RocketConfiguration) extends Bundle +{ + val rd = Bits(width = 5) + val data = Bits(width = conf.xprlen) + + override def clone = new 
RoCCResponse().asInstanceOf[this.type] +} + +class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle +{ + val cmd = Decoupled(new RoCCCommand).flip + val resp = Decoupled(new RoCCResponse) + val busy = Bool(OUTPUT) + val interrupt = Bool(OUTPUT) + + override def clone = new RoCCInterface().asInstanceOf[this.type] +} + +abstract class RoCC(implicit conf: RocketConfiguration) extends Module +{ + val io = new RoCCInterface +} + +class AccumulatorExample(implicit conf: RocketConfiguration) extends RoCC +{ + val regfile = Mem(UInt(width = conf.xprlen), 4) + + val funct = io.cmd.bits.inst.funct + val addr = io.cmd.bits.inst.rs2 + val addend = io.cmd.bits.rs1 + val accum = regfile(addr) + val wdata = Mux(funct === UInt(0), addend, accum + addend) + + when (io.cmd.fire() && (funct === UInt(1) || funct === UInt(3))) { + regfile(addr) := wdata + } + + io.cmd.ready := io.resp.ready + io.resp.valid := io.cmd.valid && io.cmd.bits.inst.xd + io.resp.bits.rd := io.cmd.bits.inst.rd + io.resp.bits.data := accum + io.busy := Bool(false) + io.interrupt := Bool(false) +} From 18968dfbc7e7f3bc921f5f844548884e250bf253 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 14 Sep 2013 16:15:07 -0700 Subject: [PATCH 0636/1087] Move store data generation into cache --- rocket/src/main/scala/dpath.scala | 8 +++---- rocket/src/main/scala/fpu.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 34 +++++++++++++--------------- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 00c3d5d2..898e5e5a 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -39,7 +39,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val mem_reg_inst = Reg(Bits()) val mem_reg_wdata = Reg(Bits()) val mem_reg_kill = Reg(Bool()) - val mem_reg_store_data = Reg(Bits()) + val mem_reg_rs2 = Reg(Bits()) // writeback definitions val wb_reg_pc = Reg(UInt()) @@ -190,7 
+190,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module io.dmem.req.bits.addr := Cat(vaSign(ex_rs1, alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUInt - io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_store_data) + io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_fp_val) require(io.dmem.req.bits.tag.getWidth >= 6) @@ -233,7 +233,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module mem_reg_inst := ex_reg_inst mem_reg_wdata := ex_wdata when (io.ctrl.ex_rs2_val) { - mem_reg_store_data := StoreGen(io.ctrl.ex_mem_type, Bits(0), ex_rs2).data + mem_reg_rs2 := ex_rs2 } } @@ -282,7 +282,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata) } when (io.ctrl.mem_rocc_val) { - wb_reg_rs2 := Bits(0)//mem_reg_rs2 + wb_reg_rs2 := mem_reg_rs2 } wb_reg_ll_wb := io.ctrl.mem_ll_wb when (io.ctrl.mem_ll_wb) { diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 7e564800..a83724af 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -245,7 +245,7 @@ class FPToInt extends Module } io.out.valid := valid - io.out.bits.store := Mux(in.single, Cat(unrec_s, unrec_s), unrec_d) + io.out.bits.store := Mux(in.single, Cat(unrec_d(63,32), unrec_s), unrec_d) io.out.bits.lt := dcmp.io.a_lt_b } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index b28a5802..565059e4 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -66,12 +66,6 @@ class RandomReplacement(implicit conf: DCacheConfig) extends ReplacementPolicy def hit = {} } -object StoreGen -{ - def apply(r: 
HellaCacheReq) = new StoreGen(r.typ, r.addr, r.data) - def apply(typ: Bits, addr: Bits, data: Bits = Bits(0)) = new StoreGen(typ, addr, data) -} - class StoreGen(typ: Bits, addr: Bits, dat: Bits) { val byte = typ === MT_B || typ === MT_BU @@ -85,13 +79,15 @@ class StoreGen(typ: Bits, addr: Bits, dat: Bits) def data = Mux(byte, Fill(8, dat( 7,0)), Mux(half, Fill(4, dat(15,0)), + wordData)) + lazy val wordData = Mux(word, Fill(2, dat(31,0)), - dat))) + dat) } class LoadGen(typ: Bits, addr: Bits, dat: Bits) { - val t = StoreGen(typ, addr, dat) + val t = new StoreGen(typ, addr, dat) val sign = typ === MT_B || typ === MT_H || typ === MT_W || typ === MT_D val wordShift = Mux(addr(2), dat(63,32), dat(31,0)) @@ -658,6 +654,8 @@ class AMOALU(implicit conf: DCacheConfig) extends Module { } require(conf.databits == 64) + val storegen = new StoreGen(io.typ, io.addr, io.rhs) + val rhs = storegen.wordData val sgned = io.cmd === M_XA_MIN || io.cmd === M_XA_MAX val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU @@ -665,24 +663,24 @@ class AMOALU(implicit conf: DCacheConfig) extends Module { val word = io.typ === MT_W || io.typ === MT_WU || io.typ === MT_B || io.typ === MT_BU val mask = SInt(-1,64) ^ (io.addr(2) << 31) - val adder_out = (io.lhs & mask).toUInt + (io.rhs & mask) + val adder_out = (io.lhs & mask).toUInt + (rhs & mask) val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63)) - val cmp_rhs = Mux(word && !io.addr(2), io.rhs(31), io.rhs(63)) - val lt_lo = io.lhs(31,0) < io.rhs(31,0) - val lt_hi = io.lhs(63,32) < io.rhs(63,32) - val eq_hi = io.lhs(63,32) === io.rhs(63,32) + val cmp_rhs = Mux(word && !io.addr(2), rhs(31), rhs(63)) + val lt_lo = io.lhs(31,0) < rhs(31,0) + val lt_hi = io.lhs(63,32) < rhs(63,32) + val eq_hi = io.lhs(63,32) === rhs(63,32) val lt = Mux(word, Mux(io.addr(2), lt_hi, lt_lo), lt_hi || eq_hi && lt_lo) val less = Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs)) val out = Mux(io.cmd === M_XA_ADD, adder_out, - Mux(io.cmd === M_XA_AND, 
io.lhs & io.rhs, - Mux(io.cmd === M_XA_OR, io.lhs | io.rhs, - Mux(io.cmd === M_XA_XOR, io.lhs ^ io.rhs, + Mux(io.cmd === M_XA_AND, io.lhs & rhs, + Mux(io.cmd === M_XA_OR, io.lhs | rhs, + Mux(io.cmd === M_XA_XOR, io.lhs ^ rhs, Mux(Mux(less, min, max), io.lhs, - io.rhs))))) + storegen.data))))) - val wmask = FillInterleaved(8, StoreGen(io.typ, io.addr).mask) + val wmask = FillInterleaved(8, storegen.mask) io.out := wmask & out | ~wmask & io.lhs } From f12bbc1e438518fe774a8390c2f6de3f3dcc9823 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 14 Sep 2013 22:34:53 -0700 Subject: [PATCH 0637/1087] working RoCC AccumulatorExample --- rocket/src/main/scala/arbiter.scala | 1 + rocket/src/main/scala/ctrl.scala | 14 ++--- rocket/src/main/scala/decode.scala | 24 ++++---- rocket/src/main/scala/dpath.scala | 9 +-- rocket/src/main/scala/nbdcache.scala | 88 +++++++++++++++++++++++++++- rocket/src/main/scala/rocc.scala | 63 ++++++++++++++++---- rocket/src/main/scala/tile.scala | 19 ++++-- 7 files changed, 174 insertions(+), 44 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 3ad0e750..c876a243 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -39,6 +39,7 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Modu val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i) resp.valid := io.mem.resp.valid && tag_hit io.requestor(i).xcpt := io.mem.xcpt + io.requestor(i).ordered := io.mem.ordered resp.bits := io.mem.resp.bits resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n)) resp.bits.nack := io.mem.resp.bits.nack && tag_hit diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 89769f71..2a4da7af 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -282,25 +282,25 @@ object RoCCDecode extends DecodeConstants CUSTOM0-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, 
N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM0_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM0_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM0-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM0_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM0_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM0_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM1_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM1_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM1_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM1_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM1_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM2_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM2_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, 
DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM2_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM2_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM2_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM3_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM3_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + CUSTOM3_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM3_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), CUSTOM3_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N)) } @@ -639,7 +639,7 @@ class Control(implicit conf: RocketConfiguration) extends Module io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr || io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) - val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val) || + val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val || ex_reg_rocc_val) || fp_data_hazard_ex && (ex_reg_mem_val || 
ex_reg_fp_val) // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage. @@ -655,7 +655,7 @@ class Control(implicit conf: RocketConfiguration) extends Module io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) || + val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val || mem_reg_rocc_val) || fp_data_hazard_mem && mem_reg_fp_val id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) @@ -669,7 +669,7 @@ class Control(implicit conf: RocketConfiguration) extends Module io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr || io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr) - val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val) || + val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val || wb_reg_rocc_val) || fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) io.dpath.mem_ll_bypass_rs1 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr1 diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 502c1fee..6c8432d8 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -5,12 +5,13 @@ import Node._ object DecodeLogic { - def term(b: Literal) = { - if (b.isZ) { - var (bits, mask, swidth) = Literal.parseLit(b.toString) - new Term(BigInt(bits, 2), BigInt(2).pow(b.width)-(BigInt(mask, 2)+1)) + def term(b: Bits) = { + val lit = b.litOf + if (lit.isZ) { + var (bits, mask, swidth) = Literal.parseLit(lit.toString) + new Term(BigInt(bits, 2), BigInt(2).pow(lit.width)-(BigInt(mask, 2)+1)) } else { - new Term(b.value) + new Term(lit.value) } } 
def logic(addr: Bits, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bits], terms: Seq[Term]) = { @@ -24,18 +25,17 @@ object DecodeLogic var map = mapping var cache = scala.collection.mutable.Map[Term,Bits]() default map { d => - val dlit = d.litOf - val dterm = term(dlit) + val dterm = term(d) val (keys, values) = map.unzip val addrWidth = keys.map(_.getWidth).max - val terms = keys.toList.map(k => term(k.litOf)) - val termvalues = terms zip values.toList.map(v => term(v.head.litOf)) + val terms = keys.toList.map(k => term(k)) + val termvalues = terms zip values.toList.map(v => term(v.head)) - for (t <- terms.tails; if !t.isEmpty) + for (t <- keys.zip(terms).tails; if !t.isEmpty) for (u <- t.tail) - assert(!t.head.intersects(u), "DecodeLogic: keys " + t + " and " + u + " overlap") + assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap") - val result = (0 until math.max(dlit.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) => + val result = (0 until math.max(d.litOf.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) => val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1) val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1) val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 898e5e5a..7b4c7bc6 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -244,7 +244,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1) - val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu + val dmem_resp_valid = io.dmem.resp.valid && 
io.dmem.resp.bits.has_data + val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data val mem_ll_wdata = Bits() mem_ll_wdata := div.io.resp.bits.data @@ -259,7 +260,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ctrl.mem_ll_wb := Bool(true) } } - when (dmem_resp_replay) { + when (dmem_resp_replay && dmem_resp_xpu) { div.io.resp.ready := Bool(false) if (!conf.rocc.isEmpty) io.rocc.resp.ready := Bool(false) @@ -269,7 +270,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module } when (io.ctrl.mem_ll_waddr === UInt(0)) { io.ctrl.mem_ll_wb := Bool(false) } - io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu + io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu io.fpu.dmem_resp_data := io.dmem.resp.bits.data io.fpu.dmem_resp_type := io.dmem.resp.bits.typ io.fpu.dmem_resp_tag := dmem_resp_waddr @@ -297,7 +298,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ctrl.wb_waddr := wb_reg_waddr // scoreboard clear (for div/mul and D$ load miss writebacks) - io.ctrl.fp_sboard_clr := io.dmem.resp.bits.replay && dmem_resp_fpu + io.ctrl.fp_sboard_clr := dmem_resp_replay && dmem_resp_fpu io.ctrl.fp_sboard_clra := dmem_resp_waddr // processor control regfile write diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 565059e4..251aa17b 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -698,6 +698,7 @@ class HellaCacheResp(implicit val conf: DCacheConfig) extends DCacheBundle { val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() val typ = Bits(width = 3) + val has_data = Bool() val data = Bits(width = conf.databits) val data_subword = Bits(width = conf.databits) val tag = Bits(width = conf.reqtagbits) @@ -1027,11 +1028,11 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends io.cpu.req.ready := Bool(false) } - val s2_do_resp = isRead(s2_req.cmd) || 
s2_sc - io.cpu.resp.valid := s2_do_resp && (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable + io.cpu.resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable io.cpu.resp.bits.nack := s2_valid && s2_nack io.cpu.resp.bits := s2_req - io.cpu.resp.bits.replay := s2_replay && s2_do_resp + io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc + io.cpu.resp.bits.replay := s2_replay io.cpu.resp.bits.data := loadgen.word io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte) io.cpu.resp.bits.store_data := s2_req.data @@ -1039,3 +1040,84 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends io.mem.grant_ack <> mshrs.io.mem_finish } + +// exposes a sane decoupled request interface +class SimpleHellaCacheIF(implicit conf: DCacheConfig) extends Module +{ + val io = new Bundle { + val requestor = new HellaCacheIO().flip + val cache = new HellaCacheIO + } + + val replaying_cmb = Bool() + val replaying = Reg(next = replaying_cmb, init = Bool(false)) + replaying_cmb := replaying + + val replayq1 = Module(new Queue(new HellaCacheReq, 1, flow = true)) + val replayq2 = Module(new Queue(new HellaCacheReq, 1)) + val req_arb = Module(new Arbiter(new HellaCacheReq, 2)) + + req_arb.io.in(0) <> replayq1.io.deq + req_arb.io.in(1).valid := !replaying_cmb && io.requestor.req.valid + req_arb.io.in(1).bits := io.requestor.req.bits + io.requestor.req.ready := !replaying_cmb && req_arb.io.in(1).ready + + val s2_nack = io.cache.resp.bits.nack + val s3_nack = Reg(next=s2_nack) + + val s0_req_fire = io.cache.req.fire() + val s1_req_fire = Reg(next=s0_req_fire) + val s2_req_fire = Reg(next=s1_req_fire) + + io.cache.req.bits.kill := s2_nack + io.cache.req.bits.phys := Bool(true) + io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) + io.cache.req <> req_arb.io.out + + // replay queues + // replayq1 holds the older request + // replayq2 holds the newer request (for the first nack) + // we need to 
split the queues like this for the case where the older request + // goes through but gets nacked, while the newer request stalls + // if this happens, the newer request will go through before the older + // request + // we don't need to check replayq1.io.enq.ready and replayq2.io.enq.ready as + // there will only be two requests going through at most + + // stash d$ request in stage 2 if nacked (older request) + replayq1.io.enq.valid := Bool(false) + replayq1.io.enq.bits.cmd := io.cache.resp.bits.cmd + replayq1.io.enq.bits.typ := io.cache.resp.bits.typ + replayq1.io.enq.bits.addr := io.cache.resp.bits.addr + replayq1.io.enq.bits.data := io.cache.resp.bits.store_data + replayq1.io.enq.bits.tag := io.cache.resp.bits.tag + + // stash d$ request in stage 1 if nacked (newer request) + replayq2.io.enq.valid := s2_req_fire && s3_nack + replayq2.io.enq.bits.data := io.cache.resp.bits.store_data + replayq2.io.enq.bits <> io.cache.resp.bits + replayq2.io.deq.ready := Bool(false) + + when (s2_nack) { + replayq1.io.enq.valid := Bool(true) + replaying_cmb := Bool(true) + } + + // when replaying request got sunk into the d$ + when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !s2_nack) { + // see if there's a stashed request in replayq2 + when (replayq2.io.deq.valid) { + replayq1.io.enq.valid := Bool(true) + replayq1.io.enq.bits.cmd := replayq2.io.deq.bits.cmd + replayq1.io.enq.bits.typ := replayq2.io.deq.bits.typ + replayq1.io.enq.bits.addr := replayq2.io.deq.bits.addr + replayq1.io.enq.bits.data := replayq2.io.deq.bits.data + replayq1.io.enq.bits.tag := replayq2.io.deq.bits.tag + replayq2.io.deq.ready := Bool(true) + } .otherwise { + replaying_cmb := Bool(false) + } + } + + io.requestor.resp := io.cache.resp +} diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index cbff596d..ac314388 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -2,6 +2,7 @@ package rocket import Chisel._ import Node._ +import 
uncore._ class RoCCInstruction extends Bundle { @@ -36,35 +37,73 @@ class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle { val cmd = Decoupled(new RoCCCommand).flip val resp = Decoupled(new RoCCResponse) + val mem = new HellaCacheIO()(conf.dcache) val busy = Bool(OUTPUT) val interrupt = Bool(OUTPUT) override def clone = new RoCCInterface().asInstanceOf[this.type] } -abstract class RoCC(implicit conf: RocketConfiguration) extends Module +abstract class RoCC(conf: RocketConfiguration) extends Module { - val io = new RoCCInterface + val io = new RoCCInterface()(conf) } -class AccumulatorExample(implicit conf: RocketConfiguration) extends RoCC +class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf) { - val regfile = Mem(UInt(width = conf.xprlen), 4) + val n = 4 + val regfile = Mem(UInt(width = conf.xprlen), n) + val busy = Vec.fill(n){Reg(init=Bool(false))} - val funct = io.cmd.bits.inst.funct - val addr = io.cmd.bits.inst.rs2 - val addend = io.cmd.bits.rs1 + val cmd = Queue(io.cmd) + val funct = cmd.bits.inst.funct + val addr = cmd.bits.inst.rs2(log2Up(n)-1,0) + val doWrite = funct === UInt(0) + val doRead = funct === UInt(1) + val doLoad = funct === UInt(2) + val doAccum = funct === UInt(3) + val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0) + + // datapath + val addend = cmd.bits.rs1 val accum = regfile(addr) - val wdata = Mux(funct === UInt(0), addend, accum + addend) + val wdata = Mux(doWrite, addend, accum + addend) - when (io.cmd.fire() && (funct === UInt(1) || funct === UInt(3))) { + when (cmd.fire() && (doWrite || doAccum)) { regfile(addr) := wdata } - io.cmd.ready := io.resp.ready - io.resp.valid := io.cmd.valid && io.cmd.bits.inst.xd - io.resp.bits.rd := io.cmd.bits.inst.rd + when (io.mem.resp.valid) { + regfile(memRespTag) := io.mem.resp.bits.data + } + + // control + when (io.mem.req.fire()) { + busy(addr) := Bool(true) + } + + when (io.mem.resp.valid) { + busy(memRespTag) := Bool(false) + } + + val doResp = 
cmd.bits.inst.xd + val stallReg = busy(addr) + val stallLoad = doLoad && !io.mem.req.ready + val stallResp = doResp && !io.resp.ready + + cmd.ready := !stallReg && !stallLoad && !stallResp + + io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad + io.resp.bits.rd := cmd.bits.inst.rd io.resp.bits.data := accum + io.busy := Bool(false) io.interrupt := Bool(false) + + io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp + io.mem.req.bits.addr := addend + io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores) + io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1 + io.mem.req.bits.data := Bits(0) // we're not performing any stores... + io.mem.req.bits.phys := Bool(true) // don't perform address translation } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 6be26fc7..022fbf23 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -6,7 +6,7 @@ import Util._ case class RocketConfiguration(tl: TileLinkConfiguration, icache: ICacheConfig, dcache: DCacheConfig, - fpu: Boolean, rocc: Option[RoCC] = None, + fpu: Boolean, rocc: Option[RocketConfiguration => RoCC] = None, fastLoadWord: Boolean = true, fastLoadByte: Boolean = false, fastMulDiv: Boolean = true) @@ -23,7 +23,6 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module val memPorts = 2 val dcachePortId = 0 val icachePortId = 1 - val vicachePortId = 2 implicit val tlConf = confIn.tl implicit val lnConf = confIn.tl.ln implicit val icConf = confIn.icache @@ -48,6 +47,18 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module ptw.io.requestor(0) <> icache.io.cpu.ptw ptw.io.requestor(1) <> dcache.io.cpu.ptw + if (!conf.rocc.isEmpty) { + val dcIF = Module(new SimpleHellaCacheIF) + val rocc = Module((conf.rocc.get)(conf)) + dcIF.io.requestor <> rocc.io.mem + core.io.rocc <> rocc.io + dcacheArb.io.requestor(2) <> dcIF.io.cache + } + + core.io.host <> 
io.host + core.io.imem <> icache.io.cpu + core.io.ptw <> ptw.io.dpath + val memArb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts)) memArb.io.in(dcachePortId) <> dcache.io.mem memArb.io.in(icachePortId) <> icache.io.mem @@ -61,8 +72,4 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module dcache.io.mem.release.meta.ready := io.tilelink.release.meta.ready io.tilelink.release.meta.bits := dcache.io.mem.release.meta.bits io.tilelink.release.meta.bits.payload.client_xact_id := Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) - - core.io.host <> io.host - core.io.imem <> icache.io.cpu - core.io.ptw <> ptw.io.dpath } From 88d1c47665278f0eacf9488dbebdaacaea058b85 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 15 Sep 2013 04:14:45 -0700 Subject: [PATCH 0638/1087] don't disassemble within chisel --- rocket/src/main/scala/dpath.scala | 4 +- rocket/src/main/scala/instructions.scala | 265 ----------------------- 2 files changed, 2 insertions(+), 267 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 7b4c7bc6..21dd1c08 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -317,10 +317,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Module Mux(io.ctrl.sel_pc === PC_PCR, pcr.io.evec, wb_reg_pc)).toUInt // PC_WB - printf("C: %d [%d] pc=[%x] W[r%d=%x] R[r%d=%x] R[r%d=%x] inst=[%x] %s\n", + printf("C: %d [%d] pc=[%x] W[r%d=%x] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", tsc_reg(32,0), io.ctrl.wb_valid, wb_reg_pc, Mux(wb_wen, wb_reg_waddr, UInt(0)), wb_wdata, wb_reg_inst(26,22), Reg(next=Reg(next=ex_rs1)), wb_reg_inst(21,17), Reg(next=Reg(next=ex_rs2)), - wb_reg_inst, Disassemble(wb_reg_inst)) + wb_reg_inst, wb_reg_inst) } diff --git a/rocket/src/main/scala/instructions.scala 
b/rocket/src/main/scala/instructions.scala index 9a7dfd05..29fa3ea3 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -192,268 +192,3 @@ object Instructions def CUSTOM3_RD_RS1 = Bits("b??????????????????????1101111011") def CUSTOM3_RD_RS1_RS2 = Bits("b??????????????????????1111111011") } - -object Disassemble -{ - def apply(insn: UInt) = { - val name :: fmt :: Nil = ListLookup(insn, default, table) - sprintf("%s %s", name, operands(insn, fmt)) - } - - private def operands(insn: Bits, fmt: Bits): Bits = { - val x = AVec(Str(" x0"), Str(" ra"), Str(" s0"), Str(" s1"), - Str(" s2"), Str(" s3"), Str(" s4"), Str(" s5"), - Str(" s6"), Str(" s7"), Str(" s8"), Str(" s9"), - Str("s10"), Str("s11"), Str(" sp"), Str(" tp"), - Str(" v0"), Str(" v1"), Str(" a0"), Str(" a1"), - Str(" a2"), Str(" a3"), Str(" a4"), Str(" a5"), - Str(" a6"), Str(" a7"), Str(" t0"), Str(" t1"), - Str(" t2"), Str(" t3"), Str(" t4"), Str(" t5")) - val f = AVec(Str(" fs0"), Str(" fs1"), Str(" fs2"), Str(" fs3"), - Str(" fs4"), Str(" fs5"), Str(" fs6"), Str(" fs7"), - Str(" fs8"), Str(" fs9"), Str("fs10"), Str("fs11"), - Str("fs12"), Str("fs13"), Str("fs14"), Str("fs15"), - Str(" fv0"), Str(" fv1"), Str(" fa0"), Str(" fa1"), - Str(" fa2"), Str(" fa3"), Str(" fa4"), Str(" fa5"), - Str(" fa6"), Str(" fa7"), Str(" ft0"), Str(" ft1"), - Str(" ft2"), Str(" ft3"), Str(" ft4"), Str(" ft5")) - - def hex(x: SInt, plus: Char = ' ') = - Cat(Mux(x < SInt(0), Str("-0x"), Str(plus + "0x")), Str(x.abs, 16)) - - val comma = Str(',') - val lparen = Str('(') - val rparen = Str(')') - - val rd = insn(31,27) - val rs1 = insn(26,22) - val rs2 = insn(21,17) - val rs3 = insn(16,12) - val immv = insn(21,10).toSInt - val bmmv = Cat(insn(31,27), insn(16,10)).toSInt - val jmmv = insn(31,7).toSInt - - val imm = hex(Mux(fmt === FMT_B, bmmv << UInt(1), - Mux(fmt === FMT_I || fmt === FMT_LD || fmt === FMT_FLD, immv, - Mux(fmt === FMT_ST || fmt === FMT_FST, bmmv, - SInt(0))))) - 
val jmm = hex(jmmv << UInt(1)) - val lmm = Cat(Str("0x"), Str(insn(26,7).toUInt, 16)) - - val addr = Cat(comma, imm, lparen, x(rs1), rparen) - - val r0 = x(rd) - val r1 = Cat(r0, comma, x(rs1)) - val r2 = Cat(r1, comma, x(rs2)) - val f1 = Cat(f(rd), comma, f(rs1)) - val f2 = Cat(f1, comma, f(rs2)) - val f3 = Cat(f2, comma, f(rs3)) - val fx = Cat(f(rd), comma, x(rs1)) - val xf1 = Cat(x(rd), comma, f(rs1)) - val xf2 = Cat(xf1, comma, f(rs2)) - val z = Str(' ') - val i = Cat(r1, comma, imm) - val b = Cat(x(rs1), comma, x(rs2), comma, imm) - val j = jmm - val l = Cat(x(rd), comma, lmm) - val ld = Cat(x(rd), addr) - val st = Cat(x(rs2), addr) - val fld = Cat(f(rd), addr) - val fst = Cat(f(rs2), addr) - val amo = r2 - - val opts = Seq(r0, r1, r2, f1, f2, f3, fx, xf1, xf2, z, i, b, j, l, ld, st, - fld, fst, amo) - val maxLen = opts.map(_.getWidth).reduce(_ max _) - val padded = opts.map(x => x.toUInt << UInt(maxLen - x.getWidth)) - AVec(padded)(fmt.toUInt) - } - - private def FMT_X = Bits("b?????", 5) - private def FMT_R0 = Bits(0, 5) - private def FMT_R1 = Bits(1, 5) - private def FMT_R2 = Bits(2, 5) - private def FMT_F1 = Bits(3, 5) - private def FMT_F2 = Bits(4, 5) - private def FMT_F3 = Bits(5, 5) - private def FMT_FX = Bits(6, 5) - private def FMT_XF1 = Bits(7, 5) - private def FMT_XF2 = Bits(8, 5) - private def FMT_0 = Bits(9, 5) - private def FMT_I = Bits(10, 5) - private def FMT_B = Bits(11, 5) - private def FMT_J = Bits(12, 5) - private def FMT_L = Bits(13, 5) - private def FMT_LD = Bits(14, 5) - private def FMT_ST = Bits(15, 5) - private def FMT_FLD = Bits(16, 5) - private def FMT_FST = Bits(17, 5) - private def FMT_AMO = Bits(18, 5) - - private def default = List(Str("unknown "), FMT_0) - - import Instructions._ - private def table = Array( - BNE-> List(Str("bne "), FMT_B), - BEQ-> List(Str("beq "), FMT_B), - BLT-> List(Str("blt "), FMT_B), - BLTU-> List(Str("bltu "), FMT_B), - BGE-> List(Str("bge "), FMT_B), - BGEU-> List(Str("bgeu "), FMT_B), - - JAL-> 
List(Str("jal "), FMT_J), - JALR-> List(Str("jalr "), FMT_LD), - AUIPC-> List(Str("auipc "), FMT_L), - - LB-> List(Str("lb "), FMT_LD), - LH-> List(Str("lh "), FMT_LD), - LW-> List(Str("lw "), FMT_LD), - LD-> List(Str("ld "), FMT_LD), - LBU-> List(Str("lbu "), FMT_LD), - LHU-> List(Str("lhu "), FMT_LD), - LWU-> List(Str("lwu "), FMT_LD), - SB-> List(Str("sb "), FMT_ST), - SH-> List(Str("sh "), FMT_ST), - SW-> List(Str("sw "), FMT_ST), - SD-> List(Str("sd "), FMT_ST), - - AMOADD_W-> List(Str("amoaddw "), FMT_AMO), - AMOSWAP_W-> List(Str("amoswapw"), FMT_AMO), - AMOAND_W-> List(Str("amoandw "), FMT_AMO), - AMOOR_W-> List(Str("amoorw "), FMT_AMO), - AMOMIN_W-> List(Str("amominw "), FMT_AMO), - AMOMINU_W-> List(Str("amominuw"), FMT_AMO), - AMOMAX_W-> List(Str("amomaxw "), FMT_AMO), - AMOMAXU_W-> List(Str("amomaxuw"), FMT_AMO), - AMOADD_D-> List(Str("amoaddd "), FMT_AMO), - AMOSWAP_D-> List(Str("amoswapd"), FMT_AMO), - AMOAND_D-> List(Str("amoandd "), FMT_AMO), - AMOOR_D-> List(Str("amoord "), FMT_AMO), - AMOMIN_D-> List(Str("amomind "), FMT_AMO), - AMOMINU_D-> List(Str("amominud"), FMT_AMO), - AMOMAX_D-> List(Str("amomaxd "), FMT_AMO), - AMOMAXU_D-> List(Str("amomaxud"), FMT_AMO), - - LR_W-> List(Str("lr.w "), FMT_AMO), - LR_D-> List(Str("lr.d "), FMT_AMO), - SC_W-> List(Str("sc.w "), FMT_AMO), - SC_D-> List(Str("sc.d "), FMT_AMO), - - LUI-> List(Str("lui "), FMT_L), - ADDI-> List(Str("addi "), FMT_I), - SLTI -> List(Str("slti "), FMT_I), - SLTIU-> List(Str("sltiu "), FMT_I), - ANDI-> List(Str("andi "), FMT_I), - ORI-> List(Str("ori "), FMT_I), - XORI-> List(Str("xori "), FMT_I), - SLLI-> List(Str("slli "), FMT_I), - SRLI-> List(Str("srli "), FMT_I), - SRAI-> List(Str("srai "), FMT_I), - ADD-> List(Str("add "), FMT_R2), - SUB-> List(Str("sub "), FMT_R2), - SLT-> List(Str("slt "), FMT_R2), - SLTU-> List(Str("sltu "), FMT_R2), - AND-> List(Str("and "), FMT_R2), - OR-> List(Str("or "), FMT_R2), - XOR-> List(Str("xor "), FMT_R2), - SLL-> List(Str("sll "), FMT_R2), - SRL-> 
List(Str("srl "), FMT_R2), - SRA-> List(Str("sra "), FMT_R2), - - ADDIW-> List(Str("addiw "), FMT_I), - SLLIW-> List(Str("slliw "), FMT_I), - SRLIW-> List(Str("srliw "), FMT_I), - SRAIW-> List(Str("sraiw "), FMT_I), - ADDW-> List(Str("addw "), FMT_R2), - SUBW-> List(Str("subw "), FMT_R2), - SLLW-> List(Str("sllw "), FMT_R2), - SRLW-> List(Str("srlw "), FMT_R2), - SRAW-> List(Str("sraw "), FMT_R2), - - MUL-> List(Str("mul "), FMT_R2), - MULH-> List(Str("mulh "), FMT_R2), - MULHU-> List(Str("mulhu "), FMT_R2), - MULHSU-> List(Str("mulhsu "), FMT_R2), - MULW-> List(Str("mulw "), FMT_R2), - - DIV-> List(Str("div "), FMT_R2), - DIVU-> List(Str("divu "), FMT_R2), - REM-> List(Str("rem "), FMT_R2), - REMU-> List(Str("remu "), FMT_R2), - DIVW-> List(Str("divw "), FMT_R2), - DIVUW-> List(Str("divuw "), FMT_R2), - REMW-> List(Str("remw "), FMT_R2), - REMUW-> List(Str("remuw "), FMT_R2), - - SYSCALL-> List(Str("syscall "), FMT_0), - SETPCR-> List(Str("setpcr "), FMT_I), - CLEARPCR-> List(Str("clearpcr"), FMT_I), - ERET-> List(Str("eret "), FMT_0), - FENCE-> List(Str("fence "), FMT_0), - FENCE_I-> List(Str("fence.i "), FMT_0), - MFPCR-> List(Str("mfpcr "), FMT_R2), - MTPCR-> List(Str("mtpcr "), FMT_R2), - RDTIME-> List(Str("rdtime "), FMT_R0), - RDCYCLE-> List(Str("rdcycle "), FMT_R0), - RDINSTRET-> List(Str("rdinstrt"), FMT_R0), - - FCVT_S_D-> List(Str("fcvt.sd "), FMT_F1), - FCVT_D_S-> List(Str("fcvt.ds "), FMT_F1), - FSGNJ_S-> List(Str("fsgnj.s "), FMT_F2), - FSGNJ_D-> List(Str("fsgnj.d "), FMT_F2), - FSGNJX_S-> List(Str("fsgnx.s "), FMT_F2), - FSGNJX_D-> List(Str("fsgnx.d "), FMT_F2), - FSGNJN_S-> List(Str("fsgnjn.s"), FMT_F2), - FSGNJN_D-> List(Str("fsgnjn.d"), FMT_F2), - FMIN_S-> List(Str("fmin.s "), FMT_F2), - FMIN_D-> List(Str("fmin.d "), FMT_F2), - FMAX_S-> List(Str("fmax.s "), FMT_F2), - FMAX_D-> List(Str("fmax.d "), FMT_F2), - FADD_S-> List(Str("fadd.s "), FMT_F2), - FADD_D-> List(Str("fadd.d "), FMT_F2), - FSUB_S-> List(Str("fsub.s "), FMT_F2), - FSUB_D-> 
List(Str("fsub.d "), FMT_F2), - FMUL_S-> List(Str("fmul.s "), FMT_F2), - FMUL_D-> List(Str("fmul.d "), FMT_F2), - FMADD_S-> List(Str("fmadd.s "), FMT_F3), - FMADD_D-> List(Str("fmadd.d "), FMT_F3), - FMSUB_S-> List(Str("fmsub.s "), FMT_F3), - FMSUB_D-> List(Str("fmsub.d "), FMT_F3), - FNMADD_S-> List(Str("fnmadd.s"), FMT_F3), - FNMADD_D-> List(Str("fnmadd.d"), FMT_F3), - FNMSUB_S-> List(Str("fnmsub.s"), FMT_F3), - FNMSUB_D-> List(Str("fnmsub.d"), FMT_F3), - FMV_X_S-> List(Str("fmv.x.s "), FMT_XF1), - FMV_X_D-> List(Str("fmv.x.d "), FMT_XF1), - FCVT_W_S-> List(Str("fcvt.ws "), FMT_XF1), - FCVT_W_D-> List(Str("fcvt.wd "), FMT_XF1), - FCVT_WU_S-> List(Str("fcvt.wus"), FMT_XF1), - FCVT_WU_D-> List(Str("fcvt.wud"), FMT_XF1), - FCVT_L_S-> List(Str("fcvt.ls "), FMT_XF1), - FCVT_L_D-> List(Str("fcvt.ld "), FMT_XF1), - FCVT_LU_S-> List(Str("fcvt.lus"), FMT_XF1), - FCVT_LU_D-> List(Str("fcvt.lud"), FMT_XF1), - FEQ_S-> List(Str("feq.s "), FMT_XF2), - FEQ_D-> List(Str("feq.d "), FMT_XF2), - FLT_S-> List(Str("flt.s "), FMT_XF2), - FLT_D-> List(Str("flt.d "), FMT_XF2), - FLE_S-> List(Str("fle.s "), FMT_XF2), - FLE_D-> List(Str("fle.d "), FMT_XF2), - FMV_S_X-> List(Str("fmv.s.x "), FMT_FX), - FMV_D_X-> List(Str("fmv.d.x "), FMT_FX), - FCVT_S_W-> List(Str("fcvt.sw "), FMT_FX), - FCVT_D_W-> List(Str("fcvt.dw "), FMT_FX), - FCVT_S_WU-> List(Str("fcvt.swu"), FMT_FX), - FCVT_D_WU-> List(Str("fcvt.dwu"), FMT_FX), - FCVT_S_L-> List(Str("fcvt.sl "), FMT_FX), - FCVT_D_L-> List(Str("fcvt.dl "), FMT_FX), - FCVT_S_LU-> List(Str("fcvt.slu"), FMT_FX), - FCVT_D_LU-> List(Str("fcvt.dlu"), FMT_FX), - FRSR-> List(Str("frsr "), FMT_R0), - FSSR-> List(Str("fssr "), FMT_R1), - FLW-> List(Str("flw "), FMT_FLD), - FLD-> List(Str("fld "), FMT_FLD), - FSW-> List(Str("fsw "), FMT_FST), - FSD-> List(Str("fsd "), FMT_FST) - ) -} From 110e53cb48f6fbb3b77736e05e07e67c693f6db2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 15 Sep 2013 04:15:32 -0700 Subject: [PATCH 0639/1087] Revert "Add early out 
to multiplier" This broke recently and I don't have time to figure out why. --- rocket/src/main/scala/divider.scala | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala index c18e47d6..1ea4dd29 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -54,14 +54,11 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke val accum = mulReg(2*mulw,mulw).toSInt val mpcand = divisor.toSInt val prod = mplier(mulUnroll-1,0) * mpcand + accum - val eOut = Bool(earlyOut) && count > 0 && - (0 until mulw/mulUnroll).map(i => i > mulw/mulUnroll-1-count || mplier((i+1)*mulUnroll-1,i*mulUnroll) === 0).reduce(_&&_) - val eOutValue = mulReg >> (mulw/mulUnroll-count)(log2Up(mulw/mulUnroll)-1,0)*mulUnroll - val nextMulReg = Mux(eOut, eOutValue, Cat(prod, mplier(mulw-1,mulUnroll))) + val nextMulReg = Cat(prod, mplier(mulw-1,mulUnroll)).toUInt remainder := Cat(nextMulReg >> w, Bool(false), nextMulReg(w-1,0)).toSInt count := count + 1 - when (count === mulw/mulUnroll-1 || eOut) { + when (count === mulw/mulUnroll-1) { state := s_done when (AVec(FN_MULH, FN_MULHU, FN_MULHSU) contains req.fn) { state := s_move_rem From 25ab4029321707063d487a880b91a3029a8d041d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 15 Sep 2013 04:29:06 -0700 Subject: [PATCH 0640/1087] swap JAL, JALR encodings --- rocket/src/main/scala/instructions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 29fa3ea3..9273d1c2 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -6,8 +6,8 @@ import Node._ object Instructions { /* Automatically generated by parse-opcodes */ - def JAL = Bits("b?????????????????????????1101111") - def JALR = Bits("b??????????????????????0001100111") + def JAL = 
Bits("b?????????????????????????1100111") + def JALR = Bits("b??????????????????????0001101111") def BEQ = Bits("b??????????????????????0001100011") def BNE = Bits("b??????????????????????0011100011") def BLT = Bits("b??????????????????????1001100011") From 1d2f4f8437baf16051d74d35ec4d65b93c9ce40e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 21 Sep 2013 06:32:40 -0700 Subject: [PATCH 0641/1087] New ISA encoding, AUIPC semantics --- rocket/src/main/scala/consts.scala | 5 +- rocket/src/main/scala/ctrl.scala | 20 +- rocket/src/main/scala/dpath.scala | 46 +-- rocket/src/main/scala/fpu.scala | 10 +- rocket/src/main/scala/instructions.scala | 366 +++++++++++------------ 5 files changed, 224 insertions(+), 223 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 3309bab0..b8970e9e 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -22,8 +22,9 @@ trait ScalarOpConstants { val A1_X = Bits("b??", 2) val A1_RS1 = UInt(0, 2) - val A1_PC = UInt(1, 2) - val A1_ZERO = UInt(2, 2) + val A1_ZERO = UInt(1, 2) + val A1_PC = UInt(2, 2) + val A1_PCHI = UInt(3, 2) val IMM_X = Bits("b???", 3) val IMM_S = UInt(0, 3); diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2a4da7af..60fb8dd7 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -100,7 +100,7 @@ object XDecode extends DecodeConstants JAL-> List(Y, N,N,BR_J, N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), JALR-> List(Y, N,N,BR_N, Y,N,Y,A2_FOUR,A1_PC, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_PCHI,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), LB-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, 
DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), LH-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), @@ -189,7 +189,7 @@ object XDecode extends DecodeConstants CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.C,N,N,N,Y,N,N,N), ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,Y,N,Y,N,N,N), FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,Y,N), - FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,Y,N,N,N,Y,Y,N), + FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,Y,N,N,N,Y,N,N), MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.F,N,N,N,Y,N,N,N), MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RS2, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.T,N,N,N,Y,N,N,N), RDTIME-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_TSC,PCR.N,N,N,N,N,N,N,N), @@ -395,11 +395,11 @@ class Control(implicit conf: RocketConfiguration) extends Module val ctrl_killx = Bool() val ctrl_killm = Bool() - val id_raddr3 = io.dpath.inst(16,12) - val id_raddr2 = io.dpath.inst(21,17) - val id_raddr1 = io.dpath.inst(26,22) - val id_waddr = io.dpath.inst(31,27) - val id_load_use = Bool(); + val id_raddr3 = io.dpath.inst(31,27) + val id_raddr2 = io.dpath.inst(24,20) + val id_raddr1 = io.dpath.inst(19,15) + val id_waddr = io.dpath.inst(11,7) + val id_load_use = Bool() val id_reg_fence = Reg(init=Bool(false)) val sr = io.dpath.status @@ -416,14 +416,14 @@ class Control(implicit conf: RocketConfiguration) extends Module id_raddr1 != PCR.SUP0 && id_raddr1 != PCR.SUP1 && id_raddr1 != PCR.EPC // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) - val id_amo_aq = io.dpath.inst(16) - val id_amo_rl 
= io.dpath.inst(15) + val id_amo_aq = io.dpath.inst(26) + val id_amo_rl = io.dpath.inst(25) val id_fence_next = id_fence || id_amo && id_amo_rl val id_rocc_busy = io.rocc.busy || ex_reg_rocc_val || mem_reg_rocc_val || wb_reg_rocc_val val id_fence_ok = io.dmem.ordered && !ex_reg_mem_val && (Bool(conf.rocc.isEmpty) || !id_rocc_busy) id_reg_fence := id_fence_next || id_reg_fence && !id_fence_ok - val id_do_fence = id_amo && id_amo_aq || id_reg_fence && (id_mem_val || id_rocc_val) || id_pcr_flush + val id_do_fence = id_amo && id_amo_aq || id_fence_i || id_reg_fence && (id_mem_val || id_rocc_val) || id_pcr_flush val (id_xcpt, id_cause) = checkExceptions(List( (id_interrupt, id_interrupt_cause), diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 21dd1c08..23c54a4a 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -59,8 +59,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module def readRF(a: UInt) = regfile_(~a) def writeRF(a: UInt, d: Bits) = regfile_(~a) := d - val id_raddr1 = id_inst(26,22).toUInt; - val id_raddr2 = id_inst(21,17).toUInt; + val id_raddr1 = id_inst(19,15).toUInt; + val id_raddr2 = id_inst(24,20).toUInt; // bypass muxes val id_rs1_zero = id_raddr1 === UInt(0) @@ -79,18 +79,18 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // immediate generation def imm(sel: Bits, inst: Bits) = { - val sign = inst(10).toSInt - val b30_20 = Mux(sel === IMM_U, inst(21,11).toSInt, sign) - val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, - Cat(inst(9,7), inst(26,22)).toSInt) + val sign = inst(31).toSInt + val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) + val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt) val b11 = Mux(sel === IMM_U, SInt(0), - Mux(sel === IMM_SB || sel === IMM_UJ, inst(11).toSInt, sign)) - val b10_6 = Mux(sel === IMM_S || sel === IMM_SB, inst(31,27), - Mux(sel === IMM_U, Bits(0), inst(21,17))) - val b5_1 = 
Mux(sel === IMM_U, Bits(0), inst(16,12)) - val b0 = Mux(sel === IMM_I || sel === IMM_S, inst(11), Bits(0)) + Mux(sel === IMM_UJ, inst(20).toSInt, + Mux(sel === IMM_SB, inst(7).toSInt, sign))) + val b10_5 = Mux(sel === IMM_U, Bits(0), inst(30,25)) + val b4_1 = Mux(sel === IMM_U, Bits(0), + Mux(sel === IMM_S || sel === IMM_SB, inst(11,8), inst(24,21))) + val b0 = Mux(sel === IMM_S, inst(7), Mux(sel === IMM_I, inst(20), Bits(0))) - Cat(sign, b30_20, b19_12, b11, b10_6, b5_1, b0).toSInt + Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt } io.ctrl.inst := id_inst @@ -125,8 +125,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module } } - val ex_raddr1 = ex_reg_inst(26,22) - val ex_raddr2 = ex_reg_inst(21,17) + val ex_raddr1 = ex_reg_inst(19,15) + val ex_raddr2 = ex_reg_inst(24,20) val dmem_resp_data = if (conf.fastLoadByte) io.dmem.resp.bits.data_subword else io.dmem.resp.bits.data val ex_rs1 = @@ -134,7 +134,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(2), wb_reg_wdata, Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(1), mem_reg_wdata, Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(0), Bits(0), - Mux(ex_reg_sel_alu1 === A1_ZERO, Bits(0), + Mux(AVec(A1_ZERO, A1_PCHI) contains ex_reg_sel_alu1, Bits(0), Cat(ex_reg_rs1_msb, ex_reg_rs1_lsb)))))) val ex_rs2 = Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(3) && Bool(conf.fastLoadWord), dmem_resp_data, @@ -144,7 +144,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Module Cat(ex_reg_rs2_msb, ex_reg_rs2_lsb))))) val ex_imm = imm(ex_reg_sel_imm, ex_reg_inst) - val ex_op1 = Mux(ex_reg_sel_alu1 === A1_PC, ex_reg_pc.toSInt, ex_rs1) + val ex_op1_hi = Mux(AVec(A1_PC, A1_PCHI) contains ex_reg_sel_alu1, ex_reg_pc >> 12, ex_rs1 >> 12).toSInt + val ex_op1_lo = Mux(ex_reg_sel_alu1 === A1_PC, ex_reg_pc(11,0), ex_rs1(11,0)).toSInt + val ex_op1 = Cat(ex_op1_hi, ex_op1_lo) val ex_op2 = Mux(ex_reg_sel_alu2 === A2_RS2, 
ex_rs2.toSInt, Mux(ex_reg_sel_alu2 === A2_IMM, ex_imm, Mux(ex_reg_sel_alu2 === A2_ZERO, SInt(0), @@ -170,7 +172,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ctrl.div_mul_rdy := div.io.req.ready io.fpu.fromint_data := ex_rs1 - io.ctrl.ex_waddr := ex_reg_inst(31,27) + io.ctrl.ex_waddr := ex_reg_inst(11,7) def vaSign(a0: UInt, ea: Bits) = { // efficient means to compress 64-bit VA into VADDR_BITS+1 bits @@ -207,7 +209,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ptw.status := pcr.io.status // branch resolution logic - io.ctrl.jalr_eq := ex_rs1 === id_pc.toSInt && ex_reg_inst(21,10) === UInt(0) + io.ctrl.jalr_eq := ex_rs1 === id_pc.toSInt && ex_reg_inst(31,20) === UInt(0) io.ctrl.ex_br_taken := Mux(io.ctrl.ex_br_type === BR_EQ, ex_rs1 === ex_rs2, Mux(io.ctrl.ex_br_type === BR_NE, ex_rs1 != ex_rs2, @@ -238,7 +240,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module } // for load/use hazard detection (load byte/halfword) - io.ctrl.mem_waddr := mem_reg_inst(31,27) + io.ctrl.mem_waddr := mem_reg_inst(11,7) // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool @@ -302,7 +304,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ctrl.fp_sboard_clra := dmem_resp_waddr // processor control regfile write - pcr.io.rw.addr := wb_reg_inst(26,22).toUInt + pcr.io.rw.addr := wb_reg_inst(19,15).toUInt pcr.io.rw.cmd := io.ctrl.pcr pcr.io.rw.wdata := wb_reg_wdata @@ -320,7 +322,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module printf("C: %d [%d] pc=[%x] W[r%d=%x] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", tsc_reg(32,0), io.ctrl.wb_valid, wb_reg_pc, Mux(wb_wen, wb_reg_waddr, UInt(0)), wb_wdata, - wb_reg_inst(26,22), Reg(next=Reg(next=ex_rs1)), - wb_reg_inst(21,17), Reg(next=Reg(next=ex_rs2)), + wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs1)), + wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs2)), wb_reg_inst, wb_reg_inst) } diff --git 
a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index a83724af..c272999a 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -479,10 +479,10 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module val regfile = Mem(Bits(width = 65), 32) when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded } - val ex_rs1 = regfile(ex_reg_inst(26,22)) - val ex_rs2 = regfile(ex_reg_inst(21,17)) - val ex_rs3 = regfile(ex_reg_inst(16,12)) - val ex_rm = Mux(ex_reg_inst(11,9) === Bits(7), fsr_rm, ex_reg_inst(11,9)) + val ex_rs1 = regfile(ex_reg_inst(19,15)) + val ex_rs2 = regfile(ex_reg_inst(24,20)) + val ex_rs3 = regfile(ex_reg_inst(31,27)) + val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), fsr_rm, ex_reg_inst(14,12)) val fpiu = Module(new FPToInt) fpiu.io.in.valid := ex_reg_valid && ctrl.toint @@ -552,7 +552,7 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module val (write_port_busy, mem_winfo) = (Reg(Bool()), Reg(Bits())) when (ex_reg_valid) { write_port_busy := mem_wen && (memLatencyMask & latencyMask(ctrl, 1)).orR || (wen & latencyMask(ctrl, 0)).orR - mem_winfo := Cat(pipeid(ctrl), ex_reg_inst(31,27)) + mem_winfo := Cat(pipeid(ctrl), ex_reg_inst(11,7)) } for (i <- 0 until maxLatency-2) { diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 9273d1c2..91349e66 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -6,189 +6,187 @@ import Node._ object Instructions { /* Automatically generated by parse-opcodes */ - def JAL = Bits("b?????????????????????????1100111") - def JALR = Bits("b??????????????????????0001101111") - def BEQ = Bits("b??????????????????????0001100011") - def BNE = Bits("b??????????????????????0011100011") - def BLT = Bits("b??????????????????????1001100011") - def BGE = Bits("b??????????????????????1011100011") - def BLTU = Bits("b??????????????????????1101100011") - def BGEU = 
Bits("b??????????????????????1111100011") - def LUI = Bits("b?????????????????????????0110111") - def AUIPC = Bits("b?????????????????????????0010111") - def ADDI = Bits("b??????????????????????0000010011") - def SLLI = Bits("b??????????00000??????00010010011") - def SLTI = Bits("b??????????????????????0100010011") - def SLTIU = Bits("b??????????????????????0110010011") - def XORI = Bits("b??????????????????????1000010011") - def SRLI = Bits("b??????????00000??????01010010011") - def SRAI = Bits("b??????????00000??????11010010011") - def ORI = Bits("b??????????????????????1100010011") - def ANDI = Bits("b??????????????????????1110010011") - def ADD = Bits("b???????????????00000000000110011") - def SUB = Bits("b???????????????10000000000110011") - def SLL = Bits("b???????????????00000000010110011") - def SLT = Bits("b???????????????00000000100110011") - def SLTU = Bits("b???????????????00000000110110011") - def XOR = Bits("b???????????????00000001000110011") - def SRL = Bits("b???????????????00000001010110011") - def SRA = Bits("b???????????????10000001010110011") - def OR = Bits("b???????????????00000001100110011") - def AND = Bits("b???????????????00000001110110011") - def MUL = Bits("b???????????????00000010000110011") - def MULH = Bits("b???????????????00000010010110011") - def MULHSU = Bits("b???????????????00000010100110011") - def MULHU = Bits("b???????????????00000010110110011") - def DIV = Bits("b???????????????00000011000110011") - def DIVU = Bits("b???????????????00000011010110011") - def REM = Bits("b???????????????00000011100110011") - def REMU = Bits("b???????????????00000011110110011") - def ADDIW = Bits("b??????????????????????0000011011") - def SLLIW = Bits("b??????????000000?????00010011011") - def SRLIW = Bits("b??????????000000?????01010011011") - def SRAIW = Bits("b??????????000000?????11010011011") - def ADDW = Bits("b???????????????00000000000111011") - def SUBW = Bits("b???????????????10000000000111011") - def SLLW = 
Bits("b???????????????00000000010111011") - def SRLW = Bits("b???????????????00000001010111011") - def SRAW = Bits("b???????????????10000001010111011") - def MULW = Bits("b???????????????00000010000111011") - def DIVW = Bits("b???????????????00000011000111011") - def DIVUW = Bits("b???????????????00000011010111011") - def REMW = Bits("b???????????????00000011100111011") - def REMUW = Bits("b???????????????00000011110111011") - def LB = Bits("b??????????????????????0000000011") - def LH = Bits("b??????????????????????0010000011") - def LW = Bits("b??????????????????????0100000011") - def LD = Bits("b??????????????????????0110000011") - def LBU = Bits("b??????????????????????1000000011") - def LHU = Bits("b??????????????????????1010000011") - def LWU = Bits("b??????????????????????1100000011") - def SB = Bits("b??????????????????????0000100011") - def SH = Bits("b??????????????????????0010100011") - def SW = Bits("b??????????????????????0100100011") - def SD = Bits("b??????????????????????0110100011") - def AMOADD_W = Bits("b?????????????????000000100101011") - def AMOXOR_W = Bits("b?????????????????001000100101011") - def AMOOR_W = Bits("b?????????????????010000100101011") - def AMOAND_W = Bits("b?????????????????011000100101011") - def AMOMIN_W = Bits("b?????????????????100000100101011") - def AMOMAX_W = Bits("b?????????????????101000100101011") - def AMOMINU_W = Bits("b?????????????????110000100101011") - def AMOMAXU_W = Bits("b?????????????????111000100101011") - def AMOSWAP_W = Bits("b?????????????????000010100101011") - def LR_W = Bits("b??????????00000??000100100101011") - def SC_W = Bits("b?????????????????000110100101011") - def AMOADD_D = Bits("b?????????????????000000110101011") - def AMOXOR_D = Bits("b?????????????????001000110101011") - def AMOOR_D = Bits("b?????????????????010000110101011") - def AMOAND_D = Bits("b?????????????????011000110101011") - def AMOMIN_D = Bits("b?????????????????100000110101011") - def AMOMAX_D = 
Bits("b?????????????????101000110101011") - def AMOMINU_D = Bits("b?????????????????110000110101011") - def AMOMAXU_D = Bits("b?????????????????111000110101011") - def AMOSWAP_D = Bits("b?????????????????000010110101011") - def LR_D = Bits("b??????????00000??000100110101011") - def SC_D = Bits("b?????????????????000110110101011") - def FENCE = Bits("b???????????????????????000101111") - def FENCE_I = Bits("b???????????????????????010101111") - def FENCE_V_L = Bits("b???????????????????????100101111") - def FENCE_V_G = Bits("b???????????????????????110101111") - def SYSCALL = Bits("b00000000000000000000000001110111") - def BREAK = Bits("b00000000000000000000000011110111") - def RDCYCLE = Bits("b?????000000000000000001001110111") - def RDTIME = Bits("b?????000000000000000011001110111") - def RDINSTRET = Bits("b?????000000000000000101001110111") - def MTPCR = Bits("b???????????????00000000001110011") - def MFPCR = Bits("b??????????0000000000000011110011") - def SETPCR = Bits("b??????????????????????0101110011") - def CLEARPCR = Bits("b??????????????????????0111110011") - def ERET = Bits("b00000000000000000000001001110011") - def FADD_S = Bits("b???????????????00000???001010011") - def FSUB_S = Bits("b???????????????00001???001010011") - def FMUL_S = Bits("b???????????????00010???001010011") - def FDIV_S = Bits("b???????????????00011???001010011") - def FSQRT_S = Bits("b??????????0000000100???001010011") - def FSGNJ_S = Bits("b???????????????00101000001010011") - def FSGNJN_S = Bits("b???????????????00110000001010011") - def FSGNJX_S = Bits("b???????????????00111000001010011") - def FADD_D = Bits("b???????????????00000???011010011") - def FSUB_D = Bits("b???????????????00001???011010011") - def FMUL_D = Bits("b???????????????00010???011010011") - def FDIV_D = Bits("b???????????????00011???011010011") - def FSQRT_D = Bits("b??????????0000000100???011010011") - def FSGNJ_D = Bits("b???????????????00101000011010011") - def FSGNJN_D = 
Bits("b???????????????00110000011010011") - def FSGNJX_D = Bits("b???????????????00111000011010011") - def FCVT_L_S = Bits("b??????????0000001000???001010011") - def FCVT_LU_S = Bits("b??????????0000001001???001010011") - def FCVT_W_S = Bits("b??????????0000001010???001010011") - def FCVT_WU_S = Bits("b??????????0000001011???001010011") - def FCVT_L_D = Bits("b??????????0000001000???011010011") - def FCVT_LU_D = Bits("b??????????0000001001???011010011") - def FCVT_W_D = Bits("b??????????0000001010???011010011") - def FCVT_WU_D = Bits("b??????????0000001011???011010011") - def FCVT_S_L = Bits("b??????????0000001100???001010011") - def FCVT_S_LU = Bits("b??????????0000001101???001010011") - def FCVT_S_W = Bits("b??????????0000001110???001010011") - def FCVT_S_WU = Bits("b??????????0000001111???001010011") - def FCVT_D_L = Bits("b??????????0000001100???011010011") - def FCVT_D_LU = Bits("b??????????0000001101???011010011") - def FCVT_D_W = Bits("b??????????0000001110???011010011") - def FCVT_D_WU = Bits("b??????????0000001111???011010011") - def FCVT_S_D = Bits("b??????????0000010001???001010011") - def FCVT_D_S = Bits("b??????????0000010000???011010011") - def FEQ_S = Bits("b???????????????10101000001010011") - def FLT_S = Bits("b???????????????10110000001010011") - def FLE_S = Bits("b???????????????10111000001010011") - def FEQ_D = Bits("b???????????????10101000011010011") - def FLT_D = Bits("b???????????????10110000011010011") - def FLE_D = Bits("b???????????????10111000011010011") - def FMIN_S = Bits("b???????????????11000000001010011") - def FMAX_S = Bits("b???????????????11001000001010011") - def FMIN_D = Bits("b???????????????11000000011010011") - def FMAX_D = Bits("b???????????????11001000011010011") - def FMV_X_S = Bits("b??????????0000011100000001010011") - def FMV_X_D = Bits("b??????????0000011100000011010011") - def FRSR = Bits("b?????000000000011101000001010011") - def FMV_S_X = Bits("b??????????0000011110000001010011") - def FMV_D_X = 
Bits("b??????????0000011110000011010011") - def FSSR = Bits("b??????????0000011111000001010011") - def FLW = Bits("b??????????????????????0100000111") - def FLD = Bits("b??????????????????????0110000111") - def FSW = Bits("b??????????????????????0100100111") - def FSD = Bits("b??????????????????????0110100111") - def FMADD_S = Bits("b???????????????????????001000011") - def FMSUB_S = Bits("b???????????????????????001000111") - def FNMSUB_S = Bits("b???????????????????????001001011") - def FNMADD_S = Bits("b???????????????????????001001111") - def FMADD_D = Bits("b???????????????????????011000011") - def FMSUB_D = Bits("b???????????????????????011000111") - def FNMSUB_D = Bits("b???????????????????????011001011") - def FNMADD_D = Bits("b???????????????????????011001111") + def JAL = Bits("b?????????????????????????1100111") + def JALR = Bits("b?????????????????000?????1101111") + def BEQ = Bits("b?????????????????000?????1100011") + def BNE = Bits("b?????????????????001?????1100011") + def BLT = Bits("b?????????????????100?????1100011") + def BGE = Bits("b?????????????????101?????1100011") + def BLTU = Bits("b?????????????????110?????1100011") + def BGEU = Bits("b?????????????????111?????1100011") + def LUI = Bits("b?????????????????????????0110111") + def AUIPC = Bits("b?????????????????????????0010111") + def ADDI = Bits("b?????????????????000?????0010011") + def SLLI = Bits("b010000???????????001?????0010011") + def SLTI = Bits("b?????????????????010?????0010011") + def SLTIU = Bits("b?????????????????011?????0010011") + def XORI = Bits("b?????????????????100?????0010011") + def SRLI = Bits("b000000???????????101?????0010011") + def SRAI = Bits("b010000???????????101?????0010011") + def ORI = Bits("b?????????????????110?????0010011") + def ANDI = Bits("b?????????????????111?????0010011") + def ADD = Bits("b0000000??????????000?????0110011") + def SUB = Bits("b0100000??????????000?????0110011") + def SLL = Bits("b0000000??????????001?????0110011") + def SLT = 
Bits("b0000000??????????010?????0110011") + def SLTU = Bits("b0000000??????????011?????0110011") + def XOR = Bits("b0000000??????????100?????0110011") + def SRL = Bits("b0000000??????????101?????0110011") + def SRA = Bits("b0100000??????????101?????0110011") + def OR = Bits("b0000000??????????110?????0110011") + def AND = Bits("b0000000??????????111?????0110011") + def MUL = Bits("b0000001??????????000?????0110011") + def MULH = Bits("b0000001??????????001?????0110011") + def MULHSU = Bits("b0000001??????????010?????0110011") + def MULHU = Bits("b0000001??????????011?????0110011") + def DIV = Bits("b0000001??????????100?????0110011") + def DIVU = Bits("b0000001??????????101?????0110011") + def REM = Bits("b0000001??????????110?????0110011") + def REMU = Bits("b0000001??????????111?????0110011") + def ADDIW = Bits("b?????????????????000?????0011011") + def SLLIW = Bits("b0100000??????????001?????0011011") + def SRLIW = Bits("b0000000??????????101?????0011011") + def SRAIW = Bits("b0100000??????????101?????0011011") + def ADDW = Bits("b0000000??????????000?????0111011") + def SUBW = Bits("b0100000??????????000?????0111011") + def SLLW = Bits("b0000000??????????001?????0111011") + def SRLW = Bits("b0000000??????????101?????0111011") + def SRAW = Bits("b0100000??????????101?????0111011") + def MULW = Bits("b0000001??????????000?????0111011") + def DIVW = Bits("b0000001??????????100?????0111011") + def DIVUW = Bits("b0000001??????????101?????0111011") + def REMW = Bits("b0000001??????????110?????0111011") + def REMUW = Bits("b0000001??????????111?????0111011") + def LB = Bits("b?????????????????000?????0000011") + def LH = Bits("b?????????????????001?????0000011") + def LW = Bits("b?????????????????010?????0000011") + def LD = Bits("b?????????????????011?????0000011") + def LBU = Bits("b?????????????????100?????0000011") + def LHU = Bits("b?????????????????101?????0000011") + def LWU = Bits("b?????????????????110?????0000011") + def SB = 
Bits("b?????????????????000?????0100011") + def SH = Bits("b?????????????????001?????0100011") + def SW = Bits("b?????????????????010?????0100011") + def SD = Bits("b?????????????????011?????0100011") + def AMOADD_W = Bits("b00000????????????010?????0101111") + def AMOXOR_W = Bits("b00100????????????010?????0101111") + def AMOOR_W = Bits("b01000????????????010?????0101111") + def AMOAND_W = Bits("b01100????????????010?????0101111") + def AMOMIN_W = Bits("b10000????????????010?????0101111") + def AMOMAX_W = Bits("b10100????????????010?????0101111") + def AMOMINU_W = Bits("b11000????????????010?????0101111") + def AMOMAXU_W = Bits("b11100????????????010?????0101111") + def AMOSWAP_W = Bits("b00001????????????010?????0101111") + def LR_W = Bits("b00010??00000?????010?????0101111") + def SC_W = Bits("b00011????????????010?????0101111") + def AMOADD_D = Bits("b00000????????????011?????0101111") + def AMOXOR_D = Bits("b00100????????????011?????0101111") + def AMOOR_D = Bits("b01000????????????011?????0101111") + def AMOAND_D = Bits("b01100????????????011?????0101111") + def AMOMIN_D = Bits("b10000????????????011?????0101111") + def AMOMAX_D = Bits("b10100????????????011?????0101111") + def AMOMINU_D = Bits("b11000????????????011?????0101111") + def AMOMAXU_D = Bits("b11100????????????011?????0101111") + def AMOSWAP_D = Bits("b00001????????????011?????0101111") + def LR_D = Bits("b00010??00000?????011?????0101111") + def SC_D = Bits("b00011????????????011?????0101111") + def FENCE = Bits("b?????????????????000?????0001111") + def FENCE_I = Bits("b?????????????????001?????0001111") + def SYSCALL = Bits("b00000000000000000000000001110111") + def BREAK = Bits("b00000000000000000001000001110111") + def RDCYCLE = Bits("b00000000000000000100?????1110111") + def RDTIME = Bits("b00000010000000000100?????1110111") + def RDINSTRET = Bits("b00000100000000000100?????1110111") + def MTPCR = Bits("b0000000??????????000?????1110011") + def MFPCR = 
Bits("b000000000000?????001?????1110011") + def SETPCR = Bits("b?????????????????010?????1110011") + def CLEARPCR = Bits("b?????????????????011?????1110011") + def ERET = Bits("b00000000000000000100000001110011") + def FADD_S = Bits("b0000000??????????????????1010011") + def FSUB_S = Bits("b0000100??????????????????1010011") + def FMUL_S = Bits("b0001000??????????????????1010011") + def FDIV_S = Bits("b0001100??????????????????1010011") + def FSQRT_S = Bits("b001000000000?????????????1010011") + def FSGNJ_S = Bits("b0010100??????????000?????1010011") + def FSGNJN_S = Bits("b0011000??????????000?????1010011") + def FSGNJX_S = Bits("b0011100??????????000?????1010011") + def FADD_D = Bits("b0000001??????????????????1010011") + def FSUB_D = Bits("b0000101??????????????????1010011") + def FMUL_D = Bits("b0001001??????????????????1010011") + def FDIV_D = Bits("b0001101??????????????????1010011") + def FSQRT_D = Bits("b001000100000?????????????1010011") + def FSGNJ_D = Bits("b0010101??????????000?????1010011") + def FSGNJN_D = Bits("b0011001??????????000?????1010011") + def FSGNJX_D = Bits("b0011101??????????000?????1010011") + def FCVT_L_S = Bits("b010000000000?????????????1010011") + def FCVT_LU_S = Bits("b010010000000?????????????1010011") + def FCVT_W_S = Bits("b010100000000?????????????1010011") + def FCVT_WU_S = Bits("b010110000000?????????????1010011") + def FCVT_L_D = Bits("b010000100000?????????????1010011") + def FCVT_LU_D = Bits("b010010100000?????????????1010011") + def FCVT_W_D = Bits("b010100100000?????????????1010011") + def FCVT_WU_D = Bits("b010110100000?????????????1010011") + def FCVT_S_L = Bits("b011000000000?????????????1010011") + def FCVT_S_LU = Bits("b011010000000?????????????1010011") + def FCVT_S_W = Bits("b011100000000?????????????1010011") + def FCVT_S_WU = Bits("b011110000000?????????????1010011") + def FCVT_D_L = Bits("b011000100000?????????????1010011") + def FCVT_D_LU = Bits("b011010100000?????????????1010011") + def FCVT_D_W = 
Bits("b011100100000?????????????1010011") + def FCVT_D_WU = Bits("b011110100000?????????????1010011") + def FCVT_S_D = Bits("b100010000000?????????????1010011") + def FCVT_D_S = Bits("b100000100000?????????????1010011") + def FEQ_S = Bits("b1010100??????????000?????1010011") + def FLT_S = Bits("b1011000??????????000?????1010011") + def FLE_S = Bits("b1011100??????????000?????1010011") + def FEQ_D = Bits("b1010101??????????000?????1010011") + def FLT_D = Bits("b1011001??????????000?????1010011") + def FLE_D = Bits("b1011101??????????000?????1010011") + def FMIN_S = Bits("b1100000??????????000?????1010011") + def FMAX_S = Bits("b1100100??????????000?????1010011") + def FMIN_D = Bits("b1100001??????????000?????1010011") + def FMAX_D = Bits("b1100101??????????000?????1010011") + def FMV_X_S = Bits("b111000000000?????000?????1010011") + def FMV_X_D = Bits("b111000100000?????000?????1010011") + def FRSR = Bits("b11101000000000000000?????1010011") + def FMV_S_X = Bits("b111100000000?????000?????1010011") + def FMV_D_X = Bits("b111100100000?????000?????1010011") + def FSSR = Bits("b111110000000?????000?????1010011") + def FLW = Bits("b?????????????????010?????0000111") + def FLD = Bits("b?????????????????011?????0000111") + def FSW = Bits("b?????????????????010?????0100111") + def FSD = Bits("b?????????????????011?????0100111") + def FMADD_S = Bits("b?????00??????????????????1000011") + def FMSUB_S = Bits("b?????00??????????????????1000111") + def FNMSUB_S = Bits("b?????00??????????????????1001011") + def FNMADD_S = Bits("b?????00??????????????????1001111") + def FMADD_D = Bits("b?????01??????????????????1000011") + def FMSUB_D = Bits("b?????01??????????????????1000111") + def FNMSUB_D = Bits("b?????01??????????????????1001011") + def FNMADD_D = Bits("b?????01??????????????????1001111") /* Automatically generated by parse-opcodes */ - def CUSTOM0 = Bits("b??????????????????????0000001011") - def CUSTOM0_RS1 = Bits("b??????????????????????0100001011") - def CUSTOM0_RS1_RS2 
= Bits("b??????????????????????0110001011") - def CUSTOM0_RD = Bits("b??????????????????????1000001011") - def CUSTOM0_RD_RS1 = Bits("b??????????????????????1100001011") - def CUSTOM0_RD_RS1_RS2 = Bits("b??????????????????????1110001011") - def CUSTOM1 = Bits("b??????????????????????0000001111") - def CUSTOM1_RS1 = Bits("b??????????????????????0100001111") - def CUSTOM1_RS1_RS2 = Bits("b??????????????????????0110001111") - def CUSTOM1_RD = Bits("b??????????????????????1000001111") - def CUSTOM1_RD_RS1 = Bits("b??????????????????????1100001111") - def CUSTOM1_RD_RS1_RS2 = Bits("b??????????????????????1110001111") - def CUSTOM2 = Bits("b??????????????????????0001010111") - def CUSTOM2_RS1 = Bits("b??????????????????????0101010111") - def CUSTOM2_RS1_RS2 = Bits("b??????????????????????0111010111") - def CUSTOM2_RD = Bits("b??????????????????????1001010111") - def CUSTOM2_RD_RS1 = Bits("b??????????????????????1101010111") - def CUSTOM2_RD_RS1_RS2 = Bits("b??????????????????????1111010111") - def CUSTOM3 = Bits("b??????????????????????0001111011") - def CUSTOM3_RS1 = Bits("b??????????????????????0101111011") - def CUSTOM3_RS1_RS2 = Bits("b??????????????????????0111111011") - def CUSTOM3_RD = Bits("b??????????????????????1001111011") - def CUSTOM3_RD_RS1 = Bits("b??????????????????????1101111011") - def CUSTOM3_RD_RS1_RS2 = Bits("b??????????????????????1111111011") + def CUSTOM0 = Bits("b?????????????????000?????0001011") + def CUSTOM0_RS1 = Bits("b?????????????????010?????0001011") + def CUSTOM0_RS1_RS2 = Bits("b?????????????????011?????0001011") + def CUSTOM0_RD = Bits("b?????????????????100?????0001011") + def CUSTOM0_RD_RS1 = Bits("b?????????????????110?????0001011") + def CUSTOM0_RD_RS1_RS2 = Bits("b?????????????????111?????0001011") + def CUSTOM1 = Bits("b?????????????????000?????0101011") + def CUSTOM1_RS1 = Bits("b?????????????????010?????0101011") + def CUSTOM1_RS1_RS2 = Bits("b?????????????????011?????0101011") + def CUSTOM1_RD = 
Bits("b?????????????????100?????0101011") + def CUSTOM1_RD_RS1 = Bits("b?????????????????110?????0101011") + def CUSTOM1_RD_RS1_RS2 = Bits("b?????????????????111?????0101011") + def CUSTOM2 = Bits("b?????????????????000?????1011011") + def CUSTOM2_RS1 = Bits("b?????????????????010?????1011011") + def CUSTOM2_RS1_RS2 = Bits("b?????????????????011?????1011011") + def CUSTOM2_RD = Bits("b?????????????????100?????1011011") + def CUSTOM2_RD_RS1 = Bits("b?????????????????110?????1011011") + def CUSTOM2_RD_RS1_RS2 = Bits("b?????????????????111?????1011011") + def CUSTOM3 = Bits("b?????????????????000?????1111011") + def CUSTOM3_RS1 = Bits("b?????????????????010?????1111011") + def CUSTOM3_RS1_RS2 = Bits("b?????????????????011?????1111011") + def CUSTOM3_RD = Bits("b?????????????????100?????1111011") + def CUSTOM3_RD_RS1 = Bits("b?????????????????110?????1111011") + def CUSTOM3_RD_RS1_RS2 = Bits("b?????????????????111?????1111011") } From 158cee08af1d689167166becaa40462c2d8a78d9 Mon Sep 17 00:00:00 2001 From: Stephen Twigg Date: Sun, 22 Sep 2013 03:18:06 -0700 Subject: [PATCH 0642/1087] Adjust ordering of RoCCInstruction to reflect new ISA encoding. 
(Note: Fixes register op issues with AccumulatorExample but still slight issue with executing memory loads) --- rocket/src/main/scala/rocc.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index ac314388..80592c03 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -6,13 +6,13 @@ import uncore._ class RoCCInstruction extends Bundle { - val rd = Bits(width = 5) - val rs1 = Bits(width = 5) - val rs2 = Bits(width = 5) val funct = Bits(width = 7) + val rs2 = Bits(width = 5) + val rs1 = Bits(width = 5) val xd = Bool() val xs1 = Bool() val xs2 = Bool() + val rd = Bits(width = 5) val opcode = Bits(width = 7) } From db1e09f0d0fe14c066551a30404dead519941e49 Mon Sep 17 00:00:00 2001 From: Stephen Twigg Date: Mon, 23 Sep 2013 00:21:43 -0700 Subject: [PATCH 0643/1087] Fix issues with RoCC AccumulatorExample stalls on memory interface --- rocket/src/main/scala/rocc.scala | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 80592c03..543ce559 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -91,16 +91,23 @@ class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf) val stallLoad = doLoad && !io.mem.req.ready val stallResp = doResp && !io.resp.ready - cmd.ready := !stallReg && !stallLoad && !stallResp + val loadSent = Reg(init=Bool(false)) + when(cmd.fire()) { loadSent := Bool(false) }.elsewhen(io.mem.req.fire()) {loadSent := Bool(true)} + // This ensures that, even if we hold a command at the queue, it is only processed once - io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad + cmd.ready := !stallReg && !stallLoad && !stallResp && (!doLoad || !doResp || loadSent) + // command resolved if no stalls AND not issuing a load that will need a request + // note, loadSent = true will occur 
when the load response comes back + + io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad && (!doLoad || loadSent) + // valid response if valid command, need a response, no stalls on needed reg AND not issuing a load io.resp.bits.rd := cmd.bits.inst.rd io.resp.bits.data := accum io.busy := Bool(false) io.interrupt := Bool(false) - io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp + io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp && !loadSent io.mem.req.bits.addr := addend io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores) io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1 From 3532ae0b79ac3ebe70bf877e019fa0c184981674 Mon Sep 17 00:00:00 2001 From: Stephen Twigg Date: Tue, 24 Sep 2013 10:54:09 -0700 Subject: [PATCH 0644/1087] From Andrew, actually mark scoreboard when rocc instruction with a writeback is issued. Also, fix an issue with AccumulatorExample not properly tagging its memory requests. Finally, reverted changes from f27429c to more properly follow the spike model (always return previous value of accumulator). 
--- rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/rocc.scala | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 60fb8dd7..90dcbd36 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -595,7 +595,7 @@ class Control(implicit conf: RocketConfiguration) extends Module } val sboard = new Scoreboard(32) - sboard.set((wb_reg_div_mul_val || wb_dcache_miss) && io.dpath.wb_wen, io.dpath.wb_waddr) + sboard.set((wb_reg_div_mul_val || wb_dcache_miss || wb_reg_rocc_val) && io.dpath.wb_wen, io.dpath.wb_waddr) sboard.clear(io.dpath.mem_ll_wb, io.dpath.mem_ll_waddr) val id_stall_fpu = if (conf.fpu) { diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 543ce559..b1ca5956 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -91,24 +91,21 @@ class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf) val stallLoad = doLoad && !io.mem.req.ready val stallResp = doResp && !io.resp.ready - val loadSent = Reg(init=Bool(false)) - when(cmd.fire()) { loadSent := Bool(false) }.elsewhen(io.mem.req.fire()) {loadSent := Bool(true)} - // This ensures that, even if we hold a command at the queue, it is only processed once - - cmd.ready := !stallReg && !stallLoad && !stallResp && (!doLoad || !doResp || loadSent) + cmd.ready := !stallReg && !stallLoad && !stallResp // command resolved if no stalls AND not issuing a load that will need a request // note, loadSent = true will occur when the load response comes back - io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad && (!doLoad || loadSent) - // valid response if valid command, need a response, no stalls on needed reg AND not issuing a load + io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad + // valid response if valid command, need a response, and no stalls io.resp.bits.rd := 
cmd.bits.inst.rd - io.resp.bits.data := accum + io.resp.bits.data := accum // Semantics is to always send out prior accumulator register value io.busy := Bool(false) io.interrupt := Bool(false) - io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp && !loadSent + io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp io.mem.req.bits.addr := addend + io.mem.req.bits.tag := addr io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores) io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1 io.mem.req.bits.data := Bits(0) // we're not performing any stores... From adc386f8899c4d974a898d9392db354c43fac2f6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Sep 2013 13:53:49 -0700 Subject: [PATCH 0645/1087] Turn off virtual memory inside RoCC base class --- rocket/src/main/scala/rocc.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index b1ca5956..7f822487 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -47,6 +47,8 @@ class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle abstract class RoCC(conf: RocketConfiguration) extends Module { val io = new RoCCInterface()(conf) + + io.mem.req.bits.phys := Bool(true) // don't perform address translation } class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf) @@ -109,5 +111,4 @@ class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf) io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores) io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1 io.mem.req.bits.data := Bits(0) // we're not performing any stores... 
- io.mem.req.bits.phys := Bool(true) // don't perform address translation } From 81c752de8402741ec8a6b664e04d3464af49b73e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Sep 2013 13:58:23 -0700 Subject: [PATCH 0646/1087] Support disabling virtual memory --- rocket/src/main/scala/dpath_util.scala | 1 + rocket/src/main/scala/tile.scala | 1 + 2 files changed, 2 insertions(+) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 6f357c0d..7528cead 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -242,6 +242,7 @@ class PCR(implicit conf: RocketConfiguration) extends Module reg_status.s64 := true reg_status.u64 := true reg_status.zero := 0 + if (!conf.vm) reg_status.vm := false if (conf.rocc.isEmpty) reg_status.er := false if (!conf.fpu) reg_status.ef := false } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 022fbf23..7786823b 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -7,6 +7,7 @@ import Util._ case class RocketConfiguration(tl: TileLinkConfiguration, icache: ICacheConfig, dcache: DCacheConfig, fpu: Boolean, rocc: Option[RocketConfiguration => RoCC] = None, + vm: Boolean = true, fastLoadWord: Boolean = true, fastLoadByte: Boolean = false, fastMulDiv: Boolean = true) From 730a6ec76b6adeffa948070340afd69f4a36bfdf Mon Sep 17 00:00:00 2001 From: Stephen Twigg Date: Tue, 24 Sep 2013 16:32:49 -0700 Subject: [PATCH 0647/1087] AccumulatorExample now properly sets its busy bit. 
Also, pepper some helpful comments into AccumulatorExample --- rocket/src/main/scala/rocc.scala | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 7f822487..4e9f9a45 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -95,16 +95,21 @@ class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf) cmd.ready := !stallReg && !stallLoad && !stallResp // command resolved if no stalls AND not issuing a load that will need a request - // note, loadSent = true will occur when the load response comes back + // PROC RESPONSE INTERFACE io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad // valid response if valid command, need a response, and no stalls io.resp.bits.rd := cmd.bits.inst.rd - io.resp.bits.data := accum // Semantics is to always send out prior accumulator register value + // Must respond with the appropriate tag or undefined behavior + io.resp.bits.data := accum + // Semantics is to always send out prior accumulator register value - io.busy := Bool(false) + io.busy := cmd.valid || busy.reduce(_||_) + // Be busy when have pending memory requests or committed possibility of pending requests io.interrupt := Bool(false) + // Set this true to trigger an interrupt on the processor (please refer to supervisor documentation) + // MEMORY REQUEST INTERFACE io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp io.mem.req.bits.addr := addend io.mem.req.bits.tag := addr From 891e459625f00485830303148ab83227ed6afdb0 Mon Sep 17 00:00:00 2001 From: Stephen Twigg Date: Wed, 25 Sep 2013 01:16:32 -0700 Subject: [PATCH 0648/1087] Export stats pcr register (#28 currently) to the top-level --- rocket/src/main/scala/dpath_util.scala | 4 ++-- rocket/src/main/scala/htif.scala | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala 
b/rocket/src/main/scala/dpath_util.scala index 7528cead..a5dffc45 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -133,7 +133,6 @@ class PCR(implicit conf: RocketConfiguration) extends Module val irq_timer = Bool(OUTPUT) val irq_ipi = Bool(OUTPUT) val replay = Bool(OUTPUT) - val stats = Bool(OUTPUT) } import PCR._ @@ -171,6 +170,8 @@ class PCR(implicit conf: RocketConfiguration) extends Module host_pcr_bits.data := io.rw.rdata } when (io.host.pcr_rep.fire()) { host_pcr_rep_valid := false } + + io.host.debug_stats_pcr := reg_stats // direct export up the hierarchy val addr = Mux(io.rw.cmd != PCR.N, io.rw.addr, host_pcr_bits.addr) val wen = io.rw.cmd === PCR.T || io.rw.cmd === PCR.S || io.rw.cmd === PCR.C || @@ -183,7 +184,6 @@ class PCR(implicit conf: RocketConfiguration) extends Module io.fatc := wen && addr === FATC io.evec := Mux(io.exception, reg_evec.toSInt, reg_epc).toUInt io.ptbr := reg_ptbr - io.stats := reg_stats when (io.badvaddr_wen) { val wdata = io.rw.wdata diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala index 05a9bef6..a0085ac3 100644 --- a/rocket/src/main/scala/htif.scala +++ b/rocket/src/main/scala/htif.scala @@ -11,6 +11,7 @@ class HostIO(val w: Int) extends Bundle val clk_edge = Bool(OUTPUT) val in = Decoupled(Bits(width = w)).flip val out = Decoupled(Bits(width = w)) + val debug_stats_pcr = Bool(OUTPUT) } class PCRReq extends Bundle @@ -28,6 +29,9 @@ class HTIFIO(ntiles: Int) extends Bundle val pcr_rep = Decoupled(Bits(width = 64)) val ipi_req = Decoupled(Bits(width = log2Up(ntiles))) val ipi_rep = Decoupled(Bool()).flip + val debug_stats_pcr = Bool(OUTPUT) + // wired directly to stats register + // expected to be used to quickly indicate to testbench to do logging b/c in 'interesting' work } class SCRIO(n: Int) extends Bundle @@ -49,6 +53,9 @@ class RocketHTIF(w: Int, nSCR: Int)(implicit conf: TileLinkConfiguration) extend val scr = new SCRIO(nSCR) } + 
io.host.debug_stats_pcr := io.cpu.map(_.debug_stats_pcr).reduce(_||_) + // system is 'interesting' if any tile is 'interesting' + val short_request_bits = 64 val long_request_bits = 576 require(short_request_bits % w == 0) From 36b85b8ee207c59beeab0fe9fb2309eb5406e94c Mon Sep 17 00:00:00 2001 From: Stephen Twigg Date: Wed, 25 Sep 2013 11:51:10 -0700 Subject: [PATCH 0649/1087] Fix issue where the MSB of D$ req tag was getting lost for all agents when an accelerator was attached. --- rocket/src/main/scala/tile.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 7786823b..0a4fc7a5 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -21,13 +21,14 @@ case class RocketConfiguration(tl: TileLinkConfiguration, class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module(_reset = resetSignal) with ClientCoherenceAgent { - val memPorts = 2 + val memPorts = 2 // Number of ports to outer memory system from tile: 1 from I$, 1 from D$ val dcachePortId = 0 val icachePortId = 1 + val dcachePorts = 2 + !confIn.rocc.isEmpty // Number of ports into D$: 1 from core, 1 from PTW, maybe 1 from RoCC implicit val tlConf = confIn.tl implicit val lnConf = confIn.tl.ln implicit val icConf = confIn.icache - implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(memPorts), databits = confIn.xprlen) + implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(dcachePorts), databits = confIn.xprlen) implicit val conf = confIn.copy(dcache = dcConf) val io = new Bundle { @@ -38,9 +39,9 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module val core = Module(new Core) val icache = Module(new Frontend) val dcache = Module(new HellaCache) - val ptw = Module(new PTW(2)) + val ptw = Module(new PTW(2)) // 2 ports, 1 from I$, 1 from D$ - val dcacheArb = 
Module(new HellaCacheArbiter(2 + !conf.rocc.isEmpty)) + val dcacheArb = Module(new HellaCacheArbiter(dcachePorts)) dcacheArb.io.requestor(0) <> ptw.io.mem dcacheArb.io.requestor(1) <> core.io.dmem dcache.io.cpu <> dcacheArb.io.mem From 15835607574917b48bc8de4bd32cf9621241f925 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 28 Oct 2013 22:35:18 -0700 Subject: [PATCH 0650/1087] fix replay bug, don't respond when cmd is a NOP --- rocket/src/main/scala/nbdcache.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 251aa17b..9d5c9dbc 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -310,7 +310,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte when (!io.meta_read.ready) { rpq.io.deq.ready := Bool(false) - io.replay.bits.cmd := M_FENCE // NOP + io.replay.bits.cmd := M_NOP } } @@ -751,7 +751,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) val s2_req = Reg(io.cpu.req.bits.clone) - val s2_replay = Reg(next=s1_replay, init=Bool(false)) + val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd != M_NOP val s2_recycle = Bool() val s2_valid_masked = Bool() From 23f7bab4f332f7e11bec99073e03039b36e78bdb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 25 Oct 2013 15:27:24 -0700 Subject: [PATCH 0651/1087] Reduce FMA pipeline depths FMA QoR has improved enough to allow this change. 
--- rocket/src/main/scala/core.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 0bf3e561..e0e23c28 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -21,7 +21,7 @@ class Core(implicit conf: RocketConfiguration) extends Module val dpath = Module(new Datapath) val fpu: FPU = if (conf.fpu) { - val fpu = Module(new FPU(4,6)) + val fpu = Module(new FPU(2,3)) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl fpu.io.sfma.valid := Bool(false) // hook these up to coprocessor? From b44dafbdca5ab6a90320b84c99a3ff05689afbe5 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 29 Oct 2013 04:13:50 -0700 Subject: [PATCH 0652/1087] Simplify branch offset mux --- rocket/src/main/scala/dpath.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 23c54a4a..e6951109 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -184,7 +184,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module e(0))) } val ex_br_base = Mux(io.ctrl.ex_jalr, ex_rs1, ex_reg_pc) - val ex_br_offset = Mux(io.ctrl.ex_predicted_taken && !io.ctrl.ex_jalr, SInt(4), ex_imm) + val ex_br_offset = Mux(io.ctrl.ex_predicted_taken && !io.ctrl.ex_jalr, SInt(4), ex_imm(19,0).toSInt) val ex_br64 = ex_br_base + ex_br_offset val ex_br_msb = Mux(io.ctrl.ex_jalr, vaSign(ex_rs1, ex_br64), vaSign(ex_reg_pc, ex_br64)) val ex_br_addr = Cat(ex_br_msb, ex_br64(VADDR_BITS-1,0)) From 12f0369e6e58d56e4233f73e49dce9937cf5c217 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 29 Oct 2013 04:14:35 -0700 Subject: [PATCH 0653/1087] Simplify divide early out circuitry --- rocket/src/main/scala/divider.scala | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/divider.scala 
b/rocket/src/main/scala/divider.scala index 1ea4dd29..55bd2aa5 100644 --- a/rocket/src/main/scala/divider.scala +++ b/rocket/src/main/scala/divider.scala @@ -80,16 +80,13 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke val divisorMSB = Log2(divisor(w-1,0), w) val dividendMSB = Log2(remainder(w-1,0), w) - val eOutPos = UInt(w-1, log2Up(2*w)) + divisorMSB - dividendMSB - val eOut = count === UInt(0) && eOutPos > 0 && (divisorMSB != UInt(0) || divisor(0)) + val eOutPos = UInt(w-1) + divisorMSB - dividendMSB + val eOutZero = divisorMSB > dividendMSB + val eOut = count === UInt(0) && (eOutPos > 0 || eOutZero) && (divisorMSB != UInt(0) || divisor(0)) when (Bool(earlyOut) && eOut) { - val shift = eOutPos(log2Up(w)-1,0) + val shift = Mux(eOutZero, UInt(w-1), eOutPos) remainder := remainder(w-1,0) << shift count := shift - when (eOutPos(log2Up(w))) { - remainder := remainder(w-1,0) << w-1 - count := w-1 - } } } when (io.resp.fire() || io.kill) { From eae571e3711d32e139920bf21f9aa66d5e02dff9 Mon Sep 17 00:00:00 2001 From: Stephen Twigg Date: Tue, 5 Nov 2013 15:31:03 -0800 Subject: [PATCH 0654/1087] Remove rocc memory simplifye module (Hwacha has its own) --- rocket/src/main/scala/tile.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 0a4fc7a5..321e8c25 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -50,11 +50,9 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module ptw.io.requestor(1) <> dcache.io.cpu.ptw if (!conf.rocc.isEmpty) { - val dcIF = Module(new SimpleHellaCacheIF) val rocc = Module((conf.rocc.get)(conf)) - dcIF.io.requestor <> rocc.io.mem core.io.rocc <> rocc.io - dcacheArb.io.requestor(2) <> dcIF.io.cache + dcacheArb.io.requestor(2) <> rocc.io.mem } core.io.host <> io.host From 4c56323f6f58bbfb8d03d7c206203252e6240ab6 Mon Sep 17 00:00:00 2001 From: Yunsup Lee 
Date: Tue, 5 Nov 2013 17:12:09 -0800 Subject: [PATCH 0655/1087] hookup all memory ports --- rocket/src/main/scala/rocc.scala | 4 ++++ rocket/src/main/scala/tile.scala | 21 +++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 4e9f9a45..b6501c40 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -38,6 +38,10 @@ class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle val cmd = Decoupled(new RoCCCommand).flip val resp = Decoupled(new RoCCResponse) val mem = new HellaCacheIO()(conf.dcache) + val imem = new UncachedTileLinkIO()(conf.tl) + val iptw = new TLBPTWIO + val dptw = new TLBPTWIO + val pptw = new TLBPTWIO val busy = Bool(OUTPUT) val interrupt = Bool(OUTPUT) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 321e8c25..312f7700 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -21,9 +21,10 @@ case class RocketConfiguration(tl: TileLinkConfiguration, class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module(_reset = resetSignal) with ClientCoherenceAgent { - val memPorts = 2 // Number of ports to outer memory system from tile: 1 from I$, 1 from D$ + val memPorts = 2 + !confIn.rocc.isEmpty // Number of ports to outer memory system from tile: 1 from I$, 1 from D$, maybe 1 from Rocc val dcachePortId = 0 val icachePortId = 1 + val roccPortId = 2 val dcachePorts = 2 + !confIn.rocc.isEmpty // Number of ports into D$: 1 from core, 1 from PTW, maybe 1 from RoCC implicit val tlConf = confIn.tl implicit val lnConf = confIn.tl.ln @@ -39,7 +40,7 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module val core = Module(new Core) val icache = Module(new Frontend) val dcache = Module(new HellaCache) - val ptw = Module(new PTW(2)) // 2 ports, 1 from I$, 1 from D$ + val ptw = Module(new PTW(if 
(confIn.rocc.isEmpty) 2 else 5)) // 2 ports, 1 from I$, 1 from D$, maybe 3 from RoCC val dcacheArb = Module(new HellaCacheArbiter(dcachePorts)) dcacheArb.io.requestor(0) <> ptw.io.mem @@ -49,12 +50,6 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module ptw.io.requestor(0) <> icache.io.cpu.ptw ptw.io.requestor(1) <> dcache.io.cpu.ptw - if (!conf.rocc.isEmpty) { - val rocc = Module((conf.rocc.get)(conf)) - core.io.rocc <> rocc.io - dcacheArb.io.requestor(2) <> rocc.io.mem - } - core.io.host <> io.host core.io.imem <> icache.io.cpu core.io.ptw <> ptw.io.dpath @@ -63,6 +58,16 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module memArb.io.in(dcachePortId) <> dcache.io.mem memArb.io.in(icachePortId) <> icache.io.mem + if (!conf.rocc.isEmpty) { + val rocc = Module((conf.rocc.get)(conf)) + core.io.rocc <> rocc.io + dcacheArb.io.requestor(2) <> rocc.io.mem + memArb.io.in(roccPortId) <> rocc.io.imem + ptw.io.requestor(2) <> rocc.io.iptw + ptw.io.requestor(3) <> rocc.io.dptw + ptw.io.requestor(4) <> rocc.io.pptw + } + io.tilelink.acquire <> memArb.io.out.acquire memArb.io.out.grant <> io.tilelink.grant io.tilelink.grant_ack <> memArb.io.out.grant_ack From da033af0b0a1f5e7e333707cf3a975162a863aa9 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 7 Nov 2013 13:18:46 -0800 Subject: [PATCH 0656/1087] move htif to uncore --- rocket/src/main/scala/htif.scala | 268 ------------------------------- 1 file changed, 268 deletions(-) delete mode 100644 rocket/src/main/scala/htif.scala diff --git a/rocket/src/main/scala/htif.scala b/rocket/src/main/scala/htif.scala deleted file mode 100644 index a0085ac3..00000000 --- a/rocket/src/main/scala/htif.scala +++ /dev/null @@ -1,268 +0,0 @@ -package rocket - -import Chisel._ -import Node._ -import uncore._ -import Util._ - -class HostIO(val w: Int) extends Bundle -{ - val clk = Bool(OUTPUT) - val clk_edge = Bool(OUTPUT) - val in = Decoupled(Bits(width = w)).flip - val out = 
Decoupled(Bits(width = w)) - val debug_stats_pcr = Bool(OUTPUT) -} - -class PCRReq extends Bundle -{ - val rw = Bool() - val addr = Bits(width = 5) - val data = Bits(width = 64) -} - -class HTIFIO(ntiles: Int) extends Bundle -{ - val reset = Bool(INPUT) - val id = UInt(INPUT, log2Up(ntiles)) - val pcr_req = Decoupled(new PCRReq).flip - val pcr_rep = Decoupled(Bits(width = 64)) - val ipi_req = Decoupled(Bits(width = log2Up(ntiles))) - val ipi_rep = Decoupled(Bool()).flip - val debug_stats_pcr = Bool(OUTPUT) - // wired directly to stats register - // expected to be used to quickly indicate to testbench to do logging b/c in 'interesting' work -} - -class SCRIO(n: Int) extends Bundle -{ - val rdata = Vec.fill(n){Bits(INPUT, 64)} - val wen = Bool(OUTPUT) - val waddr = UInt(OUTPUT, log2Up(n)) - val wdata = Bits(OUTPUT, 64) -} - -class RocketHTIF(w: Int, nSCR: Int)(implicit conf: TileLinkConfiguration) extends Module with ClientCoherenceAgent -{ - implicit val (ln, co) = (conf.ln, conf.co) - val nTiles = ln.nClients-1 // This HTIF is itself a TileLink client - val io = new Bundle { - val host = new HostIO(w) - val cpu = Vec.fill(nTiles){new HTIFIO(nTiles).flip} - val mem = new TileLinkIO - val scr = new SCRIO(nSCR) - } - - io.host.debug_stats_pcr := io.cpu.map(_.debug_stats_pcr).reduce(_||_) - // system is 'interesting' if any tile is 'interesting' - - val short_request_bits = 64 - val long_request_bits = 576 - require(short_request_bits % w == 0) - - val rx_count_w = 13 + log2Up(64) - log2Up(w) // data size field is 12 bits - val rx_count = Reg(init=UInt(0,rx_count_w)) - val rx_shifter = Reg(Bits(width = short_request_bits)) - val rx_shifter_in = Cat(io.host.in.bits, rx_shifter(short_request_bits-1,w)) - val next_cmd = rx_shifter_in(3,0) - val cmd = Reg(Bits()) - val size = Reg(Bits()) - val pos = Reg(Bits()) - val seqno = Reg(Bits()) - val addr = Reg(Bits()) - when (io.host.in.valid && io.host.in.ready) { - rx_shifter := rx_shifter_in - rx_count := rx_count + UInt(1) - 
when (rx_count === UInt(short_request_bits/w-1)) { - cmd := next_cmd - size := rx_shifter_in(15,4) - pos := rx_shifter_in(15,4+OFFSET_BITS-3) - seqno := rx_shifter_in(23,16) - addr := rx_shifter_in(63,24) - } - } - - val rx_word_count = (rx_count >> UInt(log2Up(short_request_bits/w))) - val rx_word_done = io.host.in.valid && rx_count(log2Up(short_request_bits/w)-1,0).andR - val packet_ram_depth = long_request_bits/short_request_bits-1 - val packet_ram = Vec.fill(packet_ram_depth){Reg(Bits(width = short_request_bits))} - when (rx_word_done && io.host.in.ready) { - packet_ram(rx_word_count(log2Up(packet_ram_depth)-1,0) - UInt(1)) := rx_shifter_in - } - - val cmd_readmem :: cmd_writemem :: cmd_readcr :: cmd_writecr :: cmd_ack :: cmd_nack :: Nil = Enum(UInt(), 6) - - val pcr_addr = addr(io.cpu(0).pcr_req.bits.addr.width-1, 0) - val pcr_coreid = addr(log2Up(nTiles)-1+20+1,20) - val pcr_wdata = packet_ram(0) - - val bad_mem_packet = size(OFFSET_BITS-1-3,0).orR || addr(OFFSET_BITS-1-3,0).orR - val nack = Mux(cmd === cmd_readmem || cmd === cmd_writemem, bad_mem_packet, - Mux(cmd === cmd_readcr || cmd === cmd_writecr, size != UInt(1), - Bool(true))) - - val tx_count = Reg(init=UInt(0, rx_count_w)) - val tx_subword_count = tx_count(log2Up(short_request_bits/w)-1,0) - val tx_word_count = tx_count(rx_count_w-1, log2Up(short_request_bits/w)) - val packet_ram_raddr = tx_word_count(log2Up(packet_ram_depth)-1,0) - UInt(1) - when (io.host.out.valid && io.host.out.ready) { - tx_count := tx_count + UInt(1) - } - - val rx_done = rx_word_done && Mux(rx_word_count === UInt(0), next_cmd != cmd_writemem && next_cmd != cmd_writecr, rx_word_count === size || rx_word_count(log2Up(packet_ram_depth)-1,0) === UInt(0)) - val tx_size = Mux(!nack && (cmd === cmd_readmem || cmd === cmd_readcr || cmd === cmd_writecr), size, UInt(0)) - val tx_done = io.host.out.ready && tx_subword_count.andR && (tx_word_count === tx_size || tx_word_count > UInt(0) && packet_ram_raddr.andR) - - val mem_acked = 
Reg(init=Bool(false)) - val mem_gxid = Reg(Bits()) - val mem_gsrc = Reg(UInt(width = conf.ln.idBits)) - val mem_needs_ack = Reg(Bool()) - when (io.mem.grant.valid) { - mem_acked := Bool(true) - mem_gxid := io.mem.grant.bits.payload.master_xact_id - mem_gsrc := io.mem.grant.bits.header.src - mem_needs_ack := conf.co.requiresAck(io.mem.grant.bits.payload) - } - io.mem.grant.ready := Bool(true) - - val state_rx :: state_pcr_req :: state_pcr_resp :: state_mem_req :: state_mem_wdata :: state_mem_wresp :: state_mem_rdata :: state_mem_finish :: state_tx :: Nil = Enum(UInt(), 9) - val state = Reg(init=state_rx) - - val rx_cmd = Mux(rx_word_count === UInt(0), next_cmd, cmd) - when (state === state_rx && rx_done) { - state := Mux(rx_cmd === cmd_readmem || rx_cmd === cmd_writemem, state_mem_req, - Mux(rx_cmd === cmd_readcr || rx_cmd === cmd_writecr, state_pcr_req, - state_tx)) - } - - val mem_cnt = Reg(init=UInt(0, log2Up(REFILL_CYCLES))) - val x_init = Module(new Queue(new Acquire, 1)) - when (state === state_mem_req && x_init.io.enq.ready) { - state := Mux(cmd === cmd_writemem, state_mem_wdata, state_mem_rdata) - } - when (state === state_mem_wdata && io.mem.acquire.data.ready) { - when (mem_cnt.andR) { - state := state_mem_wresp - } - mem_cnt := mem_cnt + UInt(1) - } - when (state === state_mem_wresp) { - when (mem_acked) { - state := state_mem_finish - mem_acked := Bool(false) - } - } - when (state === state_mem_rdata) { - when (io.mem.grant.valid) { - when (mem_cnt.andR) { - state := state_mem_finish - } - mem_cnt := mem_cnt + UInt(1) - } - mem_acked := Bool(false) - } - when (state === state_mem_finish && io.mem.grant_ack.ready) { - state := Mux(cmd === cmd_readmem || pos === UInt(1), state_tx, state_rx) - pos := pos - UInt(1) - addr := addr + UInt(1 << OFFSET_BITS-3) - } - when (state === state_tx && tx_done) { - when (tx_word_count === tx_size) { - rx_count := UInt(0) - tx_count := UInt(0) - } - state := Mux(cmd === cmd_readmem && pos != UInt(0), state_mem_req, 
state_rx) - } - - var mem_req_data: Bits = null - for (i <- 0 until MEM_DATA_BITS/short_request_bits) { - val idx = Cat(mem_cnt, UInt(i, log2Up(MEM_DATA_BITS/short_request_bits))) - when (state === state_mem_rdata && io.mem.grant.valid) { - packet_ram(idx) := io.mem.grant.bits.payload.data((i+1)*short_request_bits-1, i*short_request_bits) - } - mem_req_data = Cat(packet_ram(idx), mem_req_data) - } - x_init.io.enq.valid := state === state_mem_req - val init_addr = addr.toUInt >> UInt(OFFSET_BITS-3) - x_init.io.enq.bits := Mux(cmd === cmd_writemem, - Acquire(co.getUncachedWriteAcquireType, init_addr, UInt(0)), - Acquire(co.getUncachedReadAcquireType, init_addr, UInt(0))) - io.mem.acquire.meta <> FIFOedLogicalNetworkIOWrapper(x_init.io.deq, UInt(conf.ln.nClients), UInt(0)) // By convention HTIF is the client with the largest id - io.mem.acquire.data.valid := state === state_mem_wdata - io.mem.acquire.data.bits.payload.data := mem_req_data - io.mem.grant_ack.valid := (state === state_mem_finish) && mem_needs_ack - io.mem.grant_ack.bits.payload.master_xact_id := mem_gxid - io.mem.grant_ack.bits.header.dst := mem_gsrc - io.mem.probe.ready := Bool(false) - io.mem.release.meta.valid := Bool(false) - io.mem.release.data.valid := Bool(false) - - val pcrReadData = Reg(Bits(width = io.cpu(0).pcr_rep.bits.getWidth)) - for (i <- 0 until nTiles) { - val my_reset = Reg(init=Bool(true)) - val my_ipi = Reg(init=Bool(false)) - - val cpu = io.cpu(i) - val me = pcr_coreid === UInt(i) - cpu.pcr_req.valid := state === state_pcr_req && me && pcr_addr != PCR.RESET - cpu.pcr_req.bits.rw := cmd === cmd_writecr - cpu.pcr_req.bits.addr := pcr_addr - cpu.pcr_req.bits.data := pcr_wdata - cpu.reset := my_reset - - when (cpu.ipi_rep.ready) { - my_ipi := Bool(false) - } - cpu.ipi_rep.valid := my_ipi - cpu.ipi_req.ready := Bool(true) - for (j <- 0 until nTiles) { - when (io.cpu(j).ipi_req.valid && io.cpu(j).ipi_req.bits === UInt(i)) { - my_ipi := Bool(true) - } - } - - when (cpu.pcr_req.valid && 
cpu.pcr_req.ready) { - state := state_pcr_resp - } - when (state === state_pcr_req && me && pcr_addr === PCR.RESET) { - when (cmd === cmd_writecr) { - my_reset := pcr_wdata(0) - } - pcrReadData := my_reset.toBits - state := state_tx - } - - cpu.pcr_rep.ready := Bool(true) - when (cpu.pcr_rep.valid) { - pcrReadData := cpu.pcr_rep.bits - state := state_tx - } - } - - val scr_addr = addr(log2Up(nSCR)-1, 0) - val scr_rdata = Vec.fill(io.scr.rdata.size){Bits(width = 64)} - for (i <- 0 until scr_rdata.size) - scr_rdata(i) := io.scr.rdata(i) - scr_rdata(0) := nTiles - scr_rdata(1) := (UInt(REFILL_CYCLES*MEM_DATA_BITS/8) << x_init.io.enq.bits.addr.getWidth) >> 20 - - io.scr.wen := false - io.scr.wdata := pcr_wdata - io.scr.waddr := scr_addr.toUInt - when (state === state_pcr_req && pcr_coreid === SInt(-1)) { - io.scr.wen := cmd === cmd_writecr - pcrReadData := scr_rdata(scr_addr) - state := state_tx - } - - val tx_cmd = Mux(nack, cmd_nack, cmd_ack) - val tx_cmd_ext = Cat(Bits(0, 4-tx_cmd.getWidth), tx_cmd) - val tx_header = Cat(addr, seqno, tx_size, tx_cmd_ext) - val tx_data = Mux(tx_word_count === UInt(0), tx_header, - Mux(cmd === cmd_readcr || cmd === cmd_writecr, pcrReadData, - packet_ram(packet_ram_raddr))) - - io.host.in.ready := state === state_rx - io.host.out.valid := state === state_tx - io.host.out.bits := tx_data >> Cat(tx_count(log2Up(short_request_bits/w)-1,0), Bits(0, log2Up(w))) -} From c1966e2b0ab911bf0024acec4a22b106e80ad245 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 7 Nov 2013 15:42:03 -0800 Subject: [PATCH 0657/1087] forgot to put htif into uncore package --- rocket/src/main/scala/core.scala | 3 ++- rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/dpath_util.scala | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index e0e23c28..f4c8dc18 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -1,8 +1,9 @@ package 
rocket import Chisel._ -import uncore.constants.MemoryOpConstants._ import Util._ +import uncore.HTIFIO +import uncore.constants.MemoryOpConstants._ class RocketIO(implicit conf: RocketConfiguration) extends Bundle { diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index e6951109..0f5255fc 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -3,6 +3,7 @@ package rocket import Chisel._ import Instructions._ import Util._ +import uncore.HTIFIO import uncore.constants.AddressConstants._ class Datapath(implicit conf: RocketConfiguration) extends Module diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index a5dffc45..b41d4e57 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -3,6 +3,7 @@ package rocket import Chisel._ import Util._ import Node._ +import uncore.HTIFIO import uncore.constants.AddressConstants._ import scala.math._ From 3b109763ad85c521eb9adc714e8055544a5496b3 Mon Sep 17 00:00:00 2001 From: Quan Nguyen Date: Tue, 19 Nov 2013 20:54:47 -0800 Subject: [PATCH 0658/1087] Connect FMA to Hwacha pipes --- rocket/src/main/scala/core.scala | 7 +++++-- rocket/src/main/scala/rocc.scala | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index f4c8dc18..89cbdc32 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -25,8 +25,6 @@ class Core(implicit conf: RocketConfiguration) extends Module val fpu = Module(new FPU(2,3)) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl - fpu.io.sfma.valid := Bool(false) // hook these up to coprocessor? 
- fpu.io.dfma.valid := Bool(false) fpu } else null @@ -43,4 +41,9 @@ class Core(implicit conf: RocketConfiguration) extends Module ctrl.io.rocc <> io.rocc dpath.io.rocc <> io.rocc + + require(conf.fpu) + // Hookup the {S,D}FMA pipes + fpu.io.sfma <> io.rocc.cp_sfma + fpu.io.dfma <> io.rocc.cp_dfma } diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index b6501c40..31cd572f 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -42,6 +42,8 @@ class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle val iptw = new TLBPTWIO val dptw = new TLBPTWIO val pptw = new TLBPTWIO + val cp_dfma = new ioFMA(65).flip + val cp_sfma = new ioFMA(33).flip val busy = Bool(OUTPUT) val interrupt = Bool(OUTPUT) From 68e270eeb23092674d85d77d00ec098a8c703c0e Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 21 Nov 2013 14:44:58 -0800 Subject: [PATCH 0659/1087] fix slli/slliw encoding bug --- rocket/src/main/scala/instructions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 91349e66..172c034d 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -17,7 +17,7 @@ object Instructions def LUI = Bits("b?????????????????????????0110111") def AUIPC = Bits("b?????????????????????????0010111") def ADDI = Bits("b?????????????????000?????0010011") - def SLLI = Bits("b010000???????????001?????0010011") + def SLLI = Bits("b000000???????????001?????0010011") def SLTI = Bits("b?????????????????010?????0010011") def SLTIU = Bits("b?????????????????011?????0010011") def XORI = Bits("b?????????????????100?????0010011") @@ -44,7 +44,7 @@ object Instructions def REM = Bits("b0000001??????????110?????0110011") def REMU = Bits("b0000001??????????111?????0110011") def ADDIW = Bits("b?????????????????000?????0011011") - def SLLIW = 
Bits("b0100000??????????001?????0011011") + def SLLIW = Bits("b0000000??????????001?????0011011") def SRLIW = Bits("b0000000??????????101?????0011011") def SRAIW = Bits("b0100000??????????101?????0011011") def ADDW = Bits("b0000000??????????000?????0111011") From 53f726008bf80761006208b0e53b02c476c43a28 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 24 Nov 2013 14:16:53 -0800 Subject: [PATCH 0660/1087] Use Mem instead of Vec[Reg] for TLB QoR-neutral, improves simulation speed --- rocket/src/main/scala/tlb.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 45d5078a..cb999ed0 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -20,7 +20,7 @@ class CAMIO(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { class RocketCAM(entries: Int, tag_bits: Int) extends Module { val addr_bits = ceil(log(entries)/log(2)).toInt; val io = new CAMIO(entries, addr_bits, tag_bits); - val cam_tags = Vec.fill(entries){Reg(Bits(width = tag_bits))} + val cam_tags = Mem(Bits(width = tag_bits), entries) val vb_array = Reg(init=Bits(0, entries)) when (io.write) { From 65b8340cea83c2ebd5ba25cd70e0110dd595508f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 24 Nov 2013 14:19:46 -0800 Subject: [PATCH 0661/1087] Mitigate D$ hit -> branch -> NPC critical path --- rocket/src/main/scala/ctrl.scala | 8 +++----- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/icache.scala | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 90dcbd36..cbb2df0f 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -459,7 +459,7 @@ class Control(implicit conf: RocketConfiguration) extends Module .otherwise { ex_reg_br_type := id_br_type; ex_reg_jalr := id_jalr - ex_reg_btb_hit := io.imem.resp.bits.taken + ex_reg_btb_hit := 
io.imem.resp.bits.taken && !id_jalr ex_reg_div_mul_val := id_mul_val || id_div_val ex_reg_mem_val := id_mem_val.toBool; ex_reg_valid := Bool(true) @@ -484,9 +484,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val replay_ex_other = wb_dcache_miss && ex_reg_load_use || mem_reg_replay_next val replay_ex = replay_ex_structural || replay_ex_other ctrl_killx := take_pc_wb || replay_ex - val take_pc_ex = Mux(ex_reg_jalr, - !(ex_reg_btb_hit && io.dpath.jalr_eq) && !replay_ex_other, - ex_reg_btb_hit != io.dpath.ex_br_taken) + val take_pc_ex = ex_reg_jalr && !io.dpath.jalr_eq || io.dpath.ex_br_taken // detect 2-cycle load-use delay for LB/LH/SC val ex_slow_bypass = ex_reg_mem_cmd === M_XSC || AVec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_reg_mem_type) @@ -714,7 +712,7 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.pcr := wb_reg_pcr.toUInt io.dpath.eret := wb_reg_eret io.dpath.ex_mem_type := ex_reg_mem_type - io.dpath.ex_br_type := ex_reg_br_type + io.dpath.ex_br_type := ex_reg_br_type ^ ex_reg_btb_hit io.dpath.ex_rs2_val := ex_reg_mem_val && isWrite(ex_reg_mem_cmd) || ex_reg_rocc_val io.dpath.ex_rocc_val := ex_reg_rocc_val io.dpath.mem_rocc_val := mem_reg_rocc_val diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 0f5255fc..ed1a5e9d 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -185,7 +185,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module e(0))) } val ex_br_base = Mux(io.ctrl.ex_jalr, ex_rs1, ex_reg_pc) - val ex_br_offset = Mux(io.ctrl.ex_predicted_taken && !io.ctrl.ex_jalr, SInt(4), ex_imm(19,0).toSInt) + val ex_br_offset = Mux(io.ctrl.ex_predicted_taken, SInt(4), ex_imm(19,0).toSInt) val ex_br64 = ex_br_base + ex_br_offset val ex_br_msb = Mux(io.ctrl.ex_jalr, vaSign(ex_rs1, ex_br64), vaSign(ex_reg_pc, ex_br64)) val ex_br_addr = Cat(ex_br_msb, ex_br64(VADDR_BITS-1,0)) diff --git a/rocket/src/main/scala/icache.scala 
b/rocket/src/main/scala/icache.scala index 180caf32..8a60548b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -160,11 +160,11 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module val s1_tag = s1_addr(c.tagbits+c.untagbits-1,c.untagbits) val s0_valid = io.req.valid || s1_valid && stall - val s0_pgoff = Mux(io.req.valid, io.req.bits.idx, s1_pgoff) + val s0_pgoff = Mux(s1_valid && stall, s1_pgoff, io.req.bits.idx) s1_valid := io.req.valid && rdy || s1_valid && stall && !io.req.bits.kill when (io.req.valid && rdy) { - s1_pgoff := s0_pgoff + s1_pgoff := io.req.bits.idx } s2_valid := s1_valid && rdy && !io.req.bits.kill || io.resp.valid && stall From 924261e2b28701c401a8dd86be90fa8f5a9f16f5 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 25 Nov 2013 04:35:15 -0800 Subject: [PATCH 0662/1087] Update to new privileged ISA... phew --- rocket/src/main/scala/consts.scala | 1 + rocket/src/main/scala/ctrl.scala | 513 ++++++++++++----------- rocket/src/main/scala/dpath.scala | 41 +- rocket/src/main/scala/dpath_util.scala | 176 ++++---- rocket/src/main/scala/fpu.scala | 164 ++++---- rocket/src/main/scala/instructions.scala | 115 +++-- rocket/src/main/scala/nbdcache.scala | 2 +- rocket/src/main/scala/ptw.scala | 6 +- 8 files changed, 534 insertions(+), 484 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index b8970e9e..e0f29f1f 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -32,6 +32,7 @@ trait ScalarOpConstants { val IMM_U = UInt(2, 3); val IMM_UJ = UInt(3, 3); val IMM_I = UInt(4, 3); + val IMM_Z = UInt(5, 3); val A2_X = Bits("b??", 2) val A2_RS2 = UInt(0, 2) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index cbb2df0f..904b671a 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -22,9 +22,8 @@ class CtrlDpathIO extends Bundle() val 
div_mul_kill = Bool(OUTPUT) val div_val = Bool(OUTPUT); val div_kill = Bool(OUTPUT) - val sel_wb = UInt(OUTPUT, 2) - val pcr = UInt(OUTPUT, 3) - val eret = Bool(OUTPUT); + val csr = UInt(OUTPUT, 3) + val sret = Bool(OUTPUT) val mem_load = Bool(OUTPUT); val wb_load = Bool(OUTPUT) val ex_fp_val= Bool(OUTPUT); @@ -34,7 +33,6 @@ class CtrlDpathIO extends Bundle() val ex_predicted_taken = Bool(OUTPUT) val mem_wen = Bool(OUTPUT); val wb_wen = Bool(OUTPUT); - val wb_valid = Bool(OUTPUT) val ex_mem_type = Bits(OUTPUT, 3) val ex_rs2_val = Bool(OUTPUT) val ex_rocc_val = Bool(OUTPUT) @@ -42,6 +40,7 @@ class CtrlDpathIO extends Bundle() val mem_ll_bypass_rs1 = Bool(OUTPUT) val mem_ll_bypass_rs2 = Bool(OUTPUT) // exception handling + val retire = Bool(OUTPUT) val exception = Bool(OUTPUT); val cause = UInt(OUTPUT, 6); val badvaddr_wen = Bool(OUTPUT); // high for a load/store access fault @@ -59,7 +58,7 @@ class CtrlDpathIO extends Bundle() val status = new Status().asInput val fp_sboard_clr = Bool(INPUT); val fp_sboard_clra = UInt(INPUT, 5); - val pcr_replay = Bool(INPUT) + val csr_replay = Bool(INPUT) } abstract trait DecodeConstants @@ -67,15 +66,15 @@ abstract trait DecodeConstants val xpr64 = Y; val decode_default = - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,BR_X, X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,WB_X, PCR.X,N,X,X,X,X,X,X) + // fence.i + // jalr mul_val | sret + // fp_val | renx2 | div_val | | syscall + // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | | 
| | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,BR_X, X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,CSR.X,N,X,X,X,X,X) val table: Array[(UInt, List[UInt])] } @@ -83,226 +82,223 @@ abstract trait DecodeConstants object XDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), + // fence.i + // jalr mul_val | sret + // fp_val | renx2 | div_val | | syscall + // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, 
N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - JAL-> List(Y, N,N,BR_J, N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - JALR-> List(Y, N,N,BR_N, Y,N,Y,A2_FOUR,A1_PC, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_PCHI,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + JAL-> List(Y, N,N,BR_J, N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + JALR-> List(Y, N,N,BR_N, Y,N,Y,A2_FOUR,A1_PC, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_PCHI,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - LB-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - LH-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - LW-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - LBU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - LHU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - LWU-> 
List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SB-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SH-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SW-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), + LB-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,CSR.N,N,N,N,N,N,N), + LH-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,CSR.N,N,N,N,N,N,N), + LW-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,N), + LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,N), + LBU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,CSR.N,N,N,N,N,N,N), + LHU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,CSR.N,N,N,N,N,N,N), + LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,CSR.N,N,N,N,N,N,N), + SB-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,CSR.N,N,N,N,N,N,N), + SH-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,CSR.N,N,N,N,N,N,N), + SW-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), + SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), - AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOXOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, 
IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOXOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, 
N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOXOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOXOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMINU_D-> 
List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - LR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - LR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - SC_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), - SC_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,Y), + LR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + LR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + SC_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + SC_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - LUI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - ORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, 
N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - XORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - AND-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - OR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - XOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + LUI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, 
N,N,Y,CSR.N,N,N,N,N,N,N), + SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + XORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + AND-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + OR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + XOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, 
N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), + ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, 
N,N,Y,CSR.N,N,N,N,N,N,N), - MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, 
N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,WB_X, PCR.N,N,N,N,N,N,N,N), + DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - SYSCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,Y,N,N,N,N), - SETPCR-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.S,N,N,N,Y,N,N,N), - CLEARPCR-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.C,N,N,N,Y,N,N,N), - ERET-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,Y,N,Y,N,N,N), - FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,Y,N), - FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,Y,N,N,N,Y,N,N), - MFPCR-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.F,N,N,N,Y,N,N,N), - MTPCR-> List(Y, N,N,BR_N, N,Y,N,A2_RS2, A1_ZERO,IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.T,N,N,N,Y,N,N,N), - RDTIME-> List(Y, 
N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_TSC,PCR.N,N,N,N,N,N,N,N), - RDCYCLE-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_TSC,PCR.N,N,N,N,N,N,N,N), - RDINSTRET-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_XPR,FN_X, N,M_X, MT_X, N,N,Y,WB_IRT,PCR.N,N,N,N,N,N,N,N)) + SCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,Y,N,N,N), + SRET-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,Y,N,N,N,N), + FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,Y,N), + FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,Y,N,N,Y,N,N), + CSRRW-> List(Y, N,N,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), + CSRRS-> List(Y, N,N,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), + CSRRC-> List(Y, N,N,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N), + CSRRWI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), + CSRRSI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), + CSRRCI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N)) } object FDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, 
PCR.N,N,N,N,N,N,N,N), - FCVT_D_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FSGNJ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FSGNJ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FSGNJX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FSGNJX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FSGNJN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FSGNJN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMIN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMIN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMAX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMAX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMUL_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMUL_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, 
A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FNMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FNMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMV_X_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FMV_X_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_W_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_WU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_WU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_L_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_L_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_LU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_LU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FEQ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FEQ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FLT_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FLT_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FLE_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FLE_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FMV_S_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FMV_D_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_S_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_D_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_S_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_D_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_S_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_D_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_S_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FCVT_D_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,WB_X, PCR.N,N,N,N,N,N,N,N), - FRSR-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FSSR-> 
List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,WB_X, PCR.N,N,N,N,N,N,N,N), - FLW-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - FLD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - FSW-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - FSD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N)) + // fence.i + // jalr mul_val | sret + // fp_val | renx2 | div_val | | syscall + // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMIN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMIN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, 
FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMAX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMAX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMUL_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMUL_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMV_X_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FMV_X_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, 
IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_W_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_WU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_WU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_L_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_L_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_LU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_LU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FEQ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FEQ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLT_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLT_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLE_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLE_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FMV_S_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMV_D_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_WU-> 
List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FLW-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), + FLD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), + FSW-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), + FSD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N)) } object RoCCDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | eret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen pcr | | | privileged - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | s_wb | | | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | - CUSTOM0-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM0_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM0_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM0_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM0_RD_RS1-> 
List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM0_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM1_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM1_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM1_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM1_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM1_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM2_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM2_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM2_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM2_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM2_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM3_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM3_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM3_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM3_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N), - CUSTOM3_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N)) + // fence.i + // jalr mul_val | sret + // fp_val | renx2 | div_val | | syscall + // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | + // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | + CUSTOM0-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM1_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM1_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N)) } class Control(implicit conf: RocketConfiguration) extends Module @@ -333,12 +329,12 @@ class Control(implicit conf: RocketConfiguration) extends Module val (id_int_val: Bool) :: (id_fp_val: 
Bool) :: (id_rocc_val: Bool) :: id_br_type :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs val id_sel_alu2 :: id_sel_alu1 :: id_sel_imm :: (id_fn_dw: Bool) :: id_fn_alu :: cs1 = cs0 - val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: id_sel_wb :: cs2 = cs1 - val id_pcr :: (id_fence_i: Bool) :: (id_eret: Bool) :: (id_syscall: Bool) :: (id_privileged: Bool) :: (id_replay_next: Bool) :: (id_fence: Bool) :: (id_amo: Bool) :: Nil = cs2 + val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: cs2 = cs1 + val id_csr :: (id_fence_i: Bool) :: (id_sret: Bool) :: (id_syscall: Bool) :: (id_replay_next: Bool) :: (id_fence: Bool) :: (id_amo: Bool) :: Nil = cs2 val ex_reg_xcpt_interrupt = Reg(init=Bool(false)) val ex_reg_valid = Reg(init=Bool(false)) - val ex_reg_eret = Reg(init=Bool(false)) + val ex_reg_sret = Reg(init=Bool(false)) val ex_reg_wen = Reg(init=Bool(false)) val ex_reg_fp_wen = Reg(init=Bool(false)) val ex_reg_flush_inst = Reg(init=Bool(false)) @@ -351,7 +347,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val ex_reg_rocc_val = Reg(init=Bool(false)) val ex_reg_replay_next = Reg(init=Bool(false)) val ex_reg_load_use = Reg(init=Bool(false)) - val ex_reg_pcr = Reg(init=PCR.N) + val ex_reg_csr = Reg(init=CSR.N) val ex_reg_br_type = Reg(init=BR_N) val ex_reg_mem_cmd = Reg(Bits()) val ex_reg_mem_type = Reg(Bits()) @@ -359,7 +355,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val mem_reg_xcpt_interrupt = Reg(init=Bool(false)) val mem_reg_valid = Reg(init=Bool(false)) - val mem_reg_eret = Reg(init=Bool(false)) + val mem_reg_sret = Reg(init=Bool(false)) val mem_reg_wen = Reg(init=Bool(false)) val mem_reg_fp_wen = Reg(init=Bool(false)) val mem_reg_flush_inst = Reg(init=Bool(false)) @@ -370,18 +366,18 @@ class Control(implicit conf: RocketConfiguration) extends Module val 
mem_reg_rocc_val = Reg(init=Bool(false)) val mem_reg_replay = Reg(init=Bool(false)) val mem_reg_replay_next = Reg(init=Bool(false)) - val mem_reg_pcr = Reg(init=PCR.N) + val mem_reg_csr = Reg(init=CSR.N) val mem_reg_cause = Reg(UInt()) val mem_reg_slow_bypass = Reg(Bool()) val wb_reg_valid = Reg(init=Bool(false)) - val wb_reg_pcr = Reg(init=PCR.N) + val wb_reg_csr = Reg(init=CSR.N) val wb_reg_wen = Reg(init=Bool(false)) val wb_reg_fp_wen = Reg(init=Bool(false)) val wb_reg_rocc_val = Reg(init=Bool(false)) val wb_reg_flush_inst = Reg(init=Bool(false)) val wb_reg_mem_val = Reg(init=Bool(false)) - val wb_reg_eret = Reg(init=Bool(false)) + val wb_reg_sret = Reg(init=Bool(false)) val wb_reg_xcpt = Reg(init=Bool(false)) val wb_reg_replay = Reg(init=Bool(false)) val wb_reg_cause = Reg(UInt()) @@ -411,9 +407,18 @@ class Control(implicit conf: RocketConfiguration) extends Module def checkExceptions(x: Seq[(Bool, UInt)]) = (x.map(_._1).reduce(_||_), PriorityMux(x)) - // flush pipeline on PCR writes that may have side effects - val id_pcr_flush = id_pcr != PCR.N && id_pcr != PCR.F && - id_raddr1 != PCR.SUP0 && id_raddr1 != PCR.SUP1 && id_raddr1 != PCR.EPC + val id_csr_addr = io.dpath.inst(31,20) + val id_csr_en = id_csr != CSR.N + val id_csr_ren = id_raddr1 === UInt(0) && Vec(CSR.S, CSR.C).contains(id_csr) + val id_csr_privileged = + id_csr_addr(9,8) != UInt(0) && id_csr_ren || + id_csr_addr(11,10) != UInt(0) && id_csr_en + val id_csr_invalid = id_csr_en && + !Vec(CSRs.all.map(a => UInt(a))).contains(id_csr_addr) + // flush pipeline on CSR writes that may have side effects + val id_csr_flush = id_csr_en && !id_csr_ren && + id_csr_addr != CSRs.sup0 && id_csr_addr != CSRs.sup1 && id_csr_addr != CSRs.epc + val id_privileged = id_sret || id_csr_privileged // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) val id_amo_aq = io.dpath.inst(26) @@ -423,13 +428,13 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_fence_ok = io.dmem.ordered 
&& !ex_reg_mem_val && (Bool(conf.rocc.isEmpty) || !id_rocc_busy) id_reg_fence := id_fence_next || id_reg_fence && !id_fence_ok - val id_do_fence = id_amo && id_amo_aq || id_fence_i || id_reg_fence && (id_mem_val || id_rocc_val) || id_pcr_flush + val id_do_fence = id_amo && id_amo_aq || id_fence_i || id_reg_fence && (id_mem_val || id_rocc_val) || id_csr_flush val (id_xcpt, id_cause) = checkExceptions(List( (id_interrupt, id_interrupt_cause), (io.imem.resp.bits.xcpt_ma, UInt(0)), (io.imem.resp.bits.xcpt_if, UInt(1)), - (!id_int_val.toBool, UInt(2)), + (!id_int_val.toBool || id_csr_invalid, UInt(2)), (id_privileged && !io.dpath.status.s, UInt(3)), (id_fp_val && !io.dpath.status.ef, UInt(4)), (id_syscall, UInt(6)), @@ -446,13 +451,13 @@ class Control(implicit conf: RocketConfiguration) extends Module ex_reg_valid := Bool(false); ex_reg_wen := Bool(false); ex_reg_fp_wen := Bool(false); - ex_reg_eret := Bool(false); + ex_reg_sret := Bool(false) ex_reg_flush_inst := Bool(false); ex_reg_fp_val := Bool(false) ex_reg_rocc_val := Bool(false) ex_reg_replay_next := Bool(false) ex_reg_load_use := Bool(false) - ex_reg_pcr := PCR.N + ex_reg_csr := CSR.N ex_reg_br_type := BR_N ex_reg_xcpt := Bool(false) } @@ -463,14 +468,14 @@ class Control(implicit conf: RocketConfiguration) extends Module ex_reg_div_mul_val := id_mul_val || id_div_val ex_reg_mem_val := id_mem_val.toBool; ex_reg_valid := Bool(true) - ex_reg_pcr := id_pcr - ex_reg_wen := id_wen && id_waddr != UInt(0) + ex_reg_csr := id_csr + ex_reg_wen := id_wen ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen - ex_reg_eret := id_eret.toBool; + ex_reg_sret := id_sret ex_reg_flush_inst := id_fence_i ex_reg_fp_val := id_fp_val ex_reg_rocc_val := id_rocc_val.toBool - ex_reg_replay_next := id_replay_next || id_pcr_flush + ex_reg_replay_next := id_replay_next || id_csr_flush ex_reg_load_use := id_load_use ex_reg_mem_cmd := id_mem_cmd ex_reg_mem_type := id_mem_type.toUInt @@ -499,10 +504,10 @@ class Control(implicit conf: 
RocketConfiguration) extends Module when (ctrl_killx) { mem_reg_valid := Bool(false); - mem_reg_pcr := PCR.N + mem_reg_csr := CSR.N mem_reg_wen := Bool(false); mem_reg_fp_wen := Bool(false); - mem_reg_eret := Bool(false); + mem_reg_sret := Bool(false) mem_reg_mem_val := Bool(false); mem_reg_flush_inst := Bool(false); mem_reg_fp_val := Bool(false) @@ -512,10 +517,10 @@ class Control(implicit conf: RocketConfiguration) extends Module } .otherwise { mem_reg_valid := ex_reg_valid - mem_reg_pcr := ex_reg_pcr + mem_reg_csr := ex_reg_csr mem_reg_wen := ex_reg_wen; mem_reg_fp_wen := ex_reg_fp_wen; - mem_reg_eret := ex_reg_eret; + mem_reg_sret := ex_reg_sret mem_reg_mem_val := ex_reg_mem_val; mem_reg_flush_inst := ex_reg_flush_inst; mem_reg_fp_val := ex_reg_fp_val @@ -533,7 +538,7 @@ class Control(implicit conf: RocketConfiguration) extends Module (mem_reg_mem_val && io.dmem.xcpt.pf.st, UInt(11)))) val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem - val ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen || mem_reg_rocc_val || mem_reg_pcr != PCR.N) + val ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen || mem_reg_rocc_val || mem_reg_csr != CSR.N) val replay_mem = ll_wb_kill_mem || mem_reg_replay || fpu_kill_mem val killm_common = ll_wb_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem @@ -544,10 +549,10 @@ class Control(implicit conf: RocketConfiguration) extends Module when (ctrl_killm) { wb_reg_valid := Bool(false) - wb_reg_pcr := PCR.N + wb_reg_csr := CSR.N wb_reg_wen := Bool(false); wb_reg_fp_wen := Bool(false); - wb_reg_eret := Bool(false); + wb_reg_sret := Bool(false) wb_reg_flush_inst := Bool(false); wb_reg_mem_val := Bool(false) wb_reg_div_mul_val := Bool(false); @@ -556,10 +561,10 @@ class Control(implicit conf: RocketConfiguration) extends Module } .otherwise { wb_reg_valid := mem_reg_valid - wb_reg_pcr := mem_reg_pcr + wb_reg_csr := mem_reg_csr wb_reg_wen := 
mem_reg_wen; wb_reg_fp_wen := mem_reg_fp_wen; - wb_reg_eret := mem_reg_eret && !mem_reg_replay + wb_reg_sret := mem_reg_sret && !mem_reg_replay wb_reg_flush_inst := mem_reg_flush_inst; wb_reg_mem_val := mem_reg_mem_val wb_reg_div_mul_val := mem_reg_div_mul_val @@ -568,7 +573,7 @@ class Control(implicit conf: RocketConfiguration) extends Module } val replay_wb = io.dmem.resp.bits.nack || wb_reg_replay || - io.dpath.pcr_replay || Bool(!conf.rocc.isEmpty) && wb_reg_rocc_val && !io.rocc.cmd.ready + io.dpath.csr_replay || Bool(!conf.rocc.isEmpty) && wb_reg_rocc_val && !io.rocc.cmd.ready class Scoreboard(n: Int) { @@ -602,24 +607,25 @@ class Control(implicit conf: RocketConfiguration) extends Module fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) + id_csr_en && !io.fpu.fcsr_rdy || io.fpu.dec.ren1 && fp_sboard.readBypassed(id_raddr1) || io.fpu.dec.ren2 && fp_sboard.readBypassed(id_raddr2) || io.fpu.dec.ren3 && fp_sboard.readBypassed(id_raddr3) || io.fpu.dec.wen && fp_sboard.readBypassed(id_waddr) } else Bool(false) - // write cause to PCR on an exception + // write CAUSE CSR on an exception io.dpath.exception := wb_reg_xcpt io.dpath.cause := wb_reg_cause - io.dpath.badvaddr_wen := wb_reg_xcpt && (wb_reg_cause === UInt(10) || wb_reg_cause === UInt(11)) + io.dpath.badvaddr_wen := wb_reg_xcpt // don't care for non-memory exceptions // control transfer from ex/wb - take_pc_wb := replay_wb || wb_reg_xcpt || wb_reg_eret + take_pc_wb := replay_wb || wb_reg_xcpt || wb_reg_sret take_pc := take_pc_ex || take_pc_wb; io.dpath.sel_pc := Mux(wb_reg_xcpt, PC_PCR, // exception - Mux(wb_reg_eret, PC_PCR, // eret instruction + Mux(wb_reg_sret, PC_PCR, // sret instruction Mux(replay_wb, PC_WB, // replay PC_EX)))// branch/jal[r] @@ -637,7 +643,7 @@ class Control(implicit conf: RocketConfiguration) extends Module io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr || 
io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) - val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val || ex_reg_rocc_val) || + val id_ex_hazard = data_hazard_ex && (ex_reg_csr != CSR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val || ex_reg_rocc_val) || fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage. @@ -653,7 +659,7 @@ class Control(implicit conf: RocketConfiguration) extends Module io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val || mem_reg_rocc_val) || + val id_mem_hazard = data_hazard_mem && (mem_reg_csr != CSR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val || mem_reg_rocc_val) || fp_data_hazard_mem && mem_reg_fp_val id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) @@ -707,10 +713,9 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; io.dpath.wb_wen := wb_reg_wen && !replay_wb - io.dpath.wb_valid := wb_reg_valid && !replay_wb - io.dpath.sel_wb := id_sel_wb.toUInt - io.dpath.pcr := wb_reg_pcr.toUInt - io.dpath.eret := wb_reg_eret + io.dpath.retire := wb_reg_valid && !replay_wb + io.dpath.csr := wb_reg_csr + io.dpath.sret := wb_reg_sret io.dpath.ex_mem_type := ex_reg_mem_type io.dpath.ex_br_type := ex_reg_br_type ^ ex_reg_btb_hit io.dpath.ex_rs2_val := ex_reg_mem_val && isWrite(ex_reg_mem_cmd) || ex_reg_rocc_val diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index ed1a5e9d..88f8ea50 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -26,7 +26,6 @@ 
class Datapath(implicit conf: RocketConfiguration) extends Module val ex_reg_sel_alu2 = Reg(UInt()) val ex_reg_sel_alu1 = Reg(UInt()) val ex_reg_sel_imm = Reg(UInt()) - val ex_reg_ctrl_sel_wb = Reg(UInt()) val ex_reg_kill = Reg(Bool()) val ex_reg_rs1_bypass = Reg(Bool()) val ex_reg_rs1_lsb = Reg(Bits()) @@ -50,7 +49,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val wb_reg_ll_wb = Reg(init=Bool(false)) val wb_wdata = Bits() val wb_reg_rs2 = Reg(Bits()) - val wb_wen = io.ctrl.wb_wen && io.ctrl.wb_valid || wb_reg_ll_wb + val wb_wen = io.ctrl.wb_wen || wb_reg_ll_wb // instruction decode stage val id_inst = io.imem.resp.bits.data @@ -83,13 +82,16 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val sign = inst(31).toSInt val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt) - val b11 = Mux(sel === IMM_U, SInt(0), + val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0), Mux(sel === IMM_UJ, inst(20).toSInt, Mux(sel === IMM_SB, inst(7).toSInt, sign))) - val b10_5 = Mux(sel === IMM_U, Bits(0), inst(30,25)) + val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25)) val b4_1 = Mux(sel === IMM_U, Bits(0), - Mux(sel === IMM_S || sel === IMM_SB, inst(11,8), inst(24,21))) - val b0 = Mux(sel === IMM_S, inst(7), Mux(sel === IMM_I, inst(20), Bits(0))) + Mux(sel === IMM_S || sel === IMM_SB, inst(11,8), + Mux(sel === IMM_Z, inst(19,16), inst(24,21)))) + val b0 = Mux(sel === IMM_S, inst(7), + Mux(sel === IMM_I, inst(20), + Mux(sel === IMM_Z, inst(15), Bits(0)))) Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt } @@ -107,7 +109,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Module ex_reg_sel_alu2 := io.ctrl.sel_alu2 ex_reg_sel_alu1 := io.ctrl.sel_alu1 ex_reg_sel_imm := io.ctrl.sel_imm - ex_reg_ctrl_sel_wb := io.ctrl.sel_wb ex_reg_rs1_bypass := id_rs1_bypass && io.ctrl.ren1 ex_reg_rs2_bypass := id_rs2_bypass && io.ctrl.ren2 when 
(io.ctrl.ren1) { @@ -198,15 +199,16 @@ class Datapath(implicit conf: RocketConfiguration) extends Module require(io.dmem.req.bits.tag.getWidth >= 6) // processor control regfile read - val pcr = Module(new PCR) + val pcr = Module(new CSRFile) pcr.io.host <> io.host pcr.io <> io.ctrl + pcr.io <> io.fpu pcr.io.pc := wb_reg_pc - io.ctrl.pcr_replay := pcr.io.replay + io.ctrl.csr_replay := pcr.io.replay io.ptw.ptbr := pcr.io.ptbr io.ptw.invalidate := pcr.io.fatc - io.ptw.eret := io.ctrl.eret + io.ptw.sret := io.ctrl.sret io.ptw.status := pcr.io.status // branch resolution logic @@ -220,21 +222,12 @@ class Datapath(implicit conf: RocketConfiguration) extends Module Mux(io.ctrl.ex_br_type === BR_GEU, ex_rs1 >= ex_rs2, io.ctrl.ex_br_type === BR_J)))))) - val tsc_reg = WideCounter(64) - val irt_reg = WideCounter(64, io.ctrl.wb_valid) - - // writeback select mux - val ex_wdata = - Mux(ex_reg_ctrl_sel_wb === WB_TSC, tsc_reg.value, - Mux(ex_reg_ctrl_sel_wb === WB_IRT, irt_reg.value, - alu.io.out)).toBits // WB_ALU - // memory stage mem_reg_kill := ex_reg_kill when (!ex_reg_kill) { mem_reg_pc := ex_reg_pc mem_reg_inst := ex_reg_inst - mem_reg_wdata := ex_wdata + mem_reg_wdata := alu.io.out when (io.ctrl.ex_rs2_val) { mem_reg_rs2 := ex_rs2 } @@ -294,7 +287,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module wb_reg_wdata := mem_ll_wdata } wb_wdata := Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword, - Mux(io.ctrl.pcr != PCR.N, pcr.io.rw.rdata, + Mux(io.ctrl.csr != CSR.N, pcr.io.rw.rdata, wb_reg_wdata)) when (wb_wen) { writeRF(wb_reg_waddr, wb_wdata) } @@ -305,8 +298,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ctrl.fp_sboard_clra := dmem_resp_waddr // processor control regfile write - pcr.io.rw.addr := wb_reg_inst(19,15).toUInt - pcr.io.rw.cmd := io.ctrl.pcr + pcr.io.rw.addr := wb_reg_inst(31,20) + pcr.io.rw.cmd := io.ctrl.csr pcr.io.rw.wdata := wb_reg_wdata io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst) @@ 
-321,7 +314,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module wb_reg_pc)).toUInt // PC_WB printf("C: %d [%d] pc=[%x] W[r%d=%x] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", - tsc_reg(32,0), io.ctrl.wb_valid, wb_reg_pc, + pcr.io.time(32,0), io.ctrl.retire, wb_reg_pc, Mux(wb_wen, wb_reg_waddr, UInt(0)), wb_wdata, wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs1)), wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs2)), diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index b41d4e57..09376647 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -72,75 +72,47 @@ class Status extends Bundle { val s = Bool() } -object PCR +object CSR { // commands - val SZ = 3 - val X = Bits("b???", 3) - val N = Bits(0,3) - val F = Bits(1,3) // mfpcr - val T = Bits(4,3) // mtpcr - val C = Bits(6,3) // clearpcr - val S = Bits(7,3) // setpcr - - // regs - val SUP0 = 0 - val SUP1 = 1 - val EPC = 2 - val BADVADDR = 3 - val PTBR = 4 - val ASID = 5 - val COUNT = 6 - val COMPARE = 7 - val EVEC = 8 - val CAUSE = 9 - val STATUS = 10 - val HARTID = 11 - val IMPL = 12 - val FATC = 13 - val SEND_IPI = 14 - val CLR_IPI = 15 - val STATS = 28 - val RESET = 29 - val TOHOST = 30 - val FROMHOST = 31 + val SZ = 2 + val X = Bits("b??", 2) + val N = Bits(0,2) + val W = Bits(1,2) + val S = Bits(2,2) + val C = Bits(3,2) } -class PCR(implicit conf: RocketConfiguration) extends Module +class CSRFile(implicit conf: RocketConfiguration) extends Module { val io = new Bundle { val host = new HTIFIO(conf.tl.ln.nClients) val rw = new Bundle { - val addr = UInt(INPUT, log2Up(conf.nxpr)) - val cmd = Bits(INPUT, PCR.SZ) + val addr = UInt(INPUT, 12) + val cmd = Bits(INPUT, CSR.SZ) val rdata = Bits(OUTPUT, conf.xprlen) val wdata = Bits(INPUT, conf.xprlen) } - - // there is a fixed constant related to this in PCRReq.addr - require(log2Up(conf.nxpr) == 5) val status = new Status().asOutput val ptbr = UInt(OUTPUT, PADDR_BITS) val 
evec = UInt(OUTPUT, VADDR_BITS+1) val exception = Bool(INPUT) + val retire = Bool(INPUT) val cause = UInt(INPUT, 6) val badvaddr_wen = Bool(INPUT) val pc = UInt(INPUT, VADDR_BITS+1) - val eret = Bool(INPUT) - val ei = Bool(INPUT) - val di = Bool(INPUT) + val sret = Bool(INPUT) val fatc = Bool(OUTPUT) - val irq_timer = Bool(OUTPUT) - val irq_ipi = Bool(OUTPUT) val replay = Bool(OUTPUT) + val time = UInt(OUTPUT, 64) + val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) + val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip } - import PCR._ val reg_epc = Reg(Bits(width = VADDR_BITS+1)) val reg_badvaddr = Reg(Bits(width = VADDR_BITS)) val reg_evec = Reg(Bits(width = VADDR_BITS)) - val reg_count = WideCounter(32) val reg_compare = Reg(Bits(width = 32)) val reg_cause = Reg(Bits(width = io.cause.getWidth)) val reg_tohost = Reg(init=Bits(0, conf.xprlen)) @@ -150,12 +122,17 @@ class PCR(implicit conf: RocketConfiguration) extends Module val reg_ptbr = Reg(UInt(width = PADDR_BITS)) val reg_stats = Reg(init=Bool(false)) val reg_status = Reg(new Status) // reset down below + val reg_time = WideCounter(64) + val reg_instret = WideCounter(64, io.retire) + val reg_fflags = Reg(UInt(width = 5)) + val reg_frm = Reg(UInt(width = 3)) val r_irq_timer = Reg(init=Bool(false)) val r_irq_ipi = Reg(init=Bool(true)) + val cpu_req_valid = io.rw.cmd != CSR.N val host_pcr_req_valid = Reg(Bool()) // don't reset - val host_pcr_req_fire = host_pcr_req_valid && io.rw.cmd === PCR.N + val host_pcr_req_fire = host_pcr_req_valid && !cpu_req_valid val host_pcr_rep_valid = Reg(Bool()) // don't reset val host_pcr_bits = Reg(io.host.pcr_req.bits) io.host.pcr_req.ready := !host_pcr_req_valid && !host_pcr_rep_valid @@ -174,15 +151,26 @@ class PCR(implicit conf: RocketConfiguration) extends Module io.host.debug_stats_pcr := reg_stats // direct export up the hierarchy - val addr = Mux(io.rw.cmd != PCR.N, io.rw.addr, host_pcr_bits.addr) - val wen = io.rw.cmd === PCR.T || io.rw.cmd === PCR.S || io.rw.cmd 
=== PCR.C || - host_pcr_req_fire && host_pcr_bits.rw - val wdata = Mux(io.rw.cmd != PCR.N, io.rw.wdata, host_pcr_bits.data) + val addr = Mux(cpu_req_valid, io.rw.addr, host_pcr_bits.addr | 0x500) + val decoded_addr = { + val default = List(Bits("b" + ("?"*CSRs.all.size), CSRs.all.size)) + val outs = for (i <- 0 until CSRs.all.size) + yield UInt(CSRs.all(i), addr.getWidth) -> List(UInt(BigInt(1) << i, CSRs.all.size)) + + val d = DecodeLogic(addr, default, outs).toArray + val a = Array.fill(CSRs.all.max+1)(null.asInstanceOf[Bool]) + for (i <- 0 until CSRs.all.size) + a(CSRs.all(i)) = d(0)(i) + a + } + + val wen = cpu_req_valid || host_pcr_req_fire && host_pcr_bits.rw + val wdata = Mux(cpu_req_valid, io.rw.wdata, host_pcr_bits.data) io.status := reg_status io.status.ip := Cat(r_irq_timer, reg_fromhost.orR, r_irq_ipi, Bool(false), Bool(false), Bool(false), Bool(false), Bool(false)) - io.fatc := wen && addr === FATC + io.fatc := wen && decoded_addr(CSRs.fatc) io.evec := Mux(io.exception, reg_evec.toSInt, reg_epc).toUInt io.ptbr := reg_ptbr @@ -202,41 +190,64 @@ class PCR(implicit conf: RocketConfiguration) extends Module reg_cause := io.cause } - when (io.eret) { + when (io.sret) { reg_status.s := reg_status.ps reg_status.ei := reg_status.pei } - when (reg_count === reg_compare) { - r_irq_timer := Bool(true); + when (reg_time(reg_compare.getWidth-1,0) === reg_compare) { + r_irq_timer := true } - io.irq_timer := r_irq_timer; - io.irq_ipi := r_irq_ipi; - io.host.ipi_req.valid := io.rw.cmd === PCR.T && io.rw.addr === SEND_IPI + io.time := reg_time + io.host.ipi_req.valid := cpu_req_valid && decoded_addr(CSRs.send_ipi) io.host.ipi_req.bits := io.rw.wdata io.replay := io.host.ipi_req.valid && !io.host.ipi_req.ready - when (host_pcr_req_fire && !host_pcr_bits.rw && host_pcr_bits.addr === TOHOST) { reg_tohost := UInt(0) } + when (host_pcr_req_fire && !host_pcr_bits.rw && decoded_addr(CSRs.tohost)) { reg_tohost := UInt(0) } val read_impl = Bits(2) val read_ptbr = 
reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS val read_cause = reg_cause(reg_cause.getWidth-1) << conf.xprlen-1 | reg_cause(reg_cause.getWidth-2,0) - io.rw.rdata := AVec[Bits]( - reg_sup0, reg_sup1, reg_epc, reg_badvaddr, - reg_ptbr, Bits(0)/*asid*/, reg_count, reg_compare, - reg_evec, reg_cause, io.status.toBits, io.host.id, - read_impl, read_impl/*x*/, read_impl/*x*/, read_impl/*x*/, - reg_stats/*x*/, reg_fromhost/*x*/, reg_tohost/*x*/, reg_fromhost/*x*/, - reg_stats/*x*/, reg_fromhost/*x*/, reg_tohost/*x*/, reg_fromhost/*x*/, - reg_stats/*x*/, reg_fromhost/*x*/, reg_tohost/*x*/, reg_fromhost/*x*/, - reg_stats, reg_fromhost/*x*/, reg_tohost, reg_fromhost - )(addr) + + val read_mapping = Map[Int,Bits]( + CSRs.fflags -> (if (conf.fpu) reg_fflags else UInt(0)), + CSRs.frm -> (if (conf.fpu) reg_frm else UInt(0)), + CSRs.fcsr -> (if (conf.fpu) Cat(reg_frm, reg_fflags) else UInt(0)), + CSRs.cycle -> reg_time, + CSRs.time -> reg_time, + CSRs.instret -> reg_instret, + CSRs.sup0 -> reg_sup0, + CSRs.sup1 -> reg_sup1, + CSRs.epc -> reg_epc, + CSRs.badvaddr -> reg_badvaddr, + CSRs.ptbr -> read_ptbr, + CSRs.asid -> UInt(0), + CSRs.count -> reg_time, + CSRs.compare -> reg_compare, + CSRs.evec -> reg_evec, + CSRs.cause -> read_cause, + CSRs.status -> io.status.toBits, + CSRs.hartid -> io.host.id, + CSRs.impl -> read_impl, + CSRs.fatc -> read_impl, // don't care + CSRs.send_ipi -> read_impl, // don't care + CSRs.clear_ipi -> read_impl, // don't care + CSRs.stats -> reg_stats, + CSRs.tohost -> reg_tohost, + CSRs.fromhost -> reg_fromhost) + + io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) + + io.fcsr_rm := reg_frm + when (io.fcsr_flags.valid) { + reg_fflags := reg_fflags | io.fcsr_flags.bits + } when (wen) { - when (addr === STATUS) { - val sr_wdata = Mux(io.rw.cmd === PCR.S, reg_status.toBits | wdata, - Mux(io.rw.cmd === PCR.C, reg_status.toBits & ~wdata, + when (decoded_addr(CSRs.status)) { + val sr_wdata = Mux(io.rw.cmd === CSR.S, 
reg_status.toBits | wdata, + Mux(io.rw.cmd === CSR.C, reg_status.toBits & ~wdata, wdata)) reg_status := new Status().fromBits(sr_wdata) @@ -247,17 +258,22 @@ class PCR(implicit conf: RocketConfiguration) extends Module if (conf.rocc.isEmpty) reg_status.er := false if (!conf.fpu) reg_status.ef := false } - when (addr === EPC) { reg_epc := wdata(VADDR_BITS,0).toSInt } - when (addr === EVEC) { reg_evec := wdata(VADDR_BITS-1,0).toSInt } - when (addr === COUNT) { reg_count := wdata.toUInt } - when (addr === COMPARE) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false); } - when (addr === FROMHOST) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } - when (addr === TOHOST) { when (reg_tohost === UInt(0)) { reg_tohost := wdata } } - when (addr === CLR_IPI) { r_irq_ipi := wdata(0) } - when (addr === SUP0) { reg_sup0 := wdata; } - when (addr === SUP1) { reg_sup1 := wdata; } - when (addr === PTBR) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUInt; } - when (addr === STATS) { reg_stats := wdata(0) } + when (io.rw.cmd != CSR.C && io.rw.cmd != CSR.S) { + when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } + when (decoded_addr(CSRs.frm)) { reg_frm := wdata } + when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } + when (decoded_addr(CSRs.epc)) { reg_epc := wdata(VADDR_BITS,0).toSInt } + when (decoded_addr(CSRs.evec)) { reg_evec := wdata(VADDR_BITS-1,0).toSInt } + when (decoded_addr(CSRs.count)) { reg_time := wdata.toUInt } + when (decoded_addr(CSRs.compare)) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false); } + when (decoded_addr(CSRs.fromhost)) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } + when (decoded_addr(CSRs.tohost)) { when (reg_tohost === UInt(0)) { reg_tohost := wdata } } + when (decoded_addr(CSRs.clear_ipi)){ r_irq_ipi := wdata(0) } + when (decoded_addr(CSRs.sup0)) { reg_sup0 := wdata; } + when 
(decoded_addr(CSRs.sup1)) { reg_sup1 := wdata; } + when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUInt; } + when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } + } } io.host.ipi_rep.ready := Bool(true) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index c272999a..b41b4265 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -32,9 +32,7 @@ object FPConstants val FCMD_MIN = Bits("b011000") val FCMD_MAX = Bits("b011001") val FCMD_MFTX = Bits("b011100") - val FCMD_MFFSR = Bits("b011101") val FCMD_MXTF = Bits("b011110") - val FCMD_MTFSR = Bits("b011111") val FCMD_MADD = Bits("b100100") val FCMD_MSUB = Bits("b100101") val FCMD_NMSUB = Bits("b100110") @@ -43,7 +41,9 @@ object FPConstants val FCMD_STORE = Bits("b111001") val FCMD_X = Bits("b??????") val FCMD_WIDTH = 6 - val FSR_WIDTH = 8 + + val RM_SZ = 3 + val FLAGS_SZ = 5 } class FPUCtrlSigs extends Bundle @@ -59,8 +59,6 @@ class FPUCtrlSigs extends Bundle val fastpipe = Bool() val fma = Bool() val round = Bool() - val rdfsr = Bool() - val wrfsr = Bool() } class FPUDecoder extends Module @@ -74,67 +72,65 @@ class FPUDecoder extends Module val Y = Bool(true) val X = Bool(false) val decoder = DecodeLogic(io.inst, - List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X), - Array(FLW -> List(FCMD_LOAD, Y,N,N,N,Y,N,N,N,N,N,N,N), - FLD -> List(FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N,N,N), - FSW -> List(FCMD_STORE, N,N,Y,N,Y,N,Y,N,N,N,N,N), - FSD -> List(FCMD_STORE, N,N,Y,N,N,N,Y,N,N,N,N,N), - FMV_S_X -> List(FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N,Y,N,N), - FMV_D_X -> List(FCMD_MXTF, Y,N,N,N,N,Y,N,N,N,Y,N,N), - FCVT_S_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,Y,Y,N,N,N,Y,N,N), - FCVT_S_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,Y,Y,N,N,N,Y,N,N), - FCVT_S_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,Y,Y,N,N,N,Y,N,N), - FCVT_S_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,Y,Y,N,N,N,Y,N,N), - FCVT_D_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,N,Y,N,N,N,Y,N,N), - FCVT_D_WU-> 
List(FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,N,N,N,Y,N,N), - FCVT_D_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,N,Y,N,N,N,Y,N,N), - FCVT_D_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,N,N,N,Y,N,N), - FMV_X_S -> List(FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N,Y,N,N), - FMV_X_D -> List(FCMD_MFTX, N,Y,N,N,N,N,Y,N,N,Y,N,N), - FCVT_W_S -> List(FCMD_CVT_W_FMT, N,Y,N,N,Y,N,Y,N,N,Y,N,N), - FCVT_WU_S-> List(FCMD_CVT_WU_FMT,N,Y,N,N,Y,N,Y,N,N,Y,N,N), - FCVT_L_S -> List(FCMD_CVT_L_FMT, N,Y,N,N,Y,N,Y,N,N,Y,N,N), - FCVT_LU_S-> List(FCMD_CVT_LU_FMT,N,Y,N,N,Y,N,Y,N,N,Y,N,N), - FCVT_W_D -> List(FCMD_CVT_W_FMT, N,Y,N,N,N,N,Y,N,N,Y,N,N), - FCVT_WU_D-> List(FCMD_CVT_WU_FMT,N,Y,N,N,N,N,Y,N,N,Y,N,N), - FCVT_L_D -> List(FCMD_CVT_L_FMT, N,Y,N,N,N,N,Y,N,N,Y,N,N), - FCVT_LU_D-> List(FCMD_CVT_LU_FMT,N,Y,N,N,N,N,Y,N,N,Y,N,N), - FCVT_S_D -> List(FCMD_CVT_FMT_D, Y,Y,N,N,Y,N,N,Y,N,Y,N,N), - FCVT_D_S -> List(FCMD_CVT_FMT_S, Y,Y,N,N,N,N,N,Y,N,Y,N,N), - FEQ_S -> List(FCMD_EQ, N,Y,Y,N,Y,N,Y,N,N,Y,N,N), - FLT_S -> List(FCMD_LT, N,Y,Y,N,Y,N,Y,N,N,Y,N,N), - FLE_S -> List(FCMD_LE, N,Y,Y,N,Y,N,Y,N,N,Y,N,N), - FEQ_D -> List(FCMD_EQ, N,Y,Y,N,N,N,Y,N,N,Y,N,N), - FLT_D -> List(FCMD_LT, N,Y,Y,N,N,N,Y,N,N,Y,N,N), - FLE_D -> List(FCMD_LE, N,Y,Y,N,N,N,Y,N,N,Y,N,N), - FSSR -> List(FCMD_MTFSR, N,N,N,N,Y,N,Y,N,N,Y,Y,Y), - FRSR -> List(FCMD_MFFSR, N,N,N,N,Y,N,Y,N,N,Y,Y,N), - FSGNJ_S -> List(FCMD_SGNJ, Y,Y,Y,N,Y,N,N,Y,N,Y,N,N), - FSGNJN_S -> List(FCMD_SGNJN, Y,Y,Y,N,Y,N,N,Y,N,Y,N,N), - FSGNJX_S -> List(FCMD_SGNJX, Y,Y,Y,N,Y,N,N,Y,N,Y,N,N), - FSGNJ_D -> List(FCMD_SGNJ, Y,Y,Y,N,N,N,N,Y,N,Y,N,N), - FSGNJN_D -> List(FCMD_SGNJN, Y,Y,Y,N,N,N,N,Y,N,Y,N,N), - FSGNJX_D -> List(FCMD_SGNJX, Y,Y,Y,N,N,N,N,Y,N,Y,N,N), - FMIN_S -> List(FCMD_MIN, Y,Y,Y,N,Y,N,Y,Y,N,Y,N,N), - FMAX_S -> List(FCMD_MAX, Y,Y,Y,N,Y,N,Y,Y,N,Y,N,N), - FMIN_D -> List(FCMD_MIN, Y,Y,Y,N,N,N,Y,Y,N,Y,N,N), - FMAX_D -> List(FCMD_MAX, Y,Y,Y,N,N,N,Y,Y,N,Y,N,N), - FADD_S -> List(FCMD_ADD, Y,Y,Y,N,Y,N,N,N,Y,Y,N,N), - FSUB_S -> List(FCMD_SUB, Y,Y,Y,N,Y,N,N,N,Y,Y,N,N), - FMUL_S -> List(FCMD_MUL, 
Y,Y,Y,N,Y,N,N,N,Y,Y,N,N), - FADD_D -> List(FCMD_ADD, Y,Y,Y,N,N,N,N,N,Y,Y,N,N), - FSUB_D -> List(FCMD_SUB, Y,Y,Y,N,N,N,N,N,Y,Y,N,N), - FMUL_D -> List(FCMD_MUL, Y,Y,Y,N,N,N,N,N,Y,Y,N,N), - FMADD_S -> List(FCMD_MADD, Y,Y,Y,Y,Y,N,N,N,Y,Y,N,N), - FMSUB_S -> List(FCMD_MSUB, Y,Y,Y,Y,Y,N,N,N,Y,Y,N,N), - FNMADD_S -> List(FCMD_NMADD, Y,Y,Y,Y,Y,N,N,N,Y,Y,N,N), - FNMSUB_S -> List(FCMD_NMSUB, Y,Y,Y,Y,Y,N,N,N,Y,Y,N,N), - FMADD_D -> List(FCMD_MADD, Y,Y,Y,Y,N,N,N,N,Y,Y,N,N), - FMSUB_D -> List(FCMD_MSUB, Y,Y,Y,Y,N,N,N,N,Y,Y,N,N), - FNMADD_D -> List(FCMD_NMADD, Y,Y,Y,Y,N,N,N,N,Y,Y,N,N), - FNMSUB_D -> List(FCMD_NMSUB, Y,Y,Y,Y,N,N,N,N,Y,Y,N,N) + List (FCMD_X, X,X,X,X,X,X,X,X,X,X), + Array(FLW -> List(FCMD_LOAD, Y,N,N,N,Y,N,N,N,N,N), + FLD -> List(FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N), + FSW -> List(FCMD_STORE, N,N,Y,N,Y,N,Y,N,N,N), + FSD -> List(FCMD_STORE, N,N,Y,N,N,N,Y,N,N,N), + FMV_S_X -> List(FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N,Y), + FMV_D_X -> List(FCMD_MXTF, Y,N,N,N,N,Y,N,N,N,Y), + FCVT_S_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,Y,Y,N,N,N,Y), + FCVT_S_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,Y,Y,N,N,N,Y), + FCVT_S_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,Y,Y,N,N,N,Y), + FCVT_S_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,Y,Y,N,N,N,Y), + FCVT_D_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,N,Y,N,N,N,Y), + FCVT_D_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,N,N,N,Y), + FCVT_D_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,N,Y,N,N,N,Y), + FCVT_D_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,N,N,N,Y), + FMV_X_S -> List(FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N,Y), + FMV_X_D -> List(FCMD_MFTX, N,Y,N,N,N,N,Y,N,N,Y), + FCVT_W_S -> List(FCMD_CVT_W_FMT, N,Y,N,N,Y,N,Y,N,N,Y), + FCVT_WU_S-> List(FCMD_CVT_WU_FMT,N,Y,N,N,Y,N,Y,N,N,Y), + FCVT_L_S -> List(FCMD_CVT_L_FMT, N,Y,N,N,Y,N,Y,N,N,Y), + FCVT_LU_S-> List(FCMD_CVT_LU_FMT,N,Y,N,N,Y,N,Y,N,N,Y), + FCVT_W_D -> List(FCMD_CVT_W_FMT, N,Y,N,N,N,N,Y,N,N,Y), + FCVT_WU_D-> List(FCMD_CVT_WU_FMT,N,Y,N,N,N,N,Y,N,N,Y), + FCVT_L_D -> List(FCMD_CVT_L_FMT, N,Y,N,N,N,N,Y,N,N,Y), + FCVT_LU_D-> List(FCMD_CVT_LU_FMT,N,Y,N,N,N,N,Y,N,N,Y), + FCVT_S_D -> 
List(FCMD_CVT_FMT_D, Y,Y,N,N,Y,N,N,Y,N,Y), + FCVT_D_S -> List(FCMD_CVT_FMT_S, Y,Y,N,N,N,N,N,Y,N,Y), + FEQ_S -> List(FCMD_EQ, N,Y,Y,N,Y,N,Y,N,N,Y), + FLT_S -> List(FCMD_LT, N,Y,Y,N,Y,N,Y,N,N,Y), + FLE_S -> List(FCMD_LE, N,Y,Y,N,Y,N,Y,N,N,Y), + FEQ_D -> List(FCMD_EQ, N,Y,Y,N,N,N,Y,N,N,Y), + FLT_D -> List(FCMD_LT, N,Y,Y,N,N,N,Y,N,N,Y), + FLE_D -> List(FCMD_LE, N,Y,Y,N,N,N,Y,N,N,Y), + FSGNJ_S -> List(FCMD_SGNJ, Y,Y,Y,N,Y,N,N,Y,N,Y), + FSGNJN_S -> List(FCMD_SGNJN, Y,Y,Y,N,Y,N,N,Y,N,Y), + FSGNJX_S -> List(FCMD_SGNJX, Y,Y,Y,N,Y,N,N,Y,N,Y), + FSGNJ_D -> List(FCMD_SGNJ, Y,Y,Y,N,N,N,N,Y,N,Y), + FSGNJN_D -> List(FCMD_SGNJN, Y,Y,Y,N,N,N,N,Y,N,Y), + FSGNJX_D -> List(FCMD_SGNJX, Y,Y,Y,N,N,N,N,Y,N,Y), + FMIN_S -> List(FCMD_MIN, Y,Y,Y,N,Y,N,Y,Y,N,Y), + FMAX_S -> List(FCMD_MAX, Y,Y,Y,N,Y,N,Y,Y,N,Y), + FMIN_D -> List(FCMD_MIN, Y,Y,Y,N,N,N,Y,Y,N,Y), + FMAX_D -> List(FCMD_MAX, Y,Y,Y,N,N,N,Y,Y,N,Y), + FADD_S -> List(FCMD_ADD, Y,Y,Y,N,Y,N,N,N,Y,Y), + FSUB_S -> List(FCMD_SUB, Y,Y,Y,N,Y,N,N,N,Y,Y), + FMUL_S -> List(FCMD_MUL, Y,Y,Y,N,Y,N,N,N,Y,Y), + FADD_D -> List(FCMD_ADD, Y,Y,Y,N,N,N,N,N,Y,Y), + FSUB_D -> List(FCMD_SUB, Y,Y,Y,N,N,N,N,N,Y,Y), + FMUL_D -> List(FCMD_MUL, Y,Y,Y,N,N,N,N,N,Y,Y), + FMADD_S -> List(FCMD_MADD, Y,Y,Y,Y,Y,N,N,N,Y,Y), + FMSUB_S -> List(FCMD_MSUB, Y,Y,Y,Y,Y,N,N,N,Y,Y), + FNMADD_S -> List(FCMD_NMADD, Y,Y,Y,Y,Y,N,N,N,Y,Y), + FNMSUB_S -> List(FCMD_NMSUB, Y,Y,Y,Y,Y,N,N,N,Y,Y), + FMADD_D -> List(FCMD_MADD, Y,Y,Y,Y,N,N,N,N,Y,Y), + FMSUB_D -> List(FCMD_MSUB, Y,Y,Y,Y,N,N,N,N,Y,Y), + FNMADD_D -> List(FCMD_NMADD, Y,Y,Y,Y,N,N,N,N,Y,Y), + FNMSUB_D -> List(FCMD_NMSUB, Y,Y,Y,Y,N,N,N,N,Y,Y) )) - val cmd :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: fma :: round :: rdfsr :: wrfsr :: Nil = decoder + val cmd :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: fma :: round :: Nil = decoder io.sigs.cmd := cmd io.sigs.wen := wen.toBool @@ -147,14 +143,15 @@ class FPUDecoder extends Module io.sigs.fastpipe := fastpipe.toBool 
io.sigs.fma := fma.toBool io.sigs.round := round.toBool - io.sigs.rdfsr := rdfsr.toBool - io.sigs.wrfsr := wrfsr.toBool } class DpathFPUIO extends Bundle { val inst = Bits(OUTPUT, 32) val fromint_data = Bits(OUTPUT, 64) + val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) + val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip + val store_data = Bits(INPUT, 64) val toint_data = Bits(INPUT, 64) @@ -166,6 +163,7 @@ class DpathFPUIO extends Bundle { class CtrlFPUIO extends Bundle { val valid = Bool(OUTPUT) + val fcsr_rdy = Bool(INPUT) val nack_mem = Bool(INPUT) val illegal_rm = Bool(INPUT) val killx = Bool(OUTPUT) @@ -182,7 +180,6 @@ class FPToInt extends Module val single = Bool() val cmd = Bits(width = FCMD_WIDTH) val rm = Bits(width = 3) - val fsr = Bits(width = FSR_WIDTH) val in1 = Bits(width = 65) val in2 = Bits(width = 65) override def clone = new Input().asInstanceOf[this.type] @@ -211,7 +208,6 @@ class FPToInt extends Module in.single := io.in.bits.single in.cmd := io.in.bits.cmd in.rm := io.in.bits.rm - in.fsr := io.in.bits.fsr } val unrec_s = hardfloat.recodedFloatNToFloatN(in.in1, 23, 9) @@ -228,9 +224,6 @@ class FPToInt extends Module io.out.bits.toint := Mux(in.single, Cat(Fill(32, unrec_s(31)), unrec_s), unrec_d) io.out.bits.exc := Bits(0) - when (in.cmd === FCMD_MTFSR || in.cmd === FCMD_MFFSR) { - io.out.bits.toint := io.in.bits.fsr - } when (in.cmd === FCMD_CVT_W_FMT || in.cmd === FCMD_CVT_WU_FMT) { io.out.bits.toint := Cat(Fill(32, d2i._1(31)), d2i._1(31,0)) io.out.bits.exc := d2i._2 @@ -472,9 +465,6 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1), rec_s), rec_d) - val fsr_rm = Reg(Bits(width = 3)) - val fsr_exc = Reg(Bits(width = 5)) - // regfile val regfile = Mem(Bits(width = 65), 32) when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded } @@ -482,13 +472,12 @@ class FPU(sfma_latency: Int, 
dfma_latency: Int) extends Module val ex_rs1 = regfile(ex_reg_inst(19,15)) val ex_rs2 = regfile(ex_reg_inst(24,20)) val ex_rs3 = regfile(ex_reg_inst(31,27)) - val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), fsr_rm, ex_reg_inst(14,12)) + val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.dpath.fcsr_rm, ex_reg_inst(14,12)) val fpiu = Module(new FPToInt) fpiu.io.in.valid := ex_reg_valid && ctrl.toint fpiu.io.in.bits := ctrl fpiu.io.in.bits.rm := ex_rm - fpiu.io.in.bits.fsr := Cat(fsr_rm, fsr_exc) fpiu.io.in.bits.in1 := ex_rs1 fpiu.io.in.bits.in2 := ex_rs2 @@ -576,24 +565,17 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module val wexc = Vec(pipes.map(_.wexc))(wsrc) when (wen(0)) { regfile(waddr(4,0)) := wdata } + val wb_toint_valid = wb_reg_valid && wb_ctrl.toint val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint) - when (wb_reg_valid && wb_ctrl.toint || wen(0)) { - fsr_exc := fsr_exc | - Fill(fsr_exc.getWidth, wb_reg_valid && wb_ctrl.toint) & wb_toint_exc | - Fill(fsr_exc.getWidth, wen(0)) & wexc - } - - val mem_fsr_wdata = RegEnable(io.dpath.fromint_data(FSR_WIDTH-1,0), ex_reg_valid && ctrl.wrfsr) - val wb_fsr_wdata = RegEnable(mem_fsr_wdata, mem_reg_valid && mem_ctrl.wrfsr) - when (wb_reg_valid && wb_ctrl.wrfsr) { - fsr_exc := wb_fsr_wdata - fsr_rm := wb_fsr_wdata >> fsr_exc.getWidth - } + io.dpath.fcsr_flags.valid := wb_toint_valid || wen(0) + io.dpath.fcsr_flags.bits := + Mux(wb_toint_valid, wb_toint_exc, UInt(0)) | + Mux(wen(0), wexc, UInt(0)) val fp_inflight = wb_reg_valid && wb_ctrl.toint || wen.orR - val fsr_busy = mem_ctrl.rdfsr && fp_inflight || wb_reg_valid && wb_ctrl.wrfsr val units_busy = mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ctrl.single, io.sfma.valid, io.dfma.valid)) - io.ctrl.nack_mem := fsr_busy || units_busy || write_port_busy + io.ctrl.fcsr_rdy := !fp_inflight + io.ctrl.nack_mem := units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs def useScoreboard(f: ((Pipe, Int)) => Bool) = 
pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_) io.ctrl.sboard_set := wb_reg_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl))) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 172c034d..6ff0b683 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -3,9 +3,8 @@ package rocket import Chisel._ import Node._ -object Instructions -{ - /* Automatically generated by parse-opcodes */ +/* Automatically generated by parse-opcodes */ +object Instructions { def JAL = Bits("b?????????????????????????1100111") def JALR = Bits("b?????????????????000?????1101111") def BEQ = Bits("b?????????????????000?????1100011") @@ -35,14 +34,6 @@ object Instructions def SRA = Bits("b0100000??????????101?????0110011") def OR = Bits("b0000000??????????110?????0110011") def AND = Bits("b0000000??????????111?????0110011") - def MUL = Bits("b0000001??????????000?????0110011") - def MULH = Bits("b0000001??????????001?????0110011") - def MULHSU = Bits("b0000001??????????010?????0110011") - def MULHU = Bits("b0000001??????????011?????0110011") - def DIV = Bits("b0000001??????????100?????0110011") - def DIVU = Bits("b0000001??????????101?????0110011") - def REM = Bits("b0000001??????????110?????0110011") - def REMU = Bits("b0000001??????????111?????0110011") def ADDIW = Bits("b?????????????????000?????0011011") def SLLIW = Bits("b0000000??????????001?????0011011") def SRLIW = Bits("b0000000??????????101?????0011011") @@ -52,11 +43,6 @@ object Instructions def SLLW = Bits("b0000000??????????001?????0111011") def SRLW = Bits("b0000000??????????101?????0111011") def SRAW = Bits("b0100000??????????101?????0111011") - def MULW = Bits("b0000001??????????000?????0111011") - def DIVW = Bits("b0000001??????????100?????0111011") - def DIVUW = Bits("b0000001??????????101?????0111011") - def REMW = Bits("b0000001??????????110?????0111011") - def REMUW = 
Bits("b0000001??????????111?????0111011") def LB = Bits("b?????????????????000?????0000011") def LH = Bits("b?????????????????001?????0000011") def LW = Bits("b?????????????????010?????0000011") @@ -68,6 +54,21 @@ object Instructions def SH = Bits("b?????????????????001?????0100011") def SW = Bits("b?????????????????010?????0100011") def SD = Bits("b?????????????????011?????0100011") + def FENCE = Bits("b?????????????????000?????0001111") + def FENCE_I = Bits("b?????????????????001?????0001111") + def MUL = Bits("b0000001??????????000?????0110011") + def MULH = Bits("b0000001??????????001?????0110011") + def MULHSU = Bits("b0000001??????????010?????0110011") + def MULHU = Bits("b0000001??????????011?????0110011") + def DIV = Bits("b0000001??????????100?????0110011") + def DIVU = Bits("b0000001??????????101?????0110011") + def REM = Bits("b0000001??????????110?????0110011") + def REMU = Bits("b0000001??????????111?????0110011") + def MULW = Bits("b0000001??????????000?????0111011") + def DIVW = Bits("b0000001??????????100?????0111011") + def DIVUW = Bits("b0000001??????????101?????0111011") + def REMW = Bits("b0000001??????????110?????0111011") + def REMUW = Bits("b0000001??????????111?????0111011") def AMOADD_W = Bits("b00000????????????010?????0101111") def AMOXOR_W = Bits("b00100????????????010?????0101111") def AMOOR_W = Bits("b01000????????????010?????0101111") @@ -90,18 +91,15 @@ object Instructions def AMOSWAP_D = Bits("b00001????????????011?????0101111") def LR_D = Bits("b00010??00000?????011?????0101111") def SC_D = Bits("b00011????????????011?????0101111") - def FENCE = Bits("b?????????????????000?????0001111") - def FENCE_I = Bits("b?????????????????001?????0001111") - def SYSCALL = Bits("b00000000000000000000000001110111") - def BREAK = Bits("b00000000000000000001000001110111") - def RDCYCLE = Bits("b00000000000000000100?????1110111") - def RDTIME = Bits("b00000010000000000100?????1110111") - def RDINSTRET = Bits("b00000100000000000100?????1110111") - 
def MTPCR = Bits("b0000000??????????000?????1110011") - def MFPCR = Bits("b000000000000?????001?????1110011") - def SETPCR = Bits("b?????????????????010?????1110011") - def CLEARPCR = Bits("b?????????????????011?????1110011") - def ERET = Bits("b00000000000000000100000001110011") + def SCALL = Bits("b00000000000000000000000001110011") + def SBREAK = Bits("b00000000000100000000000001110011") + def SRET = Bits("b10000000000000000000000001110011") + def CSRRW = Bits("b?????????????????001?????1110011") + def CSRRS = Bits("b?????????????????010?????1110011") + def CSRRC = Bits("b?????????????????011?????1110011") + def CSRRWI = Bits("b?????????????????101?????1110011") + def CSRRSI = Bits("b?????????????????110?????1110011") + def CSRRCI = Bits("b?????????????????111?????1110011") def FADD_S = Bits("b0000000??????????????????1010011") def FSUB_S = Bits("b0000100??????????????????1010011") def FMUL_S = Bits("b0001000??????????????????1010011") @@ -148,10 +146,8 @@ object Instructions def FMAX_D = Bits("b1100101??????????000?????1010011") def FMV_X_S = Bits("b111000000000?????000?????1010011") def FMV_X_D = Bits("b111000100000?????000?????1010011") - def FRSR = Bits("b11101000000000000000?????1010011") def FMV_S_X = Bits("b111100000000?????000?????1010011") def FMV_D_X = Bits("b111100100000?????000?????1010011") - def FSSR = Bits("b111110000000?????000?????1010011") def FLW = Bits("b?????????????????010?????0000111") def FLD = Bits("b?????????????????011?????0000111") def FSW = Bits("b?????????????????010?????0100111") @@ -164,7 +160,6 @@ object Instructions def FMSUB_D = Bits("b?????01??????????????????1000111") def FNMSUB_D = Bits("b?????01??????????????????1001011") def FNMADD_D = Bits("b?????01??????????????????1001111") - /* Automatically generated by parse-opcodes */ def CUSTOM0 = Bits("b?????????????????000?????0001011") def CUSTOM0_RS1 = Bits("b?????????????????010?????0001011") def CUSTOM0_RS1_RS2 = Bits("b?????????????????011?????0001011") @@ -190,3 +185,61 @@ 
object Instructions def CUSTOM3_RD_RS1 = Bits("b?????????????????110?????1111011") def CUSTOM3_RD_RS1_RS2 = Bits("b?????????????????111?????1111011") } +object CSRs { + val sup0 = 1280 + val fflags = 1 + val frm = 2 + val fcsr = 3 + val cycle = 4 + val time = 5 + val instret = 6 + val sup1 = 1281 + val evec = 1288 + val cause = 1289 + val status = 1290 + val hartid = 1291 + val impl = 1292 + val epc = 1282 + val send_ipi = 1294 + val clear_ipi = 1295 + val badvaddr = 1283 + val ptbr = 1284 + val stats = 1308 + val reset = 1309 + val tohost = 1310 + val asid = 1285 + val count = 1286 + val compare = 1287 + val fromhost = 1311 + val fatc = 1293 + val all = { + val res = collection.mutable.ArrayBuffer[Int]() + res += sup0 + res += fflags + res += frm + res += fcsr + res += cycle + res += time + res += instret + res += sup1 + res += evec + res += cause + res += status + res += hartid + res += impl + res += epc + res += send_ipi + res += clear_ipi + res += badvaddr + res += ptbr + res += stats + res += reset + res += tohost + res += asid + res += count + res += compare + res += fromhost + res += fatc + res.toArray + } +} diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9d5c9dbc..e5dd334b 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -881,7 +881,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends lrsc_count := 0 } } - when (io.cpu.ptw.eret) { lrsc_count := 0 } + when (io.cpu.ptw.sret) { lrsc_count := 0 } val s2_data = Vec.fill(conf.ways){Bits(width = conf.bitsperrow)} for (w <- 0 until conf.ways) { diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index af5787e6..48fa036a 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -17,13 +17,13 @@ class TLBPTWIO extends Bundle { val status = new Status().asInput val invalidate = Bool(INPUT) - val eret = Bool(INPUT) + val sret = Bool(INPUT) 
} class DatapathPTWIO extends Bundle { val ptbr = UInt(INPUT, PADDR_BITS) val invalidate = Bool(INPUT) - val eret = Bool(INPUT) + val sret = Bool(INPUT) val status = new Status().asInput } @@ -83,7 +83,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module io.requestor(i).resp.bits.perm := r_pte(8,3) io.requestor(i).resp.bits.ppn := resp_ppn.toUInt io.requestor(i).invalidate := io.dpath.invalidate - io.requestor(i).eret := io.dpath.eret + io.requestor(i).sret := io.dpath.sret io.requestor(i).status := io.dpath.status } From 5814a90472b391cf0873f35e3879263fba9361ba Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 5 Dec 2013 04:16:48 -0800 Subject: [PATCH 0663/1087] Make DecodeLogic interface more flexible --- rocket/src/main/scala/consts.scala | 6 +-- rocket/src/main/scala/decode.scala | 68 ++++++++++++++++-------------- 2 files changed, 40 insertions(+), 34 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index e0f29f1f..7d13ca21 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -40,9 +40,9 @@ trait ScalarOpConstants { val A2_ZERO = UInt(2, 3) val A2_FOUR = UInt(3, 3) - val X = Bits("b?", 1) - val N = Bits(0, 1) - val Y = Bits(1, 1) + val X = Bool.DC + val N = Bool(false) + val Y = Bool(true) val WB_X = UInt("b??", 2) val WB_ALU = UInt(0, 3); diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 6c8432d8..5ce768b0 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -14,45 +14,51 @@ object DecodeLogic new Term(lit.value) } } - def logic(addr: Bits, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bits], terms: Seq[Term]) = { + def logic(addr: UInt, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bool], terms: Seq[Term]) = { terms.map { t => - if (!cache.contains(t)) - cache += t -> ((if (t.mask == 0) addr else addr & Lit(BigInt(2).pow(addrWidth)-(t.mask+1), 
addrWidth){Bits()}) === Lit(t.value, addrWidth){Bits()}) - cache(t).toBool + cache.getOrElseUpdate(t, (if (t.mask == 0) addr else addr & Lit(BigInt(2).pow(addrWidth)-(t.mask+1), addrWidth){Bits()}) === Lit(t.value, addrWidth){Bits()}) }.foldLeft(Bool(false))(_||_) } - def apply(addr: Bits, default: Iterable[Bits], mapping: Iterable[(Bits, Iterable[Bits])]) = { - var map = mapping - var cache = scala.collection.mutable.Map[Term,Bits]() - default map { d => - val dterm = term(d) - val (keys, values) = map.unzip - val addrWidth = keys.map(_.getWidth).max - val terms = keys.toList.map(k => term(k)) - val termvalues = terms zip values.toList.map(v => term(v.head)) + def apply[T <: Bits](addr: UInt, default: T, mapping: Iterable[(UInt, T)]): T = { + val cache = caches.getOrElseUpdate(Module.current, collection.mutable.Map[Term,Bool]()) + val dterm = term(default) + val (keys, values) = mapping.unzip + val addrWidth = keys.map(_.getWidth).max + val terms = keys.toList.map(k => term(k)) + val termvalues = terms zip values.toList.map(term(_)) - for (t <- keys.zip(terms).tails; if !t.isEmpty) - for (u <- t.tail) - assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap") + for (t <- keys.zip(terms).tails; if !t.isEmpty) + for (u <- t.tail) + assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap") - val result = (0 until math.max(d.litOf.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) => - val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1) - val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1) - val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1) + val result = (0 until default.litOf.width.max(values.map(_.litOf.width).max)).map({ case (i: Int) => + val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 
1 }.map(_._1) + val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1) + val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1) - if (((dterm.mask >> i) & 1) != 0) { - logic(addr, addrWidth, cache, SimplifyDC(mint, maxt, addrWidth)).toBits - } else { - val defbit = (dterm.value.toInt >> i) & 1 - val t = if (defbit == 0) mint else maxt - val bit = logic(addr, addrWidth, cache, Simplify(t, dc, addrWidth)).toBits - if (defbit == 0) bit else ~bit - } - }).reverse.reduceRight(Cat(_,_)) - map = map map { case (x,y) => (x, y.tail) } - result + if (((dterm.mask >> i) & 1) != 0) { + logic(addr, addrWidth, cache, SimplifyDC(mint, maxt, addrWidth)).toBits + } else { + val defbit = (dterm.value.toInt >> i) & 1 + val t = if (defbit == 0) mint else maxt + val bit = logic(addr, addrWidth, cache, Simplify(t, dc, addrWidth)).toBits + if (defbit == 0) bit else ~bit + } + }).reverse.reduceRight(Cat(_,_)) + default.fromBits(result) + } + def apply[T <: Bits](addr: UInt, default: Iterable[T], mappingIn: Iterable[(UInt, Iterable[T])]): Iterable[T] = { + var mapping = mappingIn + default map { thisDefault => + val thisMapping = for ((key, values) <- mapping) yield key -> values.head + val res = apply(addr, thisDefault, thisMapping) + mapping = for ((key, values) <- mapping) yield key -> values.tail + res } } + def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool = + apply(addr, Bool.DC, trues.map(_ -> Bool(true)) ++ falses.map(_ -> Bool(false))) + private val caches = collection.mutable.Map[Module,collection.mutable.Map[Term,Bool]]() } class Term(val value: BigInt, val mask: BigInt = 0) From 16d5250924b1f94b2f8bba5098e7b56bbc514184 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 5 Dec 2013 04:18:04 -0800 Subject: [PATCH 0664/1087] Correct FP trap behavior on FCSR --- rocket/src/main/scala/ctrl.scala | 37 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 16 
deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 904b671a..4c03daf5 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -407,17 +407,22 @@ class Control(implicit conf: RocketConfiguration) extends Module def checkExceptions(x: Seq[(Bool, UInt)]) = (x.map(_._1).reduce(_||_), PriorityMux(x)) + val fp_csrs = CSRs.fcsr :: CSRs.frm :: CSRs.fflags :: Nil + val legal_csrs = if (conf.fpu) CSRs.all.toSet else CSRs.all.toSet -- fp_csrs + val id_csr_addr = io.dpath.inst(31,20) val id_csr_en = id_csr != CSR.N - val id_csr_ren = id_raddr1 === UInt(0) && Vec(CSR.S, CSR.C).contains(id_csr) - val id_csr_privileged = - id_csr_addr(9,8) != UInt(0) && id_csr_ren || - id_csr_addr(11,10) != UInt(0) && id_csr_en - val id_csr_invalid = id_csr_en && - !Vec(CSRs.all.map(a => UInt(a))).contains(id_csr_addr) + val id_csr_fp = Bool(conf.fpu) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) + val id_csr_wen = id_raddr1 != UInt(0) || !Vec(CSR.S, CSR.C).contains(id_csr) + val id_csr_privileged = id_csr_en && + (id_csr_addr(9,8) != UInt(0) || + id_csr_addr(11,10) != UInt(0) && id_csr_wen) + val id_csr_invalid = id_csr_en && !Vec(legal_csrs.map(UInt(_))).contains(id_csr_addr) // flush pipeline on CSR writes that may have side effects - val id_csr_flush = id_csr_en && !id_csr_ren && - id_csr_addr != CSRs.sup0 && id_csr_addr != CSRs.sup1 && id_csr_addr != CSRs.epc + val id_csr_flush = { + val safe_csrs = CSRs.sup0 :: CSRs.sup1 :: CSRs.epc :: Nil + id_csr_en && id_csr_wen && DecodeLogic(id_csr_addr, legal_csrs -- safe_csrs, safe_csrs) + } val id_privileged = id_sret || id_csr_privileged // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) @@ -431,14 +436,14 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_do_fence = id_amo && id_amo_aq || id_fence_i || id_reg_fence && (id_mem_val || id_rocc_val) || id_csr_flush val (id_xcpt, id_cause) = 
checkExceptions(List( - (id_interrupt, id_interrupt_cause), - (io.imem.resp.bits.xcpt_ma, UInt(0)), - (io.imem.resp.bits.xcpt_if, UInt(1)), - (!id_int_val.toBool || id_csr_invalid, UInt(2)), - (id_privileged && !io.dpath.status.s, UInt(3)), - (id_fp_val && !io.dpath.status.ef, UInt(4)), - (id_syscall, UInt(6)), - (id_rocc_val && !io.dpath.status.er, UInt(12)))) + (id_interrupt, id_interrupt_cause), + (io.imem.resp.bits.xcpt_ma, UInt(0)), + (io.imem.resp.bits.xcpt_if, UInt(1)), + (!id_int_val || id_csr_invalid, UInt(2)), + (id_privileged && !io.dpath.status.s, UInt(3)), + ((id_fp_val || id_csr_fp) && !io.dpath.status.ef, UInt(4)), + (id_syscall, UInt(6)), + (id_rocc_val && !io.dpath.status.er, UInt(12)))) ex_reg_xcpt_interrupt := id_interrupt && !take_pc && io.imem.resp.valid when (id_xcpt) { ex_reg_cause := id_cause } From da3135ac9bf7d39a37e1cd471bbdff4b72626ac7 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 9 Dec 2013 15:06:13 -0800 Subject: [PATCH 0665/1087] Begin integer unit clean-up ...to make it easier to generate the superscalar version of the core. 
--- rocket/src/main/scala/consts.scala | 26 +-- rocket/src/main/scala/ctrl.scala | 92 +++++----- rocket/src/main/scala/dpath.scala | 210 +++++++++++------------ rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/instructions.scala | 74 ++++---- rocket/src/main/scala/util.scala | 1 + 6 files changed, 199 insertions(+), 206 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 7d13ca21..e58ce9fb 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -21,10 +21,10 @@ trait ScalarOpConstants { val PC_PCR = UInt(3, 2) val A1_X = Bits("b??", 2) - val A1_RS1 = UInt(0, 2) - val A1_ZERO = UInt(1, 2) - val A1_PC = UInt(2, 2) - val A1_PCHI = UInt(3, 2) + val A1_ZERO = UInt(0, 2) + val A1_RS1 = UInt(1, 2) + val A1_PCHI = UInt(2, 2) + val A1_PC = UInt(3, 2) val IMM_X = Bits("b???", 3) val IMM_S = UInt(0, 3); @@ -35,19 +35,21 @@ trait ScalarOpConstants { val IMM_Z = UInt(5, 3); val A2_X = Bits("b??", 2) - val A2_RS2 = UInt(0, 2) - val A2_IMM = UInt(1, 2) - val A2_ZERO = UInt(2, 3) - val A2_FOUR = UInt(3, 3) + val A2_ZERO = UInt(0, 2) + val A2_FOUR = UInt(1, 2) + val A2_RS2 = UInt(2, 2) + val A2_IMM = UInt(3, 2) val X = Bool.DC val N = Bool(false) val Y = Bool(true) - val WB_X = UInt("b??", 2) - val WB_ALU = UInt(0, 3); - val WB_TSC = UInt(2, 3); - val WB_IRT = UInt(3, 3); + val NBYP = 4 + val SZ_BYP = log2Up(NBYP) + val BYP_0 = 0 + val BYP_EX = 1 + val BYP_MEM = 2 + val BYP_DC = 3 val SZ_DW = 1 val DW_X = X diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 4c03daf5..55965ece 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -11,8 +11,7 @@ class CtrlDpathIO extends Bundle() // outputs to datapath val sel_pc = UInt(OUTPUT, 3); val killd = Bool(OUTPUT); - val ren2 = Bool(OUTPUT); - val ren1 = Bool(OUTPUT); + val ren = Vec.fill(2)(Bool(OUTPUT)) val sel_alu2 = UInt(OUTPUT, 3) val sel_alu1 = UInt(OUTPUT, 2) 
val sel_imm = UInt(OUTPUT, 3) @@ -37,8 +36,9 @@ class CtrlDpathIO extends Bundle() val ex_rs2_val = Bool(OUTPUT) val ex_rocc_val = Bool(OUTPUT) val mem_rocc_val = Bool(OUTPUT) - val mem_ll_bypass_rs1 = Bool(OUTPUT) - val mem_ll_bypass_rs2 = Bool(OUTPUT) + val bypass = Vec.fill(2)(Bool(OUTPUT)) + val bypass_src = Vec.fill(2)(Bits(OUTPUT, SZ_BYP)) + val ll_ready = Bool(OUTPUT) // exception handling val retire = Bool(OUTPUT) val exception = Bool(OUTPUT); @@ -50,11 +50,11 @@ class CtrlDpathIO extends Bundle() val ex_br_type = Bits(OUTPUT, SZ_BR) val ex_br_taken = Bool(INPUT) val div_mul_rdy = Bool(INPUT) - val mem_ll_wb = Bool(INPUT) - val mem_ll_waddr = UInt(INPUT, 5) - val ex_waddr = UInt(INPUT, 5); // write addr from execute stage - val mem_waddr = UInt(INPUT, 5); // write addr from memory stage - val wb_waddr = UInt(INPUT, 5); // write addr from writeback stage + val ll_wen = Bool(INPUT) + val ll_waddr = UInt(INPUT, 5) + val ex_waddr = UInt(INPUT, 5) + val mem_waddr = UInt(INPUT, 5) + val wb_waddr = UInt(INPUT, 5) val status = new Status().asInput val fp_sboard_clr = Bool(INPUT); val fp_sboard_clra = UInt(INPUT, 5); @@ -543,9 +543,8 @@ class Control(implicit conf: RocketConfiguration) extends Module (mem_reg_mem_val && io.dmem.xcpt.pf.st, UInt(11)))) val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem - val ll_wb_kill_mem = io.dpath.mem_ll_wb && (mem_reg_wen || mem_reg_fp_wen || mem_reg_rocc_val || mem_reg_csr != CSR.N) - val replay_mem = ll_wb_kill_mem || mem_reg_replay || fpu_kill_mem - val killm_common = ll_wb_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid + val replay_mem = mem_reg_replay || fpu_kill_mem + val killm_common = take_pc_wb || mem_reg_xcpt || !mem_reg_valid ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem wb_reg_replay := replay_mem && !take_pc_wb @@ -577,23 +576,22 @@ class Control(implicit conf: RocketConfiguration) extends Module wb_reg_rocc_val := mem_reg_rocc_val } - val replay_wb = io.dmem.resp.bits.nack || wb_reg_replay || - 
io.dpath.csr_replay || Bool(!conf.rocc.isEmpty) && wb_reg_rocc_val && !io.rocc.cmd.ready + val replay_wb_common = + io.dmem.resp.bits.nack || wb_reg_replay || + io.dpath.ll_wen && wb_reg_wen || io.dpath.csr_replay + val wb_rocc_val = wb_reg_rocc_val && !replay_wb_common + val replay_wb = replay_wb_common || wb_reg_rocc_val && !io.rocc.cmd.ready class Scoreboard(n: Int) { - val r = Reg(init=Bits(0, n)) - private var _next = r - private var cur = r - var ens = Bool(false) def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr)) - def clear(en: Bool, addr: UInt): Unit = { - val m = ~mask(en, addr) - update(en, _next & m) - //cur = cur & m - } - def read(addr: UInt) = r(addr) - def readBypassed(addr: UInt) = cur(addr) + def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr)) + def read(addr: UInt): Bool = r(addr) + def readBypassed(addr: UInt): Bool = _next(addr) + + private val r = Reg(init=Bits(0, n)) + private var _next = r + private var ens = Bool(false) private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0)) private def update(en: Bool, update: UInt) = { _next = update @@ -604,7 +602,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val sboard = new Scoreboard(32) sboard.set((wb_reg_div_mul_val || wb_dcache_miss || wb_reg_rocc_val) && io.dpath.wb_wen, io.dpath.wb_waddr) - sboard.clear(io.dpath.mem_ll_wb, io.dpath.mem_ll_waddr) + sboard.clear(io.dpath.ll_wen, io.dpath.ll_waddr) val id_stall_fpu = if (conf.fpu) { val fp_sboard = new Scoreboard(32) @@ -613,10 +611,10 @@ class Control(implicit conf: RocketConfiguration) extends Module fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) id_csr_en && !io.fpu.fcsr_rdy || - io.fpu.dec.ren1 && fp_sboard.readBypassed(id_raddr1) || - io.fpu.dec.ren2 && fp_sboard.readBypassed(id_raddr2) || - io.fpu.dec.ren3 && fp_sboard.readBypassed(id_raddr3) || - io.fpu.dec.wen && fp_sboard.readBypassed(id_waddr) + io.fpu.dec.ren1 && fp_sboard.read(id_raddr1) || + 
io.fpu.dec.ren2 && fp_sboard.read(id_raddr2) || + io.fpu.dec.ren3 && fp_sboard.read(id_raddr3) || + io.fpu.dec.wen && fp_sboard.read(id_waddr) } else Bool(false) // write CAUSE CSR on an exception @@ -638,6 +636,18 @@ class Control(implicit conf: RocketConfiguration) extends Module io.imem.req.bits.taken := !ex_reg_btb_hit || ex_reg_jalr io.imem.req.valid := take_pc + val bypassDst = Array(id_raddr1, id_raddr2) + val bypassSrc = Array.fill(NBYP)((Bool(true), UInt(0))) + bypassSrc(BYP_EX) = (ex_reg_wen, io.dpath.ex_waddr) + bypassSrc(BYP_MEM) = (mem_reg_wen && !mem_reg_mem_val, io.dpath.mem_waddr) + bypassSrc(BYP_DC) = (mem_reg_wen, io.dpath.mem_waddr) + + val doBypass = bypassDst.map(d => bypassSrc.map(s => s._1 && s._2 === d)) + for (i <- 0 until io.dpath.bypass.size) { + io.dpath.bypass(i) := doBypass(i).reduce(_||_) + io.dpath.bypass_src(i) := PriorityEncoder(doBypass(i)) + } + // stall for RAW/WAW hazards on PCRs, loads, AMOs, and mul/div in execute stage. val data_hazard_ex = ex_reg_wen && (id_renx1.toBool && id_raddr1 === io.dpath.ex_waddr || @@ -669,24 +679,17 @@ class Control(implicit conf: RocketConfiguration) extends Module id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
- val data_hazard_wb = wb_reg_wen && - (id_raddr1 != UInt(0) && id_renx1 && (id_raddr1 === io.dpath.wb_waddr) || - id_raddr2 != UInt(0) && id_renx2 && (id_raddr2 === io.dpath.wb_waddr) || - id_waddr != UInt(0) && id_wen && (id_waddr === io.dpath.wb_waddr)) val fp_data_hazard_wb = wb_reg_fp_wen && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.wb_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr || io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr) - val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val || wb_reg_rocc_val) || - fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) + val id_wb_hazard = fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) - io.dpath.mem_ll_bypass_rs1 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr1 - io.dpath.mem_ll_bypass_rs2 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr2 val id_sboard_hazard = - (id_raddr1 != UInt(0) && id_renx1 && sboard.read(id_raddr1) && !io.dpath.mem_ll_bypass_rs1 || - id_raddr2 != UInt(0) && id_renx2 && sboard.read(id_raddr2) && !io.dpath.mem_ll_bypass_rs2 || - id_waddr != UInt(0) && id_wen && sboard.read(id_waddr)) + (id_raddr1 != UInt(0) && id_renx1 && sboard.readBypassed(id_raddr1) || + id_raddr2 != UInt(0) && id_renx2 && sboard.readBypassed(id_raddr2) || + id_waddr != UInt(0) && id_wen && sboard.readBypassed(id_waddr)) val ctrl_stalld = id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || @@ -702,8 +705,8 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen io.dpath.wb_load := wb_reg_mem_val && wb_reg_wen - io.dpath.ren2 := id_renx2.toBool; - io.dpath.ren1 := id_renx1.toBool; + io.dpath.ren(1) := id_renx2 + io.dpath.ren(0) := id_renx1 io.dpath.sel_alu2 := id_sel_alu2.toUInt io.dpath.sel_alu1 := id_sel_alu1.toUInt io.dpath.sel_imm := id_sel_imm.toUInt @@ -717,6 +720,7 @@ class Control(implicit conf: RocketConfiguration) 
extends Module io.dpath.ex_predicted_taken := ex_reg_btb_hit io.dpath.ex_wen := ex_reg_wen; io.dpath.mem_wen := mem_reg_wen; + io.dpath.ll_ready := !wb_reg_wen io.dpath.wb_wen := wb_reg_wen && !replay_wb io.dpath.retire := wb_reg_valid && !replay_wb io.dpath.csr := wb_reg_csr @@ -737,5 +741,5 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dmem.req.bits.typ := ex_reg_mem_type io.dmem.req.bits.phys := Bool(false) - io.rocc.cmd.valid := wb_reg_rocc_val + io.rocc.cmd.valid := wb_rocc_val } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 88f8ea50..06efa4f7 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -27,12 +27,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val ex_reg_sel_alu1 = Reg(UInt()) val ex_reg_sel_imm = Reg(UInt()) val ex_reg_kill = Reg(Bool()) - val ex_reg_rs1_bypass = Reg(Bool()) - val ex_reg_rs1_lsb = Reg(Bits()) - val ex_reg_rs1_msb = Reg(Bits()) - val ex_reg_rs2_bypass = Reg(Bool()) - val ex_reg_rs2_lsb = Reg(Bits()) - val ex_reg_rs2_msb = Reg(Bits()) + val ex_reg_rs_bypass = Vec.fill(2)(Reg(Bool())) + val ex_reg_rs_lsb = Vec.fill(2)(Reg(Bits())) + val ex_reg_rs_msb = Vec.fill(2)(Reg(Bits())) // memory definitions val mem_reg_pc = Reg(UInt()) @@ -44,38 +41,38 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // writeback definitions val wb_reg_pc = Reg(UInt()) val wb_reg_inst = Reg(Bits()) - val wb_reg_waddr = Reg(UInt()) val wb_reg_wdata = Reg(Bits()) - val wb_reg_ll_wb = Reg(init=Bool(false)) val wb_wdata = Bits() val wb_reg_rs2 = Reg(Bits()) - val wb_wen = io.ctrl.wb_wen || wb_reg_ll_wb // instruction decode stage val id_inst = io.imem.resp.bits.data val id_pc = io.imem.resp.bits.pc - - val regfile_ = Mem(Bits(width = 64), 31) - def readRF(a: UInt) = regfile_(~a) - def writeRF(a: UInt, d: Bits) = regfile_(~a) := d + + class RegFile { + private val rf = Mem(UInt(width = 64), 31) + private val reads = 
collection.mutable.ArrayBuffer[(UInt,UInt)]() + private var canRead = true + def read(addr: UInt) = { + require(canRead) + reads += addr -> UInt() + reads.last._2 := rf(~addr) + reads.last._2 + } + def write(addr: UInt, data: UInt) = { + canRead = false + when (addr != UInt(0)) { + rf(~addr) := data + for ((raddr, rdata) <- reads) + when (addr === raddr) { rdata := data } + } + } + } + val rf = new RegFile - val id_raddr1 = id_inst(19,15).toUInt; - val id_raddr2 = id_inst(24,20).toUInt; - - // bypass muxes - val id_rs1_zero = id_raddr1 === UInt(0) - val id_rs1_ex_bypass = io.ctrl.ex_wen && id_raddr1 === io.ctrl.ex_waddr - val id_rs1_mem_bypass = io.ctrl.mem_wen && id_raddr1 === io.ctrl.mem_waddr - val id_rs1_bypass = id_rs1_zero || id_rs1_ex_bypass || id_rs1_mem_bypass || io.ctrl.mem_ll_bypass_rs1 - val id_rs1_bypass_src = Mux(id_rs1_zero, UInt(0), Mux(id_rs1_ex_bypass, UInt(1), Mux(io.ctrl.mem_load, UInt(3), UInt(2)))) - val id_rs1 = Mux(wb_wen && id_raddr1 === wb_reg_waddr, wb_wdata, readRF(id_raddr1)) - - val id_rs2_zero = id_raddr2 === UInt(0) - val id_rs2_ex_bypass = io.ctrl.ex_wen && id_raddr2 === io.ctrl.ex_waddr - val id_rs2_mem_bypass = io.ctrl.mem_wen && id_raddr2 === io.ctrl.mem_waddr - val id_rs2_bypass = id_rs2_zero || id_rs2_ex_bypass || id_rs2_mem_bypass || io.ctrl.mem_ll_bypass_rs2 - val id_rs2_bypass_src = Mux(id_rs2_zero, UInt(0), Mux(id_rs2_ex_bypass, UInt(1), Mux(io.ctrl.mem_load, UInt(3), UInt(2)))) - val id_rs2 = Mux(wb_wen && id_raddr2 === wb_reg_waddr, wb_wdata, readRF(id_raddr2)) + // RF read ports + bypass from WB stage + val id_raddr = Vec(id_inst(19,15), id_inst(24,20)) + val id_rs = id_raddr.map(rf.read _) // immediate generation def imm(sel: Bits, inst: Bits) = { @@ -109,56 +106,49 @@ class Datapath(implicit conf: RocketConfiguration) extends Module ex_reg_sel_alu2 := io.ctrl.sel_alu2 ex_reg_sel_alu1 := io.ctrl.sel_alu1 ex_reg_sel_imm := io.ctrl.sel_imm - ex_reg_rs1_bypass := id_rs1_bypass && io.ctrl.ren1 - ex_reg_rs2_bypass := 
id_rs2_bypass && io.ctrl.ren2 - when (io.ctrl.ren1) { - ex_reg_rs1_lsb := id_rs1_bypass_src - when (!id_rs1_bypass) { - ex_reg_rs1_lsb := id_rs1(id_rs1_bypass_src.getWidth-1,0) - ex_reg_rs1_msb := id_rs1(63,id_rs1_bypass_src.getWidth) - } - } - when (io.ctrl.ren2) { - ex_reg_rs2_lsb := id_rs2_bypass_src - when (!id_rs2_bypass) { - ex_reg_rs2_lsb := id_rs2(id_rs2_bypass_src.getWidth-1,0) - ex_reg_rs2_msb := id_rs2(63,id_rs2_bypass_src.getWidth) + ex_reg_rs_bypass := io.ctrl.bypass + for (i <- 0 until id_rs.size) { + when (io.ctrl.ren(i)) { + ex_reg_rs_lsb(i) := id_rs(i)(SZ_BYP-1,0) + when (!io.ctrl.bypass(i)) { + ex_reg_rs_msb(i) := id_rs(i) >> SZ_BYP + } } + when (io.ctrl.bypass(i)) { ex_reg_rs_lsb(i) := io.ctrl.bypass_src(i) } } } val ex_raddr1 = ex_reg_inst(19,15) val ex_raddr2 = ex_reg_inst(24,20) - val dmem_resp_data = if (conf.fastLoadByte) io.dmem.resp.bits.data_subword else io.dmem.resp.bits.data - val ex_rs1 = - Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(3) && Bool(conf.fastLoadWord), dmem_resp_data, - Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(2), wb_reg_wdata, - Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(1), mem_reg_wdata, - Mux(ex_reg_rs1_bypass && ex_reg_rs1_lsb === UInt(0), Bits(0), - Mux(AVec(A1_ZERO, A1_PCHI) contains ex_reg_sel_alu1, Bits(0), - Cat(ex_reg_rs1_msb, ex_reg_rs1_lsb)))))) - val ex_rs2 = - Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(3) && Bool(conf.fastLoadWord), dmem_resp_data, - Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(2), wb_reg_wdata, - Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(1), mem_reg_wdata, - Mux(ex_reg_rs2_bypass && ex_reg_rs2_lsb === UInt(0), Bits(0), - Cat(ex_reg_rs2_msb, ex_reg_rs2_lsb))))) + val bypass = Vec.fill(NBYP)(Bits()) + bypass(BYP_0) := Bits(0) + bypass(BYP_EX) := mem_reg_wdata + bypass(BYP_MEM) := wb_reg_wdata + bypass(BYP_DC) := (if (conf.fastLoadByte) io.dmem.resp.bits.data_subword + else if (conf.fastLoadWord) io.dmem.resp.bits.data + else wb_reg_wdata) + val ex_rs = for 
(i <- 0 until id_rs.size) + yield Mux(ex_reg_rs_bypass(i), bypass(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) val ex_imm = imm(ex_reg_sel_imm, ex_reg_inst) - val ex_op1_hi = Mux(AVec(A1_PC, A1_PCHI) contains ex_reg_sel_alu1, ex_reg_pc >> 12, ex_rs1 >> 12).toSInt - val ex_op1_lo = Mux(ex_reg_sel_alu1 === A1_PC, ex_reg_pc(11,0), ex_rs1(11,0)).toSInt + val ex_op1_hi = MuxLookup(ex_reg_sel_alu1, ex_reg_pc.toSInt >> 12, Seq( + A1_RS1 -> (ex_rs(0).toSInt >> 12), + A1_ZERO -> SInt(0))) + val ex_op1_lo = MuxLookup(ex_reg_sel_alu1, UInt(0), Seq( + A1_RS1 -> ex_rs(0)(11,0), + A1_PC -> ex_reg_pc(11,0))) val ex_op1 = Cat(ex_op1_hi, ex_op1_lo) - val ex_op2 = Mux(ex_reg_sel_alu2 === A2_RS2, ex_rs2.toSInt, - Mux(ex_reg_sel_alu2 === A2_IMM, ex_imm, - Mux(ex_reg_sel_alu2 === A2_ZERO, SInt(0), - SInt(4)))) + val ex_op2 = MuxLookup(ex_reg_sel_alu2, SInt(0), Seq( + A2_RS2 -> ex_rs(1).toSInt, + A2_IMM -> ex_imm, + A2_FOUR -> SInt(4))) val alu = Module(new ALU) alu.io.dw := ex_reg_ctrl_fn_dw; alu.io.fn := ex_reg_ctrl_fn_alu; alu.io.in2 := ex_op2.toUInt - alu.io.in1 := ex_op1.toUInt + alu.io.in1 := ex_op1 // multiplier and divider val div = Module(new MulDiv(mulUnroll = if (conf.fastMulDiv) 8 else 1, @@ -166,15 +156,13 @@ class Datapath(implicit conf: RocketConfiguration) extends Module div.io.req.valid := io.ctrl.div_mul_val div.io.req.bits.dw := ex_reg_ctrl_fn_dw div.io.req.bits.fn := ex_reg_ctrl_fn_alu - div.io.req.bits.in1 := ex_rs1 - div.io.req.bits.in2 := ex_rs2 + div.io.req.bits.in1 := ex_rs(0) + div.io.req.bits.in2 := ex_rs(1) div.io.req.bits.tag := io.ctrl.ex_waddr div.io.kill := io.ctrl.div_mul_kill - div.io.resp.ready := !io.ctrl.mem_wen io.ctrl.div_mul_rdy := div.io.req.ready - io.fpu.fromint_data := ex_rs1 - io.ctrl.ex_waddr := ex_reg_inst(11,7) + io.fpu.fromint_data := ex_rs(0) def vaSign(a0: UInt, ea: Bits) = { // efficient means to compress 64-bit VA into VADDR_BITS+1 bits @@ -185,16 +173,15 @@ class Datapath(implicit conf: RocketConfiguration) extends 
Module Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1), e(0))) } - val ex_br_base = Mux(io.ctrl.ex_jalr, ex_rs1, ex_reg_pc) + val ex_br_base = Mux(io.ctrl.ex_jalr, ex_rs(0), ex_reg_pc) val ex_br_offset = Mux(io.ctrl.ex_predicted_taken, SInt(4), ex_imm(19,0).toSInt) val ex_br64 = ex_br_base + ex_br_offset - val ex_br_msb = Mux(io.ctrl.ex_jalr, vaSign(ex_rs1, ex_br64), vaSign(ex_reg_pc, ex_br64)) + val ex_br_msb = Mux(io.ctrl.ex_jalr, vaSign(ex_rs(0), ex_br64), vaSign(ex_reg_pc, ex_br64)) val ex_br_addr = Cat(ex_br_msb, ex_br64(VADDR_BITS-1,0)) // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req.bits.addr := Cat(vaSign(ex_rs1, alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUInt - io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) + io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUInt io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_fp_val) require(io.dmem.req.bits.tag.getWidth >= 6) @@ -212,14 +199,14 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ptw.status := pcr.io.status // branch resolution logic - io.ctrl.jalr_eq := ex_rs1 === id_pc.toSInt && ex_reg_inst(31,20) === UInt(0) + io.ctrl.jalr_eq := ex_rs(0) === id_pc.toSInt && ex_reg_inst(31,20) === UInt(0) io.ctrl.ex_br_taken := - Mux(io.ctrl.ex_br_type === BR_EQ, ex_rs1 === ex_rs2, - Mux(io.ctrl.ex_br_type === BR_NE, ex_rs1 != ex_rs2, - Mux(io.ctrl.ex_br_type === BR_LT, ex_rs1.toSInt < ex_rs2.toSInt, - Mux(io.ctrl.ex_br_type === BR_GE, ex_rs1.toSInt >= ex_rs2.toSInt, - Mux(io.ctrl.ex_br_type === BR_LTU, ex_rs1 < ex_rs2, - Mux(io.ctrl.ex_br_type === BR_GEU, ex_rs1 >= ex_rs2, + Mux(io.ctrl.ex_br_type === BR_EQ, ex_rs(0) === ex_rs(1), + Mux(io.ctrl.ex_br_type === BR_NE, ex_rs(0) != ex_rs(1), + Mux(io.ctrl.ex_br_type === BR_LT, ex_rs(0).toSInt < ex_rs(1).toSInt, + Mux(io.ctrl.ex_br_type === BR_GE, ex_rs(0).toSInt 
>= ex_rs(1).toSInt, + Mux(io.ctrl.ex_br_type === BR_LTU, ex_rs(0) < ex_rs(1), + Mux(io.ctrl.ex_br_type === BR_GEU, ex_rs(0) >= ex_rs(1), io.ctrl.ex_br_type === BR_J)))))) // memory stage @@ -228,13 +215,12 @@ class Datapath(implicit conf: RocketConfiguration) extends Module mem_reg_pc := ex_reg_pc mem_reg_inst := ex_reg_inst mem_reg_wdata := alu.io.out - when (io.ctrl.ex_rs2_val) { - mem_reg_rs2 := ex_rs2 - } } - - // for load/use hazard detection (load byte/halfword) - io.ctrl.mem_waddr := mem_reg_inst(11,7) + when (io.ctrl.ex_rs2_val) { + mem_reg_rs2 := ex_rs(1) + } + + io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool @@ -243,28 +229,27 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data - val mem_ll_wdata = Bits() - mem_ll_wdata := div.io.resp.bits.data - io.ctrl.mem_ll_waddr := div.io.resp.bits.tag - io.ctrl.mem_ll_wb := div.io.resp.valid && !io.ctrl.mem_wen + val ll_wdata = Bits() + div.io.resp.ready := io.ctrl.ll_ready + ll_wdata := div.io.resp.bits.data + io.ctrl.ll_waddr := div.io.resp.bits.tag + io.ctrl.ll_wen := div.io.resp.fire() if (!conf.rocc.isEmpty) { - io.rocc.resp.ready := !io.ctrl.mem_wen && !io.ctrl.mem_rocc_val + io.rocc.resp.ready := io.ctrl.ll_ready when (io.rocc.resp.fire()) { div.io.resp.ready := Bool(false) - mem_ll_wdata := io.rocc.resp.bits.data - io.ctrl.mem_ll_waddr := io.rocc.resp.bits.rd - io.ctrl.mem_ll_wb := Bool(true) + ll_wdata := io.rocc.resp.bits.data + io.ctrl.ll_waddr := io.rocc.resp.bits.rd + io.ctrl.ll_wen := Bool(true) } } when (dmem_resp_replay && dmem_resp_xpu) { div.io.resp.ready := Bool(false) if (!conf.rocc.isEmpty) io.rocc.resp.ready := Bool(false) - mem_ll_wdata := io.dmem.resp.bits.data_subword - io.ctrl.mem_ll_waddr := dmem_resp_waddr - 
io.ctrl.mem_ll_wb := Bool(true) + io.ctrl.ll_waddr := dmem_resp_waddr + io.ctrl.ll_wen := Bool(true) } - when (io.ctrl.mem_ll_waddr === UInt(0)) { io.ctrl.mem_ll_wb := Bool(false) } io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu io.fpu.dmem_resp_data := io.dmem.resp.bits.data @@ -274,24 +259,20 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // writeback stage when (!mem_reg_kill) { wb_reg_pc := mem_reg_pc - wb_reg_waddr := io.ctrl.mem_waddr wb_reg_inst := mem_reg_inst wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata) } when (io.ctrl.mem_rocc_val) { wb_reg_rs2 := mem_reg_rs2 } - wb_reg_ll_wb := io.ctrl.mem_ll_wb - when (io.ctrl.mem_ll_wb) { - wb_reg_waddr := io.ctrl.mem_ll_waddr - wb_reg_wdata := mem_ll_wdata - } - wb_wdata := Mux(io.ctrl.wb_load, io.dmem.resp.bits.data_subword, + wb_wdata := Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword, + Mux(io.ctrl.ll_wen, ll_wdata, Mux(io.ctrl.csr != CSR.N, pcr.io.rw.rdata, - wb_reg_wdata)) + wb_reg_wdata))) - when (wb_wen) { writeRF(wb_reg_waddr, wb_wdata) } - io.ctrl.wb_waddr := wb_reg_waddr + val wb_wen = io.ctrl.ll_wen || io.ctrl.wb_wen + val wb_waddr = Mux(io.ctrl.ll_wen, io.ctrl.ll_waddr, io.ctrl.wb_waddr) + when (wb_wen) { rf.write(wb_waddr, wb_wdata) } // scoreboard clear (for div/mul and D$ load miss writebacks) io.ctrl.fp_sboard_clr := dmem_resp_replay && dmem_resp_fpu @@ -312,11 +293,16 @@ class Datapath(implicit conf: RocketConfiguration) extends Module Mux(io.ctrl.sel_pc === PC_EX, ex_br_addr, Mux(io.ctrl.sel_pc === PC_PCR, pcr.io.evec, wb_reg_pc)).toUInt // PC_WB + + // for hazard/bypass opportunity detection + io.ctrl.ex_waddr := ex_reg_inst(11,7) + io.ctrl.mem_waddr := mem_reg_inst(11,7) + io.ctrl.wb_waddr := wb_reg_inst(11,7) printf("C: %d [%d] pc=[%x] W[r%d=%x] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", pcr.io.time(32,0), io.ctrl.retire, wb_reg_pc, - Mux(wb_wen, wb_reg_waddr, UInt(0)), wb_wdata, - 
wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs1)), - wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs2)), + Mux(wb_wen, wb_waddr, UInt(0)), wb_wdata, + wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), + wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), wb_reg_inst, wb_reg_inst) } diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 09376647..b5be1378 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -267,7 +267,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module when (decoded_addr(CSRs.count)) { reg_time := wdata.toUInt } when (decoded_addr(CSRs.compare)) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false); } when (decoded_addr(CSRs.fromhost)) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } - when (decoded_addr(CSRs.tohost)) { when (reg_tohost === UInt(0)) { reg_tohost := wdata } } + when (decoded_addr(CSRs.tohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } when (decoded_addr(CSRs.clear_ipi)){ r_irq_ipi := wdata(0) } when (decoded_addr(CSRs.sup0)) { reg_sup0 := wdata; } when (decoded_addr(CSRs.sup1)) { reg_sup1 := wdata; } diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 6ff0b683..98d1f4b5 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -186,60 +186,60 @@ object Instructions { def CUSTOM3_RD_RS1_RS2 = Bits("b?????????????????111?????1111011") } object CSRs { - val sup0 = 1280 - val fflags = 1 - val frm = 2 - val fcsr = 3 - val cycle = 4 - val time = 5 - val instret = 6 - val sup1 = 1281 - val evec = 1288 - val cause = 1289 - val status = 1290 - val hartid = 1291 - val impl = 1292 - val epc = 1282 - val send_ipi = 1294 - val clear_ipi = 1295 - val badvaddr = 1283 - val ptbr = 1284 - val stats = 1308 - val reset = 1309 - val tohost = 1310 - val asid = 1285 - val count = 
1286 - val compare = 1287 - val fromhost = 1311 - val fatc = 1293 + val fflags = 0x1 + val frm = 0x2 + val fcsr = 0x3 + val sup0 = 0x500 + val sup1 = 0x501 + val epc = 0x502 + val badvaddr = 0x503 + val ptbr = 0x504 + val asid = 0x505 + val count = 0x506 + val compare = 0x507 + val evec = 0x508 + val cause = 0x509 + val status = 0x50a + val hartid = 0x50b + val impl = 0x50c + val fatc = 0x50d + val send_ipi = 0x50e + val clear_ipi = 0x50f + val stats = 0x51c + val reset = 0x51d + val tohost = 0x51e + val fromhost = 0x51f + val cycle = 0xc00 + val time = 0xc01 + val instret = 0xc02 val all = { val res = collection.mutable.ArrayBuffer[Int]() - res += sup0 res += fflags res += frm res += fcsr - res += cycle - res += time - res += instret + res += sup0 res += sup1 + res += epc + res += badvaddr + res += ptbr + res += asid + res += count + res += compare res += evec res += cause res += status res += hartid res += impl - res += epc + res += fatc res += send_ipi res += clear_ipi - res += badvaddr - res += ptbr res += stats res += reset res += tohost - res += asid - res += count - res += compare res += fromhost - res += fatc + res += cycle + res += time + res += instret res.toArray } } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index b99d3d43..8bab71b0 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -9,6 +9,7 @@ object Util implicit def intToBoolean(x: Int): Boolean = if (x != 0) true else false implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0 implicit def booleanToBool(x: Boolean): Bits = Bool(x) + implicit def intSeqToUIntSeq(x: Iterable[Int]): Iterable[UInt] = x.map(UInt(_)) implicit def wcToUInt(c: WideCounter): UInt = c.value } From 07a91bb99a7cf726b7a83c117b43375436df4e6b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 9 Dec 2013 19:52:47 -0800 Subject: [PATCH 0666/1087] Miscellaneous cleanup --- rocket/src/main/scala/ctrl.scala | 6 +----- 
rocket/src/main/scala/decode.scala | 15 +++++++-------- rocket/src/main/scala/nbdcache.scala | 6 +++--- rocket/src/main/scala/util.scala | 18 +++++++++--------- 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 55965ece..320ca15b 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -321,11 +321,7 @@ class Control(implicit conf: RocketConfiguration) extends Module if (conf.fpu) decode_table ++= FDecode.table if (!conf.rocc.isEmpty) decode_table ++= RoCCDecode.table - val logic = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) - val cs = logic.map { - case b if b.inputs.head.getClass == classOf[Bool] => b.toBool - case u => u - } + val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_rocc_val: Bool) :: id_br_type :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs val id_sel_alu2 :: id_sel_alu1 :: id_sel_imm :: (id_fn_dw: Bool) :: id_fn_alu :: cs1 = cs0 diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 5ce768b0..6a969d76 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -16,7 +16,7 @@ object DecodeLogic } def logic(addr: UInt, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bool], terms: Seq[Term]) = { terms.map { t => - cache.getOrElseUpdate(t, (if (t.mask == 0) addr else addr & Lit(BigInt(2).pow(addrWidth)-(t.mask+1), addrWidth){Bits()}) === Lit(t.value, addrWidth){Bits()}) + cache.getOrElseUpdate(t, (if (t.mask == 0) addr else addr & Bits(BigInt(2).pow(addrWidth)-(t.mask+1), addrWidth)) === Bits(t.value, addrWidth)) }.foldLeft(Bool(false))(_||_) } def apply[T <: Bits](addr: UInt, default: T, mapping: Iterable[(UInt, T)]): T = { @@ -48,13 +48,12 @@ object DecodeLogic default.fromBits(result) } def apply[T <: Bits](addr: UInt, default: Iterable[T], 
mappingIn: Iterable[(UInt, Iterable[T])]): Iterable[T] = { - var mapping = mappingIn - default map { thisDefault => - val thisMapping = for ((key, values) <- mapping) yield key -> values.head - val res = apply(addr, thisDefault, thisMapping) - mapping = for ((key, values) <- mapping) yield key -> values.tail - res - } + val mapping = collection.mutable.ArrayBuffer.fill(default.size)(collection.mutable.ArrayBuffer[(UInt, T)]()) + for ((key, values) <- mappingIn) + for ((value, i) <- values zipWithIndex) + mapping(i) += key -> value + for ((thisDefault, thisMapping) <- default zip mapping) + yield apply(addr, thisDefault, thisMapping) } def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool = apply(addr, Bool.DC, trues.map(_ -> Bool(true)) ++ falses.map(_ -> Bool(false))) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index e5dd334b..39d7840f 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -276,7 +276,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte io.req_sec_rdy := sec_rdy && rpq.io.enq.ready val meta_hazard = Reg(init=UInt(0,2)) - when (meta_hazard != 0) { meta_hazard := meta_hazard + 1 } + when (meta_hazard != UInt(0)) { meta_hazard := meta_hazard + 1 } when (io.meta_write.fire()) { meta_hazard := 1 } io.probe_rdy := !idx_match || (state != s_wb_req && state != s_wb_resp && state != s_meta_clear && meta_hazard === 0) @@ -574,7 +574,7 @@ class MetaDataArray(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte val rst = rst_cnt < conf.sets when (rst) { rst_cnt := rst_cnt+1 } - val metabits = io.write.bits.data.state.width + conf.tagbits + val metabits = io.write.bits.data.state.getWidth + conf.tagbits val tags = Mem(UInt(width = metabits*conf.ways), conf.sets, seqRead = true) when (rst || io.write.valid) { @@ -992,7 +992,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends when 
(s1_clk_en) { s2_store_bypass := false when (bypasses.map(_._1).reduce(_||_)) { - s2_store_bypass_data := PriorityMux(bypasses.map(x => (x._1, x._2))) + s2_store_bypass_data := PriorityMux(bypasses) s2_store_bypass := true } } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 8bab71b0..09dc7e8b 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -28,19 +28,19 @@ object AVec object Str { - def apply(s: String): Bits = { + def apply(s: String): UInt = { var i = BigInt(0) require(s.forall(validChar _)) for (c <- s) i = (i << 8) | c - Lit(i, s.length*8){Bits()} + UInt(i, s.length*8) } - def apply(x: Char): Bits = { + def apply(x: Char): UInt = { require(validChar(x)) - Lit(x, 8){Bits()} + UInt(x.toInt, 8) } - def apply(x: UInt): Bits = apply(x, 10) - def apply(x: UInt, radix: Int): Bits = { + def apply(x: UInt): UInt = apply(x, 10) + def apply(x: UInt, radix: Int): UInt = { val rad = UInt(radix) val w = x.getWidth require(w > 0) @@ -53,8 +53,8 @@ object Str } s } - def apply(x: SInt): Bits = apply(x, 10) - def apply(x: SInt, radix: Int): Bits = { + def apply(x: SInt): UInt = apply(x, 10) + def apply(x: SInt, radix: Int): UInt = { val neg = x < SInt(0) val abs = x.abs if (radix != 10) { @@ -78,7 +78,7 @@ object Str } } - private def digit(d: UInt): Bits = Mux(d < UInt(10), Str('0')+d, Str(('a'-10).toChar)+d)(7,0) + private def digit(d: UInt): UInt = Mux(d < UInt(10), Str('0')+d, Str(('a'-10).toChar)+d)(7,0) private def validChar(x: Char) = x == (x & 0xFF) } From ebec444ad29114ad0b7d6ba28b139f6365e95531 Mon Sep 17 00:00:00 2001 From: Quan Nguyen Date: Fri, 13 Dec 2013 03:33:02 -0800 Subject: [PATCH 0667/1087] Increase tag width for configurable precision in Hwacha --- rocket/src/main/scala/tile.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 312f7700..6423c40f 100644 --- 
a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -12,7 +12,7 @@ case class RocketConfiguration(tl: TileLinkConfiguration, fastLoadByte: Boolean = false, fastMulDiv: Boolean = true) { - val dcacheReqTagBits = 9 // enforce compliance with require() + val dcacheReqTagBits = 10 // enforce compliance with require() // hue hue hue val xprlen = 64 val nxpr = 32 val nxprbits = log2Up(nxpr) From c546f66404bba8e7e32fb9a83827738e9ea40dcf Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 13 Jan 2014 00:54:49 -0800 Subject: [PATCH 0668/1087] Swap JAL/JALR encodings (again) --- rocket/src/main/scala/instructions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 98d1f4b5..5acb84a8 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -5,8 +5,8 @@ import Node._ /* Automatically generated by parse-opcodes */ object Instructions { - def JAL = Bits("b?????????????????????????1100111") - def JALR = Bits("b?????????????????000?????1101111") + def JAL = Bits("b?????????????????????????1101111") + def JALR = Bits("b?????????????????000?????1100111") def BEQ = Bits("b?????????????????000?????1100011") def BNE = Bits("b?????????????????001?????1100011") def BLT = Bits("b?????????????????100?????1100011") From 4d236979bdf194d62bd6efeeff08c6465d0db9ec Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 13 Jan 2014 00:55:48 -0800 Subject: [PATCH 0669/1087] Fix very far forward JALs We were sign-extending from the wrong bit, causing a backwards jump. 
--- rocket/src/main/scala/dpath.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 06efa4f7..0990392c 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -174,7 +174,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module e(0))) } val ex_br_base = Mux(io.ctrl.ex_jalr, ex_rs(0), ex_reg_pc) - val ex_br_offset = Mux(io.ctrl.ex_predicted_taken, SInt(4), ex_imm(19,0).toSInt) + val ex_br_offset = Mux(io.ctrl.ex_predicted_taken, SInt(4), ex_imm(20,0).toSInt) val ex_br64 = ex_br_base + ex_br_offset val ex_br_msb = Mux(io.ctrl.ex_jalr, vaSign(ex_rs(0), ex_br64), vaSign(ex_reg_pc, ex_br64)) val ex_br_addr = Cat(ex_br_msb, ex_br64(VADDR_BITS-1,0)) From a50a1f7d50a74ad65f4d61f143cdc103f1769878 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 13 Jan 2014 21:37:16 -0800 Subject: [PATCH 0670/1087] Clean up multiplier/divider stuff --- rocket/src/main/scala/divider.scala | 199 ------------------------- rocket/src/main/scala/multiplier.scala | 154 ++++++++++++------- 2 files changed, 100 insertions(+), 253 deletions(-) delete mode 100644 rocket/src/main/scala/divider.scala diff --git a/rocket/src/main/scala/divider.scala b/rocket/src/main/scala/divider.scala deleted file mode 100644 index 55bd2aa5..00000000 --- a/rocket/src/main/scala/divider.scala +++ /dev/null @@ -1,199 +0,0 @@ -package rocket - -import Chisel._ -import ALU._ -import Util._ - -class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Module { - val io = new MultiplierIO - val w = io.req.bits.in1.getWidth - val mulw = (w+mulUnroll-1)/mulUnroll*mulUnroll - - val s_ready :: s_neg_inputs :: s_mul_busy :: s_div_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 7) - val state = Reg(init=s_ready) - - val req = Reg(io.req.bits.clone) - val count = Reg(UInt(width = log2Up(w+1))) - val divby0 = 
Reg(Bool()) - val neg_out = Reg(Bool()) - val divisor = Reg(Bits(width = w+1)) // div only needs w bits - val remainder = Reg(Bits(width = 2*mulw+2)) // div only needs 2*w+1 bits - - def sext(x: Bits, cmds: Vec[Bits]) = { - val sign = Mux(io.req.bits.dw === DW_64, x(w-1), x(w/2-1)) && cmds.contains(io.req.bits.fn) - val hi = Mux(io.req.bits.dw === DW_64, x(w-1,w/2), Fill(w/2, sign)) - (Cat(hi, x(w/2-1,0)), sign) - } - val (lhs_in, lhs_sign) = sext(io.req.bits.in1, AVec(FN_DIV, FN_REM, FN_MULH, FN_MULHSU)) - val (rhs_in, rhs_sign) = sext(io.req.bits.in2, AVec(FN_DIV, FN_REM, FN_MULH)) - - val subtractor = remainder(2*w,w) - divisor(w,0) - val negated_remainder = -remainder(w-1,0) - - when (state === s_neg_inputs) { - val isMul = AVec(FN_MUL, FN_MULH, FN_MULHU, FN_MULHSU).contains(req.fn) - state := Mux(isMul, s_mul_busy, s_div_busy) - when (remainder(w-1) || isMul) { - remainder := negated_remainder - } - when (divisor(w-1) || isMul) { - divisor := subtractor - } - } - when (state === s_neg_output) { - remainder := negated_remainder - state := s_done - } - when (state === s_move_rem) { - remainder := remainder(2*w, w+1) - state := Mux(neg_out, s_neg_output, s_done) - } - when (state === s_mul_busy) { - val mulReg = Cat(remainder(2*mulw+1,w+1),remainder(w-1,0)) - val mplier = mulReg(mulw-1,0) - val accum = mulReg(2*mulw,mulw).toSInt - val mpcand = divisor.toSInt - val prod = mplier(mulUnroll-1,0) * mpcand + accum - val nextMulReg = Cat(prod, mplier(mulw-1,mulUnroll)).toUInt - remainder := Cat(nextMulReg >> w, Bool(false), nextMulReg(w-1,0)).toSInt - - count := count + 1 - when (count === mulw/mulUnroll-1) { - state := s_done - when (AVec(FN_MULH, FN_MULHU, FN_MULHSU) contains req.fn) { - state := s_move_rem - } - } - } - when (state === s_div_busy) { - when (count === UInt(w)) { - state := Mux(neg_out && !divby0, s_neg_output, s_done) - when (AVec(FN_REM, FN_REMU) contains req.fn) { - state := s_move_rem - } - } - count := count + UInt(1) - - val msb = subtractor(w) 
- divby0 := divby0 && !msb - remainder := Cat(Mux(msb, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !msb) - - val divisorMSB = Log2(divisor(w-1,0), w) - val dividendMSB = Log2(remainder(w-1,0), w) - val eOutPos = UInt(w-1) + divisorMSB - dividendMSB - val eOutZero = divisorMSB > dividendMSB - val eOut = count === UInt(0) && (eOutPos > 0 || eOutZero) && (divisorMSB != UInt(0) || divisor(0)) - when (Bool(earlyOut) && eOut) { - val shift = Mux(eOutZero, UInt(w-1), eOutPos) - remainder := remainder(w-1,0) << shift - count := shift - } - } - when (io.resp.fire() || io.kill) { - state := s_ready - } - when (io.req.fire()) { - val isMul = AVec(FN_MUL, FN_MULH, FN_MULHU, FN_MULHSU).contains(io.req.bits.fn) - val isRem = AVec(FN_REM, FN_REMU).contains(io.req.bits.fn) - val mulState = Mux(lhs_sign, s_neg_inputs, s_mul_busy) - val divState = Mux(lhs_sign || rhs_sign, s_neg_inputs, s_div_busy) - state := Mux(isMul, mulState, divState) - count := UInt(0) - neg_out := !isMul && Mux(isRem, lhs_sign, lhs_sign != rhs_sign) - divby0 := true - divisor := Cat(rhs_sign, rhs_in) - remainder := lhs_in - req := io.req.bits - } - - io.resp.bits := req - io.resp.bits.data := Mux(req.dw === DW_32, Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0)) - io.resp.valid := state === s_done - io.req.ready := state === s_ready -} - -class Divider(earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Module { - val io = new MultiplierIO - val w = io.req.bits.in1.getWidth - - val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6) - val state = Reg(init=s_ready) - - val count = Reg(UInt(width = log2Up(w+1))) - val divby0 = Reg(Bool()) - val neg_out = Reg(Bool()) - val r_req = Reg(io.req.bits) - - val divisor = Reg(Bits()) - val remainder = Reg(Bits(width = 2*w+1)) - val subtractor = remainder(2*w,w) - divisor - - def sext(x: Bits, cmds: Vec[Bits]) = { - val sign = Mux(io.req.bits.dw === DW_64, x(w-1), 
x(w/2-1)) && cmds.contains(io.req.bits.fn) - val hi = Mux(io.req.bits.dw === DW_64, x(w-1,w/2), Fill(w/2, sign)) - (Cat(hi, x(w/2-1,0)), sign) - } - val (lhs_in, lhs_sign) = sext(io.req.bits.in1, AVec(FN_DIV, FN_REM)) - val (rhs_in, rhs_sign) = sext(io.req.bits.in2, AVec(FN_DIV, FN_REM)) - - val r_isRem = isMulFN(r_req.fn, FN_REM) || isMulFN(r_req.fn, FN_REMU) - - when (state === s_neg_inputs) { - state := s_busy - when (remainder(w-1)) { - remainder := -remainder(w-1,0) - } - when (divisor(w-1)) { - divisor := subtractor(w-1,0) - } - } - when (state === s_neg_output) { - remainder := -remainder(w-1,0) - state := s_done - } - when (state === s_move_rem) { - remainder := remainder(2*w, w+1) - state := Mux(neg_out, s_neg_output, s_done) - } - when (state === s_busy) { - when (count === UInt(w)) { - state := Mux(r_isRem, s_move_rem, Mux(neg_out && !divby0, s_neg_output, s_done)) - } - count := count + UInt(1) - - val msb = subtractor(w) - divby0 := divby0 && !msb - remainder := Cat(Mux(msb, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !msb) - - val divisorMSB = Log2(divisor, w) - val dividendMSB = Log2(remainder(w-1,0), w) - val eOutPos = UInt(w-1, log2Up(2*w)) + divisorMSB - dividendMSB - val eOut = count === UInt(0) && eOutPos > 0 && (divisorMSB != UInt(0) || divisor(0)) - when (Bool(earlyOut) && eOut) { - val shift = eOutPos(log2Up(w)-1,0) - remainder := remainder(w-1,0) << shift - count := shift - when (eOutPos(log2Up(w))) { - remainder := remainder(w-1,0) << w-1 - count := w-1 - } - } - } - when (io.resp.fire() || io.kill) { - state := s_ready - } - when (io.req.fire()) { - state := Mux(lhs_sign || rhs_sign, s_neg_inputs, s_busy) - count := UInt(0) - neg_out := Mux(AVec(FN_REM, FN_REMU).contains(io.req.bits.fn), lhs_sign, lhs_sign != rhs_sign) - divby0 := true - divisor := rhs_in - remainder := lhs_in - r_req := io.req.bits - } - - io.resp.bits := r_req - io.resp.bits.data := Mux(r_req.dw === DW_32, Cat(Fill(w/2, remainder(w/2-1)), 
remainder(w/2-1,0)), remainder(w-1,0)) - io.resp.valid := state === s_done - io.req.ready := state === s_ready -} diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 95d1218b..ae4ba082 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -2,6 +2,7 @@ package rocket import Chisel._ import ALU._ +import Util._ class MultiplierReq(implicit conf: RocketConfiguration) extends Bundle { val fn = Bits(width = SZ_ALU_FN) @@ -26,68 +27,113 @@ class MultiplierIO(implicit conf: RocketConfiguration) extends Bundle { val resp = Decoupled(new MultiplierResp) } -class Multiplier(unroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Module { +class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Module { val io = new MultiplierIO + val w = io.req.bits.in1.getWidth + val mulw = (w+mulUnroll-1)/mulUnroll*mulUnroll + + val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6) + val state = Reg(init=s_ready) + + val req = Reg(io.req.bits) + val count = Reg(UInt(width = log2Up(w+1))) + val neg_out = Reg(Bool()) + val isMul = Reg(Bool()) + val isHi = Reg(Bool()) + val divisor = Reg(Bits(width = w+1)) // div only needs w bits + val remainder = Reg(Bits(width = 2*mulw+2)) // div only needs 2*w+1 bits - val w0 = io.req.bits.in1.getWidth - val w = (w0+1+unroll-1)/unroll*unroll - val cycles = w/unroll - - val r_val = Reg(init=Bool(false)) - val r_prod = Reg(Bits(width = w*2)) - val r_lsb = Reg(Bits()) - val r_cnt = Reg(UInt(width = log2Up(cycles+1))) - val r_req = Reg(new MultiplierReq) - val r_lhs = Reg(Bits(width = w0+1)) + val cmdMul :: cmdHi :: lhsSigned :: rhsSigned :: Nil = + DecodeLogic(io.req.bits.fn, List(X, X, X, X), List( + FN_DIV -> List(N, N, Y, Y), + FN_REM -> List(N, Y, Y, Y), + FN_DIVU -> List(N, N, N, N), + FN_REMU -> List(N, Y, N, N), + FN_MUL -> List(Y, 
N, X, X), + FN_MULH -> List(Y, Y, Y, Y), + FN_MULHU -> List(Y, Y, N, N), + FN_MULHSU -> List(Y, Y, Y, N))) - val dw = io.req.bits.dw - val fn = io.req.bits.fn - - val lhs_msb = Mux(dw === DW_64, io.req.bits.in1(w0-1), io.req.bits.in1(w0/2-1)).toBool - val lhs_sign = (isMulFN(fn, FN_MULH) || isMulFN(fn, FN_MULHSU)) && lhs_msb - val lhs_hi = Mux(dw === DW_64, io.req.bits.in1(w0-1,w0/2), Fill(w0/2, lhs_sign)) - val lhs_in = Cat(lhs_sign, lhs_hi, io.req.bits.in1(w0/2-1,0)) - - val rhs_msb = Mux(dw === DW_64, io.req.bits.in2(w0-1), io.req.bits.in2(w0/2-1)).toBool - val rhs_sign = isMulFN(fn, FN_MULH) && rhs_msb - val rhs_hi = Mux(dw === DW_64, io.req.bits.in2(w0-1,w0/2), Fill(w0/2, rhs_sign)) - val rhs_in = Cat(Fill(w-w0, rhs_sign), rhs_hi, io.req.bits.in2(w0/2-1,0)) - - when (io.req.fire()) { - r_val := Bool(true) - r_cnt := UInt(0, log2Up(cycles+1)) - r_req := io.req.bits - r_lhs := lhs_in - r_prod:= rhs_in - r_lsb := Bool(false) + def sext(x: Bits, signed: Bool) = { + val sign = signed && Mux(io.req.bits.dw === DW_64, x(w-1), x(w/2-1)) + val hi = Mux(io.req.bits.dw === DW_64, x(w-1,w/2), Fill(w/2, sign)) + (Cat(hi, x(w/2-1,0)), sign) } - .elsewhen (io.resp.fire() || io.kill) { - r_val := Bool(false) + val (lhs_in, lhs_sign) = sext(io.req.bits.in1, lhsSigned) + val (rhs_in, rhs_sign) = sext(io.req.bits.in2, rhsSigned) + + val subtractor = remainder(2*w,w) - divisor(w,0) + val less = subtractor(w) + val negated_remainder = -remainder(w-1,0) + + when (state === s_neg_inputs) { + when (remainder(w-1) || isMul) { + remainder := negated_remainder + } + when (divisor(w-1) || isMul) { + divisor := subtractor + } + state := s_busy } - val eOutDist = (UInt(cycles)-r_cnt)*UInt(unroll) - val outShift = Mux(isMulFN(r_req.fn, FN_MUL), UInt(0), Mux(r_req.dw === DW_64, UInt(64), UInt(32))) - val shiftDist = Mux(r_cnt === UInt(cycles), outShift, eOutDist) - val eOutMask = (UInt(1) << eOutDist) - UInt(1) - val eOut = r_cnt != UInt(0) && Bool(earlyOut) && !((r_prod(w-1,0) ^ 
r_lsb.toSInt) & eOutMask).orR - val shift = r_prod.toSInt >> shiftDist + when (state === s_neg_output) { + remainder := negated_remainder + state := s_done + } + when (state === s_move_rem) { + remainder := remainder(2*w, w+1) + state := Mux(neg_out, s_neg_output, s_done) + } + when (state === s_busy && isMul) { + val mulReg = Cat(remainder(2*mulw+1,w+1),remainder(w-1,0)) + val mplier = mulReg(mulw-1,0) + val accum = mulReg(2*mulw,mulw).toSInt + val mpcand = divisor.toSInt + val prod = mplier(mulUnroll-1,0) * mpcand + accum + val nextMulReg = Cat(prod, mplier(mulw-1,mulUnroll)).toUInt + remainder := Cat(nextMulReg >> w, Bool(false), nextMulReg(w-1,0)).toSInt - val sum = r_prod(2*w-1,w).toSInt + r_prod(unroll-1,0).toSInt * r_lhs.toSInt + Mux(r_lsb.toBool, r_lhs.toSInt, SInt(0)) - when (r_val && (r_cnt != UInt(cycles))) { - r_lsb := r_prod(unroll-1) - r_prod := Cat(sum, r_prod(w-1,unroll)).toSInt - r_cnt := r_cnt + UInt(1) - when (eOut) { - r_prod := shift - r_cnt := UInt(cycles) + count := count + 1 + when (count === mulw/mulUnroll-1) { + state := Mux(isHi, s_move_rem, s_done) } } + when (state === s_busy && !isMul) { + when (count === w) { + state := Mux(isHi, s_move_rem, Mux(neg_out, s_neg_output, s_done)) + } + count := count + 1 - val out32 = Cat(Fill(w0/2, shift(w0/2-1)), shift(w0/2-1,0)) - val out64 = shift(w0-1,0) - - io.req.ready := !r_val - io.resp.bits := r_req - io.resp.bits.data := Mux(r_req.dw === DW_64, out64, out32) - io.resp.valid := r_val && (r_cnt === UInt(cycles)) + remainder := Cat(Mux(less, remainder(2*w-1,w), subtractor(w-1,0)), remainder(w-1,0), !less) + + val divisorMSB = Log2(divisor(w-1,0), w) + val dividendMSB = Log2(remainder(w-1,0), w) + val eOutPos = UInt(w-1) + divisorMSB - dividendMSB + val eOutZero = divisorMSB > dividendMSB + val eOut = count === 0 && less /* not divby0 */ && (eOutPos > 0 || eOutZero) + when (Bool(earlyOut) && eOut) { + val shift = Mux(eOutZero, UInt(w-1), eOutPos(log2Up(w)-1,0)) + remainder := remainder(w-1,0) << 
shift + count := shift + } + when (count === 0 && !less /* divby0 */) { neg_out := false } + } + when (io.resp.fire() || io.kill) { + state := s_ready + } + when (io.req.fire()) { + state := Mux(lhs_sign || rhs_sign && !cmdMul, s_neg_inputs, s_busy) + isMul := cmdMul + isHi := cmdHi + count := 0 + neg_out := !cmdMul && Mux(cmdHi, lhs_sign, lhs_sign != rhs_sign) + divisor := Cat(rhs_sign, rhs_in) + remainder := lhs_in + req := io.req.bits + } + + io.resp.bits := req + io.resp.bits.data := Mux(req.dw === DW_32, Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0)) + io.resp.valid := state === s_done + io.req.ready := state === s_ready } From e8486817e6d835d9818365321039f50b67c12249 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 13 Jan 2014 21:43:56 -0800 Subject: [PATCH 0671/1087] Clean up formatting (i.e. remove tabs, semicolons) --- rocket/src/main/scala/consts.scala | 14 ++-- rocket/src/main/scala/ctrl.scala | 102 ++++++++++++------------- rocket/src/main/scala/dpath.scala | 10 +-- rocket/src/main/scala/dpath_util.scala | 24 +++--- rocket/src/main/scala/nbdcache.scala | 2 +- rocket/src/main/scala/ptw.scala | 8 +- rocket/src/main/scala/tlb.scala | 26 +++---- 7 files changed, 93 insertions(+), 93 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index e58ce9fb..6f7ca640 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -27,12 +27,12 @@ trait ScalarOpConstants { val A1_PC = UInt(3, 2) val IMM_X = Bits("b???", 3) - val IMM_S = UInt(0, 3); - val IMM_SB = UInt(1, 3); - val IMM_U = UInt(2, 3); - val IMM_UJ = UInt(3, 3); - val IMM_I = UInt(4, 3); - val IMM_Z = UInt(5, 3); + val IMM_S = UInt(0, 3) + val IMM_SB = UInt(1, 3) + val IMM_U = UInt(2, 3) + val IMM_UJ = UInt(3, 3) + val IMM_I = UInt(4, 3) + val IMM_Z = UInt(5, 3) val A2_X = Bits("b??", 2) val A2_ZERO = UInt(0, 2) @@ -57,7 +57,7 @@ trait ScalarOpConstants { val DW_64 = Y val DW_XPR = Y - val RA = 
UInt(1, 5); + val RA = UInt(1, 5) } trait InterruptConstants { diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 320ca15b..e559c286 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -9,8 +9,8 @@ import Util._ class CtrlDpathIO extends Bundle() { // outputs to datapath - val sel_pc = UInt(OUTPUT, 3); - val killd = Bool(OUTPUT); + val sel_pc = UInt(OUTPUT, 3) + val killd = Bool(OUTPUT) val ren = Vec.fill(2)(Bool(OUTPUT)) val sel_alu2 = UInt(OUTPUT, 3) val sel_alu1 = UInt(OUTPUT, 2) @@ -19,19 +19,19 @@ class CtrlDpathIO extends Bundle() val fn_alu = UInt(OUTPUT, SZ_ALU_FN) val div_mul_val = Bool(OUTPUT) val div_mul_kill = Bool(OUTPUT) - val div_val = Bool(OUTPUT); + val div_val = Bool(OUTPUT) val div_kill = Bool(OUTPUT) val csr = UInt(OUTPUT, 3) val sret = Bool(OUTPUT) - val mem_load = Bool(OUTPUT); + val mem_load = Bool(OUTPUT) val wb_load = Bool(OUTPUT) - val ex_fp_val= Bool(OUTPUT); - val mem_fp_val= Bool(OUTPUT); - val ex_wen = Bool(OUTPUT); + val ex_fp_val= Bool(OUTPUT) + val mem_fp_val= Bool(OUTPUT) + val ex_wen = Bool(OUTPUT) val ex_jalr = Bool(OUTPUT) val ex_predicted_taken = Bool(OUTPUT) - val mem_wen = Bool(OUTPUT); - val wb_wen = Bool(OUTPUT); + val mem_wen = Bool(OUTPUT) + val wb_wen = Bool(OUTPUT) val ex_mem_type = Bits(OUTPUT, 3) val ex_rs2_val = Bool(OUTPUT) val ex_rocc_val = Bool(OUTPUT) @@ -41,11 +41,11 @@ class CtrlDpathIO extends Bundle() val ll_ready = Bool(OUTPUT) // exception handling val retire = Bool(OUTPUT) - val exception = Bool(OUTPUT); - val cause = UInt(OUTPUT, 6); - val badvaddr_wen = Bool(OUTPUT); // high for a load/store access fault + val exception = Bool(OUTPUT) + val cause = UInt(OUTPUT, 6) + val badvaddr_wen = Bool(OUTPUT) // high for a load/store access fault // inputs from datapath - val inst = Bits(INPUT, 32); + val inst = Bits(INPUT, 32) val jalr_eq = Bool(INPUT) val ex_br_type = Bits(OUTPUT, SZ_BR) val ex_br_taken = Bool(INPUT) @@ -56,14 +56,14 @@ class 
CtrlDpathIO extends Bundle() val mem_waddr = UInt(INPUT, 5) val wb_waddr = UInt(INPUT, 5) val status = new Status().asInput - val fp_sboard_clr = Bool(INPUT); - val fp_sboard_clra = UInt(INPUT, 5); + val fp_sboard_clr = Bool(INPUT) + val fp_sboard_clra = UInt(INPUT, 5) val csr_replay = Bool(INPUT) } abstract trait DecodeConstants { - val xpr64 = Y; + val xpr64 = Y val decode_default = // fence.i @@ -446,14 +446,14 @@ class Control(implicit conf: RocketConfiguration) extends Module when (ctrl_killd) { ex_reg_jalr := Bool(false) - ex_reg_btb_hit := Bool(false); + ex_reg_btb_hit := Bool(false) ex_reg_div_mul_val := Bool(false) - ex_reg_mem_val := Bool(false); - ex_reg_valid := Bool(false); - ex_reg_wen := Bool(false); - ex_reg_fp_wen := Bool(false); + ex_reg_mem_val := Bool(false) + ex_reg_valid := Bool(false) + ex_reg_wen := Bool(false) + ex_reg_fp_wen := Bool(false) ex_reg_sret := Bool(false) - ex_reg_flush_inst := Bool(false); + ex_reg_flush_inst := Bool(false) ex_reg_fp_val := Bool(false) ex_reg_rocc_val := Bool(false) ex_reg_replay_next := Bool(false) @@ -463,11 +463,11 @@ class Control(implicit conf: RocketConfiguration) extends Module ex_reg_xcpt := Bool(false) } .otherwise { - ex_reg_br_type := id_br_type; + ex_reg_br_type := id_br_type ex_reg_jalr := id_jalr ex_reg_btb_hit := io.imem.resp.bits.taken && !id_jalr ex_reg_div_mul_val := id_mul_val || id_div_val - ex_reg_mem_val := id_mem_val.toBool; + ex_reg_mem_val := id_mem_val.toBool ex_reg_valid := Bool(true) ex_reg_csr := id_csr ex_reg_wen := id_wen @@ -498,19 +498,19 @@ class Control(implicit conf: RocketConfiguration) extends Module (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), (ex_reg_fp_val && io.fpu.illegal_rm, UInt(2)))) - mem_reg_replay := replay_ex && !take_pc_wb; + mem_reg_replay := replay_ex && !take_pc_wb mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb && !mem_reg_replay_next when (ex_xcpt) { mem_reg_cause := ex_cause } mem_reg_div_mul_val := ex_reg_div_mul_val && 
io.dpath.div_mul_rdy when (ctrl_killx) { - mem_reg_valid := Bool(false); + mem_reg_valid := Bool(false) mem_reg_csr := CSR.N - mem_reg_wen := Bool(false); - mem_reg_fp_wen := Bool(false); + mem_reg_wen := Bool(false) + mem_reg_fp_wen := Bool(false) mem_reg_sret := Bool(false) - mem_reg_mem_val := Bool(false); - mem_reg_flush_inst := Bool(false); + mem_reg_mem_val := Bool(false) + mem_reg_flush_inst := Bool(false) mem_reg_fp_val := Bool(false) mem_reg_rocc_val := Bool(false) mem_reg_replay_next := Bool(false) @@ -519,11 +519,11 @@ class Control(implicit conf: RocketConfiguration) extends Module .otherwise { mem_reg_valid := ex_reg_valid mem_reg_csr := ex_reg_csr - mem_reg_wen := ex_reg_wen; - mem_reg_fp_wen := ex_reg_fp_wen; + mem_reg_wen := ex_reg_wen + mem_reg_fp_wen := ex_reg_fp_wen mem_reg_sret := ex_reg_sret - mem_reg_mem_val := ex_reg_mem_val; - mem_reg_flush_inst := ex_reg_flush_inst; + mem_reg_mem_val := ex_reg_mem_val + mem_reg_flush_inst := ex_reg_flush_inst mem_reg_fp_val := ex_reg_fp_val mem_reg_rocc_val := ex_reg_rocc_val mem_reg_replay_next := ex_reg_replay_next @@ -550,22 +550,22 @@ class Control(implicit conf: RocketConfiguration) extends Module when (ctrl_killm) { wb_reg_valid := Bool(false) wb_reg_csr := CSR.N - wb_reg_wen := Bool(false); - wb_reg_fp_wen := Bool(false); + wb_reg_wen := Bool(false) + wb_reg_fp_wen := Bool(false) wb_reg_sret := Bool(false) - wb_reg_flush_inst := Bool(false); + wb_reg_flush_inst := Bool(false) wb_reg_mem_val := Bool(false) - wb_reg_div_mul_val := Bool(false); + wb_reg_div_mul_val := Bool(false) wb_reg_fp_val := Bool(false) wb_reg_rocc_val := Bool(false) } .otherwise { wb_reg_valid := mem_reg_valid wb_reg_csr := mem_reg_csr - wb_reg_wen := mem_reg_wen; - wb_reg_fp_wen := mem_reg_fp_wen; + wb_reg_wen := mem_reg_wen + wb_reg_fp_wen := mem_reg_fp_wen wb_reg_sret := mem_reg_sret && !mem_reg_replay - wb_reg_flush_inst := mem_reg_flush_inst; + wb_reg_flush_inst := mem_reg_flush_inst wb_reg_mem_val := mem_reg_mem_val 
wb_reg_div_mul_val := mem_reg_div_mul_val wb_reg_fp_val := mem_reg_fp_val @@ -613,14 +613,14 @@ class Control(implicit conf: RocketConfiguration) extends Module io.fpu.dec.wen && fp_sboard.read(id_waddr) } else Bool(false) - // write CAUSE CSR on an exception - io.dpath.exception := wb_reg_xcpt - io.dpath.cause := wb_reg_cause - io.dpath.badvaddr_wen := wb_reg_xcpt // don't care for non-memory exceptions + // write CAUSE CSR on an exception + io.dpath.exception := wb_reg_xcpt + io.dpath.cause := wb_reg_cause + io.dpath.badvaddr_wen := wb_reg_xcpt // don't care for non-memory exceptions // control transfer from ex/wb take_pc_wb := replay_wb || wb_reg_xcpt || wb_reg_sret - take_pc := take_pc_ex || take_pc_wb; + take_pc := take_pc_ex || take_pc_wb io.dpath.sel_pc := Mux(wb_reg_xcpt, PC_PCR, // exception @@ -706,16 +706,16 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.sel_alu2 := id_sel_alu2.toUInt io.dpath.sel_alu1 := id_sel_alu1.toUInt io.dpath.sel_imm := id_sel_imm.toUInt - io.dpath.fn_dw := id_fn_dw.toBool; + io.dpath.fn_dw := id_fn_dw.toBool io.dpath.fn_alu := id_fn_alu.toUInt io.dpath.div_mul_val := ex_reg_div_mul_val io.dpath.div_mul_kill := mem_reg_div_mul_val && killm_common - io.dpath.ex_fp_val:= ex_reg_fp_val; - io.dpath.mem_fp_val:= mem_reg_fp_val; + io.dpath.ex_fp_val:= ex_reg_fp_val + io.dpath.mem_fp_val:= mem_reg_fp_val io.dpath.ex_jalr := ex_reg_jalr io.dpath.ex_predicted_taken := ex_reg_btb_hit - io.dpath.ex_wen := ex_reg_wen; - io.dpath.mem_wen := mem_reg_wen; + io.dpath.ex_wen := ex_reg_wen + io.dpath.mem_wen := mem_reg_wen io.dpath.ll_ready := !wb_reg_wen io.dpath.wb_wen := wb_reg_wen && !replay_wb io.dpath.retire := wb_reg_valid && !replay_wb diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 0990392c..a767bd6f 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -145,8 +145,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module 
A2_FOUR -> SInt(4))) val alu = Module(new ALU) - alu.io.dw := ex_reg_ctrl_fn_dw; - alu.io.fn := ex_reg_ctrl_fn_alu; + alu.io.dw := ex_reg_ctrl_fn_dw + alu.io.fn := ex_reg_ctrl_fn_alu alu.io.in2 := ex_op2.toUInt alu.io.in1 := ex_op1 @@ -185,7 +185,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_fp_val) require(io.dmem.req.bits.tag.getWidth >= 6) - // processor control regfile read + // processor control regfile read val pcr = Module(new CSRFile) pcr.io.host <> io.host pcr.io <> io.ctrl @@ -198,7 +198,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ptw.sret := io.ctrl.sret io.ptw.status := pcr.io.status - // branch resolution logic + // branch resolution logic io.ctrl.jalr_eq := ex_rs(0) === id_pc.toSInt && ex_reg_inst(31,20) === UInt(0) io.ctrl.ex_br_taken := Mux(io.ctrl.ex_br_type === BR_EQ, ex_rs(0) === ex_rs(1), @@ -278,7 +278,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ctrl.fp_sboard_clr := dmem_resp_replay && dmem_resp_fpu io.ctrl.fp_sboard_clra := dmem_resp_waddr - // processor control regfile write + // processor control regfile write pcr.io.rw.addr := wb_reg_inst(31,20) pcr.io.rw.cmd := io.ctrl.csr pcr.io.rw.wdata := wb_reg_wdata diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index b5be1378..1262bc1a 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -9,14 +9,14 @@ import scala.math._ class DpathBTBIO extends Bundle { - val current_pc = UInt(INPUT, VADDR_BITS); - val hit = Bool(OUTPUT); - val target = UInt(OUTPUT, VADDR_BITS); - val wen = Bool(INPUT); - val clr = Bool(INPUT); - val invalidate = Bool(INPUT); - val correct_pc = UInt(INPUT, VADDR_BITS); - val correct_target = UInt(INPUT, VADDR_BITS); + val current_pc = UInt(INPUT, VADDR_BITS) + val hit = Bool(OUTPUT) + val target = UInt(OUTPUT, VADDR_BITS) + val wen = Bool(INPUT) + 
val clr = Bool(INPUT) + val invalidate = Bool(INPUT) + val correct_pc = UInt(INPUT, VADDR_BITS) + val correct_target = UInt(INPUT, VADDR_BITS) } // fully-associative branch target buffer @@ -265,13 +265,13 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module when (decoded_addr(CSRs.epc)) { reg_epc := wdata(VADDR_BITS,0).toSInt } when (decoded_addr(CSRs.evec)) { reg_evec := wdata(VADDR_BITS-1,0).toSInt } when (decoded_addr(CSRs.count)) { reg_time := wdata.toUInt } - when (decoded_addr(CSRs.compare)) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false); } + when (decoded_addr(CSRs.compare)) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false) } when (decoded_addr(CSRs.fromhost)) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } when (decoded_addr(CSRs.tohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } when (decoded_addr(CSRs.clear_ipi)){ r_irq_ipi := wdata(0) } - when (decoded_addr(CSRs.sup0)) { reg_sup0 := wdata; } - when (decoded_addr(CSRs.sup1)) { reg_sup1 := wdata; } - when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUInt; } + when (decoded_addr(CSRs.sup0)) { reg_sup0 := wdata } + when (decoded_addr(CSRs.sup1)) { reg_sup1 := wdata } + when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUInt } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } } } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 39d7840f..97684440 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -808,7 +808,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val misaligned = (((s1_req.typ === MT_H) || (s1_req.typ === MT_HU)) && (s1_req.addr(0) != Bits(0))) || (((s1_req.typ === MT_W) || (s1_req.typ === MT_WU)) && (s1_req.addr(1,0) != Bits(0))) || - 
((s1_req.typ === MT_D) && (s1_req.addr(2,0) != Bits(0))); + ((s1_req.typ === MT_D) && (s1_req.addr(2,0) != Bits(0))) io.cpu.xcpt.ma.ld := s1_read && misaligned io.cpu.xcpt.ma.st := s1_write && misaligned diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 48fa036a..7fe20ed0 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -91,13 +91,13 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module switch (state) { is (s_ready) { when (arb.io.out.valid) { - state := s_req; + state := s_req } count := UInt(0) } is (s_req) { when (io.mem.req.ready) { - state := s_wait; + state := s_wait } } is (s_wait) { @@ -117,10 +117,10 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module } } is (s_done) { - state := s_ready; + state := s_ready } is (s_error) { - state := s_ready; + state := s_ready } } } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index cb999ed0..a5631402 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -5,30 +5,30 @@ import uncore.constants.AddressConstants._ import scala.math._ class CAMIO(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { - val clear = Bool(INPUT); + val clear = Bool(INPUT) val clear_hit = Bool(INPUT) - val tag = Bits(INPUT, tag_bits); - val hit = Bool(OUTPUT); - val hits = UInt(OUTPUT, entries); - val valid_bits = Bits(OUTPUT, entries); + val tag = Bits(INPUT, tag_bits) + val hit = Bool(OUTPUT) + val hits = UInt(OUTPUT, entries) + val valid_bits = Bits(OUTPUT, entries) - val write = Bool(INPUT); - val write_tag = Bits(INPUT, tag_bits); - val write_addr = UInt(INPUT, addr_bits); + val write = Bool(INPUT) + val write_tag = Bits(INPUT, tag_bits) + val write_addr = UInt(INPUT, addr_bits) } class RocketCAM(entries: Int, tag_bits: Int) extends Module { - val addr_bits = ceil(log(entries)/log(2)).toInt; - val io = new CAMIO(entries, addr_bits, tag_bits); + val addr_bits = 
ceil(log(entries)/log(2)).toInt + val io = new CAMIO(entries, addr_bits, tag_bits) val cam_tags = Mem(Bits(width = tag_bits), entries) val vb_array = Reg(init=Bits(0, entries)) when (io.write) { - vb_array := vb_array.bitSet(io.write_addr, Bool(true)); + vb_array := vb_array.bitSet(io.write_addr, Bool(true)) cam_tags(io.write_addr) := io.write_tag } when (io.clear) { - vb_array := Bits(0, entries); + vb_array := Bits(0, entries) } .elsewhen (io.clear_hit) { vb_array := vb_array & ~io.hits @@ -36,7 +36,7 @@ class RocketCAM(entries: Int, tag_bits: Int) extends Module { val hits = (0 until entries).map(i => vb_array(i) && cam_tags(i) === io.tag) - io.valid_bits := vb_array; + io.valid_bits := vb_array io.hits := Vec(hits).toBits io.hit := io.hits.orR } From 31060ea8ae630bced748a04be90fae61a4c7ec7b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 14 Jan 2014 04:02:43 -0800 Subject: [PATCH 0672/1087] Fix fubar long-latency writeback control logic Load miss writebacks happening at the same time as multiplication wasn't working. Hopefully this does it. 
--- rocket/src/main/scala/arbiter.scala | 1 + rocket/src/main/scala/ctrl.scala | 8 ++++---- rocket/src/main/scala/nbdcache.scala | 3 +++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index c876a243..a87fcf01 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -44,5 +44,6 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Modu resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n)) resp.bits.nack := io.mem.resp.bits.nack && tag_hit resp.bits.replay := io.mem.resp.bits.replay && tag_hit + resp.bits.load_replay_next := io.mem.resp.bits.load_replay_next && tag_hit } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index e559c286..7ffb0623 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -538,9 +538,10 @@ class Control(implicit conf: RocketConfiguration) extends Module (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UInt(10)), (mem_reg_mem_val && io.dmem.xcpt.pf.st, UInt(11)))) + val dcache_kill_mem = mem_reg_wen && io.dmem.resp.bits.load_replay_next // structural hazard on writeback port val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem - val replay_mem = mem_reg_replay || fpu_kill_mem - val killm_common = take_pc_wb || mem_reg_xcpt || !mem_reg_valid + val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem + val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem wb_reg_replay := replay_mem && !take_pc_wb @@ -573,8 +574,7 @@ class Control(implicit conf: RocketConfiguration) extends Module } val replay_wb_common = - io.dmem.resp.bits.nack || wb_reg_replay || - io.dpath.ll_wen && wb_reg_wen || io.dpath.csr_replay + io.dmem.resp.bits.nack || wb_reg_replay || io.dpath.csr_replay val wb_rocc_val = wb_reg_rocc_val && !replay_wb_common val replay_wb = 
replay_wb_common || wb_reg_rocc_val && !io.rocc.cmd.ready diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 97684440..c1625256 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -697,6 +697,7 @@ class HellaCacheReq(implicit val conf: DCacheConfig) extends DCacheBundle { class HellaCacheResp(implicit val conf: DCacheConfig) extends DCacheBundle { val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() + val load_replay_next = Bool() // next cycle, replay and has_data will be true val typ = Bits(width = 3) val has_data = Bool() val data = Bits(width = conf.databits) @@ -762,6 +763,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends val s1_recycled = RegEnable(s2_recycle, s1_clk_en) val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) + val s1_sc = s1_req.cmd === M_XSC val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) val dtlb = Module(new TLB(8)) @@ -1032,6 +1034,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends io.cpu.resp.bits.nack := s2_valid && s2_nack io.cpu.resp.bits := s2_req io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc + io.cpu.resp.bits.load_replay_next := s1_replay && (s1_read || s1_sc) io.cpu.resp.bits.replay := s2_replay io.cpu.resp.bits.data := loadgen.word io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte) From 6ebdc4d94e83f9099ef715b5dc6bec7fed2f170b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 16 Jan 2014 00:15:48 -0800 Subject: [PATCH 0673/1087] Simplify store conditional failure code generation --- rocket/src/main/scala/nbdcache.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c1625256..6e5c471a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ 
-85,7 +85,7 @@ class StoreGen(typ: Bits, addr: Bits, dat: Bits) dat) } -class LoadGen(typ: Bits, addr: Bits, dat: Bits) +class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool) { val t = new StoreGen(typ, addr, dat) val sign = typ === MT_B || typ === MT_H || typ === MT_W || typ === MT_D @@ -94,8 +94,8 @@ class LoadGen(typ: Bits, addr: Bits, dat: Bits) val word = Cat(Mux(t.word, Fill(32, sign && wordShift(31)), dat(63,32)), wordShift) val halfShift = Mux(addr(1), word(31,16), word(15,0)) val half = Cat(Mux(t.half, Fill(48, sign && halfShift(15)), word(63,16)), halfShift) - val byteShift = Mux(addr(0), half(15,8), half(7,0)) - val byte = Cat(Mux(t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift) + val byteShift = Mux(zero, UInt(0), Mux(addr(0), half(15,8), half(7,0))) + val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift) } class MSHRReq(implicit conf: DCacheConfig) extends HellaCacheReq { @@ -1002,7 +1002,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends // load data subword mux/sign extension val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(conf.databits))) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) - val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word) + val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc) amoalu.io := s2_req amoalu.io.lhs := s2_data_word @@ -1037,7 +1037,7 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends io.cpu.resp.bits.load_replay_next := s1_replay && (s1_read || s1_sc) io.cpu.resp.bits.replay := s2_replay io.cpu.resp.bits.data := loadgen.word - io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte) + io.cpu.resp.bits.data_subword := loadgen.byte | s2_sc_fail io.cpu.resp.bits.store_data := s2_req.data io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid From 
57f4d89c90b249b0daee1174457535282f181fac Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 16 Jan 2014 00:16:09 -0800 Subject: [PATCH 0674/1087] Generate D$ replay_next signals correctly --- rocket/src/main/scala/arbiter.scala | 5 ++++- rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 6 ++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index a87fcf01..48ea127e 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -44,6 +44,9 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Modu resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n)) resp.bits.nack := io.mem.resp.bits.nack && tag_hit resp.bits.replay := io.mem.resp.bits.replay && tag_hit - resp.bits.load_replay_next := io.mem.resp.bits.load_replay_next && tag_hit + + io.requestor(i).replay_next.valid := io.mem.replay_next.valid && + io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(log2Up(n)) + io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> UInt(log2Up(n)) } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 7ffb0623..f7f41a94 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -538,7 +538,7 @@ class Control(implicit conf: RocketConfiguration) extends Module (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UInt(10)), (mem_reg_mem_val && io.dmem.xcpt.pf.st, UInt(11)))) - val dcache_kill_mem = mem_reg_wen && io.dmem.resp.bits.load_replay_next // structural hazard on writeback port + val dcache_kill_mem = mem_reg_wen && io.dmem.replay_next.valid // structural hazard on writeback port val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid diff --git a/rocket/src/main/scala/nbdcache.scala 
b/rocket/src/main/scala/nbdcache.scala index 6e5c471a..ba08110c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -697,7 +697,6 @@ class HellaCacheReq(implicit val conf: DCacheConfig) extends DCacheBundle { class HellaCacheResp(implicit val conf: DCacheConfig) extends DCacheBundle { val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() - val load_replay_next = Bool() // next cycle, replay and has_data will be true val typ = Bits(width = 3) val has_data = Bool() val data = Bits(width = conf.databits) @@ -722,6 +721,7 @@ class HellaCacheExceptions extends Bundle { class HellaCacheIO(implicit conf: DCacheConfig) extends Bundle { val req = Decoupled(new HellaCacheReq) val resp = Valid(new HellaCacheResp).flip + val replay_next = Valid(Bits(width = conf.reqtagbits)).flip val xcpt = (new HellaCacheExceptions).asInput val ptw = (new TLBPTWIO).flip val ordered = Bool(INPUT) @@ -1034,13 +1034,15 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends io.cpu.resp.bits.nack := s2_valid && s2_nack io.cpu.resp.bits := s2_req io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc - io.cpu.resp.bits.load_replay_next := s1_replay && (s1_read || s1_sc) io.cpu.resp.bits.replay := s2_replay io.cpu.resp.bits.data := loadgen.word io.cpu.resp.bits.data_subword := loadgen.byte | s2_sc_fail io.cpu.resp.bits.store_data := s2_req.data io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid + io.cpu.replay_next.valid := s1_replay && (s1_read || s1_sc) + io.cpu.replay_next.bits := s1_req.tag + io.mem.grant_ack <> mshrs.io.mem_finish } From 6bbbf369790794aa1c8d2e2796ba64d4e6d745f9 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 16 Jan 2014 16:01:41 -0800 Subject: [PATCH 0675/1087] push accel/rocket dmem port back to rocket --- rocket/src/main/scala/tile.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tile.scala 
b/rocket/src/main/scala/tile.scala index 6423c40f..618a30bc 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -60,8 +60,10 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module if (!conf.rocc.isEmpty) { val rocc = Module((conf.rocc.get)(conf)) + val dcIF = Module(new SimpleHellaCacheIF) + dcIF.io.requestor <> rocc.io.mem core.io.rocc <> rocc.io - dcacheArb.io.requestor(2) <> rocc.io.mem + dcacheArb.io.requestor(2) <> dcIF.io.cache memArb.io.in(roccPortId) <> rocc.io.imem ptw.io.requestor(2) <> rocc.io.iptw ptw.io.requestor(3) <> rocc.io.dptw From cf38001e98f8400b6adc01864596b3f058ac30bd Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 17 Jan 2014 03:52:35 -0800 Subject: [PATCH 0676/1087] Fix fmv.s.x -> fsd --- rocket/src/main/scala/fpu.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index b41b4265..ed9d537f 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -212,6 +212,7 @@ class FPToInt extends Module val unrec_s = hardfloat.recodedFloatNToFloatN(in.in1, 23, 9) val unrec_d = hardfloat.recodedFloatNToFloatN(in.in1, 52, 12) + val unrec_out = Mux(in.single, Cat(Fill(32, unrec_s(31)), unrec_s), unrec_d) val dcmp = Module(new hardfloat.recodedFloatNCompare(52, 12)) dcmp.io.a := in.in1 @@ -221,7 +222,8 @@ class FPToInt extends Module val d2i = hardfloat.recodedFloatNToAny(in.in1, in.rm, ~in.cmd(1,0), 52, 12, 64) - io.out.bits.toint := Mux(in.single, Cat(Fill(32, unrec_s(31)), unrec_s), unrec_d) + io.out.bits.toint := unrec_out + io.out.bits.store := unrec_out io.out.bits.exc := Bits(0) when (in.cmd === FCMD_CVT_W_FMT || in.cmd === FCMD_CVT_WU_FMT) { @@ -238,7 +240,6 @@ class FPToInt extends Module } io.out.valid := valid - io.out.bits.store := Mux(in.single, Cat(unrec_d(63,32), unrec_s), unrec_d) io.out.bits.lt := dcmp.io.a_lt_b } @@ -268,7 +269,7 @@ class 
IntToFP(val latency: Int) extends Module mux.exc := Bits(0) mux.data := hardfloat.floatNToRecodedFloatN(in.bits.data, 52, 12) when (in.bits.single) { - mux.data := hardfloat.floatNToRecodedFloatN(in.bits.data, 23, 9) + mux.data := Cat(SInt(-1, 32), hardfloat.floatNToRecodedFloatN(in.bits.data, 23, 9)) } when (in.bits.cmd === FCMD_CVT_FMT_W || in.bits.cmd === FCMD_CVT_FMT_WU || From 95de358a9607765c35cf3d775a604f57d4a9f50a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 17 Jan 2014 14:09:30 -0800 Subject: [PATCH 0677/1087] More of the same FPU fix some SP ops followed by DP stores were not working because they were encoded as subnormals, not NaNs. --- rocket/src/main/scala/fpu.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index ed9d537f..15c3f83f 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -464,7 +464,7 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module val load_wb_tag = RegEnable(io.dpath.dmem_resp_tag, io.dpath.dmem_resp_val) val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) - val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1), rec_s), rec_d) + val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d) // regfile val regfile = Mem(Bits(width = 65), 32) @@ -526,7 +526,7 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module val pipes = List( Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits.data, fpmu.io.out.bits.exc), Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits.data, ifpu.io.out.bits.exc), - Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out, sfma.io.exc), + Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, Cat(SInt(-1, 32), sfma.io.out), sfma.io.exc), Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) 
=> c.fma && !c.single, dfma.io.out, dfma.io.exc)) def latencyMask(c: FPUCtrlSigs, offset: Int) = { require(pipes.forall(_.lat >= offset)) From 6ba2c1abe59ddcc1aba0d11e7a6ce7b083d14572 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 21 Jan 2014 15:01:54 -0800 Subject: [PATCH 0678/1087] Use auto-generated CAUSE constants --- rocket/src/main/scala/ctrl.scala | 24 ++++++++--------- rocket/src/main/scala/instructions.scala | 34 ++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f7f41a94..8a20f137 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -433,13 +433,13 @@ class Control(implicit conf: RocketConfiguration) extends Module val (id_xcpt, id_cause) = checkExceptions(List( (id_interrupt, id_interrupt_cause), - (io.imem.resp.bits.xcpt_ma, UInt(0)), - (io.imem.resp.bits.xcpt_if, UInt(1)), - (!id_int_val || id_csr_invalid, UInt(2)), - (id_privileged && !io.dpath.status.s, UInt(3)), - ((id_fp_val || id_csr_fp) && !io.dpath.status.ef, UInt(4)), - (id_syscall, UInt(6)), - (id_rocc_val && !io.dpath.status.er, UInt(12)))) + (io.imem.resp.bits.xcpt_ma, UInt(Causes.misaligned_fetch)), + (io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)), + (!id_int_val || id_csr_invalid, UInt(Causes.illegal_instruction)), + (id_privileged && !io.dpath.status.s, UInt(Causes.privileged_instruction)), + ((id_fp_val || id_csr_fp) && !io.dpath.status.ef, UInt(Causes.fp_disabled)), + (id_syscall, UInt(Causes.syscall)), + (id_rocc_val && !io.dpath.status.er, UInt(Causes.accelerator_disabled)))) ex_reg_xcpt_interrupt := id_interrupt && !take_pc && io.imem.resp.valid when (id_xcpt) { ex_reg_cause := id_cause } @@ -496,7 +496,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val (ex_xcpt, ex_cause) = checkExceptions(List( (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), - (ex_reg_fp_val && io.fpu.illegal_rm, UInt(2)))) 
+ (ex_reg_fp_val && io.fpu.illegal_rm, UInt(Causes.illegal_instruction)))) mem_reg_replay := replay_ex && !take_pc_wb mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb && !mem_reg_replay_next @@ -533,10 +533,10 @@ class Control(implicit conf: RocketConfiguration) extends Module val (mem_xcpt, mem_cause) = checkExceptions(List( (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), - (mem_reg_mem_val && io.dmem.xcpt.ma.ld, UInt( 8)), - (mem_reg_mem_val && io.dmem.xcpt.ma.st, UInt( 9)), - (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UInt(10)), - (mem_reg_mem_val && io.dmem.xcpt.pf.st, UInt(11)))) + (mem_reg_mem_val && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)), + (mem_reg_mem_val && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)), + (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load)), + (mem_reg_mem_val && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)))) val dcache_kill_mem = mem_reg_wen && io.dmem.replay_next.valid // structural hazard on writeback port val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 5acb84a8..2d91d92c 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -5,14 +5,14 @@ import Node._ /* Automatically generated by parse-opcodes */ object Instructions { - def JAL = Bits("b?????????????????????????1101111") - def JALR = Bits("b?????????????????000?????1100111") def BEQ = Bits("b?????????????????000?????1100011") def BNE = Bits("b?????????????????001?????1100011") def BLT = Bits("b?????????????????100?????1100011") def BGE = Bits("b?????????????????101?????1100011") def BLTU = Bits("b?????????????????110?????1100011") def BGEU = Bits("b?????????????????111?????1100011") + def JALR = Bits("b?????????????????000?????1100111") + def JAL = Bits("b?????????????????????????1101111") def LUI = Bits("b?????????????????????????0110111") def AUIPC = 
Bits("b?????????????????????????0010111") def ADDI = Bits("b?????????????????000?????0010011") @@ -185,6 +185,36 @@ object Instructions { def CUSTOM3_RD_RS1 = Bits("b?????????????????110?????1111011") def CUSTOM3_RD_RS1_RS2 = Bits("b?????????????????111?????1111011") } +object Causes { + val misaligned_fetch = 0x0 + val fault_fetch = 0x1 + val illegal_instruction = 0x2 + val privileged_instruction = 0x3 + val fp_disabled = 0x4 + val syscall = 0x6 + val breakpoint = 0x7 + val misaligned_load = 0x8 + val misaligned_store = 0x9 + val fault_load = 0xa + val fault_store = 0xb + val accelerator_disabled = 0xc + val all = { + val res = collection.mutable.ArrayBuffer[Int]() + res += misaligned_fetch + res += fault_fetch + res += illegal_instruction + res += privileged_instruction + res += fp_disabled + res += syscall + res += breakpoint + res += misaligned_load + res += misaligned_store + res += fault_load + res += fault_store + res += accelerator_disabled + res.toArray + } +} object CSRs { val fflags = 0x1 val frm = 0x2 From a7489920cec4a13be1062658f1e9026ce2dd6d5b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 21 Jan 2014 16:17:39 -0800 Subject: [PATCH 0679/1087] Support CSR atomics on all CSRs, not just STATUS --- rocket/src/main/scala/dpath.scala | 4 ++- rocket/src/main/scala/dpath_util.scala | 36 +++++++++++--------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a767bd6f..8ae151e7 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -281,7 +281,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // processor control regfile write pcr.io.rw.addr := wb_reg_inst(31,20) pcr.io.rw.cmd := io.ctrl.csr - pcr.io.rw.wdata := wb_reg_wdata + pcr.io.rw.wdata := Mux(io.ctrl.csr === CSR.S, pcr.io.rw.rdata | wb_reg_wdata, + Mux(io.ctrl.csr === CSR.C, pcr.io.rw.rdata & ~wb_reg_wdata, + wb_reg_wdata)) 
io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst) io.rocc.cmd.bits.rs1 := wb_reg_wdata diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 1262bc1a..a763c370 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -246,11 +246,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module when (wen) { when (decoded_addr(CSRs.status)) { - val sr_wdata = Mux(io.rw.cmd === CSR.S, reg_status.toBits | wdata, - Mux(io.rw.cmd === CSR.C, reg_status.toBits & ~wdata, - wdata)) - reg_status := new Status().fromBits(sr_wdata) - + reg_status := new Status().fromBits(wdata) reg_status.s64 := true reg_status.u64 := true reg_status.zero := 0 @@ -258,22 +254,20 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module if (conf.rocc.isEmpty) reg_status.er := false if (!conf.fpu) reg_status.ef := false } - when (io.rw.cmd != CSR.C && io.rw.cmd != CSR.S) { - when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } - when (decoded_addr(CSRs.frm)) { reg_frm := wdata } - when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } - when (decoded_addr(CSRs.epc)) { reg_epc := wdata(VADDR_BITS,0).toSInt } - when (decoded_addr(CSRs.evec)) { reg_evec := wdata(VADDR_BITS-1,0).toSInt } - when (decoded_addr(CSRs.count)) { reg_time := wdata.toUInt } - when (decoded_addr(CSRs.compare)) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false) } - when (decoded_addr(CSRs.fromhost)) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } - when (decoded_addr(CSRs.tohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } - when (decoded_addr(CSRs.clear_ipi)){ r_irq_ipi := wdata(0) } - when (decoded_addr(CSRs.sup0)) { reg_sup0 := wdata } - when (decoded_addr(CSRs.sup1)) { reg_sup1 := wdata } - when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), 
Bits(0, PGIDX_BITS)).toUInt } - when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } - } + when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } + when (decoded_addr(CSRs.frm)) { reg_frm := wdata } + when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } + when (decoded_addr(CSRs.epc)) { reg_epc := wdata(VADDR_BITS,0).toSInt } + when (decoded_addr(CSRs.evec)) { reg_evec := wdata(VADDR_BITS-1,0).toSInt } + when (decoded_addr(CSRs.count)) { reg_time := wdata.toUInt } + when (decoded_addr(CSRs.compare)) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false) } + when (decoded_addr(CSRs.fromhost)) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } + when (decoded_addr(CSRs.tohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } + when (decoded_addr(CSRs.clear_ipi)){ r_irq_ipi := wdata(0) } + when (decoded_addr(CSRs.sup0)) { reg_sup0 := wdata } + when (decoded_addr(CSRs.sup1)) { reg_sup1 := wdata } + when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUInt } + when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } } io.host.ipi_rep.ready := Bool(true) From a2be21361e9181fcdec7f005ee31b168058b2137 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Wed, 22 Jan 2014 16:19:57 -0800 Subject: [PATCH 0680/1087] Allow ICacheConfig to toggle fetch-width. 
--- rocket/src/main/scala/icache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 8a60548b..5a61adb8 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -5,11 +5,11 @@ import uncore._ import Util._ case class ICacheConfig(sets: Int, assoc: Int, + ibytes: Int = 4, ntlb: Int = 8, nbtb: Int = 8, code: Code = new IdentityCode) { val w = 1 - val ibytes = 4 val dm = assoc == 1 val lines = sets * assoc From a1b7774f5dc99457f52f118688256a91593730bc Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 24 Jan 2014 15:56:01 -0800 Subject: [PATCH 0681/1087] Simplify handling of CAUSE register --- rocket/src/main/scala/consts.scala | 4 ---- rocket/src/main/scala/ctrl.scala | 6 +++--- rocket/src/main/scala/dpath_util.scala | 13 ++++++------- rocket/src/main/scala/package.scala | 1 - 4 files changed, 9 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 6f7ca640..b58e94a3 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -59,10 +59,6 @@ trait ScalarOpConstants { val RA = UInt(1, 5) } - -trait InterruptConstants { - val CAUSE_INTERRUPT = 32 -} trait VectorOpConstants { val VEC_X = Bits("b??", 2).toUInt diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 8a20f137..f01e241f 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -6,7 +6,7 @@ import uncore.constants.MemoryOpConstants._ import ALU._ import Util._ -class CtrlDpathIO extends Bundle() +class CtrlDpathIO(implicit conf: RocketConfiguration) extends Bundle { // outputs to datapath val sel_pc = UInt(OUTPUT, 3) @@ -42,7 +42,7 @@ class CtrlDpathIO extends Bundle() // exception handling val retire = Bool(OUTPUT) val exception = Bool(OUTPUT) - val cause = UInt(OUTPUT, 6) + val cause = UInt(OUTPUT, conf.xprlen) 
val badvaddr_wen = Bool(OUTPUT) // high for a load/store access fault // inputs from datapath val inst = Bits(INPUT, 32) @@ -395,7 +395,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_reg_fence = Reg(init=Bool(false)) val sr = io.dpath.status - var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(CAUSE_INTERRUPT+i))) + var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(BigInt(1) << (conf.xprlen-1) | i))) val (id_interrupt_unmasked, id_interrupt_cause) = checkExceptions(id_interrupts) val id_interrupt = io.dpath.status.ei && id_interrupt_unmasked diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index a763c370..70e04ec6 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -99,13 +99,13 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val evec = UInt(OUTPUT, VADDR_BITS+1) val exception = Bool(INPUT) val retire = Bool(INPUT) - val cause = UInt(INPUT, 6) + val cause = UInt(INPUT, conf.xprlen) val badvaddr_wen = Bool(INPUT) val pc = UInt(INPUT, VADDR_BITS+1) val sret = Bool(INPUT) val fatc = Bool(OUTPUT) val replay = Bool(OUTPUT) - val time = UInt(OUTPUT, 64) + val time = UInt(OUTPUT, conf.xprlen) val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip } @@ -114,7 +114,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val reg_badvaddr = Reg(Bits(width = VADDR_BITS)) val reg_evec = Reg(Bits(width = VADDR_BITS)) val reg_compare = Reg(Bits(width = 32)) - val reg_cause = Reg(Bits(width = io.cause.getWidth)) + val reg_cause = Reg(Bits(width = conf.xprlen)) val reg_tohost = Reg(init=Bits(0, conf.xprlen)) val reg_fromhost = Reg(init=Bits(0, conf.xprlen)) val reg_sup0 = Reg(Bits(width = conf.xprlen)) @@ -122,8 +122,8 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val reg_ptbr = 
Reg(UInt(width = PADDR_BITS)) val reg_stats = Reg(init=Bool(false)) val reg_status = Reg(new Status) // reset down below - val reg_time = WideCounter(64) - val reg_instret = WideCounter(64, io.retire) + val reg_time = WideCounter(conf.xprlen) + val reg_instret = WideCounter(conf.xprlen, io.retire) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) @@ -208,7 +208,6 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val read_impl = Bits(2) val read_ptbr = reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS - val read_cause = reg_cause(reg_cause.getWidth-1) << conf.xprlen-1 | reg_cause(reg_cause.getWidth-2,0) val read_mapping = Map[Int,Bits]( CSRs.fflags -> (if (conf.fpu) reg_fflags else UInt(0)), @@ -226,7 +225,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module CSRs.count -> reg_time, CSRs.compare -> reg_compare, CSRs.evec -> reg_evec, - CSRs.cause -> read_cause, + CSRs.cause -> reg_cause, CSRs.status -> io.status.toBits, CSRs.hartid -> io.host.id, CSRs.impl -> read_impl, diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index ca2926fc..0fcb6580 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -1,6 +1,5 @@ package object rocket extends rocket.constants.ScalarOpConstants with - rocket.constants.InterruptConstants with rocket.constants.VectorOpConstants { val START_ADDR = 0x2000 From 1f986d1c9682a66bdfc925bdb7592e55046c7d32 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 24 Jan 2014 15:57:58 -0800 Subject: [PATCH 0682/1087] Branches don't care about the ALU input/function --- rocket/src/main/scala/ctrl.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f01e241f..3734b111 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -90,12 +90,12 @@ object XDecode extends DecodeConstants // | | 
| | | | | | | | | | | | | | | | | | | | | fence // | | | | | | | | | | | | | | | | | | | | | | | | amo // | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BLT-> List(Y, N,N,BR_LT, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), JAL-> List(Y, N,N,BR_J, N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), JALR-> List(Y, N,N,BR_N, Y,N,Y,A2_FOUR,A1_PC, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), From 267394d3cc7c29160bf318bcaece5f78a897d148 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 24 Jan 2014 15:59:11 -0800 Subject: [PATCH 0683/1087] Fix CSR interlocks --- rocket/src/main/scala/ctrl.scala | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala 
b/rocket/src/main/scala/ctrl.scala index 3734b111..8eac2d0b 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -417,7 +417,7 @@ class Control(implicit conf: RocketConfiguration) extends Module // flush pipeline on CSR writes that may have side effects val id_csr_flush = { val safe_csrs = CSRs.sup0 :: CSRs.sup1 :: CSRs.epc :: Nil - id_csr_en && id_csr_wen && DecodeLogic(id_csr_addr, legal_csrs -- safe_csrs, safe_csrs) + id_csr_en && id_csr_wen && !DecodeLogic(id_csr_addr, safe_csrs, legal_csrs -- safe_csrs) } val id_privileged = id_sret || id_csr_privileged @@ -645,10 +645,13 @@ class Control(implicit conf: RocketConfiguration) extends Module } // stall for RAW/WAW hazards on PCRs, loads, AMOs, and mul/div in execute stage. + val id_renx1_not0 = id_renx1 && id_raddr1 != UInt(0) + val id_renx2_not0 = id_renx2 && id_raddr2 != UInt(0) + val id_wen_not0 = id_wen && id_waddr != UInt(0) val data_hazard_ex = ex_reg_wen && - (id_renx1.toBool && id_raddr1 === io.dpath.ex_waddr || - id_renx2.toBool && id_raddr2 === io.dpath.ex_waddr || - id_wen.toBool && id_waddr === io.dpath.ex_waddr) + (id_renx1_not0 && id_raddr1 === io.dpath.ex_waddr || + id_renx2_not0 && id_raddr2 === io.dpath.ex_waddr || + id_wen_not0 && id_waddr === io.dpath.ex_waddr) val fp_data_hazard_ex = ex_reg_fp_wen && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.ex_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || @@ -662,9 +665,9 @@ class Control(implicit conf: RocketConfiguration) extends Module if (conf.fastLoadWord) Bool(!conf.fastLoadByte) && mem_reg_slow_bypass else Bool(true) val data_hazard_mem = mem_reg_wen && - (id_raddr1 != UInt(0) && id_renx1 && id_raddr1 === io.dpath.mem_waddr || - id_raddr2 != UInt(0) && id_renx2 && id_raddr2 === io.dpath.mem_waddr || - id_waddr != UInt(0) && id_wen && id_waddr === io.dpath.mem_waddr) + (id_renx1_not0 && id_raddr1 === io.dpath.mem_waddr || + id_renx2_not0 && id_raddr2 === io.dpath.mem_waddr || + id_wen_not0 && id_waddr 
=== io.dpath.mem_waddr) val fp_data_hazard_mem = mem_reg_fp_wen && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.mem_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || @@ -683,9 +686,9 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_wb_hazard = fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) val id_sboard_hazard = - (id_raddr1 != UInt(0) && id_renx1 && sboard.readBypassed(id_raddr1) || - id_raddr2 != UInt(0) && id_renx2 && sboard.readBypassed(id_raddr2) || - id_waddr != UInt(0) && id_wen && sboard.readBypassed(id_waddr)) + (id_renx1_not0 && sboard.readBypassed(id_raddr1) || + id_renx2_not0 && sboard.readBypassed(id_raddr2) || + id_wen_not0 && sboard.readBypassed(id_waddr)) val ctrl_stalld = id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || From 0266c1f76a489a9fed0ef5c08199f3fef57a1637 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 24 Jan 2014 16:36:36 -0800 Subject: [PATCH 0684/1087] Support retirement width > 1 in CSR file --- rocket/src/main/scala/dpath_util.scala | 2 +- rocket/src/main/scala/tile.scala | 2 ++ rocket/src/main/scala/util.scala | 16 ++++++++++------ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 70e04ec6..08b463b3 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -98,7 +98,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val ptbr = UInt(OUTPUT, PADDR_BITS) val evec = UInt(OUTPUT, VADDR_BITS+1) val exception = Bool(INPUT) - val retire = Bool(INPUT) + val retire = UInt(INPUT, log2Up(1+conf.retireWidth)) val cause = UInt(INPUT, conf.xprlen) val badvaddr_wen = Bool(INPUT) val pc = UInt(INPUT, VADDR_BITS+1) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 0a4fc7a5..a194fbab 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -7,6 +7,7 @@ 
import Util._ case class RocketConfiguration(tl: TileLinkConfiguration, icache: ICacheConfig, dcache: DCacheConfig, fpu: Boolean, rocc: Option[RocketConfiguration => RoCC] = None, + retireWidth: Int = 1, vm: Boolean = true, fastLoadWord: Boolean = true, fastLoadByte: Boolean = false, @@ -30,6 +31,7 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module implicit val icConf = confIn.icache implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(dcachePorts), databits = confIn.xprlen) implicit val conf = confIn.copy(dcache = dcConf) + require(conf.retireWidth == 1) // for now... val io = new Bundle { val tilelink = new TileLinkIO diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 09dc7e8b..7735847c 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -109,17 +109,21 @@ object Split } // a counter that clock gates most of its MSBs using the LSB carry-out -case class WideCounter(width: Int, inc: Bool = Bool(true)) +case class WideCounter(width: Int, inc: UInt = UInt(1)) { - private val isWide = width >= 4 - private val smallWidth = if (isWide) log2Up(width) else width + require(inc.getWidth > 0) + private val isWide = width > 2*inc.getWidth + private val smallWidth = if (isWide) inc.getWidth max log2Up(width) else width private val small = Reg(init=UInt(0, smallWidth)) - private val nextSmall = small + UInt(1, smallWidth+1) - when (inc) { small := nextSmall(smallWidth-1,0) } + private val doInc = inc.orR + private val nextSmall = + if (inc.getWidth == 1) small + UInt(1, smallWidth+1) + else Cat(UInt(0,1), small) + inc + when (doInc) { small := nextSmall(smallWidth-1,0) } private val large = if (isWide) { val r = Reg(init=UInt(0, width - smallWidth)) - when (inc && nextSmall(smallWidth)) { r := r + UInt(1) } + when (doInc && nextSmall(smallWidth)) { r := r + UInt(1) } r } else null From 3c3c469725e80f451481bacda896993f6ac324ec Mon Sep 17 
00:00:00 2001 From: Stephen Twigg Date: Tue, 28 Jan 2014 22:13:16 -0800 Subject: [PATCH 0685/1087] Add exception signal to rocc interface --- rocket/src/main/scala/ctrl.scala | 1 + rocket/src/main/scala/rocc.scala | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f7f41a94..97525f0b 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -738,4 +738,5 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dmem.req.bits.phys := Bool(false) io.rocc.cmd.valid := wb_rocc_val + io.rocc.exception := wb_reg_xcpt && sr.er } diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 31cd572f..aa099d00 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -38,14 +38,17 @@ class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle val cmd = Decoupled(new RoCCCommand).flip val resp = Decoupled(new RoCCResponse) val mem = new HellaCacheIO()(conf.dcache) + val busy = Bool(OUTPUT) + val interrupt = Bool(OUTPUT) + + // These should be handled differently, eventually val imem = new UncachedTileLinkIO()(conf.tl) val iptw = new TLBPTWIO val dptw = new TLBPTWIO val pptw = new TLBPTWIO val cp_dfma = new ioFMA(65).flip val cp_sfma = new ioFMA(33).flip - val busy = Bool(OUTPUT) - val interrupt = Bool(OUTPUT) + val exception = Bool(INPUT) override def clone = new RoCCInterface().asInstanceOf[this.type] } From febd26f50519a2cef72b0419c90da189bdf5b3f8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 31 Jan 2014 01:03:17 -0800 Subject: [PATCH 0686/1087] Correct CSR privilege logic --- rocket/src/main/scala/ctrl.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 8eac2d0b..6ee07a37 100644 --- a/rocket/src/main/scala/ctrl.scala +++ 
b/rocket/src/main/scala/ctrl.scala @@ -410,16 +410,18 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_csr_en = id_csr != CSR.N val id_csr_fp = Bool(conf.fpu) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) val id_csr_wen = id_raddr1 != UInt(0) || !Vec(CSR.S, CSR.C).contains(id_csr) - val id_csr_privileged = id_csr_en && - (id_csr_addr(9,8) != UInt(0) || - id_csr_addr(11,10) != UInt(0) && id_csr_wen) val id_csr_invalid = id_csr_en && !Vec(legal_csrs.map(UInt(_))).contains(id_csr_addr) + val id_csr_privileged = id_csr_en && + (id_csr_addr(11,10) === UInt(3) && id_csr_wen || + id_csr_addr(11,10) === UInt(2) || + id_csr_addr(11,10) === UInt(1) && !io.dpath.status.s || + id_csr_addr(9,8) >= UInt(2) || + id_csr_addr(9,8) === UInt(1) && !io.dpath.status.s && id_csr_wen) // flush pipeline on CSR writes that may have side effects val id_csr_flush = { val safe_csrs = CSRs.sup0 :: CSRs.sup1 :: CSRs.epc :: Nil id_csr_en && id_csr_wen && !DecodeLogic(id_csr_addr, safe_csrs, legal_csrs -- safe_csrs) } - val id_privileged = id_sret || id_csr_privileged // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) val id_amo_aq = io.dpath.inst(26) @@ -436,7 +438,8 @@ class Control(implicit conf: RocketConfiguration) extends Module (io.imem.resp.bits.xcpt_ma, UInt(Causes.misaligned_fetch)), (io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)), (!id_int_val || id_csr_invalid, UInt(Causes.illegal_instruction)), - (id_privileged && !io.dpath.status.s, UInt(Causes.privileged_instruction)), + (id_csr_privileged, UInt(Causes.privileged_instruction)), + (id_sret && !io.dpath.status.s, UInt(Causes.privileged_instruction)), ((id_fp_val || id_csr_fp) && !io.dpath.status.ef, UInt(Causes.fp_disabled)), (id_syscall, UInt(Causes.syscall)), (id_rocc_val && !io.dpath.status.er, UInt(Causes.accelerator_disabled)))) From 2c2b3a7678e3437e042ed83d976b764839cc8b7a Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 21 Jan 2014 
12:08:42 -0800 Subject: [PATCH 0687/1087] cleanups supporting uncore hierarchy --- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 6 +++--- rocket/src/main/scala/tile.scala | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 5a61adb8..88742586 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -248,7 +248,7 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) val finish_q = Module(new Queue(new GrantAck, 1)) - finish_q.io.enq.valid := refill_done && tl.co.requiresAck(io.mem.grant.bits.payload) + finish_q.io.enq.valid := refill_done && tl.co.requiresAckForGrant(io.mem.grant.bits.payload.g_type) finish_q.io.enq.bits.master_xact_id := io.mem.grant.bits.payload.master_xact_id // output signals diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index ba08110c..7eb5b15d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -260,7 +260,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte } val ackq = Module(new Queue(new LogicalNetworkIO(new GrantAck), 1)) - ackq.io.enq.valid := (wb_done || refill_done) && tl.co.requiresAck(io.mem_grant.bits.payload) + ackq.io.enq.valid := (wb_done || refill_done) && tl.co.requiresAckForGrant(io.mem_grant.bits.payload.g_type) ackq.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp @@ -686,12 +686,12 @@ class AMOALU(implicit conf: DCacheConfig) extends Module { class HellaCacheReq(implicit val conf: DCacheConfig) extends DCacheBundle { val kill = Bool() - val typ = Bits(width = 3) + val typ = Bits(width = 
MT_SZ) val phys = Bool() val addr = UInt(width = conf.maxaddrbits) val data = Bits(width = conf.databits) val tag = Bits(width = conf.reqtagbits) - val cmd = Bits(width = 4) + val cmd = Bits(width = M_SZ) } class HellaCacheResp(implicit val conf: DCacheConfig) extends DCacheBundle { diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index a194fbab..840a8c3d 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -20,7 +20,7 @@ case class RocketConfiguration(tl: TileLinkConfiguration, if (fastLoadByte) require(fastLoadWord) } -class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module(_reset = resetSignal) with ClientCoherenceAgent +class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module(_reset = resetSignal) { val memPorts = 2 // Number of ports to outer memory system from tile: 1 from I$, 1 from D$ val dcachePortId = 0 From ab4a3e937b4c8d7cdb0a16884888ae8dd9c2650c Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 5 Feb 2014 14:21:43 -0800 Subject: [PATCH 0688/1087] don't share fma pipes --- rocket/src/main/scala/core.scala | 6 ++---- rocket/src/main/scala/rocc.scala | 2 -- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 89cbdc32..7b89db98 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -42,8 +42,6 @@ class Core(implicit conf: RocketConfiguration) extends Module ctrl.io.rocc <> io.rocc dpath.io.rocc <> io.rocc - require(conf.fpu) - // Hookup the {S,D}FMA pipes - fpu.io.sfma <> io.rocc.cp_sfma - fpu.io.dfma <> io.rocc.cp_dfma + fpu.io.sfma.valid := Bool(false) + fpu.io.dfma.valid := Bool(false) } diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index aa099d00..a6430e54 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -46,8 +46,6 @@ class RoCCInterface(implicit 
conf: RocketConfiguration) extends Bundle val iptw = new TLBPTWIO val dptw = new TLBPTWIO val pptw = new TLBPTWIO - val cp_dfma = new ioFMA(65).flip - val cp_sfma = new ioFMA(33).flip val exception = Bool(INPUT) override def clone = new RoCCInterface().asInstanceOf[this.type] From ff7cae29f76be54eb2eee186fe2c53db6c43a77c Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Thu, 6 Feb 2014 00:09:42 -0800 Subject: [PATCH 0689/1087] hookup rocc interrupt and s bit --- rocket/src/main/scala/ctrl.scala | 1 + rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/dpath_util.scala | 5 +++-- rocket/src/main/scala/rocc.scala | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 22ed8502..45fe7acf 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -745,4 +745,5 @@ class Control(implicit conf: RocketConfiguration) extends Module io.rocc.cmd.valid := wb_rocc_val io.rocc.exception := wb_reg_xcpt && sr.er + io.rocc.s := sr.s } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 8ae151e7..a987b720 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -190,6 +190,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module pcr.io.host <> io.host pcr.io <> io.ctrl pcr.io <> io.fpu + pcr.io.rocc <> io.rocc pcr.io.pc := wb_reg_pc io.ctrl.csr_replay := pcr.io.replay diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 08b463b3..49da47fe 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -108,6 +108,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val time = UInt(OUTPUT, conf.xprlen) val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip + val rocc = new RoCCInterface().flip } val reg_epc = Reg(Bits(width = 
VADDR_BITS+1)) @@ -168,8 +169,8 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val wdata = Mux(cpu_req_valid, io.rw.wdata, host_pcr_bits.data) io.status := reg_status - io.status.ip := Cat(r_irq_timer, reg_fromhost.orR, r_irq_ipi, Bool(false), - Bool(false), Bool(false), Bool(false), Bool(false)) + io.status.ip := Cat(r_irq_timer, reg_fromhost.orR, r_irq_ipi, Bool(false), + Bool(false), io.rocc.interrupt, Bool(false), Bool(false)) io.fatc := wen && decoded_addr(CSRs.fatc) io.evec := Mux(io.exception, reg_evec.toSInt, reg_epc).toUInt io.ptbr := reg_ptbr diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index a6430e54..745568ad 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -39,6 +39,7 @@ class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle val resp = Decoupled(new RoCCResponse) val mem = new HellaCacheIO()(conf.dcache) val busy = Bool(OUTPUT) + val s = Bool(INPUT) val interrupt = Bool(OUTPUT) // These should be handled differently, eventually From 62e9313aef1e1ca1f7e1ee3a98e8e1fc6b38d08c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 6 Feb 2014 00:13:02 -0800 Subject: [PATCH 0690/1087] Add 16 microarchitectural counters --- rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/dpath_util.scala | 7 +++++- rocket/src/main/scala/instructions.scala | 32 ++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 8ae151e7..bd3ce72f 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -192,6 +192,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module pcr.io <> io.fpu pcr.io.pc := wb_reg_pc io.ctrl.csr_replay := pcr.io.replay + pcr.io.uarch_counters.foreach(_ := Bool(false)) io.ptw.ptbr := pcr.io.ptbr io.ptw.invalidate := pcr.io.fatc diff --git a/rocket/src/main/scala/dpath_util.scala 
b/rocket/src/main/scala/dpath_util.scala index 08b463b3..f15960e7 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -99,6 +99,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val evec = UInt(OUTPUT, VADDR_BITS+1) val exception = Bool(INPUT) val retire = UInt(INPUT, log2Up(1+conf.retireWidth)) + val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+conf.retireWidth))) val cause = UInt(INPUT, conf.xprlen) val badvaddr_wen = Bool(INPUT) val pc = UInt(INPUT, VADDR_BITS+1) @@ -124,6 +125,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val reg_status = Reg(new Status) // reset down below val reg_time = WideCounter(conf.xprlen) val reg_instret = WideCounter(conf.xprlen, io.retire) + val reg_uarch_counters = io.uarch_counters.map(WideCounter(conf.xprlen, _)) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) @@ -209,7 +211,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val read_impl = Bits(2) val read_ptbr = reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS - val read_mapping = Map[Int,Bits]( + val read_mapping = collection.mutable.Map[Int,Bits]( CSRs.fflags -> (if (conf.fpu) reg_fflags else UInt(0)), CSRs.frm -> (if (conf.fpu) reg_frm else UInt(0)), CSRs.fcsr -> (if (conf.fpu) Cat(reg_frm, reg_fflags) else UInt(0)), @@ -236,6 +238,9 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module CSRs.tohost -> reg_tohost, CSRs.fromhost -> reg_fromhost) + for (i <- 0 until reg_uarch_counters.size) + read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) + io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) io.fcsr_rm := reg_frm diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 2d91d92c..05d8274d 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -219,6 +219,22 @@ object CSRs { val fflags = 0x1 val frm 
= 0x2 val fcsr = 0x3 + val uarch0 = 0x80 + val uarch1 = 0x81 + val uarch2 = 0x82 + val uarch3 = 0x83 + val uarch4 = 0x84 + val uarch5 = 0x85 + val uarch6 = 0x86 + val uarch7 = 0x87 + val uarch8 = 0x88 + val uarch9 = 0x89 + val uarch10 = 0x8a + val uarch11 = 0x8b + val uarch12 = 0x8c + val uarch13 = 0x8d + val uarch14 = 0x8e + val uarch15 = 0x8f val sup0 = 0x500 val sup1 = 0x501 val epc = 0x502 @@ -247,6 +263,22 @@ object CSRs { res += fflags res += frm res += fcsr + res += uarch0 + res += uarch1 + res += uarch2 + res += uarch3 + res += uarch4 + res += uarch5 + res += uarch6 + res += uarch7 + res += uarch8 + res += uarch9 + res += uarch10 + res += uarch11 + res += uarch12 + res += uarch13 + res += uarch14 + res += uarch15 res += sup0 res += sup1 res += epc From e7a726fbac5636187a64a3cdfbe56bd92b590b83 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 6 Feb 2014 01:48:56 -0800 Subject: [PATCH 0691/1087] Make uarch counters read-only --- rocket/src/main/scala/instructions.scala | 64 ++++++++++++------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 05d8274d..2cb872f1 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -219,22 +219,6 @@ object CSRs { val fflags = 0x1 val frm = 0x2 val fcsr = 0x3 - val uarch0 = 0x80 - val uarch1 = 0x81 - val uarch2 = 0x82 - val uarch3 = 0x83 - val uarch4 = 0x84 - val uarch5 = 0x85 - val uarch6 = 0x86 - val uarch7 = 0x87 - val uarch8 = 0x88 - val uarch9 = 0x89 - val uarch10 = 0x8a - val uarch11 = 0x8b - val uarch12 = 0x8c - val uarch13 = 0x8d - val uarch14 = 0x8e - val uarch15 = 0x8f val sup0 = 0x500 val sup1 = 0x501 val epc = 0x502 @@ -258,27 +242,27 @@ object CSRs { val cycle = 0xc00 val time = 0xc01 val instret = 0xc02 + val uarch0 = 0xc10 + val uarch1 = 0xc11 + val uarch2 = 0xc12 + val uarch3 = 0xc13 + val uarch4 = 0xc14 + val uarch5 = 0xc15 + val uarch6 = 0xc16 + 
val uarch7 = 0xc17 + val uarch8 = 0xc18 + val uarch9 = 0xc19 + val uarch10 = 0xc1a + val uarch11 = 0xc1b + val uarch12 = 0xc1c + val uarch13 = 0xc1d + val uarch14 = 0xc1e + val uarch15 = 0xc1f val all = { val res = collection.mutable.ArrayBuffer[Int]() res += fflags res += frm res += fcsr - res += uarch0 - res += uarch1 - res += uarch2 - res += uarch3 - res += uarch4 - res += uarch5 - res += uarch6 - res += uarch7 - res += uarch8 - res += uarch9 - res += uarch10 - res += uarch11 - res += uarch12 - res += uarch13 - res += uarch14 - res += uarch15 res += sup0 res += sup1 res += epc @@ -302,6 +286,22 @@ object CSRs { res += cycle res += time res += instret + res += uarch0 + res += uarch1 + res += uarch2 + res += uarch3 + res += uarch4 + res += uarch5 + res += uarch6 + res += uarch7 + res += uarch8 + res += uarch9 + res += uarch10 + res += uarch11 + res += uarch12 + res += uarch13 + res += uarch14 + res += uarch15 res.toArray } } From eca8c99f44444c0053d1b251a6d6a3df2ea20468 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 6 Feb 2014 03:06:55 -0800 Subject: [PATCH 0692/1087] Ignore rocc interrupt line when no rocc is present --- rocket/src/main/scala/dpath_util.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 803d6908..3d1922b1 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -132,6 +132,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val r_irq_timer = Reg(init=Bool(false)) val r_irq_ipi = Reg(init=Bool(true)) + val irq_rocc = Bool(!conf.rocc.isEmpty) && io.rocc.interrupt val cpu_req_valid = io.rw.cmd != CSR.N val host_pcr_req_valid = Reg(Bool()) // don't reset @@ -171,8 +172,8 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val wdata = Mux(cpu_req_valid, io.rw.wdata, host_pcr_bits.data) io.status := reg_status - io.status.ip := Cat(r_irq_timer, 
reg_fromhost.orR, r_irq_ipi, Bool(false), - Bool(false), io.rocc.interrupt, Bool(false), Bool(false)) + io.status.ip := Cat(r_irq_timer, reg_fromhost.orR, r_irq_ipi, Bool(false), + Bool(false), irq_rocc, Bool(false), Bool(false)) io.fatc := wen && decoded_addr(CSRs.fatc) io.evec := Mux(io.exception, reg_evec.toSInt, reg_epc).toUInt io.ptbr := reg_ptbr From 1456170c6df595b276317f5e6cda50a8a2477508 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 6 Feb 2014 12:01:49 -0800 Subject: [PATCH 0693/1087] Always stall decode on RoCC -> FENCE; never stall on RoCC -> deferred AMO.RL fence --- rocket/src/main/scala/ctrl.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 45fe7acf..a4564f71 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -427,11 +427,12 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_amo_aq = io.dpath.inst(26) val id_amo_rl = io.dpath.inst(25) val id_fence_next = id_fence || id_amo && id_amo_rl - val id_rocc_busy = io.rocc.busy || ex_reg_rocc_val || mem_reg_rocc_val || wb_reg_rocc_val - val id_fence_ok = io.dmem.ordered && !ex_reg_mem_val && - (Bool(conf.rocc.isEmpty) || !id_rocc_busy) - id_reg_fence := id_fence_next || id_reg_fence && !id_fence_ok - val id_do_fence = id_amo && id_amo_aq || id_fence_i || id_reg_fence && (id_mem_val || id_rocc_val) || id_csr_flush + val id_mem_busy = !io.dmem.ordered || ex_reg_mem_val + val id_rocc_busy = Bool(!conf.rocc.isEmpty) && + (io.rocc.busy || ex_reg_rocc_val || mem_reg_rocc_val || wb_reg_rocc_val) + id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy + val id_do_fence = id_rocc_busy && id_fence || + id_mem_busy && (id_amo && id_amo_aq || id_fence_i || id_reg_fence && (id_mem_val || id_rocc_val) || id_csr_flush) val (id_xcpt, id_cause) = checkExceptions(List( (id_interrupt, id_interrupt_cause), @@ -697,7 +698,7 @@ class 
Control(implicit conf: RocketConfiguration) extends Module id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || id_fp_val && id_stall_fpu || id_mem_val && !io.dmem.req.ready || - id_do_fence && !id_fence_ok + id_do_fence val ctrl_draind = id_interrupt || ex_reg_replay_next ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind From a09ff9fdc76d37b74c20031f4205c4ad233038f1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 10 Feb 2014 19:04:42 -0800 Subject: [PATCH 0694/1087] Revert to old AUIPC definition --- rocket/src/main/scala/consts.scala | 3 +-- rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/dpath.scala | 10 +++------- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index b58e94a3..78eed730 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -23,8 +23,7 @@ trait ScalarOpConstants { val A1_X = Bits("b??", 2) val A1_ZERO = UInt(0, 2) val A1_RS1 = UInt(1, 2) - val A1_PCHI = UInt(2, 2) - val A1_PC = UInt(3, 2) + val A1_PC = UInt(2, 2) val IMM_X = Bits("b???", 3) val IMM_S = UInt(0, 3) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index a4564f71..bab57620 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -99,7 +99,7 @@ object XDecode extends DecodeConstants JAL-> List(Y, N,N,BR_J, N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), JALR-> List(Y, N,N,BR_N, Y,N,Y,A2_FOUR,A1_PC, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_PCHI,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), LB-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,CSR.N,N,N,N,N,N,N), LH-> List(Y, N,N,BR_N, 
N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,CSR.N,N,N,N,N,N,N), diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index c5dd7d5f..381dd1da 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -132,13 +132,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val ex_rs = for (i <- 0 until id_rs.size) yield Mux(ex_reg_rs_bypass(i), bypass(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) val ex_imm = imm(ex_reg_sel_imm, ex_reg_inst) - val ex_op1_hi = MuxLookup(ex_reg_sel_alu1, ex_reg_pc.toSInt >> 12, Seq( - A1_RS1 -> (ex_rs(0).toSInt >> 12), - A1_ZERO -> SInt(0))) - val ex_op1_lo = MuxLookup(ex_reg_sel_alu1, UInt(0), Seq( - A1_RS1 -> ex_rs(0)(11,0), - A1_PC -> ex_reg_pc(11,0))) - val ex_op1 = Cat(ex_op1_hi, ex_op1_lo) + val ex_op1 = MuxLookup(ex_reg_sel_alu1, SInt(0), Seq( + A1_RS1 -> ex_rs(0).toSInt, + A1_PC -> ex_reg_pc.toSInt)) val ex_op2 = MuxLookup(ex_reg_sel_alu2, SInt(0), Seq( A2_RS2 -> ex_rs(1).toSInt, A2_IMM -> ex_imm, From 8e3ca609f71b226dc24e7d81a03f0831352856be Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 14 Feb 2014 17:40:00 -0800 Subject: [PATCH 0695/1087] Renumber uarch CSRs into custom CSR space --- rocket/src/main/scala/instructions.scala | 32 ++++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 2cb872f1..a70ad1f6 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -242,22 +242,22 @@ object CSRs { val cycle = 0xc00 val time = 0xc01 val instret = 0xc02 - val uarch0 = 0xc10 - val uarch1 = 0xc11 - val uarch2 = 0xc12 - val uarch3 = 0xc13 - val uarch4 = 0xc14 - val uarch5 = 0xc15 - val uarch6 = 0xc16 - val uarch7 = 0xc17 - val uarch8 = 0xc18 - val uarch9 = 0xc19 - val uarch10 = 0xc1a - val uarch11 = 0xc1b - val uarch12 = 0xc1c - val uarch13 = 0xc1d - 
val uarch14 = 0xc1e - val uarch15 = 0xc1f + val uarch0 = 0xcc0 + val uarch1 = 0xcc1 + val uarch2 = 0xcc2 + val uarch3 = 0xcc3 + val uarch4 = 0xcc4 + val uarch5 = 0xcc5 + val uarch6 = 0xcc6 + val uarch7 = 0xcc7 + val uarch8 = 0xcc8 + val uarch9 = 0xcc9 + val uarch10 = 0xcca + val uarch11 = 0xccb + val uarch12 = 0xccc + val uarch13 = 0xccd + val uarch14 = 0xcce + val uarch15 = 0xccf val all = { val res = collection.mutable.ArrayBuffer[Int]() res += fflags From 97b1841fcf48abbaa2b91f8b96e9556c3b244cac Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 22 Feb 2014 22:53:04 -0800 Subject: [PATCH 0696/1087] change dcache tag bits to 7 --- rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/tile.scala | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 381dd1da..f325a0f0 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -180,6 +180,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUInt io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_fp_val) require(io.dmem.req.bits.tag.getWidth >= 6) + require(conf.dcacheReqTagBits >= 6) // processor control regfile read val pcr = Module(new CSRFile) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 1791e6f8..99173748 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -13,7 +13,7 @@ case class RocketConfiguration(tl: TileLinkConfiguration, fastLoadByte: Boolean = false, fastMulDiv: Boolean = true) { - val dcacheReqTagBits = 10 // enforce compliance with require() // hue hue hue + val dcacheReqTagBits = 7 // enforce compliance with require() val xprlen = 64 val nxpr = 32 val nxprbits = log2Up(nxpr) From 98b830201a63816090cb64fb4a116e2297047758 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 
25 Feb 2014 03:31:06 -0800 Subject: [PATCH 0697/1087] add wen signal to dasm printf --- rocket/src/main/scala/dpath.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f325a0f0..afbe1ddc 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -300,9 +300,9 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ctrl.mem_waddr := mem_reg_inst(11,7) io.ctrl.wb_waddr := wb_reg_inst(11,7) - printf("C: %d [%d] pc=[%x] W[r%d=%x] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", + printf("C: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", pcr.io.time(32,0), io.ctrl.retire, wb_reg_pc, - Mux(wb_wen, wb_waddr, UInt(0)), wb_wdata, + Mux(wb_wen, wb_waddr, UInt(0)), wb_wdata, wb_wen, wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), wb_reg_inst, wb_reg_inst) From c7110c83892cd1527a8a4622130d5e041180d7c8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 28 Feb 2014 13:39:35 -0800 Subject: [PATCH 0698/1087] Make FPU pipeline depths configurable --- rocket/src/main/scala/core.scala | 4 ++-- rocket/src/main/scala/ctrl.scala | 8 ++++---- rocket/src/main/scala/dpath_util.scala | 8 ++++---- rocket/src/main/scala/fpu.scala | 8 +++++--- rocket/src/main/scala/tile.scala | 3 ++- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 7b89db98..cd8b694c 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -21,8 +21,8 @@ class Core(implicit conf: RocketConfiguration) extends Module val ctrl = Module(new Control) val dpath = Module(new Datapath) - val fpu: FPU = if (conf.fpu) { - val fpu = Module(new FPU(2,3)) + val fpu: FPU = if (!conf.fpu.isEmpty) { + val fpu = Module(new FPU(conf.fpu.get)) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl fpu 
diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index bab57620..9e004623 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -318,7 +318,7 @@ class Control(implicit conf: RocketConfiguration) extends Module } var decode_table = XDecode.table - if (conf.fpu) decode_table ++= FDecode.table + if (!conf.fpu.isEmpty) decode_table ++= FDecode.table if (!conf.rocc.isEmpty) decode_table ++= RoCCDecode.table val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) @@ -404,11 +404,11 @@ class Control(implicit conf: RocketConfiguration) extends Module (x.map(_._1).reduce(_||_), PriorityMux(x)) val fp_csrs = CSRs.fcsr :: CSRs.frm :: CSRs.fflags :: Nil - val legal_csrs = if (conf.fpu) CSRs.all.toSet else CSRs.all.toSet -- fp_csrs + val legal_csrs = if (!conf.fpu.isEmpty) CSRs.all.toSet else CSRs.all.toSet -- fp_csrs val id_csr_addr = io.dpath.inst(31,20) val id_csr_en = id_csr != CSR.N - val id_csr_fp = Bool(conf.fpu) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) + val id_csr_fp = Bool(!conf.fpu.isEmpty) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) val id_csr_wen = id_raddr1 != UInt(0) || !Vec(CSR.S, CSR.C).contains(id_csr) val id_csr_invalid = id_csr_en && !Vec(legal_csrs.map(UInt(_))).contains(id_csr_addr) val id_csr_privileged = id_csr_en && @@ -604,7 +604,7 @@ class Control(implicit conf: RocketConfiguration) extends Module sboard.set((wb_reg_div_mul_val || wb_dcache_miss || wb_reg_rocc_val) && io.dpath.wb_wen, io.dpath.wb_waddr) sboard.clear(io.dpath.ll_wen, io.dpath.ll_waddr) - val id_stall_fpu = if (conf.fpu) { + val id_stall_fpu = if (!conf.fpu.isEmpty) { val fp_sboard = new Scoreboard(32) fp_sboard.set((wb_dcache_miss && wb_reg_fp_wen || io.fpu.sboard_set) && !replay_wb, io.dpath.wb_waddr) fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) diff --git a/rocket/src/main/scala/dpath_util.scala 
b/rocket/src/main/scala/dpath_util.scala index 3d1922b1..a0082388 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -214,9 +214,9 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val read_ptbr = reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS val read_mapping = collection.mutable.Map[Int,Bits]( - CSRs.fflags -> (if (conf.fpu) reg_fflags else UInt(0)), - CSRs.frm -> (if (conf.fpu) reg_frm else UInt(0)), - CSRs.fcsr -> (if (conf.fpu) Cat(reg_frm, reg_fflags) else UInt(0)), + CSRs.fflags -> (if (!conf.fpu.isEmpty) reg_fflags else UInt(0)), + CSRs.frm -> (if (!conf.fpu.isEmpty) reg_frm else UInt(0)), + CSRs.fcsr -> (if (!conf.fpu.isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), CSRs.cycle -> reg_time, CSRs.time -> reg_time, CSRs.instret -> reg_instret, @@ -258,7 +258,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module reg_status.zero := 0 if (!conf.vm) reg_status.vm := false if (conf.rocc.isEmpty) reg_status.er := false - if (!conf.fpu) reg_status.ef := false + if (conf.fpu.isEmpty) reg_status.ef := false } when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 15c3f83f..a9277aca 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -6,6 +6,8 @@ import Util._ import FPConstants._ import uncore.constants.MemoryOpConstants._ +case class FPUConfig(sfmaLatency: Int = 2, dfmaLatency: Int = 3) + object FPConstants { val FCMD_ADD = Bits("b000000") @@ -432,7 +434,7 @@ class FPUDFMAPipe(val latency: Int) extends Module io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits } -class FPU(sfma_latency: Int, dfma_latency: Int) extends Module +class FPU(conf: FPUConfig) extends Module { val io = new Bundle { val ctrl = (new CtrlFPUIO).flip @@ -501,7 +503,7 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module val 
cmd_fma = mem_ctrl.cmd === FCMD_MADD || mem_ctrl.cmd === FCMD_MSUB || mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB - val sfma = Module(new FPUSFMAPipe(sfma_latency)) + val sfma = Module(new FPUSFMAPipe(conf.sfmaLatency)) sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1) sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2) @@ -511,7 +513,7 @@ class FPU(sfma_latency: Int, dfma_latency: Int) extends Module io.sfma.out := sfma.io.out io.sfma.exc := sfma.io.exc - val dfma = Module(new FPUDFMAPipe(dfma_latency)) + val dfma = Module(new FPUDFMAPipe(conf.dfmaLatency)) dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1) dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 99173748..e2e0a60f 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -6,7 +6,8 @@ import Util._ case class RocketConfiguration(tl: TileLinkConfiguration, icache: ICacheConfig, dcache: DCacheConfig, - fpu: Boolean, rocc: Option[RocketConfiguration => RoCC] = None, + fpu: Option[FPUConfig] = None, + rocc: Option[RocketConfiguration => RoCC] = None, retireWidth: Int = 1, vm: Boolean = true, fastLoadWord: Boolean = true, From fa75f6e81ed9d0e2c159bc6a6a6eba95f5394a2b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 4 Mar 2014 16:32:09 -0800 Subject: [PATCH 0699/1087] Fix null pointer exception when HAS_FPU=false --- rocket/src/main/scala/core.scala | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index cd8b694c..23fc00a5 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -21,12 +21,13 @@ class Core(implicit 
conf: RocketConfiguration) extends Module val ctrl = Module(new Control) val dpath = Module(new Datapath) - val fpu: FPU = if (!conf.fpu.isEmpty) { + if (!conf.fpu.isEmpty) { val fpu = Module(new FPU(conf.fpu.get)) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl - fpu - } else null + fpu.io.sfma.valid := Bool(false) + fpu.io.dfma.valid := Bool(false) + } ctrl.io.dpath <> dpath.io.ctrl dpath.io.host <> io.host @@ -41,7 +42,4 @@ class Core(implicit conf: RocketConfiguration) extends Module ctrl.io.rocc <> io.rocc dpath.io.rocc <> io.rocc - - fpu.io.sfma.valid := Bool(false) - fpu.io.dfma.valid := Bool(false) } From 9f2e16c58a5cda014bdbd0f18c8902d75d555081 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 4 Mar 2014 16:32:17 -0800 Subject: [PATCH 0700/1087] Fix D$ arbiter for >2 inputs --- rocket/src/main/scala/arbiter.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 48ea127e..5f4ee53e 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -46,7 +46,7 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Modu resp.bits.replay := io.mem.resp.bits.replay && tag_hit io.requestor(i).replay_next.valid := io.mem.replay_next.valid && - io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(log2Up(n)) + io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(i) io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> UInt(log2Up(n)) } } From ac4b3f9f224a68596a122ed88ddf96954ba0c3b6 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 4 Mar 2014 23:38:49 -0800 Subject: [PATCH 0701/1087] print out core id --- rocket/src/main/scala/dpath.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index afbe1ddc..0a958236 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -300,8 +300,8 
@@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ctrl.mem_waddr := mem_reg_inst(11,7) io.ctrl.wb_waddr := wb_reg_inst(11,7) - printf("C: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", - pcr.io.time(32,0), io.ctrl.retire, wb_reg_pc, + printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", + io.host.id, pcr.io.time(32,0), io.ctrl.retire, wb_reg_pc, Mux(wb_wen, wb_waddr, UInt(0)), wb_wdata, wb_wen, wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), From 00bc1a22936e06747bb91221982a169135d7e5c1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 10 Mar 2014 16:59:07 -0700 Subject: [PATCH 0702/1087] Add fclass.{s|d} instructions --- rocket/src/main/scala/ctrl.scala | 2 ++ rocket/src/main/scala/fpu.scala | 10 +++++++++- rocket/src/main/scala/instructions.scala | 2 ++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 9e004623..bdedf889 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -232,6 +232,8 @@ object FDecode extends DecodeConstants FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), FNMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCLASS_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCLASS_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), FMV_X_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), FMV_X_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, 
DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index a9277aca..c20f7b24 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -34,6 +34,7 @@ object FPConstants val FCMD_MIN = Bits("b011000") val FCMD_MAX = Bits("b011001") val FCMD_MFTX = Bits("b011100") + val FCMD_CLASS = Bits("b011101") val FCMD_MXTF = Bits("b011110") val FCMD_MADD = Bits("b100100") val FCMD_MSUB = Bits("b100101") @@ -91,6 +92,8 @@ class FPUDecoder extends Module FCVT_D_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,N,N,N,Y), FMV_X_S -> List(FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N,Y), FMV_X_D -> List(FCMD_MFTX, N,Y,N,N,N,N,Y,N,N,Y), + FCLASS_S -> List(FCMD_CLASS, N,Y,N,N,Y,N,Y,N,N,Y), + FCLASS_D -> List(FCMD_CLASS, N,Y,N,N,N,N,Y,N,N,Y), FCVT_W_S -> List(FCMD_CVT_W_FMT, N,Y,N,N,Y,N,Y,N,N,Y), FCVT_WU_S-> List(FCMD_CVT_WU_FMT,N,Y,N,N,Y,N,Y,N,N,Y), FCVT_L_S -> List(FCMD_CVT_L_FMT, N,Y,N,N,Y,N,Y,N,N,Y), @@ -203,7 +206,7 @@ class FPToInt extends Module when (io.in.bits.cmd === FCMD_STORE) { in.in1 := io.in.bits.in2 }.otherwise { - val doUpconvert = io.in.bits.single && io.in.bits.cmd != FCMD_MFTX + val doUpconvert = io.in.bits.single && io.in.bits.cmd != FCMD_MFTX && io.in.bits.cmd != FCMD_CLASS in.in1 := Mux(doUpconvert, upconvert(io.in.bits.in1), io.in.bits.in1) in.in2 := Mux(doUpconvert, upconvert(io.in.bits.in2), io.in.bits.in2) } @@ -236,6 +239,11 @@ class FPToInt extends Module io.out.bits.toint := d2i._1 io.out.bits.exc := d2i._2 } + when (in.cmd === FCMD_CLASS) { + val classify_s = hardfloat.recodedFloatNClassify(in.in1, 23, 9) + val classify_d = hardfloat.recodedFloatNClassify(in.in1, 52, 12) + io.out.bits.toint := Mux(in.single, classify_s, classify_d) + } when (in.cmd === FCMD_EQ || in.cmd === FCMD_LT || in.cmd === FCMD_LE) { io.out.bits.toint := dcmp_out io.out.bits.exc := dcmp_exc diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 
a70ad1f6..9077d46c 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -146,6 +146,8 @@ object Instructions { def FMAX_D = Bits("b1100101??????????000?????1010011") def FMV_X_S = Bits("b111000000000?????000?????1010011") def FMV_X_D = Bits("b111000100000?????000?????1010011") + def FCLASS_S = Bits("b111010000000?????000?????1010011") + def FCLASS_D = Bits("b111010100000?????000?????1010011") def FMV_S_X = Bits("b111100000000?????000?????1010011") def FMV_D_X = Bits("b111100100000?????000?????1010011") def FLW = Bits("b?????????????????010?????0000111") From a0389645b7198705f9db7d9f6cfaf32a8ce97c90 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 11 Mar 2014 18:58:24 -0700 Subject: [PATCH 0703/1087] New FP encoding; improved FP implementation --- rocket/src/main/scala/core.scala | 2 - rocket/src/main/scala/fpu.scala | 537 +++++++++-------------- rocket/src/main/scala/instructions.scala | 76 ++-- 3 files changed, 250 insertions(+), 365 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 23fc00a5..4a37c424 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -25,8 +25,6 @@ class Core(implicit conf: RocketConfiguration) extends Module val fpu = Module(new FPU(conf.fpu.get)) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl - fpu.io.sfma.valid := Bool(false) - fpu.io.dfma.valid := Bool(false) } ctrl.io.dpath <> dpath.io.ctrl diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index c20f7b24..682760f2 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -10,40 +10,25 @@ case class FPUConfig(sfmaLatency: Int = 2, dfmaLatency: Int = 3) object FPConstants { - val FCMD_ADD = Bits("b000000") - val FCMD_SUB = Bits("b000001") - val FCMD_MUL = Bits("b000010") - val FCMD_DIV = Bits("b000011") - val FCMD_SQRT = Bits("b000100") - val FCMD_SGNJ = Bits("b000101") - val 
FCMD_SGNJN = Bits("b000110") - val FCMD_SGNJX = Bits("b000111") - val FCMD_CVT_L_FMT = Bits("b001000") - val FCMD_CVT_LU_FMT = Bits("b001001") - val FCMD_CVT_W_FMT = Bits("b001010") - val FCMD_CVT_WU_FMT = Bits("b001011") - val FCMD_CVT_FMT_L = Bits("b001100") - val FCMD_CVT_FMT_LU = Bits("b001101") - val FCMD_CVT_FMT_W = Bits("b001110") - val FCMD_CVT_FMT_WU = Bits("b001111") - val FCMD_CVT_FMT_S = Bits("b010000") - val FCMD_CVT_FMT_D = Bits("b010001") - val FCMD_EQ = Bits("b010101") - val FCMD_LT = Bits("b010110") - val FCMD_LE = Bits("b010111") - val FCMD_MIN = Bits("b011000") - val FCMD_MAX = Bits("b011001") - val FCMD_MFTX = Bits("b011100") - val FCMD_CLASS = Bits("b011101") - val FCMD_MXTF = Bits("b011110") - val FCMD_MADD = Bits("b100100") - val FCMD_MSUB = Bits("b100101") - val FCMD_NMSUB = Bits("b100110") - val FCMD_NMADD = Bits("b100111") - val FCMD_LOAD = Bits("b111000") - val FCMD_STORE = Bits("b111001") - val FCMD_X = Bits("b??????") - val FCMD_WIDTH = 6 + val FCMD_ADD = Bits("b0??00") + val FCMD_SUB = Bits("b0??01") + val FCMD_MUL = Bits("b0??10") + val FCMD_MADD = Bits("b1??00") + val FCMD_MSUB = Bits("b1??01") + val FCMD_NMSUB = Bits("b1??10") + val FCMD_NMADD = Bits("b1??11") + val FCMD_DIV = Bits("b?0?11") + val FCMD_SQRT = Bits("b?1?11") + val FCMD_SGNJ = Bits("b??1?0") + val FCMD_MINMAX = Bits("b?01?1") + val FCMD_CVT_FF = Bits("b??0??") + val FCMD_CVT_IF = Bits("b?10??") + val FCMD_CMP = Bits("b?01??") + val FCMD_MV_XF = Bits("b?11??") + val FCMD_CVT_FI = Bits("b??0??") + val FCMD_MV_FX = Bits("b??1??") + val FCMD_X = Bits("b?????") + val FCMD_WIDTH = 5 val RM_SZ = 3 val FLAGS_SZ = 5 @@ -52,10 +37,12 @@ object FPConstants class FPUCtrlSigs extends Bundle { val cmd = Bits(width = FCMD_WIDTH) + val ldst = Bool() val wen = Bool() val ren1 = Bool() val ren2 = Bool() val ren3 = Bool() + val swap23 = Bool() val single = Bool() val fromint = Bool() val toint = Bool() @@ -75,79 +62,69 @@ class FPUDecoder extends Module val Y = Bool(true) val X = 
Bool(false) val decoder = DecodeLogic(io.inst, - List (FCMD_X, X,X,X,X,X,X,X,X,X,X), - Array(FLW -> List(FCMD_LOAD, Y,N,N,N,Y,N,N,N,N,N), - FLD -> List(FCMD_LOAD, Y,N,N,N,N,N,N,N,N,N), - FSW -> List(FCMD_STORE, N,N,Y,N,Y,N,Y,N,N,N), - FSD -> List(FCMD_STORE, N,N,Y,N,N,N,Y,N,N,N), - FMV_S_X -> List(FCMD_MXTF, Y,N,N,N,Y,Y,N,N,N,Y), - FMV_D_X -> List(FCMD_MXTF, Y,N,N,N,N,Y,N,N,N,Y), - FCVT_S_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,Y,Y,N,N,N,Y), - FCVT_S_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,Y,Y,N,N,N,Y), - FCVT_S_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,Y,Y,N,N,N,Y), - FCVT_S_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,Y,Y,N,N,N,Y), - FCVT_D_W -> List(FCMD_CVT_FMT_W, Y,N,N,N,N,Y,N,N,N,Y), - FCVT_D_WU-> List(FCMD_CVT_FMT_WU,Y,N,N,N,N,Y,N,N,N,Y), - FCVT_D_L -> List(FCMD_CVT_FMT_L, Y,N,N,N,N,Y,N,N,N,Y), - FCVT_D_LU-> List(FCMD_CVT_FMT_LU,Y,N,N,N,N,Y,N,N,N,Y), - FMV_X_S -> List(FCMD_MFTX, N,Y,N,N,Y,N,Y,N,N,Y), - FMV_X_D -> List(FCMD_MFTX, N,Y,N,N,N,N,Y,N,N,Y), - FCLASS_S -> List(FCMD_CLASS, N,Y,N,N,Y,N,Y,N,N,Y), - FCLASS_D -> List(FCMD_CLASS, N,Y,N,N,N,N,Y,N,N,Y), - FCVT_W_S -> List(FCMD_CVT_W_FMT, N,Y,N,N,Y,N,Y,N,N,Y), - FCVT_WU_S-> List(FCMD_CVT_WU_FMT,N,Y,N,N,Y,N,Y,N,N,Y), - FCVT_L_S -> List(FCMD_CVT_L_FMT, N,Y,N,N,Y,N,Y,N,N,Y), - FCVT_LU_S-> List(FCMD_CVT_LU_FMT,N,Y,N,N,Y,N,Y,N,N,Y), - FCVT_W_D -> List(FCMD_CVT_W_FMT, N,Y,N,N,N,N,Y,N,N,Y), - FCVT_WU_D-> List(FCMD_CVT_WU_FMT,N,Y,N,N,N,N,Y,N,N,Y), - FCVT_L_D -> List(FCMD_CVT_L_FMT, N,Y,N,N,N,N,Y,N,N,Y), - FCVT_LU_D-> List(FCMD_CVT_LU_FMT,N,Y,N,N,N,N,Y,N,N,Y), - FCVT_S_D -> List(FCMD_CVT_FMT_D, Y,Y,N,N,Y,N,N,Y,N,Y), - FCVT_D_S -> List(FCMD_CVT_FMT_S, Y,Y,N,N,N,N,N,Y,N,Y), - FEQ_S -> List(FCMD_EQ, N,Y,Y,N,Y,N,Y,N,N,Y), - FLT_S -> List(FCMD_LT, N,Y,Y,N,Y,N,Y,N,N,Y), - FLE_S -> List(FCMD_LE, N,Y,Y,N,Y,N,Y,N,N,Y), - FEQ_D -> List(FCMD_EQ, N,Y,Y,N,N,N,Y,N,N,Y), - FLT_D -> List(FCMD_LT, N,Y,Y,N,N,N,Y,N,N,Y), - FLE_D -> List(FCMD_LE, N,Y,Y,N,N,N,Y,N,N,Y), - FSGNJ_S -> List(FCMD_SGNJ, Y,Y,Y,N,Y,N,N,Y,N,Y), - FSGNJN_S -> List(FCMD_SGNJN, 
Y,Y,Y,N,Y,N,N,Y,N,Y), - FSGNJX_S -> List(FCMD_SGNJX, Y,Y,Y,N,Y,N,N,Y,N,Y), - FSGNJ_D -> List(FCMD_SGNJ, Y,Y,Y,N,N,N,N,Y,N,Y), - FSGNJN_D -> List(FCMD_SGNJN, Y,Y,Y,N,N,N,N,Y,N,Y), - FSGNJX_D -> List(FCMD_SGNJX, Y,Y,Y,N,N,N,N,Y,N,Y), - FMIN_S -> List(FCMD_MIN, Y,Y,Y,N,Y,N,Y,Y,N,Y), - FMAX_S -> List(FCMD_MAX, Y,Y,Y,N,Y,N,Y,Y,N,Y), - FMIN_D -> List(FCMD_MIN, Y,Y,Y,N,N,N,Y,Y,N,Y), - FMAX_D -> List(FCMD_MAX, Y,Y,Y,N,N,N,Y,Y,N,Y), - FADD_S -> List(FCMD_ADD, Y,Y,Y,N,Y,N,N,N,Y,Y), - FSUB_S -> List(FCMD_SUB, Y,Y,Y,N,Y,N,N,N,Y,Y), - FMUL_S -> List(FCMD_MUL, Y,Y,Y,N,Y,N,N,N,Y,Y), - FADD_D -> List(FCMD_ADD, Y,Y,Y,N,N,N,N,N,Y,Y), - FSUB_D -> List(FCMD_SUB, Y,Y,Y,N,N,N,N,N,Y,Y), - FMUL_D -> List(FCMD_MUL, Y,Y,Y,N,N,N,N,N,Y,Y), - FMADD_S -> List(FCMD_MADD, Y,Y,Y,Y,Y,N,N,N,Y,Y), - FMSUB_S -> List(FCMD_MSUB, Y,Y,Y,Y,Y,N,N,N,Y,Y), - FNMADD_S -> List(FCMD_NMADD, Y,Y,Y,Y,Y,N,N,N,Y,Y), - FNMSUB_S -> List(FCMD_NMSUB, Y,Y,Y,Y,Y,N,N,N,Y,Y), - FMADD_D -> List(FCMD_MADD, Y,Y,Y,Y,N,N,N,N,Y,Y), - FMSUB_D -> List(FCMD_MSUB, Y,Y,Y,Y,N,N,N,N,Y,Y), - FNMADD_D -> List(FCMD_NMADD, Y,Y,Y,Y,N,N,N,N,Y,Y), - FNMSUB_D -> List(FCMD_NMSUB, Y,Y,Y,Y,N,N,N,N,Y,Y) + List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X), + Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,Y,N,N,N,N,N), + FLD -> List(FCMD_X, Y,Y,N,N,N,X,N,N,N,N,N,N), + FSW -> List(FCMD_MV_XF, Y,N,N,Y,N,X,Y,N,Y,N,N,N), + FSD -> List(FCMD_MV_XF, Y,N,N,Y,N,X,N,N,Y,N,N,N), + FMV_S_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,Y,Y,N,N,N,Y), + FMV_D_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,N,Y,N,N,N,Y), + FCVT_S_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,Y,Y,N,N,N,Y), + FCVT_S_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,Y,Y,N,N,N,Y), + FCVT_S_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,Y,Y,N,N,N,Y), + FCVT_S_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,Y,Y,N,N,N,Y), + FCVT_D_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,N,Y,N,N,N,Y), + FCVT_D_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,N,Y,N,N,N,Y), + FCVT_D_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,N,Y,N,N,N,Y), + FCVT_D_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,N,Y,N,N,N,Y), + FMV_X_S -> List(FCMD_MV_XF, 
N,N,Y,N,N,X,Y,N,Y,N,N,Y), + FMV_X_D -> List(FCMD_MV_XF, N,N,Y,N,N,X,N,N,Y,N,N,Y), + FCLASS_S -> List(FCMD_MV_XF, N,N,Y,N,N,X,Y,N,Y,N,N,Y), + FCLASS_D -> List(FCMD_MV_XF, N,N,Y,N,N,X,N,N,Y,N,N,Y), + FCVT_W_S -> List(FCMD_CVT_IF, N,N,Y,N,N,X,Y,N,Y,N,N,Y), + FCVT_WU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,X,Y,N,Y,N,N,Y), + FCVT_L_S -> List(FCMD_CVT_IF, N,N,Y,N,N,X,Y,N,Y,N,N,Y), + FCVT_LU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,X,Y,N,Y,N,N,Y), + FCVT_W_D -> List(FCMD_CVT_IF, N,N,Y,N,N,X,N,N,Y,N,N,Y), + FCVT_WU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,X,N,N,Y,N,N,Y), + FCVT_L_D -> List(FCMD_CVT_IF, N,N,Y,N,N,X,N,N,Y,N,N,Y), + FCVT_LU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,X,N,N,Y,N,N,Y), + FCVT_S_D -> List(FCMD_CVT_FF, N,Y,Y,N,N,X,Y,N,N,Y,N,Y), + FCVT_D_S -> List(FCMD_CVT_FF, N,Y,Y,N,N,X,N,N,N,Y,N,Y), + FEQ_S -> List(FCMD_CMP, N,N,Y,Y,N,N,Y,N,Y,N,N,N), + FLT_S -> List(FCMD_CMP, N,N,Y,Y,N,N,Y,N,Y,N,N,N), + FLE_S -> List(FCMD_CMP, N,N,Y,Y,N,N,Y,N,Y,N,N,N), + FEQ_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,Y,N,N,N), + FLT_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,Y,N,N,N), + FLE_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,Y,N,N,N), + FSGNJ_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,Y,N,N,Y,N,N), + FSGNJN_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,Y,N,N,Y,N,N), + FSGNJX_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,Y,N,N,Y,N,N), + FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,Y,N,N), + FSGNJN_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,Y,N,N), + FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,Y,N,N), + FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,Y,N,Y,Y,N,N), + FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,Y,N,Y,Y,N,N), + FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,Y,Y,N,N), + FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,Y,Y,N,N), + FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,Y,Y,N,N,N,Y,Y), + FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,Y,Y,N,N,N,Y,Y), + FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,Y,N,N,N,Y,Y), + FADD_D -> List(FCMD_ADD, N,Y,Y,Y,N,Y,N,N,N,N,Y,Y), + FSUB_D -> List(FCMD_SUB, N,Y,Y,Y,N,Y,N,N,N,N,Y,Y), + FMUL_D -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,Y,Y), + FMADD_S -> List(FCMD_MADD, 
N,Y,Y,Y,Y,N,Y,N,N,N,Y,Y), + FMSUB_S -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,Y,N,N,N,Y,Y), + FNMADD_S -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,Y,N,N,N,Y,Y), + FNMSUB_S -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,Y,N,N,N,Y,Y), + FMADD_D -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,Y,Y), + FMSUB_D -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,Y,Y), + FNMADD_D -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,Y,Y), + FNMSUB_D -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,Y,Y) )) - val cmd :: wen :: ren1 :: ren2 :: ren3 :: single :: fromint :: toint :: fastpipe :: fma :: round :: Nil = decoder - - io.sigs.cmd := cmd - io.sigs.wen := wen.toBool - io.sigs.ren1 := ren1.toBool - io.sigs.ren2 := ren2.toBool - io.sigs.ren3 := ren3.toBool - io.sigs.single := single.toBool - io.sigs.fromint := fromint.toBool - io.sigs.toint := toint.toBool - io.sigs.fastpipe := fastpipe.toBool - io.sigs.fma := fma.toBool - io.sigs.round := round.toBool + val s = io.sigs + Vec(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap23, s.single, s.fromint, + s.toint, s.fastpipe, s.fma, s.round) := decoder } class DpathFPUIO extends Bundle { @@ -179,18 +156,25 @@ class CtrlFPUIO extends Bundle { val sboard_clra = UInt(INPUT, 5) } +class FPResult extends Bundle +{ + val data = Bits(width = 65) + val exc = Bits(width = 5) +} + +class FPInput extends FPUCtrlSigs { + val rm = Bits(width = 3) + val typ = Bits(width = 2) + val in1 = Bits(width = 65) + val in2 = Bits(width = 65) + val in3 = Bits(width = 65) + override def clone = new FPInput().asInstanceOf[this.type] +} + class FPToInt extends Module { - class Input extends Bundle { - val single = Bool() - val cmd = Bits(width = FCMD_WIDTH) - val rm = Bits(width = 3) - val in1 = Bits(width = 65) - val in2 = Bits(width = 65) - override def clone = new Input().asInstanceOf[this.type] - } val io = new Bundle { - val in = Valid(new Input).flip + val in = Valid(new FPInput).flip val out = Valid(new Bundle { val lt = Bool() val store = Bits(width = 64) @@ -199,77 +183,54 @@ class FPToInt extends Module }) } - val in = 
Reg(new Input) + val in = Reg(new FPInput) val valid = Reg(next=io.in.valid) when (io.in.valid) { def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1 - when (io.in.bits.cmd === FCMD_STORE) { - in.in1 := io.in.bits.in2 - }.otherwise { - val doUpconvert = io.in.bits.single && io.in.bits.cmd != FCMD_MFTX && io.in.bits.cmd != FCMD_CLASS - in.in1 := Mux(doUpconvert, upconvert(io.in.bits.in1), io.in.bits.in1) - in.in2 := Mux(doUpconvert, upconvert(io.in.bits.in2), io.in.bits.in2) + in := io.in.bits + when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd != FCMD_MV_XF) { + in.in1 := upconvert(io.in.bits.in1) + in.in2 := upconvert(io.in.bits.in2) } - in.single := io.in.bits.single - in.cmd := io.in.bits.cmd - in.rm := io.in.bits.rm } val unrec_s = hardfloat.recodedFloatNToFloatN(in.in1, 23, 9) val unrec_d = hardfloat.recodedFloatNToFloatN(in.in1, 52, 12) val unrec_out = Mux(in.single, Cat(Fill(32, unrec_s(31)), unrec_s), unrec_d) + val classify_s = hardfloat.recodedFloatNClassify(in.in1, 23, 9) + val classify_d = hardfloat.recodedFloatNClassify(in.in1, 52, 12) + val classify_out = Mux(in.single, classify_s, classify_d) + val dcmp = Module(new hardfloat.recodedFloatNCompare(52, 12)) dcmp.io.a := in.in1 dcmp.io.b := in.in2 - val dcmp_out = (in.cmd & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR - val dcmp_exc = (in.cmd & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UInt(4) + val dcmp_out = (~in.rm & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR + val dcmp_exc = (~in.rm & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UInt(4) - val d2i = hardfloat.recodedFloatNToAny(in.in1, in.rm, ~in.cmd(1,0), 52, 12, 64) + val d2i = hardfloat.recodedFloatNToAny(in.in1, in.rm, in.typ ^ 1, 52, 12, 64) - io.out.bits.toint := unrec_out + io.out.bits.toint := Mux(in.rm(0), classify_out, unrec_out) io.out.bits.store := unrec_out io.out.bits.exc := Bits(0) - when (in.cmd === FCMD_CVT_W_FMT || in.cmd === FCMD_CVT_WU_FMT) { - 
io.out.bits.toint := Cat(Fill(32, d2i._1(31)), d2i._1(31,0)) - io.out.bits.exc := d2i._2 - } - when (in.cmd === FCMD_CVT_L_FMT || in.cmd === FCMD_CVT_LU_FMT) { - io.out.bits.toint := d2i._1 - io.out.bits.exc := d2i._2 - } - when (in.cmd === FCMD_CLASS) { - val classify_s = hardfloat.recodedFloatNClassify(in.in1, 23, 9) - val classify_d = hardfloat.recodedFloatNClassify(in.in1, 52, 12) - io.out.bits.toint := Mux(in.single, classify_s, classify_d) - } - when (in.cmd === FCMD_EQ || in.cmd === FCMD_LT || in.cmd === FCMD_LE) { + when (in.cmd === FCMD_CMP) { io.out.bits.toint := dcmp_out io.out.bits.exc := dcmp_exc } + when (in.cmd === FCMD_CVT_IF) { + io.out.bits.toint := Mux(in.typ(1), d2i._1, d2i._1(31,0).toSInt) + io.out.bits.exc := d2i._2 + } io.out.valid := valid io.out.bits.lt := dcmp.io.a_lt_b } -class FPResult extends Bundle -{ - val data = Bits(width = 65) - val exc = Bits(width = 5) -} - class IntToFP(val latency: Int) extends Module { - class Input extends Bundle { - val single = Bool() - val cmd = Bits(width = FCMD_WIDTH) - val rm = Bits(width = 3) - val data = Bits(width = 64) - override def clone = new Input().asInstanceOf[this.type] - } val io = new Bundle { - val in = Valid(new Input).flip + val in = Valid(new FPInput).flip val out = Valid(new FPResult) } @@ -277,19 +238,18 @@ class IntToFP(val latency: Int) extends Module val mux = new FPResult mux.exc := Bits(0) - mux.data := hardfloat.floatNToRecodedFloatN(in.bits.data, 52, 12) + mux.data := hardfloat.floatNToRecodedFloatN(in.bits.in1, 52, 12) when (in.bits.single) { - mux.data := Cat(SInt(-1, 32), hardfloat.floatNToRecodedFloatN(in.bits.data, 23, 9)) + mux.data := Cat(SInt(-1, 32), hardfloat.floatNToRecodedFloatN(in.bits.in1, 23, 9)) } - when (in.bits.cmd === FCMD_CVT_FMT_W || in.bits.cmd === FCMD_CVT_FMT_WU || - in.bits.cmd === FCMD_CVT_FMT_L || in.bits.cmd === FCMD_CVT_FMT_LU) { + when (in.bits.cmd === FCMD_CVT_FI) { when (in.bits.single) { - val u = hardfloat.anyToRecodedFloatN(in.bits.data, 
in.bits.rm, ~in.bits.cmd(1,0), 23, 9, 64) + val u = hardfloat.anyToRecodedFloatN(in.bits.in1(63,0), in.bits.rm, in.bits.typ ^ 1, 23, 9, 64) mux.data := Cat(SInt(-1, 32), u._1) mux.exc := u._2 }.otherwise { - val u = hardfloat.anyToRecodedFloatN(in.bits.data, in.bits.rm, ~in.bits.cmd(1,0), 52, 12, 64) + val u = hardfloat.anyToRecodedFloatN(in.bits.in1(63,0), in.bits.rm, in.bits.typ ^ 1, 52, 12, 64) mux.data := u._1 mux.exc := u._2 } @@ -300,16 +260,8 @@ class IntToFP(val latency: Int) extends Module class FPToFP(val latency: Int) extends Module { - class Input extends Bundle { - val single = Bool() - val cmd = Bits(width = FCMD_WIDTH) - val rm = Bits(width = 3) - val in1 = Bits(width = 65) - val in2 = Bits(width = 65) - override def clone = new Input().asInstanceOf[this.type] - } val io = new Bundle { - val in = Valid(new Input).flip + val in = Valid(new FPInput).flip val out = Valid(new FPResult) val lt = Bool(INPUT) // from FPToInt } @@ -317,34 +269,31 @@ class FPToFP(val latency: Int) extends Module val in = Pipe(io.in) // fp->fp units - val sign_s = Mux(in.bits.cmd === FCMD_SGNJ, in.bits.in2(32), - Mux(in.bits.cmd === FCMD_SGNJN, ~in.bits.in2(32), - in.bits.in1(32) ^ in.bits.in2(32))) // FCMD_SGNJX - val sign_d = Mux(in.bits.cmd === FCMD_SGNJ, in.bits.in2(64), - Mux(in.bits.cmd === FCMD_SGNJN, ~in.bits.in2(64), - in.bits.in1(64) ^ in.bits.in2(64))) // FCMD_SGNJX - val fsgnj = Cat(Mux(in.bits.single, in.bits.in1(64), sign_d), in.bits.in1(63,33), - Mux(in.bits.single, sign_s, in.bits.in1(32)), in.bits.in1(31,0)) + val isSgnj = in.bits.cmd === FCMD_SGNJ + def fsgnjSign(in1: Bits, in2: Bits, pos: Int, en: Bool, rm: Bits) = + Mux(rm(1) || !en, in1(pos), rm(0)) ^ (en && in2(pos)) + val sign_s = fsgnjSign(in.bits.in1, in.bits.in2, 32, in.bits.single && isSgnj, in.bits.rm) + val sign_d = fsgnjSign(in.bits.in1, in.bits.in2, 64, !in.bits.single && isSgnj, in.bits.rm) + val fsgnj = Cat(sign_d, in.bits.in1(63,33), sign_s, in.bits.in1(31,0)) val s2d = 
hardfloat.recodedFloatNToRecodedFloatM(in.bits.in1, in.bits.rm, 23, 9, 52, 12) val d2s = hardfloat.recodedFloatNToRecodedFloatM(in.bits.in1, in.bits.rm, 52, 12, 23, 9) - val isnan1 = Mux(in.bits.single, in.bits.in1(31,29) === Bits("b111"), in.bits.in1(63,61) === Bits("b111")) - val isnan2 = Mux(in.bits.single, in.bits.in2(31,29) === Bits("b111"), in.bits.in2(63,61) === Bits("b111")) + val isnan1 = Mux(in.bits.single, in.bits.in1(31,29).andR, in.bits.in1(63,61).andR) + val isnan2 = Mux(in.bits.single, in.bits.in2(31,29).andR, in.bits.in2(63,61).andR) val issnan1 = isnan1 && ~Mux(in.bits.single, in.bits.in1(22), in.bits.in1(51)) val issnan2 = isnan2 && ~Mux(in.bits.single, in.bits.in2(22), in.bits.in2(51)) val minmax_exc = Cat(issnan1 || issnan2, Bits(0,4)) - val min = in.bits.cmd === FCMD_MIN - val minmax = Mux(isnan2 || !isnan1 && (min === io.lt), in.bits.in1, in.bits.in2) + val isMax = in.bits.rm(0) + val isLHS = isnan2 || isMax != io.lt && !isnan1 val mux = new FPResult - mux.data := fsgnj - mux.exc := Bits(0) + mux.exc := minmax_exc + mux.data := in.bits.in2 - when (in.bits.cmd === FCMD_MIN || in.bits.cmd === FCMD_MAX) { - mux.data := minmax - } - when (in.bits.cmd === FCMD_CVT_FMT_S || in.bits.cmd === FCMD_CVT_FMT_D) { + when (isSgnj) { mux.exc := UInt(0) } + when (isSgnj || isLHS) { mux.data := fsgnj } + when (in.bits.cmd === FCMD_CVT_FF) { when (in.bits.single) { mux.data := Cat(SInt(-1, 32), d2s._1) mux.exc := d2s._2 @@ -357,89 +306,39 @@ class FPToFP(val latency: Int) extends Module io.out <> Pipe(in.valid, mux, latency-1) } -class ioFMA(width: Int) extends Bundle { - val valid = Bool(INPUT) - val cmd = Bits(INPUT, FCMD_WIDTH) - val rm = Bits(INPUT, 3) - val in1 = Bits(INPUT, width) - val in2 = Bits(INPUT, width) - val in3 = Bits(INPUT, width) - val out = Bits(OUTPUT, width) - val exc = Bits(OUTPUT, 5) -} - -class FPUSFMAPipe(val latency: Int) extends Module +class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module { - val io = new 
ioFMA(33) + val io = new Bundle { + val in = Valid(new FPInput).flip + val out = Valid(new FPResult) + } - val cmd = Reg(Bits()) - val rm = Reg(Bits()) - val in1 = Reg(Bits()) - val in2 = Reg(Bits()) - val in3 = Reg(Bits()) + val width = sigWidth + expWidth + val one = UInt(1) << (width-1) + val zero = (io.in.bits.in1(width) ^ io.in.bits.in2(width)) << width - val cmd_fma = io.cmd === FCMD_MADD || io.cmd === FCMD_MSUB || - io.cmd === FCMD_NMADD || io.cmd === FCMD_NMSUB - val cmd_addsub = io.cmd === FCMD_ADD || io.cmd === FCMD_SUB - - val one = Bits("h80000000") - val zero = Cat(io.in1(32) ^ io.in2(32), Bits(0, 32)) - - val valid = Reg(next=io.valid) - when (io.valid) { - cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) - rm := io.rm - in1 := io.in1 - in2 := Mux(cmd_addsub, one, io.in2) - in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, zero)) + val valid = Reg(next=io.in.valid) + val in = Reg(new FPInput) + when (io.in.valid) { + in := io.in.bits + val cmd_fma = io.in.bits.ren3 + val cmd_addsub = io.in.bits.swap23 + in.cmd := Cat(io.in.bits.cmd(1) & (cmd_fma || cmd_addsub), io.in.bits.cmd(0)) + when (cmd_addsub) { in.in2 := one } + unless (cmd_fma || cmd_addsub) { in.in3 := zero } } - val fma = Module(new hardfloat.mulAddSubRecodedFloatN(23, 9)) - fma.io.op := cmd - fma.io.roundingMode := rm - fma.io.a := in1 - fma.io.b := in2 - fma.io.c := in3 + val fma = Module(new hardfloat.mulAddSubRecodedFloatN(sigWidth, expWidth)) + fma.io.op := in.cmd + fma.io.roundingMode := in.rm + fma.io.a := in.in1 + fma.io.b := in.in2 + fma.io.c := in.in3 - io.out := Pipe(valid, fma.io.out, latency-1).bits - io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits -} - -class FPUDFMAPipe(val latency: Int) extends Module -{ - val io = new ioFMA(65) - - val cmd = Reg(Bits()) - val rm = Reg(Bits()) - val in1 = Reg(Bits()) - val in2 = Reg(Bits()) - val in3 = Reg(Bits()) - - val cmd_fma = io.cmd === FCMD_MADD || io.cmd === FCMD_MSUB || - io.cmd === FCMD_NMADD || io.cmd === 
FCMD_NMSUB - val cmd_addsub = io.cmd === FCMD_ADD || io.cmd === FCMD_SUB - - val one = Bits("h8000000000000000") - val zero = Cat(io.in1(64) ^ io.in2(64), Bits(0, 64)) - - val valid = Reg(next=io.valid) - when (io.valid) { - cmd := Cat(io.cmd(1) & (cmd_fma || cmd_addsub), io.cmd(0)) - rm := io.rm - in1 := io.in1 - in2 := Mux(cmd_addsub, one, io.in2) - in3 := Mux(cmd_fma, io.in3, Mux(cmd_addsub, io.in2, zero)) - } - - val fma = Module(new hardfloat.mulAddSubRecodedFloatN(52, 12)) - fma.io.op := cmd - fma.io.roundingMode := rm - fma.io.a := in1 - fma.io.b := in2 - fma.io.c := in3 - - io.out := Pipe(valid, fma.io.out, latency-1).bits - io.exc := Pipe(valid, fma.io.exceptionFlags, latency-1).bits + val res = new FPResult + res.data := fma.io.out + res.exc := fma.io.exceptionFlags + io.out := Pipe(valid, res, latency-1) } class FPU(conf: FPUConfig) extends Module @@ -447,24 +346,21 @@ class FPU(conf: FPUConfig) extends Module val io = new Bundle { val ctrl = (new CtrlFPUIO).flip val dpath = (new DpathFPUIO).flip - val sfma = new ioFMA(33) - val dfma = new ioFMA(65) } - val ex_reg_inst = Reg(Bits()) - when (io.ctrl.valid) { - ex_reg_inst := io.dpath.inst - } val ex_reg_valid = Reg(next=io.ctrl.valid, init=Bool(false)) + val ex_reg_inst = RegEnable(io.dpath.inst, io.ctrl.valid) val mem_reg_valid = Reg(next=ex_reg_valid && !io.ctrl.killx, init=Bool(false)) + val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid) val killm = io.ctrl.killm || io.ctrl.nack_mem val wb_reg_valid = Reg(next=mem_reg_valid && !killm, init=Bool(false)) val fp_decoder = Module(new FPUDecoder) fp_decoder.io.inst := io.dpath.inst - val ctrl = RegEnable(fp_decoder.io.sigs, io.ctrl.valid) - val mem_ctrl = RegEnable(ctrl, ex_reg_valid) + val id_ctrl = fp_decoder.io.sigs + val ex_ctrl = RegEnable(id_ctrl, io.ctrl.valid) + val mem_ctrl = RegEnable(ex_ctrl, ex_reg_valid) val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid) // load response @@ -480,64 +376,58 @@ class FPU(conf: FPUConfig) extends Module val 
regfile = Mem(Bits(width = 65), 32) when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded } - val ex_rs1 = regfile(ex_reg_inst(19,15)) - val ex_rs2 = regfile(ex_reg_inst(24,20)) - val ex_rs3 = regfile(ex_reg_inst(31,27)) + val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt())) + when (io.ctrl.valid) { + when (id_ctrl.ren1) { ex_ra1 := io.dpath.inst(19,15) } + when (id_ctrl.ren3) { ex_ra3 := io.dpath.inst(31,27) } + when (id_ctrl.ren2) { + when ( id_ctrl.ldst) { ex_ra1 := io.dpath.inst(24,20) } + when (!id_ctrl.ldst && !id_ctrl.swap23) { ex_ra2 := io.dpath.inst(24,20) } + when (!id_ctrl.ldst && id_ctrl.swap23) { ex_ra3 := io.dpath.inst(24,20) } + } + } + val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_)) val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.dpath.fcsr_rm, ex_reg_inst(14,12)) - val fpiu = Module(new FPToInt) - fpiu.io.in.valid := ex_reg_valid && ctrl.toint - fpiu.io.in.bits := ctrl - fpiu.io.in.bits.rm := ex_rm - fpiu.io.in.bits.in1 := ex_rs1 - fpiu.io.in.bits.in2 := ex_rs2 + val req = new FPInput + req := ex_ctrl + req.rm := ex_rm + req.in1 := ex_rs1 + req.in2 := ex_rs2 + req.in3 := ex_rs3 + req.typ := ex_reg_inst(21,20) + val sfma = Module(new FPUFMAPipe(conf.sfmaLatency, 23, 9)) + sfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && ex_ctrl.single + sfma.io.in.bits := req + + val dfma = Module(new FPUFMAPipe(conf.dfmaLatency, 52, 12)) + dfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && !ex_ctrl.single + dfma.io.in.bits := req + + val fpiu = Module(new FPToInt) + fpiu.io.in.valid := ex_reg_valid && ex_ctrl.toint + fpiu.io.in.bits := req io.dpath.store_data := fpiu.io.out.bits.store io.dpath.toint_data := fpiu.io.out.bits.toint val ifpu = Module(new IntToFP(3)) - ifpu.io.in.valid := ex_reg_valid && ctrl.fromint - ifpu.io.in.bits := ctrl - ifpu.io.in.bits.rm := ex_rm - ifpu.io.in.bits.data := io.dpath.fromint_data + ifpu.io.in.valid := ex_reg_valid && ex_ctrl.fromint + ifpu.io.in.bits := req + ifpu.io.in.bits.in1 
:= io.dpath.fromint_data + val fpmu = Module(new FPToFP(2)) - fpmu.io.in.valid := ex_reg_valid && ctrl.fastpipe - fpmu.io.in.bits := ctrl - fpmu.io.in.bits.rm := ex_rm - fpmu.io.in.bits.in1 := ex_rs1 - fpmu.io.in.bits.in2 := ex_rs2 + fpmu.io.in.valid := ex_reg_valid && ex_ctrl.fastpipe + fpmu.io.in.bits := req fpmu.io.lt := fpiu.io.out.bits.lt - val cmd_fma = mem_ctrl.cmd === FCMD_MADD || mem_ctrl.cmd === FCMD_MSUB || - mem_ctrl.cmd === FCMD_NMADD || mem_ctrl.cmd === FCMD_NMSUB - val cmd_addsub = mem_ctrl.cmd === FCMD_ADD || mem_ctrl.cmd === FCMD_SUB - val sfma = Module(new FPUSFMAPipe(conf.sfmaLatency)) - sfma.io.valid := io.sfma.valid || ex_reg_valid && ctrl.fma && ctrl.single - sfma.io.in1 := Mux(io.sfma.valid, io.sfma.in1, ex_rs1) - sfma.io.in2 := Mux(io.sfma.valid, io.sfma.in2, ex_rs2) - sfma.io.in3 := Mux(io.sfma.valid, io.sfma.in3, ex_rs3) - sfma.io.cmd := Mux(io.sfma.valid, io.sfma.cmd, ctrl.cmd) - sfma.io.rm := Mux(io.sfma.valid, io.sfma.rm, ex_rm) - io.sfma.out := sfma.io.out - io.sfma.exc := sfma.io.exc - - val dfma = Module(new FPUDFMAPipe(conf.dfmaLatency)) - dfma.io.valid := io.dfma.valid || ex_reg_valid && ctrl.fma && !ctrl.single - dfma.io.in1 := Mux(io.dfma.valid, io.dfma.in1, ex_rs1) - dfma.io.in2 := Mux(io.dfma.valid, io.dfma.in2, ex_rs2) - dfma.io.in3 := Mux(io.dfma.valid, io.dfma.in3, ex_rs3) - dfma.io.cmd := Mux(io.dfma.valid, io.dfma.cmd, ctrl.cmd) - dfma.io.rm := Mux(io.dfma.valid, io.dfma.rm, ex_rm) - io.dfma.out := dfma.io.out - io.dfma.exc := dfma.io.exc - // writeback arbitration case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: Bits, wexc: Bits) val pipes = List( Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits.data, fpmu.io.out.bits.exc), Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits.data, ifpu.io.out.bits.exc), - Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, Cat(SInt(-1, 32), sfma.io.out), sfma.io.exc), - Pipe(dfma, dfma.latency, (c: 
FPUCtrlSigs) => c.fma && !c.single, dfma.io.out, dfma.io.exc)) + Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, Cat(SInt(-1, 32), sfma.io.out.bits.data), sfma.io.out.bits.exc), + Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits.data, dfma.io.out.bits.exc)) def latencyMask(c: FPUCtrlSigs, offset: Int) = { require(pipes.forall(_.lat >= offset)) pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_) @@ -549,11 +439,8 @@ class FPU(conf: FPUConfig) extends Module val wen = Reg(init=Bits(0, maxLatency-1)) val winfo = Vec.fill(maxLatency-1){Reg(Bits())} val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) - val (write_port_busy, mem_winfo) = (Reg(Bool()), Reg(Bits())) - when (ex_reg_valid) { - write_port_busy := mem_wen && (memLatencyMask & latencyMask(ctrl, 1)).orR || (wen & latencyMask(ctrl, 0)).orR - mem_winfo := Cat(pipeid(ctrl), ex_reg_inst(11,7)) - } + val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, ex_reg_valid) + val mem_winfo = Cat(pipeid(mem_ctrl), mem_reg_inst(11,7)) for (i <- 0 until maxLatency-2) { when (wen(i+1)) { winfo(i) := winfo(i+1) } @@ -584,7 +471,7 @@ class FPU(conf: FPUConfig) extends Module Mux(wen(0), wexc, UInt(0)) val fp_inflight = wb_reg_valid && wb_ctrl.toint || wen.orR - val units_busy = mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ctrl.single, io.sfma.valid, io.dfma.valid)) + val units_busy = Bool(false) //mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid)) io.ctrl.fcsr_rdy := !fp_inflight io.ctrl.nack_mem := units_busy || write_port_busy io.ctrl.dec <> fp_decoder.io.sigs @@ -593,5 +480,5 @@ class FPU(conf: FPUConfig) extends Module io.ctrl.sboard_clr := wen(0) && useScoreboard(x => wsrc === UInt(x._2)) io.ctrl.sboard_clra := waddr // we don't currently support round-max-magnitude (rm=4) - io.ctrl.illegal_rm := 
ex_rm(2) && ctrl.round + io.ctrl.illegal_rm := ex_rm(2) && ex_ctrl.round } diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 9077d46c..35bf5df0 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -104,51 +104,51 @@ object Instructions { def FSUB_S = Bits("b0000100??????????????????1010011") def FMUL_S = Bits("b0001000??????????????????1010011") def FDIV_S = Bits("b0001100??????????????????1010011") - def FSQRT_S = Bits("b001000000000?????????????1010011") - def FSGNJ_S = Bits("b0010100??????????000?????1010011") - def FSGNJN_S = Bits("b0011000??????????000?????1010011") - def FSGNJX_S = Bits("b0011100??????????000?????1010011") + def FSGNJ_S = Bits("b0010000??????????000?????1010011") + def FSGNJN_S = Bits("b0010000??????????001?????1010011") + def FSGNJX_S = Bits("b0010000??????????010?????1010011") + def FMIN_S = Bits("b0010100??????????000?????1010011") + def FMAX_S = Bits("b0010100??????????001?????1010011") + def FSQRT_S = Bits("b010110000000?????????????1010011") def FADD_D = Bits("b0000001??????????????????1010011") def FSUB_D = Bits("b0000101??????????????????1010011") def FMUL_D = Bits("b0001001??????????????????1010011") def FDIV_D = Bits("b0001101??????????????????1010011") - def FSQRT_D = Bits("b001000100000?????????????1010011") - def FSGNJ_D = Bits("b0010101??????????000?????1010011") - def FSGNJN_D = Bits("b0011001??????????000?????1010011") - def FSGNJX_D = Bits("b0011101??????????000?????1010011") - def FCVT_L_S = Bits("b010000000000?????????????1010011") - def FCVT_LU_S = Bits("b010010000000?????????????1010011") - def FCVT_W_S = Bits("b010100000000?????????????1010011") - def FCVT_WU_S = Bits("b010110000000?????????????1010011") - def FCVT_L_D = Bits("b010000100000?????????????1010011") - def FCVT_LU_D = Bits("b010010100000?????????????1010011") - def FCVT_W_D = Bits("b010100100000?????????????1010011") - def FCVT_WU_D = 
Bits("b010110100000?????????????1010011") - def FCVT_S_L = Bits("b011000000000?????????????1010011") - def FCVT_S_LU = Bits("b011010000000?????????????1010011") - def FCVT_S_W = Bits("b011100000000?????????????1010011") - def FCVT_S_WU = Bits("b011110000000?????????????1010011") - def FCVT_D_L = Bits("b011000100000?????????????1010011") - def FCVT_D_LU = Bits("b011010100000?????????????1010011") - def FCVT_D_W = Bits("b011100100000?????????????1010011") - def FCVT_D_WU = Bits("b011110100000?????????????1010011") - def FCVT_S_D = Bits("b100010000000?????????????1010011") - def FCVT_D_S = Bits("b100000100000?????????????1010011") - def FEQ_S = Bits("b1010100??????????000?????1010011") - def FLT_S = Bits("b1011000??????????000?????1010011") - def FLE_S = Bits("b1011100??????????000?????1010011") - def FEQ_D = Bits("b1010101??????????000?????1010011") - def FLT_D = Bits("b1011001??????????000?????1010011") - def FLE_D = Bits("b1011101??????????000?????1010011") - def FMIN_S = Bits("b1100000??????????000?????1010011") - def FMAX_S = Bits("b1100100??????????000?????1010011") - def FMIN_D = Bits("b1100001??????????000?????1010011") - def FMAX_D = Bits("b1100101??????????000?????1010011") + def FSGNJ_D = Bits("b0010001??????????000?????1010011") + def FSGNJN_D = Bits("b0010001??????????001?????1010011") + def FSGNJX_D = Bits("b0010001??????????010?????1010011") + def FMIN_D = Bits("b0010101??????????000?????1010011") + def FMAX_D = Bits("b0010101??????????001?????1010011") + def FCVT_S_D = Bits("b010000000001?????????????1010011") + def FCVT_D_S = Bits("b010000100000?????????????1010011") + def FSQRT_D = Bits("b010110100000?????????????1010011") + def FLE_S = Bits("b1010000??????????000?????1010011") + def FLT_S = Bits("b1010000??????????001?????1010011") + def FEQ_S = Bits("b1010000??????????010?????1010011") + def FLE_D = Bits("b1010001??????????000?????1010011") + def FLT_D = Bits("b1010001??????????001?????1010011") + def FEQ_D = 
Bits("b1010001??????????010?????1010011") + def FCVT_W_S = Bits("b110000000000?????????????1010011") + def FCVT_WU_S = Bits("b110000000001?????????????1010011") + def FCVT_L_S = Bits("b110000000010?????????????1010011") + def FCVT_LU_S = Bits("b110000000011?????????????1010011") def FMV_X_S = Bits("b111000000000?????000?????1010011") + def FCLASS_S = Bits("b111000000000?????001?????1010011") + def FCVT_W_D = Bits("b110000100000?????????????1010011") + def FCVT_WU_D = Bits("b110000100001?????????????1010011") + def FCVT_L_D = Bits("b110000100010?????????????1010011") + def FCVT_LU_D = Bits("b110000100011?????????????1010011") def FMV_X_D = Bits("b111000100000?????000?????1010011") - def FCLASS_S = Bits("b111010000000?????000?????1010011") - def FCLASS_D = Bits("b111010100000?????000?????1010011") + def FCLASS_D = Bits("b111000100000?????001?????1010011") + def FCVT_S_W = Bits("b110100000000?????????????1010011") + def FCVT_S_WU = Bits("b110100000001?????????????1010011") + def FCVT_S_L = Bits("b110100000010?????????????1010011") + def FCVT_S_LU = Bits("b110100000011?????????????1010011") def FMV_S_X = Bits("b111100000000?????000?????1010011") + def FCVT_D_W = Bits("b110100100000?????????????1010011") + def FCVT_D_WU = Bits("b110100100001?????????????1010011") + def FCVT_D_L = Bits("b110100100010?????????????1010011") + def FCVT_D_LU = Bits("b110100100011?????????????1010011") def FMV_D_X = Bits("b111100100000?????000?????1010011") def FLW = Bits("b?????????????????010?????0000111") def FLD = Bits("b?????????????????011?????0000111") From 53d62cb69de2bd05a36f3594e4134fffab6994ef Mon Sep 17 00:00:00 2001 From: Donggyu Kim Date: Sat, 15 Mar 2014 16:45:58 -0700 Subject: [PATCH 0704/1087] remove nondeterminism --- rocket/src/main/scala/ctrl.scala | 8 +++++++- rocket/src/main/scala/dpath_util.scala | 7 ++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index bdedf889..39ce01c5 100644 
--- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -412,7 +412,13 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_csr_en = id_csr != CSR.N val id_csr_fp = Bool(!conf.fpu.isEmpty) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) val id_csr_wen = id_raddr1 != UInt(0) || !Vec(CSR.S, CSR.C).contains(id_csr) - val id_csr_invalid = id_csr_en && !Vec(legal_csrs.map(UInt(_))).contains(id_csr_addr) + + val legal_uint_csrs = new scala.collection.mutable.ArrayBuffer[Bits] + for (csr <- legal_csrs) { + legal_uint_csrs += UInt(csr) + } + + val id_csr_invalid = id_csr_en && !Vec(legal_uint_csrs).contains(id_csr_addr) val id_csr_privileged = id_csr_en && (id_csr_addr(11,10) === UInt(3) && id_csr_wen || id_csr_addr(11,10) === UInt(2) || diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index a0082388..34af1d8c 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -243,7 +243,12 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module for (i <- 0 until reg_uarch_counters.size) read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) - io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) + val decoded_mapping = new scala.collection.mutable.ArrayBuffer[(Bool, Bits)] + for ((k, v) <- read_mapping) { + decoded_mapping += ((decoded_addr(k), v)) + } + + io.rw.rdata := Mux1H(decoded_mapping) io.fcsr_rm := reg_frm when (io.fcsr_flags.valid) { From 943d7ac80a268b4b6b985bf58ec9a288a39e49d5 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 15 Mar 2014 17:31:48 -0700 Subject: [PATCH 0705/1087] Use LinkedHashSet/Map for simpler determinism --- rocket/src/main/scala/ctrl.scala | 12 ++++-------- rocket/src/main/scala/dpath_util.scala | 9 ++------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala 
index 39ce01c5..c87d5cc1 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -406,19 +406,15 @@ class Control(implicit conf: RocketConfiguration) extends Module (x.map(_._1).reduce(_||_), PriorityMux(x)) val fp_csrs = CSRs.fcsr :: CSRs.frm :: CSRs.fflags :: Nil - val legal_csrs = if (!conf.fpu.isEmpty) CSRs.all.toSet else CSRs.all.toSet -- fp_csrs + val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*) + if (conf.fpu.isEmpty) + legal_csrs --= fp_csrs val id_csr_addr = io.dpath.inst(31,20) val id_csr_en = id_csr != CSR.N val id_csr_fp = Bool(!conf.fpu.isEmpty) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) val id_csr_wen = id_raddr1 != UInt(0) || !Vec(CSR.S, CSR.C).contains(id_csr) - - val legal_uint_csrs = new scala.collection.mutable.ArrayBuffer[Bits] - for (csr <- legal_csrs) { - legal_uint_csrs += UInt(csr) - } - - val id_csr_invalid = id_csr_en && !Vec(legal_uint_csrs).contains(id_csr_addr) + val id_csr_invalid = id_csr_en && !Vec(legal_csrs.map(UInt(_))).contains(id_csr_addr) val id_csr_privileged = id_csr_en && (id_csr_addr(11,10) === UInt(3) && id_csr_wen || id_csr_addr(11,10) === UInt(2) || diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/dpath_util.scala index 34af1d8c..e539cbd2 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/dpath_util.scala @@ -213,7 +213,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val read_impl = Bits(2) val read_ptbr = reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS - val read_mapping = collection.mutable.Map[Int,Bits]( + val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( CSRs.fflags -> (if (!conf.fpu.isEmpty) reg_fflags else UInt(0)), CSRs.frm -> (if (!conf.fpu.isEmpty) reg_frm else UInt(0)), CSRs.fcsr -> (if (!conf.fpu.isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), @@ -243,12 +243,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module for (i <- 0 
until reg_uarch_counters.size) read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) - val decoded_mapping = new scala.collection.mutable.ArrayBuffer[(Bool, Bits)] - for ((k, v) <- read_mapping) { - decoded_mapping += ((decoded_addr(k), v)) - } - - io.rw.rdata := Mux1H(decoded_mapping) + io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) io.fcsr_rm := reg_frm when (io.fcsr_flags.valid) { From 54cbf0c4f1a953bcb06ff31e981d27d4cc77abcd Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 15 Mar 2014 17:33:17 -0700 Subject: [PATCH 0706/1087] Add (unused) RV32 CSRs --- rocket/src/main/scala/instructions.scala | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 35bf5df0..ec85ca9e 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -260,6 +260,10 @@ object CSRs { val uarch13 = 0xccd val uarch14 = 0xcce val uarch15 = 0xccf + val counth = 0x586 + val cycleh = 0xc80 + val timeh = 0xc81 + val instreth = 0xc82 val all = { val res = collection.mutable.ArrayBuffer[Int]() res += fflags @@ -306,4 +310,12 @@ object CSRs { res += uarch15 res.toArray } + val all32 = { + val res = collection.mutable.ArrayBuffer(all:_*) + res += counth + res += cycleh + res += timeh + res += instreth + res.toArray + } } From 59964180216e78e64e7071027d5a7238cc846bf8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 18 Mar 2014 18:36:19 -0700 Subject: [PATCH 0707/1087] Fix exception behavior of fmin/fmax --- rocket/src/main/scala/fpu.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 682760f2..ee25d6ab 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -103,10 +103,10 @@ class FPUDecoder extends Module FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,Y,N,N), FSGNJN_D -> 
List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,Y,N,N), FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,Y,N,N), - FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,Y,N,Y,Y,N,N), - FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,Y,N,Y,Y,N,N), - FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,Y,Y,N,N), - FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,Y,Y,N,N), + FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,Y,N,N,Y,N,N), + FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,Y,N,N,Y,N,N), + FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,Y,N,N), + FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,Y,N,N), FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,Y,Y,N,N,N,Y,Y), FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,Y,Y,N,N,N,Y,Y), FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,Y,N,N,N,Y,Y), @@ -406,7 +406,7 @@ class FPU(conf: FPUConfig) extends Module dfma.io.in.bits := req val fpiu = Module(new FPToInt) - fpiu.io.in.valid := ex_reg_valid && ex_ctrl.toint + fpiu.io.in.valid := ex_reg_valid && (ex_ctrl.toint || ex_ctrl.cmd === FCMD_MINMAX) fpiu.io.in.bits := req io.dpath.store_data := fpiu.io.out.bits.store io.dpath.toint_data := fpiu.io.out.bits.toint From 1b030777ced234c0b1360401b596d8859f02a7b6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 24 Mar 2014 04:36:12 -0700 Subject: [PATCH 0708/1087] Remove vestigial control signal --- rocket/src/main/scala/ctrl.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index c87d5cc1..974ea106 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -383,7 +383,6 @@ class Control(implicit conf: RocketConfiguration) extends Module val wb_reg_div_mul_val = Reg(init=Bool(false)) val take_pc = Bool() - val pc_taken = Reg(next=take_pc, init=Bool(false)) val take_pc_wb = Bool() val ctrl_killd = Bool() val ctrl_killx = Bool() @@ -707,7 +706,7 @@ class Control(implicit conf: RocketConfiguration) extends Module ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind io.dpath.killd := 
take_pc || ctrl_stalld && !ctrl_draind - io.imem.resp.ready := pc_taken || !ctrl_stalld || ctrl_draind + io.imem.resp.ready := !ctrl_stalld || ctrl_draind io.imem.invalidate := wb_reg_flush_inst io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen From 6465e2df143b74f8d34ea5fb49eaa880e8549cc8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 24 Mar 2014 04:36:53 -0700 Subject: [PATCH 0709/1087] Make Int -> Bool conversions explicit --- rocket/src/main/scala/ecc.scala | 4 ++-- rocket/src/main/scala/util.scala | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/ecc.scala b/rocket/src/main/scala/ecc.scala index fc5e8773..01164e88 100644 --- a/rocket/src/main/scala/ecc.scala +++ b/rocket/src/main/scala/ecc.scala @@ -57,7 +57,7 @@ class SECCode extends Code val y = for (i <- 1 to n) yield { if (isPow2(i)) { - val r = for (j <- 1 to n; if j != i && (j & i)) + val r = for (j <- 1 to n; if j != i && (j & i).toBoolean) yield x(mapping(j)) r reduce (_^_) } else @@ -71,7 +71,7 @@ class SECCode extends Code val p2 = for (i <- 0 until log2Up(n)) yield 1 << i val syndrome = p2 map { i => - val r = for (j <- 1 to n; if j & i) + val r = for (j <- 1 to n; if (j & i).toBoolean) yield y(j-1) r reduce (_^_) } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 7735847c..d917a522 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -3,15 +3,18 @@ package rocket import Chisel._ import scala.math._ -object Util -{ +class BooleanToInt(x: Int) { + def toBoolean: Boolean = if (x != 0) true else false +} + +object Util { implicit def intToUInt(x: Int): UInt = UInt(x) - implicit def intToBoolean(x: Int): Boolean = if (x != 0) true else false - implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0 implicit def booleanToBool(x: Boolean): Bits = Bool(x) implicit def intSeqToUIntSeq(x: Iterable[Int]): Iterable[UInt] = x.map(UInt(_)) - implicit def wcToUInt(c: 
WideCounter): UInt = c.value + + implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0 + implicit def intToBooleanToInt(x: Int): BooleanToInt = new BooleanToInt(x) } object AVec From 804b09c8c5b88135c3fca8e4278174f01e4383ba Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 25 Mar 2014 05:20:24 -0700 Subject: [PATCH 0710/1087] Frontend QoR tweaks --- rocket/src/main/scala/icache.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 88742586..61faf477 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -71,9 +71,9 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu val pcp4_0 = s1_pc + UInt(c.ibytes) val pcp4 = Cat(s1_pc(VADDR_BITS-1) & pcp4_0(VADDR_BITS-1), pcp4_0(VADDR_BITS-1,0)) val icmiss = s2_valid && !icache.io.resp.valid - val predicted_npc = Mux(btb.io.hit, btbTarget, pcp4) + val predicted_npc = btbTarget /* zero if btb miss */ | Mux(btb.io.hit, UInt(0), pcp4) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt - val s0_same_block = !icmiss && !io.cpu.req.valid && (predicted_npc >> log2Up(c.databits/8)) === (s1_pc >> log2Up(c.databits/8)) + val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.hit && ((pcp4 & (c.databits/8)) === (s1_pc & (c.databits/8))) val stall = io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { @@ -115,7 +115,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu icache.io.resp.ready := !stall && !s1_same_block io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) - io.cpu.resp.bits.pc := s2_pc + io.cpu.resp.bits.pc := s2_pc & SInt(-c.ibytes) // discard PC LSBs io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(c.databits/8)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)) io.cpu.resp.bits.taken := s2_btb_hit io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(c.ibytes)-1,0) != UInt(0) From 
e3b12e0b85de931c3452bd2368774ed2fadef820 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 25 Mar 2014 05:22:04 -0700 Subject: [PATCH 0711/1087] Make BTB more complexity-effective BTB entries reference a small number of unique pages, so we separate the storage of pages from indices. This makes much larger BTBs feasible. It's easy to exacerbate cycle time this way, so one-hot encoding is used as needed. --- rocket/src/main/scala/btb.scala | 121 ++++++++++++++++++ .../scala/{dpath_util.scala => csr.scala} | 63 --------- rocket/src/main/scala/icache.scala | 6 +- rocket/src/main/scala/util.scala | 31 +++-- 4 files changed, 142 insertions(+), 79 deletions(-) create mode 100644 rocket/src/main/scala/btb.scala rename rocket/src/main/scala/{dpath_util.scala => csr.scala} (82%) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala new file mode 100644 index 00000000..cdbc4733 --- /dev/null +++ b/rocket/src/main/scala/btb.scala @@ -0,0 +1,121 @@ +package rocket + +import Chisel._ +import Util._ +import Node._ +import uncore.constants.AddressConstants._ + +case class BTBConfig(entries: Int) { + val matchBits = PGIDX_BITS + val pages0 = 1 + log2Up(entries) // is this sensible? what about matchBits? 
+ val pages = (pages0+1)/2*2 // control logic assumes 2 divides pages +} + +// fully-associative branch target buffer +class BTB(conf: BTBConfig) extends Module { + val io = new Bundle { + val current_pc = UInt(INPUT, VADDR_BITS) + val hit = Bool(OUTPUT) + val target = UInt(OUTPUT, VADDR_BITS) + val wen = Bool(INPUT) + val taken = Bool(INPUT) + val invalidate = Bool(INPUT) + val correct_pc = UInt(INPUT, VADDR_BITS) + val correct_target = UInt(INPUT, VADDR_BITS) + } + + val idxValid = Vec.fill(conf.entries){Reg(init=Bool(false))} + val idxs = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))} + val idxPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} + val idxPagesOH = idxPages.map(UIntToOH(_)(conf.pages-1,0)) + val tgts = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))} + val tgtPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} + val tgtPagesOH = tgtPages.map(UIntToOH(_)(conf.pages-1,0)) + val pages = Vec.fill(conf.pages){Reg(UInt(width=VADDR_BITS-conf.matchBits))} + val pageValid = Vec.fill(conf.pages){Reg(init=Bool(false))} + + private def page(addr: UInt) = addr >> conf.matchBits + private def pageMatch(addr: UInt) = { + val p = page(addr) + Vec(pages.map(_ === p)).toBits & pageValid.toBits + } + private def tagMatch(addr: UInt): UInt = tagMatch(addr, pageMatch(addr)) + private def tagMatch(addr: UInt, pgMatch: UInt): UInt = { + val idx = addr(conf.matchBits-1,0) + val idxMatch = idxs.map(_ === idx).toBits + val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits + idxValid.toBits & idxMatch & idxPageMatch + } + + val hits = tagMatch(io.current_pc) + val idxPageMatch = pageMatch(io.correct_pc) + val tgtPageMatch = pageMatch(io.correct_target) + val updates = tagMatch(io.correct_pc, idxPageMatch) + val anyUpdates = updates.orR + + private var lfsr = LFSR16(io.wen) + def rand(width: Int) = { + lfsr = lfsr(lfsr.getWidth-1,1) + Random.oneHot(width, lfsr) + } + def randOrInvalid(valid: UInt) = + 
Mux(!valid.andR, PriorityEncoderOH(~valid), rand(valid.getWidth)) + + val idxRepl = randOrInvalid(idxValid.toBits) + val idxWen = updates.toBits | idxRepl & ~anyUpdates.toSInt + + val useIdxPageMatch = idxPageMatch.orR + val doIdxPageRepl = !useIdxPageMatch && io.taken + val idxPageRepl = rand(conf.pages) + val idxPageUpdate = Mux(useIdxPageMatch, idxPageMatch, idxPageRepl) + val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) + + val samePage = page(io.correct_pc) === page(io.correct_target) + val useTgtPageMatch = (tgtPageMatch & ~idxPageReplEn).orR + val doTgtPageRepl = !useTgtPageMatch && io.taken && !samePage + val tgtPageRepl = Mux(samePage, idxPageUpdate, idxPageUpdate(conf.pages-2,0) << 1 | idxPageUpdate(conf.pages-1)) + val tgtPageUpdate = Mux(useTgtPageMatch, tgtPageMatch, tgtPageRepl) + val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) + + val pageReplEn = idxPageReplEn | tgtPageReplEn + + when (io.wen) { + for (i <- 0 until conf.entries) { + when (idxWen(i)) { + idxValid(i) := io.taken + when (io.taken) { + idxs(i) := io.correct_pc + idxPages(i) := OHToUInt(idxPageUpdate) + tgts(i) := io.correct_target + tgtPages(i) := OHToUInt(tgtPageUpdate) + } + }.elsewhen ((pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR) { + idxValid(i) := false + } + } + + require(conf.pages % 2 == 0) + val idxWritesEven = (idxPageUpdate & Fill(conf.pages/2, UInt(1,2))).orR + + def writeBank(i: Int, mod: Int, en: Bool, data: UInt) = { + for (i <- i until conf.pages by mod) { + when (en && pageReplEn(i)) { + pages(i) := data + pageValid(i) := true + } + } + } + writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), + Mux(idxWritesEven, page(io.correct_pc), page(io.correct_target))) + writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl), + Mux(idxWritesEven, page(io.correct_target), page(io.correct_pc))) + } + + when (io.invalidate) { + idxValid.foreach(_ := false) + pageValid.foreach(_ := false) + } + + io.hit := hits.toBits.orR + 
io.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) +} diff --git a/rocket/src/main/scala/dpath_util.scala b/rocket/src/main/scala/csr.scala similarity index 82% rename from rocket/src/main/scala/dpath_util.scala rename to rocket/src/main/scala/csr.scala index e539cbd2..421079a4 100644 --- a/rocket/src/main/scala/dpath_util.scala +++ b/rocket/src/main/scala/csr.scala @@ -7,56 +7,6 @@ import uncore.HTIFIO import uncore.constants.AddressConstants._ import scala.math._ -class DpathBTBIO extends Bundle -{ - val current_pc = UInt(INPUT, VADDR_BITS) - val hit = Bool(OUTPUT) - val target = UInt(OUTPUT, VADDR_BITS) - val wen = Bool(INPUT) - val clr = Bool(INPUT) - val invalidate = Bool(INPUT) - val correct_pc = UInt(INPUT, VADDR_BITS) - val correct_target = UInt(INPUT, VADDR_BITS) -} - -// fully-associative branch target buffer -class rocketDpathBTB(entries: Int) extends Module -{ - val io = new DpathBTBIO - - var hit_reduction = Bool(false) - val hit = Bool() - val update = Bool() - var update_reduction = Bool(false) - val valid = Vec.fill(entries){Reg(init=Bool(false))} - val hits = Vec.fill(entries){Bool()} - val updates = Vec.fill(entries){Bool()} - val targets = Vec.fill(entries){Reg(UInt())} - val anyUpdate = updates.toBits.orR - - val random_way = Random(entries, io.wen) - val invalid_way = valid.indexWhere((x: Bool) => !x) - val repl_way = Mux(valid.contains(Bool(false)), invalid_way, random_way) - - for (i <- 0 until entries) { - val tag = Reg(UInt()) - hits(i) := valid(i) && tag === io.current_pc - updates(i) := valid(i) && tag === io.correct_pc - - when (io.wen && (updates(i) || !anyUpdate && UInt(i) === repl_way)) { - valid(i) := Bool(false) - when (!io.clr) { - valid(i) := Bool(true) - tag := io.correct_pc - targets(i) := io.correct_target - } - } - } - - io.hit := hits.toBits.orR - io.target := Mux1H(hits, targets) -} - class Status extends Bundle { val ip = Bits(width = 8) val im = Bits(width = 8) @@ -294,16 +244,3 @@ class 
CSRFile(implicit conf: RocketConfiguration) extends Module reg_status.ip := 0 } } - -class ioReadPort(d: Int, w: Int) extends Bundle -{ - override def clone = new ioReadPort(d, w).asInstanceOf[this.type] -} - -class ioWritePort(d: Int, w: Int) extends Bundle -{ - val addr = UInt(INPUT, log2Up(d)) - val en = Bool(INPUT) - val data = Bits(INPUT, w) - override def clone = new ioWritePort(d, w).asInstanceOf[this.type] -} diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 61faf477..07ff1510 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -6,7 +6,7 @@ import Util._ case class ICacheConfig(sets: Int, assoc: Int, ibytes: Int = 4, - ntlb: Int = 8, nbtb: Int = 8, + ntlb: Int = 8, btb: BTBConfig = BTBConfig(8), code: Code = new IdentityCode) { val w = 1 @@ -55,7 +55,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu val mem = new UncachedTileLinkIO } - val btb = Module(new rocketDpathBTB(c.nbtb)) + val btb = Module(new BTB(c.btb)) val icache = Module(new ICache) val tlb = Module(new TLB(c.ntlb)) @@ -94,7 +94,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu btb.io.current_pc := s1_pc btb.io.wen := io.cpu.req.bits.mispredict - btb.io.clr := !io.cpu.req.bits.taken + btb.io.taken := io.cpu.req.bits.taken btb.io.correct_pc := io.cpu.req.bits.currentpc btb.io.correct_target := io.cpu.req.bits.pc btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index d917a522..24016763 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -11,12 +11,15 @@ object Util { implicit def intToUInt(x: Int): UInt = UInt(x) implicit def booleanToBool(x: Boolean): Bits = Bool(x) implicit def intSeqToUIntSeq(x: Iterable[Int]): Iterable[UInt] = x.map(UInt(_)) + implicit def seqToVec[T <: Data](x: Iterable[T]): Vec[T] = Vec(x) 
implicit def wcToUInt(c: WideCounter): UInt = c.value implicit def booleanToInt(x: Boolean): Int = if (x) 1 else 0 implicit def intToBooleanToInt(x: Int): BooleanToInt = new BooleanToInt(x) } +import Util._ + object AVec { def apply[T <: Data](elts: Seq[T]): Vec[T] = Vec(elts) @@ -141,18 +144,20 @@ case class WideCounter(width: Int, inc: UInt = UInt(1)) object Random { - def apply(mod: Int, inc: Bool = Bool(true)): UInt = { - if (isPow2(mod)) { - require(mod <= 65536) - LFSR16(inc)(log2Up(mod)-1,0).toUInt - } else { - val max = 1 << log2Up(mod*8) - val rand_pow2 = apply(max, inc) - - var res = UInt(mod-1) - for (i <- mod-1 to 1 by -1) - res = Mux(rand_pow2 < UInt(i*max/mod), UInt(i-1), res) - res - } + def apply(mod: Int, random: UInt): UInt = { + if (isPow2(mod)) random(log2Up(mod)-1,0) + else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod)) } + def apply(mod: Int): UInt = apply(mod, randomizer) + def oneHot(mod: Int, random: UInt): UInt = { + if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0)) + else PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod)).toBits + } + def oneHot(mod: Int): UInt = oneHot(mod, randomizer) + + private def randomizer = LFSR16() + private def round(x: Double): Int = + if (x.toInt.toDouble == x) x.toInt else (x.toInt + 1) & -2 + private def partition(value: UInt, slices: Int) = + Vec.tabulate(slices)(i => value < round((i << value.getWidth).toDouble / slices)) } From db59fc65ab65f43aeebc1eb09b173dec97d1b213 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 1 Apr 2014 15:01:27 -0700 Subject: [PATCH 0712/1087] Add return address stack --- rocket/src/main/scala/btb.scala | 133 +++++-- rocket/src/main/scala/ctrl.scala | 555 +++++++++++++++-------------- rocket/src/main/scala/decode.scala | 2 + rocket/src/main/scala/dpath.scala | 10 +- rocket/src/main/scala/icache.scala | 31 +- 5 files changed, 404 insertions(+), 327 deletions(-) diff --git a/rocket/src/main/scala/btb.scala 
b/rocket/src/main/scala/btb.scala index cdbc4733..9f1e04cd 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -5,41 +5,80 @@ import Util._ import Node._ import uncore.constants.AddressConstants._ -case class BTBConfig(entries: Int) { +case class BTBConfig(entries: Int, nras: Int = 0, inOrder: Boolean = true) { val matchBits = PGIDX_BITS val pages0 = 1 + log2Up(entries) // is this sensible? what about matchBits? val pages = (pages0+1)/2*2 // control logic assumes 2 divides pages + val opaqueBits = log2Up(entries) +} + +class BTBUpdate(implicit conf: BTBConfig) extends Bundle { + val prediction = Valid(new BTBResp) + val pc = UInt(width = VADDR_BITS) + val target = UInt(width = VADDR_BITS) + val returnAddr = UInt(width = VADDR_BITS) + val taken = Bool() + val isCall = Bool() + val isReturn = Bool() + val incorrectTarget = Bool() + + override def clone = new BTBUpdate().asInstanceOf[this.type] +} + +class BTBResp(implicit conf: BTBConfig) extends Bundle { + val taken = Bool() + val target = UInt(width = VADDR_BITS) + val opaque = UInt(width = conf.opaqueBits) + + override def clone = new BTBResp().asInstanceOf[this.type] +} + +class RAS(implicit conf: BTBConfig) { + def push(addr: UInt): Unit = { + when (count < conf.nras-1) { count := count + 1 } + stack(pos+1) := addr + pos := pos+1 + } + def pop: UInt = { + count := count - 1 + pos := pos - 1 + stack(pos) + } + def clear: Unit = count := UInt(0) + def isEmpty: Bool = count === UInt(0) + + require(isPow2(conf.nras)) + private val count = Reg(init=UInt(0,log2Up(conf.nras+1))) + private val pos = Reg(init=UInt(0,log2Up(conf.nras))) + private val stack = Vec.fill(conf.nras){Reg(UInt())} } // fully-associative branch target buffer -class BTB(conf: BTBConfig) extends Module { +class BTB(implicit conf: BTBConfig) extends Module { val io = new Bundle { - val current_pc = UInt(INPUT, VADDR_BITS) - val hit = Bool(OUTPUT) - val target = UInt(OUTPUT, VADDR_BITS) - val wen = Bool(INPUT) - val 
taken = Bool(INPUT) - val invalidate = Bool(INPUT) - val correct_pc = UInt(INPUT, VADDR_BITS) - val correct_target = UInt(INPUT, VADDR_BITS) + val req = UInt(INPUT, VADDR_BITS) + val resp = Valid(new BTBResp) + val update = Valid(new BTBUpdate).flip + val invalidate = Bool(INPUT) } val idxValid = Vec.fill(conf.entries){Reg(init=Bool(false))} val idxs = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))} val idxPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} - val idxPagesOH = idxPages.map(UIntToOH(_)(conf.pages-1,0)) val tgts = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))} val tgtPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} - val tgtPagesOH = tgtPages.map(UIntToOH(_)(conf.pages-1,0)) val pages = Vec.fill(conf.pages){Reg(UInt(width=VADDR_BITS-conf.matchBits))} val pageValid = Vec.fill(conf.pages){Reg(init=Bool(false))} + val idxPagesOH = idxPages.map(UIntToOH(_)(conf.pages-1,0)) + val tgtPagesOH = tgtPages.map(UIntToOH(_)(conf.pages-1,0)) + + val useRAS = Vec.fill(conf.entries){Reg(Bool())} private def page(addr: UInt) = addr >> conf.matchBits private def pageMatch(addr: UInt) = { val p = page(addr) Vec(pages.map(_ === p)).toBits & pageValid.toBits } - private def tagMatch(addr: UInt): UInt = tagMatch(addr, pageMatch(addr)) private def tagMatch(addr: UInt, pgMatch: UInt): UInt = { val idx = addr(conf.matchBits-1,0) val idxMatch = idxs.map(_ === idx).toBits @@ -47,13 +86,19 @@ class BTB(conf: BTBConfig) extends Module { idxValid.toBits & idxMatch & idxPageMatch } - val hits = tagMatch(io.current_pc) - val idxPageMatch = pageMatch(io.correct_pc) - val tgtPageMatch = pageMatch(io.correct_target) - val updates = tagMatch(io.correct_pc, idxPageMatch) - val anyUpdates = updates.orR + val update = Pipe(io.update) + val update_target = io.req - private var lfsr = LFSR16(io.wen) + val pageHit = pageMatch(io.req) + val hits = tagMatch(io.req, pageHit) + val updatePageHit = pageMatch(update.bits.pc) + val updateHits = 
tagMatch(update.bits.pc, updatePageHit) + + val taken = update.bits.incorrectTarget || update.bits.taken + val predicted_taken = update.bits.prediction.valid && update.bits.prediction.bits.taken + val correction = update.bits.incorrectTarget || update.bits.taken != predicted_taken + + private var lfsr = LFSR16(update.valid) def rand(width: Int) = { lfsr = lfsr(lfsr.getWidth-1,1) Random.oneHot(width, lfsr) @@ -62,32 +107,35 @@ class BTB(conf: BTBConfig) extends Module { Mux(!valid.andR, PriorityEncoderOH(~valid), rand(valid.getWidth)) val idxRepl = randOrInvalid(idxValid.toBits) - val idxWen = updates.toBits | idxRepl & ~anyUpdates.toSInt + val idxWen = + if (conf.inOrder) Mux(update.bits.prediction.valid, UIntToOH(update.bits.prediction.bits.opaque), idxRepl) + else updateHits | Mux(updateHits.orR, UInt(0), idxRepl) - val useIdxPageMatch = idxPageMatch.orR - val doIdxPageRepl = !useIdxPageMatch && io.taken + val useUpdatePageHit = updatePageHit.orR + val doIdxPageRepl = !useUpdatePageHit && update.valid val idxPageRepl = rand(conf.pages) - val idxPageUpdate = Mux(useIdxPageMatch, idxPageMatch, idxPageRepl) + val idxPageUpdate = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) - val samePage = page(io.correct_pc) === page(io.correct_target) - val useTgtPageMatch = (tgtPageMatch & ~idxPageReplEn).orR - val doTgtPageRepl = !useTgtPageMatch && io.taken && !samePage + val samePage = page(update.bits.pc) === page(update_target) + val usePageHit = (pageHit & ~idxPageReplEn).orR + val doTgtPageRepl = !usePageHit && !samePage && update.valid val tgtPageRepl = Mux(samePage, idxPageUpdate, idxPageUpdate(conf.pages-2,0) << 1 | idxPageUpdate(conf.pages-1)) - val tgtPageUpdate = Mux(useTgtPageMatch, tgtPageMatch, tgtPageRepl) + val tgtPageUpdate = Mux(usePageHit, pageHit, tgtPageRepl) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) val pageReplEn = idxPageReplEn | tgtPageReplEn - when (io.wen) { + when 
(update.valid) { for (i <- 0 until conf.entries) { when (idxWen(i)) { - idxValid(i) := io.taken - when (io.taken) { - idxs(i) := io.correct_pc + idxValid(i) := taken + when (correction) { + idxs(i) := update.bits.pc idxPages(i) := OHToUInt(idxPageUpdate) - tgts(i) := io.correct_target + tgts(i) := update_target tgtPages(i) := OHToUInt(tgtPageUpdate) + useRAS(i) := update.bits.isReturn } }.elsewhen ((pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR) { idxValid(i) := false @@ -106,9 +154,9 @@ class BTB(conf: BTBConfig) extends Module { } } writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), - Mux(idxWritesEven, page(io.correct_pc), page(io.correct_target))) + Mux(idxWritesEven, page(update.bits.pc), page(update_target))) writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl), - Mux(idxWritesEven, page(io.correct_target), page(io.correct_pc))) + Mux(idxWritesEven, page(update_target), page(update.bits.pc))) } when (io.invalidate) { @@ -116,6 +164,19 @@ class BTB(conf: BTBConfig) extends Module { pageValid.foreach(_ := false) } - io.hit := hits.toBits.orR - io.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) + io.resp.valid := hits.toBits.orR + io.resp.bits.taken := io.resp.valid + io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) + io.resp.bits.opaque := OHToUInt(hits) + + if (conf.nras > 0) { + val ras = new RAS + when (!ras.isEmpty && Mux1H(hits, useRAS)) { + io.resp.bits.target := ras.pop + } + when (io.update.valid && io.update.bits.isCall) { + ras.push(io.update.bits.returnAddr) + } + when (io.invalidate) { ras.clear } + } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 974ea106..2ada1dd7 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -53,6 +53,7 @@ class CtrlDpathIO(implicit conf: RocketConfiguration) extends Bundle val ll_wen = Bool(INPUT) val ll_waddr = UInt(INPUT, 5) val ex_waddr = UInt(INPUT, 5) 
+ val ex_rs = Vec.fill(2)(UInt(INPUT, 5)) val mem_waddr = UInt(INPUT, 5) val wb_waddr = UInt(INPUT, 5) val status = new Status().asInput @@ -66,15 +67,15 @@ abstract trait DecodeConstants val xpr64 = Y val decode_default = - // fence.i - // jalr mul_val | sret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,BR_X, X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,CSR.X,N,X,X,X,X,X) + // fence.i + // jalr mul_val | sret + // fp_val | renx2 | div_val | | syscall + // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | + // val | | b | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | brtype | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,X,BR_X, X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,CSR.X,N,X,X,X,X,X) val table: Array[(UInt, List[UInt])] } @@ -82,225 +83,225 @@ abstract trait DecodeConstants object XDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | sret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,BR_NE, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BEQ-> List(Y, N,N,BR_EQ, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BLT-> List(Y, N,N,BR_LT, 
N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BLTU-> List(Y, N,N,BR_LTU,N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BGE-> List(Y, N,N,BR_GE, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BGEU-> List(Y, N,N,BR_GEU,N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - - JAL-> List(Y, N,N,BR_J, N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - JALR-> List(Y, N,N,BR_N, Y,N,Y,A2_FOUR,A1_PC, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - AUIPC-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - - LB-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,CSR.N,N,N,N,N,N,N), - LH-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,CSR.N,N,N,N,N,N,N), - LW-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,N), - LD-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,N), - LBU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,CSR.N,N,N,N,N,N,N), - LHU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,CSR.N,N,N,N,N,N,N), - LWU-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,CSR.N,N,N,N,N,N,N), - SB-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,CSR.N,N,N,N,N,N,N), - SH-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,CSR.N,N,N,N,N,N,N), - SW-> List(Y, N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), - SD-> List(xpr64,N,N,BR_N, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), - - AMOADD_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, 
IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOXOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOSWAP_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOAND_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOOR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMIN_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMINU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAX_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAXU_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOADD_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOSWAP_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOXOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOAND_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOOR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMIN_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMINU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAX_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - 
AMOMAXU_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + // fence.i + // jalr mul_val | sret + // fp_val | renx2 | div_val | | syscall + // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | + // val | | b | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | brtype | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,Y,BR_NE, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BEQ-> List(Y, N,N,Y,BR_EQ, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BLT-> List(Y, N,N,Y,BR_LT, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BLTU-> List(Y, N,N,Y,BR_LTU,N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BGE-> List(Y, N,N,Y,BR_GE, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BGEU-> List(Y, N,N,Y,BR_GEU,N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - LR_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - LR_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - SC_W-> List(Y, N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - SC_D-> List(xpr64,N,N,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - - LUI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLTI -> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLTIU-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, 
DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ANDI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - XORI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRLI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRAI-> List(Y, N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADD-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SUB-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLT-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLTU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - AND-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - OR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - XOR-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRA-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - - ADDIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, 
N,N,Y,CSR.N,N,N,N,N,N,N), - SRLIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRAIW-> List(xpr64,N,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADDW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SUBW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRLW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRAW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - - MUL-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULH-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULHU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULHSU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - - DIV-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REM-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REMU-> List(Y, N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, 
FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REMW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REMUW-> List(xpr64,N,N,BR_N, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - - SCALL-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,Y,N,N,N), - SRET-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,Y,N,N,N,N), - FENCE-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,Y,N), - FENCE_I-> List(Y, N,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,Y,N,N,Y,N,N), - CSRRW-> List(Y, N,N,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), - CSRRS-> List(Y, N,N,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), - CSRRC-> List(Y, N,N,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N), - CSRRWI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), - CSRRSI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), - CSRRCI-> List(Y, N,N,BR_N, N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N)) + JAL-> List(Y, N,N,Y,BR_J, N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + JALR-> List(Y, N,N,N,BR_X, Y,N,Y,A2_FOUR,A1_PC, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + AUIPC-> List(Y, N,N,N,BR_X, N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + + LB-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,CSR.N,N,N,N,N,N,N), + LH-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,CSR.N,N,N,N,N,N,N), + LW-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, 
Y,M_XRD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,N), + LD-> List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,N), + LBU-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,CSR.N,N,N,N,N,N,N), + LHU-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,CSR.N,N,N,N,N,N,N), + LWU-> List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,CSR.N,N,N,N,N,N,N), + SB-> List(Y, N,N,N,BR_X, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,CSR.N,N,N,N,N,N,N), + SH-> List(Y, N,N,N,BR_X, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,CSR.N,N,N,N,N,N,N), + SW-> List(Y, N,N,N,BR_X, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), + SD-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), + + AMOADD_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOXOR_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOSWAP_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOAND_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOOR_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMIN_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMINU_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAX_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAXU_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, 
Y,M_XA_MAXU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOADD_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOSWAP_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOXOR_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOAND_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOOR_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMIN_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMINU_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAX_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAXU_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + + LR_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + LR_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + SC_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + SC_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + + LUI-> List(Y, N,N,N,BR_X, N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADDI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLTI -> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLTIU-> List(Y, N,N,N,BR_X, 
N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ANDI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ORI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + XORI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLLI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRLI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRAI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADD-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SUB-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLT-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLTU-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + AND-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + OR-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + XOR-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLL-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRL-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRA-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + + ADDIW-> List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLLIW-> 
List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRLIW-> List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRAIW-> List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADDW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SUBW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLLW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRLW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRAW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + + MUL-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULH-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULHU-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULHSU-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + + DIV-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVU-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REM-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REMU-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, 
N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVUW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REMW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REMUW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + + SCALL-> List(Y, N,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,Y,N,N,N), + SRET-> List(Y, N,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,Y,N,N,N,N), + FENCE-> List(Y, N,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,Y,N), + FENCE_I-> List(Y, N,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,Y,N,N,Y,N,N), + CSRRW-> List(Y, N,N,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), + CSRRS-> List(Y, N,N,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), + CSRRC-> List(Y, N,N,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N), + CSRRWI-> List(Y, N,N,N,BR_X, N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), + CSRRSI-> List(Y, N,N,N,BR_X, N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), + CSRRCI-> List(Y, N,N,N,BR_X, N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N)) } object FDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | sret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, 
A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMIN_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMIN_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMAX_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMAX_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMUL_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMUL_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMADD_D-> List(Y, Y,N,BR_N, 
N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMADD_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMADD_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMSUB_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMSUB_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCLASS_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCLASS_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_X_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_X_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_W_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_W_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_WU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_WU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_L_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_L_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_LU_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_LU_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
N,N,Y,CSR.N,N,N,N,N,N,N), - FEQ_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FEQ_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLT_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLT_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLE_S-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLE_D-> List(Y, Y,N,BR_N, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_S_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMV_D_X-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_W-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_WU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_L-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_LU-> List(Y, Y,N,BR_N, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FLW-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), - FLD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), - FSW-> List(Y, Y,N,BR_N, 
N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), - FSD-> List(Y, Y,N,BR_N, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N)) + // fence.i + // jalr mul_val | sret + // fp_val | renx2 | div_val | | syscall + // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | + // val | | b | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | brtype | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJ_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJ_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJX_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJX_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJN_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJN_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMIN_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMIN_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMAX_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMAX_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FADD_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
N,N,N,CSR.N,N,N,N,N,N,N), + FADD_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSUB_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSUB_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMUL_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMUL_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMADD_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMADD_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMSUB_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMSUB_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMADD_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMADD_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMSUB_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMSUB_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCLASS_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCLASS_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FMV_X_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FMV_X_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_W_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_W_D-> List(Y, 
Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_WU_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_WU_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_L_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_L_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_LU_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_LU_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FEQ_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FEQ_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLT_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLT_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLE_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLE_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FMV_S_X-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMV_D_X-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_W-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_W-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_WU-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_WU-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, 
DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_L-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_L-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_LU-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_LU-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FLW-> List(Y, Y,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), + FLD-> List(Y, Y,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), + FSW-> List(Y, Y,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), + FSD-> List(Y, Y,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N)) } object RoCCDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | sret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | - // val | | brtype | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | - CUSTOM0-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM0_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - 
CUSTOM0_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM1_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM1_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N)) + // fence.i + // jalr mul_val | sret + // fp_val | renx2 | div_val | | syscall + // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | + // val | | b | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | brtype | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + CUSTOM0-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RS1_RS2-> List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD_RS1_RS2->List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM1-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RS1_RS2-> List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RD-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM1_RD_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + 
CUSTOM1_RD_RS1_RS2->List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RS1_RS2-> List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD_RS1_RS2->List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RS1_RS2-> List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N)) } class Control(implicit conf: RocketConfiguration) extends Module @@ -325,62 +326,65 @@ class Control(implicit conf: RocketConfiguration) extends Module val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) - val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_rocc_val: Bool) :: id_br_type :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs + val (id_int_val: Bool) :: (id_fp_val: Bool) :: 
(id_rocc_val: Bool) :: (id_branch: Bool) :: id_br_type :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs val id_sel_alu2 :: id_sel_alu1 :: id_sel_imm :: (id_fn_dw: Bool) :: id_fn_alu :: cs1 = cs0 val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: cs2 = cs1 val id_csr :: (id_fence_i: Bool) :: (id_sret: Bool) :: (id_syscall: Bool) :: (id_replay_next: Bool) :: (id_fence: Bool) :: (id_amo: Bool) :: Nil = cs2 - val ex_reg_xcpt_interrupt = Reg(init=Bool(false)) - val ex_reg_valid = Reg(init=Bool(false)) - val ex_reg_sret = Reg(init=Bool(false)) - val ex_reg_wen = Reg(init=Bool(false)) - val ex_reg_fp_wen = Reg(init=Bool(false)) - val ex_reg_flush_inst = Reg(init=Bool(false)) - val ex_reg_jalr = Reg(init=Bool(false)) - val ex_reg_btb_hit = Reg(init=Bool(false)) - val ex_reg_div_mul_val = Reg(init=Bool(false)) - val ex_reg_mem_val = Reg(init=Bool(false)) - val ex_reg_xcpt = Reg(init=Bool(false)) - val ex_reg_fp_val = Reg(init=Bool(false)) - val ex_reg_rocc_val = Reg(init=Bool(false)) - val ex_reg_replay_next = Reg(init=Bool(false)) - val ex_reg_load_use = Reg(init=Bool(false)) - val ex_reg_csr = Reg(init=CSR.N) - val ex_reg_br_type = Reg(init=BR_N) + val ex_reg_xcpt_interrupt = Reg(Bool()) + val ex_reg_valid = Reg(Bool()) + val ex_reg_branch = Reg(Bool()) + val ex_reg_jalr = Reg(Bool()) + val ex_reg_predicted_taken = Reg(Bool()) + val ex_reg_btb_hit = Reg(Bool()) + val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits.clone) + val ex_reg_br_type = Reg(UInt()) + val ex_reg_sret = Reg(Bool()) + val ex_reg_wen = Reg(Bool()) + val ex_reg_fp_wen = Reg(Bool()) + val ex_reg_flush_inst = Reg(Bool()) + val ex_reg_div_mul_val = Reg(Bool()) + val ex_reg_mem_val = Reg(Bool()) + val ex_reg_xcpt = Reg(Bool()) + val ex_reg_fp_val = Reg(Bool()) + val ex_reg_rocc_val = Reg(Bool()) + val ex_reg_replay_next = Reg(Bool()) + val ex_reg_load_use = Reg(Bool()) + val ex_reg_csr = Reg(UInt()) val ex_reg_mem_cmd = 
Reg(Bits()) val ex_reg_mem_type = Reg(Bits()) val ex_reg_cause = Reg(UInt()) - val mem_reg_xcpt_interrupt = Reg(init=Bool(false)) - val mem_reg_valid = Reg(init=Bool(false)) - val mem_reg_sret = Reg(init=Bool(false)) - val mem_reg_wen = Reg(init=Bool(false)) - val mem_reg_fp_wen = Reg(init=Bool(false)) - val mem_reg_flush_inst = Reg(init=Bool(false)) - val mem_reg_div_mul_val = Reg(init=Bool(false)) - val mem_reg_mem_val = Reg(init=Bool(false)) - val mem_reg_xcpt = Reg(init=Bool(false)) - val mem_reg_fp_val = Reg(init=Bool(false)) - val mem_reg_rocc_val = Reg(init=Bool(false)) - val mem_reg_replay = Reg(init=Bool(false)) - val mem_reg_replay_next = Reg(init=Bool(false)) - val mem_reg_csr = Reg(init=CSR.N) + val mem_reg_xcpt_interrupt = Reg(Bool()) + val mem_reg_valid = Reg(Bool()) + val mem_reg_sret = Reg(Bool()) + val mem_reg_wen = Reg(Bool()) + val mem_reg_fp_wen = Reg(Bool()) + val mem_reg_flush_inst = Reg(Bool()) + val mem_reg_div_mul_val = Reg(Bool()) + val mem_reg_mem_val = Reg(Bool()) + val mem_reg_xcpt = Reg(Bool()) + val mem_reg_fp_val = Reg(Bool()) + val mem_reg_rocc_val = Reg(Bool()) + val mem_reg_replay = Reg(Bool()) + val mem_reg_replay_next = Reg(Bool()) + val mem_reg_csr = Reg(UInt()) val mem_reg_cause = Reg(UInt()) val mem_reg_slow_bypass = Reg(Bool()) - val wb_reg_valid = Reg(init=Bool(false)) - val wb_reg_csr = Reg(init=CSR.N) - val wb_reg_wen = Reg(init=Bool(false)) - val wb_reg_fp_wen = Reg(init=Bool(false)) - val wb_reg_rocc_val = Reg(init=Bool(false)) - val wb_reg_flush_inst = Reg(init=Bool(false)) - val wb_reg_mem_val = Reg(init=Bool(false)) - val wb_reg_sret = Reg(init=Bool(false)) - val wb_reg_xcpt = Reg(init=Bool(false)) - val wb_reg_replay = Reg(init=Bool(false)) + val wb_reg_valid = Reg(Bool()) + val wb_reg_csr = Reg(UInt()) + val wb_reg_wen = Reg(Bool()) + val wb_reg_fp_wen = Reg(Bool()) + val wb_reg_rocc_val = Reg(Bool()) + val wb_reg_flush_inst = Reg(Bool()) + val wb_reg_mem_val = Reg(Bool()) + val wb_reg_sret = Reg(Bool()) + val 
wb_reg_xcpt = Reg(Bool()) + val wb_reg_replay = Reg(Bool()) val wb_reg_cause = Reg(UInt()) - val wb_reg_fp_val = Reg(init=Bool(false)) - val wb_reg_div_mul_val = Reg(init=Bool(false)) + val wb_reg_fp_val = Reg(Bool()) + val wb_reg_div_mul_val = Reg(Bool()) val take_pc = Bool() val take_pc_wb = Bool() @@ -452,8 +456,10 @@ class Control(implicit conf: RocketConfiguration) extends Module when (id_xcpt) { ex_reg_cause := id_cause } when (ctrl_killd) { - ex_reg_jalr := Bool(false) - ex_reg_btb_hit := Bool(false) + ex_reg_branch := false + ex_reg_jalr := false + ex_reg_predicted_taken := false + ex_reg_btb_hit := false ex_reg_div_mul_val := Bool(false) ex_reg_mem_val := Bool(false) ex_reg_valid := Bool(false) @@ -466,13 +472,15 @@ class Control(implicit conf: RocketConfiguration) extends Module ex_reg_replay_next := Bool(false) ex_reg_load_use := Bool(false) ex_reg_csr := CSR.N - ex_reg_br_type := BR_N ex_reg_xcpt := Bool(false) } .otherwise { - ex_reg_br_type := id_br_type - ex_reg_jalr := id_jalr - ex_reg_btb_hit := io.imem.resp.bits.taken && !id_jalr + ex_reg_branch := id_branch + ex_reg_jalr := id_jalr + ex_reg_predicted_taken := io.imem.btb_resp.valid && io.imem.btb_resp.bits.taken + when (id_branch) { ex_reg_br_type := id_br_type } + ex_reg_btb_hit := io.imem.btb_resp.valid + when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits } ex_reg_div_mul_val := id_mul_val || id_div_val ex_reg_mem_val := id_mem_val.toBool ex_reg_valid := Bool(true) @@ -635,8 +643,13 @@ class Control(implicit conf: RocketConfiguration) extends Module Mux(replay_wb, PC_WB, // replay PC_EX)))// branch/jal[r] - io.imem.req.bits.mispredict := !take_pc_wb && take_pc_ex && !ex_reg_xcpt - io.imem.req.bits.taken := !ex_reg_btb_hit || ex_reg_jalr + io.imem.btb_update.valid := ex_reg_btb_hit || !take_pc_wb && (ex_reg_branch || ex_reg_jalr) && !ex_reg_xcpt + io.imem.btb_update.bits.prediction.valid := ex_reg_btb_hit + io.imem.btb_update.bits.prediction.bits := ex_reg_btb_resp + 
io.imem.btb_update.bits.taken := ex_reg_jalr || io.dpath.ex_br_taken ^ io.dpath.ex_predicted_taken + io.imem.btb_update.bits.incorrectTarget := ex_reg_jalr && !io.dpath.jalr_eq + io.imem.btb_update.bits.isCall := ex_reg_wen && io.dpath.ex_waddr(0) + io.imem.btb_update.bits.isReturn := ex_reg_jalr && io.dpath.ex_rs(0) === 1 io.imem.req.valid := take_pc val bypassDst = Array(id_raddr1, id_raddr2) @@ -723,7 +736,7 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.ex_fp_val:= ex_reg_fp_val io.dpath.mem_fp_val:= mem_reg_fp_val io.dpath.ex_jalr := ex_reg_jalr - io.dpath.ex_predicted_taken := ex_reg_btb_hit + io.dpath.ex_predicted_taken := ex_reg_branch && ex_reg_btb_hit && ex_reg_btb_resp.taken io.dpath.ex_wen := ex_reg_wen io.dpath.mem_wen := mem_reg_wen io.dpath.ll_ready := !wb_reg_wen @@ -732,7 +745,7 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.csr := wb_reg_csr io.dpath.sret := wb_reg_sret io.dpath.ex_mem_type := ex_reg_mem_type - io.dpath.ex_br_type := ex_reg_br_type ^ ex_reg_btb_hit + io.dpath.ex_br_type := Mux(ex_reg_branch, ex_reg_br_type, BR_N) ^ io.dpath.ex_predicted_taken io.dpath.ex_rs2_val := ex_reg_mem_val && isWrite(ex_reg_mem_cmd) || ex_reg_rocc_val io.dpath.ex_rocc_val := ex_reg_rocc_val io.dpath.mem_rocc_val := mem_reg_rocc_val diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 6a969d76..38e9bdff 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -57,6 +57,8 @@ object DecodeLogic } def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool = apply(addr, Bool.DC, trues.map(_ -> Bool(true)) ++ falses.map(_ -> Bool(false))) + def apply(addr: UInt, tru: UInt, fals: UInt): Bool = + apply(addr, Seq(tru), Seq(fals)) private val caches = collection.mutable.Map[Module,collection.mutable.Map[Term,Bool]]() } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 
0a958236..0f446550 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -118,8 +118,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module } } - val ex_raddr1 = ex_reg_inst(19,15) - val ex_raddr2 = ex_reg_inst(24,20) + io.ctrl.ex_rs(0) := ex_reg_inst(19,15) + io.ctrl.ex_rs(1) := ex_reg_inst(24,20) val bypass = Vec.fill(NBYP)(Bits()) bypass(BYP_0) := Bits(0) @@ -171,7 +171,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module } val ex_br_base = Mux(io.ctrl.ex_jalr, ex_rs(0), ex_reg_pc) val ex_br_offset = Mux(io.ctrl.ex_predicted_taken, SInt(4), ex_imm(20,0).toSInt) - val ex_br64 = ex_br_base + ex_br_offset + val ex_br64 = (ex_br_base + ex_br_offset) & SInt(-2) val ex_br_msb = Mux(io.ctrl.ex_jalr, vaSign(ex_rs(0), ex_br64), vaSign(ex_reg_pc, ex_br64)) val ex_br_addr = Cat(ex_br_msb, ex_br64(VADDR_BITS-1,0)) @@ -289,11 +289,13 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.rocc.cmd.bits.rs2 := wb_reg_rs2 // hook up I$ - io.imem.req.bits.currentpc := ex_reg_pc io.imem.req.bits.pc := Mux(io.ctrl.sel_pc === PC_EX, ex_br_addr, Mux(io.ctrl.sel_pc === PC_PCR, pcr.io.evec, wb_reg_pc)).toUInt // PC_WB + io.imem.btb_update.bits.pc := ex_reg_pc + io.imem.btb_update.bits.target := io.imem.req.bits.pc + io.imem.btb_update.bits.returnAddr := io.dmem.req.bits.addr & SInt(-4) // for hazard/bypass opportunity detection io.ctrl.ex_waddr := ex_reg_inst(11,7) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 07ff1510..d9f49da8 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -26,15 +26,11 @@ case class ICacheConfig(sets: Int, assoc: Int, class FrontendReq extends Bundle { val pc = UInt(width = VADDR_BITS+1) - val mispredict = Bool() - val taken = Bool() - val currentpc = UInt(width = VADDR_BITS+1) } class FrontendResp(implicit conf: ICacheConfig) extends Bundle { val pc = UInt(width = VADDR_BITS+1) // ID stage 
PC val data = Bits(width = conf.ibytes*8) - val taken = Bool() val xcpt_ma = Bool() val xcpt_if = Bool() @@ -44,6 +40,8 @@ class FrontendResp(implicit conf: ICacheConfig) extends Bundle { class CPUFrontendIO(implicit conf: ICacheConfig) extends Bundle { val req = Valid(new FrontendReq) val resp = Decoupled(new FrontendResp).flip + val btb_resp = Valid(new BTBResp()(conf.btb)).flip + val btb_update = Valid(new BTBUpdate()(conf.btb)) val ptw = new TLBPTWIO().flip val invalidate = Bool(OUTPUT) } @@ -55,7 +53,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu val mem = new UncachedTileLinkIO } - val btb = Module(new BTB(c.btb)) + val btb = Module(new BTB()(c.btb)) val icache = Module(new ICache) val tlb = Module(new TLB(c.ntlb)) @@ -64,16 +62,17 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu val s1_same_block = Reg(Bool()) val s2_valid = Reg(init=Bool(true)) val s2_pc = Reg(init=UInt(START_ADDR)) - val s2_btb_hit = Reg(init=Bool(false)) + val s2_btb_resp_valid = Reg(init=Bool(false)) + val s2_btb_resp_bits = Reg(btb.io.resp.bits.clone) val s2_xcpt_if = Reg(init=Bool(false)) - val btbTarget = Cat(btb.io.target(VADDR_BITS-1), btb.io.target) + val btbTarget = Cat(btb.io.resp.bits.target(VADDR_BITS-1), btb.io.resp.bits.target) val pcp4_0 = s1_pc + UInt(c.ibytes) val pcp4 = Cat(s1_pc(VADDR_BITS-1) & pcp4_0(VADDR_BITS-1), pcp4_0(VADDR_BITS-1,0)) val icmiss = s2_valid && !icache.io.resp.valid - val predicted_npc = btbTarget /* zero if btb miss */ | Mux(btb.io.hit, UInt(0), pcp4) + val predicted_npc = btbTarget /* zero if btb miss */ | Mux(btb.io.resp.bits.taken, UInt(0), pcp4) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt - val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.hit && ((pcp4 & (c.databits/8)) === (s1_pc & (c.databits/8))) + val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((pcp4 & (c.databits/8)) === (s1_pc & (c.databits/8))) val stall = 
io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { @@ -82,7 +81,8 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu s2_valid := !icmiss when (!icmiss) { s2_pc := s1_pc - s2_btb_hit := btb.io.hit + s2_btb_resp_valid := btb.io.resp.valid + when (btb.io.resp.valid) { s2_btb_resp_bits := btb.io.resp.bits } s2_xcpt_if := tlb.io.resp.xcpt_if } } @@ -92,11 +92,8 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu s2_valid := Bool(false) } - btb.io.current_pc := s1_pc - btb.io.wen := io.cpu.req.bits.mispredict - btb.io.taken := io.cpu.req.bits.taken - btb.io.correct_pc := io.cpu.req.bits.currentpc - btb.io.correct_target := io.cpu.req.bits.pc + btb.io.req := s1_pc & SInt(-c.ibytes) + btb.io.update := io.cpu.btb_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate tlb.io.ptw <> io.cpu.ptw @@ -117,9 +114,11 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) io.cpu.resp.bits.pc := s2_pc & SInt(-c.ibytes) // discard PC LSBs io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(c.databits/8)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)) - io.cpu.resp.bits.taken := s2_btb_hit io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(c.ibytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if + + io.cpu.btb_resp.valid := s2_btb_resp_valid + io.cpu.btb_resp.bits := s2_btb_resp_bits } class ICacheReq extends Bundle { From f235fa0db68d750a8b65b29302b1c16583421df3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 7 Apr 2014 15:58:49 -0700 Subject: [PATCH 0713/1087] Move branch resolution to M stage --- rocket/src/main/scala/btb.scala | 139 +++++--- rocket/src/main/scala/consts.scala | 1 + rocket/src/main/scala/ctrl.scala | 495 +++++++++++++------------- rocket/src/main/scala/dpath.scala | 36 +- rocket/src/main/scala/dpath_alu.scala | 26 +- rocket/src/main/scala/icache.scala | 2 +- 6 
files changed, 374 insertions(+), 325 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 9f1e04cd..d4beed04 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -5,11 +5,53 @@ import Util._ import Node._ import uncore.constants.AddressConstants._ -case class BTBConfig(entries: Int, nras: Int = 0, inOrder: Boolean = true) { +case class BTBConfig(entries: Int, nras: Int = 0) { val matchBits = PGIDX_BITS - val pages0 = 1 + log2Up(entries) // is this sensible? what about matchBits? + val pages0 = 1 max log2Up(entries) // is this sensible? val pages = (pages0+1)/2*2 // control logic assumes 2 divides pages val opaqueBits = log2Up(entries) + val nbht = 1 << log2Up(entries * 2) +} + +class RAS(implicit conf: BTBConfig) { + def push(addr: UInt): Unit = { + when (count < conf.nras) { count := count + 1 } + val nextPos = Mux(Bool(isPow2(conf.nras)) || pos > 0, pos+1, UInt(0)) + stack(nextPos) := addr + pos := nextPos + } + def peek: UInt = stack(pos) + def pop: Unit = when (!isEmpty) { + count := count - 1 + pos := Mux(Bool(isPow2(conf.nras)) || pos > 0, pos-1, UInt(conf.nras-1)) + } + def clear: Unit = count := UInt(0) + def isEmpty: Bool = count === UInt(0) + + private val count = Reg(init=UInt(0,log2Up(conf.nras+1))) + private val pos = Reg(init=UInt(0,log2Up(conf.nras))) + private val stack = Vec.fill(conf.nras){Reg(UInt())} +} + +class BHTResp(implicit conf: BTBConfig) extends Bundle { + val index = UInt(width = log2Up(conf.nbht).max(1)) + val value = UInt(width = 2) +} + +class BHT(implicit conf: BTBConfig) { + def get(addr: UInt): BHTResp = { + val res = new BHTResp + res.index := addr(log2Up(conf.nbht)+1,2) ^ history + res.value := table(res.index) + res + } + def update(d: BHTResp, taken: Bool): Unit = { + table(d.index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) + history := Cat(taken, history(log2Up(conf.nbht)-1,1)) + } + + private val table = 
Mem(UInt(width = 2), conf.nbht) + val history = Reg(UInt(width = log2Up(conf.nbht))) } class BTBUpdate(implicit conf: BTBConfig) extends Bundle { @@ -18,6 +60,7 @@ class BTBUpdate(implicit conf: BTBConfig) extends Bundle { val target = UInt(width = VADDR_BITS) val returnAddr = UInt(width = VADDR_BITS) val taken = Bool() + val isJump = Bool() val isCall = Bool() val isReturn = Bool() val incorrectTarget = Bool() @@ -28,31 +71,12 @@ class BTBUpdate(implicit conf: BTBConfig) extends Bundle { class BTBResp(implicit conf: BTBConfig) extends Bundle { val taken = Bool() val target = UInt(width = VADDR_BITS) - val opaque = UInt(width = conf.opaqueBits) + val entry = UInt(width = conf.opaqueBits) + val bht = new BHTResp override def clone = new BTBResp().asInstanceOf[this.type] } -class RAS(implicit conf: BTBConfig) { - def push(addr: UInt): Unit = { - when (count < conf.nras-1) { count := count + 1 } - stack(pos+1) := addr - pos := pos+1 - } - def pop: UInt = { - count := count - 1 - pos := pos - 1 - stack(pos) - } - def clear: Unit = count := UInt(0) - def isEmpty: Bool = count === UInt(0) - - require(isPow2(conf.nras)) - private val count = Reg(init=UInt(0,log2Up(conf.nras+1))) - private val pos = Reg(init=UInt(0,log2Up(conf.nras))) - private val stack = Vec.fill(conf.nras){Reg(UInt())} -} - // fully-associative branch target buffer class BTB(implicit conf: BTBConfig) extends Module { val io = new Bundle { @@ -73,6 +97,7 @@ class BTB(implicit conf: BTBConfig) extends Module { val tgtPagesOH = tgtPages.map(UIntToOH(_)(conf.pages-1,0)) val useRAS = Vec.fill(conf.entries){Reg(Bool())} + val isJump = Vec.fill(conf.entries){Reg(Bool())} private def page(addr: UInt) = addr >> conf.matchBits private def pageMatch(addr: UInt) = { @@ -94,48 +119,48 @@ class BTB(implicit conf: BTBConfig) extends Module { val updatePageHit = pageMatch(update.bits.pc) val updateHits = tagMatch(update.bits.pc, updatePageHit) - val taken = update.bits.incorrectTarget || update.bits.taken - val 
predicted_taken = update.bits.prediction.valid && update.bits.prediction.bits.taken - val correction = update.bits.incorrectTarget || update.bits.taken != predicted_taken - private var lfsr = LFSR16(update.valid) def rand(width: Int) = { lfsr = lfsr(lfsr.getWidth-1,1) Random.oneHot(width, lfsr) } - def randOrInvalid(valid: UInt) = - Mux(!valid.andR, PriorityEncoderOH(~valid), rand(valid.getWidth)) - val idxRepl = randOrInvalid(idxValid.toBits) - val idxWen = - if (conf.inOrder) Mux(update.bits.prediction.valid, UIntToOH(update.bits.prediction.bits.opaque), idxRepl) - else updateHits | Mux(updateHits.orR, UInt(0), idxRepl) + val updateHit = update.bits.prediction.valid + val updateValid = update.bits.incorrectTarget || updateHit && Bool(conf.nbht > 0) + val updateTarget = updateValid && update.bits.incorrectTarget val useUpdatePageHit = updatePageHit.orR - val doIdxPageRepl = !useUpdatePageHit && update.valid - val idxPageRepl = rand(conf.pages) - val idxPageUpdate = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) + val doIdxPageRepl = updateTarget && !useUpdatePageHit + val idxPageRepl = UInt() + val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) + val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) val samePage = page(update.bits.pc) === page(update_target) val usePageHit = (pageHit & ~idxPageReplEn).orR - val doTgtPageRepl = !usePageHit && !samePage && update.valid - val tgtPageRepl = Mux(samePage, idxPageUpdate, idxPageUpdate(conf.pages-2,0) << 1 | idxPageUpdate(conf.pages-1)) - val tgtPageUpdate = Mux(usePageHit, pageHit, tgtPageRepl) + val doTgtPageRepl = updateTarget && !samePage && !usePageHit + val tgtPageRepl = Mux(samePage, idxPageUpdateOH, idxPageUpdateOH(conf.pages-2,0) << 1 | idxPageUpdateOH(conf.pages-1)) + val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl)) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) val pageReplEn = idxPageReplEn | 
tgtPageReplEn + idxPageRepl := UIntToOH(Counter(update.valid && (doIdxPageRepl || doTgtPageRepl), conf.pages)._1) + + when (update.valid && !(updateValid && !updateTarget)) { + val nextRepl = Counter(!updateHit && updateValid, conf.entries)._1 + val waddr = Mux(updateHit, update.bits.prediction.bits.entry, nextRepl) - when (update.valid) { for (i <- 0 until conf.entries) { - when (idxWen(i)) { - idxValid(i) := taken - when (correction) { + when (waddr === i) { + idxValid(i) := updateValid + when (updateTarget) { + if (i == 0) assert(io.req === update.bits.target, "BTB request != I$ target") idxs(i) := update.bits.pc - idxPages(i) := OHToUInt(idxPageUpdate) + idxPages(i) := idxPageUpdate tgts(i) := update_target - tgtPages(i) := OHToUInt(tgtPageUpdate) - useRAS(i) := update.bits.isReturn + tgtPages(i) := tgtPageUpdate + useRAS(i) := update.bits.isReturn + isJump(i) := update.bits.isJump } }.elsewhen ((pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR) { idxValid(i) := false @@ -143,7 +168,7 @@ class BTB(implicit conf: BTBConfig) extends Module { } require(conf.pages % 2 == 0) - val idxWritesEven = (idxPageUpdate & Fill(conf.pages/2, UInt(1,2))).orR + val idxWritesEven = (idxPageUpdateOH & Fill(conf.pages/2, UInt(1,2))).orR def writeBank(i: Int, mod: Int, en: Bool, data: UInt) = { for (i <- i until conf.pages by mod) { @@ -167,15 +192,27 @@ class BTB(implicit conf: BTBConfig) extends Module { io.resp.valid := hits.toBits.orR io.resp.bits.taken := io.resp.valid io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) - io.resp.bits.opaque := OHToUInt(hits) + io.resp.bits.entry := OHToUInt(hits) + + if (conf.nbht > 0) { + val bht = new BHT + val res = bht.get(io.req) + when (update.valid && updateHit && !update.bits.isJump) { bht.update(update.bits.prediction.bits.bht, update.bits.taken) } + when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } + io.resp.bits.bht := res + } if (conf.nras > 0) { val ras = new RAS when 
(!ras.isEmpty && Mux1H(hits, useRAS)) { - io.resp.bits.target := ras.pop + io.resp.bits.target := ras.peek } - when (io.update.valid && io.update.bits.isCall) { - ras.push(io.update.bits.returnAddr) + when (io.update.valid) { + when (io.update.bits.isCall) { + ras.push(io.update.bits.returnAddr) + }.elsewhen (io.update.bits.isReturn && io.update.bits.prediction.valid) { + ras.pop + } } when (io.invalidate) { ras.clear } } diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 78eed730..2b59413c 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -17,6 +17,7 @@ trait ScalarOpConstants { val BR_GEU = Bits(7, 3) val PC_EX = UInt(0, 2) + val PC_MEM = UInt(1, 2) val PC_WB = UInt(2, 2) val PC_PCR = UInt(3, 2) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 2ada1dd7..4dabb716 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -27,9 +27,10 @@ class CtrlDpathIO(implicit conf: RocketConfiguration) extends Bundle val wb_load = Bool(OUTPUT) val ex_fp_val= Bool(OUTPUT) val mem_fp_val= Bool(OUTPUT) - val ex_wen = Bool(OUTPUT) - val ex_jalr = Bool(OUTPUT) - val ex_predicted_taken = Bool(OUTPUT) + val ex_wen = Bool(OUTPUT) + val ex_valid = Bool(OUTPUT) + val mem_jalr = Bool(OUTPUT) + val mem_branch = Bool(OUTPUT) val mem_wen = Bool(OUTPUT) val wb_wen = Bool(OUTPUT) val ex_mem_type = Bits(OUTPUT, 3) @@ -47,13 +48,13 @@ class CtrlDpathIO(implicit conf: RocketConfiguration) extends Bundle // inputs from datapath val inst = Bits(INPUT, 32) val jalr_eq = Bool(INPUT) - val ex_br_type = Bits(OUTPUT, SZ_BR) - val ex_br_taken = Bool(INPUT) + val mem_br_taken = Bool(INPUT) + val mem_misprediction = Bool(INPUT) val div_mul_rdy = Bool(INPUT) val ll_wen = Bool(INPUT) val ll_waddr = UInt(INPUT, 5) val ex_waddr = UInt(INPUT, 5) - val ex_rs = Vec.fill(2)(UInt(INPUT, 5)) + val mem_rs1_ra = Bool(INPUT) val mem_waddr = UInt(INPUT, 5) val wb_waddr = 
UInt(INPUT, 5) val status = new Status().asInput @@ -67,15 +68,15 @@ abstract trait DecodeConstants val xpr64 = Y val decode_default = - // fence.i - // jalr mul_val | sret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | - // val | | b | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | brtype | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,X,BR_X, X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,CSR.X,N,X,X,X,X,X) + // jal fence.i + // | jalr mul_val | sret + // fp_val| | renx2 | div_val | | syscall + // | rocc| | | renx1 s_alu1 mem_val | | wen | | | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,CSR.X,N,X,X,X,X,X) val table: Array[(UInt, List[UInt])] } @@ -83,225 +84,225 @@ abstract trait DecodeConstants object XDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | sret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | - // val | | b | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | brtype | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,Y,BR_NE, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BEQ-> List(Y, N,N,Y,BR_EQ, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BLT-> List(Y, N,N,Y,BR_LT, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), 
- BLTU-> List(Y, N,N,Y,BR_LTU,N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BGE-> List(Y, N,N,Y,BR_GE, N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BGEU-> List(Y, N,N,Y,BR_GEU,N,Y,Y,A2_X, A1_X, IMM_SB,DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + // jal fence.i + // | jalr mul_val | sret + // fp_val| | renx2 | div_val | | syscall + // | rocc| | | renx1 s_alu1 mem_val | | wen | | | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BEQ-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BLT-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BLTU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BGE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + BGEU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - JAL-> List(Y, N,N,Y,BR_J, N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - JALR-> List(Y, N,N,N,BR_X, Y,N,Y,A2_FOUR,A1_PC, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - AUIPC-> List(Y, N,N,N,BR_X, N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + JAL-> List(Y, N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + JALR-> List(Y, N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + AUIPC-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,Y,CSR.N,N,N,N,N,N,N), - LB-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,CSR.N,N,N,N,N,N,N), - LH-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,CSR.N,N,N,N,N,N,N), - LW-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,N), - LD-> List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,N), - LBU-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,CSR.N,N,N,N,N,N,N), - LHU-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,CSR.N,N,N,N,N,N,N), - LWU-> List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,CSR.N,N,N,N,N,N,N), - SB-> List(Y, N,N,N,BR_X, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,CSR.N,N,N,N,N,N,N), - SH-> List(Y, N,N,N,BR_X, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,CSR.N,N,N,N,N,N,N), - SW-> List(Y, N,N,N,BR_X, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), - SD-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), + LB-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,CSR.N,N,N,N,N,N,N), + LH-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,CSR.N,N,N,N,N,N,N), + LW-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,N), + LD-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,N), + LBU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,CSR.N,N,N,N,N,N,N), + LHU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,CSR.N,N,N,N,N,N,N), + LWU-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, 
Y,M_XRD, MT_WU,N,N,Y,CSR.N,N,N,N,N,N,N), + SB-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,CSR.N,N,N,N,N,N,N), + SH-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,CSR.N,N,N,N,N,N,N), + SW-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), + SD-> List(xpr64,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), - AMOADD_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOXOR_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOSWAP_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOAND_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOOR_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMIN_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMINU_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAX_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAXU_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOADD_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOSWAP_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOXOR_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOAND_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, 
IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOOR_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMIN_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMINU_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAX_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAXU_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOADD_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOXOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOSWAP_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOAND_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMIN_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMINU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAX_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAXU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOADD_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOSWAP_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOXOR_D-> 
List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOAND_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMIN_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMINU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAX_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAXU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - LR_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - LR_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - SC_W-> List(Y, N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - SC_D-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + LR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + LR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + SC_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), + SC_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - LUI-> List(Y, N,N,N,BR_X, N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADDI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLTI -> List(Y, N,N,N,BR_X, 
N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLTIU-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ANDI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ORI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - XORI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLLI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRLI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRAI-> List(Y, N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADD-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SUB-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLT-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLTU-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - AND-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - OR-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - XOR-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLL-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRL-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRA-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + LUI-> List(Y, 
N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLTI -> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLTIU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ANDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + XORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRAI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADD-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SUB-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLT-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLTU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + AND-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + OR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + XOR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, 
N,N,Y,CSR.N,N,N,N,N,N,N), + SRA-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLLIW-> List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRLIW-> List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRAIW-> List(xpr64,N,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADDW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SUBW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLLW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRLW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRAW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRAIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADDW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SUBW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SLLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, 
N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + SRAW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - MUL-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULH-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULHU-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULHSU-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MUL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULH-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULHU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULHSU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MULW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - DIV-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVU-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REM-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REMU-> List(Y, N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVUW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REMW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, 
A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REMUW-> List(xpr64,N,N,N,BR_X, N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + DIV-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REM-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REMU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REMW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + REMUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - SCALL-> List(Y, N,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,Y,N,N,N), - SRET-> List(Y, N,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,Y,N,N,N,N), - FENCE-> List(Y, N,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,Y,N), - FENCE_I-> List(Y, N,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,Y,N,N,Y,N,N), - CSRRW-> List(Y, N,N,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), - CSRRS-> List(Y, N,N,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), - CSRRC-> List(Y, N,N,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N), - CSRRWI-> List(Y, N,N,N,BR_X, N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), - CSRRSI-> List(Y, N,N,N,BR_X, N,N,N,A2_IMM, 
A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), - CSRRCI-> List(Y, N,N,N,BR_X, N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N)) + SCALL-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,Y,N,N,N), + SRET-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,Y,N,N,N,N), + FENCE-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,Y,N), + FENCE_I-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,Y,N,N,Y,N,N), + CSRRW-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), + CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), + CSRRC-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N), + CSRRWI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), + CSRRSI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), + CSRRCI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N)) } object FDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | sret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | - // val | | b | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | brtype | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJ_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, 
N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJ_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJX_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJX_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJN_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJN_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMIN_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMIN_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMAX_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMAX_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FADD_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FADD_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSUB_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSUB_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMUL_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMUL_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMADD_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMADD_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMSUB_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMSUB_D-> List(Y, 
Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMADD_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMADD_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMSUB_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMSUB_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCLASS_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCLASS_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_X_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_X_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_W_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_W_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_WU_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_WU_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_L_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_L_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_LU_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_LU_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FEQ_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FEQ_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, 
DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLT_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLT_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLE_S-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLE_D-> List(Y, Y,N,N,BR_X, N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_S_X-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMV_D_X-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_W-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_W-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_WU-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_WU-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_L-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_L-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_LU-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_LU-> List(Y, Y,N,N,BR_X, N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FLW-> List(Y, Y,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), - FLD-> List(Y, Y,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), - FSW-> List(Y, Y,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), - FSD-> List(Y, Y,N,N,BR_X, N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, 
Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N)) + // jal fence.i + // | jalr mul_val | sret + // fp_val| | renx2 | div_val | | syscall + // | rocc| | | renx1 s_alu1 mem_val | | wen | | | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSGNJN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMIN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMIN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMAX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMAX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
N,N,N,CSR.N,N,N,N,N,N,N), + FSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMUL_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMUL_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FNMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCLASS_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCLASS_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FMV_X_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FMV_X_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_W_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_W_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_WU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_WU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_L_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_L_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_LU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_LU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FEQ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FEQ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLT_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLE_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FLE_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + FMV_S_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FMV_D_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
N,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + FLW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), + FLD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), + FSW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), + FSD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N)) } object RoCCDecode extends DecodeConstants { val table = Array( - // fence.i - // jalr mul_val | sret - // fp_val | renx2 | div_val | | syscall - // | rocc_val | | renx1 s_alu1 mem_val | | wen | | | - // val | | b | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | brtype | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - CUSTOM0-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RS1_RS2-> List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RD-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM0_RD_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM0_RD_RS1_RS2->List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM1_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - 
CUSTOM1_RS1_RS2-> List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD_RS1_RS2->List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RS1_RS2-> List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD_RS1_RS2->List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RS1_RS2-> List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD-> List(Y, N,Y,N,BR_X, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD_RS1-> List(Y, N,Y,N,BR_X, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,BR_X, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N)) + // jal fence.i + // | jalr mul_val | sret + // fp_val| | renx2 | 
div_val | | syscall + // | rocc| | | renx1 s_alu1 mem_val | | wen | | | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + CUSTOM0-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM1-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM1_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM1_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RS1_RS2-> List(Y, 
N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N)) } class Control(implicit conf: RocketConfiguration) extends Module @@ -326,7 +327,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) - val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_rocc_val: Bool) :: (id_branch: Bool) :: id_br_type :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs + val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_rocc_val: Bool) :: (id_branch: Bool) :: (id_jal: Bool) :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs val id_sel_alu2 :: id_sel_alu1 :: id_sel_imm :: (id_fn_dw: Bool) :: id_fn_alu :: cs1 = cs0 val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: cs2 = cs1 val id_csr :: (id_fence_i: Bool) :: (id_sret: Bool) :: 
(id_syscall: Bool) :: (id_replay_next: Bool) :: (id_fence: Bool) :: (id_amo: Bool) :: Nil = cs2 @@ -334,11 +335,10 @@ class Control(implicit conf: RocketConfiguration) extends Module val ex_reg_xcpt_interrupt = Reg(Bool()) val ex_reg_valid = Reg(Bool()) val ex_reg_branch = Reg(Bool()) + val ex_reg_jal = Reg(Bool()) val ex_reg_jalr = Reg(Bool()) - val ex_reg_predicted_taken = Reg(Bool()) val ex_reg_btb_hit = Reg(Bool()) val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits.clone) - val ex_reg_br_type = Reg(UInt()) val ex_reg_sret = Reg(Bool()) val ex_reg_wen = Reg(Bool()) val ex_reg_fp_wen = Reg(Bool()) @@ -357,6 +357,11 @@ class Control(implicit conf: RocketConfiguration) extends Module val mem_reg_xcpt_interrupt = Reg(Bool()) val mem_reg_valid = Reg(Bool()) + val mem_reg_branch = Reg(Bool()) + val mem_reg_jal = Reg(Bool()) + val mem_reg_jalr = Reg(Bool()) + val mem_reg_btb_hit = Reg(Bool()) + val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits.clone) val mem_reg_sret = Reg(Bool()) val mem_reg_wen = Reg(Bool()) val mem_reg_fp_wen = Reg(Bool()) @@ -386,8 +391,10 @@ class Control(implicit conf: RocketConfiguration) extends Module val wb_reg_fp_val = Reg(Bool()) val wb_reg_div_mul_val = Reg(Bool()) - val take_pc = Bool() val take_pc_wb = Bool() + val take_pc_mem = io.dpath.mem_misprediction && (mem_reg_branch || mem_reg_jalr || mem_reg_jal) + val take_pc_mem_wb = take_pc_wb || take_pc_mem + val take_pc = take_pc_mem_wb val ctrl_killd = Bool() val ctrl_killx = Bool() val ctrl_killm = Bool() @@ -457,8 +464,8 @@ class Control(implicit conf: RocketConfiguration) extends Module when (ctrl_killd) { ex_reg_branch := false + ex_reg_jal := false ex_reg_jalr := false - ex_reg_predicted_taken := false ex_reg_btb_hit := false ex_reg_div_mul_val := Bool(false) ex_reg_mem_val := Bool(false) @@ -476,9 +483,8 @@ class Control(implicit conf: RocketConfiguration) extends Module } .otherwise { ex_reg_branch := id_branch + ex_reg_jal := id_jal ex_reg_jalr := id_jalr - ex_reg_predicted_taken := 
io.imem.btb_resp.valid && io.imem.btb_resp.bits.taken - when (id_branch) { ex_reg_br_type := id_br_type } ex_reg_btb_hit := io.imem.btb_resp.valid when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits } ex_reg_div_mul_val := id_mul_val || id_div_val @@ -504,8 +510,7 @@ class Control(implicit conf: RocketConfiguration) extends Module ex_reg_div_mul_val && !io.dpath.div_mul_rdy val replay_ex_other = wb_dcache_miss && ex_reg_load_use || mem_reg_replay_next val replay_ex = replay_ex_structural || replay_ex_other - ctrl_killx := take_pc_wb || replay_ex - val take_pc_ex = ex_reg_jalr && !io.dpath.jalr_eq || io.dpath.ex_br_taken + ctrl_killx := take_pc_mem_wb || replay_ex // detect 2-cycle load-use delay for LB/LH/SC val ex_slow_bypass = ex_reg_mem_cmd === M_XSC || AVec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_reg_mem_type) @@ -513,13 +518,16 @@ class Control(implicit conf: RocketConfiguration) extends Module (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), (ex_reg_fp_val && io.fpu.illegal_rm, UInt(Causes.illegal_instruction)))) - mem_reg_replay := replay_ex && !take_pc_wb - mem_reg_xcpt_interrupt := ex_reg_xcpt_interrupt && !take_pc_wb && !mem_reg_replay_next + mem_reg_replay := !take_pc_mem_wb && replay_ex + mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt && !mem_reg_replay_next when (ex_xcpt) { mem_reg_cause := ex_cause } mem_reg_div_mul_val := ex_reg_div_mul_val && io.dpath.div_mul_rdy when (ctrl_killx) { - mem_reg_valid := Bool(false) + mem_reg_valid := false + mem_reg_branch := false + mem_reg_jal := false + mem_reg_jalr := false mem_reg_csr := CSR.N mem_reg_wen := Bool(false) mem_reg_fp_wen := Bool(false) @@ -532,7 +540,12 @@ class Control(implicit conf: RocketConfiguration) extends Module mem_reg_xcpt := Bool(false) } .otherwise { - mem_reg_valid := ex_reg_valid + mem_reg_valid := ex_reg_valid + mem_reg_branch := ex_reg_branch + mem_reg_jal := ex_reg_jal + mem_reg_jalr := ex_reg_jalr + mem_reg_btb_hit := ex_reg_btb_hit + 
when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp } mem_reg_csr := ex_reg_csr mem_reg_wen := ex_reg_wen mem_reg_fp_wen := ex_reg_fp_wen @@ -635,21 +648,21 @@ class Control(implicit conf: RocketConfiguration) extends Module // control transfer from ex/wb take_pc_wb := replay_wb || wb_reg_xcpt || wb_reg_sret - take_pc := take_pc_ex || take_pc_wb io.dpath.sel_pc := Mux(wb_reg_xcpt, PC_PCR, // exception Mux(wb_reg_sret, PC_PCR, // sret instruction Mux(replay_wb, PC_WB, // replay - PC_EX)))// branch/jal[r] + PC_MEM))) - io.imem.btb_update.valid := ex_reg_btb_hit || !take_pc_wb && (ex_reg_branch || ex_reg_jalr) && !ex_reg_xcpt - io.imem.btb_update.bits.prediction.valid := ex_reg_btb_hit - io.imem.btb_update.bits.prediction.bits := ex_reg_btb_resp - io.imem.btb_update.bits.taken := ex_reg_jalr || io.dpath.ex_br_taken ^ io.dpath.ex_predicted_taken - io.imem.btb_update.bits.incorrectTarget := ex_reg_jalr && !io.dpath.jalr_eq - io.imem.btb_update.bits.isCall := ex_reg_wen && io.dpath.ex_waddr(0) - io.imem.btb_update.bits.isReturn := ex_reg_jalr && io.dpath.ex_rs(0) === 1 + io.imem.btb_update.valid := (mem_reg_branch || mem_reg_jal || mem_reg_jalr) && !take_pc_wb && !mem_reg_xcpt + io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit + io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp + io.imem.btb_update.bits.taken := mem_reg_jal || mem_reg_branch && io.dpath.mem_br_taken + io.imem.btb_update.bits.incorrectTarget := take_pc_mem + io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr + io.imem.btb_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) + io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra io.imem.req.valid := take_pc val bypassDst = Array(id_raddr1, id_raddr2) @@ -677,7 +690,7 @@ class Control(implicit conf: RocketConfiguration) extends Module io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr || io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) - 
val id_ex_hazard = data_hazard_ex && (ex_reg_csr != CSR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val || ex_reg_rocc_val) || + val id_ex_hazard = data_hazard_ex && (ex_reg_csr != CSR.N || ex_reg_jalr || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val || ex_reg_rocc_val) || fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage. @@ -735,9 +748,10 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.div_mul_kill := mem_reg_div_mul_val && killm_common io.dpath.ex_fp_val:= ex_reg_fp_val io.dpath.mem_fp_val:= mem_reg_fp_val - io.dpath.ex_jalr := ex_reg_jalr - io.dpath.ex_predicted_taken := ex_reg_branch && ex_reg_btb_hit && ex_reg_btb_resp.taken + io.dpath.mem_jalr := mem_reg_jalr + io.dpath.mem_branch := mem_reg_branch io.dpath.ex_wen := ex_reg_wen + io.dpath.ex_valid := ex_reg_valid io.dpath.mem_wen := mem_reg_wen io.dpath.ll_ready := !wb_reg_wen io.dpath.wb_wen := wb_reg_wen && !replay_wb @@ -745,7 +759,6 @@ class Control(implicit conf: RocketConfiguration) extends Module io.dpath.csr := wb_reg_csr io.dpath.sret := wb_reg_sret io.dpath.ex_mem_type := ex_reg_mem_type - io.dpath.ex_br_type := Mux(ex_reg_branch, ex_reg_br_type, BR_N) ^ io.dpath.ex_predicted_taken io.dpath.ex_rs2_val := ex_reg_mem_val && isWrite(ex_reg_mem_cmd) || ex_reg_rocc_val io.dpath.ex_rocc_val := ex_reg_rocc_val io.dpath.mem_rocc_val := mem_reg_rocc_val diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 0f446550..86c5d2fb 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -118,9 +118,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Module } } - io.ctrl.ex_rs(0) := ex_reg_inst(19,15) - io.ctrl.ex_rs(1) := ex_reg_inst(24,20) - val bypass = Vec.fill(NBYP)(Bits()) bypass(BYP_0) := Bits(0) bypass(BYP_EX) := mem_reg_wdata @@ -169,11 +166,6 @@ class Datapath(implicit conf: RocketConfiguration) 
extends Module Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1), e(0))) } - val ex_br_base = Mux(io.ctrl.ex_jalr, ex_rs(0), ex_reg_pc) - val ex_br_offset = Mux(io.ctrl.ex_predicted_taken, SInt(4), ex_imm(20,0).toSInt) - val ex_br64 = (ex_br_base + ex_br_offset) & SInt(-2) - val ex_br_msb = Mux(io.ctrl.ex_jalr, vaSign(ex_rs(0), ex_br64), vaSign(ex_reg_pc, ex_br64)) - val ex_br_addr = Cat(ex_br_msb, ex_br64(VADDR_BITS-1,0)) // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module @@ -196,17 +188,6 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.ptw.invalidate := pcr.io.fatc io.ptw.sret := io.ctrl.sret io.ptw.status := pcr.io.status - - // branch resolution logic - io.ctrl.jalr_eq := ex_rs(0) === id_pc.toSInt && ex_reg_inst(31,20) === UInt(0) - io.ctrl.ex_br_taken := - Mux(io.ctrl.ex_br_type === BR_EQ, ex_rs(0) === ex_rs(1), - Mux(io.ctrl.ex_br_type === BR_NE, ex_rs(0) != ex_rs(1), - Mux(io.ctrl.ex_br_type === BR_LT, ex_rs(0).toSInt < ex_rs(1).toSInt, - Mux(io.ctrl.ex_br_type === BR_GE, ex_rs(0).toSInt >= ex_rs(1).toSInt, - Mux(io.ctrl.ex_br_type === BR_LTU, ex_rs(0) < ex_rs(1), - Mux(io.ctrl.ex_br_type === BR_GEU, ex_rs(0) >= ex_rs(1), - io.ctrl.ex_br_type === BR_J)))))) // memory stage mem_reg_kill := ex_reg_kill @@ -255,11 +236,20 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.fpu.dmem_resp_type := io.dmem.resp.bits.typ io.fpu.dmem_resp_tag := dmem_resp_waddr + io.ctrl.mem_br_taken := mem_reg_wdata(0) + val mem_br_target = mem_reg_pc + + Mux(io.ctrl.mem_branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), + Mux(!io.ctrl.mem_jalr && !io.ctrl.mem_branch, imm(IMM_UJ, mem_reg_inst), SInt(4))) + val mem_npc = Mux(io.ctrl.mem_jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(VADDR_BITS-1,0)), mem_br_target) + io.ctrl.mem_misprediction := mem_npc != Mux(io.ctrl.ex_valid, ex_reg_pc, id_pc) + io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1 + val 
mem_int_wdata = Mux(io.ctrl.mem_jalr, mem_br_target, mem_reg_wdata) + // writeback stage when (!mem_reg_kill) { wb_reg_pc := mem_reg_pc wb_reg_inst := mem_reg_inst - wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_reg_wdata) + wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_int_wdata) } when (io.ctrl.mem_rocc_val) { wb_reg_rs2 := mem_reg_rs2 @@ -290,12 +280,12 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // hook up I$ io.imem.req.bits.pc := - Mux(io.ctrl.sel_pc === PC_EX, ex_br_addr, + Mux(io.ctrl.sel_pc === PC_MEM, mem_npc, Mux(io.ctrl.sel_pc === PC_PCR, pcr.io.evec, wb_reg_pc)).toUInt // PC_WB - io.imem.btb_update.bits.pc := ex_reg_pc + io.imem.btb_update.bits.pc := mem_reg_pc io.imem.btb_update.bits.target := io.imem.req.bits.pc - io.imem.btb_update.bits.returnAddr := io.dmem.req.bits.addr & SInt(-4) + io.imem.btb_update.bits.returnAddr := mem_int_wdata // for hazard/bypass opportunity detection io.ctrl.ex_waddr := ex_reg_inst(11,7) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index e4cbcd8c..4d9c54cb 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -14,10 +14,14 @@ object ALU val FN_OR = Bits(6) val FN_AND = Bits(7) val FN_SR = Bits(5) - val FN_SUB = Bits(8) - val FN_SLT = Bits(10) - val FN_SLTU = Bits(11) - val FN_SRA = Bits(13) + val FN_SEQ = Bits(8) + val FN_SNE = Bits(9) + val FN_SUB = Bits(10) + val FN_SRA = Bits(11) + val FN_SLT = Bits(12) + val FN_SGE = Bits(13) + val FN_SLTU = Bits(14) + val FN_SGEU = Bits(15) val FN_DIV = FN_XOR val FN_DIVU = FN_SR @@ -31,7 +35,9 @@ object ALU def isMulFN(fn: Bits, cmp: Bits) = fn(1,0) === cmp(1,0) def isSub(cmd: Bits) = cmd(3) - def isSLTU(cmd: Bits) = cmd(0) + def cmpUnsigned(cmd: Bits) = cmd(1) + def cmpInverted(cmd: Bits) = cmd(0) + def cmpEq(cmd: Bits) = !cmd(2) } import ALU._ @@ -52,8 +58,10 @@ class ALU(implicit conf: 
RocketConfiguration) extends Module val sum = io.in1 + Mux(isSub(io.fn), -io.in2, io.in2) // SLT, SLTU - val less = Mux(io.in1(63) === io.in2(63), sum(63), - Mux(isSLTU(io.fn), io.in2(63), io.in1(63))) + val cmp = cmpInverted(io.fn) ^ + Mux(cmpEq(io.fn), sum === UInt(0), + Mux(io.in1(63) === io.in2(63), sum(63), + Mux(cmpUnsigned(io.fn), io.in2(63), io.in1(63)))) // SLL, SRL, SRA val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0)).toUInt @@ -66,12 +74,12 @@ class ALU(implicit conf: RocketConfiguration) extends Module val out64 = Mux(io.fn === FN_ADD || io.fn === FN_SUB, sum, - Mux(io.fn === FN_SLT || io.fn === FN_SLTU, less, Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, Mux(io.fn === FN_SL, shout_l, Mux(io.fn === FN_AND, io.in1 & io.in2, Mux(io.fn === FN_OR, io.in1 | io.in2, - /*FN_XOR*/ io.in1 ^ io.in2)))))) + Mux(io.fn === FN_XOR, io.in1 ^ io.in2, + /* all comparisons */ cmp)))))) val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) io.out := Cat(out_hi, out64(31,0)).toUInt diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index d9f49da8..5ea88247 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -70,7 +70,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu val pcp4_0 = s1_pc + UInt(c.ibytes) val pcp4 = Cat(s1_pc(VADDR_BITS-1) & pcp4_0(VADDR_BITS-1), pcp4_0(VADDR_BITS-1,0)) val icmiss = s2_valid && !icache.io.resp.valid - val predicted_npc = btbTarget /* zero if btb miss */ | Mux(btb.io.resp.bits.taken, UInt(0), pcp4) + val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, pcp4) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((pcp4 & (c.databits/8)) === (s1_pc & (c.databits/8))) From 927287da34a0e3c3be2fc2000da9055829f73b0c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 7 Apr 2014 23:47:53 -0700 Subject: [PATCH 0714/1087] Bypass 
RAS push/pop --- rocket/src/main/scala/btb.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index d4beed04..a6a99a8b 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -204,12 +204,16 @@ class BTB(implicit conf: BTBConfig) extends Module { if (conf.nras > 0) { val ras = new RAS - when (!ras.isEmpty && Mux1H(hits, useRAS)) { + val doPeek = Mux1H(hits, useRAS) + when (!ras.isEmpty && doPeek) { io.resp.bits.target := ras.peek } when (io.update.valid) { when (io.update.bits.isCall) { ras.push(io.update.bits.returnAddr) + when (doPeek) { + io.resp.bits.target := io.update.bits.returnAddr + } }.elsewhen (io.update.bits.isReturn && io.update.bits.prediction.valid) { ras.pop } From 3ed8adf032d55bf3a5f536c7419e3fabb9a82ef1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 7 Apr 2014 23:48:02 -0700 Subject: [PATCH 0715/1087] Add early out for MUL[W] (not MULH[[S]U]) --- rocket/src/main/scala/multiplier.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index ae4ba082..a80cc050 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -91,10 +91,16 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke val mpcand = divisor.toSInt val prod = mplier(mulUnroll-1,0) * mpcand + accum val nextMulReg = Cat(prod, mplier(mulw-1,mulUnroll)).toUInt - remainder := Cat(nextMulReg >> w, Bool(false), nextMulReg(w-1,0)).toSInt + + val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * mulUnroll)(log2Up(mulw)-1,0))(mulw-1,0) + val eOut = Bool(earlyOut) && count != mulw/mulUnroll-1 && count != 0 && + !isHi && (mplier & ~eOutMask) === UInt(0) + val eOutRes = (mulReg >> (mulw - count * mulUnroll)(log2Up(mulw)-1,0)) + val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, 
nextMulReg)(mulw-1,0)) + remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0)).toSInt count := count + 1 - when (count === mulw/mulUnroll-1) { + when (eOut || count === mulw/mulUnroll-1) { state := Mux(isHi, s_move_rem, s_done) } } From e90f2484aa6dbfc0ec83d9b78150bed7a0c59c39 Mon Sep 17 00:00:00 2001 From: Stephen Twigg Date: Tue, 8 Apr 2014 15:48:37 -0700 Subject: [PATCH 0716/1087] Sync with riscv-opcodes (csr register mapping) --- rocket/src/main/scala/instructions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index ec85ca9e..5be33ff4 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -221,6 +221,7 @@ object CSRs { val fflags = 0x1 val frm = 0x2 val fcsr = 0x3 + val stats = 0xc0 val sup0 = 0x500 val sup1 = 0x501 val epc = 0x502 @@ -237,7 +238,6 @@ object CSRs { val fatc = 0x50d val send_ipi = 0x50e val clear_ipi = 0x50f - val stats = 0x51c val reset = 0x51d val tohost = 0x51e val fromhost = 0x51f @@ -269,6 +269,7 @@ object CSRs { res += fflags res += frm res += fcsr + res += stats res += sup0 res += sup1 res += epc @@ -285,7 +286,6 @@ object CSRs { res += fatc res += send_ipi res += clear_ipi - res += stats res += reset res += tohost res += fromhost From ebdc0a269210e10ca042a3dd99190062de9503e3 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sat, 29 Mar 2014 10:59:07 -0700 Subject: [PATCH 0717/1087] merge Acquire and AcquireData.
cache line size coupled to tilelink data size --- rocket/src/main/scala/icache.scala | 33 +++---- rocket/src/main/scala/nbdcache.scala | 124 +++++++++++++-------------- rocket/src/main/scala/tile.scala | 9 +- 3 files changed, 82 insertions(+), 84 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 5ea88247..c04314d6 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -7,17 +7,19 @@ import Util._ case class ICacheConfig(sets: Int, assoc: Int, ibytes: Int = 4, ntlb: Int = 8, btb: BTBConfig = BTBConfig(8), + tl: TileLinkConfiguration, code: Code = new IdentityCode) { val w = 1 val dm = assoc == 1 val lines = sets * assoc - val databits = MEM_DATA_BITS + val databits = tl.dataBits val idxbits = log2Up(sets) val offbits = OFFSET_BITS val untagbits = idxbits + offbits val tagbits = PADDR_BITS - untagbits + def refillcycles = CACHE_DATA_SIZE_IN_BYTES*8/tl.dataBits require(isPow2(sets) && isPow2(assoc)) require(isPow2(w) && isPow2(ibytes)) @@ -133,9 +135,9 @@ class ICacheResp(implicit c: ICacheConfig) extends Bundle { override def clone = new ICacheResp().asInstanceOf[this.type] } -class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module +class ICache(implicit c: ICacheConfig) extends Module { - implicit val lnConf = tl.ln + implicit val (tl, ln) = (c.tl, c.tl.ln) val io = new Bundle { val req = Valid(new ICacheReq).flip val resp = Decoupled(new ICacheResp) @@ -179,7 +181,7 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module rdy := state === s_ready && !s2_miss //assert(!co.isVoluntary(io.mem.grant.bits.payload) || !io.mem.grant.valid, "UncachedRequestors shouldn't get voluntary grants.") - val (rf_cnt, refill_done) = Counter(io.mem.grant.valid, REFILL_CYCLES) + val (rf_cnt, refill_done) = (if(c.refillcycles > 1) Counter(io.mem.grant.valid, c.refillcycles) else (UInt(0), state === s_refill)) val repl_way = if (c.dm) UInt(0) 
else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) val enc_tagbits = c.code.width(c.tagbits) @@ -229,15 +231,16 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_) for (i <- 0 until c.assoc) { - val data_array = Mem(Bits(width = c.code.width(c.databits)), c.sets*REFILL_CYCLES, seqRead = true) + val data_array = Mem(Bits(width = c.code.width(c.databits)), c.sets*c.refillcycles, seqRead = true) val s1_raddr = Reg(UInt()) when (io.mem.grant.valid && repl_way === UInt(i)) { val d = io.mem.grant.bits.payload.data - data_array(Cat(s2_idx,rf_cnt)) := c.code.encode(d) + if(c.refillcycles > 1) data_array(Cat(s2_idx,rf_cnt)) := c.code.encode(d) + else data_array(s2_idx) := c.code.encode(d) } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM .elsewhen (s0_valid) { - s1_raddr := s0_pgoff(c.untagbits-1,c.offbits-rf_cnt.getWidth) + s1_raddr := s0_pgoff(c.untagbits-1,c.offbits-(if(c.refillcycles > 1) rf_cnt.getWidth else 0)) } // if s1_tag_match is critical, replace with partial tag check when (s1_valid && rdy && !stall && (Bool(c.dm) || s1_tag_match(i))) { s2_dout(i) := data_array(s1_raddr) } @@ -246,16 +249,16 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) - val finish_q = Module(new Queue(new GrantAck, 1)) - finish_q.io.enq.valid := refill_done && tl.co.requiresAckForGrant(io.mem.grant.bits.payload.g_type) - finish_q.io.enq.bits.master_xact_id := io.mem.grant.bits.payload.master_xact_id + val ack_q = Module(new Queue(new LogicalNetworkIO(new GrantAck), 1)) + ack_q.io.enq.valid := refill_done && tl.co.requiresAckForGrant(io.mem.grant.bits.payload.g_type) + ack_q.io.enq.bits.payload.master_xact_id := io.mem.grant.bits.payload.master_xact_id + ack_q.io.enq.bits.header.dst := io.mem.grant.bits.header.src // output signals 
io.resp.valid := s2_hit - io.mem.acquire.meta.valid := (state === s_request) && finish_q.io.enq.ready - io.mem.acquire.meta.bits.payload := Acquire(tl.co.getUncachedReadAcquireType, s2_addr >> UInt(c.offbits), UInt(0)) - io.mem.acquire.data.valid := Bool(false) - io.mem.grant_ack <> FIFOedLogicalNetworkIOWrapper(finish_q.io.deq) + io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready + io.mem.acquire.bits.payload := Acquire(tl.co.getUncachedReadAcquireType, s2_addr >> UInt(c.offbits), UInt(0)) + io.mem.grant_ack <> ack_q.io.deq io.mem.grant.ready := Bool(true) // control state machine @@ -265,7 +268,7 @@ class ICache(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module invalidated := Bool(false) } is (s_request) { - when (io.mem.acquire.meta.ready && finish_q.io.enq.ready) { state := s_refill_wait } + when (io.mem.acquire.ready && ack_q.io.enq.ready) { state := s_refill_wait } } is (s_refill_wait) { when (io.mem.grant.valid) { state := s_refill } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 7eb5b15d..94014511 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -6,7 +6,7 @@ import Util._ case class DCacheConfig(sets: Int, ways: Int, nmshr: Int, nrpq: Int, nsdq: Int, ntlb: Int, - states: Int = 2, + tl: TileLinkConfiguration, code: Code = new IdentityCode, narrowRead: Boolean = true, reqtagbits: Int = -1, databits: Int = -1) @@ -17,6 +17,7 @@ case class DCacheConfig(sets: Int, ways: Int, require(log2Up(OFFSET_BITS) <= ACQUIRE_SUBWORD_ADDR_BITS) require(isPow2(sets)) require(isPow2(ways)) // TODO: relax this + def states = tl.co.nClientStates def lines = sets*ways def dm = ways == 1 def ppnbits = PADDR_BITS - PGIDX_BITS @@ -30,15 +31,16 @@ case class DCacheConfig(sets: Int, ways: Int, def waybits = log2Up(ways) def untagbits = offbits + idxbits def tagbits = lineaddrbits - idxbits - def ramoffbits = log2Up(MEM_DATA_BITS/8) + def ramoffbits = 
log2Up(tl.dataBits/8) def databytes = databits/8 def wordoffbits = log2Up(databytes) - def isNarrowRead = narrowRead && databits*ways % MEM_DATA_BITS == 0 + def isNarrowRead = narrowRead && databits*ways % tl.dataBits == 0 + def refillcycles = CACHE_DATA_SIZE_IN_BYTES*8/tl.dataBits val statebits = log2Up(states) val metabits = statebits + tagbits val encdatabits = code.width(databits) val encmetabits = code.width(metabits) - val wordsperrow = MEM_DATA_BITS/databits + val wordsperrow = tl.dataBits/databits val bitsperrow = wordsperrow*encdatabits val lrsc_cycles = 32 // ISA requires 16-insn LRSC sequences to succeed } @@ -120,18 +122,19 @@ class DataWriteReq(implicit val conf: DCacheConfig) extends DCacheBundle { val data = Bits(width = conf.bitsperrow) } -class InternalProbe(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Probe { - val client_xact_id = Bits(width = tl.clientXactIdBits) +class InternalProbe(implicit conf: DCacheConfig) extends Probe()(conf.tl) { + val client_xact_id = Bits(width = conf.tl.clientXactIdBits) override def clone = new InternalProbe().asInstanceOf[this.type] } -class WritebackReq(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Bundle { +class WritebackReq(implicit conf: DCacheConfig) extends Bundle { val tag = Bits(width = conf.tagbits) val idx = Bits(width = conf.idxbits) val way_en = Bits(width = conf.ways) - val client_xact_id = Bits(width = tl.clientXactIdBits) - val r_type = UInt(width = tl.co.releaseTypeWidth) + val client_xact_id = Bits(width = conf.tl.clientXactIdBits) + val master_xact_id = Bits(width = conf.tl.masterXactIdBits) + val r_type = UInt(width = conf.tl.co.releaseTypeWidth) override def clone = new WritebackReq().asInstanceOf[this.type] } @@ -159,8 +162,8 @@ class MetaWriteReq(implicit val conf: DCacheConfig) extends DCacheBundle { val data = new MetaData() } -class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { - implicit val ln = tl.ln +class 
MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { + implicit val (tl, ln) = (conf.tl, conf.tl.ln) val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) @@ -189,7 +192,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte val acquire_type = Reg(UInt()) val release_type = Reg(UInt()) val line_state = Reg(UInt()) - val refill_count = Reg(UInt(width = log2Up(REFILL_CYCLES))) + val refill_count = Reg(UInt(width = log2Up(conf.refillcycles))) // TODO: zero-width wire val req = Reg(new MSHRReq()) val req_cmd = io.req_bits.cmd @@ -198,7 +201,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !tl.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UInt(id) - val refill_done = reply && refill_count.andR + val refill_done = reply && (if(conf.refillcycles > 1) refill_count.andR else Bool(true)) val wb_done = reply && (state === s_wb_resp) val rpq = Module(new Queue(new Replay, conf.nrpq)) @@ -220,7 +223,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte when (state === s_refill_resp) { when (refill_done) { state := s_meta_write_req } when (reply) { - refill_count := refill_count + UInt(1) + if(conf.refillcycles > 1) refill_count := refill_count + UInt(1) line_state := tl.co.newStateOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) } } @@ -270,7 +273,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte io.idx_match := (state != s_invalid) && idx_match io.mem_resp := req - io.mem_resp.addr := Cat(req_idx, refill_count) << conf.ramoffbits + io.mem_resp.addr := (if(conf.refillcycles > 1) Cat(req_idx, refill_count) else req_idx) << conf.ramoffbits io.tag := req.addr >> 
conf.untagbits io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready @@ -291,6 +294,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte io.wb_req.bits.idx := req_idx io.wb_req.bits.way_en := req.way_en io.wb_req.bits.client_xact_id := Bits(id) + io.wb_req.bits.master_xact_id := Bits(0) // DNC io.wb_req.bits.r_type := tl.co.getReleaseTypeOnVoluntaryWriteback() io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready @@ -314,8 +318,8 @@ class MSHR(id: Int)(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte } } -class MSHRFile(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { - implicit val ln = tl.ln +class MSHRFile(implicit conf: DCacheConfig) extends Module { + implicit val (tl, ln) = (conf.tl, conf.tl.ln) val io = new Bundle { val req = Decoupled(new MSHRReq).flip val secondary_miss = Bool(OUTPUT) @@ -416,22 +420,23 @@ class MSHRFile(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends M } -class WritebackUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { +class WritebackUnit(implicit conf: DCacheConfig) extends Module { + implicit val tl = conf.tl val io = new Bundle { val req = Decoupled(new WritebackReq()).flip - val probe = Decoupled(new WritebackReq()).flip val meta_read = Decoupled(new MetaReadReq) val data_req = Decoupled(new DataReadReq()) val data_resp = Bits(INPUT, conf.bitsperrow) val release = Decoupled(new Release) - val release_data = Decoupled(new ReleaseData) } + require(conf.refillcycles == 1) // TODO Currently will issue refillcycles distinct releases; need to merge if rowsize < tilelink.dataSize + val valid = Reg(init=Bool(false)) val r1_data_req_fired = Reg(init=Bool(false)) val r2_data_req_fired = Reg(init=Bool(false)) val cmd_sent = Reg(Bool()) - val cnt = Reg(UInt(width = log2Up(REFILL_CYCLES+1))) + val cnt = Reg(UInt(width = log2Up(conf.refillcycles+1))) val req = Reg(new WritebackReq) when (valid) { 
@@ -441,26 +446,18 @@ class WritebackUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte r1_data_req_fired := true cnt := cnt + 1 } - - when (r2_data_req_fired && !io.release_data.ready) { + when (r2_data_req_fired && !io.release.ready) { r1_data_req_fired := false r2_data_req_fired := false - cnt := cnt - Mux[UInt](r1_data_req_fired, 2, 1) + cnt := (if(conf.refillcycles > 1) cnt - Mux[UInt](r1_data_req_fired, 2, 1) else UInt(0)) } - - when (!r1_data_req_fired && !r2_data_req_fired && cmd_sent && cnt === REFILL_CYCLES) { + when (io.release.fire()) { + cmd_sent := true + } + when (!r1_data_req_fired && !r2_data_req_fired && cmd_sent && cnt === conf.refillcycles) { valid := false } - when (valid && io.release.ready) { - cmd_sent := true - } - } - when (io.probe.fire()) { - valid := true - cmd_sent := true - cnt := 0 - req := io.probe.bits } when (io.req.fire()) { valid := true @@ -469,26 +466,27 @@ class WritebackUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) exte req := io.req.bits } - val fire = valid && cnt < UInt(REFILL_CYCLES) - io.req.ready := !valid && !io.probe.valid - io.probe.ready := !valid + val fire = valid && cnt < UInt(conf.refillcycles) + io.req.ready := !valid io.data_req.valid := fire io.data_req.bits.way_en := req.way_en - io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(REFILL_CYCLES)-1,0)) << conf.ramoffbits + io.data_req.bits.addr := (if(conf.refillcycles > 1) Cat(req.idx, cnt(log2Up(conf.refillcycles)-1,0)) + else req.idx) << conf.ramoffbits - io.release.valid := valid && !cmd_sent + io.release.valid := valid && r2_data_req_fired io.release.bits.r_type := req.r_type io.release.bits.addr := Cat(req.tag, req.idx).toUInt io.release.bits.client_xact_id := req.client_xact_id - io.release.bits.master_xact_id := UInt(0) - io.release_data.valid := r2_data_req_fired - io.release_data.bits.data := io.data_resp + io.release.bits.master_xact_id := req.master_xact_id + io.release.bits.data := io.data_resp + // We reissue the meta 
read as it sets up the muxing for s2_data_muxed io.meta_read.valid := fire io.meta_read.bits.addr := io.release.bits.addr << conf.offbits } -class ProbeUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { +class ProbeUnit(implicit conf: DCacheConfig) extends Module { + implicit val tl = conf.tl val io = new Bundle { val req = Decoupled(new InternalProbe).flip val rep = Decoupled(new Release) @@ -543,7 +541,7 @@ class ProbeUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends } io.req.ready := state === s_invalid - io.rep.valid := state === s_release + io.rep.valid := state === s_release && !tl.co.needsWriteback(line_state) io.rep.bits := Release(tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.newStateOnFlush)), req.addr, req.client_xact_id, req.master_xact_id) io.meta_read.valid := state === s_meta_read @@ -559,11 +557,13 @@ class ProbeUnit(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends io.wb_req.bits.way_en := way_en io.wb_req.bits.idx := req.addr io.wb_req.bits.tag := req.addr >> UInt(conf.idxbits) - io.wb_req.bits.r_type := UInt(0) // DNC - io.wb_req.bits.client_xact_id := UInt(0) // DNC + io.wb_req.bits.r_type := tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.newStateOnFlush)) + io.wb_req.bits.client_xact_id := req.client_xact_id + io.wb_req.bits.master_xact_id := req.master_xact_id } -class MetaDataArray(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { +class MetaDataArray(implicit conf: DCacheConfig) extends Module { + implicit val tl = conf.tl val io = new Bundle { val read = Decoupled(new MetaReadReq).flip val write = Decoupled(new MetaWriteReq).flip @@ -612,7 +612,7 @@ class DataArray(implicit conf: DCacheConfig) extends Module { val resp = Vec.fill(conf.wordsperrow){Bits(width = conf.bitsperrow)} val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { - val array = Mem(Bits(width=conf.bitsperrow), 
conf.sets*REFILL_CYCLES, seqRead = true) + val array = Mem(Bits(width=conf.bitsperrow), conf.sets*conf.refillcycles, seqRead = true) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { val data = Fill(conf.wordsperrow, io.write.bits.data(conf.encdatabits*(p+1)-1,conf.encdatabits*p)) val mask = FillInterleaved(conf.encdatabits, wway_en) @@ -631,7 +631,7 @@ class DataArray(implicit conf: DCacheConfig) extends Module { } else { val wmask = FillInterleaved(conf.encdatabits, io.write.bits.wmask) for (w <- 0 until conf.ways) { - val array = Mem(Bits(width=conf.bitsperrow), conf.sets*REFILL_CYCLES, seqRead = true) + val array = Mem(Bits(width=conf.bitsperrow), conf.sets*conf.refillcycles, seqRead = true) when (io.write.bits.way_en(w) && io.write.valid) { array.write(waddr, io.write.bits.data, wmask) } @@ -727,8 +727,8 @@ class HellaCacheIO(implicit conf: DCacheConfig) extends Bundle { val ordered = Bool(INPUT) } -class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends Module { - implicit val ln = tl.ln +class HellaCache(implicit conf: DCacheConfig) extends Module { + implicit val (tl, ln) = (conf.tl, conf.tl.ln) val io = new Bundle { val cpu = (new HellaCacheIO).flip val mem = new TileLinkIO @@ -930,16 +930,11 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends mshrs.io.req.bits.old_meta := Mux(s2_tag_match, MetaData(s2_repl_meta.tag, s2_hit_state), s2_repl_meta) mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshrs.io.req.bits.data := s2_req.data + when (mshrs.io.req.fire()) { replacer.miss } mshrs.io.mem_grant.valid := io.mem.grant.fire() mshrs.io.mem_grant.bits := io.mem.grant.bits - when (mshrs.io.req.fire()) { replacer.miss } - - io.mem.acquire.meta <> FIFOedLogicalNetworkIOWrapper(mshrs.io.mem_req) - //TODO io.mem.acquire.data should be connected to uncached store data generator - //io.mem.acquire.data <> FIFOedLogicalNetworkIOWrapper(TODO) - 
io.mem.acquire.data.valid := Bool(false) - io.mem.acquire.data.bits.payload.data := UInt(0) + io.mem.acquire <> DecoupledLogicalNetworkIOWrapper(mshrs.io.mem_req) // replays readArb.io.in(1).valid := mshrs.io.replay.valid @@ -951,14 +946,13 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends metaWriteArb.io.in(0) <> mshrs.io.meta_write // probes val releaseArb = Module(new Arbiter(new Release, 2)) - FIFOedLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release.meta + DecoupledLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release - val probe = FIFOedLogicalNetworkIOUnwrapper(io.mem.probe) + val probe = DecoupledLogicalNetworkIOUnwrapper(io.mem.probe) prober.io.req.valid := probe.valid && !lrsc_valid probe.ready := prober.io.req.ready && !lrsc_valid prober.io.req.bits := probe.bits prober.io.rep <> releaseArb.io.in(1) - prober.io.wb_req <> wb.io.probe prober.io.way_en := s2_tag_match_way prober.io.line_state := s2_hit_state prober.io.meta_read <> metaReadArb.io.in(2) @@ -974,12 +968,14 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends writeArb.io.in(1).bits.data := io.mem.grant.bits.payload.data // writebacks - wb.io.req <> mshrs.io.wb_req + val wbArb = Module(new Arbiter(new WritebackReq, 2)) + prober.io.wb_req <> wbArb.io.in(0) + mshrs.io.wb_req <> wbArb.io.in(1) + wbArb.io.out <> wb.io.req wb.io.meta_read <> metaReadArb.io.in(3) wb.io.data_req <> readArb.io.in(2) wb.io.data_resp := s2_data_corrected releaseArb.io.in(0) <> wb.io.release - FIFOedLogicalNetworkIOWrapper(wb.io.release_data) <> io.mem.release.data // store->load bypassing val s4_valid = Reg(next=s3_valid, init=Bool(false)) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index e2e0a60f..7fde42f0 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -77,9 +77,8 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module 
memArb.io.out.grant <> io.tilelink.grant io.tilelink.grant_ack <> memArb.io.out.grant_ack dcache.io.mem.probe <> io.tilelink.probe - io.tilelink.release.data <> dcache.io.mem.release.data - io.tilelink.release.meta.valid := dcache.io.mem.release.meta.valid - dcache.io.mem.release.meta.ready := io.tilelink.release.meta.ready - io.tilelink.release.meta.bits := dcache.io.mem.release.meta.bits - io.tilelink.release.meta.bits.payload.client_xact_id := Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) + io.tilelink.release.valid := dcache.io.mem.release.valid + dcache.io.mem.release.ready := io.tilelink.release.ready + io.tilelink.release.bits := dcache.io.mem.release.bits + io.tilelink.release.bits.payload.client_xact_id := Cat(dcache.io.mem.release.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) } From 910b3b203adc39a03ceaff51ad75311269087fe2 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 1 Apr 2014 17:15:46 -0700 Subject: [PATCH 0718/1087] removed AddressSpaceConstants, CacheConstants, and TileLinkSizeConstants --- rocket/src/main/scala/btb.scala | 17 +++--- rocket/src/main/scala/core.scala | 3 +- rocket/src/main/scala/csr.scala | 26 ++++----- rocket/src/main/scala/dpath.scala | 14 ++--- rocket/src/main/scala/icache.scala | 57 ++++++++++--------- rocket/src/main/scala/nbdcache.scala | 82 ++++++++++++++-------------- rocket/src/main/scala/ptw.scala | 35 ++++++------ rocket/src/main/scala/rocc.scala | 1 + rocket/src/main/scala/tile.scala | 2 +- rocket/src/main/scala/tlb.scala | 24 ++++---- 10 files changed, 135 insertions(+), 126 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index a6a99a8b..e7a30221 100644 --- a/rocket/src/main/scala/btb.scala +++ 
b/rocket/src/main/scala/btb.scala @@ -3,10 +3,9 @@ package rocket import Chisel._ import Util._ import Node._ -import uncore.constants.AddressConstants._ -case class BTBConfig(entries: Int, nras: Int = 0) { - val matchBits = PGIDX_BITS +case class BTBConfig(as: uncore.AddressSpaceConfiguration, entries: Int, nras: Int = 0) { + val matchBits = as.pgIdxBits val pages0 = 1 max log2Up(entries) // is this sensible? val pages = (pages0+1)/2*2 // control logic assumes 2 divides pages val opaqueBits = log2Up(entries) @@ -56,9 +55,9 @@ class BHT(implicit conf: BTBConfig) { class BTBUpdate(implicit conf: BTBConfig) extends Bundle { val prediction = Valid(new BTBResp) - val pc = UInt(width = VADDR_BITS) - val target = UInt(width = VADDR_BITS) - val returnAddr = UInt(width = VADDR_BITS) + val pc = UInt(width = conf.as.vaddrBits) + val target = UInt(width = conf.as.vaddrBits) + val returnAddr = UInt(width = conf.as.vaddrBits) val taken = Bool() val isJump = Bool() val isCall = Bool() @@ -70,7 +69,7 @@ class BTBUpdate(implicit conf: BTBConfig) extends Bundle { class BTBResp(implicit conf: BTBConfig) extends Bundle { val taken = Bool() - val target = UInt(width = VADDR_BITS) + val target = UInt(width = conf.as.vaddrBits) val entry = UInt(width = conf.opaqueBits) val bht = new BHTResp @@ -80,7 +79,7 @@ class BTBResp(implicit conf: BTBConfig) extends Bundle { // fully-associative branch target buffer class BTB(implicit conf: BTBConfig) extends Module { val io = new Bundle { - val req = UInt(INPUT, VADDR_BITS) + val req = UInt(INPUT, conf.as.vaddrBits) val resp = Valid(new BTBResp) val update = Valid(new BTBUpdate).flip val invalidate = Bool(INPUT) @@ -91,7 +90,7 @@ class BTB(implicit conf: BTBConfig) extends Module { val idxPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} val tgts = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))} val tgtPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} - val pages = 
Vec.fill(conf.pages){Reg(UInt(width=VADDR_BITS-conf.matchBits))} + val pages = Vec.fill(conf.pages){Reg(UInt(width=conf.as.vaddrBits-conf.matchBits))} val pageValid = Vec.fill(conf.pages){Reg(init=Bool(false))} val idxPagesOH = idxPages.map(UIntToOH(_)(conf.pages-1,0)) val tgtPagesOH = tgtPages.map(UIntToOH(_)(conf.pages-1,0)) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 4a37c424..05a13534 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -3,14 +3,13 @@ package rocket import Chisel._ import Util._ import uncore.HTIFIO -import uncore.constants.MemoryOpConstants._ class RocketIO(implicit conf: RocketConfiguration) extends Bundle { val host = new HTIFIO(conf.tl.ln.nClients) val imem = new CPUFrontendIO()(conf.icache) val dmem = new HellaCacheIO()(conf.dcache) - val ptw = new DatapathPTWIO().flip + val ptw = new DatapathPTWIO()(conf.as).flip val rocc = new RoCCInterface().flip } diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 421079a4..8f10b02c 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -4,7 +4,7 @@ import Chisel._ import Util._ import Node._ import uncore.HTIFIO -import uncore.constants.AddressConstants._ +import uncore.AddressSpaceConfiguration import scala.math._ class Status extends Bundle { @@ -45,14 +45,14 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module } val status = new Status().asOutput - val ptbr = UInt(OUTPUT, PADDR_BITS) - val evec = UInt(OUTPUT, VADDR_BITS+1) + val ptbr = UInt(OUTPUT, conf.as.paddrBits) + val evec = UInt(OUTPUT, conf.as.vaddrBits+1) val exception = Bool(INPUT) val retire = UInt(INPUT, log2Up(1+conf.retireWidth)) val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+conf.retireWidth))) val cause = UInt(INPUT, conf.xprlen) val badvaddr_wen = Bool(INPUT) - val pc = UInt(INPUT, VADDR_BITS+1) + val pc = UInt(INPUT, conf.as.vaddrBits+1) val sret = Bool(INPUT) val fatc 
= Bool(OUTPUT) val replay = Bool(OUTPUT) @@ -62,16 +62,16 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val rocc = new RoCCInterface().flip } - val reg_epc = Reg(Bits(width = VADDR_BITS+1)) - val reg_badvaddr = Reg(Bits(width = VADDR_BITS)) - val reg_evec = Reg(Bits(width = VADDR_BITS)) + val reg_epc = Reg(Bits(width = conf.as.vaddrBits+1)) + val reg_badvaddr = Reg(Bits(width = conf.as.vaddrBits)) + val reg_evec = Reg(Bits(width = conf.as.vaddrBits)) val reg_compare = Reg(Bits(width = 32)) val reg_cause = Reg(Bits(width = conf.xprlen)) val reg_tohost = Reg(init=Bits(0, conf.xprlen)) val reg_fromhost = Reg(init=Bits(0, conf.xprlen)) val reg_sup0 = Reg(Bits(width = conf.xprlen)) val reg_sup1 = Reg(Bits(width = conf.xprlen)) - val reg_ptbr = Reg(UInt(width = PADDR_BITS)) + val reg_ptbr = Reg(UInt(width = conf.as.paddrBits)) val reg_stats = Reg(init=Bool(false)) val reg_status = Reg(new Status) // reset down below val reg_time = WideCounter(conf.xprlen) @@ -130,7 +130,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module when (io.badvaddr_wen) { val wdata = io.rw.wdata - val (upper, lower) = Split(wdata, VADDR_BITS) + val (upper, lower) = Split(wdata, conf.as.vaddrBits) val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) reg_badvaddr := Cat(sign, lower).toSInt } @@ -161,7 +161,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module when (host_pcr_req_fire && !host_pcr_bits.rw && decoded_addr(CSRs.tohost)) { reg_tohost := UInt(0) } val read_impl = Bits(2) - val read_ptbr = reg_ptbr(PADDR_BITS-1,PGIDX_BITS) << PGIDX_BITS + val read_ptbr = reg_ptbr(conf.as.paddrBits-1, conf.as.pgIdxBits) << conf.as.pgIdxBits val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( CSRs.fflags -> (if (!conf.fpu.isEmpty) reg_fflags else UInt(0)), @@ -213,8 +213,8 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { 
reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } - when (decoded_addr(CSRs.epc)) { reg_epc := wdata(VADDR_BITS,0).toSInt } - when (decoded_addr(CSRs.evec)) { reg_evec := wdata(VADDR_BITS-1,0).toSInt } + when (decoded_addr(CSRs.epc)) { reg_epc := wdata(conf.as.vaddrBits,0).toSInt } + when (decoded_addr(CSRs.evec)) { reg_evec := wdata(conf.as.vaddrBits-1,0).toSInt } when (decoded_addr(CSRs.count)) { reg_time := wdata.toUInt } when (decoded_addr(CSRs.compare)) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false) } when (decoded_addr(CSRs.fromhost)) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } @@ -222,7 +222,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module when (decoded_addr(CSRs.clear_ipi)){ r_irq_ipi := wdata(0) } when (decoded_addr(CSRs.sup0)) { reg_sup0 := wdata } when (decoded_addr(CSRs.sup1)) { reg_sup1 := wdata } - when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(PADDR_BITS-1, PGIDX_BITS), Bits(0, PGIDX_BITS)).toUInt } + when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(conf.as.paddrBits-1, conf.as.pgIdxBits), Bits(0, conf.as.pgIdxBits)).toUInt } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } } diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 86c5d2fb..ea6b59cf 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -4,10 +4,10 @@ import Chisel._ import Instructions._ import Util._ import uncore.HTIFIO -import uncore.constants.AddressConstants._ class Datapath(implicit conf: RocketConfiguration) extends Module { + implicit val as = conf.as val io = new Bundle { val host = new HTIFIO(conf.tl.ln.nClients) val ctrl = (new CtrlDpathIO).flip @@ -158,10 +158,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.fpu.fromint_data := ex_rs(0) def vaSign(a0: UInt, ea: Bits) = { - // efficient means to compress 64-bit VA 
into VADDR_BITS+1 bits - // (VA is bad if VA(VADDR_BITS) != VA(VADDR_BITS-1)) - val a = a0 >> VADDR_BITS-1 - val e = ea(VADDR_BITS,VADDR_BITS-1) + // efficient means to compress 64-bit VA into conf.as.vaddrBits+1 bits + // (VA is bad if VA(conf.as.vaddrBits) != VA(conf.as.vaddrBits-1)) + val a = a0 >> conf.as.vaddrBits-1 + val e = ea(conf.as.vaddrBits,conf.as.vaddrBits-1) Mux(a === UInt(0) || a === UInt(1), e != UInt(0), Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1), e(0))) @@ -169,7 +169,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(VADDR_BITS-1,0)).toUInt + io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(conf.as.vaddrBits-1,0)).toUInt io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_fp_val) require(io.dmem.req.bits.tag.getWidth >= 6) require(conf.dcacheReqTagBits >= 6) @@ -240,7 +240,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val mem_br_target = mem_reg_pc + Mux(io.ctrl.mem_branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), Mux(!io.ctrl.mem_jalr && !io.ctrl.mem_branch, imm(IMM_UJ, mem_reg_inst), SInt(4))) - val mem_npc = Mux(io.ctrl.mem_jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(VADDR_BITS-1,0)), mem_br_target) + val mem_npc = Mux(io.ctrl.mem_jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(conf.as.vaddrBits-1,0)), mem_br_target) io.ctrl.mem_misprediction := mem_npc != Mux(io.ctrl.ex_valid, ex_reg_pc, id_pc) io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1 val mem_int_wdata = Mux(io.ctrl.mem_jalr, mem_br_target, mem_reg_wdata) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index c04314d6..8ed0091f 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ 
-5,33 +5,37 @@ import uncore._ import Util._ case class ICacheConfig(sets: Int, assoc: Int, - ibytes: Int = 4, - ntlb: Int = 8, btb: BTBConfig = BTBConfig(8), + ibytes: Int = 4, rowbytes: Int = 64, + ntlb: Int = 8, tl: TileLinkConfiguration, + as: AddressSpaceConfiguration, + btb: BTBConfig, code: Code = new IdentityCode) { val w = 1 val dm = assoc == 1 val lines = sets * assoc - val databits = tl.dataBits val idxbits = log2Up(sets) - val offbits = OFFSET_BITS + val offbits = log2Up(tl.dataBits/8) + val rowbits = rowbytes*8 + val rowoffbits = log2Up(rowbytes) val untagbits = idxbits + offbits - val tagbits = PADDR_BITS - untagbits - def refillcycles = CACHE_DATA_SIZE_IN_BYTES*8/tl.dataBits + val tagbits = as.paddrBits - untagbits + val refillcycles = tl.dataBits/rowbits require(isPow2(sets) && isPow2(assoc)) require(isPow2(w) && isPow2(ibytes)) - require(PGIDX_BITS >= untagbits) + require(as.pgIdxBits >= untagbits) } -class FrontendReq extends Bundle { - val pc = UInt(width = VADDR_BITS+1) +class FrontendReq()(implicit conf: ICacheConfig) extends Bundle { + val pc = UInt(width = conf.as.vaddrBits+1) + override def clone = new FrontendReq().asInstanceOf[this.type] } class FrontendResp(implicit conf: ICacheConfig) extends Bundle { - val pc = UInt(width = VADDR_BITS+1) // ID stage PC + val pc = UInt(width = conf.as.vaddrBits+1) // ID stage PC val data = Bits(width = conf.ibytes*8) val xcpt_ma = Bool() val xcpt_if = Bool() @@ -44,12 +48,13 @@ class CPUFrontendIO(implicit conf: ICacheConfig) extends Bundle { val resp = Decoupled(new FrontendResp).flip val btb_resp = Valid(new BTBResp()(conf.btb)).flip val btb_update = Valid(new BTBUpdate()(conf.btb)) - val ptw = new TLBPTWIO().flip + val ptw = new TLBPTWIO()(conf.as).flip val invalidate = Bool(OUTPUT) } -class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Module +class Frontend(implicit c: ICacheConfig) extends Module { + implicit val (tl, as) = (c.tl, c.as) val io = new Bundle { val cpu = new 
CPUFrontendIO()(c).flip val mem = new UncachedTileLinkIO @@ -68,13 +73,14 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu val s2_btb_resp_bits = Reg(btb.io.resp.bits.clone) val s2_xcpt_if = Reg(init=Bool(false)) - val btbTarget = Cat(btb.io.resp.bits.target(VADDR_BITS-1), btb.io.resp.bits.target) + val msb = c.as.vaddrBits-1 + val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) val pcp4_0 = s1_pc + UInt(c.ibytes) - val pcp4 = Cat(s1_pc(VADDR_BITS-1) & pcp4_0(VADDR_BITS-1), pcp4_0(VADDR_BITS-1,0)) + val pcp4 = Cat(s1_pc(msb) & pcp4_0(msb), pcp4_0(msb,0)) val icmiss = s2_valid && !icache.io.resp.valid val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, pcp4) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt - val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((pcp4 & (c.databits/8)) === (s1_pc & (c.databits/8))) + val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((pcp4 & c.rowbytes) === (s1_pc & c.rowbytes)) val stall = io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { @@ -100,7 +106,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu tlb.io.ptw <> io.cpu.ptw tlb.io.req.valid := !stall && !icmiss - tlb.io.req.bits.vpn := s1_pc >> UInt(PGIDX_BITS) + tlb.io.req.bits.vpn := s1_pc >> UInt(c.as.pgIdxBits) tlb.io.req.bits.asid := UInt(0) tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.instruction := Bool(true) @@ -115,7 +121,7 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) io.cpu.resp.bits.pc := s2_pc & SInt(-c.ibytes) // discard PC LSBs - io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(c.databits/8)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)) + io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(c.rowbytes)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)) 
io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(c.ibytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if @@ -123,15 +129,16 @@ class Frontend(implicit c: ICacheConfig, tl: TileLinkConfiguration) extends Modu io.cpu.btb_resp.bits := s2_btb_resp_bits } -class ICacheReq extends Bundle { - val idx = UInt(width = PGIDX_BITS) - val ppn = UInt(width = PPN_BITS) // delayed one cycle +class ICacheReq(implicit c: ICacheConfig) extends Bundle { + val idx = UInt(width = c.as.pgIdxBits) + val ppn = UInt(width = c.as.ppnBits) // delayed one cycle val kill = Bool() // delayed one cycle + override def clone = new ICacheReq().asInstanceOf[this.type] } class ICacheResp(implicit c: ICacheConfig) extends Bundle { val data = Bits(width = c.ibytes*8) - val datablock = Bits(width = c.databits) + val datablock = Bits(width = c.rowbits) override def clone = new ICacheResp().asInstanceOf[this.type] } @@ -152,11 +159,11 @@ class ICache(implicit c: ICacheConfig) extends Module val rdy = Bool() val s2_valid = Reg(init=Bool(false)) - val s2_addr = Reg(UInt(width = PADDR_BITS)) + val s2_addr = Reg(UInt(width = c.as.paddrBits)) val s2_any_tag_hit = Bool() val s1_valid = Reg(init=Bool(false)) - val s1_pgoff = Reg(UInt(width = PGIDX_BITS)) + val s1_pgoff = Reg(UInt(width = c.as.pgIdxBits)) val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUInt val s1_tag = s1_addr(c.tagbits+c.untagbits-1,c.untagbits) @@ -231,7 +238,7 @@ class ICache(implicit c: ICacheConfig) extends Module s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_) for (i <- 0 until c.assoc) { - val data_array = Mem(Bits(width = c.code.width(c.databits)), c.sets*c.refillcycles, seqRead = true) + val data_array = Mem(Bits(width = c.code.width(c.rowbits)), c.sets*c.refillcycles, seqRead = true) val s1_raddr = Reg(UInt()) when (io.mem.grant.valid && repl_way === UInt(i)) { val d = io.mem.grant.bits.payload.data @@ -245,7 +252,7 @@ class ICache(implicit c: ICacheConfig) extends Module // if s1_tag_match is critical, 
replace with partial tag check when (s1_valid && rdy && !stall && (Bool(c.dm) || s1_tag_match(i))) { s2_dout(i) := data_array(s1_raddr) } } - val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(c.databits/8)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)))(c.ibytes*8-1,0)) + val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(c.rowbytes)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)))(c.ibytes*8-1,0)) io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 94014511..39390aae 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -7,41 +7,41 @@ import Util._ case class DCacheConfig(sets: Int, ways: Int, nmshr: Int, nrpq: Int, nsdq: Int, ntlb: Int, tl: TileLinkConfiguration, + as: AddressSpaceConfiguration, + reqtagbits: Int, databits: Int, + rowwords: Int = 8, code: Code = new IdentityCode, - narrowRead: Boolean = true, - reqtagbits: Int = -1, databits: Int = -1) + narrowRead: Boolean = true) { require(states > 0) - require(OFFSET_BITS == log2Up(CACHE_DATA_SIZE_IN_BYTES)) - require(OFFSET_BITS <= ACQUIRE_WRITE_MASK_BITS) - require(log2Up(OFFSET_BITS) <= ACQUIRE_SUBWORD_ADDR_BITS) require(isPow2(sets)) require(isPow2(ways)) // TODO: relax this def states = tl.co.nClientStates def lines = sets*ways def dm = ways == 1 - def ppnbits = PADDR_BITS - PGIDX_BITS - def vpnbits = VADDR_BITS - PGIDX_BITS - def pgidxbits = PGIDX_BITS - def offbits = OFFSET_BITS + def offbits = log2Up(tl.dataBits/8) + def ppnbits = as.ppnBits + def vpnbits = as.vpnBits + def pgidxbits = as.pgIdxBits def maxaddrbits = ppnbits.max(vpnbits+1) + pgidxbits - def paddrbits = ppnbits + pgidxbits + def paddrbits = as.paddrBits def lineaddrbits = paddrbits - offbits def idxbits = log2Up(sets) def waybits = log2Up(ways) def untagbits = offbits + idxbits def tagbits = lineaddrbits - idxbits - def ramoffbits = 
log2Up(tl.dataBits/8) def databytes = databits/8 def wordoffbits = log2Up(databytes) - def isNarrowRead = narrowRead && databits*ways % tl.dataBits == 0 - def refillcycles = CACHE_DATA_SIZE_IN_BYTES*8/tl.dataBits + def rowbits = rowwords*databits + def rowbytes = rowwords*databytes + def rowoffbits = log2Up(rowbytes) + def refillcycles = tl.dataBits/(rowwords*databits) + def isNarrowRead = narrowRead && databits*ways % rowbits == 0 val statebits = log2Up(states) val metabits = statebits + tagbits val encdatabits = code.width(databits) val encmetabits = code.width(metabits) - val wordsperrow = tl.dataBits/databits - val bitsperrow = wordsperrow*encdatabits + val encrowbits = rowwords*encdatabits val lrsc_cycles = 32 // ISA requires 16-insn LRSC sequences to succeed } @@ -118,8 +118,8 @@ class DataReadReq(implicit val conf: DCacheConfig) extends DCacheBundle { class DataWriteReq(implicit val conf: DCacheConfig) extends DCacheBundle { val way_en = Bits(width = conf.ways) val addr = Bits(width = conf.untagbits) - val wmask = Bits(width = conf.wordsperrow) - val data = Bits(width = conf.bitsperrow) + val wmask = Bits(width = conf.rowwords) + val data = Bits(width = conf.encrowbits) } class InternalProbe(implicit conf: DCacheConfig) extends Probe()(conf.tl) { @@ -273,7 +273,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { io.idx_match := (state != s_invalid) && idx_match io.mem_resp := req - io.mem_resp.addr := (if(conf.refillcycles > 1) Cat(req_idx, refill_count) else req_idx) << conf.ramoffbits + io.mem_resp.addr := (if(conf.refillcycles > 1) Cat(req_idx, refill_count) else req_idx) << conf.rowoffbits io.tag := req.addr >> conf.untagbits io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready @@ -426,7 +426,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Module { val req = Decoupled(new WritebackReq()).flip val meta_read = Decoupled(new MetaReadReq) val data_req = Decoupled(new DataReadReq()) - val 
data_resp = Bits(INPUT, conf.bitsperrow) + val data_resp = Bits(INPUT, conf.encrowbits) val release = Decoupled(new Release) } @@ -471,7 +471,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Module { io.data_req.valid := fire io.data_req.bits.way_en := req.way_en io.data_req.bits.addr := (if(conf.refillcycles > 1) Cat(req.idx, cnt(log2Up(conf.refillcycles)-1,0)) - else req.idx) << conf.ramoffbits + else req.idx) << conf.rowoffbits io.release.valid := valid && r2_data_req_fired io.release.bits.r_type := req.r_type @@ -599,39 +599,39 @@ class DataArray(implicit conf: DCacheConfig) extends Module { val io = new Bundle { val read = Decoupled(new DataReadReq).flip val write = Decoupled(new DataWriteReq).flip - val resp = Vec.fill(conf.ways){Bits(OUTPUT, conf.bitsperrow)} + val resp = Vec.fill(conf.ways){Bits(OUTPUT, conf.encrowbits)} } - val waddr = io.write.bits.addr >> conf.ramoffbits - val raddr = io.read.bits.addr >> conf.ramoffbits + val waddr = io.write.bits.addr >> conf.rowoffbits + val raddr = io.read.bits.addr >> conf.rowoffbits if (conf.isNarrowRead) { - for (w <- 0 until conf.ways by conf.wordsperrow) { - val wway_en = io.write.bits.way_en(w+conf.wordsperrow-1,w) - val rway_en = io.read.bits.way_en(w+conf.wordsperrow-1,w) - val resp = Vec.fill(conf.wordsperrow){Bits(width = conf.bitsperrow)} + for (w <- 0 until conf.ways by conf.rowwords) { + val wway_en = io.write.bits.way_en(w+conf.rowwords-1,w) + val rway_en = io.read.bits.way_en(w+conf.rowwords-1,w) + val resp = Vec.fill(conf.rowwords){Bits(width = conf.encrowbits)} val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { - val array = Mem(Bits(width=conf.bitsperrow), conf.sets*conf.refillcycles, seqRead = true) + val array = Mem(Bits(width=conf.encrowbits), conf.sets*conf.refillcycles, seqRead = true) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { - val data = Fill(conf.wordsperrow, 
io.write.bits.data(conf.encdatabits*(p+1)-1,conf.encdatabits*p)) + val data = Fill(conf.rowwords, io.write.bits.data(conf.encdatabits*(p+1)-1,conf.encdatabits*p)) val mask = FillInterleaved(conf.encdatabits, wway_en) array.write(waddr, data, mask) } resp(p) := array(RegEnable(raddr, rway_en.orR && io.read.valid)) } - for (dw <- 0 until conf.wordsperrow) { + for (dw <- 0 until conf.rowwords) { val r = AVec(resp.map(_(conf.encdatabits*(dw+1)-1,conf.encdatabits*dw))) val resp_mux = if (r.size == 1) r - else AVec(r(r_raddr(conf.ramoffbits-1,conf.wordoffbits)), r.tail:_*) + else AVec(r(r_raddr(conf.rowoffbits-1,conf.wordoffbits)), r.tail:_*) io.resp(w+dw) := resp_mux.toBits } } } else { val wmask = FillInterleaved(conf.encdatabits, io.write.bits.wmask) for (w <- 0 until conf.ways) { - val array = Mem(Bits(width=conf.bitsperrow), conf.sets*conf.refillcycles, seqRead = true) + val array = Mem(Bits(width=conf.encrowbits), conf.sets*conf.refillcycles, seqRead = true) when (io.write.bits.way_en(w) && io.write.valid) { array.write(waddr, io.write.bits.data, wmask) } @@ -723,7 +723,7 @@ class HellaCacheIO(implicit conf: DCacheConfig) extends Bundle { val resp = Valid(new HellaCacheResp).flip val replay_next = Valid(Bits(width = conf.reqtagbits)).flip val xcpt = (new HellaCacheExceptions).asInput - val ptw = (new TLBPTWIO).flip + val ptw = new TLBPTWIO()(conf.as).flip val ordered = Bool(INPUT) } @@ -766,7 +766,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { val s1_sc = s1_req.cmd === M_XSC val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) - val dtlb = Module(new TLB(8)) + val dtlb = Module(new TLB(8)(conf.as)) dtlb.io.ptw <> io.cpu.ptw dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys dtlb.io.req.bits.passthrough := s1_req.phys @@ -834,7 +834,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { data.io.write.valid := writeArb.io.out.valid writeArb.io.out.ready := data.io.write.ready data.io.write.bits := 
writeArb.io.out.bits - val wdata_encoded = (0 until conf.wordsperrow).map(i => conf.code.encode(writeArb.io.out.bits.data(conf.databits*(i+1)-1,conf.databits*i))) + val wdata_encoded = (0 until conf.rowwords).map(i => conf.code.encode(writeArb.io.out.bits.data(conf.databits*(i+1)-1,conf.databits*i))) data.io.write.bits.data := AVec(wdata_encoded).toBits // tag read for new requests @@ -885,9 +885,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { } when (io.cpu.ptw.sret) { lrsc_count := 0 } - val s2_data = Vec.fill(conf.ways){Bits(width = conf.bitsperrow)} + val s2_data = Vec.fill(conf.ways){Bits(width = conf.encrowbits)} for (w <- 0 until conf.ways) { - val regs = Vec.fill(conf.wordsperrow){Reg(Bits(width = conf.encdatabits))} + val regs = Vec.fill(conf.rowwords){Reg(Bits(width = conf.encdatabits))} val en1 = s1_clk_en && s1_tag_eq_way(w) for (i <- 0 until regs.size) { val en = en1 && (Bool(i == 0 || !conf.isNarrowRead) || s1_writeback) @@ -896,10 +896,10 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { s2_data(w) := regs.toBits } val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data) - val s2_data_decoded = (0 until conf.wordsperrow).map(i => conf.code.decode(s2_data_muxed(conf.encdatabits*(i+1)-1,conf.encdatabits*i))) + val s2_data_decoded = (0 until conf.rowwords).map(i => conf.code.decode(s2_data_muxed(conf.encdatabits*(i+1)-1,conf.encdatabits*i))) val s2_data_corrected = AVec(s2_data_decoded.map(_.corrected)).toBits val s2_data_uncorrected = AVec(s2_data_decoded.map(_.uncorrected)).toBits - val s2_word_idx = if (conf.isNarrowRead) UInt(0) else s2_req.addr(log2Up(conf.wordsperrow*conf.databytes)-1,3) + val s2_word_idx = if (conf.isNarrowRead) UInt(0) else s2_req.addr(log2Up(conf.rowwords*conf.databytes)-1,3) val s2_data_correctable = AVec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) // store/amo hits @@ -912,8 +912,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { } writeArb.io.in(0).bits.addr := 
s3_req.addr - writeArb.io.in(0).bits.wmask := UInt(1) << s3_req.addr(conf.ramoffbits-1,offsetlsb).toUInt - writeArb.io.in(0).bits.data := Fill(conf.wordsperrow, s3_req.data) + writeArb.io.in(0).bits.wmask := UInt(1) << s3_req.addr(conf.rowoffbits-1,offsetlsb).toUInt + writeArb.io.in(0).bits.data := Fill(conf.rowwords, s3_req.data) writeArb.io.in(0).valid := s3_valid writeArb.io.in(0).bits.way_en := s3_way diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 7fe20ed0..22b86da2 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -1,27 +1,27 @@ package rocket import Chisel._ -import uncore.constants.AddressConstants._ -import uncore.constants.MemoryOpConstants._ +import uncore._ import Util._ -class PTWResp extends Bundle { - val error = Bool() - val ppn = UInt(width = PPN_BITS) - val perm = Bits(width = PERM_BITS) +class PTWResp()(implicit conf: AddressSpaceConfiguration) extends Bundle { + val error = Bool() + val ppn = UInt(width = conf.ppnBits) + val perm = Bits(width = conf.permBits) + + override def clone = new PTWResp().asInstanceOf[this.type] } -class TLBPTWIO extends Bundle { - val req = Decoupled(UInt(width = VPN_BITS)) +class TLBPTWIO()(implicit conf: AddressSpaceConfiguration) extends Bundle { + val req = Decoupled(UInt(width = conf.vpnBits)) val resp = Valid(new PTWResp).flip - val status = new Status().asInput val invalidate = Bool(INPUT) val sret = Bool(INPUT) } -class DatapathPTWIO extends Bundle { - val ptbr = UInt(INPUT, PADDR_BITS) +class DatapathPTWIO()(implicit conf: AddressSpaceConfiguration) extends Bundle { + val ptbr = UInt(INPUT, conf.paddrBits) val invalidate = Bool(INPUT) val sret = Bool(INPUT) val status = new Status().asInput @@ -29,6 +29,7 @@ class DatapathPTWIO extends Bundle { class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module { + implicit val as = conf.as val io = new Bundle { val requestor = Vec.fill(n){new TLBPTWIO}.flip val mem = new 
HellaCacheIO()(conf.dcache) @@ -36,8 +37,8 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module } val levels = 3 - val bitsPerLevel = VPN_BITS/levels - require(VPN_BITS == levels * bitsPerLevel) + val bitsPerLevel = conf.as.vpnBits/levels + require(conf.as.vpnBits == levels * bitsPerLevel) val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(UInt(), 5) val state = Reg(init=s_ready) @@ -49,14 +50,14 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module val vpn_idx = AVec((0 until levels).map(i => (r_req_vpn >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count) - val arb = Module(new RRArbiter(UInt(width = VPN_BITS), n)) + val arb = Module(new RRArbiter(UInt(width = conf.as.vpnBits), n)) arb.io.in <> io.requestor.map(_.req) arb.io.out.ready := state === s_ready when (arb.io.out.fire()) { r_req_vpn := arb.io.out.bits r_req_dest := arb.io.chosen - r_pte := Cat(io.dpath.ptbr(PADDR_BITS-1,PGIDX_BITS), io.mem.resp.bits.data(PGIDX_BITS-1,0)) + r_pte := Cat(io.dpath.ptbr(conf.as.paddrBits-1,conf.as.pgIdxBits), io.mem.resp.bits.data(conf.as.pgIdxBits-1,0)) } when (io.mem.resp.valid) { @@ -67,13 +68,13 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module io.mem.req.bits.phys := Bool(true) io.mem.req.bits.cmd := M_XRD io.mem.req.bits.typ := MT_D - io.mem.req.bits.addr := Cat(r_pte(PADDR_BITS-1,PGIDX_BITS), vpn_idx).toUInt << log2Up(conf.xprlen/8) + io.mem.req.bits.addr := Cat(r_pte(conf.as.paddrBits-1,conf.as.pgIdxBits), vpn_idx).toUInt << log2Up(conf.xprlen/8) io.mem.req.bits.kill := Bool(false) val resp_val = state === s_done || state === s_error val resp_err = state === s_error || state === s_wait - val r_resp_ppn = io.mem.req.bits.addr >> PGIDX_BITS + val r_resp_ppn = io.mem.req.bits.addr >> conf.as.pgIdxBits val resp_ppn = AVec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), r_req_vpn(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) for (i <- 0 until io.requestor.size) { 
diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 745568ad..c3a9f5cf 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -35,6 +35,7 @@ class RoCCResponse(implicit conf: RocketConfiguration) extends Bundle class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle { + implicit val as = conf.as val cmd = Decoupled(new RoCCCommand).flip val resp = Decoupled(new RoCCResponse) val mem = new HellaCacheIO()(conf.dcache) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 7fde42f0..56cdbef4 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -4,7 +4,7 @@ import Chisel._ import uncore._ import Util._ -case class RocketConfiguration(tl: TileLinkConfiguration, +case class RocketConfiguration(tl: TileLinkConfiguration, as: AddressSpaceConfiguration, icache: ICacheConfig, dcache: DCacheConfig, fpu: Option[FPUConfig] = None, rocc: Option[RocketConfiguration => RoCC] = None, diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index a5631402..8014e0ca 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -1,7 +1,7 @@ package rocket import Chisel._ -import uncore.constants.AddressConstants._ +import uncore.AddressSpaceConfiguration import scala.math._ class CAMIO(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { @@ -64,28 +64,30 @@ class PseudoLRU(n: Int) } } -class TLBReq extends Bundle +class TLBReq()(implicit conf: AddressSpaceConfiguration) extends Bundle { - val asid = UInt(width = ASID_BITS) - val vpn = UInt(width = VPN_BITS+1) + val asid = UInt(width = conf.asidBits) + val vpn = UInt(width = conf.vpnBits+1) val passthrough = Bool() val instruction = Bool() + + override def clone = new TLBReq().asInstanceOf[this.type] } -class TLBResp(entries: Int) extends Bundle +class TLBResp(entries: Int)(implicit conf: AddressSpaceConfiguration) extends Bundle { // 
lookup responses val miss = Bool(OUTPUT) val hit_idx = UInt(OUTPUT, entries) - val ppn = UInt(OUTPUT, PPN_BITS) + val ppn = UInt(OUTPUT, conf.ppnBits) val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) val xcpt_if = Bool(OUTPUT) - override def clone = new TLBResp(entries).asInstanceOf[this.type] + override def clone = new TLBResp(entries)(conf).asInstanceOf[this.type] } -class TLB(entries: Int) extends Module +class TLB(entries: Int)(implicit conf: AddressSpaceConfiguration) extends Module { val io = new Bundle { val req = Decoupled(new TLBReq).flip @@ -98,7 +100,7 @@ class TLB(entries: Int) extends Module val r_refill_tag = Reg(UInt()) val r_refill_waddr = Reg(UInt()) - val tag_cam = Module(new RocketCAM(entries, ASID_BITS+VPN_BITS)) + val tag_cam = Module(new RocketCAM(entries, conf.asidBits+conf.vpnBits)) val tag_ram = Vec.fill(entries){Reg(io.ptw.resp.bits.ppn.clone)} val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt @@ -135,7 +137,7 @@ class TLB(entries: Int) extends Module val plru = new PseudoLRU(entries) val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) - val bad_va = io.req.bits.vpn(VPN_BITS) != io.req.bits.vpn(VPN_BITS-1) + val bad_va = io.req.bits.vpn(conf.vpnBits) != io.req.bits.vpn(conf.vpnBits-1) val tlb_hit = io.ptw.status.vm && tag_hit val tlb_miss = io.ptw.status.vm && !tag_hit && !bad_va @@ -148,7 +150,7 @@ class TLB(entries: Int) extends Module io.resp.xcpt_st := bad_va || tlb_hit && !Mux(io.ptw.status.s, (sw_array & tag_cam.io.hits).orR, (uw_array & tag_cam.io.hits).orR) io.resp.xcpt_if := bad_va || tlb_hit && !Mux(io.ptw.status.s, (sx_array & tag_cam.io.hits).orR, (ux_array & tag_cam.io.hits).orR) io.resp.miss := tlb_miss - io.resp.ppn := Mux(io.ptw.status.vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(PPN_BITS-1,0)) + io.resp.ppn := Mux(io.ptw.status.vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(conf.ppnBits-1,0)) io.resp.hit_idx := 
tag_cam.io.hits io.ptw.req.valid := state === s_request From 1da8ef2ddf8aeb78a0faa12252741beae5721328 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 7 Apr 2014 18:22:46 -0700 Subject: [PATCH 0719/1087] Added serdes to decouple cache row size from tilelink data size --- rocket/src/main/scala/icache.scala | 40 ++++++--- rocket/src/main/scala/nbdcache.scala | 120 ++++++++++++++++----------- rocket/src/main/scala/util.scala | 47 +++++++++++ 3 files changed, 146 insertions(+), 61 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 8ed0091f..742905b7 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -5,7 +5,7 @@ import uncore._ import Util._ case class ICacheConfig(sets: Int, assoc: Int, - ibytes: Int = 4, rowbytes: Int = 64, + ibytes: Int = 4, rowbytes: Int = 16, ntlb: Int = 8, tl: TileLinkConfiguration, as: AddressSpaceConfiguration, @@ -187,10 +187,25 @@ class ICache(implicit c: ICacheConfig) extends Module val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss - //assert(!co.isVoluntary(io.mem.grant.bits.payload) || !io.mem.grant.valid, "UncachedRequestors shouldn't get voluntary grants.") - val (rf_cnt, refill_done) = (if(c.refillcycles > 1) Counter(io.mem.grant.valid, c.refillcycles) else (UInt(0), state === s_refill)) - val repl_way = if (c.dm) UInt(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) + var refill_cnt = UInt(0) + var refill_done = state === s_refill + var refill_valid = io.mem.grant.valid + var refill_bits = io.mem.grant.bits + def doRefill(g: Grant): Bool = Bool(true) + if(c.refillcycles > 1) { + val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, c.refillcycles, doRefill)) + ser.io.in <> io.mem.grant + refill_cnt = ser.io.cnt + refill_done = ser.io.done + refill_valid = ser.io.out.valid + refill_bits = ser.io.out.bits + ser.io.out.ready := Bool(true) + } else { + io.mem.grant.ready := Bool(true) + } + 
//assert(!c.tlco.isVoluntary(refill_bits.payload) || !refill_valid, "UncachedRequestors shouldn't get voluntary grants.") + val repl_way = if (c.dm) UInt(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) val enc_tagbits = c.code.width(c.tagbits) val tag_array = Mem(Bits(width = enc_tagbits*c.assoc), c.sets, seqRead = true) val tag_raddr = Reg(UInt()) @@ -240,14 +255,14 @@ class ICache(implicit c: ICacheConfig) extends Module for (i <- 0 until c.assoc) { val data_array = Mem(Bits(width = c.code.width(c.rowbits)), c.sets*c.refillcycles, seqRead = true) val s1_raddr = Reg(UInt()) - when (io.mem.grant.valid && repl_way === UInt(i)) { - val d = io.mem.grant.bits.payload.data - if(c.refillcycles > 1) data_array(Cat(s2_idx,rf_cnt)) := c.code.encode(d) - else data_array(s2_idx) := c.code.encode(d) + when (refill_valid && repl_way === UInt(i)) { + val e_d = c.code.encode(refill_bits.payload.data) + if(c.refillcycles > 1) data_array(Cat(s2_idx,refill_cnt)) := e_d + else data_array(s2_idx) := e_d } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM .elsewhen (s0_valid) { - s1_raddr := s0_pgoff(c.untagbits-1,c.offbits-(if(c.refillcycles > 1) rf_cnt.getWidth else 0)) + s1_raddr := s0_pgoff(c.untagbits-1,c.offbits-(if(c.refillcycles > 1) refill_cnt.getWidth else 0)) } // if s1_tag_match is critical, replace with partial tag check when (s1_valid && rdy && !stall && (Bool(c.dm) || s1_tag_match(i))) { s2_dout(i) := data_array(s1_raddr) } @@ -257,16 +272,15 @@ class ICache(implicit c: ICacheConfig) extends Module io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) val ack_q = Module(new Queue(new LogicalNetworkIO(new GrantAck), 1)) - ack_q.io.enq.valid := refill_done && tl.co.requiresAckForGrant(io.mem.grant.bits.payload.g_type) - ack_q.io.enq.bits.payload.master_xact_id := io.mem.grant.bits.payload.master_xact_id - ack_q.io.enq.bits.header.dst := io.mem.grant.bits.header.src + ack_q.io.enq.valid := refill_done && tl.co.requiresAckForGrant(refill_bits.payload.g_type) + 
ack_q.io.enq.bits.payload.master_xact_id := refill_bits.payload.master_xact_id + ack_q.io.enq.bits.header.dst := refill_bits.header.src // output signals io.resp.valid := s2_hit io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready io.mem.acquire.bits.payload := Acquire(tl.co.getUncachedReadAcquireType, s2_addr >> UInt(c.offbits), UInt(0)) io.mem.grant_ack <> ack_q.io.deq - io.mem.grant.ready := Bool(true) // control state machine switch (state) { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 39390aae..a4b686e5 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -9,13 +9,10 @@ case class DCacheConfig(sets: Int, ways: Int, tl: TileLinkConfiguration, as: AddressSpaceConfiguration, reqtagbits: Int, databits: Int, - rowwords: Int = 8, + rowwords: Int = 2, code: Code = new IdentityCode, narrowRead: Boolean = true) { - require(states > 0) - require(isPow2(sets)) - require(isPow2(ways)) // TODO: relax this def states = tl.co.nClientStates def lines = sets*ways def dm = ways == 1 @@ -35,7 +32,7 @@ case class DCacheConfig(sets: Int, ways: Int, def rowbits = rowwords*databits def rowbytes = rowwords*databytes def rowoffbits = log2Up(rowbytes) - def refillcycles = tl.dataBits/(rowwords*databits) + def refillcycles = tl.dataBits/(rowbits) def isNarrowRead = narrowRead && databits*ways % rowbits == 0 val statebits = log2Up(states) val metabits = statebits + tagbits @@ -43,6 +40,11 @@ case class DCacheConfig(sets: Int, ways: Int, val encmetabits = code.width(metabits) val encrowbits = rowwords*encdatabits val lrsc_cycles = 32 // ISA requires 16-insn LRSC sequences to succeed + + require(states > 0) + require(isPow2(sets)) + require(isPow2(ways)) // TODO: relax this + require(rowbits <= tl.dataBits) } abstract trait DCacheBundle extends Bundle { @@ -200,6 +202,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { val idx_match = req_idx === 
io.req_bits.addr(conf.untagbits-1,conf.offbits) val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !tl.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) + require(isPow2(conf.refillcycles)) val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UInt(id) val refill_done = reply && (if(conf.refillcycles > 1) refill_count.andR else Bool(true)) val wb_done = reply && (state === s_wb_resp) @@ -302,7 +305,6 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { io.mem_req.bits.addr := Cat(io.tag, req_idx).toUInt io.mem_req.bits.client_xact_id := Bits(id) io.mem_finish <> ackq.io.deq - io.mem_req.bits.client_xact_id := Bits(id) io.meta_read.valid := state === s_drain_rpq io.meta_read.bits.addr := io.mem_req.bits.addr << conf.offbits @@ -430,59 +432,73 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Module { val release = Decoupled(new Release) } - require(conf.refillcycles == 1) // TODO Currently will issue refillcycles distinct releases; need to merge if rowsize < tilelink.dataSize - - val valid = Reg(init=Bool(false)) + val active = Reg(init=Bool(false)) val r1_data_req_fired = Reg(init=Bool(false)) val r2_data_req_fired = Reg(init=Bool(false)) - val cmd_sent = Reg(Bool()) - val cnt = Reg(UInt(width = log2Up(conf.refillcycles+1))) + val cnt = Reg(init = UInt(0, width = log2Up(conf.refillcycles+1))) val req = Reg(new WritebackReq) - when (valid) { + io.release.valid := false + when (active) { r1_data_req_fired := false r2_data_req_fired := r1_data_req_fired when (io.data_req.fire() && io.meta_read.fire()) { r1_data_req_fired := true cnt := cnt + 1 } - when (r2_data_req_fired && !io.release.ready) { - r1_data_req_fired := false - r2_data_req_fired := false - cnt := (if(conf.refillcycles > 1) cnt - Mux[UInt](r1_data_req_fired, 2, 1) else UInt(0)) + if(conf.refillcycles > 1) { // Coalescing buffer inserted + 
when (!r1_data_req_fired && !r2_data_req_fired && cnt === conf.refillcycles) { + io.release.valid := true + active := !io.release.ready + } + } else { // No buffer, data released a cycle earlier + when (r2_data_req_fired) { + io.release.valid := true + when(!io.release.ready) { + r1_data_req_fired := false + r2_data_req_fired := false + cnt := UInt(0) + } .otherwise { + active := false + } + } } - when (io.release.fire()) { - cmd_sent := true - } - when (!r1_data_req_fired && !r2_data_req_fired && cmd_sent && cnt === conf.refillcycles) { - valid := false - } - } when (io.req.fire()) { - valid := true - cmd_sent := false + active := true cnt := 0 req := io.req.bits } - val fire = valid && cnt < UInt(conf.refillcycles) - io.req.ready := !valid - io.data_req.valid := fire - io.data_req.bits.way_en := req.way_en - io.data_req.bits.addr := (if(conf.refillcycles > 1) Cat(req.idx, cnt(log2Up(conf.refillcycles)-1,0)) - else req.idx) << conf.rowoffbits - - io.release.valid := valid && r2_data_req_fired - io.release.bits.r_type := req.r_type - io.release.bits.addr := Cat(req.tag, req.idx).toUInt - io.release.bits.client_xact_id := req.client_xact_id - io.release.bits.master_xact_id := req.master_xact_id - io.release.bits.data := io.data_resp + val fire = active && cnt < UInt(conf.refillcycles) + io.req.ready := !active // We reissue the meta read as it sets up the muxing for s2_data_muxed io.meta_read.valid := fire io.meta_read.bits.addr := io.release.bits.addr << conf.offbits + + io.data_req.valid := fire + io.data_req.bits.way_en := req.way_en + if(conf.refillcycles > 1) { + io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(conf.refillcycles)-1,0)) << conf.rowoffbits + } else { + io.data_req.bits.addr := req.idx << conf.rowoffbits + } + + io.release.bits.r_type := req.r_type + io.release.bits.addr := Cat(req.tag, req.idx).toUInt + io.release.bits.client_xact_id := req.client_xact_id + io.release.bits.master_xact_id := req.master_xact_id + if(conf.refillcycles > 1) { + val 
data_buf = Reg(Bits()) + when(active && r2_data_req_fired) { + data_buf := Cat(io.data_resp, data_buf(conf.refillcycles*conf.encrowbits-1, conf.encrowbits)) + } + io.release.bits.data := data_buf + } else { + io.release.bits.data := io.data_resp + } + } class ProbeUnit(implicit conf: DCacheConfig) extends Module { @@ -541,7 +557,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Module { } io.req.ready := state === s_invalid - io.rep.valid := state === s_release && !tl.co.needsWriteback(line_state) + io.rep.valid := state === s_release && !(hit && tl.co.needsWriteback(line_state)) io.rep.bits := Release(tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.newStateOnFlush)), req.addr, req.client_xact_id, req.master_xact_id) io.meta_read.valid := state === s_meta_read @@ -827,8 +843,6 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { // data val data = Module(new DataArray) val readArb = Module(new Arbiter(new DataReadReq, 4)) - readArb.io.out.ready := !io.mem.grant.valid || io.mem.grant.ready // insert bubble if refill gets blocked - readArb.io.out <> data.io.read val writeArb = Module(new Arbiter(new DataWriteReq, 2)) data.io.write.valid := writeArb.io.out.valid @@ -912,7 +926,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { } writeArb.io.in(0).bits.addr := s3_req.addr - writeArb.io.in(0).bits.wmask := UInt(1) << s3_req.addr(conf.rowoffbits-1,offsetlsb).toUInt + writeArb.io.in(0).bits.wmask := UInt(1) << (if(conf.rowoffbits > offsetlsb) + s3_req.addr(conf.rowoffbits-1,offsetlsb).toUInt + else UInt(0)) writeArb.io.in(0).bits.data := Fill(conf.rowwords, s3_req.data) writeArb.io.in(0).valid := s3_valid writeArb.io.in(0).bits.way_en := s3_way @@ -932,8 +948,6 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { mshrs.io.req.bits.data := s2_req.data when (mshrs.io.req.fire()) { replacer.miss } - mshrs.io.mem_grant.valid := io.mem.grant.fire() - mshrs.io.mem_grant.bits := io.mem.grant.bits io.mem.acquire <> 
DecoupledLogicalNetworkIOWrapper(mshrs.io.mem_req) // replays @@ -944,6 +958,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { s1_replay := mshrs.io.replay.valid && readArb.io.in(1).ready metaReadArb.io.in(1) <> mshrs.io.meta_read metaWriteArb.io.in(0) <> mshrs.io.meta_write + // probes val releaseArb = Module(new Arbiter(new Release, 2)) DecoupledLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release @@ -960,12 +975,21 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { prober.io.mshr_rdy := mshrs.io.probe_rdy // refills - val refill = tl.co.messageUpdatesDataArray(io.mem.grant.bits.payload) - writeArb.io.in(1).valid := io.mem.grant.valid && refill - io.mem.grant.ready := writeArb.io.in(1).ready || !refill + def doRefill(g: Grant): Bool = tl.co.messageUpdatesDataArray(g) + val refill = if(conf.refillcycles > 1) { + val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, conf.refillcycles, doRefill)) + ser.io.in <> io.mem.grant + ser.io.out + } else io.mem.grant + mshrs.io.mem_grant.valid := refill.fire() + mshrs.io.mem_grant.bits := refill.bits + refill.ready := writeArb.io.in(1).ready || !doRefill(refill.bits.payload) + writeArb.io.in(1).valid := refill.valid && doRefill(refill.bits.payload) writeArb.io.in(1).bits := mshrs.io.mem_resp writeArb.io.in(1).bits.wmask := SInt(-1) - writeArb.io.in(1).bits.data := io.mem.grant.bits.payload.data + writeArb.io.in(1).bits.data := refill.bits.payload.data(conf.encrowbits-1,0) + readArb.io.out.ready := !refill.valid || refill.ready // insert bubble if refill gets blocked + readArb.io.out <> data.io.read // writebacks val wbArb = Module(new Arbiter(new WritebackReq, 2)) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 24016763..e912bb1b 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -1,6 +1,7 @@ package rocket import Chisel._ +import uncore._ import scala.math._ class BooleanToInt(x: Int) { @@ -161,3 
+162,49 @@ object Random private def partition(value: UInt, slices: Int) = Vec.tabulate(slices)(i => value < round((i << value.getWidth).toDouble / slices)) } + +class FlowThroughSerializer[T <: HasTileLinkData](gen: LogicalNetworkIO[T], n: Int, doSer: T => Bool) extends Module { + val io = new Bundle { + val in = Decoupled(gen.clone).flip + val out = Decoupled(gen.clone) + val cnt = UInt(OUTPUT, log2Up(n)) + val done = Bool(OUTPUT) + } + require(io.in.bits.payload.data.width % n == 0) + val narrowWidth = io.in.bits.payload.data.width / n + val cnt = Reg(init=UInt(0, width = log2Up(n))) + val wrap = cnt === UInt(n-1) + val rbits = Reg(init=io.in.bits) + val active = Reg(init=Bool(false)) + + val shifter = Vec.fill(n){Bits(width = narrowWidth)} + (0 until n).foreach { + i => shifter(i) := rbits.payload.data((i+1)*narrowWidth-1,i*narrowWidth) + } + + io.done := Bool(false) + io.cnt := cnt + io.in.ready := !active + io.out.valid := active || io.in.valid + io.out.bits := io.in.bits + when(!active && io.in.valid) { + when(doSer(io.in.bits.payload)) { + cnt := Mux(io.out.ready, UInt(1), UInt(0)) + rbits := io.in.bits + active := Bool(true) + } + io.done := !doSer(io.in.bits.payload) + } + when(active) { + io.out.bits := rbits + io.out.bits.payload.data := shifter(cnt) + when(io.out.ready) { + cnt := cnt + UInt(1) + when(wrap) { + io.done := Bool(true) + active := Bool(false) + } + } + } +} + From 444d0449e30c5884cc14c844373c02f9682dc9eb Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 14 Apr 2014 17:12:30 -0700 Subject: [PATCH 0720/1087] io.cnt bug in serializer --- rocket/src/main/scala/util.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index e912bb1b..7595ce8b 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -201,6 +201,7 @@ class FlowThroughSerializer[T <: HasTileLinkData](gen: LogicalNetworkIO[T], n: I when(io.out.ready) { cnt := cnt + UInt(1) 
when(wrap) { + cnt := UInt(0) io.done := Bool(true) active := Bool(false) } From de492b3cf70ccfe2d9918e4e982ba198d7b18e3d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 15 Apr 2014 21:26:54 -0700 Subject: [PATCH 0721/1087] Fix critical path through integer scoreboard --- rocket/src/main/scala/ctrl.scala | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 4dabb716..199799be 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -601,6 +601,7 @@ class Control(implicit conf: RocketConfiguration) extends Module wb_reg_rocc_val := mem_reg_rocc_val } + val wb_set_sboard = wb_reg_div_mul_val || wb_dcache_miss || wb_reg_rocc_val val replay_wb_common = io.dmem.resp.bits.nack || wb_reg_replay || io.dpath.csr_replay val wb_rocc_val = wb_reg_rocc_val && !replay_wb_common @@ -625,7 +626,6 @@ class Control(implicit conf: RocketConfiguration) extends Module } val sboard = new Scoreboard(32) - sboard.set((wb_reg_div_mul_val || wb_dcache_miss || wb_reg_rocc_val) && io.dpath.wb_wen, io.dpath.wb_waddr) sboard.clear(io.dpath.ll_wen, io.dpath.ll_waddr) val id_stall_fpu = if (!conf.fpu.isEmpty) { @@ -711,18 +711,25 @@ class Control(implicit conf: RocketConfiguration) extends Module id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
+ val data_hazard_wb = wb_reg_wen && + (id_renx1_not0 && id_raddr1 === io.dpath.wb_waddr || + id_renx2_not0 && id_raddr2 === io.dpath.wb_waddr || + id_wen_not0 && id_waddr === io.dpath.wb_waddr) val fp_data_hazard_wb = wb_reg_fp_wen && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.wb_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr || io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr) - val id_wb_hazard = fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) + val id_wb_hazard = data_hazard_wb && wb_set_sboard || + fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) val id_sboard_hazard = (id_renx1_not0 && sboard.readBypassed(id_raddr1) || id_renx2_not0 && sboard.readBypassed(id_raddr2) || id_wen_not0 && sboard.readBypassed(id_waddr)) + sboard.set(wb_set_sboard && io.dpath.wb_wen, io.dpath.wb_waddr) + val ctrl_stalld = id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || id_fp_val && id_stall_fpu || From 3520620fbdaefff254815246797e7ab12f5d4ba8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 15 Apr 2014 23:05:02 -0700 Subject: [PATCH 0722/1087] Remove D$ -> BTB path --- rocket/src/main/scala/ctrl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 199799be..8317777f 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -655,7 +655,7 @@ class Control(implicit conf: RocketConfiguration) extends Module Mux(replay_wb, PC_WB, // replay PC_MEM))) - io.imem.btb_update.valid := (mem_reg_branch || mem_reg_jal || mem_reg_jalr) && !take_pc_wb && !mem_reg_xcpt + io.imem.btb_update.valid := mem_reg_branch || mem_reg_jal || mem_reg_jalr io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp io.imem.btb_update.bits.taken := mem_reg_jal || mem_reg_branch && io.dpath.mem_br_taken From 
1fa505f9ff44cd49fc90866640d37f74b0b1200b Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 16 Apr 2014 17:19:08 -0700 Subject: [PATCH 0723/1087] remove superfluous AVec object --- rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 12 ++++++------ rocket/src/main/scala/ptw.scala | 4 ++-- rocket/src/main/scala/util.scala | 12 ------------ 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 8317777f..6148fcf4 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -512,7 +512,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val replay_ex = replay_ex_structural || replay_ex_other ctrl_killx := take_pc_mem_wb || replay_ex // detect 2-cycle load-use delay for LB/LH/SC - val ex_slow_bypass = ex_reg_mem_cmd === M_XSC || AVec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_reg_mem_type) + val ex_slow_bypass = ex_reg_mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_reg_mem_type) val (ex_xcpt, ex_cause) = checkExceptions(List( (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index a4b686e5..974646cb 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -637,10 +637,10 @@ class DataArray(implicit conf: DCacheConfig) extends Module { resp(p) := array(RegEnable(raddr, rway_en.orR && io.read.valid)) } for (dw <- 0 until conf.rowwords) { - val r = AVec(resp.map(_(conf.encdatabits*(dw+1)-1,conf.encdatabits*dw))) + val r = Vec(resp.map(_(conf.encdatabits*(dw+1)-1,conf.encdatabits*dw))) val resp_mux = if (r.size == 1) r - else AVec(r(r_raddr(conf.rowoffbits-1,conf.wordoffbits)), r.tail:_*) + else Vec(r(r_raddr(conf.rowoffbits-1,conf.wordoffbits)), r.tail:_*) io.resp(w+dw) := resp_mux.toBits } } @@ -849,7 +849,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { 
writeArb.io.out.ready := data.io.write.ready data.io.write.bits := writeArb.io.out.bits val wdata_encoded = (0 until conf.rowwords).map(i => conf.code.encode(writeArb.io.out.bits.data(conf.databits*(i+1)-1,conf.databits*i))) - data.io.write.bits.data := AVec(wdata_encoded).toBits + data.io.write.bits.data := Vec(wdata_encoded).toBits // tag read for new requests metaReadArb.io.in(4).valid := io.cpu.req.valid @@ -911,10 +911,10 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { } val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data) val s2_data_decoded = (0 until conf.rowwords).map(i => conf.code.decode(s2_data_muxed(conf.encdatabits*(i+1)-1,conf.encdatabits*i))) - val s2_data_corrected = AVec(s2_data_decoded.map(_.corrected)).toBits - val s2_data_uncorrected = AVec(s2_data_decoded.map(_.uncorrected)).toBits + val s2_data_corrected = Vec(s2_data_decoded.map(_.corrected)).toBits + val s2_data_uncorrected = Vec(s2_data_decoded.map(_.uncorrected)).toBits val s2_word_idx = if (conf.isNarrowRead) UInt(0) else s2_req.addr(log2Up(conf.rowwords*conf.databytes)-1,3) - val s2_data_correctable = AVec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) + val s2_data_correctable = Vec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) // store/amo hits s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && isWrite(s2_req.cmd) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 22b86da2..f72e0b14 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -48,7 +48,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module val r_req_dest = Reg(Bits()) val r_pte = Reg(Bits()) - val vpn_idx = AVec((0 until levels).map(i => (r_req_vpn >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count) + val vpn_idx = Vec((0 until levels).map(i => (r_req_vpn >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count) val arb = Module(new RRArbiter(UInt(width = conf.as.vpnBits), n)) 
arb.io.in <> io.requestor.map(_.req) @@ -75,7 +75,7 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module val resp_err = state === s_error || state === s_wait val r_resp_ppn = io.mem.req.bits.addr >> conf.as.pgIdxBits - val resp_ppn = AVec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), r_req_vpn(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) + val resp_ppn = Vec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), r_req_vpn(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) for (i <- 0 until io.requestor.size) { val me = r_req_dest === UInt(i) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 7595ce8b..4a0ef27d 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -21,18 +21,6 @@ object Util { import Util._ -object AVec -{ - def apply[T <: Data](elts: Seq[T]): Vec[T] = Vec(elts) - def apply[T <: Data](elts: Vec[T]): Vec[T] = apply(elts.toSeq) - def apply[T <: Data](elt0: T, elts: T*): Vec[T] = apply(elt0 :: elts.toList) - - def tabulate[T <: Data](n: Int)(f: Int => T): Vec[T] = - apply((0 until n).map(i => f(i))) - def tabulate[T <: Data](n1: Int, n2: Int)(f: (Int, Int) => T): Vec[Vec[T]] = - tabulate(n1)(i1 => tabulate(n2)(f(i1, _))) -} - object Str { def apply(s: String): UInt = { From 09e2ec1f9eaa64ec463ad1b8dae108ac1c70289b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 18 Apr 2014 16:30:25 -0700 Subject: [PATCH 0724/1087] Fix sign of remainder when dividing by zero h/t chris --- rocket/src/main/scala/multiplier.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index a80cc050..54320436 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -122,7 +122,7 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: Rocke remainder := 
remainder(w-1,0) << shift count := shift } - when (count === 0 && !less /* divby0 */) { neg_out := false } + when (count === 0 && !less /* divby0 */ && !isHi) { neg_out := false } } when (io.resp.fire() || io.kill) { state := s_ready From 5c62cff2cee62652eae3c4f659d685fdb04845fd Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 22 Apr 2014 16:53:20 -0700 Subject: [PATCH 0725/1087] put replacement policy in uncore and minor nbdcache cleanups --- rocket/src/main/scala/nbdcache.scala | 38 +++++++--------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 974646cb..81730c12 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -4,15 +4,14 @@ import Chisel._ import uncore._ import Util._ -case class DCacheConfig(sets: Int, ways: Int, +case class DCacheConfig(val sets: Int, val ways: Int, nmshr: Int, nrpq: Int, nsdq: Int, ntlb: Int, - tl: TileLinkConfiguration, - as: AddressSpaceConfiguration, + val tl: TileLinkConfiguration, + val as: AddressSpaceConfiguration, reqtagbits: Int, databits: Int, rowwords: Int = 2, code: Code = new IdentityCode, - narrowRead: Boolean = true) -{ + narrowRead: Boolean = true) extends CacheConfig { def states = tl.co.nClientStates def lines = sets*ways def dm = ways == 1 @@ -45,6 +44,7 @@ case class DCacheConfig(sets: Int, ways: Int, require(isPow2(sets)) require(isPow2(ways)) // TODO: relax this require(rowbits <= tl.dataBits) + require(lineaddrbits == tl.addrBits) } abstract trait DCacheBundle extends Bundle { @@ -52,24 +52,6 @@ abstract trait DCacheBundle extends Bundle { override def clone = this.getClass.getConstructors.head.newInstance(conf).asInstanceOf[this.type] } -abstract class ReplacementPolicy -{ - def way: UInt - def miss: Unit - def hit: Unit -} - -class RandomReplacement(implicit conf: DCacheConfig) extends ReplacementPolicy -{ - private val replace = Bool() - replace := 
Bool(false) - val lfsr = LFSR16(replace) - - def way = if (conf.dm) UInt(0) else lfsr(conf.waybits-1,0) - def miss = replace := Bool(true) - def hit = {} -} - class StoreGen(typ: Bits, addr: Bits, dat: Bits) { val byte = typ === MT_B || typ === MT_BU @@ -117,9 +99,7 @@ class DataReadReq(implicit val conf: DCacheConfig) extends DCacheBundle { val addr = Bits(width = conf.untagbits) } -class DataWriteReq(implicit val conf: DCacheConfig) extends DCacheBundle { - val way_en = Bits(width = conf.ways) - val addr = Bits(width = conf.untagbits) +class DataWriteReq(implicit conf: DCacheConfig) extends DataReadReq()(conf) { val wmask = Bits(width = conf.rowwords) val data = Bits(width = conf.encrowbits) } @@ -561,7 +541,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Module { io.rep.bits := Release(tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.newStateOnFlush)), req.addr, req.client_xact_id, req.master_xact_id) io.meta_read.valid := state === s_meta_read - io.meta_read.bits.addr := req.addr << UInt(conf.offbits) + io.meta_read.bits.addr := req.addr << conf.offbits io.meta_write.valid := state === s_meta_write io.meta_write.bits.way_en := way_en @@ -782,7 +762,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { val s1_sc = s1_req.cmd === M_XSC val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) - val dtlb = Module(new TLB(8)(conf.as)) + val dtlb = Module(new TLB(conf.ntlb)(conf.as)) dtlb.io.ptw <> io.cpu.ptw dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys dtlb.io.req.bits.passthrough := s1_req.phys @@ -857,8 +837,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { when (!metaReadArb.io.in(4).ready) { io.cpu.req.ready := Bool(false) } // data read for new requests - readArb.io.in(3).bits.addr := io.cpu.req.bits.addr readArb.io.in(3).valid := io.cpu.req.valid + readArb.io.in(3).bits.addr := io.cpu.req.bits.addr readArb.io.in(3).bits.way_en := SInt(-1) when (!readArb.io.in(3).ready) { 
io.cpu.req.ready := Bool(false) } From f4d326b8d7023d3c68bf67109bee595619faeb86 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 23 Apr 2014 15:43:31 -0700 Subject: [PATCH 0726/1087] Prep in HellaCache for extracting MetaData to uncore --- rocket/src/main/scala/nbdcache.scala | 39 ++++++++++++++++------------ 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 81730c12..e3a2abcf 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -45,6 +45,7 @@ case class DCacheConfig(val sets: Int, val ways: Int, require(isPow2(ways)) // TODO: relax this require(rowbits <= tl.dataBits) require(lineaddrbits == tl.addrBits) + require(untagbits <= pgidxbits) } abstract trait DCacheBundle extends Bundle { @@ -135,15 +136,18 @@ class MetaData(implicit val conf: DCacheConfig) extends DCacheBundle { } class MetaReadReq(implicit val conf: DCacheConfig) extends DCacheBundle { - val addr = UInt(width = conf.paddrbits) + val idx = Bits(width = conf.idxbits) } -class MetaWriteReq(implicit val conf: DCacheConfig) extends DCacheBundle { +class MetaWriteReq(implicit conf: DCacheConfig) extends MetaReadReq()(conf) { val way_en = Bits(width = conf.ways) - val idx = Bits(width = conf.idxbits) val data = new MetaData() } +class L1MetaReadReq(implicit conf: DCacheConfig) extends MetaReadReq()(conf) { + val tag = Bits(width = conf.tagbits) +} + class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { implicit val (tl, ln) = (conf.tl, conf.tl.ln) val io = new Bundle { @@ -159,7 +163,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { val mem_req = Decoupled(new Acquire) val mem_resp = new DataWriteReq().asOutput - val meta_read = Decoupled(new MetaReadReq) + val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new MetaWriteReq) val replay = Decoupled(new Replay) val mem_grant = Valid(new LogicalNetworkIO(new 
Grant)).flip @@ -287,7 +291,8 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { io.mem_finish <> ackq.io.deq io.meta_read.valid := state === s_drain_rpq - io.meta_read.bits.addr := io.mem_req.bits.addr << conf.offbits + io.meta_read.bits.idx := req_idx + io.meta_read.bits.tag := io.tag io.replay.valid := state === s_drain_rpq && rpq.io.deq.valid io.replay.bits := rpq.io.deq.bits @@ -308,7 +313,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Module { val mem_req = Decoupled(new Acquire) val mem_resp = new DataWriteReq().asOutput - val meta_read = Decoupled(new MetaReadReq) + val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new MetaWriteReq) val replay = Decoupled(new Replay) val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip @@ -332,7 +337,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Module { val wbTagList = Vec.fill(conf.nmshr){Bits()} val memRespMux = Vec.fill(conf.nmshr){new DataWriteReq} - val meta_read_arb = Module(new Arbiter(new MetaReadReq, conf.nmshr)) + val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, conf.nmshr)) val meta_write_arb = Module(new Arbiter(new MetaWriteReq, conf.nmshr)) val mem_req_arb = Module(new Arbiter(new Acquire, conf.nmshr)) val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new GrantAck), conf.nmshr)) @@ -406,7 +411,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Module { implicit val tl = conf.tl val io = new Bundle { val req = Decoupled(new WritebackReq()).flip - val meta_read = Decoupled(new MetaReadReq) + val meta_read = Decoupled(new L1MetaReadReq) val data_req = Decoupled(new DataReadReq()) val data_resp = Bits(INPUT, conf.encrowbits) val release = Decoupled(new Release) @@ -455,7 +460,8 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Module { // We reissue the meta read as it sets up the muxing for s2_data_muxed io.meta_read.valid := fire - io.meta_read.bits.addr := io.release.bits.addr << conf.offbits + 
io.meta_read.bits.idx := req.idx + io.meta_read.bits.tag := req.tag io.data_req.valid := fire io.data_req.bits.way_en := req.way_en @@ -486,7 +492,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Module { val io = new Bundle { val req = Decoupled(new InternalProbe).flip val rep = Decoupled(new Release) - val meta_read = Decoupled(new MetaReadReq) + val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new MetaWriteReq) val wb_req = Decoupled(new WritebackReq) val way_en = Bits(INPUT, conf.ways) @@ -541,7 +547,8 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Module { io.rep.bits := Release(tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.newStateOnFlush)), req.addr, req.client_xact_id, req.master_xact_id) io.meta_read.valid := state === s_meta_read - io.meta_read.bits.addr := req.addr << conf.offbits + io.meta_read.bits.idx := req.addr + io.meta_read.bits.tag := req.addr >> conf.idxbits io.meta_write.valid := state === s_meta_write io.meta_write.bits.way_en := way_en @@ -579,7 +586,7 @@ class MetaDataArray(implicit conf: DCacheConfig) extends Module { val mask = Mux(rst, SInt(-1), io.write.bits.way_en) tags.write(addr, Fill(conf.ways, data), FillInterleaved(metabits, mask)) } - val tag = tags(RegEnable(io.read.bits.addr >> conf.offbits, io.read.valid)) + val tag = tags(RegEnable(io.read.bits.idx, io.read.valid)) for (w <- 0 until conf.ways) { val m = tag(metabits*(w+1)-1, metabits*w) @@ -775,11 +782,11 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { s1_req := io.cpu.req.bits } when (wb.io.meta_read.valid) { - s1_req := wb.io.meta_read.bits + s1_req.addr := Cat(wb.io.meta_read.bits.tag, wb.io.meta_read.bits.idx) << conf.offbits s1_req.phys := Bool(true) } when (prober.io.meta_read.valid) { - s1_req := prober.io.meta_read.bits + s1_req.addr := Cat(prober.io.meta_read.bits.tag, prober.io.meta_read.bits.idx) << conf.offbits s1_req.phys := Bool(true) } when (mshrs.io.replay.valid) { @@ -833,7 +840,7 @@ 
class HellaCache(implicit conf: DCacheConfig) extends Module { // tag read for new requests metaReadArb.io.in(4).valid := io.cpu.req.valid - metaReadArb.io.in(4).bits.addr := io.cpu.req.bits.addr + metaReadArb.io.in(4).bits.idx := io.cpu.req.bits.addr >> conf.offbits when (!metaReadArb.io.in(4).ready) { io.cpu.req.ready := Bool(false) } // data read for new requests @@ -844,7 +851,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { // recycled requests metaReadArb.io.in(0).valid := s2_recycle - metaReadArb.io.in(0).bits.addr := s2_req.addr + metaReadArb.io.in(0).bits.idx := s2_req.addr >> conf.offbits readArb.io.in(0).valid := s2_recycle readArb.io.in(0).bits.addr := s2_req.addr readArb.io.in(0).bits.way_en := SInt(-1) From fc825c7103ee266340f8752ca86904ef29df6ab3 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 23 Apr 2014 16:23:51 -0700 Subject: [PATCH 0727/1087] MetaData & friends moved to uncore/ --- rocket/src/main/scala/nbdcache.scala | 68 ++-------------------------- 1 file changed, 5 insertions(+), 63 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index e3a2abcf..f639e392 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -105,12 +105,13 @@ class DataWriteReq(implicit conf: DCacheConfig) extends DataReadReq()(conf) { val data = Bits(width = conf.encrowbits) } -class InternalProbe(implicit conf: DCacheConfig) extends Probe()(conf.tl) { - val client_xact_id = Bits(width = conf.tl.clientXactIdBits) - - override def clone = new InternalProbe().asInstanceOf[this.type] +class L1MetaReadReq(implicit conf: DCacheConfig) extends MetaReadReq()(conf) { + val tag = Bits(width = conf.tagbits) } +class InternalProbe(implicit conf: TileLinkConfiguration) extends Probe()(conf) + with HasClientTransactionId + class WritebackReq(implicit conf: DCacheConfig) extends Bundle { val tag = Bits(width = conf.tagbits) val idx = Bits(width = conf.idxbits) @@ -122,32 
+123,6 @@ class WritebackReq(implicit conf: DCacheConfig) extends Bundle { override def clone = new WritebackReq().asInstanceOf[this.type] } -object MetaData { - def apply(tag: Bits, state: UInt)(implicit conf: DCacheConfig) = { - val meta = new MetaData - meta.state := state - meta.tag := tag - meta - } -} -class MetaData(implicit val conf: DCacheConfig) extends DCacheBundle { - val state = UInt(width = conf.statebits) - val tag = Bits(width = conf.tagbits) -} - -class MetaReadReq(implicit val conf: DCacheConfig) extends DCacheBundle { - val idx = Bits(width = conf.idxbits) -} - -class MetaWriteReq(implicit conf: DCacheConfig) extends MetaReadReq()(conf) { - val way_en = Bits(width = conf.ways) - val data = new MetaData() -} - -class L1MetaReadReq(implicit conf: DCacheConfig) extends MetaReadReq()(conf) { - val tag = Bits(width = conf.tagbits) -} - class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { implicit val (tl, ln) = (conf.tl, conf.tl.ln) val io = new Bundle { @@ -565,39 +540,6 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Module { io.wb_req.bits.master_xact_id := req.master_xact_id } -class MetaDataArray(implicit conf: DCacheConfig) extends Module { - implicit val tl = conf.tl - val io = new Bundle { - val read = Decoupled(new MetaReadReq).flip - val write = Decoupled(new MetaWriteReq).flip - val resp = Vec.fill(conf.ways){(new MetaData).asOutput} - } - - val rst_cnt = Reg(init=UInt(0, log2Up(conf.sets+1))) - val rst = rst_cnt < conf.sets - when (rst) { rst_cnt := rst_cnt+1 } - - val metabits = io.write.bits.data.state.getWidth + conf.tagbits - val tags = Mem(UInt(width = metabits*conf.ways), conf.sets, seqRead = true) - - when (rst || io.write.valid) { - val addr = Mux(rst, rst_cnt, io.write.bits.idx) - val data = Cat(Mux(rst, tl.co.newStateOnFlush, io.write.bits.data.state), io.write.bits.data.tag) - val mask = Mux(rst, SInt(-1), io.write.bits.way_en) - tags.write(addr, Fill(conf.ways, data), FillInterleaved(metabits, mask)) - } - 
val tag = tags(RegEnable(io.read.bits.idx, io.read.valid)) - - for (w <- 0 until conf.ways) { - val m = tag(metabits*(w+1)-1, metabits*w) - io.resp(w).state := m >> conf.tagbits - io.resp(w).tag := m - } - - io.read.ready := !rst && !io.write.valid // so really this could be a 6T RAM - io.write.ready := !rst -} - class DataArray(implicit conf: DCacheConfig) extends Module { val io = new Bundle { val read = Decoupled(new DataReadReq).flip From 1b156c6db941bd1d6885983b0e0b459c66ba58b3 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sat, 26 Apr 2014 15:18:21 -0700 Subject: [PATCH 0728/1087] TileLinkIO.GrantAck -> TileLinkIO.Finish --- rocket/src/main/scala/icache.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 10 +++++----- rocket/src/main/scala/tile.scala | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 742905b7..9bc4c617 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -271,7 +271,7 @@ class ICache(implicit c: ICacheConfig) extends Module io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) - val ack_q = Module(new Queue(new LogicalNetworkIO(new GrantAck), 1)) + val ack_q = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) ack_q.io.enq.valid := refill_done && tl.co.requiresAckForGrant(refill_bits.payload.g_type) ack_q.io.enq.bits.payload.master_xact_id := refill_bits.payload.master_xact_id ack_q.io.enq.bits.header.dst := refill_bits.header.src @@ -280,7 +280,7 @@ class ICache(implicit c: ICacheConfig) extends Module io.resp.valid := s2_hit io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready io.mem.acquire.bits.payload := Acquire(tl.co.getUncachedReadAcquireType, s2_addr >> UInt(c.offbits), UInt(0)) - io.mem.grant_ack <> ack_q.io.deq + io.mem.finish <> ack_q.io.deq // control state machine switch (state) { diff --git 
a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index f639e392..ac2bce26 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -142,7 +142,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { val meta_write = Decoupled(new MetaWriteReq) val replay = Decoupled(new Replay) val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip - val mem_finish = Decoupled(new LogicalNetworkIO(new GrantAck)) + val mem_finish = Decoupled(new LogicalNetworkIO(new Finish)) val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) } @@ -224,7 +224,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { } } - val ackq = Module(new Queue(new LogicalNetworkIO(new GrantAck), 1)) + val ackq = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) ackq.io.enq.valid := (wb_done || refill_done) && tl.co.requiresAckForGrant(io.mem_grant.bits.payload.g_type) ackq.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src @@ -292,7 +292,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Module { val meta_write = Decoupled(new MetaWriteReq) val replay = Decoupled(new Replay) val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip - val mem_finish = Decoupled(new LogicalNetworkIO(new GrantAck)) + val mem_finish = Decoupled(new LogicalNetworkIO(new Finish)) val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) @@ -315,7 +315,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Module { val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, conf.nmshr)) val meta_write_arb = Module(new Arbiter(new MetaWriteReq, conf.nmshr)) val mem_req_arb = Module(new Arbiter(new Acquire, conf.nmshr)) - val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new GrantAck), conf.nmshr)) + val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), conf.nmshr)) val 
wb_req_arb = Module(new Arbiter(new WritebackReq, conf.nmshr)) val replay_arb = Module(new Arbiter(new Replay, conf.nmshr)) val alloc_arb = Module(new Arbiter(Bool(), conf.nmshr)) @@ -992,7 +992,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { io.cpu.replay_next.valid := s1_replay && (s1_read || s1_sc) io.cpu.replay_next.bits := s1_req.tag - io.mem.grant_ack <> mshrs.io.mem_finish + io.mem.finish <> mshrs.io.mem_finish } // exposes a sane decoupled request interface diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 56cdbef4..94beb86d 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -75,7 +75,7 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module io.tilelink.acquire <> memArb.io.out.acquire memArb.io.out.grant <> io.tilelink.grant - io.tilelink.grant_ack <> memArb.io.out.grant_ack + io.tilelink.finish <> memArb.io.out.finish dcache.io.mem.probe <> io.tilelink.probe io.tilelink.release.valid := dcache.io.mem.release.valid dcache.io.mem.release.ready := io.tilelink.release.ready From 519b2ea2b61d8d12ea4032e8074118854e3e8819 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sat, 26 Apr 2014 19:08:56 -0700 Subject: [PATCH 0729/1087] New metadata result trait --- rocket/src/main/scala/nbdcache.scala | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index ac2bce26..74bc68fe 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -85,11 +85,8 @@ class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool) val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift) } -class MSHRReq(implicit conf: DCacheConfig) extends HellaCacheReq { - val tag_match = Bool() - val old_meta = new MetaData - val way_en = Bits(width = conf.ways) -} +class MSHRReq(implicit val cacheconf: DCacheConfig) 
extends HellaCacheReq + with InternalRequestState class Replay(implicit conf: DCacheConfig) extends HellaCacheReq { val sdq_id = UInt(width = log2Up(conf.nsdq)) From 7f690dd9c88da7bd058bf5af3a3f8c7823c7f988 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 1 May 2014 01:45:45 -0700 Subject: [PATCH 0730/1087] parameterize metadataarray --- rocket/src/main/scala/nbdcache.scala | 44 +++++++++++++++++----------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 74bc68fe..66960a10 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -34,9 +34,7 @@ case class DCacheConfig(val sets: Int, val ways: Int, def refillcycles = tl.dataBits/(rowbits) def isNarrowRead = narrowRead && databits*ways % rowbits == 0 val statebits = log2Up(states) - val metabits = statebits + tagbits val encdatabits = code.width(databits) - val encmetabits = code.width(metabits) val encrowbits = rowwords*encdatabits val lrsc_cycles = 32 // ISA requires 16-insn LRSC sequences to succeed @@ -85,8 +83,11 @@ class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool) val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift) } -class MSHRReq(implicit val cacheconf: DCacheConfig) extends HellaCacheReq - with InternalRequestState +class MSHRReq(implicit val cacheconf: DCacheConfig) extends HellaCacheReq { + val tag_match = Bool() + val old_meta = new L1MetaData + val way_en = Bits(width = cacheconf.ways) +} class Replay(implicit conf: DCacheConfig) extends HellaCacheReq { val sdq_id = UInt(width = log2Up(conf.nsdq)) @@ -97,27 +98,37 @@ class DataReadReq(implicit val conf: DCacheConfig) extends DCacheBundle { val addr = Bits(width = conf.untagbits) } -class DataWriteReq(implicit conf: DCacheConfig) extends DataReadReq()(conf) { +class DataWriteReq(implicit conf: DCacheConfig) extends DataReadReq { val wmask = Bits(width = conf.rowwords) 
val data = Bits(width = conf.encrowbits) } -class L1MetaReadReq(implicit conf: DCacheConfig) extends MetaReadReq()(conf) { +object L1MetaData { + def apply(tag: Bits, state: UInt)(implicit conf: DCacheConfig) = { + val meta = new L1MetaData + meta.state := state + meta.tag := tag + meta + } +} +class L1MetaData(implicit val conf: DCacheConfig) extends MetaData { + val state = UInt(width = conf.statebits) +} + +class L1MetaReadReq(implicit conf: DCacheConfig) extends MetaReadReq { val tag = Bits(width = conf.tagbits) } class InternalProbe(implicit conf: TileLinkConfiguration) extends Probe()(conf) with HasClientTransactionId -class WritebackReq(implicit conf: DCacheConfig) extends Bundle { +class WritebackReq(implicit val conf: DCacheConfig) extends DCacheBundle { val tag = Bits(width = conf.tagbits) val idx = Bits(width = conf.idxbits) val way_en = Bits(width = conf.ways) val client_xact_id = Bits(width = conf.tl.clientXactIdBits) val master_xact_id = Bits(width = conf.tl.masterXactIdBits) val r_type = UInt(width = conf.tl.co.releaseTypeWidth) - - override def clone = new WritebackReq().asInstanceOf[this.type] } class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { @@ -136,7 +147,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { val mem_req = Decoupled(new Acquire) val mem_resp = new DataWriteReq().asOutput val meta_read = Decoupled(new L1MetaReadReq) - val meta_write = Decoupled(new MetaWriteReq) + val meta_write = Decoupled(new MetaWriteReq(new L1MetaData)) val replay = Decoupled(new Replay) val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip val mem_finish = Decoupled(new LogicalNetworkIO(new Finish)) @@ -286,7 +297,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Module { val mem_req = Decoupled(new Acquire) val mem_resp = new DataWriteReq().asOutput val meta_read = Decoupled(new L1MetaReadReq) - val meta_write = Decoupled(new MetaWriteReq) + val meta_write = Decoupled(new MetaWriteReq(new L1MetaData)) val replay 
= Decoupled(new Replay) val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip val mem_finish = Decoupled(new LogicalNetworkIO(new Finish)) @@ -310,7 +321,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Module { val wbTagList = Vec.fill(conf.nmshr){Bits()} val memRespMux = Vec.fill(conf.nmshr){new DataWriteReq} val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, conf.nmshr)) - val meta_write_arb = Module(new Arbiter(new MetaWriteReq, conf.nmshr)) + val meta_write_arb = Module(new Arbiter(new MetaWriteReq(new L1MetaData), conf.nmshr)) val mem_req_arb = Module(new Arbiter(new Acquire, conf.nmshr)) val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), conf.nmshr)) val wb_req_arb = Module(new Arbiter(new WritebackReq, conf.nmshr)) @@ -465,7 +476,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Module { val req = Decoupled(new InternalProbe).flip val rep = Decoupled(new Release) val meta_read = Decoupled(new L1MetaReadReq) - val meta_write = Decoupled(new MetaWriteReq) + val meta_write = Decoupled(new MetaWriteReq(new L1MetaData)) val wb_req = Decoupled(new WritebackReq) val way_en = Bits(INPUT, conf.ways) val mshr_rdy = Bool(INPUT) @@ -760,16 +771,15 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st // tags - val meta = Module(new MetaDataArray) + val meta = Module(new MetaDataArray(L1MetaData(tl.co.newStateOnFlush,UInt(0)))) val metaReadArb = Module(new Arbiter(new MetaReadReq, 5)) - val metaWriteArb = Module(new Arbiter(new MetaWriteReq, 2)) + val metaWriteArb = Module(new Arbiter(new MetaWriteReq(new L1MetaData), 2)) metaReadArb.io.out <> meta.io.read metaWriteArb.io.out <> meta.io.write // data val data = Module(new DataArray) val readArb = Module(new Arbiter(new DataReadReq, 4)) - val writeArb = Module(new Arbiter(new DataWriteReq, 2)) data.io.write.valid := writeArb.io.out.valid writeArb.io.out.ready := data.io.write.ready @@ -869,7 +879,7 @@ 
class HellaCache(implicit conf: DCacheConfig) extends Module { mshrs.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) mshrs.io.req.bits := s2_req mshrs.io.req.bits.tag_match := s2_tag_match - mshrs.io.req.bits.old_meta := Mux(s2_tag_match, MetaData(s2_repl_meta.tag, s2_hit_state), s2_repl_meta) + mshrs.io.req.bits.old_meta := Mux(s2_tag_match, L1MetaData(s2_repl_meta.tag, s2_hit_state), s2_repl_meta) mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshrs.io.req.bits.data := s2_req.data when (mshrs.io.req.fire()) { replacer.miss } From 7d6a642c0ce49e3ebd8b18f980598c34e111f0f4 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 6 May 2014 13:00:00 -0700 Subject: [PATCH 0731/1087] correct use of function value to initialize MetaDataArray --- rocket/src/main/scala/nbdcache.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 66960a10..f89d5dc7 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -771,7 +771,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st // tags - val meta = Module(new MetaDataArray(L1MetaData(tl.co.newStateOnFlush,UInt(0)))) + def onReset = L1MetaData(tl.co.newStateOnFlush, UInt(0)) + val meta = Module(new MetaDataArray(onReset _)) val metaReadArb = Module(new Arbiter(new MetaReadReq, 5)) val metaWriteArb = Module(new Arbiter(new MetaWriteReq(new L1MetaData), 2)) metaReadArb.io.out <> meta.io.read From 5bc69814141d6a018e18d049563becfb7692f9e3 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 6 May 2014 18:36:22 -0700 Subject: [PATCH 0732/1087] fix metadata default, add bug TODO --- rocket/src/main/scala/nbdcache.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala 
b/rocket/src/main/scala/nbdcache.scala index f89d5dc7..20b2c256 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -771,7 +771,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st // tags - def onReset = L1MetaData(tl.co.newStateOnFlush, UInt(0)) + def onReset = L1MetaData(UInt(0), tl.co.newStateOnFlush) val meta = Module(new MetaDataArray(onReset _)) val metaReadArb = Module(new Arbiter(new MetaReadReq, 5)) val metaWriteArb = Module(new Arbiter(new MetaWriteReq(new L1MetaData), 2)) @@ -810,7 +810,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { def wayMap[T <: Data](f: Int => T) = Vec((0 until conf.ways).map(f)) val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)).toBits val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && tl.co.isValid(meta.io.resp(w).state)).toBits - s1_clk_en := metaReadArb.io.out.valid + s1_clk_en := metaReadArb.io.out.valid //TODO: should be metaReadArb.io.out.fire(), but triggers Verilog backend bug val s1_writeback = s1_clk_en && !s1_valid && !s1_replay val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR From 8dcc0cbb535c935d8503fc2f236b4b3b824bb022 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 4 May 2014 20:02:31 -0700 Subject: [PATCH 0733/1087] Fix bug with multiple DecodeLogics per module --- rocket/src/main/scala/decode.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 38e9bdff..0c958950 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -20,7 +20,7 @@ object DecodeLogic }.foldLeft(Bool(false))(_||_) } def apply[T <: Bits](addr: UInt, default: T, mapping: Iterable[(UInt, T)]): T = { - val cache = caches.getOrElseUpdate(Module.current, 
collection.mutable.Map[Term,Bool]()) + val cache = caches.getOrElseUpdate(addr, collection.mutable.Map[Term,Bool]()) val dterm = term(default) val (keys, values) = mapping.unzip val addrWidth = keys.map(_.getWidth).max @@ -59,7 +59,7 @@ object DecodeLogic apply(addr, Bool.DC, trues.map(_ -> Bool(true)) ++ falses.map(_ -> Bool(false))) def apply(addr: UInt, tru: UInt, fals: UInt): Bool = apply(addr, Seq(tru), Seq(fals)) - private val caches = collection.mutable.Map[Module,collection.mutable.Map[Term,Bool]]() + private val caches = collection.mutable.Map[UInt,collection.mutable.Map[Term,Bool]]() } class Term(val value: BigInt, val mask: BigInt = 0) From 0c13c00d0831ce0cc9836b327166afbfb20879be Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 9 May 2014 19:09:26 -0700 Subject: [PATCH 0734/1087] Reduce node count by avoiding elsewhen :-( --- rocket/src/main/scala/btb.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index e7a30221..d1f9f34d 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -150,6 +150,9 @@ class BTB(implicit conf: BTBConfig) extends Module { val waddr = Mux(updateHit, update.bits.prediction.bits.entry, nextRepl) for (i <- 0 until conf.entries) { + when ((pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR) { + idxValid(i) := false + } when (waddr === i) { idxValid(i) := updateValid when (updateTarget) { @@ -161,8 +164,6 @@ class BTB(implicit conf: BTBConfig) extends Module { useRAS(i) := update.bits.isReturn isJump(i) := update.bits.isJump } - }.elsewhen ((pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR) { - idxValid(i) := false } } From fd5f419eb1701c8c387e9799909a1c29420c538e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 9 May 2014 19:10:14 -0700 Subject: [PATCH 0735/1087] use getWidth instead of width --- rocket/src/main/scala/decode.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 0c958950..7e0b8544 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -31,7 +31,7 @@ object DecodeLogic for (u <- t.tail) assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap") - val result = (0 until default.litOf.width.max(values.map(_.litOf.width).max)).map({ case (i: Int) => + val result = (0 until default.litOf.getWidth.max(values.map(_.litOf.getWidth).max)).map({ case (i: Int) => val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1) val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1) val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1) From 94c1f01ec62e3138829beb452117c2c40777d9fe Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 9 May 2014 19:26:43 -0700 Subject: [PATCH 0736/1087] Deanonymize CSRFile's IO bundle --- rocket/src/main/scala/csr.scala | 54 +++++++++++++++++---------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 8f10b02c..2169055b 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -33,34 +33,36 @@ object CSR val C = Bits(3,2) } +class CSRFileIO(implicit conf: RocketConfiguration) extends Bundle { + val host = new HTIFIO(conf.tl.ln.nClients) + val rw = new Bundle { + val addr = UInt(INPUT, 12) + val cmd = Bits(INPUT, CSR.SZ) + val rdata = Bits(OUTPUT, conf.xprlen) + val wdata = Bits(INPUT, conf.xprlen) + } + + val status = new Status().asOutput + val ptbr = UInt(OUTPUT, conf.as.paddrBits) + val evec = UInt(OUTPUT, conf.as.vaddrBits+1) + val exception = Bool(INPUT) + val retire = UInt(INPUT, log2Up(1+conf.retireWidth)) + val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+conf.retireWidth))) + val cause = 
UInt(INPUT, conf.xprlen) + val badvaddr_wen = Bool(INPUT) + val pc = UInt(INPUT, conf.as.vaddrBits+1) + val sret = Bool(INPUT) + val fatc = Bool(OUTPUT) + val replay = Bool(OUTPUT) + val time = UInt(OUTPUT, conf.xprlen) + val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) + val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip + val rocc = new RoCCInterface().flip +} + class CSRFile(implicit conf: RocketConfiguration) extends Module { - val io = new Bundle { - val host = new HTIFIO(conf.tl.ln.nClients) - val rw = new Bundle { - val addr = UInt(INPUT, 12) - val cmd = Bits(INPUT, CSR.SZ) - val rdata = Bits(OUTPUT, conf.xprlen) - val wdata = Bits(INPUT, conf.xprlen) - } - - val status = new Status().asOutput - val ptbr = UInt(OUTPUT, conf.as.paddrBits) - val evec = UInt(OUTPUT, conf.as.vaddrBits+1) - val exception = Bool(INPUT) - val retire = UInt(INPUT, log2Up(1+conf.retireWidth)) - val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+conf.retireWidth))) - val cause = UInt(INPUT, conf.xprlen) - val badvaddr_wen = Bool(INPUT) - val pc = UInt(INPUT, conf.as.vaddrBits+1) - val sret = Bool(INPUT) - val fatc = Bool(OUTPUT) - val replay = Bool(OUTPUT) - val time = UInt(OUTPUT, conf.xprlen) - val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) - val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip - val rocc = new RoCCInterface().flip - } + val io = new CSRFileIO val reg_epc = Reg(Bits(width = conf.as.vaddrBits+1)) val reg_badvaddr = Reg(Bits(width = conf.as.vaddrBits)) From 4ca152b012bc6318e8fe7a2b673f86a2f730758b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 9 May 2014 19:30:05 -0700 Subject: [PATCH 0737/1087] Use BundleWithConf to avoid clone method boilerplate --- rocket/src/main/scala/btb.scala | 8 ++------ rocket/src/main/scala/fpu.scala | 1 - rocket/src/main/scala/icache.scala | 21 ++++++++------------- rocket/src/main/scala/multiplier.scala | 8 ++------ rocket/src/main/scala/nbdcache.scala | 13 ++++--------- rocket/src/main/scala/ptw.scala | 
8 +++----- rocket/src/main/scala/rocc.scala | 12 +++--------- rocket/src/main/scala/tlb.scala | 8 ++------ rocket/src/main/scala/util.scala | 5 +++++ 9 files changed, 29 insertions(+), 55 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index d1f9f34d..311cfa3a 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -53,7 +53,7 @@ class BHT(implicit conf: BTBConfig) { val history = Reg(UInt(width = log2Up(conf.nbht))) } -class BTBUpdate(implicit conf: BTBConfig) extends Bundle { +class BTBUpdate(implicit val conf: BTBConfig) extends BundleWithConf { val prediction = Valid(new BTBResp) val pc = UInt(width = conf.as.vaddrBits) val target = UInt(width = conf.as.vaddrBits) @@ -63,17 +63,13 @@ class BTBUpdate(implicit conf: BTBConfig) extends Bundle { val isCall = Bool() val isReturn = Bool() val incorrectTarget = Bool() - - override def clone = new BTBUpdate().asInstanceOf[this.type] } -class BTBResp(implicit conf: BTBConfig) extends Bundle { +class BTBResp(implicit val conf: BTBConfig) extends BundleWithConf { val taken = Bool() val target = UInt(width = conf.as.vaddrBits) val entry = UInt(width = conf.opaqueBits) val bht = new BHTResp - - override def clone = new BTBResp().asInstanceOf[this.type] } // fully-associative branch target buffer diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index ee25d6ab..a2d02dba 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -168,7 +168,6 @@ class FPInput extends FPUCtrlSigs { val in1 = Bits(width = 65) val in2 = Bits(width = 65) val in3 = Bits(width = 65) - override def clone = new FPInput().asInstanceOf[this.type] } class FPToInt extends Module diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 9bc4c617..a9308086 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -29,18 +29,15 @@ case class ICacheConfig(sets: 
Int, assoc: Int, require(as.pgIdxBits >= untagbits) } -class FrontendReq()(implicit conf: ICacheConfig) extends Bundle { +class FrontendReq()(implicit val conf: ICacheConfig) extends BundleWithConf { val pc = UInt(width = conf.as.vaddrBits+1) - override def clone = new FrontendReq().asInstanceOf[this.type] } -class FrontendResp(implicit conf: ICacheConfig) extends Bundle { +class FrontendResp(implicit val conf: ICacheConfig) extends BundleWithConf { val pc = UInt(width = conf.as.vaddrBits+1) // ID stage PC val data = Bits(width = conf.ibytes*8) val xcpt_ma = Bool() val xcpt_if = Bool() - - override def clone = new FrontendResp().asInstanceOf[this.type] } class CPUFrontendIO(implicit conf: ICacheConfig) extends Bundle { @@ -129,17 +126,15 @@ class Frontend(implicit c: ICacheConfig) extends Module io.cpu.btb_resp.bits := s2_btb_resp_bits } -class ICacheReq(implicit c: ICacheConfig) extends Bundle { - val idx = UInt(width = c.as.pgIdxBits) - val ppn = UInt(width = c.as.ppnBits) // delayed one cycle +class ICacheReq(implicit val conf: ICacheConfig) extends BundleWithConf { + val idx = UInt(width = conf.as.pgIdxBits) + val ppn = UInt(width = conf.as.ppnBits) // delayed one cycle val kill = Bool() // delayed one cycle - override def clone = new ICacheReq().asInstanceOf[this.type] } -class ICacheResp(implicit c: ICacheConfig) extends Bundle { - val data = Bits(width = c.ibytes*8) - val datablock = Bits(width = c.rowbits) - override def clone = new ICacheResp().asInstanceOf[this.type] +class ICacheResp(implicit val conf: ICacheConfig) extends BundleWithConf { + val data = Bits(width = conf.ibytes*8) + val datablock = Bits(width = conf.rowbits) } class ICache(implicit c: ICacheConfig) extends Module diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 54320436..4f43bd41 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -4,21 +4,17 @@ import Chisel._ import ALU._ import Util._ 
-class MultiplierReq(implicit conf: RocketConfiguration) extends Bundle { +class MultiplierReq(implicit val conf: RocketConfiguration) extends BundleWithConf { val fn = Bits(width = SZ_ALU_FN) val dw = Bits(width = SZ_DW) val in1 = Bits(width = conf.xprlen) val in2 = Bits(width = conf.xprlen) val tag = UInt(width = conf.nxprbits) - - override def clone = new MultiplierReq().asInstanceOf[this.type] } -class MultiplierResp(implicit conf: RocketConfiguration) extends Bundle { +class MultiplierResp(implicit val conf: RocketConfiguration) extends BundleWithConf { val data = Bits(width = conf.xprlen) val tag = UInt(width = conf.nxprbits) - - override def clone = new MultiplierResp().asInstanceOf[this.type] } class MultiplierIO(implicit conf: RocketConfiguration) extends Bundle { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 20b2c256..42620e71 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -46,11 +46,6 @@ case class DCacheConfig(val sets: Int, val ways: Int, require(untagbits <= pgidxbits) } -abstract trait DCacheBundle extends Bundle { - implicit val conf: DCacheConfig - override def clone = this.getClass.getConstructors.head.newInstance(conf).asInstanceOf[this.type] -} - class StoreGen(typ: Bits, addr: Bits, dat: Bits) { val byte = typ === MT_B || typ === MT_BU @@ -93,7 +88,7 @@ class Replay(implicit conf: DCacheConfig) extends HellaCacheReq { val sdq_id = UInt(width = log2Up(conf.nsdq)) } -class DataReadReq(implicit val conf: DCacheConfig) extends DCacheBundle { +class DataReadReq(implicit val conf: DCacheConfig) extends BundleWithConf { val way_en = Bits(width = conf.ways) val addr = Bits(width = conf.untagbits) } @@ -122,7 +117,7 @@ class L1MetaReadReq(implicit conf: DCacheConfig) extends MetaReadReq { class InternalProbe(implicit conf: TileLinkConfiguration) extends Probe()(conf) with HasClientTransactionId -class WritebackReq(implicit val conf: DCacheConfig) extends 
DCacheBundle { +class WritebackReq(implicit val conf: DCacheConfig) extends BundleWithConf { val tag = Bits(width = conf.tagbits) val idx = Bits(width = conf.idxbits) val way_en = Bits(width = conf.ways) @@ -637,7 +632,7 @@ class AMOALU(implicit conf: DCacheConfig) extends Module { io.out := wmask & out | ~wmask & io.lhs } -class HellaCacheReq(implicit val conf: DCacheConfig) extends DCacheBundle { +class HellaCacheReq(implicit val conf: DCacheConfig) extends BundleWithConf { val kill = Bool() val typ = Bits(width = MT_SZ) val phys = Bool() @@ -647,7 +642,7 @@ class HellaCacheReq(implicit val conf: DCacheConfig) extends DCacheBundle { val cmd = Bits(width = M_SZ) } -class HellaCacheResp(implicit val conf: DCacheConfig) extends DCacheBundle { +class HellaCacheResp(implicit val conf: DCacheConfig) extends BundleWithConf { val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() val typ = Bits(width = 3) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index f72e0b14..1b42af49 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -4,15 +4,13 @@ import Chisel._ import uncore._ import Util._ -class PTWResp()(implicit conf: AddressSpaceConfiguration) extends Bundle { +class PTWResp()(implicit val conf: AddressSpaceConfiguration) extends BundleWithConf { val error = Bool() val ppn = UInt(width = conf.ppnBits) val perm = Bits(width = conf.permBits) - - override def clone = new PTWResp().asInstanceOf[this.type] } -class TLBPTWIO()(implicit conf: AddressSpaceConfiguration) extends Bundle { +class TLBPTWIO()(implicit val conf: AddressSpaceConfiguration) extends BundleWithConf { val req = Decoupled(UInt(width = conf.vpnBits)) val resp = Valid(new PTWResp).flip val status = new Status().asInput @@ -20,7 +18,7 @@ class TLBPTWIO()(implicit conf: AddressSpaceConfiguration) extends Bundle { val sret = Bool(INPUT) } -class DatapathPTWIO()(implicit conf: AddressSpaceConfiguration) extends Bundle { +class 
DatapathPTWIO()(implicit val conf: AddressSpaceConfiguration) extends BundleWithConf { val ptbr = UInt(INPUT, conf.paddrBits) val invalidate = Bool(INPUT) val sret = Bool(INPUT) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index c3a9f5cf..3a3e2bf4 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -16,24 +16,20 @@ class RoCCInstruction extends Bundle val opcode = Bits(width = 7) } -class RoCCCommand(implicit conf: RocketConfiguration) extends Bundle +class RoCCCommand(implicit val conf: RocketConfiguration) extends BundleWithConf { val inst = new RoCCInstruction val rs1 = Bits(width = conf.xprlen) val rs2 = Bits(width = conf.xprlen) - - override def clone = new RoCCCommand().asInstanceOf[this.type] } -class RoCCResponse(implicit conf: RocketConfiguration) extends Bundle +class RoCCResponse(implicit val conf: RocketConfiguration) extends BundleWithConf { val rd = Bits(width = 5) val data = Bits(width = conf.xprlen) - - override def clone = new RoCCResponse().asInstanceOf[this.type] } -class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle +class RoCCInterface(implicit val conf: RocketConfiguration) extends BundleWithConf { implicit val as = conf.as val cmd = Decoupled(new RoCCCommand).flip @@ -49,8 +45,6 @@ class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle val dptw = new TLBPTWIO val pptw = new TLBPTWIO val exception = Bool(INPUT) - - override def clone = new RoCCInterface().asInstanceOf[this.type] } abstract class RoCC(conf: RocketConfiguration) extends Module diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 8014e0ca..067565f0 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -64,17 +64,15 @@ class PseudoLRU(n: Int) } } -class TLBReq()(implicit conf: AddressSpaceConfiguration) extends Bundle +class TLBReq()(implicit val conf: AddressSpaceConfiguration) extends BundleWithConf { val asid 
= UInt(width = conf.asidBits) val vpn = UInt(width = conf.vpnBits+1) val passthrough = Bool() val instruction = Bool() - - override def clone = new TLBReq().asInstanceOf[this.type] } -class TLBResp(entries: Int)(implicit conf: AddressSpaceConfiguration) extends Bundle +class TLBResp(entries: Int)(implicit val conf: AddressSpaceConfiguration) extends BundleWithConf { // lookup responses val miss = Bool(OUTPUT) @@ -83,8 +81,6 @@ class TLBResp(entries: Int)(implicit conf: AddressSpaceConfiguration) extends Bu val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) val xcpt_if = Bool(OUTPUT) - - override def clone = new TLBResp(entries)(conf).asInstanceOf[this.type] } class TLB(entries: Int)(implicit conf: AddressSpaceConfiguration) extends Module diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 4a0ef27d..82ead46c 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -21,6 +21,11 @@ object Util { import Util._ +abstract trait BundleWithConf extends Bundle { + val conf: AnyRef + override def clone = this.getClass.getConstructors.head.newInstance(conf).asInstanceOf[this.type] +} + object Str { def apply(s: String): UInt = { From e91e12ed880ac14ef9491f22ca7b58110373eebb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 14 May 2014 16:17:39 -0700 Subject: [PATCH 0738/1087] Fix RoCC accumulator example --- rocket/src/main/scala/rocc.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 3a3e2bf4..c1ce0694 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -3,6 +3,7 @@ package rocket import Chisel._ import Node._ import uncore._ +import Util._ class RoCCInstruction extends Bundle { @@ -119,4 +120,11 @@ class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf) io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores) io.mem.req.bits.typ := MT_D // D = 8 bytes, 
W = 4, H = 2, B = 1 io.mem.req.bits.data := Bits(0) // we're not performing any stores... + + io.imem.acquire.valid := false + io.imem.grant.ready := false + io.imem.finish.valid := false + io.iptw.req.valid := false + io.dptw.req.valid := false + io.pptw.req.valid := false } From cbb37ccc3e0a408176f21329e6cc361a1ea8d117 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 18 May 2014 19:25:43 -0700 Subject: [PATCH 0739/1087] Use Mem instead of Vec[Reg] --- rocket/src/main/scala/btb.scala | 73 ++++++++++++++++----------------- rocket/src/main/scala/tlb.scala | 2 +- 2 files changed, 36 insertions(+), 39 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 311cfa3a..c4de8820 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -81,29 +81,29 @@ class BTB(implicit conf: BTBConfig) extends Module { val invalidate = Bool(INPUT) } - val idxValid = Vec.fill(conf.entries){Reg(init=Bool(false))} - val idxs = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))} - val idxPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} - val tgts = Vec.fill(conf.entries){Reg(UInt(width=conf.matchBits))} - val tgtPages = Vec.fill(conf.entries){Reg(UInt(width=log2Up(conf.pages)))} - val pages = Vec.fill(conf.pages){Reg(UInt(width=conf.as.vaddrBits-conf.matchBits))} - val pageValid = Vec.fill(conf.pages){Reg(init=Bool(false))} + val idxValid = Reg(init=UInt(0, conf.entries)) + val idxs = Mem(UInt(width=conf.matchBits), conf.entries) + val idxPages = Mem(UInt(width=log2Up(conf.pages)), conf.entries) + val tgts = Mem(UInt(width=conf.matchBits), conf.entries) + val tgtPages = Mem(UInt(width=log2Up(conf.pages)), conf.entries) + val pages = Mem(UInt(width=conf.as.vaddrBits-conf.matchBits), conf.pages) + val pageValid = Reg(init=UInt(0, conf.pages)) val idxPagesOH = idxPages.map(UIntToOH(_)(conf.pages-1,0)) val tgtPagesOH = tgtPages.map(UIntToOH(_)(conf.pages-1,0)) - val useRAS = 
Vec.fill(conf.entries){Reg(Bool())} - val isJump = Vec.fill(conf.entries){Reg(Bool())} + val useRAS = Mem(Bool(), conf.entries) + val isJump = Mem(Bool(), conf.entries) private def page(addr: UInt) = addr >> conf.matchBits private def pageMatch(addr: UInt) = { val p = page(addr) - Vec(pages.map(_ === p)).toBits & pageValid.toBits + Vec(pages.map(_ === p)).toBits & pageValid } private def tagMatch(addr: UInt, pgMatch: UInt): UInt = { val idx = addr(conf.matchBits-1,0) val idxMatch = idxs.map(_ === idx).toBits val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits - idxValid.toBits & idxMatch & idxPageMatch + idxValid & idxMatch & idxPageMatch } val update = Pipe(io.update) @@ -137,52 +137,49 @@ class BTB(implicit conf: BTBConfig) extends Module { val tgtPageRepl = Mux(samePage, idxPageUpdateOH, idxPageUpdateOH(conf.pages-2,0) << 1 | idxPageUpdateOH(conf.pages-1)) val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl)) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) + val doPageRepl = doIdxPageRepl || doTgtPageRepl val pageReplEn = idxPageReplEn | tgtPageReplEn - idxPageRepl := UIntToOH(Counter(update.valid && (doIdxPageRepl || doTgtPageRepl), conf.pages)._1) + idxPageRepl := UIntToOH(Counter(update.valid && doPageRepl, conf.pages)._1) when (update.valid && !(updateValid && !updateTarget)) { val nextRepl = Counter(!updateHit && updateValid, conf.entries)._1 val waddr = Mux(updateHit, update.bits.prediction.bits.entry, nextRepl) - for (i <- 0 until conf.entries) { - when ((pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR) { - idxValid(i) := false - } - when (waddr === i) { - idxValid(i) := updateValid - when (updateTarget) { - if (i == 0) assert(io.req === update.bits.target, "BTB request != I$ target") - idxs(i) := update.bits.pc - idxPages(i) := idxPageUpdate - tgts(i) := update_target - tgtPages(i) := tgtPageUpdate - useRAS(i) := update.bits.isReturn - isJump(i) := update.bits.isJump - } - } + when (doPageRepl) { + val clearValid 
= for (i <- 0 until conf.entries) + yield (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR + idxValid := idxValid & ~Vec(clearValid).toBits + } + when (updateTarget) { + assert(io.req === update.bits.target, "BTB request != I$ target") + idxValid := idxValid.bitSet(waddr, updateValid) + idxs(waddr) := update.bits.pc + tgts(waddr) := update_target + idxPages(waddr) := idxPageUpdate + tgtPages(waddr) := tgtPageUpdate + useRAS(waddr) := update.bits.isReturn + isJump(waddr) := update.bits.isJump } require(conf.pages % 2 == 0) val idxWritesEven = (idxPageUpdateOH & Fill(conf.pages/2, UInt(1,2))).orR - def writeBank(i: Int, mod: Int, en: Bool, data: UInt) = { - for (i <- i until conf.pages by mod) { - when (en && pageReplEn(i)) { - pages(i) := data - pageValid(i) := true - } - } - } + def writeBank(i: Int, mod: Int, en: Bool, data: UInt) = + for (i <- i until conf.pages by mod) + when (en && pageReplEn(i)) { pages(i) := data } + writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), Mux(idxWritesEven, page(update.bits.pc), page(update_target))) writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl), Mux(idxWritesEven, page(update_target), page(update.bits.pc))) + + when (doPageRepl) { pageValid := pageValid | pageReplEn } } when (io.invalidate) { - idxValid.foreach(_ := false) - pageValid.foreach(_ := false) + idxValid := 0 + pageValid := 0 } io.resp.valid := hits.toBits.orR diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 067565f0..62b757bb 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -97,7 +97,7 @@ class TLB(entries: Int)(implicit conf: AddressSpaceConfiguration) extends Module val r_refill_waddr = Reg(UInt()) val tag_cam = Module(new RocketCAM(entries, conf.asidBits+conf.vpnBits)) - val tag_ram = Vec.fill(entries){Reg(io.ptw.resp.bits.ppn.clone)} + val tag_ram = Mem(io.ptw.resp.bits.ppn.clone, entries) val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt 
tag_cam.io.clear := io.ptw.invalidate From 8bc1c3354024922554f228caa271f6d5f5fd79b9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 19 May 2014 18:56:30 -0700 Subject: [PATCH 0740/1087] Fix BTB error (requires Chisel update) --- rocket/src/main/scala/btb.scala | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index c4de8820..44eb101c 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -146,14 +146,12 @@ class BTB(implicit conf: BTBConfig) extends Module { val nextRepl = Counter(!updateHit && updateValid, conf.entries)._1 val waddr = Mux(updateHit, update.bits.prediction.bits.entry, nextRepl) - when (doPageRepl) { - val clearValid = for (i <- 0 until conf.entries) - yield (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR - idxValid := idxValid & ~Vec(clearValid).toBits - } + // invalidate entries if we stomp on pages they depend upon + idxValid := idxValid & ~Vec.tabulate(conf.entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits + + idxValid(waddr) := updateValid when (updateTarget) { assert(io.req === update.bits.target, "BTB request != I$ target") - idxValid := idxValid.bitSet(waddr, updateValid) idxs(waddr) := update.bits.pc tgts(waddr) := update_target idxPages(waddr) := idxPageUpdate From dab675b2312c5a07a97666bb90172eccbef09d1a Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 28 May 2014 13:30:48 -0700 Subject: [PATCH 0741/1087] refactor Metadata, clean and expand coherence API --- rocket/src/main/scala/nbdcache.scala | 79 +++++++++++++++------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 42620e71..e7851298 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -80,7 +80,7 @@ class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool) class 
MSHRReq(implicit val cacheconf: DCacheConfig) extends HellaCacheReq { val tag_match = Bool() - val old_meta = new L1MetaData + val old_meta = new L1Metadata val way_en = Bits(width = cacheconf.ways) } @@ -98,20 +98,23 @@ class DataWriteReq(implicit conf: DCacheConfig) extends DataReadReq { val data = Bits(width = conf.encrowbits) } -object L1MetaData { - def apply(tag: Bits, state: UInt)(implicit conf: DCacheConfig) = { - val meta = new L1MetaData - meta.state := state +class L1MetaReadReq(implicit conf: DCacheConfig) extends MetaReadReq { + val tag = Bits(width = conf.tagbits) +} + +class L1MetaWriteReq(implicit conf: DCacheConfig) extends + MetaWriteReq[L1Metadata](new L1Metadata) + +object L1Metadata { + def apply(tag: Bits, coh: ClientMetadata)(implicit conf: DCacheConfig) = { + val meta = new L1Metadata meta.tag := tag + meta.coh := coh meta } } -class L1MetaData(implicit val conf: DCacheConfig) extends MetaData { - val state = UInt(width = conf.statebits) -} - -class L1MetaReadReq(implicit conf: DCacheConfig) extends MetaReadReq { - val tag = Bits(width = conf.tagbits) +class L1Metadata(implicit val conf: DCacheConfig) extends Metadata { + val coh = conf.tl.co.clientMetadataOnFlush.clone } class InternalProbe(implicit conf: TileLinkConfiguration) extends Probe()(conf) @@ -142,7 +145,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { val mem_req = Decoupled(new Acquire) val mem_resp = new DataWriteReq().asOutput val meta_read = Decoupled(new L1MetaReadReq) - val meta_write = Decoupled(new MetaWriteReq(new L1MetaData)) + val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new Replay) val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip val mem_finish = Decoupled(new LogicalNetworkIO(new Finish)) @@ -155,7 +158,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { val acquire_type = Reg(UInt()) val release_type = Reg(UInt()) - val line_state = Reg(UInt()) + val line_state = Reg(new 
ClientMetadata()(tl.co)) val refill_count = Reg(UInt(width = log2Up(conf.refillcycles))) // TODO: zero-width wire val req = Reg(new MSHRReq()) @@ -169,6 +172,10 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { val refill_done = reply && (if(conf.refillcycles > 1) refill_count.andR else Bool(true)) val wb_done = reply && (state === s_wb_resp) + val meta_on_flush = tl.co.clientMetadataOnFlush + val meta_on_grant = tl.co.clientMetadataOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) + val meta_on_hit = tl.co.clientMetadataOnHit(req_cmd, io.req_bits.old_meta.coh) + val rpq = Module(new Queue(new Replay, conf.nrpq)) rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(req_cmd) rpq.io.enq.bits := io.req_bits @@ -189,7 +196,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { when (refill_done) { state := s_meta_write_req } when (reply) { if(conf.refillcycles > 1) refill_count := refill_count + UInt(1) - line_state := tl.co.newStateOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) + line_state := meta_on_grant } } when (io.mem_req.fire()) { // s_refill_req @@ -206,24 +213,24 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req - acquire_type := tl.co.getAcquireTypeOnSecondaryMiss(req_cmd, tl.co.newStateOnFlush(), io.mem_req.bits) + acquire_type := tl.co.getAcquireTypeOnSecondaryMiss(req_cmd, meta_on_flush, io.mem_req.bits) } when (io.req_pri_val && io.req_pri_rdy) { - line_state := tl.co.newStateOnFlush() + line_state := meta_on_flush refill_count := UInt(0) - acquire_type := tl.co.getAcquireTypeOnPrimaryMiss(req_cmd, tl.co.newStateOnFlush()) + acquire_type := tl.co.getAcquireTypeOnPrimaryMiss(req_cmd, meta_on_flush) release_type := tl.co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc req := io.req_bits when (io.req_bits.tag_match) { - when (tl.co.isHit(req_cmd, 
io.req_bits.old_meta.state)) { // set dirty bit + when (tl.co.isHit(req_cmd, io.req_bits.old_meta.coh)) { // set dirty bit state := s_meta_write_req - line_state := tl.co.newStateOnHit(req_cmd, io.req_bits.old_meta.state) + line_state := meta_on_hit }.otherwise { // upgrade permissions state := s_refill_req } }.otherwise { // writback if necessary and refill - state := Mux(tl.co.needsWriteback(io.req_bits.old_meta.state), s_wb_req, s_meta_clear) + state := Mux(tl.co.needsWriteback(io.req_bits.old_meta.coh), s_wb_req, s_meta_clear) } } @@ -250,7 +257,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { io.meta_write.valid := state === s_meta_write_req || state === s_meta_clear io.meta_write.bits.idx := req_idx - io.meta_write.bits.data.state := Mux(state === s_meta_clear, tl.co.newStateOnFlush(), line_state) + io.meta_write.bits.data.coh := Mux(state === s_meta_clear, meta_on_flush, line_state) io.meta_write.bits.data.tag := io.tag io.meta_write.bits.way_en := req.way_en @@ -292,7 +299,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Module { val mem_req = Decoupled(new Acquire) val mem_resp = new DataWriteReq().asOutput val meta_read = Decoupled(new L1MetaReadReq) - val meta_write = Decoupled(new MetaWriteReq(new L1MetaData)) + val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new Replay) val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip val mem_finish = Decoupled(new LogicalNetworkIO(new Finish)) @@ -316,7 +323,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Module { val wbTagList = Vec.fill(conf.nmshr){Bits()} val memRespMux = Vec.fill(conf.nmshr){new DataWriteReq} val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, conf.nmshr)) - val meta_write_arb = Module(new Arbiter(new MetaWriteReq(new L1MetaData), conf.nmshr)) + val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, conf.nmshr)) val mem_req_arb = Module(new Arbiter(new Acquire, conf.nmshr)) val mem_finish_arb = Module(new 
Arbiter(new LogicalNetworkIO(new Finish), conf.nmshr)) val wb_req_arb = Module(new Arbiter(new WritebackReq, conf.nmshr)) @@ -471,16 +478,16 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Module { val req = Decoupled(new InternalProbe).flip val rep = Decoupled(new Release) val meta_read = Decoupled(new L1MetaReadReq) - val meta_write = Decoupled(new MetaWriteReq(new L1MetaData)) + val meta_write = Decoupled(new L1MetaWriteReq) val wb_req = Decoupled(new WritebackReq) val way_en = Bits(INPUT, conf.ways) val mshr_rdy = Bool(INPUT) - val line_state = UInt(INPUT, 2) + val line_state = new ClientMetadata()(tl.co).asInput } val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(UInt(), 9) val state = Reg(init=s_invalid) - val line_state = Reg(UInt()) + val line_state = Reg(tl.co.clientMetadataOnFlush.clone) val way_en = Reg(Bits()) val req = Reg(new InternalProbe) val hit = way_en.orR @@ -522,7 +529,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Module { io.req.ready := state === s_invalid io.rep.valid := state === s_release && !(hit && tl.co.needsWriteback(line_state)) - io.rep.bits := Release(tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.newStateOnFlush)), req.addr, req.client_xact_id, req.master_xact_id) + io.rep.bits := Release(tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.clientMetadataOnFlush)), req.addr, req.client_xact_id, req.master_xact_id) io.meta_read.valid := state === s_meta_read io.meta_read.bits.idx := req.addr @@ -531,14 +538,14 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Module { io.meta_write.valid := state === s_meta_write io.meta_write.bits.way_en := way_en io.meta_write.bits.idx := req.addr - io.meta_write.bits.data.state := tl.co.newStateOnProbe(req, line_state) + io.meta_write.bits.data.coh := tl.co.clientMetadataOnProbe(req, line_state) io.meta_write.bits.data.tag := req.addr >> 
UInt(conf.idxbits) io.wb_req.valid := state === s_writeback_req io.wb_req.bits.way_en := way_en io.wb_req.bits.idx := req.addr io.wb_req.bits.tag := req.addr >> UInt(conf.idxbits) - io.wb_req.bits.r_type := tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.newStateOnFlush)) + io.wb_req.bits.r_type := tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.clientMetadataOnFlush)) io.wb_req.bits.client_xact_id := req.client_xact_id io.wb_req.bits.master_xact_id := req.master_xact_id } @@ -766,10 +773,10 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st // tags - def onReset = L1MetaData(UInt(0), tl.co.newStateOnFlush) - val meta = Module(new MetaDataArray(onReset _)) + def onReset = L1Metadata(UInt(0), ClientMetadata(UInt(0))(tl.co)) + val meta = Module(new MetadataArray(onReset _)) val metaReadArb = Module(new Arbiter(new MetaReadReq, 5)) - val metaWriteArb = Module(new Arbiter(new MetaWriteReq(new L1MetaData), 2)) + val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2)) metaReadArb.io.out <> meta.io.read metaWriteArb.io.out <> meta.io.write @@ -804,13 +811,13 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { // tag check and way muxing def wayMap[T <: Data](f: Int => T) = Vec((0 until conf.ways).map(f)) val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)).toBits - val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && tl.co.isValid(meta.io.resp(w).state)).toBits + val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && tl.co.isValid(meta.io.resp(w).coh)).toBits s1_clk_en := metaReadArb.io.out.valid //TODO: should be metaReadArb.io.out.fire(), but triggers Verilog backend bug val s1_writeback = s1_clk_en && !s1_valid && !s1_replay val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR - val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => 
RegEnable(meta.io.resp(w).state, s1_clk_en))) - val s2_hit = s2_tag_match && tl.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === tl.co.newStateOnHit(s2_req.cmd, s2_hit_state) + val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en))) + val s2_hit = s2_tag_match && tl.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === tl.co.clientMetadataOnHit(s2_req.cmd, s2_hit_state) // load-reserved/store-conditional val lrsc_count = Reg(init=UInt(0)) @@ -875,7 +882,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { mshrs.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) mshrs.io.req.bits := s2_req mshrs.io.req.bits.tag_match := s2_tag_match - mshrs.io.req.bits.old_meta := Mux(s2_tag_match, L1MetaData(s2_repl_meta.tag, s2_hit_state), s2_repl_meta) + mshrs.io.req.bits.old_meta := Mux(s2_tag_match, L1Metadata(s2_repl_meta.tag, s2_hit_state), s2_repl_meta) mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshrs.io.req.bits.data := s2_req.data when (mshrs.io.req.fire()) { replacer.miss } From de32595fbad113e1f6af115f3736e6402afe2d7d Mon Sep 17 00:00:00 2001 From: Jim Lawson Date: Fri, 13 Jun 2014 12:00:50 -0700 Subject: [PATCH 0742/1087] Quick change to work with new Width class. 
--- rocket/src/main/scala/decode.scala | 2 +- rocket/src/main/scala/util.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 7e0b8544..c6e8e6be 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -9,7 +9,7 @@ object DecodeLogic val lit = b.litOf if (lit.isZ) { var (bits, mask, swidth) = Literal.parseLit(lit.toString) - new Term(BigInt(bits, 2), BigInt(2).pow(lit.width)-(BigInt(mask, 2)+1)) + new Term(BigInt(bits, 2), BigInt(2).pow(lit.needWidth())-(BigInt(mask, 2)+1)) } else { new Term(lit.value) } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 82ead46c..407fc1d1 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -163,8 +163,8 @@ class FlowThroughSerializer[T <: HasTileLinkData](gen: LogicalNetworkIO[T], n: I val cnt = UInt(OUTPUT, log2Up(n)) val done = Bool(OUTPUT) } - require(io.in.bits.payload.data.width % n == 0) - val narrowWidth = io.in.bits.payload.data.width / n + require(io.in.bits.payload.data.needWidth() % n == 0) + val narrowWidth = io.in.bits.payload.data.needWidth() / n val cnt = Reg(init=UInt(0, width = log2Up(n))) val wrap = cnt === UInt(n-1) val rbits = Reg(init=io.in.bits) From 0c93567dea87533c2ad82f3ea73e51fb9c479fac Mon Sep 17 00:00:00 2001 From: Jim Lawson Date: Fri, 13 Jun 2014 14:58:52 -0700 Subject: [PATCH 0743/1087] Replace needWidth() with getWidth. 
--- rocket/src/main/scala/decode.scala | 2 +- rocket/src/main/scala/util.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index c6e8e6be..6f27b4dd 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -9,7 +9,7 @@ object DecodeLogic val lit = b.litOf if (lit.isZ) { var (bits, mask, swidth) = Literal.parseLit(lit.toString) - new Term(BigInt(bits, 2), BigInt(2).pow(lit.needWidth())-(BigInt(mask, 2)+1)) + new Term(BigInt(bits, 2), BigInt(2).pow(lit.getWidth)-(BigInt(mask, 2)+1)) } else { new Term(lit.value) } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 407fc1d1..6de69590 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -163,8 +163,8 @@ class FlowThroughSerializer[T <: HasTileLinkData](gen: LogicalNetworkIO[T], n: I val cnt = UInt(OUTPUT, log2Up(n)) val done = Bool(OUTPUT) } - require(io.in.bits.payload.data.needWidth() % n == 0) - val narrowWidth = io.in.bits.payload.data.needWidth() / n + require(io.in.bits.payload.data.getWidth % n == 0) + val narrowWidth = io.in.bits.payload.data.getWidth / n val cnt = Reg(init=UInt(0, width = log2Up(n))) val wrap = cnt === UInt(n-1) val rbits = Reg(init=io.in.bits) From 88899eafe0f805f9fa4b25b2abef33f235a1f5e0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 May 2014 23:58:25 -0700 Subject: [PATCH 0744/1087] Reduce node count a bit --- rocket/src/main/scala/btb.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 44eb101c..cab8bfc7 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -91,8 +91,8 @@ class BTB(implicit conf: BTBConfig) extends Module { val idxPagesOH = idxPages.map(UIntToOH(_)(conf.pages-1,0)) val tgtPagesOH = tgtPages.map(UIntToOH(_)(conf.pages-1,0)) - val 
useRAS = Mem(Bool(), conf.entries) - val isJump = Mem(Bool(), conf.entries) + val useRAS = Reg(UInt(width = conf.entries)) + val isJump = Reg(UInt(width = conf.entries)) private def page(addr: UInt) = addr >> conf.matchBits private def pageMatch(addr: UInt) = { @@ -180,7 +180,7 @@ class BTB(implicit conf: BTBConfig) extends Module { pageValid := 0 } - io.resp.valid := hits.toBits.orR + io.resp.valid := hits.orR io.resp.bits.taken := io.resp.valid io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) From ac88ded35ae03860c9187a77023e7174a2148743 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 May 2014 23:58:53 -0700 Subject: [PATCH 0745/1087] Use ROMs to reduce node count and improve QoR a bit --- rocket/src/main/scala/csr.scala | 10 ++++------ rocket/src/main/scala/ctrl.scala | 3 ++- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 2169055b..9d62ba44 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -109,14 +109,12 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val addr = Mux(cpu_req_valid, io.rw.addr, host_pcr_bits.addr | 0x500) val decoded_addr = { - val default = List(Bits("b" + ("?"*CSRs.all.size), CSRs.all.size)) - val outs = for (i <- 0 until CSRs.all.size) - yield UInt(CSRs.all(i), addr.getWidth) -> List(UInt(BigInt(1) << i, CSRs.all.size)) - - val d = DecodeLogic(addr, default, outs).toArray + val map = for ((v, i) <- CSRs.all.zipWithIndex) + yield v -> UInt(BigInt(1) << i) + val out = ROM(map)(addr) val a = Array.fill(CSRs.all.max+1)(null.asInstanceOf[Bool]) for (i <- 0 until CSRs.all.size) - a(CSRs.all(i)) = d(0)(i) + a(CSRs.all(i)) = out(i) a } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6148fcf4..c6fb932f 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ 
-421,10 +421,11 @@ class Control(implicit conf: RocketConfiguration) extends Module legal_csrs --= fp_csrs val id_csr_addr = io.dpath.inst(31,20) + val isLegalCSR = Vec.tabulate(1 << id_csr_addr.getWidth)(i => Bool(legal_csrs contains i)) val id_csr_en = id_csr != CSR.N val id_csr_fp = Bool(!conf.fpu.isEmpty) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) val id_csr_wen = id_raddr1 != UInt(0) || !Vec(CSR.S, CSR.C).contains(id_csr) - val id_csr_invalid = id_csr_en && !Vec(legal_csrs.map(UInt(_))).contains(id_csr_addr) + val id_csr_invalid = id_csr_en && !isLegalCSR(id_csr_addr) val id_csr_privileged = id_csr_en && (id_csr_addr(11,10) === UInt(3) && id_csr_wen || id_csr_addr(11,10) === UInt(2) || From 04593d433e58b1d8bfdbe157e51b041c04bd2807 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 25 May 2014 23:59:24 -0700 Subject: [PATCH 0746/1087] clean up Int <-> Boolean conversion stuff --- rocket/src/main/scala/ecc.scala | 8 ++++---- rocket/src/main/scala/tile.scala | 4 ++-- rocket/src/main/scala/util.scala | 14 ++++++++++---- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/ecc.scala b/rocket/src/main/scala/ecc.scala index 01164e88..cc116ae5 100644 --- a/rocket/src/main/scala/ecc.scala +++ b/rocket/src/main/scala/ecc.scala @@ -47,8 +47,8 @@ class ParityCode extends Code class SECCode extends Code { def width(k: Int) = { - val m = log2Up(k) + 1 - !isPow2(k) - k + m + ((1 << m) < m+k+1) + val m = k.log2 + 1 + k + m + ((1 << m) < m+k+1).toInt } def encode(x: Bits) = { val k = x.getWidth @@ -57,7 +57,7 @@ class SECCode extends Code val y = for (i <- 1 to n) yield { if (isPow2(i)) { - val r = for (j <- 1 to n; if j != i && (j & i).toBoolean) + val r = for (j <- 1 to n; if j != i && (j & i) != 0) yield x(mapping(j)) r reduce (_^_) } else @@ -71,7 +71,7 @@ class SECCode extends Code val p2 = for (i <- 0 until log2Up(n)) yield 1 << i val syndrome = p2 map { i => - val r = for (j <- 1 to n; if (j & 
i).toBoolean) + val r = for (j <- 1 to n; if (j & i) != 0) yield y(j-1) r reduce (_^_) } diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 94beb86d..93c2c068 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -23,11 +23,11 @@ case class RocketConfiguration(tl: TileLinkConfiguration, as: AddressSpaceConfig class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module(_reset = resetSignal) { - val memPorts = 2 + !confIn.rocc.isEmpty // Number of ports to outer memory system from tile: 1 from I$, 1 from D$, maybe 1 from Rocc + val memPorts = 2 + (!confIn.rocc.isEmpty).toInt // Number of ports to outer memory system from tile: 1 from I$, 1 from D$, maybe 1 from Rocc val dcachePortId = 0 val icachePortId = 1 val roccPortId = 2 - val dcachePorts = 2 + !confIn.rocc.isEmpty // Number of ports into D$: 1 from core, 1 from PTW, maybe 1 from RoCC + val dcachePorts = 2 + (!confIn.rocc.isEmpty).toInt // Number of ports into D$: 1 from core, 1 from PTW, maybe 1 from RoCC implicit val tlConf = confIn.tl implicit val lnConf = confIn.tl.ln implicit val icConf = confIn.icache diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 6de69590..e08791ef 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -4,8 +4,12 @@ import Chisel._ import uncore._ import scala.math._ -class BooleanToInt(x: Int) { - def toBoolean: Boolean = if (x != 0) true else false +class Unsigned(x: Int) { + require(x >= 0) + def size: Int = { require(x > 0); ceil(log(x)/log(2)).toInt } + def log2: Int = { require(x > 0); floor(log(x)/log(2)).toInt } + def isPow2: Boolean = x > 0 && (x & (x-1)) == 0 + def nextPow2: Int = if (x == 0) 1 else 1 << size } object Util { @@ -15,8 +19,10 @@ object Util { implicit def seqToVec[T <: Data](x: Iterable[T]): Vec[T] = Vec(x) implicit def wcToUInt(c: WideCounter): UInt = c.value - implicit def booleanToInt(x: Boolean): 
Int = if (x) 1 else 0 - implicit def intToBooleanToInt(x: Int): BooleanToInt = new BooleanToInt(x) + implicit def intToUnsigned(x: Int): Unsigned = new Unsigned(x) + implicit def booleanToIntConv(x: Boolean) = new AnyRef { + def toInt: Int = if (x) 1 else 0 + } } import Util._ From 3828c628c3419423dee17caf487fc746064679d6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 3 Jun 2014 10:28:42 -0700 Subject: [PATCH 0747/1087] Remove vestigial control signals --- rocket/src/main/scala/ctrl.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index c6fb932f..bb262268 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -311,12 +311,6 @@ class Control(implicit conf: RocketConfiguration) extends Module val dpath = new CtrlDpathIO val imem = new CPUFrontendIO()(conf.icache) val dmem = new HellaCacheIO()(conf.dcache) - val dtlb_val = Bool(OUTPUT) - val dtlb_kill = Bool(OUTPUT) - val dtlb_rdy = Bool(INPUT) - val dtlb_miss = Bool(INPUT) - val xcpt_dtlb_ld = Bool(INPUT) - val xcpt_dtlb_st = Bool(INPUT) val fpu = new CtrlFPUIO val rocc = new RoCCInterface().flip } From 7bffc6c58652e157d42b8792b397713d1769eedf Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 6 Jun 2014 16:59:55 -0700 Subject: [PATCH 0748/1087] rename Unsigned.size to Unsigned.clog2 --- rocket/src/main/scala/util.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index e08791ef..e52b2e4d 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -6,10 +6,10 @@ import scala.math._ class Unsigned(x: Int) { require(x >= 0) - def size: Int = { require(x > 0); ceil(log(x)/log(2)).toInt } + def clog2: Int = { require(x > 0); ceil(log(x)/log(2)).toInt } def log2: Int = { require(x > 0); floor(log(x)/log(2)).toInt } def isPow2: Boolean = x > 0 && (x & (x-1)) == 0 - def 
nextPow2: Int = if (x == 0) 1 else 1 << size + def nextPow2: Int = if (x == 0) 1 else 1 << clog2 } object Util { From d5208466387e11ea4730ab12c1f615b0a70f2949 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Mon, 18 Aug 2014 19:23:10 -0700 Subject: [PATCH 0749/1087] add README and sbt files --- rocket/README.md | 33 +++++++++++++++++++++++++++++++++ rocket/build.sbt | 7 +++++++ rocket/chisel-dependent.sbt | 8 ++++++++ rocket/hardfloat-dependent.sbt | 8 ++++++++ rocket/uncore-dependent.sbt | 8 ++++++++ 5 files changed, 64 insertions(+) create mode 100644 rocket/README.md create mode 100644 rocket/build.sbt create mode 100644 rocket/chisel-dependent.sbt create mode 100644 rocket/hardfloat-dependent.sbt create mode 100644 rocket/uncore-dependent.sbt diff --git a/rocket/README.md b/rocket/README.md new file mode 100644 index 00000000..f3257d63 --- /dev/null +++ b/rocket/README.md @@ -0,0 +1,33 @@ +Rocket Core Generator +================================================================ + +Rocket is a 6-stage single-issue in-order pipeline that executes the 64-bit +scalar RISC-V ISA. Rocket implements an MMU that supports page-based virtual +memory and is able to boot modern operating systems such as Linux. Rocket +also has an optional IEEE 754-2008-compliant FPU, which implements both +single- and double-precision floating-point operations, including fused +multiply-add. + +We plan to open-source our Rocket core generator written in Chisel in the near +future. We are currently in the process of cleaning up the repository. Please stay tuned. + +Currently, a Rocket core with an 8 KB direct-mapped L1 instruction cache +and an 8 KB direct-mapped L1 data cache has been instantiated and committed to +the zynq-fpga infrastructure repository. A copy of the generated Verilog is available +[here](https://raw.githubusercontent.com/ucb-bar/zynq-fpga/master/hw/src/verilog/Slave.v). 
+ +The following table compares a 32-bit ARM Cortex-A5 core to a 64-bit RISC-V +Rocket core built in the same TSMC process (40GPLUS). Fourth column is the +ratio of RISC-V Rocket to ARM Cortex-A5. Both use single-instruction-issue, +in-order pipelines, yet the RISC-V core is faster, smaller, and uses less +power. + +ISA/Implementation | ARM Cortex-A5 | RISC-V Rocket | R/A +--- | --- | --- | --- +ISA Register Width | 32 bits | 64 bits | 2 +Frequency | >1 GHz | >1 GHz | 1 +Dhrystone Performance | 1.57 DMIPS/MHz | 1.72 DMIPS/MHz | 1.1 +Area excluding caches | 0.27 mm2 | 0.14 mm2 | 0.5 +Area with 16KB caches | 0.53 mm2 | 0.39 mm2 | 0.7 +Area Efficiency | 2.96 DMIPS/MHz/mm2 | 4.41 DMIPS/MHz/mm2 | 1.5 +Dynamic Power | <0.08 mW/MHz | 0.034 mW/MHz | >= 0.4 diff --git a/rocket/build.sbt b/rocket/build.sbt new file mode 100644 index 00000000..d9150787 --- /dev/null +++ b/rocket/build.sbt @@ -0,0 +1,7 @@ +organization := "edu.berkeley.cs" + +version := "1.2" + +name := "rocket" + +scalaVersion := "2.10.2" diff --git a/rocket/chisel-dependent.sbt b/rocket/chisel-dependent.sbt new file mode 100644 index 00000000..e784e5e9 --- /dev/null +++ b/rocket/chisel-dependent.sbt @@ -0,0 +1,8 @@ +// Provide a managed dependency on chisel if -DchiselVersion="" is +// supplied on the command line. + +val chiselVersion = System.getProperty("chiselVersion", "None") + +libraryDependencies ++= ( if (chiselVersion != "None" ) ( + "edu.berkeley.cs" %% "chisel" % chiselVersion +) :: Nil; else Nil) diff --git a/rocket/hardfloat-dependent.sbt b/rocket/hardfloat-dependent.sbt new file mode 100644 index 00000000..e6cc8f7e --- /dev/null +++ b/rocket/hardfloat-dependent.sbt @@ -0,0 +1,8 @@ +// Provide a managed dependency on hardfloat if -DhardfloatVersion="" is +// supplied on the command line. 
+ +val hardfloatVersion = System.getProperty("hardfloatVersion", "None") + +libraryDependencies ++= ( if (hardfloatVersion != "None" ) ( + "edu.berkeley.cs" %% "hardfloat" % hardfloatVersion +) :: Nil; else Nil) diff --git a/rocket/uncore-dependent.sbt b/rocket/uncore-dependent.sbt new file mode 100644 index 00000000..9526f621 --- /dev/null +++ b/rocket/uncore-dependent.sbt @@ -0,0 +1,8 @@ +// Provide a managed dependency on uncore if -DuncoreVersion="" is +// supplied on the command line. 
Bundle { val host = new HTIFIO(conf.tl.ln.nClients) @@ -20,8 +23,8 @@ class Core(implicit conf: RocketConfiguration) extends Module val ctrl = Module(new Control) val dpath = Module(new Datapath) - if (!conf.fpu.isEmpty) { - val fpu = Module(new FPU(conf.fpu.get)) + if (!params(HasFPU)) { + val fpu = Module(new FPU,params(FPUParams)) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl } diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 9d62ba44..52e7f7fc 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -164,9 +164,9 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module val read_ptbr = reg_ptbr(conf.as.paddrBits-1, conf.as.pgIdxBits) << conf.as.pgIdxBits val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( - CSRs.fflags -> (if (!conf.fpu.isEmpty) reg_fflags else UInt(0)), - CSRs.frm -> (if (!conf.fpu.isEmpty) reg_frm else UInt(0)), - CSRs.fcsr -> (if (!conf.fpu.isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), + CSRs.fflags -> (if (!params(HasFPU)) reg_fflags else UInt(0)), + CSRs.frm -> (if (!params(HasFPU)) reg_frm else UInt(0)), + CSRs.fcsr -> (if (!params(HasFPU)) Cat(reg_frm, reg_fflags) else UInt(0)), CSRs.cycle -> reg_time, CSRs.time -> reg_time, CSRs.instret -> reg_instret, @@ -208,7 +208,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module reg_status.zero := 0 if (!conf.vm) reg_status.vm := false if (conf.rocc.isEmpty) reg_status.er := false - if (conf.fpu.isEmpty) reg_status.ef := false + if (params(HasFPU)) reg_status.ef := false } when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index bb262268..12b2a5ae 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -316,7 +316,7 @@ class Control(implicit conf: RocketConfiguration) extends Module } var decode_table = 
XDecode.table - if (!conf.fpu.isEmpty) decode_table ++= FDecode.table + if (!params(HasFPU)) decode_table ++= FDecode.table if (!conf.rocc.isEmpty) decode_table ++= RoCCDecode.table val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) @@ -411,13 +411,13 @@ class Control(implicit conf: RocketConfiguration) extends Module val fp_csrs = CSRs.fcsr :: CSRs.frm :: CSRs.fflags :: Nil val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*) - if (conf.fpu.isEmpty) + if (params(HasFPU)) legal_csrs --= fp_csrs val id_csr_addr = io.dpath.inst(31,20) val isLegalCSR = Vec.tabulate(1 << id_csr_addr.getWidth)(i => Bool(legal_csrs contains i)) val id_csr_en = id_csr != CSR.N - val id_csr_fp = Bool(!conf.fpu.isEmpty) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) + val id_csr_fp = Bool(!params(HasFPU)) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) val id_csr_wen = id_raddr1 != UInt(0) || !Vec(CSR.S, CSR.C).contains(id_csr) val id_csr_invalid = id_csr_en && !isLegalCSR(id_csr_addr) val id_csr_privileged = id_csr_en && @@ -623,7 +623,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val sboard = new Scoreboard(32) sboard.clear(io.dpath.ll_wen, io.dpath.ll_waddr) - val id_stall_fpu = if (!conf.fpu.isEmpty) { + val id_stall_fpu = if (!params(HasFPU)) { val fp_sboard = new Scoreboard(32) fp_sboard.set((wb_dcache_miss && wb_reg_fp_wen || io.fpu.sboard_set) && !replay_wb, io.dpath.wb_waddr) fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index a2d02dba..b2db3502 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -6,7 +6,8 @@ import Util._ import FPConstants._ import uncore.constants.MemoryOpConstants._ -case class FPUConfig(sfmaLatency: Int = 2, dfmaLatency: Int = 3) +case object SFMALatency +case object DFMALatency object FPConstants { @@ -340,7 +341,7 
@@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module io.out := Pipe(valid, res, latency-1) } -class FPU(conf: FPUConfig) extends Module +class FPU extends Module { val io = new Bundle { val ctrl = (new CtrlFPUIO).flip @@ -396,11 +397,11 @@ class FPU(conf: FPUConfig) extends Module req.in3 := ex_rs3 req.typ := ex_reg_inst(21,20) - val sfma = Module(new FPUFMAPipe(conf.sfmaLatency, 23, 9)) + val sfma = Module(new FPUFMAPipe(params(SFMALatency), 23, 9)) sfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && ex_ctrl.single sfma.io.in.bits := req - val dfma = Module(new FPUFMAPipe(conf.dfmaLatency, 52, 12)) + val dfma = Module(new FPUFMAPipe(params(DFMALatency), 52, 12)) dfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && !ex_ctrl.single dfma.io.in.bits := req diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 93c2c068..f882acf1 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -6,7 +6,6 @@ import Util._ case class RocketConfiguration(tl: TileLinkConfiguration, as: AddressSpaceConfiguration, icache: ICacheConfig, dcache: DCacheConfig, - fpu: Option[FPUConfig] = None, rocc: Option[RocketConfiguration => RoCC] = None, retireWidth: Int = 1, vm: Boolean = true, From 4e6d69892d6d7fcfb198700174f58461278131f8 Mon Sep 17 00:00:00 2001 From: Adam Izraelevitz Date: Mon, 4 Aug 2014 14:06:52 -0700 Subject: [PATCH 0752/1087] Added initial brainstorm for parameter hierarchical flattening, does not compile ;) --- rocket/src/main/scala/core.scala | 15 ++++++++++++++- rocket/src/main/scala/ctrl.scala | 12 ++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index b6309d09..a172e0ec 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -9,7 +9,7 @@ case object HasFPU extends Field[Boolean] class RocketIO(implicit conf: RocketConfiguration) extends Bundle { - val host 
= new HTIFIO(conf.tl.ln.nClients) + val host = new HTIFIO(params[Int]("nClients")) val imem = new CPUFrontendIO()(conf.icache) val dmem = new HellaCacheIO()(conf.dcache) val ptw = new DatapathPTWIO()(conf.as).flip @@ -18,7 +18,20 @@ class RocketIO(implicit conf: RocketConfiguration) extends Bundle class Core(implicit conf: RocketConfiguration) extends Module { + //xprlen + //hasfpu + //hasrocc + //fastloadword + //fastloadbyte + //as <- unfolded + + //fpuparams + val io = new RocketIO + //nClients + + //icache + //dcache val ctrl = Module(new Control) val dpath = Module(new Datapath) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 12b2a5ae..54d89eef 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -43,7 +43,7 @@ class CtrlDpathIO(implicit conf: RocketConfiguration) extends Bundle // exception handling val retire = Bool(OUTPUT) val exception = Bool(OUTPUT) - val cause = UInt(OUTPUT, conf.xprlen) + val cause = UInt(OUTPUT, params[Int]("xprlen")) val badvaddr_wen = Bool(OUTPUT) // high for a load/store access fault // inputs from datapath val inst = Bits(INPUT, 32) @@ -316,8 +316,8 @@ class Control(implicit conf: RocketConfiguration) extends Module } var decode_table = XDecode.table - if (!params(HasFPU)) decode_table ++= FDecode.table - if (!conf.rocc.isEmpty) decode_table ++= RoCCDecode.table + if (params(HasFPU)) decode_table ++= FDecode.table + if (params[Boolean]("HasRoCC")) decode_table ++= RoCCDecode.table val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) @@ -401,7 +401,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_reg_fence = Reg(init=Bool(false)) val sr = io.dpath.status - var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(BigInt(1) << (conf.xprlen-1) | i))) + var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(BigInt(1) << (params[Int]("xprlen")-1) | i))) val 
(id_interrupt_unmasked, id_interrupt_cause) = checkExceptions(id_interrupts) val id_interrupt = io.dpath.status.ei && id_interrupt_unmasked @@ -437,7 +437,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val id_amo_rl = io.dpath.inst(25) val id_fence_next = id_fence || id_amo && id_amo_rl val id_mem_busy = !io.dmem.ordered || ex_reg_mem_val - val id_rocc_busy = Bool(!conf.rocc.isEmpty) && + val id_rocc_busy = Bool(params[Boolean]("HasRoCC")) && (io.rocc.busy || ex_reg_rocc_val || mem_reg_rocc_val || wb_reg_rocc_val) id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy val id_do_fence = id_rocc_busy && id_fence || @@ -690,7 +690,7 @@ class Control(implicit conf: RocketConfiguration) extends Module // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage. val mem_mem_cmd_bh = - if (conf.fastLoadWord) Bool(!conf.fastLoadByte) && mem_reg_slow_bypass + if (params[Boolean]("fastLoadWord")) Bool(!params[Boolean]("fastLoadByte")) && mem_reg_slow_bypass else Bool(true) val data_hazard_mem = mem_reg_wen && (id_renx1_not0 && id_raddr1 === io.dpath.mem_waddr || From 0dac9a74671d629231cebd6ceeca80aa4aa6f120 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 8 Aug 2014 12:23:02 -0700 Subject: [PATCH 0753/1087] Full conversion to params. Compiles but does not elaborate. 
--- rocket/src/main/scala/arbiter.scala | 6 +- rocket/src/main/scala/btb.scala | 112 +++---- rocket/src/main/scala/core.scala | 35 +-- rocket/src/main/scala/csr.scala | 75 +++-- rocket/src/main/scala/ctrl.scala | 18 +- rocket/src/main/scala/dpath.scala | 41 ++- rocket/src/main/scala/dpath_alu.scala | 12 +- rocket/src/main/scala/icache.scala | 160 +++++----- rocket/src/main/scala/multiplier.scala | 18 +- rocket/src/main/scala/nbdcache.scala | 410 ++++++++++++------------- rocket/src/main/scala/ptw.scala | 31 +- rocket/src/main/scala/rocc.scala | 26 +- rocket/src/main/scala/tile.scala | 91 +++--- rocket/src/main/scala/tlb.scala | 20 +- rocket/src/main/scala/util.scala | 5 - 15 files changed, 500 insertions(+), 560 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 5f4ee53e..917cb816 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -3,11 +3,11 @@ package rocket import Chisel._ import uncore._ -class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Module +class HellaCacheArbiter(n: Int) extends Module { val io = new Bundle { - val requestor = Vec.fill(n){new HellaCacheIO()(conf.dcache)}.flip - val mem = new HellaCacheIO()(conf.dcache) + val requestor = Vec.fill(n){new HellaCacheIO}.flip + val mem = new HellaCacheIO } val r_valid = io.requestor.map(r => Reg(next=r.req.valid)) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index cab8bfc7..4cecf1d7 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -3,61 +3,63 @@ package rocket import Chisel._ import Util._ import Node._ +import uncore._ -case class BTBConfig(as: uncore.AddressSpaceConfiguration, entries: Int, nras: Int = 0) { - val matchBits = as.pgIdxBits - val pages0 = 1 max log2Up(entries) // is this sensible? 
- val pages = (pages0+1)/2*2 // control logic assumes 2 divides pages - val opaqueBits = log2Up(entries) - val nbht = 1 << log2Up(entries * 2) -} +case object Entries extends Field[Int] +case object NRAS extends Field[Int] +case object MatchBits extends Field[Int] +case object Pages0 extends Field[Int] +case object Pages extends Field[Int] +case object OpaqueBits extends Field[Int] +case object NBHT extends Field[Int] -class RAS(implicit conf: BTBConfig) { +class RAS(nras: Int) { def push(addr: UInt): Unit = { - when (count < conf.nras) { count := count + 1 } - val nextPos = Mux(Bool(isPow2(conf.nras)) || pos > 0, pos+1, UInt(0)) + when (count < nras) { count := count + 1 } + val nextPos = Mux(Bool(isPow2(nras)) || pos > 0, pos+1, UInt(0)) stack(nextPos) := addr pos := nextPos } def peek: UInt = stack(pos) def pop: Unit = when (!isEmpty) { count := count - 1 - pos := Mux(Bool(isPow2(conf.nras)) || pos > 0, pos-1, UInt(conf.nras-1)) + pos := Mux(Bool(isPow2(nras)) || pos > 0, pos-1, UInt(nras-1)) } def clear: Unit = count := UInt(0) def isEmpty: Bool = count === UInt(0) - private val count = Reg(init=UInt(0,log2Up(conf.nras+1))) - private val pos = Reg(init=UInt(0,log2Up(conf.nras))) - private val stack = Vec.fill(conf.nras){Reg(UInt())} + private val count = Reg(init=UInt(0,log2Up(nras+1))) + private val pos = Reg(init=UInt(0,log2Up(nras))) + private val stack = Vec.fill(nras){Reg(UInt())} } -class BHTResp(implicit conf: BTBConfig) extends Bundle { - val index = UInt(width = log2Up(conf.nbht).max(1)) +class BHTResp extends Bundle { + val index = UInt(width = log2Up(params(NBHT)).max(1)) val value = UInt(width = 2) } -class BHT(implicit conf: BTBConfig) { +class BHT(nbht: Int) { + val nbhtbits = log2Up(nbht) def get(addr: UInt): BHTResp = { val res = new BHTResp - res.index := addr(log2Up(conf.nbht)+1,2) ^ history + res.index := addr(nbhtbits+1,2) ^ history res.value := table(res.index) res } def update(d: BHTResp, taken: Bool): Unit = { table(d.index) := Cat(taken, 
(d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) - history := Cat(taken, history(log2Up(conf.nbht)-1,1)) + history := Cat(taken, history(nbhtbits-1,1)) } - private val table = Mem(UInt(width = 2), conf.nbht) - val history = Reg(UInt(width = log2Up(conf.nbht))) + private val table = Mem(UInt(width = 2), nbht) + val history = Reg(UInt(width = nbhtbits)) } -class BTBUpdate(implicit val conf: BTBConfig) extends BundleWithConf { +class BTBUpdate extends Bundle { val prediction = Valid(new BTBResp) - val pc = UInt(width = conf.as.vaddrBits) - val target = UInt(width = conf.as.vaddrBits) - val returnAddr = UInt(width = conf.as.vaddrBits) + val pc = UInt(width = params(VAddrBits)) + val target = UInt(width = params(VAddrBits)) + val returnAddr = UInt(width = params(VAddrBits)) val taken = Bool() val isJump = Bool() val isCall = Bool() @@ -65,42 +67,42 @@ class BTBUpdate(implicit val conf: BTBConfig) extends BundleWithConf { val incorrectTarget = Bool() } -class BTBResp(implicit val conf: BTBConfig) extends BundleWithConf { +class BTBResp extends Bundle { val taken = Bool() - val target = UInt(width = conf.as.vaddrBits) - val entry = UInt(width = conf.opaqueBits) + val target = UInt(width = params(VAddrBits)) + val entry = UInt(width = params(OpaqueBits)) val bht = new BHTResp } // fully-associative branch target buffer -class BTB(implicit conf: BTBConfig) extends Module { +class BTB extends Module { val io = new Bundle { - val req = UInt(INPUT, conf.as.vaddrBits) + val req = UInt(INPUT, params(VAddrBits)) val resp = Valid(new BTBResp) val update = Valid(new BTBUpdate).flip val invalidate = Bool(INPUT) } - val idxValid = Reg(init=UInt(0, conf.entries)) - val idxs = Mem(UInt(width=conf.matchBits), conf.entries) - val idxPages = Mem(UInt(width=log2Up(conf.pages)), conf.entries) - val tgts = Mem(UInt(width=conf.matchBits), conf.entries) - val tgtPages = Mem(UInt(width=log2Up(conf.pages)), conf.entries) - val pages = 
Mem(UInt(width=conf.as.vaddrBits-conf.matchBits), conf.pages) - val pageValid = Reg(init=UInt(0, conf.pages)) - val idxPagesOH = idxPages.map(UIntToOH(_)(conf.pages-1,0)) - val tgtPagesOH = tgtPages.map(UIntToOH(_)(conf.pages-1,0)) + val idxValid = Reg(init=UInt(0, params(Entries))) + val idxs = Mem(UInt(width=params(MatchBits)), params(Entries)) + val idxPages = Mem(UInt(width=log2Up(params(Pages))), params(Entries)) + val tgts = Mem(UInt(width=params(MatchBits)), params(Entries)) + val tgtPages = Mem(UInt(width=log2Up(params(Pages))), params(Entries)) + val pages = Mem(UInt(width=params(VAddrBits)-params(MatchBits)), params(Pages)) + val pageValid = Reg(init=UInt(0, params(Pages))) + val idxPagesOH = idxPages.map(UIntToOH(_)(params(Pages)-1,0)) + val tgtPagesOH = tgtPages.map(UIntToOH(_)(params(Pages)-1,0)) - val useRAS = Reg(UInt(width = conf.entries)) - val isJump = Reg(UInt(width = conf.entries)) + val useRAS = Reg(UInt(width = params(Entries))) + val isJump = Reg(UInt(width = params(Entries))) - private def page(addr: UInt) = addr >> conf.matchBits + private def page(addr: UInt) = addr >> params(MatchBits) private def pageMatch(addr: UInt) = { val p = page(addr) Vec(pages.map(_ === p)).toBits & pageValid } private def tagMatch(addr: UInt, pgMatch: UInt): UInt = { - val idx = addr(conf.matchBits-1,0) + val idx = addr(params(MatchBits)-1,0) val idxMatch = idxs.map(_ === idx).toBits val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits idxValid & idxMatch & idxPageMatch @@ -121,7 +123,7 @@ class BTB(implicit conf: BTBConfig) extends Module { } val updateHit = update.bits.prediction.valid - val updateValid = update.bits.incorrectTarget || updateHit && Bool(conf.nbht > 0) + val updateValid = update.bits.incorrectTarget || updateHit && Bool(params(NBHT) > 0) val updateTarget = updateValid && update.bits.incorrectTarget val useUpdatePageHit = updatePageHit.orR @@ -134,20 +136,20 @@ class BTB(implicit conf: BTBConfig) extends Module { val samePage = 
page(update.bits.pc) === page(update_target) val usePageHit = (pageHit & ~idxPageReplEn).orR val doTgtPageRepl = updateTarget && !samePage && !usePageHit - val tgtPageRepl = Mux(samePage, idxPageUpdateOH, idxPageUpdateOH(conf.pages-2,0) << 1 | idxPageUpdateOH(conf.pages-1)) + val tgtPageRepl = Mux(samePage, idxPageUpdateOH, idxPageUpdateOH(params(Pages)-2,0) << 1 | idxPageUpdateOH(params(Pages)-1)) val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl)) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) val doPageRepl = doIdxPageRepl || doTgtPageRepl val pageReplEn = idxPageReplEn | tgtPageReplEn - idxPageRepl := UIntToOH(Counter(update.valid && doPageRepl, conf.pages)._1) + idxPageRepl := UIntToOH(Counter(update.valid && doPageRepl, params(Pages))._1) when (update.valid && !(updateValid && !updateTarget)) { - val nextRepl = Counter(!updateHit && updateValid, conf.entries)._1 + val nextRepl = Counter(!updateHit && updateValid, params(Entries))._1 val waddr = Mux(updateHit, update.bits.prediction.bits.entry, nextRepl) // invalidate entries if we stomp on pages they depend upon - idxValid := idxValid & ~Vec.tabulate(conf.entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits + idxValid := idxValid & ~Vec.tabulate(params(Entries))(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits idxValid(waddr) := updateValid when (updateTarget) { @@ -160,11 +162,11 @@ class BTB(implicit conf: BTBConfig) extends Module { isJump(waddr) := update.bits.isJump } - require(conf.pages % 2 == 0) - val idxWritesEven = (idxPageUpdateOH & Fill(conf.pages/2, UInt(1,2))).orR + require(params(Pages) % 2 == 0) + val idxWritesEven = (idxPageUpdateOH & Fill(params(Pages)/2, UInt(1,2))).orR def writeBank(i: Int, mod: Int, en: Bool, data: UInt) = - for (i <- i until conf.pages by mod) + for (i <- i until params(Pages) by mod) when (en && pageReplEn(i)) { pages(i) := data } writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), @@ -185,16 
+187,16 @@ class BTB(implicit conf: BTBConfig) extends Module { io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) - if (conf.nbht > 0) { - val bht = new BHT + if (params(NBHT) > 0) { + val bht = new BHT(params(NBHT)) val res = bht.get(io.req) when (update.valid && updateHit && !update.bits.isJump) { bht.update(update.bits.prediction.bits.bht, update.bits.taken) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } io.resp.bits.bht := res } - if (conf.nras > 0) { - val ras = new RAS + if (params(NRAS) > 0) { + val ras = new RAS(params(NRAS)) val doPeek = Mux1H(hits, useRAS) when (!ras.isEmpty && doPeek) { io.resp.bits.target := ras.peek diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index a172e0ec..c9ec77a7 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -5,39 +5,26 @@ import Util._ import uncore.HTIFIO case object FPUParams extends Field[PF] -case object HasFPU extends Field[Boolean] +case object BuildFPU extends Field[Option[() => FPU]] -class RocketIO(implicit conf: RocketConfiguration) extends Bundle +class RocketIO extends Bundle { - val host = new HTIFIO(params[Int]("nClients")) - val imem = new CPUFrontendIO()(conf.icache) - val dmem = new HellaCacheIO()(conf.dcache) - val ptw = new DatapathPTWIO()(conf.as).flip + val host = new HTIFIO + val imem = new CPUFrontendIO + val dmem = new HellaCacheIO + val ptw = new DatapathPTWIO().flip val rocc = new RoCCInterface().flip } -class Core(implicit conf: RocketConfiguration) extends Module +class Core extends Module { - //xprlen - //hasfpu - //hasrocc - //fastloadword - //fastloadbyte - //as <- unfolded - - //fpuparams - - val io = new RocketIO - //nClients - - //icache - //dcache + val io = new RocketIO - val ctrl = Module(new Control) + val ctrl = Module(new Control) val dpath = Module(new Datapath) - if (!params(HasFPU)) { - val fpu = Module(new 
FPU,params(FPUParams)) + if (!params(BuildFPU).isEmpty) { + val fpu = Module(params(BuildFPU).get(),params(FPUParams)) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl } diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 52e7f7fc..c15a3389 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -3,8 +3,7 @@ package rocket import Chisel._ import Util._ import Node._ -import uncore.HTIFIO -import uncore.AddressSpaceConfiguration +import uncore._ import scala.math._ class Status extends Bundle { @@ -33,58 +32,58 @@ object CSR val C = Bits(3,2) } -class CSRFileIO(implicit conf: RocketConfiguration) extends Bundle { - val host = new HTIFIO(conf.tl.ln.nClients) +class CSRFileIO extends Bundle { + val host = new HTIFIO val rw = new Bundle { val addr = UInt(INPUT, 12) val cmd = Bits(INPUT, CSR.SZ) - val rdata = Bits(OUTPUT, conf.xprlen) - val wdata = Bits(INPUT, conf.xprlen) + val rdata = Bits(OUTPUT, params(XprLen)) + val wdata = Bits(INPUT, params(XprLen)) } val status = new Status().asOutput - val ptbr = UInt(OUTPUT, conf.as.paddrBits) - val evec = UInt(OUTPUT, conf.as.vaddrBits+1) + val ptbr = UInt(OUTPUT, params(PAddrBits)) + val evec = UInt(OUTPUT, params(VAddrBits)+1) val exception = Bool(INPUT) - val retire = UInt(INPUT, log2Up(1+conf.retireWidth)) - val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+conf.retireWidth))) - val cause = UInt(INPUT, conf.xprlen) + val retire = UInt(INPUT, log2Up(1+params(RetireWidth))) + val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+params(RetireWidth)))) + val cause = UInt(INPUT, params(XprLen)) val badvaddr_wen = Bool(INPUT) - val pc = UInt(INPUT, conf.as.vaddrBits+1) + val pc = UInt(INPUT, params(VAddrBits)+1) val sret = Bool(INPUT) val fatc = Bool(OUTPUT) val replay = Bool(OUTPUT) - val time = UInt(OUTPUT, conf.xprlen) + val time = UInt(OUTPUT, params(XprLen)) val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) val fcsr_flags = Valid(Bits(width = 
FPConstants.FLAGS_SZ)).flip val rocc = new RoCCInterface().flip } -class CSRFile(implicit conf: RocketConfiguration) extends Module +class CSRFile extends Module { val io = new CSRFileIO - val reg_epc = Reg(Bits(width = conf.as.vaddrBits+1)) - val reg_badvaddr = Reg(Bits(width = conf.as.vaddrBits)) - val reg_evec = Reg(Bits(width = conf.as.vaddrBits)) + val reg_epc = Reg(Bits(width = params(VAddrBits)+1)) + val reg_badvaddr = Reg(Bits(width = params(VAddrBits))) + val reg_evec = Reg(Bits(width = params(VAddrBits))) val reg_compare = Reg(Bits(width = 32)) - val reg_cause = Reg(Bits(width = conf.xprlen)) - val reg_tohost = Reg(init=Bits(0, conf.xprlen)) - val reg_fromhost = Reg(init=Bits(0, conf.xprlen)) - val reg_sup0 = Reg(Bits(width = conf.xprlen)) - val reg_sup1 = Reg(Bits(width = conf.xprlen)) - val reg_ptbr = Reg(UInt(width = conf.as.paddrBits)) + val reg_cause = Reg(Bits(width = params(XprLen))) + val reg_tohost = Reg(init=Bits(0, params(XprLen))) + val reg_fromhost = Reg(init=Bits(0, params(XprLen))) + val reg_sup0 = Reg(Bits(width = params(XprLen))) + val reg_sup1 = Reg(Bits(width = params(XprLen))) + val reg_ptbr = Reg(UInt(width = params(PAddrBits))) val reg_stats = Reg(init=Bool(false)) val reg_status = Reg(new Status) // reset down below - val reg_time = WideCounter(conf.xprlen) - val reg_instret = WideCounter(conf.xprlen, io.retire) - val reg_uarch_counters = io.uarch_counters.map(WideCounter(conf.xprlen, _)) + val reg_time = WideCounter(params(XprLen)) + val reg_instret = WideCounter(params(XprLen), io.retire) + val reg_uarch_counters = io.uarch_counters.map(WideCounter(params(XprLen), _)) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) val r_irq_timer = Reg(init=Bool(false)) val r_irq_ipi = Reg(init=Bool(true)) - val irq_rocc = Bool(!conf.rocc.isEmpty) && io.rocc.interrupt + val irq_rocc = Bool(!params(BuildRoCC).isEmpty) && io.rocc.interrupt val cpu_req_valid = io.rw.cmd != CSR.N val host_pcr_req_valid = Reg(Bool()) // don't 
reset @@ -130,7 +129,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module when (io.badvaddr_wen) { val wdata = io.rw.wdata - val (upper, lower) = Split(wdata, conf.as.vaddrBits) + val (upper, lower) = Split(wdata, params(VAddrBits)) val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) reg_badvaddr := Cat(sign, lower).toSInt } @@ -161,12 +160,12 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module when (host_pcr_req_fire && !host_pcr_bits.rw && decoded_addr(CSRs.tohost)) { reg_tohost := UInt(0) } val read_impl = Bits(2) - val read_ptbr = reg_ptbr(conf.as.paddrBits-1, conf.as.pgIdxBits) << conf.as.pgIdxBits + val read_ptbr = reg_ptbr(params(PAddrBits)-1, params(PgIdxBits)) << params(PgIdxBits) val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( - CSRs.fflags -> (if (!params(HasFPU)) reg_fflags else UInt(0)), - CSRs.frm -> (if (!params(HasFPU)) reg_frm else UInt(0)), - CSRs.fcsr -> (if (!params(HasFPU)) Cat(reg_frm, reg_fflags) else UInt(0)), + CSRs.fflags -> (if (!params(BuildFPU).isEmpty) reg_fflags else UInt(0)), + CSRs.frm -> (if (!params(BuildFPU).isEmpty) reg_frm else UInt(0)), + CSRs.fcsr -> (if (!params(BuildFPU).isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), CSRs.cycle -> reg_time, CSRs.time -> reg_time, CSRs.instret -> reg_instret, @@ -206,15 +205,15 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module reg_status.s64 := true reg_status.u64 := true reg_status.zero := 0 - if (!conf.vm) reg_status.vm := false - if (conf.rocc.isEmpty) reg_status.er := false - if (params(HasFPU)) reg_status.ef := false + if (!params(UseVM)) reg_status.vm := false + if (params(BuildRoCC).isEmpty) reg_status.er := false + if (params(BuildFPU).isEmpty) reg_status.ef := false } when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } - when (decoded_addr(CSRs.epc)) { 
reg_epc := wdata(conf.as.vaddrBits,0).toSInt } - when (decoded_addr(CSRs.evec)) { reg_evec := wdata(conf.as.vaddrBits-1,0).toSInt } + when (decoded_addr(CSRs.epc)) { reg_epc := wdata(params(VAddrBits),0).toSInt } + when (decoded_addr(CSRs.evec)) { reg_evec := wdata(params(VAddrBits)-1,0).toSInt } when (decoded_addr(CSRs.count)) { reg_time := wdata.toUInt } when (decoded_addr(CSRs.compare)) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false) } when (decoded_addr(CSRs.fromhost)) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } @@ -222,7 +221,7 @@ class CSRFile(implicit conf: RocketConfiguration) extends Module when (decoded_addr(CSRs.clear_ipi)){ r_irq_ipi := wdata(0) } when (decoded_addr(CSRs.sup0)) { reg_sup0 := wdata } when (decoded_addr(CSRs.sup1)) { reg_sup1 := wdata } - when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(conf.as.paddrBits-1, conf.as.pgIdxBits), Bits(0, conf.as.pgIdxBits)).toUInt } + when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(params(PAddrBits)-1, params(PgIdxBits)), Bits(0, params(PgIdxBits))).toUInt } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 54d89eef..1073019c 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -6,7 +6,7 @@ import uncore.constants.MemoryOpConstants._ import ALU._ import Util._ -class CtrlDpathIO(implicit conf: RocketConfiguration) extends Bundle +class CtrlDpathIO extends Bundle { // outputs to datapath val sel_pc = UInt(OUTPUT, 3) @@ -305,19 +305,19 @@ object RoCCDecode extends DecodeConstants CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N)) } -class Control(implicit conf: RocketConfiguration) extends Module +class Control extends Module { val io = new Bundle { val dpath = new CtrlDpathIO - val imem = new CPUFrontendIO()(conf.icache) - val dmem = new 
HellaCacheIO()(conf.dcache) + val imem = new CPUFrontendIO + val dmem = new HellaCacheIO val fpu = new CtrlFPUIO val rocc = new RoCCInterface().flip } var decode_table = XDecode.table - if (params(HasFPU)) decode_table ++= FDecode.table - if (params[Boolean]("HasRoCC")) decode_table ++= RoCCDecode.table + if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table + if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) @@ -411,13 +411,13 @@ class Control(implicit conf: RocketConfiguration) extends Module val fp_csrs = CSRs.fcsr :: CSRs.frm :: CSRs.fflags :: Nil val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*) - if (params(HasFPU)) + if(params(BuildFPU).isEmpty) legal_csrs --= fp_csrs val id_csr_addr = io.dpath.inst(31,20) val isLegalCSR = Vec.tabulate(1 << id_csr_addr.getWidth)(i => Bool(legal_csrs contains i)) val id_csr_en = id_csr != CSR.N - val id_csr_fp = Bool(!params(HasFPU)) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) + val id_csr_fp = Bool(!params(BuildFPU).isEmpty) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) val id_csr_wen = id_raddr1 != UInt(0) || !Vec(CSR.S, CSR.C).contains(id_csr) val id_csr_invalid = id_csr_en && !isLegalCSR(id_csr_addr) val id_csr_privileged = id_csr_en && @@ -623,7 +623,7 @@ class Control(implicit conf: RocketConfiguration) extends Module val sboard = new Scoreboard(32) sboard.clear(io.dpath.ll_wen, io.dpath.ll_waddr) - val id_stall_fpu = if (!params(HasFPU)) { + val id_stall_fpu = if (!params(BuildFPU).isEmpty) { val fp_sboard = new Scoreboard(32) fp_sboard.set((wb_dcache_miss && wb_reg_fp_wen || io.fpu.sboard_set) && !replay_wb, io.dpath.wb_waddr) fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index ea6b59cf..e932b1dd 100644 --- a/rocket/src/main/scala/dpath.scala +++ 
b/rocket/src/main/scala/dpath.scala @@ -3,17 +3,16 @@ package rocket import Chisel._ import Instructions._ import Util._ -import uncore.HTIFIO +import uncore._ -class Datapath(implicit conf: RocketConfiguration) extends Module +class Datapath extends Module { - implicit val as = conf.as val io = new Bundle { - val host = new HTIFIO(conf.tl.ln.nClients) - val ctrl = (new CtrlDpathIO).flip - val dmem = new HellaCacheIO()(conf.dcache) - val ptw = (new DatapathPTWIO).flip - val imem = new CPUFrontendIO()(conf.icache) + val host = new HTIFIO + val ctrl = new CtrlDpathIO().flip + val dmem = new HellaCacheIO + val ptw = new DatapathPTWIO().flip + val imem = new CPUFrontendIO val fpu = new DpathFPUIO val rocc = new RoCCInterface().flip } @@ -122,8 +121,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module bypass(BYP_0) := Bits(0) bypass(BYP_EX) := mem_reg_wdata bypass(BYP_MEM) := wb_reg_wdata - bypass(BYP_DC) := (if (conf.fastLoadByte) io.dmem.resp.bits.data_subword - else if (conf.fastLoadWord) io.dmem.resp.bits.data + bypass(BYP_DC) := (if(params(FastLoadByte)) io.dmem.resp.bits.data_subword + else if(params(FastLoadWord)) io.dmem.resp.bits.data else wb_reg_wdata) val ex_rs = for (i <- 0 until id_rs.size) @@ -144,8 +143,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module alu.io.in1 := ex_op1 // multiplier and divider - val div = Module(new MulDiv(mulUnroll = if (conf.fastMulDiv) 8 else 1, - earlyOut = conf.fastMulDiv)) + val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1, + earlyOut = params(FastMulDiv))) div.io.req.valid := io.ctrl.div_mul_val div.io.req.bits.dw := ex_reg_ctrl_fn_dw div.io.req.bits.fn := ex_reg_ctrl_fn_alu @@ -158,10 +157,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Module io.fpu.fromint_data := ex_rs(0) def vaSign(a0: UInt, ea: Bits) = { - // efficient means to compress 64-bit VA into conf.as.vaddrBits+1 bits - // (VA is bad if VA(conf.as.vaddrBits) != VA(conf.as.vaddrBits-1)) 
- val a = a0 >> conf.as.vaddrBits-1 - val e = ea(conf.as.vaddrBits,conf.as.vaddrBits-1) + // efficient means to compress 64-bit VA into params(VAddrBits)+1 bits + // (VA is bad if VA(params(VAddrBits)) != VA(params(VAddrBits)-1)) + val a = a0 >> params(VAddrBits)-1 + val e = ea(params(VAddrBits),params(VAddrBits)-1) Mux(a === UInt(0) || a === UInt(1), e != UInt(0), Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1), e(0))) @@ -169,10 +168,10 @@ class Datapath(implicit conf: RocketConfiguration) extends Module // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(conf.as.vaddrBits-1,0)).toUInt + io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(params(VAddrBits)-1,0)).toUInt io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_fp_val) require(io.dmem.req.bits.tag.getWidth >= 6) - require(conf.dcacheReqTagBits >= 6) + require(params(DcacheReqTagBits) >= 6) // processor control regfile read val pcr = Module(new CSRFile) @@ -214,7 +213,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module ll_wdata := div.io.resp.bits.data io.ctrl.ll_waddr := div.io.resp.bits.tag io.ctrl.ll_wen := div.io.resp.fire() - if (!conf.rocc.isEmpty) { + if (!params(BuildRoCC).isEmpty) { io.rocc.resp.ready := io.ctrl.ll_ready when (io.rocc.resp.fire()) { div.io.resp.ready := Bool(false) @@ -225,7 +224,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module } when (dmem_resp_replay && dmem_resp_xpu) { div.io.resp.ready := Bool(false) - if (!conf.rocc.isEmpty) + if (!params(BuildRoCC).isEmpty) io.rocc.resp.ready := Bool(false) io.ctrl.ll_waddr := dmem_resp_waddr io.ctrl.ll_wen := Bool(true) @@ -240,7 +239,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module val mem_br_target = mem_reg_pc + Mux(io.ctrl.mem_branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), 
Mux(!io.ctrl.mem_jalr && !io.ctrl.mem_branch, imm(IMM_UJ, mem_reg_inst), SInt(4))) - val mem_npc = Mux(io.ctrl.mem_jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(conf.as.vaddrBits-1,0)), mem_br_target) + val mem_npc = Mux(io.ctrl.mem_jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(params(VAddrBits)-1,0)), mem_br_target) io.ctrl.mem_misprediction := mem_npc != Mux(io.ctrl.ex_valid, ex_reg_pc, id_pc) io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1 val mem_int_wdata = Mux(io.ctrl.mem_jalr, mem_br_target, mem_reg_wdata) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 4d9c54cb..f2d565cd 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -41,16 +41,16 @@ object ALU } import ALU._ -class ALUIO(implicit conf: RocketConfiguration) extends Bundle { +class ALUIO extends Bundle { val dw = Bits(INPUT, SZ_DW) val fn = Bits(INPUT, SZ_ALU_FN) - val in2 = UInt(INPUT, conf.xprlen) - val in1 = UInt(INPUT, conf.xprlen) - val out = UInt(OUTPUT, conf.xprlen) - val adder_out = UInt(OUTPUT, conf.xprlen) + val in2 = UInt(INPUT, params(XprLen)) + val in1 = UInt(INPUT, params(XprLen)) + val out = UInt(OUTPUT, params(XprLen)) + val adder_out = UInt(OUTPUT, params(XprLen)) } -class ALU(implicit conf: RocketConfiguration) extends Module +class ALU extends Module { val io = new ALUIO diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index a9308086..0db261a1 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -4,62 +4,39 @@ import Chisel._ import uncore._ import Util._ -case class ICacheConfig(sets: Int, assoc: Int, - ibytes: Int = 4, rowbytes: Int = 16, - ntlb: Int = 8, - tl: TileLinkConfiguration, - as: AddressSpaceConfiguration, - btb: BTBConfig, - code: Code = new IdentityCode) -{ - val w = 1 +case object InstBytes extends Field[Int] +case object CoreBTBParams extends Field[PF] - val dm = assoc 
== 1 - val lines = sets * assoc - val idxbits = log2Up(sets) - val offbits = log2Up(tl.dataBits/8) - val rowbits = rowbytes*8 - val rowoffbits = log2Up(rowbytes) - val untagbits = idxbits + offbits - val tagbits = as.paddrBits - untagbits - val refillcycles = tl.dataBits/rowbits - - require(isPow2(sets) && isPow2(assoc)) - require(isPow2(w) && isPow2(ibytes)) - require(as.pgIdxBits >= untagbits) +class FrontendReq extends Bundle { + val pc = UInt(width = params(VAddrBits)+1) } -class FrontendReq()(implicit val conf: ICacheConfig) extends BundleWithConf { - val pc = UInt(width = conf.as.vaddrBits+1) -} - -class FrontendResp(implicit val conf: ICacheConfig) extends BundleWithConf { - val pc = UInt(width = conf.as.vaddrBits+1) // ID stage PC - val data = Bits(width = conf.ibytes*8) +class FrontendResp extends Bundle { + val pc = UInt(width = params(VAddrBits)+1) // ID stage PC + val data = Bits(width = params(InstBytes)*8) val xcpt_ma = Bool() val xcpt_if = Bool() } -class CPUFrontendIO(implicit conf: ICacheConfig) extends Bundle { +class CPUFrontendIO extends Bundle { val req = Valid(new FrontendReq) val resp = Decoupled(new FrontendResp).flip - val btb_resp = Valid(new BTBResp()(conf.btb)).flip - val btb_update = Valid(new BTBUpdate()(conf.btb)) - val ptw = new TLBPTWIO()(conf.as).flip + val btb_resp = Valid(new BTBResp).flip + val btb_update = Valid(new BTBUpdate) + val ptw = new TLBPTWIO().flip val invalidate = Bool(OUTPUT) } -class Frontend(implicit c: ICacheConfig) extends Module +class Frontend extends Module { - implicit val (tl, as) = (c.tl, c.as) val io = new Bundle { - val cpu = new CPUFrontendIO()(c).flip + val cpu = new CPUFrontendIO().flip val mem = new UncachedTileLinkIO } - val btb = Module(new BTB()(c.btb)) + val btb = Module(new BTB, params(CoreBTBParams)) val icache = Module(new ICache) - val tlb = Module(new TLB(c.ntlb)) + val tlb = Module(new TLB(params(NTLBEntries))) val s1_pc_ = Reg(UInt()) val s1_pc = s1_pc_ & SInt(-2) // discard LSB of PC 
(throughout the pipeline) @@ -70,14 +47,14 @@ class Frontend(implicit c: ICacheConfig) extends Module val s2_btb_resp_bits = Reg(btb.io.resp.bits.clone) val s2_xcpt_if = Reg(init=Bool(false)) - val msb = c.as.vaddrBits-1 + val msb = params(VAddrBits)-1 val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) - val pcp4_0 = s1_pc + UInt(c.ibytes) + val pcp4_0 = s1_pc + UInt(params(InstBytes)) val pcp4 = Cat(s1_pc(msb) & pcp4_0(msb), pcp4_0(msb,0)) val icmiss = s2_valid && !icache.io.resp.valid val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, pcp4) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt - val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((pcp4 & c.rowbytes) === (s1_pc & c.rowbytes)) + val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((pcp4 & params(RowBytes)) === (s1_pc & params(RowBytes))) val stall = io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { @@ -97,13 +74,13 @@ class Frontend(implicit c: ICacheConfig) extends Module s2_valid := Bool(false) } - btb.io.req := s1_pc & SInt(-c.ibytes) + btb.io.req := s1_pc & SInt(-params(InstBytes)) btb.io.update := io.cpu.btb_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate tlb.io.ptw <> io.cpu.ptw tlb.io.req.valid := !stall && !icmiss - tlb.io.req.bits.vpn := s1_pc >> UInt(c.as.pgIdxBits) + tlb.io.req.bits.vpn := s1_pc >> UInt(params(PgIdxBits)) tlb.io.req.bits.asid := UInt(0) tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.instruction := Bool(true) @@ -117,35 +94,38 @@ class Frontend(implicit c: ICacheConfig) extends Module icache.io.resp.ready := !stall && !s1_same_block io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) - io.cpu.resp.bits.pc := s2_pc & SInt(-c.ibytes) // discard PC LSBs - io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(c.rowbytes)-1,log2Up(c.ibytes)) << log2Up(c.ibytes*8)) - io.cpu.resp.bits.xcpt_ma := 
s2_pc(log2Up(c.ibytes)-1,0) != UInt(0) + io.cpu.resp.bits.pc := s2_pc & SInt(-params(InstBytes)) // discard PC LSBs + io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(params(RowBytes))-1,log2Up(params(InstBytes))) << log2Up(params(InstBytes)*8)) + io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(params(InstBytes))-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if io.cpu.btb_resp.valid := s2_btb_resp_valid io.cpu.btb_resp.bits := s2_btb_resp_bits } -class ICacheReq(implicit val conf: ICacheConfig) extends BundleWithConf { - val idx = UInt(width = conf.as.pgIdxBits) - val ppn = UInt(width = conf.as.ppnBits) // delayed one cycle +class ICacheReq extends Bundle { + val idx = UInt(width = params(PgIdxBits)) + val ppn = UInt(width = params(PPNBits)) // delayed one cycle val kill = Bool() // delayed one cycle } -class ICacheResp(implicit val conf: ICacheConfig) extends BundleWithConf { - val data = Bits(width = conf.ibytes*8) - val datablock = Bits(width = conf.rowbits) +class ICacheResp extends Bundle { + val data = Bits(width = params(InstBytes)*8) + val datablock = Bits(width = params(RowBits)) } -class ICache(implicit c: ICacheConfig) extends Module +class ICache extends Module { - implicit val (tl, ln) = (c.tl, c.tl.ln) + val (nSets, nWays, co, ecc) = (params(NSets), params(NWays), params(TLCoherence), params(ECCCode)) val io = new Bundle { val req = Valid(new ICacheReq).flip val resp = Decoupled(new ICacheResp) val invalidate = Bool(INPUT) val mem = new UncachedTileLinkIO } + require(isPow2(nSets) && isPow2(nWays)) + require(isPow2(params(InstBytes))) + require(params(PgIdxBits) >= params(UntagBits)) val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(UInt(), 4) val state = Reg(init=s_ready) @@ -154,13 +134,13 @@ class ICache(implicit c: ICacheConfig) extends Module val rdy = Bool() val s2_valid = Reg(init=Bool(false)) - val s2_addr = Reg(UInt(width = c.as.paddrBits)) + val s2_addr = Reg(UInt(width = params(PAddrBits))) val 
s2_any_tag_hit = Bool() val s1_valid = Reg(init=Bool(false)) - val s1_pgoff = Reg(UInt(width = c.as.pgIdxBits)) + val s1_pgoff = Reg(UInt(width = params(PgIdxBits))) val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUInt - val s1_tag = s1_addr(c.tagbits+c.untagbits-1,c.untagbits) + val s1_tag = s1_addr(params(TagBits)+params(UntagBits)-1,params(UntagBits)) val s0_valid = io.req.valid || s1_valid && stall val s0_pgoff = Mux(s1_valid && stall, s1_pgoff, io.req.bits.idx) @@ -175,9 +155,9 @@ class ICache(implicit c: ICacheConfig) extends Module s2_addr := s1_addr } - val s2_tag = s2_addr(c.tagbits+c.untagbits-1,c.untagbits) - val s2_idx = s2_addr(c.untagbits-1,c.offbits) - val s2_offset = s2_addr(c.offbits-1,0) + val s2_tag = s2_addr(params(TagBits)+params(UntagBits)-1,params(UntagBits)) + val s2_idx = s2_addr(params(UntagBits)-1,params(OffBits)) + val s2_offset = s2_addr(params(OffBits)-1,0) val s2_hit = s2_valid && s2_any_tag_hit val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss @@ -187,8 +167,8 @@ class ICache(implicit c: ICacheConfig) extends Module var refill_valid = io.mem.grant.valid var refill_bits = io.mem.grant.bits def doRefill(g: Grant): Bool = Bool(true) - if(c.refillcycles > 1) { - val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, c.refillcycles, doRefill)) + if(params(RefillCycles) > 1) { + val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, params(RefillCycles), doRefill)) ser.io.in <> io.mem.grant refill_cnt = ser.io.cnt refill_done = ser.io.done @@ -200,21 +180,21 @@ class ICache(implicit c: ICacheConfig) extends Module } //assert(!c.tlco.isVoluntary(refill_bits.payload) || !refill_valid, "UncachedRequestors shouldn't get voluntary grants.") - val repl_way = if (c.dm) UInt(0) else LFSR16(s2_miss)(log2Up(c.assoc)-1,0) - val enc_tagbits = c.code.width(c.tagbits) - val tag_array = Mem(Bits(width = enc_tagbits*c.assoc), c.sets, seqRead = true) + val repl_way = if (params(IsDM)) UInt(0) else 
LFSR16(s2_miss)(log2Up(nWays)-1,0) + val entagbits = ecc.width(params(TagBits)) + val tag_array = Mem(Bits(width = entagbits*nWays), nSets, seqRead = true) val tag_raddr = Reg(UInt()) when (refill_done) { - val wmask = FillInterleaved(enc_tagbits, if (c.dm) Bits(1) else UIntToOH(repl_way)) - val tag = c.code.encode(s2_tag).toUInt - tag_array.write(s2_idx, Fill(c.assoc, tag), wmask) + val wmask = FillInterleaved(entagbits, if (params(IsDM)) Bits(1) else UIntToOH(repl_way)) + val tag = ecc.encode(s2_tag).toUInt + tag_array.write(s2_idx, Fill(nWays, tag), wmask) } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM .elsewhen (s0_valid) { - tag_raddr := s0_pgoff(c.untagbits-1,c.offbits) + tag_raddr := s0_pgoff(params(UntagBits)-1,params(OffBits)) } - val vb_array = Reg(init=Bits(0, c.lines)) + val vb_array = Reg(init=Bits(0, nSets*nWays)) when (refill_done && !invalidated) { vb_array := vb_array.bitSet(Cat(repl_way, s2_idx), Bool(true)) } @@ -222,59 +202,59 @@ class ICache(implicit c: ICacheConfig) extends Module vb_array := Bits(0) invalidated := Bool(true) } - val s2_disparity = Vec.fill(c.assoc){Bool()} - for (i <- 0 until c.assoc) + val s2_disparity = Vec.fill(nWays){Bool()} + for (i <- 0 until nWays) when (s2_valid && s2_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s2_idx), Bool(false)) } - val s1_tag_match = Vec.fill(c.assoc){Bool()} - val s2_tag_hit = Vec.fill(c.assoc){Bool()} - val s2_dout = Vec.fill(c.assoc){Reg(Bits())} + val s1_tag_match = Vec.fill(nWays){Bool()} + val s2_tag_hit = Vec.fill(nWays){Bool()} + val s2_dout = Vec.fill(nWays){Reg(Bits())} - for (i <- 0 until c.assoc) { - val s1_vb = vb_array(Cat(UInt(i), s1_pgoff(c.untagbits-1,c.offbits))).toBool + for (i <- 0 until nWays) { + val s1_vb = vb_array(Cat(UInt(i), s1_pgoff(params(UntagBits)-1,params(OffBits)))).toBool val s2_vb = Reg(Bool()) val s2_tag_disparity = Reg(Bool()) val s2_tag_match = Reg(Bool()) - val tag_out = tag_array(tag_raddr)(enc_tagbits*(i+1)-1, 
enc_tagbits*i) + val tag_out = tag_array(tag_raddr)(entagbits*(i+1)-1, entagbits*i) when (s1_valid && rdy && !stall) { s2_vb := s1_vb - s2_tag_disparity := c.code.decode(tag_out).error + s2_tag_disparity := ecc.decode(tag_out).error s2_tag_match := s1_tag_match(i) } - s1_tag_match(i) := tag_out(c.tagbits-1,0) === s1_tag + s1_tag_match(i) := tag_out(params(TagBits)-1,0) === s1_tag s2_tag_hit(i) := s2_vb && s2_tag_match - s2_disparity(i) := s2_vb && (s2_tag_disparity || c.code.decode(s2_dout(i)).error) + s2_disparity(i) := s2_vb && (s2_tag_disparity || ecc.decode(s2_dout(i)).error) } s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_) - for (i <- 0 until c.assoc) { - val data_array = Mem(Bits(width = c.code.width(c.rowbits)), c.sets*c.refillcycles, seqRead = true) + for (i <- 0 until nWays) { + val data_array = Mem(Bits(width = ecc.width(params(RowBits))), nSets*params(RefillCycles), seqRead = true) val s1_raddr = Reg(UInt()) when (refill_valid && repl_way === UInt(i)) { - val e_d = c.code.encode(refill_bits.payload.data) - if(c.refillcycles > 1) data_array(Cat(s2_idx,refill_cnt)) := e_d + val e_d = ecc.encode(refill_bits.payload.data) + if(params(RefillCycles) > 1) data_array(Cat(s2_idx,refill_cnt)) := e_d else data_array(s2_idx) := e_d } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM .elsewhen (s0_valid) { - s1_raddr := s0_pgoff(c.untagbits-1,c.offbits-(if(c.refillcycles > 1) refill_cnt.getWidth else 0)) + s1_raddr := s0_pgoff(params(UntagBits)-1,params(OffBits)-(if(params(RefillCycles) > 1) refill_cnt.getWidth else 0)) } // if s1_tag_match is critical, replace with partial tag check - when (s1_valid && rdy && !stall && (Bool(c.dm) || s1_tag_match(i))) { s2_dout(i) := data_array(s1_raddr) } + when (s1_valid && rdy && !stall && (Bool(params(IsDM)) || s1_tag_match(i))) { s2_dout(i) := data_array(s1_raddr) } } - val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(c.rowbytes)-1,log2Up(c.ibytes)) << 
log2Up(c.ibytes*8)))(c.ibytes*8-1,0)) + val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(params(RowBytes))-1,log2Up(params(InstBytes))) << log2Up(params(InstBytes)*8)))(params(InstBytes)*8-1,0)) io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) val ack_q = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) - ack_q.io.enq.valid := refill_done && tl.co.requiresAckForGrant(refill_bits.payload.g_type) + ack_q.io.enq.valid := refill_done && co.requiresAckForGrant(refill_bits.payload.g_type) ack_q.io.enq.bits.payload.master_xact_id := refill_bits.payload.master_xact_id ack_q.io.enq.bits.header.dst := refill_bits.header.src // output signals io.resp.valid := s2_hit io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready - io.mem.acquire.bits.payload := Acquire(tl.co.getUncachedReadAcquireType, s2_addr >> UInt(c.offbits), UInt(0)) + io.mem.acquire.bits.payload := Acquire(co.getUncachedReadAcquireType, s2_addr >> UInt(params(OffBits)), UInt(0)) io.mem.finish <> ack_q.io.deq // control state machine diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 4f43bd41..02cf199c 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -4,26 +4,26 @@ import Chisel._ import ALU._ import Util._ -class MultiplierReq(implicit val conf: RocketConfiguration) extends BundleWithConf { +class MultiplierReq extends Bundle { val fn = Bits(width = SZ_ALU_FN) val dw = Bits(width = SZ_DW) - val in1 = Bits(width = conf.xprlen) - val in2 = Bits(width = conf.xprlen) - val tag = UInt(width = conf.nxprbits) + val in1 = Bits(width = params(XprLen)) + val in2 = Bits(width = params(XprLen)) + val tag = UInt(width = params(NXprBits)) } -class MultiplierResp(implicit val conf: RocketConfiguration) extends BundleWithConf { - val data = Bits(width = conf.xprlen) - val tag = UInt(width = conf.nxprbits) +class MultiplierResp extends Bundle { + 
val data = Bits(width = params(XprLen)) + val tag = UInt(width = params(NXprBits)) } -class MultiplierIO(implicit conf: RocketConfiguration) extends Bundle { +class MultiplierIO extends Bundle { val req = Decoupled(new MultiplierReq).flip val kill = Bool(INPUT) val resp = Decoupled(new MultiplierResp) } -class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false)(implicit conf: RocketConfiguration) extends Module { +class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false) extends Module { val io = new MultiplierIO val w = io.req.bits.in1.getWidth val mulw = (w+mulUnroll-1)/mulUnroll*mulUnroll diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index e7851298..0a33d01d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -4,47 +4,25 @@ import Chisel._ import uncore._ import Util._ -case class DCacheConfig(val sets: Int, val ways: Int, - nmshr: Int, nrpq: Int, nsdq: Int, ntlb: Int, - val tl: TileLinkConfiguration, - val as: AddressSpaceConfiguration, - reqtagbits: Int, databits: Int, - rowwords: Int = 2, - code: Code = new IdentityCode, - narrowRead: Boolean = true) extends CacheConfig { - def states = tl.co.nClientStates - def lines = sets*ways - def dm = ways == 1 - def offbits = log2Up(tl.dataBits/8) - def ppnbits = as.ppnBits - def vpnbits = as.vpnBits - def pgidxbits = as.pgIdxBits - def maxaddrbits = ppnbits.max(vpnbits+1) + pgidxbits - def paddrbits = as.paddrBits - def lineaddrbits = paddrbits - offbits - def idxbits = log2Up(sets) - def waybits = log2Up(ways) - def untagbits = offbits + idxbits - def tagbits = lineaddrbits - idxbits - def databytes = databits/8 - def wordoffbits = log2Up(databytes) - def rowbits = rowwords*databits - def rowbytes = rowwords*databytes - def rowoffbits = log2Up(rowbytes) - def refillcycles = tl.dataBits/(rowbits) - def isNarrowRead = narrowRead && databits*ways % rowbits == 0 - val statebits = log2Up(states) - val encdatabits = 
code.width(databits) - val encrowbits = rowwords*encdatabits - val lrsc_cycles = 32 // ISA requires 16-insn LRSC sequences to succeed - - require(states > 0) - require(isPow2(sets)) - require(isPow2(ways)) // TODO: relax this - require(rowbits <= tl.dataBits) - require(lineaddrbits == tl.addrBits) - require(untagbits <= pgidxbits) -} +//Knobs +case object StoreDataQueueDepth extends Field[Int] +case object ReplayQueueDepth extends Field[Int] +case object NMSHRs extends Field[Int] +case object NTLBEntries extends Field[Int] +case object CoreReqTagBits extends Field[Int] +case object CoreDataBits extends Field[Int] +case object RowWords extends Field[Int] +case object ECCCode extends Field[Code] +//Derived +case object MaxAddrBits extends Field[Int] +case object CoreDataBytes extends Field[Int] +case object WordOffBits extends Field[Int] +case object RowBytes extends Field[Int] +case object RowOffBits extends Field[Int] +case object DoNarrowRead extends Field[Int] +case object EncDataBits extends Field[Int] +case object EncRowBits extends Field[Int] +case object LRSCCycles extends Field[Int] class StoreGen(typ: Bits, addr: Bits, dat: Bits) { @@ -78,69 +56,68 @@ class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool) val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift) } -class MSHRReq(implicit val cacheconf: DCacheConfig) extends HellaCacheReq { +class MSHRReq extends HellaCacheReq { val tag_match = Bool() val old_meta = new L1Metadata - val way_en = Bits(width = cacheconf.ways) + val way_en = Bits(width = params(NWays)) } -class Replay(implicit conf: DCacheConfig) extends HellaCacheReq { - val sdq_id = UInt(width = log2Up(conf.nsdq)) +class Replay extends HellaCacheReq { + val sdq_id = UInt(width = log2Up(params(StoreDataQueueDepth))) } -class DataReadReq(implicit val conf: DCacheConfig) extends BundleWithConf { - val way_en = Bits(width = conf.ways) - val addr = Bits(width = conf.untagbits) +class DataReadReq extends Bundle 
{ + val way_en = Bits(width = params(NWays)) + val addr = Bits(width = params(UntagBits)) } -class DataWriteReq(implicit conf: DCacheConfig) extends DataReadReq { - val wmask = Bits(width = conf.rowwords) - val data = Bits(width = conf.encrowbits) +class DataWriteReq extends DataReadReq { + val wmask = Bits(width = params(RowWords)) + val data = Bits(width = params(EncRowBits)) } -class L1MetaReadReq(implicit conf: DCacheConfig) extends MetaReadReq { - val tag = Bits(width = conf.tagbits) +class L1MetaReadReq extends MetaReadReq { + val tag = Bits(width = params(TagBits)) } -class L1MetaWriteReq(implicit conf: DCacheConfig) extends +class L1MetaWriteReq extends MetaWriteReq[L1Metadata](new L1Metadata) object L1Metadata { - def apply(tag: Bits, coh: ClientMetadata)(implicit conf: DCacheConfig) = { + def apply(tag: Bits, coh: ClientMetadata) = { val meta = new L1Metadata meta.tag := tag meta.coh := coh meta } } -class L1Metadata(implicit val conf: DCacheConfig) extends Metadata { - val coh = conf.tl.co.clientMetadataOnFlush.clone +class L1Metadata extends Metadata { + val coh = params(TLCoherence).clientMetadataOnFlush.clone } -class InternalProbe(implicit conf: TileLinkConfiguration) extends Probe()(conf) - with HasClientTransactionId +class InternalProbe extends Probe with HasClientTransactionId -class WritebackReq(implicit val conf: DCacheConfig) extends BundleWithConf { - val tag = Bits(width = conf.tagbits) - val idx = Bits(width = conf.idxbits) - val way_en = Bits(width = conf.ways) - val client_xact_id = Bits(width = conf.tl.clientXactIdBits) - val master_xact_id = Bits(width = conf.tl.masterXactIdBits) - val r_type = UInt(width = conf.tl.co.releaseTypeWidth) +class WritebackReq extends Bundle { + val tag = Bits(width = params(TagBits)) + val idx = Bits(width = params(IdxBits)) + val way_en = Bits(width = params(NWays)) + val client_xact_id = Bits(width = params(TLClientXactIdBits)) + val master_xact_id = Bits(width = params(TLMasterXactIdBits)) + val r_type = 
UInt(width = params(TLCoherence).releaseTypeWidth) } -class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { - implicit val (tl, ln) = (conf.tl, conf.tl.ln) +class MSHR(id: Int) extends Module { + val co = params(TLCoherence) val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) val req_sec_val = Bool(INPUT) val req_sec_rdy = Bool(OUTPUT) val req_bits = new MSHRReq().asInput - val req_sdq_id = UInt(INPUT, log2Up(conf.nsdq)) + val req_sdq_id = UInt(INPUT, log2Up(params(StoreDataQueueDepth))) val idx_match = Bool(OUTPUT) - val tag = Bits(OUTPUT, conf.tagbits) + val tag = Bits(OUTPUT, params(TagBits)) val mem_req = Decoupled(new Acquire) val mem_resp = new DataWriteReq().asOutput @@ -158,25 +135,25 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { val acquire_type = Reg(UInt()) val release_type = Reg(UInt()) - val line_state = Reg(new ClientMetadata()(tl.co)) - val refill_count = Reg(UInt(width = log2Up(conf.refillcycles))) // TODO: zero-width wire + val line_state = Reg(new ClientMetadata()(co)) + val refill_count = Reg(UInt(width = log2Up(params(RefillCycles)))) // TODO: zero-width wire val req = Reg(new MSHRReq()) val req_cmd = io.req_bits.cmd - val req_idx = req.addr(conf.untagbits-1,conf.offbits) - val idx_match = req_idx === io.req_bits.addr(conf.untagbits-1,conf.offbits) - val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !tl.co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) + val req_idx = req.addr(params(UntagBits)-1,params(OffBits)) + val idx_match = req_idx === io.req_bits.addr(params(UntagBits)-1,params(OffBits)) + val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) - require(isPow2(conf.refillcycles)) + 
require(isPow2(params(RefillCycles))) val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UInt(id) - val refill_done = reply && (if(conf.refillcycles > 1) refill_count.andR else Bool(true)) + val refill_done = reply && (if(params(RefillCycles) > 1) refill_count.andR else Bool(true)) val wb_done = reply && (state === s_wb_resp) - val meta_on_flush = tl.co.clientMetadataOnFlush - val meta_on_grant = tl.co.clientMetadataOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) - val meta_on_hit = tl.co.clientMetadataOnHit(req_cmd, io.req_bits.old_meta.coh) + val meta_on_flush = co.clientMetadataOnFlush + val meta_on_grant = co.clientMetadataOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) + val meta_on_hit = co.clientMetadataOnHit(req_cmd, io.req_bits.old_meta.coh) - val rpq = Module(new Queue(new Replay, conf.nrpq)) + val rpq = Module(new Queue(new Replay, params(ReplayQueueDepth))) rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(req_cmd) rpq.io.enq.bits := io.req_bits rpq.io.enq.bits.sdq_id := io.req_sdq_id @@ -195,7 +172,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { when (state === s_refill_resp) { when (refill_done) { state := s_meta_write_req } when (reply) { - if(conf.refillcycles > 1) refill_count := refill_count + UInt(1) + if(params(RefillCycles) > 1) refill_count := refill_count + UInt(1) line_state := meta_on_grant } } @@ -213,29 +190,29 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req - acquire_type := tl.co.getAcquireTypeOnSecondaryMiss(req_cmd, meta_on_flush, io.mem_req.bits) + acquire_type := co.getAcquireTypeOnSecondaryMiss(req_cmd, meta_on_flush, io.mem_req.bits) } when (io.req_pri_val && io.req_pri_rdy) { line_state := meta_on_flush refill_count := UInt(0) - acquire_type := tl.co.getAcquireTypeOnPrimaryMiss(req_cmd, meta_on_flush) - release_type := 
tl.co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc + acquire_type := co.getAcquireTypeOnPrimaryMiss(req_cmd, meta_on_flush) + release_type := co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc req := io.req_bits when (io.req_bits.tag_match) { - when (tl.co.isHit(req_cmd, io.req_bits.old_meta.coh)) { // set dirty bit + when (co.isHit(req_cmd, io.req_bits.old_meta.coh)) { // set dirty bit state := s_meta_write_req line_state := meta_on_hit }.otherwise { // upgrade permissions state := s_refill_req } }.otherwise { // writback if necessary and refill - state := Mux(tl.co.needsWriteback(io.req_bits.old_meta.coh), s_wb_req, s_meta_clear) + state := Mux(co.needsWriteback(io.req_bits.old_meta.coh), s_wb_req, s_meta_clear) } } val ackq = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) - ackq.io.enq.valid := (wb_done || refill_done) && tl.co.requiresAckForGrant(io.mem_grant.bits.payload.g_type) + ackq.io.enq.valid := (wb_done || refill_done) && co.requiresAckForGrant(io.mem_grant.bits.payload.g_type) ackq.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp @@ -245,8 +222,8 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { io.idx_match := (state != s_invalid) && idx_match io.mem_resp := req - io.mem_resp.addr := (if(conf.refillcycles > 1) Cat(req_idx, refill_count) else req_idx) << conf.rowoffbits - io.tag := req.addr >> conf.untagbits + io.mem_resp.addr := (if(params(RefillCycles) > 1) Cat(req_idx, refill_count) else req_idx) << params(RowOffBits) + io.tag := req.addr >> params(UntagBits) io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready @@ -267,7 +244,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { io.wb_req.bits.way_en := req.way_en io.wb_req.bits.client_xact_id := Bits(id) 
io.wb_req.bits.master_xact_id := Bits(0) // DNC - io.wb_req.bits.r_type := tl.co.getReleaseTypeOnVoluntaryWriteback() + io.wb_req.bits.r_type := co.getReleaseTypeOnVoluntaryWriteback() io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready io.mem_req.bits.a_type := acquire_type @@ -282,7 +259,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { io.replay.valid := state === s_drain_rpq && rpq.io.deq.valid io.replay.bits := rpq.io.deq.bits io.replay.bits.phys := Bool(true) - io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(conf.offbits-1,0)).toUInt + io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(params(OffBits)-1,0)).toUInt when (!io.meta_read.ready) { rpq.io.deq.ready := Bool(false) @@ -290,8 +267,7 @@ class MSHR(id: Int)(implicit conf: DCacheConfig) extends Module { } } -class MSHRFile(implicit conf: DCacheConfig) extends Module { - implicit val (tl, ln) = (conf.tl, conf.tl.ln) +class MSHRFile extends Module { val io = new Bundle { val req = Decoupled(new MSHRReq).flip val secondary_miss = Bool(OUTPUT) @@ -309,26 +285,26 @@ class MSHRFile(implicit conf: DCacheConfig) extends Module { val fence_rdy = Bool(OUTPUT) } - val sdq_val = Reg(init=Bits(0, conf.nsdq)) - val sdq_alloc_id = PriorityEncoder(~sdq_val(conf.nsdq-1,0)) + val sdq_val = Reg(init=Bits(0, params(StoreDataQueueDepth))) + val sdq_alloc_id = PriorityEncoder(~sdq_val(params(StoreDataQueueDepth)-1,0)) val sdq_rdy = !sdq_val.andR val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd) - val sdq = Mem(io.req.bits.data, conf.nsdq) + val sdq = Mem(io.req.bits.data, params(StoreDataQueueDepth)) when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } - val idxMatch = Vec.fill(conf.nmshr){Bool()} - val tagList = Vec.fill(conf.nmshr){Bits()} - val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> conf.untagbits + val idxMatch = Vec.fill(params(NMSHRs)){Bool()} + val tagList = Vec.fill(params(NMSHRs)){Bits()} + val tag_match = 
Mux1H(idxMatch, tagList) === io.req.bits.addr >> params(UntagBits) - val wbTagList = Vec.fill(conf.nmshr){Bits()} - val memRespMux = Vec.fill(conf.nmshr){new DataWriteReq} - val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, conf.nmshr)) - val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, conf.nmshr)) - val mem_req_arb = Module(new Arbiter(new Acquire, conf.nmshr)) - val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), conf.nmshr)) - val wb_req_arb = Module(new Arbiter(new WritebackReq, conf.nmshr)) - val replay_arb = Module(new Arbiter(new Replay, conf.nmshr)) - val alloc_arb = Module(new Arbiter(Bool(), conf.nmshr)) + val wbTagList = Vec.fill(params(NMSHRs)){Bits()} + val memRespMux = Vec.fill(params(NMSHRs)){new DataWriteReq} + val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, params(NMSHRs))) + val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, params(NMSHRs))) + val mem_req_arb = Module(new Arbiter(new Acquire, params(NMSHRs))) + val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), params(NMSHRs))) + val wb_req_arb = Module(new Arbiter(new WritebackReq, params(NMSHRs))) + val replay_arb = Module(new Arbiter(new Replay, params(NMSHRs))) + val alloc_arb = Module(new Arbiter(Bool(), params(NMSHRs))) var idx_match = Bool(false) var pri_rdy = Bool(false) @@ -337,7 +313,7 @@ class MSHRFile(implicit conf: DCacheConfig) extends Module { io.fence_rdy := true io.probe_rdy := true - for (i <- 0 to conf.nmshr-1) { + for (i <- 0 until params(NMSHRs)) { val mshr = Module(new MSHR(i)) idxMatch(i) := mshr.io.idx_match @@ -386,26 +362,25 @@ class MSHRFile(implicit conf: DCacheConfig) extends Module { io.replay <> replay_arb.io.out when (io.replay.valid || sdq_enq) { - sdq_val := sdq_val & ~(UIntToOH(io.replay.bits.sdq_id) & Fill(conf.nsdq, free_sdq)) | - PriorityEncoderOH(~sdq_val(conf.nsdq-1,0)) & Fill(conf.nsdq, sdq_enq) + sdq_val := sdq_val & ~(UIntToOH(io.replay.bits.sdq_id) & 
Fill(params(StoreDataQueueDepth), free_sdq)) | + PriorityEncoderOH(~sdq_val(params(StoreDataQueueDepth)-1,0)) & Fill(params(StoreDataQueueDepth), sdq_enq) } } -class WritebackUnit(implicit conf: DCacheConfig) extends Module { - implicit val tl = conf.tl +class WritebackUnit extends Module { val io = new Bundle { val req = Decoupled(new WritebackReq()).flip val meta_read = Decoupled(new L1MetaReadReq) val data_req = Decoupled(new DataReadReq()) - val data_resp = Bits(INPUT, conf.encrowbits) + val data_resp = Bits(INPUT, params(EncRowBits)) val release = Decoupled(new Release) } val active = Reg(init=Bool(false)) val r1_data_req_fired = Reg(init=Bool(false)) val r2_data_req_fired = Reg(init=Bool(false)) - val cnt = Reg(init = UInt(0, width = log2Up(conf.refillcycles+1))) + val cnt = Reg(init = UInt(0, width = log2Up(params(RefillCycles)+1))) val req = Reg(new WritebackReq) io.release.valid := false @@ -416,8 +391,8 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Module { r1_data_req_fired := true cnt := cnt + 1 } - if(conf.refillcycles > 1) { // Coalescing buffer inserted - when (!r1_data_req_fired && !r2_data_req_fired && cnt === conf.refillcycles) { + if(params(RefillCycles) > 1) { // Coalescing buffer inserted + when (!r1_data_req_fired && !r2_data_req_fired && cnt === params(RefillCycles)) { io.release.valid := true active := !io.release.ready } @@ -440,7 +415,7 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Module { req := io.req.bits } - val fire = active && cnt < UInt(conf.refillcycles) + val fire = active && cnt < UInt(params(RefillCycles)) io.req.ready := !active // We reissue the meta read as it sets up the muxing for s2_data_muxed @@ -450,20 +425,20 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Module { io.data_req.valid := fire io.data_req.bits.way_en := req.way_en - if(conf.refillcycles > 1) { - io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(conf.refillcycles)-1,0)) << conf.rowoffbits + if(params(RefillCycles) > 
1) { + io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(params(RefillCycles))-1,0)) << params(RowOffBits) } else { - io.data_req.bits.addr := req.idx << conf.rowoffbits + io.data_req.bits.addr := req.idx << params(RowOffBits) } io.release.bits.r_type := req.r_type io.release.bits.addr := Cat(req.tag, req.idx).toUInt io.release.bits.client_xact_id := req.client_xact_id io.release.bits.master_xact_id := req.master_xact_id - if(conf.refillcycles > 1) { + if(params(RefillCycles) > 1) { val data_buf = Reg(Bits()) when(active && r2_data_req_fired) { - data_buf := Cat(io.data_resp, data_buf(conf.refillcycles*conf.encrowbits-1, conf.encrowbits)) + data_buf := Cat(io.data_resp, data_buf(params(RefillCycles)*params(EncRowBits)-1, params(EncRowBits))) } io.release.bits.data := data_buf } else { @@ -472,22 +447,22 @@ class WritebackUnit(implicit conf: DCacheConfig) extends Module { } -class ProbeUnit(implicit conf: DCacheConfig) extends Module { - implicit val tl = conf.tl +class ProbeUnit extends Module { + val co = params(TLCoherence) val io = new Bundle { val req = Decoupled(new InternalProbe).flip val rep = Decoupled(new Release) val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val wb_req = Decoupled(new WritebackReq) - val way_en = Bits(INPUT, conf.ways) + val way_en = Bits(INPUT, params(NWays)) val mshr_rdy = Bool(INPUT) - val line_state = new ClientMetadata()(tl.co).asInput + val line_state = new ClientMetadata()(co).asInput } val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(UInt(), 9) val state = Reg(init=s_invalid) - val line_state = Reg(tl.co.clientMetadataOnFlush.clone) + val line_state = Reg(co.clientMetadataOnFlush.clone) val way_en = Reg(Bits()) val req = Reg(new InternalProbe) val hit = way_en.orR @@ -504,7 +479,7 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Module { when (state === s_release && io.rep.ready) 
{ state := s_invalid when (hit) { - state := Mux(tl.co.needsWriteback(line_state), s_writeback_req, s_meta_write) + state := Mux(co.needsWriteback(line_state), s_writeback_req, s_meta_write) } } when (state === s_mshr_req) { @@ -528,65 +503,65 @@ class ProbeUnit(implicit conf: DCacheConfig) extends Module { } io.req.ready := state === s_invalid - io.rep.valid := state === s_release && !(hit && tl.co.needsWriteback(line_state)) - io.rep.bits := Release(tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.clientMetadataOnFlush)), req.addr, req.client_xact_id, req.master_xact_id) + io.rep.valid := state === s_release && !(hit && co.needsWriteback(line_state)) + io.rep.bits := Release(co.getReleaseTypeOnProbe(req, Mux(hit, line_state, co.clientMetadataOnFlush)), req.addr, req.client_xact_id, req.master_xact_id) io.meta_read.valid := state === s_meta_read io.meta_read.bits.idx := req.addr - io.meta_read.bits.tag := req.addr >> conf.idxbits + io.meta_read.bits.tag := req.addr >> params(IdxBits) io.meta_write.valid := state === s_meta_write io.meta_write.bits.way_en := way_en io.meta_write.bits.idx := req.addr - io.meta_write.bits.data.coh := tl.co.clientMetadataOnProbe(req, line_state) - io.meta_write.bits.data.tag := req.addr >> UInt(conf.idxbits) + io.meta_write.bits.data.coh := co.clientMetadataOnProbe(req, line_state) + io.meta_write.bits.data.tag := req.addr >> UInt(params(IdxBits)) io.wb_req.valid := state === s_writeback_req io.wb_req.bits.way_en := way_en io.wb_req.bits.idx := req.addr - io.wb_req.bits.tag := req.addr >> UInt(conf.idxbits) - io.wb_req.bits.r_type := tl.co.getReleaseTypeOnProbe(req, Mux(hit, line_state, tl.co.clientMetadataOnFlush)) + io.wb_req.bits.tag := req.addr >> UInt(params(IdxBits)) + io.wb_req.bits.r_type := co.getReleaseTypeOnProbe(req, Mux(hit, line_state, co.clientMetadataOnFlush)) io.wb_req.bits.client_xact_id := req.client_xact_id io.wb_req.bits.master_xact_id := req.master_xact_id } -class DataArray(implicit conf: 
DCacheConfig) extends Module { +class DataArray extends Module { val io = new Bundle { val read = Decoupled(new DataReadReq).flip val write = Decoupled(new DataWriteReq).flip - val resp = Vec.fill(conf.ways){Bits(OUTPUT, conf.encrowbits)} + val resp = Vec.fill(params(NWays)){Bits(OUTPUT, params(EncRowBits))} } - val waddr = io.write.bits.addr >> conf.rowoffbits - val raddr = io.read.bits.addr >> conf.rowoffbits + val waddr = io.write.bits.addr >> params(RowOffBits) + val raddr = io.read.bits.addr >> params(RowOffBits) - if (conf.isNarrowRead) { - for (w <- 0 until conf.ways by conf.rowwords) { - val wway_en = io.write.bits.way_en(w+conf.rowwords-1,w) - val rway_en = io.read.bits.way_en(w+conf.rowwords-1,w) - val resp = Vec.fill(conf.rowwords){Bits(width = conf.encrowbits)} + if (params(DoNarrowRead)) { + for (w <- 0 until params(NWays) by params(RowWords)) { + val wway_en = io.write.bits.way_en(w+params(RowWords)-1,w) + val rway_en = io.read.bits.way_en(w+params(RowWords)-1,w) + val resp = Vec.fill(params(RowWords)){Bits(width = params(EncRowBits))} val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { - val array = Mem(Bits(width=conf.encrowbits), conf.sets*conf.refillcycles, seqRead = true) + val array = Mem(Bits(width=params(EncRowBits)), params(NSets)*params(RefillCycles), seqRead = true) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { - val data = Fill(conf.rowwords, io.write.bits.data(conf.encdatabits*(p+1)-1,conf.encdatabits*p)) - val mask = FillInterleaved(conf.encdatabits, wway_en) + val data = Fill(params(RowWords), io.write.bits.data(params(EncDataBits)*(p+1)-1,params(EncDataBits)*p)) + val mask = FillInterleaved(params(EncDataBits), wway_en) array.write(waddr, data, mask) } resp(p) := array(RegEnable(raddr, rway_en.orR && io.read.valid)) } - for (dw <- 0 until conf.rowwords) { - val r = Vec(resp.map(_(conf.encdatabits*(dw+1)-1,conf.encdatabits*dw))) + for (dw <- 0 until params(RowWords)) { + val r = 
Vec(resp.map(_(params(EncDataBits)*(dw+1)-1,params(EncDataBits)*dw))) val resp_mux = if (r.size == 1) r - else Vec(r(r_raddr(conf.rowoffbits-1,conf.wordoffbits)), r.tail:_*) + else Vec(r(r_raddr(params(RowOffBits)-1,params(WordOffBits))), r.tail:_*) io.resp(w+dw) := resp_mux.toBits } } } else { - val wmask = FillInterleaved(conf.encdatabits, io.write.bits.wmask) - for (w <- 0 until conf.ways) { - val array = Mem(Bits(width=conf.encrowbits), conf.sets*conf.refillcycles, seqRead = true) + val wmask = FillInterleaved(params(EncDataBits), io.write.bits.wmask) + for (w <- 0 until params(NWays)) { + val array = Mem(Bits(width=params(EncRowBits)), params(NSets)*params(RefillCycles), seqRead = true) when (io.write.bits.way_en(w) && io.write.valid) { array.write(waddr, io.write.bits.data, wmask) } @@ -598,17 +573,17 @@ class DataArray(implicit conf: DCacheConfig) extends Module { io.write.ready := Bool(true) } -class AMOALU(implicit conf: DCacheConfig) extends Module { +class AMOALU extends Module { val io = new Bundle { - val addr = Bits(INPUT, conf.offbits) + val addr = Bits(INPUT, params(OffBits)) val cmd = Bits(INPUT, 4) val typ = Bits(INPUT, 3) - val lhs = Bits(INPUT, conf.databits) - val rhs = Bits(INPUT, conf.databits) - val out = Bits(OUTPUT, conf.databits) + val lhs = Bits(INPUT, params(CoreDataBits)) + val rhs = Bits(INPUT, params(CoreDataBits)) + val out = Bits(OUTPUT, params(CoreDataBits)) } - require(conf.databits == 64) + require(params(CoreDataBits) == 64) val storegen = new StoreGen(io.typ, io.addr, io.rhs) val rhs = storegen.wordData @@ -639,27 +614,27 @@ class AMOALU(implicit conf: DCacheConfig) extends Module { io.out := wmask & out | ~wmask & io.lhs } -class HellaCacheReq(implicit val conf: DCacheConfig) extends BundleWithConf { +class HellaCacheReq extends Bundle { val kill = Bool() val typ = Bits(width = MT_SZ) val phys = Bool() - val addr = UInt(width = conf.maxaddrbits) - val data = Bits(width = conf.databits) - val tag = Bits(width = 
conf.reqtagbits) + val addr = UInt(width = params(MaxAddrBits)) + val data = Bits(width = params(CoreDataBits)) + val tag = Bits(width = params(CoreReqTagBits)) val cmd = Bits(width = M_SZ) } -class HellaCacheResp(implicit val conf: DCacheConfig) extends BundleWithConf { +class HellaCacheResp extends Bundle { val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() val typ = Bits(width = 3) val has_data = Bool() - val data = Bits(width = conf.databits) - val data_subword = Bits(width = conf.databits) - val tag = Bits(width = conf.reqtagbits) + val data = Bits(width = params(CoreDataBits)) + val data_subword = Bits(width = params(CoreDataBits)) + val tag = Bits(width = params(CoreReqTagBits)) val cmd = Bits(width = 4) - val addr = UInt(width = conf.maxaddrbits) - val store_data = Bits(width = conf.databits) + val addr = UInt(width = params(MaxAddrBits)) + val store_data = Bits(width = params(CoreDataBits)) } class AlignmentExceptions extends Bundle { @@ -673,26 +648,33 @@ class HellaCacheExceptions extends Bundle { } // interface between D$ and processor/DTLB -class HellaCacheIO(implicit conf: DCacheConfig) extends Bundle { +class HellaCacheIO extends Bundle { val req = Decoupled(new HellaCacheReq) val resp = Valid(new HellaCacheResp).flip - val replay_next = Valid(Bits(width = conf.reqtagbits)).flip + val replay_next = Valid(Bits(width = params(CoreReqTagBits))).flip val xcpt = (new HellaCacheExceptions).asInput - val ptw = new TLBPTWIO()(conf.as).flip + val ptw = new TLBPTWIO().flip val ordered = Bool(INPUT) } -class HellaCache(implicit conf: DCacheConfig) extends Module { - implicit val (tl, ln) = (conf.tl, conf.tl.ln) +class HellaCache extends Module { + val co = params(TLCoherence) val io = new Bundle { val cpu = (new HellaCacheIO).flip val mem = new TileLinkIO } - val indexmsb = conf.untagbits-1 - val indexlsb = conf.offbits + require(params(LRSCCycles) >= 32) // ISA requires 16-insn LRSC sequences to succeed + require(isPow2(params(NSets))) + 
require(isPow2(params(NWays))) // TODO: relax this + require(params(RowBits) <= params(TLDataBits)) + require(params(PAddrBits)-params(OffBits) == params(TLAddrBits) ) + require(params(UntagBits) <= params(PgIdxBits)) + + val indexmsb = params(UntagBits)-1 + val indexlsb = params(OffBits) val offsetmsb = indexlsb-1 - val offsetlsb = log2Up(conf.databytes) + val offsetlsb = params(WordOffBits) val wb = Module(new WritebackUnit) val prober = Module(new ProbeUnit) @@ -721,12 +703,12 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { val s1_sc = s1_req.cmd === M_XSC val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) - val dtlb = Module(new TLB(conf.ntlb)(conf.as)) + val dtlb = Module(new TLB(params(NTLBEntries))) dtlb.io.ptw <> io.cpu.ptw dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys dtlb.io.req.bits.passthrough := s1_req.phys dtlb.io.req.bits.asid := UInt(0) - dtlb.io.req.bits.vpn := s1_req.addr >> conf.pgidxbits + dtlb.io.req.bits.vpn := s1_req.addr >> params(PgIdxBits) dtlb.io.req.bits.instruction := Bool(false) when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) } @@ -734,11 +716,11 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { s1_req := io.cpu.req.bits } when (wb.io.meta_read.valid) { - s1_req.addr := Cat(wb.io.meta_read.bits.tag, wb.io.meta_read.bits.idx) << conf.offbits + s1_req.addr := Cat(wb.io.meta_read.bits.tag, wb.io.meta_read.bits.idx) << params(OffBits) s1_req.phys := Bool(true) } when (prober.io.meta_read.valid) { - s1_req.addr := Cat(prober.io.meta_read.bits.tag, prober.io.meta_read.bits.idx) << conf.offbits + s1_req.addr := Cat(prober.io.meta_read.bits.tag, prober.io.meta_read.bits.idx) << params(OffBits) s1_req.phys := Bool(true) } when (mshrs.io.replay.valid) { @@ -747,7 +729,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { when (s2_recycle) { s1_req := s2_req } - val s1_addr = Cat(dtlb.io.resp.ppn, 
s1_req.addr(conf.pgidxbits-1,0)) + val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(params(PgIdxBits)-1,0)) when (s1_clk_en) { s2_req.kill := s1_req.kill @@ -773,7 +755,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st // tags - def onReset = L1Metadata(UInt(0), ClientMetadata(UInt(0))(tl.co)) + def onReset = L1Metadata(UInt(0), ClientMetadata(UInt(0))(co)) val meta = Module(new MetadataArray(onReset _)) val metaReadArb = Module(new Arbiter(new MetaReadReq, 5)) val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2)) @@ -787,12 +769,12 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { data.io.write.valid := writeArb.io.out.valid writeArb.io.out.ready := data.io.write.ready data.io.write.bits := writeArb.io.out.bits - val wdata_encoded = (0 until conf.rowwords).map(i => conf.code.encode(writeArb.io.out.bits.data(conf.databits*(i+1)-1,conf.databits*i))) + val wdata_encoded = (0 until params(RowWords)).map(i => params(ECCCode).encode(writeArb.io.out.bits.data(params(CoreDataBits)*(i+1)-1,params(CoreDataBits)*i))) data.io.write.bits.data := Vec(wdata_encoded).toBits // tag read for new requests metaReadArb.io.in(4).valid := io.cpu.req.valid - metaReadArb.io.in(4).bits.idx := io.cpu.req.bits.addr >> conf.offbits + metaReadArb.io.in(4).bits.idx := io.cpu.req.bits.addr >> params(OffBits) when (!metaReadArb.io.in(4).ready) { io.cpu.req.ready := Bool(false) } // data read for new requests @@ -803,34 +785,34 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { // recycled requests metaReadArb.io.in(0).valid := s2_recycle - metaReadArb.io.in(0).bits.idx := s2_req.addr >> conf.offbits + metaReadArb.io.in(0).bits.idx := s2_req.addr >> params(OffBits) readArb.io.in(0).valid := s2_recycle readArb.io.in(0).bits.addr := s2_req.addr readArb.io.in(0).bits.way_en := SInt(-1) // tag check and way muxing - def wayMap[T <: Data](f: Int => T) = Vec((0 until conf.ways).map(f)) - val 
s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> conf.untagbits)).toBits - val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && tl.co.isValid(meta.io.resp(w).coh)).toBits + def wayMap[T <: Data](f: Int => T) = Vec((0 until params(NWays)).map(f)) + val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> params(UntagBits))).toBits + val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && co.isValid(meta.io.resp(w).coh)).toBits s1_clk_en := metaReadArb.io.out.valid //TODO: should be metaReadArb.io.out.fire(), but triggers Verilog backend bug val s1_writeback = s1_clk_en && !s1_valid && !s1_replay val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en))) - val s2_hit = s2_tag_match && tl.co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === tl.co.clientMetadataOnHit(s2_req.cmd, s2_hit_state) + val s2_hit = s2_tag_match && co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === co.clientMetadataOnHit(s2_req.cmd, s2_hit_state) // load-reserved/store-conditional val lrsc_count = Reg(init=UInt(0)) val lrsc_valid = lrsc_count.orR val lrsc_addr = Reg(UInt()) val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC) - val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> conf.offbits) + val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> params(OffBits)) val s2_sc_fail = s2_sc && !s2_lrsc_addr_match when (lrsc_valid) { lrsc_count := lrsc_count - 1 } when (s2_valid_masked && s2_hit || s2_replay) { when (s2_lr) { - when (!lrsc_valid) { lrsc_count := conf.lrsc_cycles-1 } - lrsc_addr := s2_req.addr >> conf.offbits + when (!lrsc_valid) { lrsc_count := params(LRSCCycles)-1 } + lrsc_addr := s2_req.addr >> params(OffBits) } when (s2_sc) { lrsc_count := 0 @@ -838,21 +820,21 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { } 
when (io.cpu.ptw.sret) { lrsc_count := 0 } - val s2_data = Vec.fill(conf.ways){Bits(width = conf.encrowbits)} - for (w <- 0 until conf.ways) { - val regs = Vec.fill(conf.rowwords){Reg(Bits(width = conf.encdatabits))} + val s2_data = Vec.fill(params(NWays)){Bits(width = params(EncRowBits))} + for (w <- 0 until params(NWays)) { + val regs = Vec.fill(params(RowWords)){Reg(Bits(width = params(EncDataBits)))} val en1 = s1_clk_en && s1_tag_eq_way(w) for (i <- 0 until regs.size) { - val en = en1 && (Bool(i == 0 || !conf.isNarrowRead) || s1_writeback) - when (en) { regs(i) := data.io.resp(w) >> conf.encdatabits*i } + val en = en1 && ((Bool(i == 0) || !params(DoNarrowRead)) || s1_writeback) + when (en) { regs(i) := data.io.resp(w) >> params(EncDataBits)*i } } s2_data(w) := regs.toBits } val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data) - val s2_data_decoded = (0 until conf.rowwords).map(i => conf.code.decode(s2_data_muxed(conf.encdatabits*(i+1)-1,conf.encdatabits*i))) + val s2_data_decoded = (0 until params(RowWords)).map(i => params(ECCCode).decode(s2_data_muxed(params(EncDataBits)*(i+1)-1,params(EncDataBits)*i))) val s2_data_corrected = Vec(s2_data_decoded.map(_.corrected)).toBits val s2_data_uncorrected = Vec(s2_data_decoded.map(_.uncorrected)).toBits - val s2_word_idx = if (conf.isNarrowRead) UInt(0) else s2_req.addr(log2Up(conf.rowwords*conf.databytes)-1,3) + val s2_word_idx = if (params(DoNarrowRead)) UInt(0) else s2_req.addr(log2Up(params(RowWords)*params(CoreDataBytes))-1,3) val s2_data_correctable = Vec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) // store/amo hits @@ -865,15 +847,15 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { } writeArb.io.in(0).bits.addr := s3_req.addr - writeArb.io.in(0).bits.wmask := UInt(1) << (if(conf.rowoffbits > offsetlsb) - s3_req.addr(conf.rowoffbits-1,offsetlsb).toUInt + writeArb.io.in(0).bits.wmask := UInt(1) << (if(params(RowOffBits) > offsetlsb) + s3_req.addr(params(RowOffBits)-1,offsetlsb).toUInt 
else UInt(0)) - writeArb.io.in(0).bits.data := Fill(conf.rowwords, s3_req.data) + writeArb.io.in(0).bits.data := Fill(params(RowWords), s3_req.data) writeArb.io.in(0).valid := s3_valid writeArb.io.in(0).bits.way_en := s3_way // replacement policy - val replacer = new RandomReplacement + val replacer = params(Replacer)() val s1_replaced_way_en = UIntToOH(replacer.way) val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en)) val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq) @@ -914,9 +896,9 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { prober.io.mshr_rdy := mshrs.io.probe_rdy // refills - def doRefill(g: Grant): Bool = tl.co.messageUpdatesDataArray(g) - val refill = if(conf.refillcycles > 1) { - val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, conf.refillcycles, doRefill)) + def doRefill(g: Grant): Bool = co.messageUpdatesDataArray(g) + val refill = if(params(RefillCycles) > 1) { + val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, params(RefillCycles), doRefill)) ser.io.in <> io.mem.grant ser.io.out } else io.mem.grant @@ -926,7 +908,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { writeArb.io.in(1).valid := refill.valid && doRefill(refill.bits.payload) writeArb.io.in(1).bits := mshrs.io.mem_resp writeArb.io.in(1).bits.wmask := SInt(-1) - writeArb.io.in(1).bits.data := refill.bits.payload.data(conf.encrowbits-1,0) + writeArb.io.in(1).bits.data := refill.bits.payload.data(params(EncRowBits)-1,0) readArb.io.out.ready := !refill.valid || refill.ready // insert bubble if refill gets blocked readArb.io.out <> data.io.read @@ -947,8 +929,8 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { ((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out), (s3_valid, s3_req, s3_req.data), (s4_valid, s4_req, s4_req.data) - ).map(r => (r._1 && (s1_addr >> conf.wordoffbits === r._2.addr >> 
conf.wordoffbits) && isWrite(r._2.cmd), r._3)) - val s2_store_bypass_data = Reg(Bits(width = conf.databits)) + ).map(r => (r._1 && (s1_addr >> params(WordOffBits) === r._2.addr >> params(WordOffBits)) && isWrite(r._2.cmd), r._3)) + val s2_store_bypass_data = Reg(Bits(width = params(CoreDataBits))) val s2_store_bypass = Reg(Bool()) when (s1_clk_en) { s2_store_bypass := false @@ -959,7 +941,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { } // load data subword mux/sign extension - val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(conf.databits))) + val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(params(CoreDataBits)))) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc) @@ -1006,7 +988,7 @@ class HellaCache(implicit conf: DCacheConfig) extends Module { } // exposes a sane decoupled request interface -class SimpleHellaCacheIF(implicit conf: DCacheConfig) extends Module +class SimpleHellaCacheIF extends Module { val io = new Bundle { val requestor = new HellaCacheIO().flip diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 1b42af49..abe534a6 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -4,39 +4,38 @@ import Chisel._ import uncore._ import Util._ -class PTWResp()(implicit val conf: AddressSpaceConfiguration) extends BundleWithConf { +class PTWResp extends Bundle { val error = Bool() - val ppn = UInt(width = conf.ppnBits) - val perm = Bits(width = conf.permBits) + val ppn = UInt(width = params(PPNBits)) + val perm = Bits(width = params(PermBits)) } -class TLBPTWIO()(implicit val conf: AddressSpaceConfiguration) extends BundleWithConf { - val req = Decoupled(UInt(width = conf.vpnBits)) +class TLBPTWIO extends Bundle { + val req = Decoupled(UInt(width = params(VPNBits))) val resp = Valid(new PTWResp).flip val 
status = new Status().asInput val invalidate = Bool(INPUT) val sret = Bool(INPUT) } -class DatapathPTWIO()(implicit val conf: AddressSpaceConfiguration) extends BundleWithConf { - val ptbr = UInt(INPUT, conf.paddrBits) +class DatapathPTWIO extends Bundle { + val ptbr = UInt(INPUT, params(PAddrBits)) val invalidate = Bool(INPUT) val sret = Bool(INPUT) val status = new Status().asInput } -class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module +class PTW(n: Int) extends Module { - implicit val as = conf.as val io = new Bundle { val requestor = Vec.fill(n){new TLBPTWIO}.flip - val mem = new HellaCacheIO()(conf.dcache) + val mem = new HellaCacheIO val dpath = new DatapathPTWIO } val levels = 3 - val bitsPerLevel = conf.as.vpnBits/levels - require(conf.as.vpnBits == levels * bitsPerLevel) + val bitsPerLevel = params(VPNBits)/levels + require(params(VPNBits) == levels * bitsPerLevel) val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(UInt(), 5) val state = Reg(init=s_ready) @@ -48,14 +47,14 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module val vpn_idx = Vec((0 until levels).map(i => (r_req_vpn >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count) - val arb = Module(new RRArbiter(UInt(width = conf.as.vpnBits), n)) + val arb = Module(new RRArbiter(UInt(width = params(VPNBits)), n)) arb.io.in <> io.requestor.map(_.req) arb.io.out.ready := state === s_ready when (arb.io.out.fire()) { r_req_vpn := arb.io.out.bits r_req_dest := arb.io.chosen - r_pte := Cat(io.dpath.ptbr(conf.as.paddrBits-1,conf.as.pgIdxBits), io.mem.resp.bits.data(conf.as.pgIdxBits-1,0)) + r_pte := Cat(io.dpath.ptbr(params(PAddrBits)-1,params(PgIdxBits)), io.mem.resp.bits.data(params(PgIdxBits)-1,0)) } when (io.mem.resp.valid) { @@ -66,13 +65,13 @@ class PTW(n: Int)(implicit conf: RocketConfiguration) extends Module io.mem.req.bits.phys := Bool(true) io.mem.req.bits.cmd := M_XRD io.mem.req.bits.typ := MT_D - io.mem.req.bits.addr := 
Cat(r_pte(conf.as.paddrBits-1,conf.as.pgIdxBits), vpn_idx).toUInt << log2Up(conf.xprlen/8) + io.mem.req.bits.addr := Cat(r_pte(params(PAddrBits)-1,params(PgIdxBits)), vpn_idx).toUInt << log2Up(params(XprLen)/8) io.mem.req.bits.kill := Bool(false) val resp_val = state === s_done || state === s_error val resp_err = state === s_error || state === s_wait - val r_resp_ppn = io.mem.req.bits.addr >> conf.as.pgIdxBits + val r_resp_ppn = io.mem.req.bits.addr >> params(PgIdxBits) val resp_ppn = Vec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), r_req_vpn(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) for (i <- 0 until io.requestor.size) { diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index c1ce0694..c0d8decb 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -17,48 +17,46 @@ class RoCCInstruction extends Bundle val opcode = Bits(width = 7) } -class RoCCCommand(implicit val conf: RocketConfiguration) extends BundleWithConf +class RoCCCommand extends Bundle { val inst = new RoCCInstruction - val rs1 = Bits(width = conf.xprlen) - val rs2 = Bits(width = conf.xprlen) + val rs1 = Bits(width = params(XprLen)) + val rs2 = Bits(width = params(XprLen)) } -class RoCCResponse(implicit val conf: RocketConfiguration) extends BundleWithConf +class RoCCResponse extends Bundle { val rd = Bits(width = 5) - val data = Bits(width = conf.xprlen) + val data = Bits(width = params(XprLen)) } -class RoCCInterface(implicit val conf: RocketConfiguration) extends BundleWithConf +class RoCCInterface extends Bundle { - implicit val as = conf.as val cmd = Decoupled(new RoCCCommand).flip val resp = Decoupled(new RoCCResponse) - val mem = new HellaCacheIO()(conf.dcache) + val mem = new HellaCacheIO val busy = Bool(OUTPUT) val s = Bool(INPUT) val interrupt = Bool(OUTPUT) // These should be handled differently, eventually - val imem = new UncachedTileLinkIO()(conf.tl) + val imem = new UncachedTileLinkIO 
val iptw = new TLBPTWIO val dptw = new TLBPTWIO val pptw = new TLBPTWIO val exception = Bool(INPUT) } -abstract class RoCC(conf: RocketConfiguration) extends Module +abstract class RoCC extends Module { - val io = new RoCCInterface()(conf) - + val io = new RoCCInterface io.mem.req.bits.phys := Bool(true) // don't perform address translation } -class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf) +class AccumulatorExample extends RoCC { val n = 4 - val regfile = Mem(UInt(width = conf.xprlen), n) + val regfile = Mem(UInt(width = params(XprLen)), n) val busy = Vec.fill(n){Reg(init=Bool(false))} val cmd = Queue(io.cmd) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index f882acf1..fa048908 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -4,50 +4,46 @@ import Chisel._ import uncore._ import Util._ -case class RocketConfiguration(tl: TileLinkConfiguration, as: AddressSpaceConfiguration, - icache: ICacheConfig, dcache: DCacheConfig, - rocc: Option[RocketConfiguration => RoCC] = None, - retireWidth: Int = 1, - vm: Boolean = true, - fastLoadWord: Boolean = true, - fastLoadByte: Boolean = false, - fastMulDiv: Boolean = true) -{ - val dcacheReqTagBits = 7 // enforce compliance with require() - val xprlen = 64 - val nxpr = 32 - val nxprbits = log2Up(nxpr) - if (fastLoadByte) require(fastLoadWord) -} +case object NDCachePorts extends Field[Int] +case object NTilePorts extends Field[Int] +case object BuildRoCC extends Field[Option[() => RoCC]] +case object RetireWidth extends Field[Int] +case object UseVM extends Field[Boolean] +case object FastLoadWord extends Field[Boolean] +case object FastLoadByte extends Field[Boolean] +case object FastMulDiv extends Field[Boolean] +case object DcacheReqTagBits extends Field[Int] +case object XprLen extends Field[Int] +case object NXpr extends Field[Int] +case object NXprBits extends Field[Int] +case object RocketDCacheParams extends Field[PF] +case 
object RocketFrontendParams extends Field[PF] -class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module(_reset = resetSignal) -{ - val memPorts = 2 + (!confIn.rocc.isEmpty).toInt // Number of ports to outer memory system from tile: 1 from I$, 1 from D$, maybe 1 from Rocc - val dcachePortId = 0 - val icachePortId = 1 - val roccPortId = 2 - val dcachePorts = 2 + (!confIn.rocc.isEmpty).toInt // Number of ports into D$: 1 from core, 1 from PTW, maybe 1 from RoCC - implicit val tlConf = confIn.tl - implicit val lnConf = confIn.tl.ln - implicit val icConf = confIn.icache - implicit val dcConf = confIn.dcache.copy(reqtagbits = confIn.dcacheReqTagBits + log2Up(dcachePorts), databits = confIn.xprlen) - implicit val conf = confIn.copy(dcache = dcConf) - require(conf.retireWidth == 1) // for now... +class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { + + if(params(FastLoadByte)) require(params(FastLoadWord)) + require(params(RetireWidth) == 1) // for now... 
val io = new Bundle { val tilelink = new TileLinkIO - val host = new HTIFIO(lnConf.nClients) + val host = new HTIFIO } + // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) - val core = Module(new Core) + val optionalRoCC = params(BuildRoCC) + + params.alter(params(RocketFrontendParams)) // Used in icache, Core val icache = Module(new Frontend) + params.alter(params(RocketDCacheParams)) // Used in dcache, PTW, RoCCm Core val dcache = Module(new HellaCache) - val ptw = Module(new PTW(if (confIn.rocc.isEmpty) 2 else 5)) // 2 ports, 1 from I$, 1 from D$, maybe 3 from RoCC + val ptw = Module(new PTW(if(optionalRoCC.isEmpty) 2 else 5)) + // 2 ports, 1 from I$, 1 from D$, maybe 3 from RoCC + val core = Module(new Core) - val dcacheArb = Module(new HellaCacheArbiter(dcachePorts)) - dcacheArb.io.requestor(0) <> ptw.io.mem - dcacheArb.io.requestor(1) <> core.io.dmem - dcache.io.cpu <> dcacheArb.io.mem + val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) + dcArb.io.requestor(0) <> ptw.io.mem + dcArb.io.requestor(1) <> core.io.dmem + dcArb.io.mem <> dcache.io.cpu ptw.io.requestor(0) <> icache.io.cpu.ptw ptw.io.requestor(1) <> dcache.io.cpu.ptw @@ -56,28 +52,31 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module core.io.imem <> icache.io.cpu core.io.ptw <> ptw.io.dpath - val memArb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts)) - memArb.io.in(dcachePortId) <> dcache.io.mem - memArb.io.in(icachePortId) <> icache.io.mem + val memArb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(params(NTilePorts))) + val dcPortId = 0 + memArb.io.in(dcPortId) <> dcache.io.mem + memArb.io.in(1) <> icache.io.mem - if (!conf.rocc.isEmpty) { - val rocc = Module((conf.rocc.get)(conf)) + if(!optionalRoCC.isEmpty) { + val rocc = Module(optionalRoCC.get()) val dcIF = Module(new SimpleHellaCacheIF) dcIF.io.requestor <> rocc.io.mem core.io.rocc <> rocc.io - dcacheArb.io.requestor(2) 
<> dcIF.io.cache - memArb.io.in(roccPortId) <> rocc.io.imem + dcArb.io.requestor(2) <> dcIF.io.cache + memArb.io.in(2) <> rocc.io.imem ptw.io.requestor(2) <> rocc.io.iptw ptw.io.requestor(3) <> rocc.io.dptw ptw.io.requestor(4) <> rocc.io.pptw } - + io.tilelink.acquire <> memArb.io.out.acquire - memArb.io.out.grant <> io.tilelink.grant + io.tilelink.grant <> memArb.io.out.grant io.tilelink.finish <> memArb.io.out.finish - dcache.io.mem.probe <> io.tilelink.probe + // Probes and releases routed directly to coherent dcache + io.tilelink.probe <> dcache.io.mem.probe + // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) io.tilelink.release.valid := dcache.io.mem.release.valid dcache.io.mem.release.ready := io.tilelink.release.ready io.tilelink.release.bits := dcache.io.mem.release.bits - io.tilelink.release.bits.payload.client_xact_id := Cat(dcache.io.mem.release.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) + io.tilelink.release.bits.payload.client_xact_id := Cat(dcache.io.mem.release.bits.payload.client_xact_id, UInt(dcPortId, log2Up(params(NTilePorts)))) } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 62b757bb..97d735b2 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -1,7 +1,7 @@ package rocket import Chisel._ -import uncore.AddressSpaceConfiguration +import uncore._ import scala.math._ class CAMIO(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { @@ -64,26 +64,26 @@ class PseudoLRU(n: Int) } } -class TLBReq()(implicit val conf: AddressSpaceConfiguration) extends BundleWithConf +class TLBReq extends Bundle { - val asid = UInt(width = conf.asidBits) - val vpn = UInt(width = conf.vpnBits+1) + val asid = UInt(width = params(ASIdBits)) + val vpn = UInt(width = params(VPNBits)+1) val passthrough = Bool() val instruction = 
Bool() } -class TLBResp(entries: Int)(implicit val conf: AddressSpaceConfiguration) extends BundleWithConf +class TLBResp(entries: Int) extends Bundle { // lookup responses val miss = Bool(OUTPUT) val hit_idx = UInt(OUTPUT, entries) - val ppn = UInt(OUTPUT, conf.ppnBits) + val ppn = UInt(OUTPUT, params(PPNBits)) val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) val xcpt_if = Bool(OUTPUT) } -class TLB(entries: Int)(implicit conf: AddressSpaceConfiguration) extends Module +class TLB(entries: Int) extends Module { val io = new Bundle { val req = Decoupled(new TLBReq).flip @@ -96,7 +96,7 @@ class TLB(entries: Int)(implicit conf: AddressSpaceConfiguration) extends Module val r_refill_tag = Reg(UInt()) val r_refill_waddr = Reg(UInt()) - val tag_cam = Module(new RocketCAM(entries, conf.asidBits+conf.vpnBits)) + val tag_cam = Module(new RocketCAM(entries, params(ASIdBits)+params(VPNBits))) val tag_ram = Mem(io.ptw.resp.bits.ppn.clone, entries) val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt @@ -133,7 +133,7 @@ class TLB(entries: Int)(implicit conf: AddressSpaceConfiguration) extends Module val plru = new PseudoLRU(entries) val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) - val bad_va = io.req.bits.vpn(conf.vpnBits) != io.req.bits.vpn(conf.vpnBits-1) + val bad_va = io.req.bits.vpn(params(VPNBits)) != io.req.bits.vpn(params(VPNBits)-1) val tlb_hit = io.ptw.status.vm && tag_hit val tlb_miss = io.ptw.status.vm && !tag_hit && !bad_va @@ -146,7 +146,7 @@ class TLB(entries: Int)(implicit conf: AddressSpaceConfiguration) extends Module io.resp.xcpt_st := bad_va || tlb_hit && !Mux(io.ptw.status.s, (sw_array & tag_cam.io.hits).orR, (uw_array & tag_cam.io.hits).orR) io.resp.xcpt_if := bad_va || tlb_hit && !Mux(io.ptw.status.s, (sx_array & tag_cam.io.hits).orR, (ux_array & tag_cam.io.hits).orR) io.resp.miss := tlb_miss - io.resp.ppn := Mux(io.ptw.status.vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), 
io.req.bits.vpn(conf.ppnBits-1,0)) + io.resp.ppn := Mux(io.ptw.status.vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(params(PPNBits)-1,0)) io.resp.hit_idx := tag_cam.io.hits io.ptw.req.valid := state === s_request diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index e52b2e4d..0fc0805f 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -27,11 +27,6 @@ object Util { import Util._ -abstract trait BundleWithConf extends Bundle { - val conf: AnyRef - override def clone = this.getClass.getConstructors.head.newInstance(conf).asInstanceOf[this.type] -} - object Str { def apply(s: String): UInt = { From ca5f38ff26dce84e54d2ce5f6c70932df2360e15 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 10 Aug 2014 23:07:15 -0700 Subject: [PATCH 0754/1087] a few more fixes. some param lookups fail (here() in Alter blocks) --- rocket/src/main/scala/icache.scala | 1 + rocket/src/main/scala/tile.scala | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 0db261a1..43035d55 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -19,6 +19,7 @@ class FrontendResp extends Bundle { } class CPUFrontendIO extends Bundle { + params.alter(params(CoreBTBParams)) val req = Valid(new FrontendReq) val resp = Decoupled(new FrontendResp).flip val btb_resp = Valid(new BTBResp).flip diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index fa048908..b7f7e412 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -32,8 +32,8 @@ class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { val optionalRoCC = params(BuildRoCC) - params.alter(params(RocketFrontendParams)) // Used in icache, Core - val icache = Module(new Frontend) + val p = 
params.alter(params(CoreBTBParams)).alter(params(RocketFrontendParams)) // Used in icache, Core + val icache = Module(new Frontend)(p) //TODO PARAMS: best way to alter both? params.alter(params(RocketDCacheParams)) // Used in dcache, PTW, RoCCm Core val dcache = Module(new HellaCache) val ptw = Module(new PTW(if(optionalRoCC.isEmpty) 2 else 5)) From 2de268b3b157b532f3e69c8d808856437c02d85c Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 11 Aug 2014 18:36:23 -0700 Subject: [PATCH 0755/1087] Cache utility traits. Completely compiles, asm tests hang. --- rocket/src/main/scala/btb.scala | 89 ++++---- rocket/src/main/scala/core.scala | 3 +- rocket/src/main/scala/csr.scala | 2 +- rocket/src/main/scala/ctrl.scala | 8 +- rocket/src/main/scala/icache.scala | 107 ++++----- rocket/src/main/scala/nbdcache.scala | 324 +++++++++++++-------------- rocket/src/main/scala/ptw.scala | 2 +- rocket/src/main/scala/tile.scala | 13 +- 8 files changed, 281 insertions(+), 267 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 4cecf1d7..7cf4094f 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -5,13 +5,18 @@ import Util._ import Node._ import uncore._ -case object Entries extends Field[Int] +case object BTBEntries extends Field[Int] case object NRAS extends Field[Int] -case object MatchBits extends Field[Int] -case object Pages0 extends Field[Int] -case object Pages extends Field[Int] -case object OpaqueBits extends Field[Int] -case object NBHT extends Field[Int] + +abstract trait BTBParameters extends UsesParameters { + val vaddrBits = params(VAddrBits) + val matchBits = params(PgIdxBits) + val entries = params(BTBEntries) + val nRAS = params(NRAS) + val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages + val opaqueBits = log2Up(entries) + val nBHT = 1 << log2Up(entries*2) +} class RAS(nras: Int) { def push(addr: UInt): Unit = { @@ -33,8 +38,8 @@ class RAS(nras: Int) { 
private val stack = Vec.fill(nras){Reg(UInt())} } -class BHTResp extends Bundle { - val index = UInt(width = log2Up(params(NBHT)).max(1)) +class BHTResp extends Bundle with BTBParameters { + val index = UInt(width = log2Up(nBHT).max(1)) val value = UInt(width = 2) } @@ -55,11 +60,11 @@ class BHT(nbht: Int) { val history = Reg(UInt(width = nbhtbits)) } -class BTBUpdate extends Bundle { +class BTBUpdate extends Bundle with BTBParameters { val prediction = Valid(new BTBResp) - val pc = UInt(width = params(VAddrBits)) - val target = UInt(width = params(VAddrBits)) - val returnAddr = UInt(width = params(VAddrBits)) + val pc = UInt(width = vaddrBits) + val target = UInt(width = vaddrBits) + val returnAddr = UInt(width = vaddrBits) val taken = Bool() val isJump = Bool() val isCall = Bool() @@ -67,42 +72,42 @@ class BTBUpdate extends Bundle { val incorrectTarget = Bool() } -class BTBResp extends Bundle { +class BTBResp extends Bundle with BTBParameters { val taken = Bool() - val target = UInt(width = params(VAddrBits)) - val entry = UInt(width = params(OpaqueBits)) + val target = UInt(width = vaddrBits) + val entry = UInt(width = opaqueBits) val bht = new BHTResp } // fully-associative branch target buffer -class BTB extends Module { +class BTB extends Module with BTBParameters { val io = new Bundle { - val req = UInt(INPUT, params(VAddrBits)) + val req = UInt(INPUT, vaddrBits) val resp = Valid(new BTBResp) val update = Valid(new BTBUpdate).flip val invalidate = Bool(INPUT) } - val idxValid = Reg(init=UInt(0, params(Entries))) - val idxs = Mem(UInt(width=params(MatchBits)), params(Entries)) - val idxPages = Mem(UInt(width=log2Up(params(Pages))), params(Entries)) - val tgts = Mem(UInt(width=params(MatchBits)), params(Entries)) - val tgtPages = Mem(UInt(width=log2Up(params(Pages))), params(Entries)) - val pages = Mem(UInt(width=params(VAddrBits)-params(MatchBits)), params(Pages)) - val pageValid = Reg(init=UInt(0, params(Pages))) - val idxPagesOH = 
idxPages.map(UIntToOH(_)(params(Pages)-1,0)) - val tgtPagesOH = tgtPages.map(UIntToOH(_)(params(Pages)-1,0)) + val idxValid = Reg(init=UInt(0, entries)) + val idxs = Mem(UInt(width=matchBits), entries) + val idxPages = Mem(UInt(width=log2Up(nPages)), entries) + val tgts = Mem(UInt(width=matchBits), entries) + val tgtPages = Mem(UInt(width=log2Up(nPages)), entries) + val pages = Mem(UInt(width=vaddrBits-matchBits), nPages) + val pageValid = Reg(init=UInt(0, nPages)) + val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0)) + val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0)) - val useRAS = Reg(UInt(width = params(Entries))) - val isJump = Reg(UInt(width = params(Entries))) + val useRAS = Reg(UInt(width = entries)) + val isJump = Reg(UInt(width = entries)) - private def page(addr: UInt) = addr >> params(MatchBits) + private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { val p = page(addr) Vec(pages.map(_ === p)).toBits & pageValid } private def tagMatch(addr: UInt, pgMatch: UInt): UInt = { - val idx = addr(params(MatchBits)-1,0) + val idx = addr(matchBits-1,0) val idxMatch = idxs.map(_ === idx).toBits val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits idxValid & idxMatch & idxPageMatch @@ -123,7 +128,7 @@ class BTB extends Module { } val updateHit = update.bits.prediction.valid - val updateValid = update.bits.incorrectTarget || updateHit && Bool(params(NBHT) > 0) + val updateValid = update.bits.incorrectTarget || updateHit && Bool(nBHT > 0) val updateTarget = updateValid && update.bits.incorrectTarget val useUpdatePageHit = updatePageHit.orR @@ -136,20 +141,20 @@ class BTB extends Module { val samePage = page(update.bits.pc) === page(update_target) val usePageHit = (pageHit & ~idxPageReplEn).orR val doTgtPageRepl = updateTarget && !samePage && !usePageHit - val tgtPageRepl = Mux(samePage, idxPageUpdateOH, idxPageUpdateOH(params(Pages)-2,0) << 1 | idxPageUpdateOH(params(Pages)-1)) + val tgtPageRepl = Mux(samePage, 
idxPageUpdateOH, idxPageUpdateOH(nPages-2,0) << 1 | idxPageUpdateOH(nPages-1)) val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl)) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) val doPageRepl = doIdxPageRepl || doTgtPageRepl val pageReplEn = idxPageReplEn | tgtPageReplEn - idxPageRepl := UIntToOH(Counter(update.valid && doPageRepl, params(Pages))._1) + idxPageRepl := UIntToOH(Counter(update.valid && doPageRepl, nPages)._1) when (update.valid && !(updateValid && !updateTarget)) { - val nextRepl = Counter(!updateHit && updateValid, params(Entries))._1 + val nextRepl = Counter(!updateHit && updateValid, entries)._1 val waddr = Mux(updateHit, update.bits.prediction.bits.entry, nextRepl) // invalidate entries if we stomp on pages they depend upon - idxValid := idxValid & ~Vec.tabulate(params(Entries))(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits + idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits idxValid(waddr) := updateValid when (updateTarget) { @@ -162,11 +167,11 @@ class BTB extends Module { isJump(waddr) := update.bits.isJump } - require(params(Pages) % 2 == 0) - val idxWritesEven = (idxPageUpdateOH & Fill(params(Pages)/2, UInt(1,2))).orR + require(nPages % 2 == 0) + val idxWritesEven = (idxPageUpdateOH & Fill(nPages/2, UInt(1,2))).orR def writeBank(i: Int, mod: Int, en: Bool, data: UInt) = - for (i <- i until params(Pages) by mod) + for (i <- i until nPages by mod) when (en && pageReplEn(i)) { pages(i) := data } writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), @@ -187,16 +192,16 @@ class BTB extends Module { io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) - if (params(NBHT) > 0) { - val bht = new BHT(params(NBHT)) + if (nBHT > 0) { + val bht = new BHT(nBHT) val res = bht.get(io.req) when (update.valid && updateHit && !update.bits.isJump) { 
bht.update(update.bits.prediction.bits.bht, update.bits.taken) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } io.resp.bits.bht := res } - if (params(NRAS) > 0) { - val ras = new RAS(params(NRAS)) + if (nRAS > 0) { + val ras = new RAS(nRAS) val doPeek = Mux1H(hits, useRAS) when (!ras.isEmpty && doPeek) { io.resp.bits.target := ras.peek diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index c9ec77a7..0b22b42a 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -24,7 +24,8 @@ class Core extends Module val dpath = Module(new Datapath) if (!params(BuildFPU).isEmpty) { - val fpu = Module(params(BuildFPU).get(),params(FPUParams)) + val p = Some(params.alter(params(FPUParams))) + val fpu = Module(params(BuildFPU).get())(p) dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl } diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index c15a3389..5a2cd7cf 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -160,7 +160,7 @@ class CSRFile extends Module when (host_pcr_req_fire && !host_pcr_bits.rw && decoded_addr(CSRs.tohost)) { reg_tohost := UInt(0) } val read_impl = Bits(2) - val read_ptbr = reg_ptbr(params(PAddrBits)-1, params(PgIdxBits)) << params(PgIdxBits) + val read_ptbr = reg_ptbr(params(PAddrBits)-1, params(PgIdxBits)) << UInt(params(PgIdxBits)) val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( CSRs.fflags -> (if (!params(BuildFPU).isEmpty) reg_fflags else UInt(0)), diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 1073019c..78396897 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -43,7 +43,7 @@ class CtrlDpathIO extends Bundle // exception handling val retire = Bool(OUTPUT) val exception = Bool(OUTPUT) - val cause = UInt(OUTPUT, params[Int]("xprlen")) + val cause = UInt(OUTPUT, params(XprLen)) val badvaddr_wen = 
Bool(OUTPUT) // high for a load/store access fault // inputs from datapath val inst = Bits(INPUT, 32) @@ -401,7 +401,7 @@ class Control extends Module val id_reg_fence = Reg(init=Bool(false)) val sr = io.dpath.status - var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(BigInt(1) << (params[Int]("xprlen")-1) | i))) + var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(BigInt(1) << (params(XprLen)-1) | i))) val (id_interrupt_unmasked, id_interrupt_cause) = checkExceptions(id_interrupts) val id_interrupt = io.dpath.status.ei && id_interrupt_unmasked @@ -437,7 +437,7 @@ class Control extends Module val id_amo_rl = io.dpath.inst(25) val id_fence_next = id_fence || id_amo && id_amo_rl val id_mem_busy = !io.dmem.ordered || ex_reg_mem_val - val id_rocc_busy = Bool(params[Boolean]("HasRoCC")) && + val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) && (io.rocc.busy || ex_reg_rocc_val || mem_reg_rocc_val || wb_reg_rocc_val) id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy val id_do_fence = id_rocc_busy && id_fence || @@ -690,7 +690,7 @@ class Control extends Module // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage. 
val mem_mem_cmd_bh = - if (params[Boolean]("fastLoadWord")) Bool(!params[Boolean]("fastLoadByte")) && mem_reg_slow_bypass + if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass else Bool(true) val data_hazard_mem = mem_reg_wen && (id_renx1_not0 && id_raddr1 === io.dpath.mem_waddr || diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 43035d55..aa9af2a9 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -7,19 +7,27 @@ import Util._ case object InstBytes extends Field[Int] case object CoreBTBParams extends Field[PF] -class FrontendReq extends Bundle { - val pc = UInt(width = params(VAddrBits)+1) +abstract trait FrontendParameters extends CacheParameters { + val instBytes = params(InstBytes) + val co = params(TLCoherence) + val code = params(ECCCode) +} + +abstract class FrontendBundle extends Bundle with FrontendParameters +abstract class FrontendModule extends Module with FrontendParameters + +class FrontendReq extends FrontendBundle { + val pc = UInt(width = vaddrBits+1) } -class FrontendResp extends Bundle { - val pc = UInt(width = params(VAddrBits)+1) // ID stage PC - val data = Bits(width = params(InstBytes)*8) +class FrontendResp extends FrontendBundle { + val pc = UInt(width = vaddrBits+1) // ID stage PC + val data = Bits(width = instBytes*8) val xcpt_ma = Bool() val xcpt_if = Bool() } -class CPUFrontendIO extends Bundle { - params.alter(params(CoreBTBParams)) +class CPUFrontendIO extends FrontendBundle { val req = Valid(new FrontendReq) val resp = Decoupled(new FrontendResp).flip val btb_resp = Valid(new BTBResp).flip @@ -28,14 +36,14 @@ class CPUFrontendIO extends Bundle { val invalidate = Bool(OUTPUT) } -class Frontend extends Module +class Frontend extends FrontendModule { val io = new Bundle { val cpu = new CPUFrontendIO().flip val mem = new UncachedTileLinkIO } - val btb = Module(new BTB, params(CoreBTBParams)) + val btb = Module(new BTB) val icache = 
Module(new ICache) val tlb = Module(new TLB(params(NTLBEntries))) @@ -48,14 +56,14 @@ class Frontend extends Module val s2_btb_resp_bits = Reg(btb.io.resp.bits.clone) val s2_xcpt_if = Reg(init=Bool(false)) - val msb = params(VAddrBits)-1 + val msb = vaddrBits-1 val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) - val pcp4_0 = s1_pc + UInt(params(InstBytes)) + val pcp4_0 = s1_pc + UInt(instBytes) val pcp4 = Cat(s1_pc(msb) & pcp4_0(msb), pcp4_0(msb,0)) val icmiss = s2_valid && !icache.io.resp.valid val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, pcp4) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt - val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((pcp4 & params(RowBytes)) === (s1_pc & params(RowBytes))) + val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((pcp4 & rowBytes) === (s1_pc & rowBytes)) val stall = io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { @@ -75,13 +83,13 @@ class Frontend extends Module s2_valid := Bool(false) } - btb.io.req := s1_pc & SInt(-params(InstBytes)) + btb.io.req := s1_pc & SInt(-instBytes) btb.io.update := io.cpu.btb_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate tlb.io.ptw <> io.cpu.ptw tlb.io.req.valid := !stall && !icmiss - tlb.io.req.bits.vpn := s1_pc >> UInt(params(PgIdxBits)) + tlb.io.req.bits.vpn := s1_pc >> UInt(pgIdxBits) tlb.io.req.bits.asid := UInt(0) tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.instruction := Bool(true) @@ -95,29 +103,28 @@ class Frontend extends Module icache.io.resp.ready := !stall && !s1_same_block io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) - io.cpu.resp.bits.pc := s2_pc & SInt(-params(InstBytes)) // discard PC LSBs - io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(params(RowBytes))-1,log2Up(params(InstBytes))) << log2Up(params(InstBytes)*8)) - io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(params(InstBytes))-1,0) != 
UInt(0) + io.cpu.resp.bits.pc := s2_pc & SInt(-instBytes) // discard PC LSBs + io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(instBytes)) << log2Up(instBytes*8)) + io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(instBytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if io.cpu.btb_resp.valid := s2_btb_resp_valid io.cpu.btb_resp.bits := s2_btb_resp_bits } -class ICacheReq extends Bundle { - val idx = UInt(width = params(PgIdxBits)) +class ICacheReq extends FrontendBundle { + val idx = UInt(width = pgIdxBits) val ppn = UInt(width = params(PPNBits)) // delayed one cycle val kill = Bool() // delayed one cycle } -class ICacheResp extends Bundle { - val data = Bits(width = params(InstBytes)*8) - val datablock = Bits(width = params(RowBits)) +class ICacheResp extends FrontendBundle { + val data = Bits(width = instBytes*8) + val datablock = Bits(width = rowBits) } -class ICache extends Module +class ICache extends FrontendModule { - val (nSets, nWays, co, ecc) = (params(NSets), params(NWays), params(TLCoherence), params(ECCCode)) val io = new Bundle { val req = Valid(new ICacheReq).flip val resp = Decoupled(new ICacheResp) @@ -125,8 +132,8 @@ class ICache extends Module val mem = new UncachedTileLinkIO } require(isPow2(nSets) && isPow2(nWays)) - require(isPow2(params(InstBytes))) - require(params(PgIdxBits) >= params(UntagBits)) + require(isPow2(instBytes)) + require(pgIdxBits >= untagBits) val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(UInt(), 4) val state = Reg(init=s_ready) @@ -135,13 +142,13 @@ class ICache extends Module val rdy = Bool() val s2_valid = Reg(init=Bool(false)) - val s2_addr = Reg(UInt(width = params(PAddrBits))) + val s2_addr = Reg(UInt(width = paddrBits)) val s2_any_tag_hit = Bool() val s1_valid = Reg(init=Bool(false)) - val s1_pgoff = Reg(UInt(width = params(PgIdxBits))) + val s1_pgoff = Reg(UInt(width = pgIdxBits)) val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUInt - val s1_tag = 
s1_addr(params(TagBits)+params(UntagBits)-1,params(UntagBits)) + val s1_tag = s1_addr(tagBits+untagBits-1,untagBits) val s0_valid = io.req.valid || s1_valid && stall val s0_pgoff = Mux(s1_valid && stall, s1_pgoff, io.req.bits.idx) @@ -156,9 +163,9 @@ class ICache extends Module s2_addr := s1_addr } - val s2_tag = s2_addr(params(TagBits)+params(UntagBits)-1,params(UntagBits)) - val s2_idx = s2_addr(params(UntagBits)-1,params(OffBits)) - val s2_offset = s2_addr(params(OffBits)-1,0) + val s2_tag = s2_addr(tagBits+untagBits-1,untagBits) + val s2_idx = s2_addr(untagBits-1,blockOffBits) + val s2_offset = s2_addr(blockOffBits-1,0) val s2_hit = s2_valid && s2_any_tag_hit val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss @@ -168,8 +175,8 @@ class ICache extends Module var refill_valid = io.mem.grant.valid var refill_bits = io.mem.grant.bits def doRefill(g: Grant): Bool = Bool(true) - if(params(RefillCycles) > 1) { - val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, params(RefillCycles), doRefill)) + if(refillCycles > 1) { + val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCycles, doRefill)) ser.io.in <> io.mem.grant refill_cnt = ser.io.cnt refill_done = ser.io.done @@ -181,18 +188,18 @@ class ICache extends Module } //assert(!c.tlco.isVoluntary(refill_bits.payload) || !refill_valid, "UncachedRequestors shouldn't get voluntary grants.") - val repl_way = if (params(IsDM)) UInt(0) else LFSR16(s2_miss)(log2Up(nWays)-1,0) - val entagbits = ecc.width(params(TagBits)) + val repl_way = if (isDM) UInt(0) else LFSR16(s2_miss)(log2Up(nWays)-1,0) + val entagbits = code.width(tagBits) val tag_array = Mem(Bits(width = entagbits*nWays), nSets, seqRead = true) val tag_raddr = Reg(UInt()) when (refill_done) { - val wmask = FillInterleaved(entagbits, if (params(IsDM)) Bits(1) else UIntToOH(repl_way)) - val tag = ecc.encode(s2_tag).toUInt + val wmask = FillInterleaved(entagbits, if (isDM) Bits(1) else UIntToOH(repl_way)) + val tag = 
code.encode(s2_tag).toUInt tag_array.write(s2_idx, Fill(nWays, tag), wmask) } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM .elsewhen (s0_valid) { - tag_raddr := s0_pgoff(params(UntagBits)-1,params(OffBits)) + tag_raddr := s0_pgoff(untagBits-1,blockOffBits) } val vb_array = Reg(init=Bits(0, nSets*nWays)) @@ -212,38 +219,38 @@ class ICache extends Module val s2_dout = Vec.fill(nWays){Reg(Bits())} for (i <- 0 until nWays) { - val s1_vb = vb_array(Cat(UInt(i), s1_pgoff(params(UntagBits)-1,params(OffBits)))).toBool + val s1_vb = vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool val s2_vb = Reg(Bool()) val s2_tag_disparity = Reg(Bool()) val s2_tag_match = Reg(Bool()) val tag_out = tag_array(tag_raddr)(entagbits*(i+1)-1, entagbits*i) when (s1_valid && rdy && !stall) { s2_vb := s1_vb - s2_tag_disparity := ecc.decode(tag_out).error + s2_tag_disparity := code.decode(tag_out).error s2_tag_match := s1_tag_match(i) } - s1_tag_match(i) := tag_out(params(TagBits)-1,0) === s1_tag + s1_tag_match(i) := tag_out(tagBits-1,0) === s1_tag s2_tag_hit(i) := s2_vb && s2_tag_match - s2_disparity(i) := s2_vb && (s2_tag_disparity || ecc.decode(s2_dout(i)).error) + s2_disparity(i) := s2_vb && (s2_tag_disparity || code.decode(s2_dout(i)).error) } s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_) for (i <- 0 until nWays) { - val data_array = Mem(Bits(width = ecc.width(params(RowBits))), nSets*params(RefillCycles), seqRead = true) + val data_array = Mem(Bits(width = code.width(rowBits)), nSets*refillCycles, seqRead = true) val s1_raddr = Reg(UInt()) when (refill_valid && repl_way === UInt(i)) { - val e_d = ecc.encode(refill_bits.payload.data) - if(params(RefillCycles) > 1) data_array(Cat(s2_idx,refill_cnt)) := e_d + val e_d = code.encode(refill_bits.payload.data) + if(refillCycles > 1) data_array(Cat(s2_idx,refill_cnt)) := e_d else data_array(s2_idx) := e_d } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM 
.elsewhen (s0_valid) { - s1_raddr := s0_pgoff(params(UntagBits)-1,params(OffBits)-(if(params(RefillCycles) > 1) refill_cnt.getWidth else 0)) + s1_raddr := s0_pgoff(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0)) } // if s1_tag_match is critical, replace with partial tag check - when (s1_valid && rdy && !stall && (Bool(params(IsDM)) || s1_tag_match(i))) { s2_dout(i) := data_array(s1_raddr) } + when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s2_dout(i) := data_array(s1_raddr) } } - val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(params(RowBytes))-1,log2Up(params(InstBytes))) << log2Up(params(InstBytes)*8)))(params(InstBytes)*8-1,0)) + val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(rowBytes)-1,log2Up(instBytes)) << log2Up(instBytes*8)))(instBytes*8-1,0)) io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) @@ -255,7 +262,7 @@ class ICache extends Module // output signals io.resp.valid := s2_hit io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready - io.mem.acquire.bits.payload := Acquire(co.getUncachedReadAcquireType, s2_addr >> UInt(params(OffBits)), UInt(0)) + io.mem.acquire.bits.payload := Acquire(co.getUncachedReadAcquireType, s2_addr >> UInt(blockOffBits), UInt(0)) io.mem.finish <> ack_q.io.deq // control state machine diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 0a33d01d..3e54eac9 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -4,26 +4,35 @@ import Chisel._ import uncore._ import Util._ -//Knobs case object StoreDataQueueDepth extends Field[Int] case object ReplayQueueDepth extends Field[Int] case object NMSHRs extends Field[Int] case object NTLBEntries extends Field[Int] case object CoreReqTagBits extends Field[Int] case object CoreDataBits extends Field[Int] -case object RowWords extends Field[Int] case object ECCCode 
extends Field[Code] -//Derived -case object MaxAddrBits extends Field[Int] -case object CoreDataBytes extends Field[Int] -case object WordOffBits extends Field[Int] -case object RowBytes extends Field[Int] -case object RowOffBits extends Field[Int] -case object DoNarrowRead extends Field[Int] -case object EncDataBits extends Field[Int] -case object EncRowBits extends Field[Int] case object LRSCCycles extends Field[Int] +abstract trait L1HellaCacheParameters extends CacheParameters { + val indexmsb = untagBits-1 + val indexlsb = blockOffBits + val offsetmsb = indexlsb-1 + val offsetlsb = wordOffBits + + val co = params(TLCoherence) + val code = params(ECCCode) + val coreReqTagBits = params(CoreReqTagBits) + val coreDataBits = params(CoreDataBits) + val maxAddrBits = math.max(params(PPNBits),params(VPNBits)+1) + params(PgIdxBits) + val coreDataBytes = coreDataBits/8 + val doNarrowRead = coreDataBits * nWays % rowBits == 0 + val encDataBits = code.width(coreDataBits) + val encRowBits = encDataBits*rowWords +} + +abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters +abstract class L1HellaCacheModule extends Module with L1HellaCacheParameters + class StoreGen(typ: Bits, addr: Bits, dat: Bits) { val byte = typ === MT_B || typ === MT_BU @@ -56,28 +65,71 @@ class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool) val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift) } +class HellaCacheReq extends L1HellaCacheBundle { + val kill = Bool() + val typ = Bits(width = MT_SZ) + val phys = Bool() + val addr = UInt(width = maxAddrBits) + val data = Bits(width = coreDataBits) + val tag = Bits(width = coreReqTagBits) + val cmd = Bits(width = M_SZ) +} + +class HellaCacheResp extends L1HellaCacheBundle { + val nack = Bool() // comes 2 cycles after req.fire + val replay = Bool() + val typ = Bits(width = 3) + val has_data = Bool() + val data = Bits(width = coreDataBits) + val data_subword = Bits(width = coreDataBits) + val 
tag = Bits(width = coreReqTagBits) + val cmd = Bits(width = 4) + val addr = UInt(width = maxAddrBits) + val store_data = Bits(width = coreDataBits) +} + +class AlignmentExceptions extends Bundle { + val ld = Bool() + val st = Bool() +} + +class HellaCacheExceptions extends Bundle { + val ma = new AlignmentExceptions + val pf = new AlignmentExceptions +} + +// interface between D$ and processor/DTLB +class HellaCacheIO extends L1HellaCacheBundle { + val req = Decoupled(new HellaCacheReq) + val resp = Valid(new HellaCacheResp).flip + val replay_next = Valid(Bits(width = coreReqTagBits)).flip + val xcpt = (new HellaCacheExceptions).asInput + val ptw = new TLBPTWIO().flip + val ordered = Bool(INPUT) +} + class MSHRReq extends HellaCacheReq { val tag_match = Bool() val old_meta = new L1Metadata - val way_en = Bits(width = params(NWays)) + val way_en = Bits(width = nWays) } class Replay extends HellaCacheReq { val sdq_id = UInt(width = log2Up(params(StoreDataQueueDepth))) } -class DataReadReq extends Bundle { - val way_en = Bits(width = params(NWays)) - val addr = Bits(width = params(UntagBits)) +class DataReadReq extends L1HellaCacheBundle { + val way_en = Bits(width = nWays) + val addr = Bits(width = untagBits) } class DataWriteReq extends DataReadReq { - val wmask = Bits(width = params(RowWords)) - val data = Bits(width = params(EncRowBits)) + val wmask = Bits(width = rowWords) + val data = Bits(width = encRowBits) } class L1MetaReadReq extends MetaReadReq { - val tag = Bits(width = params(TagBits)) + val tag = Bits(width = tagBits) } class L1MetaWriteReq extends @@ -91,23 +143,22 @@ object L1Metadata { meta } } -class L1Metadata extends Metadata { - val coh = params(TLCoherence).clientMetadataOnFlush.clone +class L1Metadata extends Metadata with L1HellaCacheParameters { + val coh = co.clientMetadataOnFlush.clone } class InternalProbe extends Probe with HasClientTransactionId -class WritebackReq extends Bundle { - val tag = Bits(width = params(TagBits)) - val idx = 
Bits(width = params(IdxBits)) - val way_en = Bits(width = params(NWays)) +class WritebackReq extends L1HellaCacheBundle { + val tag = Bits(width = tagBits) + val idx = Bits(width = idxBits) + val way_en = Bits(width = nWays) val client_xact_id = Bits(width = params(TLClientXactIdBits)) val master_xact_id = Bits(width = params(TLMasterXactIdBits)) - val r_type = UInt(width = params(TLCoherence).releaseTypeWidth) + val r_type = UInt(width = co.releaseTypeWidth) } -class MSHR(id: Int) extends Module { - val co = params(TLCoherence) +class MSHR(id: Int) extends L1HellaCacheModule { val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) @@ -117,7 +168,7 @@ class MSHR(id: Int) extends Module { val req_sdq_id = UInt(INPUT, log2Up(params(StoreDataQueueDepth))) val idx_match = Bool(OUTPUT) - val tag = Bits(OUTPUT, params(TagBits)) + val tag = Bits(OUTPUT, tagBits) val mem_req = Decoupled(new Acquire) val mem_resp = new DataWriteReq().asOutput @@ -136,17 +187,17 @@ class MSHR(id: Int) extends Module { val acquire_type = Reg(UInt()) val release_type = Reg(UInt()) val line_state = Reg(new ClientMetadata()(co)) - val refill_count = Reg(UInt(width = log2Up(params(RefillCycles)))) // TODO: zero-width wire + val refill_count = Reg(UInt(width = log2Up(refillCycles))) // TODO: zero-width wire val req = Reg(new MSHRReq()) val req_cmd = io.req_bits.cmd - val req_idx = req.addr(params(UntagBits)-1,params(OffBits)) - val idx_match = req_idx === io.req_bits.addr(params(UntagBits)-1,params(OffBits)) + val req_idx = req.addr(untagBits-1,blockOffBits) + val idx_match = req_idx === io.req_bits.addr(untagBits-1,blockOffBits) val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) - require(isPow2(params(RefillCycles))) + require(isPow2(refillCycles)) val reply = io.mem_grant.valid && 
io.mem_grant.bits.payload.client_xact_id === UInt(id) - val refill_done = reply && (if(params(RefillCycles) > 1) refill_count.andR else Bool(true)) + val refill_done = reply && (if(refillCycles > 1) refill_count.andR else Bool(true)) val wb_done = reply && (state === s_wb_resp) val meta_on_flush = co.clientMetadataOnFlush @@ -172,7 +223,7 @@ class MSHR(id: Int) extends Module { when (state === s_refill_resp) { when (refill_done) { state := s_meta_write_req } when (reply) { - if(params(RefillCycles) > 1) refill_count := refill_count + UInt(1) + if(refillCycles > 1) refill_count := refill_count + UInt(1) line_state := meta_on_grant } } @@ -222,8 +273,8 @@ class MSHR(id: Int) extends Module { io.idx_match := (state != s_invalid) && idx_match io.mem_resp := req - io.mem_resp.addr := (if(params(RefillCycles) > 1) Cat(req_idx, refill_count) else req_idx) << params(RowOffBits) - io.tag := req.addr >> params(UntagBits) + io.mem_resp.addr := (if(refillCycles > 1) Cat(req_idx, refill_count) else req_idx) << rowOffBits + io.tag := req.addr >> untagBits io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready @@ -259,7 +310,7 @@ class MSHR(id: Int) extends Module { io.replay.valid := state === s_drain_rpq && rpq.io.deq.valid io.replay.bits := rpq.io.deq.bits io.replay.bits.phys := Bool(true) - io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(params(OffBits)-1,0)).toUInt + io.replay.bits.addr := Cat(io.tag, req_idx, rpq.io.deq.bits.addr(blockOffBits-1,0)).toUInt when (!io.meta_read.ready) { rpq.io.deq.ready := Bool(false) @@ -267,7 +318,7 @@ class MSHR(id: Int) extends Module { } } -class MSHRFile extends Module { +class MSHRFile extends L1HellaCacheModule { val io = new Bundle { val req = Decoupled(new MSHRReq).flip val secondary_miss = Bool(OUTPUT) @@ -294,7 +345,7 @@ class MSHRFile extends Module { val idxMatch = Vec.fill(params(NMSHRs)){Bool()} val tagList = Vec.fill(params(NMSHRs)){Bits()} - val tag_match = Mux1H(idxMatch, 
tagList) === io.req.bits.addr >> params(UntagBits) + val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits val wbTagList = Vec.fill(params(NMSHRs)){Bits()} val memRespMux = Vec.fill(params(NMSHRs)){new DataWriteReq} @@ -367,20 +418,19 @@ class MSHRFile extends Module { } } - -class WritebackUnit extends Module { +class WritebackUnit extends L1HellaCacheModule { val io = new Bundle { val req = Decoupled(new WritebackReq()).flip val meta_read = Decoupled(new L1MetaReadReq) val data_req = Decoupled(new DataReadReq()) - val data_resp = Bits(INPUT, params(EncRowBits)) + val data_resp = Bits(INPUT, encRowBits) val release = Decoupled(new Release) } val active = Reg(init=Bool(false)) val r1_data_req_fired = Reg(init=Bool(false)) val r2_data_req_fired = Reg(init=Bool(false)) - val cnt = Reg(init = UInt(0, width = log2Up(params(RefillCycles)+1))) + val cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1))) val req = Reg(new WritebackReq) io.release.valid := false @@ -391,8 +441,8 @@ class WritebackUnit extends Module { r1_data_req_fired := true cnt := cnt + 1 } - if(params(RefillCycles) > 1) { // Coalescing buffer inserted - when (!r1_data_req_fired && !r2_data_req_fired && cnt === params(RefillCycles)) { + if(refillCycles > 1) { // Coalescing buffer inserted + when (!r1_data_req_fired && !r2_data_req_fired && cnt === refillCycles) { io.release.valid := true active := !io.release.ready } @@ -415,7 +465,7 @@ class WritebackUnit extends Module { req := io.req.bits } - val fire = active && cnt < UInt(params(RefillCycles)) + val fire = active && cnt < UInt(refillCycles) io.req.ready := !active // We reissue the meta read as it sets up the muxing for s2_data_muxed @@ -425,20 +475,20 @@ class WritebackUnit extends Module { io.data_req.valid := fire io.data_req.bits.way_en := req.way_en - if(params(RefillCycles) > 1) { - io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(params(RefillCycles))-1,0)) << params(RowOffBits) + if(refillCycles > 1) { + 
io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(refillCycles)-1,0)) << rowOffBits } else { - io.data_req.bits.addr := req.idx << params(RowOffBits) + io.data_req.bits.addr := req.idx << rowOffBits } io.release.bits.r_type := req.r_type io.release.bits.addr := Cat(req.tag, req.idx).toUInt io.release.bits.client_xact_id := req.client_xact_id io.release.bits.master_xact_id := req.master_xact_id - if(params(RefillCycles) > 1) { + if(refillCycles > 1) { val data_buf = Reg(Bits()) when(active && r2_data_req_fired) { - data_buf := Cat(io.data_resp, data_buf(params(RefillCycles)*params(EncRowBits)-1, params(EncRowBits))) + data_buf := Cat(io.data_resp, data_buf(refillCycles*encRowBits-1, encRowBits)) } io.release.bits.data := data_buf } else { @@ -447,15 +497,14 @@ class WritebackUnit extends Module { } -class ProbeUnit extends Module { - val co = params(TLCoherence) +class ProbeUnit extends L1HellaCacheModule { val io = new Bundle { val req = Decoupled(new InternalProbe).flip val rep = Decoupled(new Release) val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val wb_req = Decoupled(new WritebackReq) - val way_en = Bits(INPUT, params(NWays)) + val way_en = Bits(INPUT, nWays) val mshr_rdy = Bool(INPUT) val line_state = new ClientMetadata()(co).asInput } @@ -508,60 +557,60 @@ class ProbeUnit extends Module { io.meta_read.valid := state === s_meta_read io.meta_read.bits.idx := req.addr - io.meta_read.bits.tag := req.addr >> params(IdxBits) + io.meta_read.bits.tag := req.addr >> idxBits io.meta_write.valid := state === s_meta_write io.meta_write.bits.way_en := way_en io.meta_write.bits.idx := req.addr io.meta_write.bits.data.coh := co.clientMetadataOnProbe(req, line_state) - io.meta_write.bits.data.tag := req.addr >> UInt(params(IdxBits)) + io.meta_write.bits.data.tag := req.addr >> UInt(idxBits) io.wb_req.valid := state === s_writeback_req io.wb_req.bits.way_en := way_en io.wb_req.bits.idx := req.addr - io.wb_req.bits.tag := req.addr 
>> UInt(params(IdxBits)) + io.wb_req.bits.tag := req.addr >> UInt(idxBits) io.wb_req.bits.r_type := co.getReleaseTypeOnProbe(req, Mux(hit, line_state, co.clientMetadataOnFlush)) io.wb_req.bits.client_xact_id := req.client_xact_id io.wb_req.bits.master_xact_id := req.master_xact_id } -class DataArray extends Module { +class DataArray extends L1HellaCacheModule { val io = new Bundle { val read = Decoupled(new DataReadReq).flip val write = Decoupled(new DataWriteReq).flip - val resp = Vec.fill(params(NWays)){Bits(OUTPUT, params(EncRowBits))} + val resp = Vec.fill(nWays){Bits(OUTPUT, encRowBits)} } - val waddr = io.write.bits.addr >> params(RowOffBits) - val raddr = io.read.bits.addr >> params(RowOffBits) + val waddr = io.write.bits.addr >> rowOffBits + val raddr = io.read.bits.addr >> rowOffBits - if (params(DoNarrowRead)) { - for (w <- 0 until params(NWays) by params(RowWords)) { - val wway_en = io.write.bits.way_en(w+params(RowWords)-1,w) - val rway_en = io.read.bits.way_en(w+params(RowWords)-1,w) - val resp = Vec.fill(params(RowWords)){Bits(width = params(EncRowBits))} + if (doNarrowRead) { + for (w <- 0 until nWays by rowWords) { + val wway_en = io.write.bits.way_en(w+rowWords-1,w) + val rway_en = io.read.bits.way_en(w+rowWords-1,w) + val resp = Vec.fill(rowWords){Bits(width = encRowBits)} val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { - val array = Mem(Bits(width=params(EncRowBits)), params(NSets)*params(RefillCycles), seqRead = true) + val array = Mem(Bits(width=encRowBits), nSets*refillCycles, seqRead = true) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { - val data = Fill(params(RowWords), io.write.bits.data(params(EncDataBits)*(p+1)-1,params(EncDataBits)*p)) - val mask = FillInterleaved(params(EncDataBits), wway_en) + val data = Fill(rowWords, io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) + val mask = FillInterleaved(encDataBits, wway_en) array.write(waddr, data, mask) } resp(p) := 
array(RegEnable(raddr, rway_en.orR && io.read.valid)) } - for (dw <- 0 until params(RowWords)) { - val r = Vec(resp.map(_(params(EncDataBits)*(dw+1)-1,params(EncDataBits)*dw))) + for (dw <- 0 until rowWords) { + val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw))) val resp_mux = if (r.size == 1) r - else Vec(r(r_raddr(params(RowOffBits)-1,params(WordOffBits))), r.tail:_*) + else Vec(r(r_raddr(rowOffBits-1,wordOffBits)), r.tail:_*) io.resp(w+dw) := resp_mux.toBits } } } else { - val wmask = FillInterleaved(params(EncDataBits), io.write.bits.wmask) - for (w <- 0 until params(NWays)) { - val array = Mem(Bits(width=params(EncRowBits)), params(NSets)*params(RefillCycles), seqRead = true) + val wmask = FillInterleaved(encDataBits, io.write.bits.wmask) + for (w <- 0 until nWays) { + val array = Mem(Bits(width=encRowBits), nSets*refillCycles, seqRead = true) when (io.write.bits.way_en(w) && io.write.valid) { array.write(waddr, io.write.bits.data, wmask) } @@ -573,17 +622,17 @@ class DataArray extends Module { io.write.ready := Bool(true) } -class AMOALU extends Module { +class AMOALU extends L1HellaCacheModule { val io = new Bundle { - val addr = Bits(INPUT, params(OffBits)) + val addr = Bits(INPUT, blockOffBits) val cmd = Bits(INPUT, 4) val typ = Bits(INPUT, 3) - val lhs = Bits(INPUT, params(CoreDataBits)) - val rhs = Bits(INPUT, params(CoreDataBits)) - val out = Bits(OUTPUT, params(CoreDataBits)) + val lhs = Bits(INPUT, coreDataBits) + val rhs = Bits(INPUT, coreDataBits) + val out = Bits(OUTPUT, coreDataBits) } - require(params(CoreDataBits) == 64) + require(coreDataBits == 64) val storegen = new StoreGen(io.typ, io.addr, io.rhs) val rhs = storegen.wordData @@ -614,67 +663,18 @@ class AMOALU extends Module { io.out := wmask & out | ~wmask & io.lhs } -class HellaCacheReq extends Bundle { - val kill = Bool() - val typ = Bits(width = MT_SZ) - val phys = Bool() - val addr = UInt(width = params(MaxAddrBits)) - val data = Bits(width = params(CoreDataBits)) - val tag = 
Bits(width = params(CoreReqTagBits)) - val cmd = Bits(width = M_SZ) -} - -class HellaCacheResp extends Bundle { - val nack = Bool() // comes 2 cycles after req.fire - val replay = Bool() - val typ = Bits(width = 3) - val has_data = Bool() - val data = Bits(width = params(CoreDataBits)) - val data_subword = Bits(width = params(CoreDataBits)) - val tag = Bits(width = params(CoreReqTagBits)) - val cmd = Bits(width = 4) - val addr = UInt(width = params(MaxAddrBits)) - val store_data = Bits(width = params(CoreDataBits)) -} - -class AlignmentExceptions extends Bundle { - val ld = Bool() - val st = Bool() -} - -class HellaCacheExceptions extends Bundle { - val ma = new AlignmentExceptions - val pf = new AlignmentExceptions -} - -// interface between D$ and processor/DTLB -class HellaCacheIO extends Bundle { - val req = Decoupled(new HellaCacheReq) - val resp = Valid(new HellaCacheResp).flip - val replay_next = Valid(Bits(width = params(CoreReqTagBits))).flip - val xcpt = (new HellaCacheExceptions).asInput - val ptw = new TLBPTWIO().flip - val ordered = Bool(INPUT) -} - -class HellaCache extends Module { - val co = params(TLCoherence) +class HellaCache extends L1HellaCacheModule { val io = new Bundle { val cpu = (new HellaCacheIO).flip val mem = new TileLinkIO } require(params(LRSCCycles) >= 32) // ISA requires 16-insn LRSC sequences to succeed - require(isPow2(params(NSets))) - require(isPow2(params(NWays))) // TODO: relax this + require(isPow2(nSets)) + require(isPow2(nWays)) // TODO: relax this require(params(RowBits) <= params(TLDataBits)) - require(params(PAddrBits)-params(OffBits) == params(TLAddrBits) ) - require(params(UntagBits) <= params(PgIdxBits)) - - val indexmsb = params(UntagBits)-1 - val indexlsb = params(OffBits) - val offsetmsb = indexlsb-1 - val offsetlsb = params(WordOffBits) + require(paddrBits-blockOffBits == params(TLAddrBits) ) + require(untagBits <= pgIdxBits) val wb = Module(new WritebackUnit) val prober = Module(new ProbeUnit) @@ -708,7 +708,7 @@ 
class HellaCache extends Module { dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys dtlb.io.req.bits.passthrough := s1_req.phys dtlb.io.req.bits.asid := UInt(0) - dtlb.io.req.bits.vpn := s1_req.addr >> params(PgIdxBits) + dtlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits dtlb.io.req.bits.instruction := Bool(false) when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) } @@ -716,11 +716,11 @@ class HellaCache extends Module { s1_req := io.cpu.req.bits } when (wb.io.meta_read.valid) { - s1_req.addr := Cat(wb.io.meta_read.bits.tag, wb.io.meta_read.bits.idx) << params(OffBits) + s1_req.addr := Cat(wb.io.meta_read.bits.tag, wb.io.meta_read.bits.idx) << blockOffBits s1_req.phys := Bool(true) } when (prober.io.meta_read.valid) { - s1_req.addr := Cat(prober.io.meta_read.bits.tag, prober.io.meta_read.bits.idx) << params(OffBits) + s1_req.addr := Cat(prober.io.meta_read.bits.tag, prober.io.meta_read.bits.idx) << blockOffBits s1_req.phys := Bool(true) } when (mshrs.io.replay.valid) { @@ -729,7 +729,7 @@ class HellaCache extends Module { when (s2_recycle) { s1_req := s2_req } - val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(params(PgIdxBits)-1,0)) + val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) when (s1_clk_en) { s2_req.kill := s1_req.kill @@ -769,12 +769,12 @@ class HellaCache extends Module { data.io.write.valid := writeArb.io.out.valid writeArb.io.out.ready := data.io.write.ready data.io.write.bits := writeArb.io.out.bits - val wdata_encoded = (0 until params(RowWords)).map(i => params(ECCCode).encode(writeArb.io.out.bits.data(params(CoreDataBits)*(i+1)-1,params(CoreDataBits)*i))) + val wdata_encoded = (0 until rowWords).map(i => code.encode(writeArb.io.out.bits.data(coreDataBits*(i+1)-1,coreDataBits*i))) data.io.write.bits.data := Vec(wdata_encoded).toBits // tag read for new requests metaReadArb.io.in(4).valid := io.cpu.req.valid - metaReadArb.io.in(4).bits.idx := io.cpu.req.bits.addr >> params(OffBits) + 
metaReadArb.io.in(4).bits.idx := io.cpu.req.bits.addr >> blockOffBits when (!metaReadArb.io.in(4).ready) { io.cpu.req.ready := Bool(false) } // data read for new requests @@ -785,14 +785,14 @@ class HellaCache extends Module { // recycled requests metaReadArb.io.in(0).valid := s2_recycle - metaReadArb.io.in(0).bits.idx := s2_req.addr >> params(OffBits) + metaReadArb.io.in(0).bits.idx := s2_req.addr >> blockOffBits readArb.io.in(0).valid := s2_recycle readArb.io.in(0).bits.addr := s2_req.addr readArb.io.in(0).bits.way_en := SInt(-1) // tag check and way muxing - def wayMap[T <: Data](f: Int => T) = Vec((0 until params(NWays)).map(f)) - val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> params(UntagBits))).toBits + def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f)) + val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> untagBits)).toBits val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && co.isValid(meta.io.resp(w).coh)).toBits s1_clk_en := metaReadArb.io.out.valid //TODO: should be metaReadArb.io.out.fire(), but triggers Verilog backend bug val s1_writeback = s1_clk_en && !s1_valid && !s1_replay @@ -806,13 +806,13 @@ class HellaCache extends Module { val lrsc_valid = lrsc_count.orR val lrsc_addr = Reg(UInt()) val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC) - val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> params(OffBits)) + val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> blockOffBits) val s2_sc_fail = s2_sc && !s2_lrsc_addr_match when (lrsc_valid) { lrsc_count := lrsc_count - 1 } when (s2_valid_masked && s2_hit || s2_replay) { when (s2_lr) { when (!lrsc_valid) { lrsc_count := params(LRSCCycles)-1 } - lrsc_addr := s2_req.addr >> params(OffBits) + lrsc_addr := s2_req.addr >> blockOffBits } when (s2_sc) { lrsc_count := 0 @@ -820,21 +820,21 @@ class HellaCache extends Module { } when (io.cpu.ptw.sret) { lrsc_count := 0 } - val s2_data = 
Vec.fill(params(NWays)){Bits(width = params(EncRowBits))} - for (w <- 0 until params(NWays)) { - val regs = Vec.fill(params(RowWords)){Reg(Bits(width = params(EncDataBits)))} + val s2_data = Vec.fill(nWays){Bits(width = encRowBits)} + for (w <- 0 until nWays) { + val regs = Vec.fill(rowWords){Reg(Bits(width = encDataBits))} val en1 = s1_clk_en && s1_tag_eq_way(w) for (i <- 0 until regs.size) { - val en = en1 && ((Bool(i == 0) || !params(DoNarrowRead)) || s1_writeback) - when (en) { regs(i) := data.io.resp(w) >> params(EncDataBits)*i } + val en = en1 && (Bool(i == 0 || !doNarrowRead) || s1_writeback) + when (en) { regs(i) := data.io.resp(w) >> encDataBits*i } } s2_data(w) := regs.toBits } val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data) - val s2_data_decoded = (0 until params(RowWords)).map(i => params(ECCCode).decode(s2_data_muxed(params(EncDataBits)*(i+1)-1,params(EncDataBits)*i))) + val s2_data_decoded = (0 until rowWords).map(i => code.decode(s2_data_muxed(encDataBits*(i+1)-1,encDataBits*i))) val s2_data_corrected = Vec(s2_data_decoded.map(_.corrected)).toBits val s2_data_uncorrected = Vec(s2_data_decoded.map(_.uncorrected)).toBits - val s2_word_idx = if (params(DoNarrowRead)) UInt(0) else s2_req.addr(log2Up(params(RowWords)*params(CoreDataBytes))-1,3) + val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,3) val s2_data_correctable = Vec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) // store/amo hits @@ -847,10 +847,10 @@ class HellaCache extends Module { } writeArb.io.in(0).bits.addr := s3_req.addr - writeArb.io.in(0).bits.wmask := UInt(1) << (if(params(RowOffBits) > offsetlsb) - s3_req.addr(params(RowOffBits)-1,offsetlsb).toUInt + writeArb.io.in(0).bits.wmask := UInt(1) << (if(rowOffBits > offsetlsb) + s3_req.addr(rowOffBits-1,offsetlsb).toUInt else UInt(0)) - writeArb.io.in(0).bits.data := Fill(params(RowWords), s3_req.data) + writeArb.io.in(0).bits.data := Fill(rowWords, s3_req.data) 
writeArb.io.in(0).valid := s3_valid writeArb.io.in(0).bits.way_en := s3_way @@ -897,8 +897,8 @@ class HellaCache extends Module { // refills def doRefill(g: Grant): Bool = co.messageUpdatesDataArray(g) - val refill = if(params(RefillCycles) > 1) { - val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, params(RefillCycles), doRefill)) + val refill = if(refillCycles > 1) { + val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCycles, doRefill)) ser.io.in <> io.mem.grant ser.io.out } else io.mem.grant @@ -908,7 +908,7 @@ class HellaCache extends Module { writeArb.io.in(1).valid := refill.valid && doRefill(refill.bits.payload) writeArb.io.in(1).bits := mshrs.io.mem_resp writeArb.io.in(1).bits.wmask := SInt(-1) - writeArb.io.in(1).bits.data := refill.bits.payload.data(params(EncRowBits)-1,0) + writeArb.io.in(1).bits.data := refill.bits.payload.data(encRowBits-1,0) readArb.io.out.ready := !refill.valid || refill.ready // insert bubble if refill gets blocked readArb.io.out <> data.io.read @@ -929,8 +929,8 @@ class HellaCache extends Module { ((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out), (s3_valid, s3_req, s3_req.data), (s4_valid, s4_req, s4_req.data) - ).map(r => (r._1 && (s1_addr >> params(WordOffBits) === r._2.addr >> params(WordOffBits)) && isWrite(r._2.cmd), r._3)) - val s2_store_bypass_data = Reg(Bits(width = params(CoreDataBits))) + ).map(r => (r._1 && (s1_addr >> wordOffBits === r._2.addr >> wordOffBits) && isWrite(r._2.cmd), r._3)) + val s2_store_bypass_data = Reg(Bits(width = coreDataBits)) val s2_store_bypass = Reg(Bool()) when (s1_clk_en) { s2_store_bypass := false @@ -941,7 +941,7 @@ class HellaCache extends Module { } // load data subword mux/sign extension - val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(params(CoreDataBits)))) + val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) val s2_data_word = Mux(s2_store_bypass, 
s2_store_bypass_data, s2_data_word_prebypass) val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index abe534a6..db555957 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -71,7 +71,7 @@ class PTW(n: Int) extends Module val resp_val = state === s_done || state === s_error val resp_err = state === s_error || state === s_wait - val r_resp_ppn = io.mem.req.bits.addr >> params(PgIdxBits) + val r_resp_ppn = io.mem.req.bits.addr >> UInt(params(PgIdxBits)) val resp_ppn = Vec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), r_req_vpn(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) for (i <- 0 until io.requestor.size) { diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index b7f7e412..d0f49fbe 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -32,15 +32,16 @@ class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { val optionalRoCC = params(BuildRoCC) - val p = params.alter(params(CoreBTBParams)).alter(params(RocketFrontendParams)) // Used in icache, Core + val p = params.alter(params(RocketFrontendParams)) // Used in icache, Core val icache = Module(new Frontend)(p) //TODO PARAMS: best way to alter both? 
- params.alter(params(RocketDCacheParams)) // Used in dcache, PTW, RoCCm Core - val dcache = Module(new HellaCache) - val ptw = Module(new PTW(if(optionalRoCC.isEmpty) 2 else 5)) + val p2 = params.alter(params(RocketDCacheParams)) // Used in dcache, PTW, RoCCm Core + val dcache = Module(new HellaCache)(p2) + val ptw = Module(new PTW(if(optionalRoCC.isEmpty) 2 else 5))(p2) // 2 ports, 1 from I$, 1 from D$, maybe 3 from RoCC - val core = Module(new Core) + val p3 = params.alter(params(RocketFrontendParams)).alter(params(RocketDCacheParams)) + val core = Module(new Core)(p3) - val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) + val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts)))(p2) dcArb.io.requestor(0) <> ptw.io.mem dcArb.io.requestor(1) <> core.io.dmem dcArb.io.mem <> dcache.io.cpu From 6a4193cf90c9e055faae928ce771cd68340cb642 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 14 Aug 2014 11:31:42 -0700 Subject: [PATCH 0756/1087] minor cache param cleanup --- rocket/src/main/scala/icache.scala | 1 - rocket/src/main/scala/nbdcache.scala | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index aa9af2a9..aac6a21b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -5,7 +5,6 @@ import uncore._ import Util._ case object InstBytes extends Field[Int] -case object CoreBTBParams extends Field[PF] abstract trait FrontendParameters extends CacheParameters { val instBytes = params(InstBytes) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3e54eac9..b1a44d0c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -7,11 +7,12 @@ import Util._ case object StoreDataQueueDepth extends Field[Int] case object ReplayQueueDepth extends Field[Int] case object NMSHRs extends Field[Int] -case object NTLBEntries extends Field[Int] case object 
CoreReqTagBits extends Field[Int] case object CoreDataBits extends Field[Int] -case object ECCCode extends Field[Code] case object LRSCCycles extends Field[Int] +//TODO PARAMS Also used by icache: is this ok?: +case object NTLBEntries extends Field[Int] +case object ECCCode extends Field[Code] abstract trait L1HellaCacheParameters extends CacheParameters { val indexmsb = untagBits-1 From 83c6c2c9e2eb9f21db9ef58939f729f1f63f1e5d Mon Sep 17 00:00:00 2001 From: Sagar Karandikar Date: Fri, 29 Aug 2014 10:26:48 -0700 Subject: [PATCH 0757/1087] rename refs to zynq-fpga to fpga-zynq --- rocket/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/README.md b/rocket/README.md index f3257d63..b9992a4c 100644 --- a/rocket/README.md +++ b/rocket/README.md @@ -13,8 +13,8 @@ future. We are currently in the process of cleaning up the repository. Please Currently, a Rocket core with an 8 KB direct-mapped L1 instruction cache and an 8 KB direct-mapped L1 data cache has been instantiated and committed to -the zynq-fpga infrastructure repository. A copy of the generated Verilog is available -[here](https://raw.githubusercontent.com/ucb-bar/zynq-fpga/master/hw/src/verilog/Slave.v). +the fpga-zynq infrastructure repository. A copy of the generated Verilog is available +[here](https://raw.githubusercontent.com/ucb-bar/fpga-zynq/master/hw/src/verilog/Slave.v). The following table compares a 32-bit ARM Cortex-A5 core to a 64-bit RISC-V Rocket core built in the same TSMC process (40GPLUS). 
Fourth column is the From b42a2ab40ac22b206f349072b2a2c5f478e002f9 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 1 Sep 2014 13:28:58 -0700 Subject: [PATCH 0758/1087] Final parameter refactor --- rocket/src/main/scala/btb.scala | 4 +- rocket/src/main/scala/core.scala | 43 +++++++++++++++---- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/icache.scala | 41 +++++++++--------- rocket/src/main/scala/multiplier.scala | 4 +- rocket/src/main/scala/nbdcache.scala | 37 ++++++---------- rocket/src/main/scala/tile.scala | 58 +++++++++----------------- 7 files changed, 93 insertions(+), 96 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 7cf4094f..7cf24b5d 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -5,13 +5,13 @@ import Util._ import Node._ import uncore._ -case object BTBEntries extends Field[Int] +case object NBTBEntries extends Field[Int] case object NRAS extends Field[Int] abstract trait BTBParameters extends UsesParameters { val vaddrBits = params(VAddrBits) val matchBits = params(PgIdxBits) - val entries = params(BTBEntries) + val entries = params(NBTBEntries) val nRAS = params(NRAS) val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages val opaqueBits = log2Up(entries) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 0b22b42a..a6dee39f 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -2,10 +2,34 @@ package rocket import Chisel._ import Util._ -import uncore.HTIFIO +import uncore._ -case object FPUParams extends Field[PF] case object BuildFPU extends Field[Option[() => FPU]] +case object XprLen extends Field[Int] +case object NMultXpr extends Field[Int] +case object RetireWidth extends Field[Int] +case object UseVM extends Field[Boolean] +case object FastLoadWord extends Field[Boolean] +case object FastLoadByte extends Field[Boolean] +case object 
FastMulDiv extends Field[Boolean] +case object CoreInstBits extends Field[Int] +case object CoreDataBits extends Field[Int] +case object CoreDCacheReqTagBits extends Field[Int] + +abstract trait CoreParameters extends UsesParameters { + val xprLen = params(XprLen) + val coreInstBits = params(CoreInstBits) + val coreInstBytes = coreInstBits/8 + val coreDataBits = xprLen + val coreDataBytes = coreDataBits/8 + val coreDCacheReqTagBits = params(CoreDCacheReqTagBits) + val coreMaxAddrBits = math.max(params(PPNBits),params(VPNBits)+1) + params(PgIdxBits) + + if(params(FastLoadByte)) require(params(FastLoadWord)) + require(params(RetireWidth) == 1) // for now... +} +abstract class CoreBundle extends Bundle with CoreParameters +abstract class CoreModule extends Module with CoreParameters class RocketIO extends Bundle { @@ -16,19 +40,20 @@ class RocketIO extends Bundle val rocc = new RoCCInterface().flip } -class Core extends Module +class Core extends Module with CoreParameters { val io = new RocketIO val ctrl = Module(new Control) val dpath = Module(new Datapath) - if (!params(BuildFPU).isEmpty) { - val p = Some(params.alter(params(FPUParams))) - val fpu = Module(params(BuildFPU).get())(p) - dpath.io.fpu <> fpu.io.dpath - ctrl.io.fpu <> fpu.io.ctrl - } + //If so specified, build an FPU module and wire it in + params(BuildFPU) + .map { bf => Module(bf()) } + .foreach { fpu => + dpath.io.fpu <> fpu.io.dpath + ctrl.io.fpu <> fpu.io.ctrl + } ctrl.io.dpath <> dpath.io.ctrl dpath.io.host <> io.host diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index e932b1dd..84a0a400 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -171,7 +171,7 @@ class Datapath extends Module io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(params(VAddrBits)-1,0)).toUInt io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_fp_val) require(io.dmem.req.bits.tag.getWidth >= 6) - 
require(params(DcacheReqTagBits) >= 6) + require(params(CoreDCacheReqTagBits) >= 6) // processor control regfile read val pcr = Module(new CSRFile) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index aac6a21b..1f85a22b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -4,29 +4,30 @@ import Chisel._ import uncore._ import Util._ -case object InstBytes extends Field[Int] +case object NITLBEntries extends Field[Int] +case object ECCCode extends Field[Option[Code]] -abstract trait FrontendParameters extends CacheParameters { - val instBytes = params(InstBytes) +abstract trait L1CacheParameters extends CacheParameters with CoreParameters { val co = params(TLCoherence) - val code = params(ECCCode) -} + val code = params(ECCCode).getOrElse(new IdentityCode) +} +abstract trait FrontendParameters extends L1CacheParameters abstract class FrontendBundle extends Bundle with FrontendParameters abstract class FrontendModule extends Module with FrontendParameters -class FrontendReq extends FrontendBundle { - val pc = UInt(width = vaddrBits+1) +class FrontendReq extends CoreBundle { + val pc = UInt(width = params(VAddrBits)+1) } -class FrontendResp extends FrontendBundle { - val pc = UInt(width = vaddrBits+1) // ID stage PC - val data = Bits(width = instBytes*8) +class FrontendResp extends CoreBundle { + val pc = UInt(width = params(VAddrBits)+1) // ID stage PC + val data = Bits(width = coreInstBits) val xcpt_ma = Bool() val xcpt_if = Bool() } -class CPUFrontendIO extends FrontendBundle { +class CPUFrontendIO extends CoreBundle { val req = Valid(new FrontendReq) val resp = Decoupled(new FrontendResp).flip val btb_resp = Valid(new BTBResp).flip @@ -44,7 +45,7 @@ class Frontend extends FrontendModule val btb = Module(new BTB) val icache = Module(new ICache) - val tlb = Module(new TLB(params(NTLBEntries))) + val tlb = Module(new TLB(params(NITLBEntries))) val s1_pc_ = Reg(UInt()) val s1_pc = s1_pc_ & 
SInt(-2) // discard LSB of PC (throughout the pipeline) @@ -57,7 +58,7 @@ class Frontend extends FrontendModule val msb = vaddrBits-1 val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) - val pcp4_0 = s1_pc + UInt(instBytes) + val pcp4_0 = s1_pc + UInt(coreInstBytes) val pcp4 = Cat(s1_pc(msb) & pcp4_0(msb), pcp4_0(msb,0)) val icmiss = s2_valid && !icache.io.resp.valid val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, pcp4) @@ -82,7 +83,7 @@ class Frontend extends FrontendModule s2_valid := Bool(false) } - btb.io.req := s1_pc & SInt(-instBytes) + btb.io.req := s1_pc & SInt(-coreInstBytes) btb.io.update := io.cpu.btb_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate @@ -102,9 +103,9 @@ class Frontend extends FrontendModule icache.io.resp.ready := !stall && !s1_same_block io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) - io.cpu.resp.bits.pc := s2_pc & SInt(-instBytes) // discard PC LSBs - io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(instBytes)) << log2Up(instBytes*8)) - io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(instBytes)-1,0) != UInt(0) + io.cpu.resp.bits.pc := s2_pc & SInt(-coreInstBytes) // discard PC LSBs + io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreInstBytes)) << log2Up(coreInstBits)) + io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(coreInstBytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if io.cpu.btb_resp.valid := s2_btb_resp_valid @@ -118,7 +119,7 @@ class ICacheReq extends FrontendBundle { } class ICacheResp extends FrontendBundle { - val data = Bits(width = instBytes*8) + val data = Bits(width = coreInstBits) val datablock = Bits(width = rowBits) } @@ -131,7 +132,7 @@ class ICache extends FrontendModule val mem = new UncachedTileLinkIO } require(isPow2(nSets) && isPow2(nWays)) - require(isPow2(instBytes)) + require(isPow2(coreInstBytes)) require(pgIdxBits >= untagBits) val s_ready :: 
s_request :: s_refill_wait :: s_refill :: Nil = Enum(UInt(), 4) @@ -249,7 +250,7 @@ class ICache extends FrontendModule // if s1_tag_match is critical, replace with partial tag check when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s2_dout(i) := data_array(s1_raddr) } } - val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(rowBytes)-1,log2Up(instBytes)) << log2Up(instBytes*8)))(instBytes*8-1,0)) + val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(rowBytes)-1,log2Up(coreInstBytes)) << log2Up(coreInstBits)))(coreInstBits-1,0)) io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 02cf199c..6a595c4b 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -9,12 +9,12 @@ class MultiplierReq extends Bundle { val dw = Bits(width = SZ_DW) val in1 = Bits(width = params(XprLen)) val in2 = Bits(width = params(XprLen)) - val tag = UInt(width = params(NXprBits)) + val tag = UInt(width = log2Up(params(NMultXpr))) } class MultiplierResp extends Bundle { val data = Bits(width = params(XprLen)) - val tag = UInt(width = params(NXprBits)) + val tag = UInt(width = log2Up(params(NMultXpr))) } class MultiplierIO extends Bundle { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index b1a44d0c..cdf360c4 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -7,25 +7,14 @@ import Util._ case object StoreDataQueueDepth extends Field[Int] case object ReplayQueueDepth extends Field[Int] case object NMSHRs extends Field[Int] -case object CoreReqTagBits extends Field[Int] -case object CoreDataBits extends Field[Int] case object LRSCCycles extends Field[Int] -//TODO PARAMS Also used by icache: is this ok?: -case object NTLBEntries extends Field[Int] -case object ECCCode extends 
Field[Code] +case object NDTLBEntries extends Field[Int] -abstract trait L1HellaCacheParameters extends CacheParameters { +abstract trait L1HellaCacheParameters extends L1CacheParameters { val indexmsb = untagBits-1 val indexlsb = blockOffBits val offsetmsb = indexlsb-1 val offsetlsb = wordOffBits - - val co = params(TLCoherence) - val code = params(ECCCode) - val coreReqTagBits = params(CoreReqTagBits) - val coreDataBits = params(CoreDataBits) - val maxAddrBits = math.max(params(PPNBits),params(VPNBits)+1) + params(PgIdxBits) - val coreDataBytes = coreDataBits/8 val doNarrowRead = coreDataBits * nWays % rowBits == 0 val encDataBits = code.width(coreDataBits) val encRowBits = encDataBits*rowWords @@ -66,26 +55,26 @@ class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool) val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift) } -class HellaCacheReq extends L1HellaCacheBundle { +class HellaCacheReq extends CoreBundle { val kill = Bool() val typ = Bits(width = MT_SZ) val phys = Bool() - val addr = UInt(width = maxAddrBits) + val addr = UInt(width = coreMaxAddrBits) val data = Bits(width = coreDataBits) - val tag = Bits(width = coreReqTagBits) + val tag = Bits(width = coreDCacheReqTagBits) val cmd = Bits(width = M_SZ) } -class HellaCacheResp extends L1HellaCacheBundle { +class HellaCacheResp extends CoreBundle { val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() val typ = Bits(width = 3) val has_data = Bool() val data = Bits(width = coreDataBits) val data_subword = Bits(width = coreDataBits) - val tag = Bits(width = coreReqTagBits) + val tag = Bits(width = coreDCacheReqTagBits) val cmd = Bits(width = 4) - val addr = UInt(width = maxAddrBits) + val addr = UInt(width = coreMaxAddrBits) val store_data = Bits(width = coreDataBits) } @@ -100,22 +89,22 @@ class HellaCacheExceptions extends Bundle { } // interface between D$ and processor/DTLB -class HellaCacheIO extends L1HellaCacheBundle { +class HellaCacheIO 
extends CoreBundle { val req = Decoupled(new HellaCacheReq) val resp = Valid(new HellaCacheResp).flip - val replay_next = Valid(Bits(width = coreReqTagBits)).flip + val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip val xcpt = (new HellaCacheExceptions).asInput val ptw = new TLBPTWIO().flip val ordered = Bool(INPUT) } -class MSHRReq extends HellaCacheReq { +class MSHRReq extends HellaCacheReq with L1HellaCacheParameters { val tag_match = Bool() val old_meta = new L1Metadata val way_en = Bits(width = nWays) } -class Replay extends HellaCacheReq { +class Replay extends HellaCacheReq with L1HellaCacheParameters { val sdq_id = UInt(width = log2Up(params(StoreDataQueueDepth))) } @@ -704,7 +693,7 @@ class HellaCache extends L1HellaCacheModule { val s1_sc = s1_req.cmd === M_XSC val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) - val dtlb = Module(new TLB(params(NTLBEntries))) + val dtlb = Module(new TLB(params(NDTLBEntries))) dtlb.io.ptw <> io.cpu.ptw dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys dtlb.io.req.bits.passthrough := s1_req.phys diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index d0f49fbe..0b54af00 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -4,44 +4,24 @@ import Chisel._ import uncore._ import Util._ +case object WhichL1Cache extends Field[String] case object NDCachePorts extends Field[Int] case object NTilePorts extends Field[Int] +case object NPTWPorts extends Field[Int] case object BuildRoCC extends Field[Option[() => RoCC]] -case object RetireWidth extends Field[Int] -case object UseVM extends Field[Boolean] -case object FastLoadWord extends Field[Boolean] -case object FastLoadByte extends Field[Boolean] -case object FastMulDiv extends Field[Boolean] -case object DcacheReqTagBits extends Field[Int] -case object XprLen extends Field[Int] -case object NXpr extends Field[Int] -case object NXprBits extends Field[Int] -case object 
RocketDCacheParams extends Field[PF] -case object RocketFrontendParams extends Field[PF] class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { - - if(params(FastLoadByte)) require(params(FastLoadWord)) - require(params(RetireWidth) == 1) // for now... - val io = new Bundle { val tilelink = new TileLinkIO val host = new HTIFIO } - // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) - val optionalRoCC = params(BuildRoCC) + val icache = Module(new Frontend, { case CacheName => "L1I" }) + val dcache = Module(new HellaCache, { case CacheName => "L1D" }) + val ptw = Module(new PTW(params(NPTWPorts))) + val core = Module(new Core) - val p = params.alter(params(RocketFrontendParams)) // Used in icache, Core - val icache = Module(new Frontend)(p) //TODO PARAMS: best way to alter both? - val p2 = params.alter(params(RocketDCacheParams)) // Used in dcache, PTW, RoCCm Core - val dcache = Module(new HellaCache)(p2) - val ptw = Module(new PTW(if(optionalRoCC.isEmpty) 2 else 5))(p2) - // 2 ports, 1 from I$, 1 from D$, maybe 3 from RoCC - val p3 = params.alter(params(RocketFrontendParams)).alter(params(RocketDCacheParams)) - val core = Module(new Core)(p3) - - val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts)))(p2) + val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) dcArb.io.requestor(0) <> ptw.io.mem dcArb.io.requestor(1) <> core.io.dmem dcArb.io.mem <> dcache.io.cpu @@ -58,17 +38,19 @@ class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { memArb.io.in(dcPortId) <> dcache.io.mem memArb.io.in(1) <> icache.io.mem - if(!optionalRoCC.isEmpty) { - val rocc = Module(optionalRoCC.get()) - val dcIF = Module(new SimpleHellaCacheIF) - dcIF.io.requestor <> rocc.io.mem - core.io.rocc <> rocc.io - dcArb.io.requestor(2) <> dcIF.io.cache - memArb.io.in(2) <> rocc.io.imem - ptw.io.requestor(2) <> rocc.io.iptw - ptw.io.requestor(3) <> rocc.io.dptw - ptw.io.requestor(4) <> rocc.io.pptw 
- } + //If so specified, build an RoCC module and wire it in + params(BuildRoCC) + .map { br => Module(br()) } + .foreach { rocc => + val dcIF = Module(new SimpleHellaCacheIF) + dcIF.io.requestor <> rocc.io.mem + core.io.rocc <> rocc.io + dcArb.io.requestor(2) <> dcIF.io.cache + memArb.io.in(2) <> rocc.io.imem + ptw.io.requestor(2) <> rocc.io.iptw + ptw.io.requestor(3) <> rocc.io.dptw + ptw.io.requestor(4) <> rocc.io.pptw + } io.tilelink.acquire <> memArb.io.out.acquire io.tilelink.grant <> memArb.io.out.grant From f9922a106b9ccf072580ea97da98e9046ef66012 Mon Sep 17 00:00:00 2001 From: Scott Beamer Date: Tue, 2 Sep 2014 14:34:36 -0700 Subject: [PATCH 0759/1087] fixes sbt error during first run --- rocket/chisel-dependent.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/chisel-dependent.sbt b/rocket/chisel-dependent.sbt index e784e5e9..c65c7811 100644 --- a/rocket/chisel-dependent.sbt +++ b/rocket/chisel-dependent.sbt @@ -1,7 +1,7 @@ // Provide a managed dependency on chisel if -DchiselVersion="" is // supplied on the command line. -val chiselVersion = System.getProperty("chiselVersion", "None") +lazy val chiselVersion = System.getProperty("chiselVersion", "None") libraryDependencies ++= ( if (chiselVersion != "None" ) ( "edu.berkeley.cs" %% "chisel" % chiselVersion From 600c5d50a9b870a632ee098d61eb47a74bcc7584 Mon Sep 17 00:00:00 2001 From: Scott Beamer Date: Tue, 2 Sep 2014 15:14:56 -0700 Subject: [PATCH 0760/1087] better fix with explanation of sbt issue --- rocket/chisel-dependent.sbt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rocket/chisel-dependent.sbt b/rocket/chisel-dependent.sbt index c65c7811..1681364e 100644 --- a/rocket/chisel-dependent.sbt +++ b/rocket/chisel-dependent.sbt @@ -1,8 +1,10 @@ // Provide a managed dependency on chisel if -DchiselVersion="" is // supplied on the command line. 
-lazy val chiselVersion = System.getProperty("chiselVersion", "None") +val chiselVersion_r = System.getProperty("chiselVersion", "None") -libraryDependencies ++= ( if (chiselVersion != "None" ) ( - "edu.berkeley.cs" %% "chisel" % chiselVersion +// _r a temporary fix until sbt 13.6 https://github.com/sbt/sbt/issues/1465 + +libraryDependencies ++= ( if (chiselVersion_r != "None" ) ( + "edu.berkeley.cs" %% "chisel" % chiselVersion_r ) :: Nil; else Nil) From 5eb5e9eaf592f715e4d8da2af790b3d80e97784a Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 7 Sep 2014 17:54:41 -0700 Subject: [PATCH 0761/1087] Standardize ()=>Module(...) top-level Parameters --- rocket/src/main/scala/core.scala | 2 +- rocket/src/main/scala/tile.scala | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index a6dee39f..d5549ef1 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -49,7 +49,7 @@ class Core extends Module with CoreParameters //If so specified, build an FPU module and wire it in params(BuildFPU) - .map { bf => Module(bf()) } + .map { bf => bf() } .foreach { fpu => dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 0b54af00..a332c089 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -4,7 +4,7 @@ import Chisel._ import uncore._ import Util._ -case object WhichL1Cache extends Field[String] +case object CoreName extends Field[String] case object NDCachePorts extends Field[Int] case object NTilePorts extends Field[Int] case object NPTWPorts extends Field[Int] @@ -16,10 +16,10 @@ class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { val host = new HTIFIO } - val icache = Module(new Frontend, { case CacheName => "L1I" }) + val icache = Module(new Frontend, { case CacheName => "L1I"; case CoreName => "Rocket" }) 
val dcache = Module(new HellaCache, { case CacheName => "L1D" }) val ptw = Module(new PTW(params(NPTWPorts))) - val core = Module(new Core) + val core = Module(new Core, { case CoreName => "Rocket" }) val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) dcArb.io.requestor(0) <> ptw.io.mem @@ -40,11 +40,11 @@ class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { //If so specified, build an RoCC module and wire it in params(BuildRoCC) - .map { br => Module(br()) } + .map { br => br() } .foreach { rocc => val dcIF = Module(new SimpleHellaCacheIF) - dcIF.io.requestor <> rocc.io.mem core.io.rocc <> rocc.io + dcIF.io.requestor <> rocc.io.mem dcArb.io.requestor(2) <> dcIF.io.cache memArb.io.in(2) <> rocc.io.imem ptw.io.requestor(2) <> rocc.io.iptw @@ -62,4 +62,5 @@ class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { dcache.io.mem.release.ready := io.tilelink.release.ready io.tilelink.release.bits := dcache.io.mem.release.bits io.tilelink.release.bits.payload.client_xact_id := Cat(dcache.io.mem.release.bits.payload.client_xact_id, UInt(dcPortId, log2Up(params(NTilePorts)))) + } From a999c055edb9b4d8ce1566401ec29716f1cc5ed2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 11 Sep 2014 01:46:42 -0700 Subject: [PATCH 0762/1087] Don't take an interrupt when EX stage PC is invalid It was possible to take an interrupt on the instruction in the shadow of a short forward branch. EPC would thus get the wrong value, and so a wrong-path instruction would be executed upon return from interrupt. 
h/t Yunsup --- rocket/src/main/scala/dpath.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 84a0a400..24e9c428 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -240,7 +240,7 @@ class Datapath extends Module Mux(io.ctrl.mem_branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), Mux(!io.ctrl.mem_jalr && !io.ctrl.mem_branch, imm(IMM_UJ, mem_reg_inst), SInt(4))) val mem_npc = Mux(io.ctrl.mem_jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(params(VAddrBits)-1,0)), mem_br_target) - io.ctrl.mem_misprediction := mem_npc != Mux(io.ctrl.ex_valid, ex_reg_pc, id_pc) + io.ctrl.mem_misprediction := mem_npc != ex_reg_pc || !io.ctrl.ex_valid io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1 val mem_int_wdata = Mux(io.ctrl.mem_jalr, mem_br_target, mem_reg_wdata) From 25180b71f784396d7ee1be1a496d15c992c150f8 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Fri, 12 Sep 2014 15:36:42 -0700 Subject: [PATCH 0763/1087] add LICENSE --- rocket/LICENSE | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 rocket/LICENSE diff --git a/rocket/LICENSE b/rocket/LICENSE new file mode 100644 index 00000000..60e19fad --- /dev/null +++ b/rocket/LICENSE @@ -0,0 +1,24 @@ +Copyright (c) 2011-2014, The Regents of the University of California +(Regents). All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. 
Neither the name of the Regents nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, +SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING +OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS +BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED +HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE +MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. From 8abf62fae345b05be38ecb79689757345f8dd476 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Fri, 12 Sep 2014 18:06:41 -0700 Subject: [PATCH 0764/1087] add LICENSE --- rocket/src/main/scala/arbiter.scala | 2 ++ rocket/src/main/scala/btb.scala | 2 ++ rocket/src/main/scala/consts.scala | 2 ++ rocket/src/main/scala/core.scala | 2 ++ rocket/src/main/scala/csr.scala | 2 ++ rocket/src/main/scala/ctrl.scala | 2 ++ rocket/src/main/scala/decode.scala | 2 ++ rocket/src/main/scala/dpath.scala | 2 ++ rocket/src/main/scala/dpath_alu.scala | 2 ++ rocket/src/main/scala/ecc.scala | 2 ++ rocket/src/main/scala/fpu.scala | 2 ++ rocket/src/main/scala/icache.scala | 2 ++ rocket/src/main/scala/instructions.scala | 2 ++ rocket/src/main/scala/multiplier.scala | 2 ++ rocket/src/main/scala/nbdcache.scala | 2 ++ rocket/src/main/scala/package.scala | 2 ++ rocket/src/main/scala/ptw.scala | 2 ++ rocket/src/main/scala/rocc.scala | 2 ++ rocket/src/main/scala/tile.scala | 2 ++ rocket/src/main/scala/tlb.scala | 2 ++ rocket/src/main/scala/util.scala | 2 ++ 21 files changed, 42 insertions(+) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 
917cb816..b2a8482f 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 7cf24b5d..02ca111e 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 2b59413c..464ac39b 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket package constants diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index d5549ef1..f02f1424 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 5a2cd7cf..b1e76254 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 78396897..d72eba45 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 6f27b4dd..f86a63b4 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. 
+ package rocket import Chisel._ diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 24e9c428..7df1a8d3 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index f2d565cd..4f0edc87 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/ecc.scala b/rocket/src/main/scala/ecc.scala index cc116ae5..a0612a1d 100644 --- a/rocket/src/main/scala/ecc.scala +++ b/rocket/src/main/scala/ecc.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index b2db3502..9f069b45 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 1f85a22b..fb2dbb0c 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 5be33ff4..56ebf3c1 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. 
+ package rocket import Chisel._ diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 6a595c4b..a09f8586 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index cdf360c4..9ed5658f 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index 0fcb6580..85a0bf11 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package object rocket extends rocket.constants.ScalarOpConstants with rocket.constants.VectorOpConstants diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index db555957..3663a6b4 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index c0d8decb..d3099043 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index a332c089..34533e80 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. 
+ package rocket import Chisel._ diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 97d735b2..c5acad45 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 0fc0805f..485dc57f 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -1,3 +1,5 @@ +// See LICENSE for license details. + package rocket import Chisel._ From 238f7761f683cf0e363ef9de3d64a90e66bee3de Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 17 Sep 2014 11:23:25 -0700 Subject: [PATCH 0765/1087] update README --- rocket/README.md | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/rocket/README.md b/rocket/README.md index b9992a4c..72707989 100644 --- a/rocket/README.md +++ b/rocket/README.md @@ -1,5 +1,5 @@ -Rocket Core Generator -================================================================ +Rocket Core +=========== Rocket is a 6-stage single-issue in-order pipeline that executes the 64-bit scalar RISC-V ISA. Rocket implements an MMU that supports page-based virtual @@ -8,13 +8,9 @@ also has an optional IEEE 754-2008-compliant FPU, which implements both single- and double-precision floating-point operations, including fused multiply-add. -We plan to open-source our Rocket core generator written in Chisel in the near -future. We are currently in the process of cleaning up the repository. Please stay tuned. - -Currently, a Rocket core with an 8 KB direct-mapped L1 instruction cache -and an 8 KB direct-mapped L1 data cache has been instantiated and committed to -the fpga-zynq infrastructure repository. A copy of the generated Verilog is available -[here](https://raw.githubusercontent.com/ucb-bar/fpga-zynq/master/hw/src/verilog/Slave.v). +This repository is not intended to be a self-running repository. 
To +instantiate a Rocket core, please use the Rocket chip generator found in the +rocket-chip git repository. The following table compares a 32-bit ARM Cortex-A5 core to a 64-bit RISC-V Rocket core built in the same TSMC process (40GPLUS). Fourth column is the From 180d3d365d21f93d91b02e257ee4a6fa4be62f04 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Wed, 17 Sep 2014 14:24:03 -0700 Subject: [PATCH 0766/1087] Expanded front-end to support superscalar fetch. --- rocket/src/main/scala/btb.scala | 13 +++++++++++++ rocket/src/main/scala/core.scala | 3 +++ rocket/src/main/scala/dpath.scala | 4 ++-- rocket/src/main/scala/icache.scala | 25 +++++++++++++++++-------- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 02ca111e..e3762bfd 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -62,6 +62,14 @@ class BHT(nbht: Int) { val history = Reg(UInt(width = nbhtbits)) } +// BTB update occurs during branch resolution (i.e., PC redirection if a mispredict). +// - "pc" is what future fetch PCs will tag match against. +// - "br_pc" is the PC of the branch instruction. +// - "resp.mask" provides a mask of valid instructions (instructions are +// masked off by the predicted, taken branch). +// Assumption: superscalar commits are batched together into a single +// "taken" update ("history compression"), and correspond to the +// superscalar fetch 1:1. 
class BTBUpdate extends Bundle with BTBParameters { val prediction = Valid(new BTBResp) val pc = UInt(width = vaddrBits) @@ -71,11 +79,13 @@ class BTBUpdate extends Bundle with BTBParameters { val isJump = Bool() val isCall = Bool() val isReturn = Bool() + val br_pc = UInt(width = vaddrBits) val incorrectTarget = Bool() } class BTBResp extends Bundle with BTBParameters { val taken = Bool() + val mask = Bits(width = log2Up(params(FetchWidth))) val target = UInt(width = vaddrBits) val entry = UInt(width = opaqueBits) val bht = new BHTResp @@ -102,6 +112,7 @@ class BTB extends Module with BTBParameters { val useRAS = Reg(UInt(width = entries)) val isJump = Reg(UInt(width = entries)) + val brIdx = Mem(UInt(width=log2Up(params(FetchWidth))), entries) private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { @@ -167,6 +178,7 @@ class BTB extends Module with BTBParameters { tgtPages(waddr) := tgtPageUpdate useRAS(waddr) := update.bits.isReturn isJump(waddr) := update.bits.isJump + brIdx(waddr) := update.bits.br_pc } require(nPages % 2 == 0) @@ -193,6 +205,7 @@ class BTB extends Module with BTBParameters { io.resp.bits.taken := io.resp.valid io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) + io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) if (nBHT > 0) { val bht = new BHT(nBHT) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index f02f1424..43409b46 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -9,6 +9,7 @@ import uncore._ case object BuildFPU extends Field[Option[() => FPU]] case object XprLen extends Field[Int] case object NMultXpr extends Field[Int] +case object FetchWidth extends Field[Int] case object RetireWidth extends Field[Int] case object UseVM extends Field[Boolean] case object FastLoadWord extends Field[Boolean] @@ -20,6 +21,7 @@ case object 
CoreDCacheReqTagBits extends Field[Int] abstract trait CoreParameters extends UsesParameters { val xprLen = params(XprLen) + val coreFetchWidth = params(FetchWidth) val coreInstBits = params(CoreInstBits) val coreInstBytes = coreInstBits/8 val coreDataBits = xprLen @@ -28,6 +30,7 @@ abstract trait CoreParameters extends UsesParameters { val coreMaxAddrBits = math.max(params(PPNBits),params(VPNBits)+1) + params(PgIdxBits) if(params(FastLoadByte)) require(params(FastLoadWord)) + require(params(FetchWidth) == 1) // for now... require(params(RetireWidth) == 1) // for now... } abstract class CoreBundle extends Bundle with CoreParameters diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 7df1a8d3..ef4f353c 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -47,9 +47,9 @@ class Datapath extends Module val wb_reg_rs2 = Reg(Bits()) // instruction decode stage - val id_inst = io.imem.resp.bits.data + val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1) val id_pc = io.imem.resp.bits.pc - + class RegFile { private val rf = Mem(UInt(width = 64), 31) private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index fb2dbb0c..05c36902 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -1,5 +1,3 @@ -// See LICENSE for license details. 
- package rocket import Chisel._ @@ -24,7 +22,8 @@ class FrontendReq extends CoreBundle { class FrontendResp extends CoreBundle { val pc = UInt(width = params(VAddrBits)+1) // ID stage PC - val data = Bits(width = coreInstBits) + val data = Vec.fill(coreFetchWidth) (Bits(width = coreInstBits)) + val mask = Bits(width = coreFetchWidth) val xcpt_ma = Bool() val xcpt_if = Bool() } @@ -60,12 +59,12 @@ class Frontend extends FrontendModule val msb = vaddrBits-1 val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) - val pcp4_0 = s1_pc + UInt(coreInstBytes) - val pcp4 = Cat(s1_pc(msb) & pcp4_0(msb), pcp4_0(msb,0)) + val ntpc_0 = s1_pc + UInt(coreInstBytes) + val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,0)) val icmiss = s2_valid && !icache.io.resp.valid - val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, pcp4) + val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt - val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((pcp4 & rowBytes) === (s1_pc & rowBytes)) + val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) val stall = io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { @@ -106,7 +105,17 @@ class Frontend extends FrontendModule io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) io.cpu.resp.bits.pc := s2_pc & SInt(-coreInstBytes) // discard PC LSBs - io.cpu.resp.bits.data := icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreInstBytes)) << log2Up(coreInstBits)) + + + val fetch_data = icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) + for (i <- 0 until coreFetchWidth) { + io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) + } + + val all_ones = UInt((1 << coreFetchWidth)-1) + val msk_pc = all_ones << 
s2_pc(log2Up(coreFetchWidth)-1+2,2) + io.cpu.resp.bits.mask := msk_pc & btb.io.resp.bits.mask + io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(coreInstBytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if From 3e256439c9bc121c824360c02e08dd876c3ca409 Mon Sep 17 00:00:00 2001 From: Adam Izraelevitz Date: Wed, 24 Sep 2014 13:04:20 -0700 Subject: [PATCH 0767/1087] Add abstract class Tile --- rocket/src/main/scala/tile.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 34533e80..04c7753d 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -12,11 +12,14 @@ case object NTilePorts extends Field[Int] case object NPTWPorts extends Field[Int] case object BuildRoCC extends Field[Option[() => RoCC]] -class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { +abstract class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { val io = new Bundle { val tilelink = new TileLinkIO val host = new HTIFIO } +} + +class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { val icache = Module(new Frontend, { case CacheName => "L1I"; case CoreName => "Rocket" }) val dcache = Module(new HellaCache, { case CacheName => "L1D" }) From b55c38cdc716a2573238f658bd9cdcb72e3b55d6 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 24 Sep 2014 22:17:28 -0700 Subject: [PATCH 0768/1087] Remove spurious vec consts --- rocket/src/main/scala/consts.scala | 25 ------------------------- rocket/src/main/scala/package.scala | 3 +-- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 464ac39b..4c601d81 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -61,28 +61,3 @@ trait ScalarOpConstants { val RA = UInt(1, 5) } - -trait VectorOpConstants { - val VEC_X = Bits("b??", 2).toUInt - val VEC_FN_N = 
UInt(0, 2) - val VEC_VL = UInt(1, 2) - val VEC_CFG = UInt(2, 2) - val VEC_CFGVL = UInt(3, 2) - - val VCMD_I = UInt(0, 3) - val VCMD_F = UInt(1, 3) - val VCMD_TX = UInt(2, 3) - val VCMD_TF = UInt(3, 3) - val VCMD_MX = UInt(4, 3) - val VCMD_MF = UInt(5, 3) - val VCMD_A = UInt(6, 3) - val VCMD_X = UInt(0, 3) - - val VIMM_VLEN = UInt(0, 1) - val VIMM_ALU = UInt(1, 1) - val VIMM_X = UInt(0, 1) - - val VIMM2_RS2 = UInt(0, 1) - val VIMM2_ALU = UInt(1, 1) - val VIMM2_X = UInt(0, 1) -} diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index 85a0bf11..e24c646f 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -1,8 +1,7 @@ // See LICENSE for license details. package object rocket extends - rocket.constants.ScalarOpConstants with - rocket.constants.VectorOpConstants + rocket.constants.ScalarOpConstants { val START_ADDR = 0x2000 } From 8eb64205f5f004f54f6ca99001c175ba842d93f8 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 25 Sep 2014 11:59:19 -0700 Subject: [PATCH 0769/1087] bug fix for nbdcache s2_data --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9ed5658f..3d9bc150 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -817,7 +817,7 @@ class HellaCache extends L1HellaCacheModule { val regs = Vec.fill(rowWords){Reg(Bits(width = encDataBits))} val en1 = s1_clk_en && s1_tag_eq_way(w) for (i <- 0 until regs.size) { - val en = en1 && (Bool(i == 0 || !doNarrowRead) || s1_writeback) + val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback) when (en) { regs(i) := data.io.resp(w) >> encDataBits*i } } s2_data(w) := regs.toBits From 868e74765697add459e3ee2577c58894aefaf20f Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Thu, 25 Sep 2014 18:52:58 -0700 Subject: [PATCH 0770/1087] Factored out Rocket 
specifics from CoreParameters - Added new RocketCoreParameters - Other cores using Rocket as a library will no longer conflict against Rocket's requires(). --- rocket/src/main/scala/core.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index f02f1424..ecb41f01 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -28,10 +28,15 @@ abstract trait CoreParameters extends UsesParameters { val coreMaxAddrBits = math.max(params(PPNBits),params(VPNBits)+1) + params(PgIdxBits) if(params(FastLoadByte)) require(params(FastLoadWord)) +} + +abstract trait RocketCoreParameters extends CoreParameters +{ require(params(RetireWidth) == 1) // for now... } -abstract class CoreBundle extends Bundle with CoreParameters -abstract class CoreModule extends Module with CoreParameters + +abstract class CoreBundle extends Bundle with RocketCoreParameters +abstract class CoreModule extends Module with RocketCoreParameters class RocketIO extends Bundle { From f9178100616240ea1801a327fe5f7aaa40a90285 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Fri, 26 Sep 2014 05:14:50 -0700 Subject: [PATCH 0771/1087] Removed RocketCoreParameters from use. - The nbdcache (among others?) use CoreParameters, which has nothing to do with RetireWidth requirements. - This conflicts with other cores which uses nbdcache. - RocketCoreParameters may be unnecessary, and the require() check can be moved deeper into Rocket. --- rocket/src/main/scala/core.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index ecb41f01..a49ca7e2 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -35,8 +35,8 @@ abstract trait RocketCoreParameters extends CoreParameters require(params(RetireWidth) == 1) // for now...
} -abstract class CoreBundle extends Bundle with RocketCoreParameters -abstract class CoreModule extends Module with RocketCoreParameters +abstract class CoreBundle extends Bundle with CoreParameters +abstract class CoreModule extends Module with CoreParameters class RocketIO extends Bundle { From a71bdbbc5418d8325a45ac89e0ede1d4efb79b4e Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Fri, 19 Sep 2014 15:05:45 -0700 Subject: [PATCH 0772/1087] Update history register in fetch speculatively --- rocket/src/main/scala/btb.scala | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 02ca111e..f6de407e 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -41,7 +41,9 @@ class RAS(nras: Int) { } class BHTResp extends Bundle with BTBParameters { + // TODO only carry history, not both index and history val index = UInt(width = log2Up(nBHT).max(1)) + val history = UInt(width = log2Up(nBHT).max(1)) val value = UInt(width = 2) } @@ -50,12 +52,19 @@ class BHT(nbht: Int) { def get(addr: UInt): BHTResp = { val res = new BHTResp res.index := addr(nbhtbits+1,2) ^ history + res.history := history res.value := table(res.index) + // TODO we actually want to include the final prediction result from the BTB + val taken = res.value(0) + // TODO only update history on an actual instruction fetch + history := Cat(taken, history(nbhtbits-1,1)) res } - def update(d: BHTResp, taken: Bool): Unit = { + def update(d: BHTResp, taken: Bool, mispredict: Bool): Unit = { table(d.index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) - history := Cat(taken, history(nbhtbits-1,1)) + when (mispredict) { + history := Cat(taken, d.history(nbhtbits-1,1)) + } } private val table = Mem(UInt(width = 2), nbht) @@ -197,7 +206,7 @@ class BTB extends Module with BTBParameters { if (nBHT > 0) { val bht = new BHT(nBHT) val res = bht.get(io.req) - 
when (update.valid && updateHit && !update.bits.isJump) { bht.update(update.bits.prediction.bits.bht, update.bits.taken) } + when (update.valid && updateHit && !update.bits.isJump) { bht.update(update.bits.prediction.bits.bht, update.bits.taken, update.bits.incorrectTarget) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } io.resp.bits.bht := res } From 681b43f3983bc9303db0d63a65ae7f133b22f8bf Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Fri, 26 Sep 2014 10:39:57 -0700 Subject: [PATCH 0773/1087] Bug fixes with global history register. - Updated in fetch speculatively. * Updates gated off by cpu.resp.fire(). * BTB direction factored into history update. - All branches update the BHT. - Each instruction carries history; index into BHT is recomputed by passing in mem_reg_pc. --- rocket/src/main/scala/btb.scala | 41 +++++++++++++++--------------- rocket/src/main/scala/icache.scala | 3 ++- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index f6de407e..7dfd15a2 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -8,7 +8,7 @@ import Node._ import uncore._ case object NBTBEntries extends Field[Int] -case object NRAS extends Field[Int] +case object NRAS extends Field[Int] abstract trait BTBParameters extends UsesParameters { val vaddrBits = params(VAddrBits) @@ -41,27 +41,26 @@ class RAS(nras: Int) { } class BHTResp extends Bundle with BTBParameters { - // TODO only carry history, not both index and history - val index = UInt(width = log2Up(nBHT).max(1)) val history = UInt(width = log2Up(nBHT).max(1)) val value = UInt(width = 2) } class BHT(nbht: Int) { - val nbhtbits = log2Up(nbht) - def get(addr: UInt): BHTResp = { + val nbhtbits = log2Up(nbht) + def get(enable: Bool, addr: UInt, btb_hit: Bool): BHTResp = { val res = new BHTResp - res.index := addr(nbhtbits+1,2) ^ history + val index = addr(nbhtbits+1,2) ^ history 
res.history := history - res.value := table(res.index) - // TODO we actually want to include the final prediction result from the BTB - val taken = res.value(0) - // TODO only update history on an actual instruction fetch - history := Cat(taken, history(nbhtbits-1,1)) + res.value := table(index) + val taken = res.value(0) && btb_hit + when (enable) { + history := Cat(taken, history(nbhtbits-1,1)) + } res } - def update(d: BHTResp, taken: Bool, mispredict: Bool): Unit = { - table(d.index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) + def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = { + val index = addr(nbhtbits+1,2) ^ d.history + table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } @@ -93,7 +92,7 @@ class BTBResp extends Bundle with BTBParameters { // fully-associative branch target buffer class BTB extends Module with BTBParameters { val io = new Bundle { - val req = UInt(INPUT, vaddrBits) + val req = Valid(UInt(INPUT, vaddrBits)).flip val resp = Valid(new BTBResp) val update = Valid(new BTBUpdate).flip val invalidate = Bool(INPUT) @@ -125,10 +124,10 @@ class BTB extends Module with BTBParameters { } val update = Pipe(io.update) - val update_target = io.req + val update_target = io.req.bits - val pageHit = pageMatch(io.req) - val hits = tagMatch(io.req, pageHit) + val pageHit = pageMatch(io.req.bits) + val hits = tagMatch(io.req.bits, pageHit) val updatePageHit = pageMatch(update.bits.pc) val updateHits = tagMatch(update.bits.pc, updatePageHit) @@ -169,7 +168,7 @@ class BTB extends Module with BTBParameters { idxValid(waddr) := updateValid when (updateTarget) { - assert(io.req === update.bits.target, "BTB request != I$ target") + assert(io.req.bits === update.bits.target, "BTB request != I$ target") idxs(waddr) := update.bits.pc tgts(waddr) := update_target idxPages(waddr) := idxPageUpdate @@ -205,8 
+204,10 @@ class BTB extends Module with BTBParameters { if (nBHT > 0) { val bht = new BHT(nBHT) - val res = bht.get(io.req) - when (update.valid && updateHit && !update.bits.isJump) { bht.update(update.bits.prediction.bits.bht, update.bits.taken, update.bits.incorrectTarget) } + val res = bht.get(io.req.valid, io.req.bits, hits.orR) + when (update.valid && !update.bits.isJump) { + bht.update(update.bits.pc, update.bits.prediction.bits.bht, update.bits.taken, update.bits.incorrectTarget) + } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } io.resp.bits.bht := res } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index fb2dbb0c..da1f4c85 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -85,7 +85,8 @@ class Frontend extends FrontendModule s2_valid := Bool(false) } - btb.io.req := s1_pc & SInt(-coreInstBytes) + btb.io.req.valid := io.cpu.resp.fire() + btb.io.req.bits := s1_pc & SInt(-coreInstBytes) btb.io.update := io.cpu.btb_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate From 8ccd07cfeb5af8e487758eb44b49e9d28277f5c4 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Sun, 28 Sep 2014 05:16:36 -0700 Subject: [PATCH 0774/1087] Moved updating global history from fetch to decode. - No longer update global history in fetch stage. - Only update global history when instruction is a branch. - Does allow for the possibility of back-to-back branches to see slightly different histories on subsequent executions. 
--- rocket/src/main/scala/btb.scala | 27 +++++++++++++++------------ rocket/src/main/scala/icache.scala | 27 ++++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 7dfd15a2..644b02ee 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -47,17 +47,16 @@ class BHTResp extends Bundle with BTBParameters { class BHT(nbht: Int) { val nbhtbits = log2Up(nbht) - def get(enable: Bool, addr: UInt, btb_hit: Bool): BHTResp = { + def get(addr: UInt): BHTResp = { val res = new BHTResp val index = addr(nbhtbits+1,2) ^ history - res.history := history res.value := table(index) - val taken = res.value(0) && btb_hit - when (enable) { - history := Cat(taken, history(nbhtbits-1,1)) - } + res.history := history res } + def updateSpeculativeHistory(taken: Bool): Unit = { + history := Cat(taken, history(nbhtbits-1,1)) + } def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = { val index = addr(nbhtbits+1,2) ^ d.history table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) @@ -92,9 +91,10 @@ class BTBResp extends Bundle with BTBParameters { // fully-associative branch target buffer class BTB extends Module with BTBParameters { val io = new Bundle { - val req = Valid(UInt(INPUT, vaddrBits)).flip + val req = UInt(INPUT, vaddrBits) val resp = Valid(new BTBResp) val update = Valid(new BTBUpdate).flip + val decode = Valid(new Bundle{val taken = Bool()}).flip val invalidate = Bool(INPUT) } @@ -124,10 +124,10 @@ class BTB extends Module with BTBParameters { } val update = Pipe(io.update) - val update_target = io.req.bits + val update_target = io.req - val pageHit = pageMatch(io.req.bits) - val hits = tagMatch(io.req.bits, pageHit) + val pageHit = pageMatch(io.req) + val hits = tagMatch(io.req, pageHit) val updatePageHit = pageMatch(update.bits.pc) val updateHits = tagMatch(update.bits.pc, 
updatePageHit) @@ -168,7 +168,7 @@ class BTB extends Module with BTBParameters { idxValid(waddr) := updateValid when (updateTarget) { - assert(io.req.bits === update.bits.target, "BTB request != I$ target") + assert(io.req === update.bits.target, "BTB request != I$ target") idxs(waddr) := update.bits.pc tgts(waddr) := update_target idxPages(waddr) := idxPageUpdate @@ -204,7 +204,10 @@ class BTB extends Module with BTBParameters { if (nBHT > 0) { val bht = new BHT(nBHT) - val res = bht.get(io.req.valid, io.req.bits, hits.orR) + val res = bht.get(io.req) + when (io.decode.valid) { + bht.updateSpeculativeHistory(io.decode.bits.taken) + } when (update.valid && !update.bits.isJump) { bht.update(update.bits.pc, update.bits.prediction.bits.bht, update.bits.taken, update.bits.incorrectTarget) } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index da1f4c85..4119683d 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -5,6 +5,7 @@ package rocket import Chisel._ import uncore._ import Util._ +import Instructions._ case object NITLBEntries extends Field[Int] case object ECCCode extends Field[Option[Code]] @@ -44,7 +45,7 @@ class Frontend extends FrontendModule val cpu = new CPUFrontendIO().flip val mem = new UncachedTileLinkIO } - + val btb = Module(new BTB) val icache = Module(new ICache) val tlb = Module(new TLB(params(NITLBEntries))) @@ -85,10 +86,12 @@ class Frontend extends FrontendModule s2_valid := Bool(false) } - btb.io.req.valid := io.cpu.resp.fire() - btb.io.req.bits := s1_pc & SInt(-coreInstBytes) + btb.io.req := s1_pc & SInt(-coreInstBytes) btb.io.update := io.cpu.btb_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate + btb.io.decode.valid := io.cpu.resp.valid && DecodeIsBr(io.cpu.resp.bits.data) + btb.io.decode.bits.taken := Reg(next=btb.io.resp.bits.taken) + tlb.io.ptw <> io.cpu.ptw tlb.io.req.valid := !stall && !icmiss @@ -285,3 +288,21 @@ class ICache extends 
FrontendModule } } } + +object DecodeIsBr { + def apply(inst: Bits): Bool = { + val signal = DecodeLogic(inst.toUInt, List(N), + Array(//JAL -> List(Y), + //JALR -> List(Y), + BEQ -> List(Y), + BNE -> List(Y), + BGE -> List(Y), + BGEU -> List(Y), + BLT -> List(Y), + BLTU -> List(Y))) + + val (is_br: Bool) :: Nil = signal + is_br + } +} + From 9cc35dee9a46dec0c906c976b2ac24d5636d8280 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Mon, 29 Sep 2014 21:41:07 -0700 Subject: [PATCH 0775/1087] Returned history update to fetch. - Global history only contains branches. - Only update BHT and history on BTB hits. - Gate off speculative update on stall or icmiss. - Fixed bug where BHT updates were delayed a cycle. --- rocket/src/main/scala/btb.scala | 69 +++++++++++++++--------------- rocket/src/main/scala/icache.scala | 25 +---------- 2 files changed, 36 insertions(+), 58 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 644b02ee..0f1ee962 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -47,22 +47,19 @@ class BHTResp extends Bundle with BTBParameters { class BHT(nbht: Int) { val nbhtbits = log2Up(nbht) - def get(addr: UInt): BHTResp = { + def get(addr: UInt, update: Bool): BHTResp = { val res = new BHTResp val index = addr(nbhtbits+1,2) ^ history res.value := table(index) res.history := history + val taken = res.value(0) + when (update) { history := Cat(taken, history(nbhtbits-1,1)) } res } - def updateSpeculativeHistory(taken: Bool): Unit = { - history := Cat(taken, history(nbhtbits-1,1)) - } def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = { val index = addr(nbhtbits+1,2) ^ d.history table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) - when (mispredict) { - history := Cat(taken, d.history(nbhtbits-1,1)) - } + when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } } private val table = Mem(UInt(width = 
2), nbht) @@ -88,13 +85,16 @@ class BTBResp extends Bundle with BTBParameters { val bht = new BHTResp } +class BTBReq extends Bundle with BTBParameters { + val addr = UInt(width = vaddrBits) +} + // fully-associative branch target buffer class BTB extends Module with BTBParameters { val io = new Bundle { - val req = UInt(INPUT, vaddrBits) + val req = Valid(new BTBReq).flip val resp = Valid(new BTBResp) val update = Valid(new BTBUpdate).flip - val decode = Valid(new Bundle{val taken = Bool()}).flip val invalidate = Bool(INPUT) } @@ -123,23 +123,23 @@ class BTB extends Module with BTBParameters { idxValid & idxMatch & idxPageMatch } - val update = Pipe(io.update) - val update_target = io.req + val r_update = Pipe(io.update) + val update_target = io.req.bits.addr - val pageHit = pageMatch(io.req) - val hits = tagMatch(io.req, pageHit) - val updatePageHit = pageMatch(update.bits.pc) - val updateHits = tagMatch(update.bits.pc, updatePageHit) + val pageHit = pageMatch(io.req.bits.addr) + val hits = tagMatch(io.req.bits.addr, pageHit) + val updatePageHit = pageMatch(r_update.bits.pc) + val updateHits = tagMatch(r_update.bits.pc, updatePageHit) - private var lfsr = LFSR16(update.valid) + private var lfsr = LFSR16(r_update.valid) def rand(width: Int) = { lfsr = lfsr(lfsr.getWidth-1,1) Random.oneHot(width, lfsr) } - val updateHit = update.bits.prediction.valid - val updateValid = update.bits.incorrectTarget || updateHit && Bool(nBHT > 0) - val updateTarget = updateValid && update.bits.incorrectTarget + val updateHit = r_update.bits.prediction.valid + val updateValid = r_update.bits.incorrectTarget || updateHit && Bool(nBHT > 0) + val updateTarget = updateValid && r_update.bits.incorrectTarget val useUpdatePageHit = updatePageHit.orR val doIdxPageRepl = updateTarget && !useUpdatePageHit @@ -148,7 +148,7 @@ class BTB extends Module with BTBParameters { val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) - val samePage = 
page(update.bits.pc) === page(update_target) + val samePage = page(r_update.bits.pc) === page(update_target) val usePageHit = (pageHit & ~idxPageReplEn).orR val doTgtPageRepl = updateTarget && !samePage && !usePageHit val tgtPageRepl = Mux(samePage, idxPageUpdateOH, idxPageUpdateOH(nPages-2,0) << 1 | idxPageUpdateOH(nPages-1)) @@ -157,24 +157,24 @@ class BTB extends Module with BTBParameters { val doPageRepl = doIdxPageRepl || doTgtPageRepl val pageReplEn = idxPageReplEn | tgtPageReplEn - idxPageRepl := UIntToOH(Counter(update.valid && doPageRepl, nPages)._1) + idxPageRepl := UIntToOH(Counter(r_update.valid && doPageRepl, nPages)._1) - when (update.valid && !(updateValid && !updateTarget)) { + when (r_update.valid && !(updateValid && !updateTarget)) { val nextRepl = Counter(!updateHit && updateValid, entries)._1 - val waddr = Mux(updateHit, update.bits.prediction.bits.entry, nextRepl) + val waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl) // invalidate entries if we stomp on pages they depend upon idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits idxValid(waddr) := updateValid when (updateTarget) { - assert(io.req === update.bits.target, "BTB request != I$ target") - idxs(waddr) := update.bits.pc + assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target") + idxs(waddr) := r_update.bits.pc tgts(waddr) := update_target idxPages(waddr) := idxPageUpdate tgtPages(waddr) := tgtPageUpdate - useRAS(waddr) := update.bits.isReturn - isJump(waddr) := update.bits.isJump + useRAS(waddr) := r_update.bits.isReturn + isJump(waddr) := r_update.bits.isJump } require(nPages % 2 == 0) @@ -185,9 +185,9 @@ class BTB extends Module with BTBParameters { when (en && pageReplEn(i)) { pages(i) := data } writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), - Mux(idxWritesEven, page(update.bits.pc), page(update_target))) + Mux(idxWritesEven, page(r_update.bits.pc), 
page(update_target))) writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl), - Mux(idxWritesEven, page(update_target), page(update.bits.pc))) + Mux(idxWritesEven, page(update_target), page(r_update.bits.pc))) when (doPageRepl) { pageValid := pageValid | pageReplEn } } @@ -204,12 +204,11 @@ class BTB extends Module with BTBParameters { if (nBHT > 0) { val bht = new BHT(nBHT) - val res = bht.get(io.req) - when (io.decode.valid) { - bht.updateSpeculativeHistory(io.decode.bits.taken) - } - when (update.valid && !update.bits.isJump) { - bht.update(update.bits.pc, update.bits.prediction.bits.bht, update.bits.taken, update.bits.incorrectTarget) + val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump)) + val update_btb_hit = io.update.bits.prediction.valid + when (io.update.valid && update_btb_hit && !io.update.bits.isJump) { + bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, + io.update.bits.taken, io.update.bits.incorrectTarget) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } io.resp.bits.bht := res diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 4119683d..45327e46 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -5,7 +5,6 @@ package rocket import Chisel._ import uncore._ import Util._ -import Instructions._ case object NITLBEntries extends Field[Int] case object ECCCode extends Field[Option[Code]] @@ -86,12 +85,10 @@ class Frontend extends FrontendModule s2_valid := Bool(false) } - btb.io.req := s1_pc & SInt(-coreInstBytes) + btb.io.req.valid := !stall && !icmiss + btb.io.req.bits.addr := s1_pc & SInt(-coreInstBytes) btb.io.update := io.cpu.btb_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate - btb.io.decode.valid := io.cpu.resp.valid && DecodeIsBr(io.cpu.resp.bits.data) - btb.io.decode.bits.taken := Reg(next=btb.io.resp.bits.taken) - tlb.io.ptw <> io.cpu.ptw tlb.io.req.valid 
:= !stall && !icmiss @@ -288,21 +285,3 @@ class ICache extends FrontendModule } } } - -object DecodeIsBr { - def apply(inst: Bits): Bool = { - val signal = DecodeLogic(inst.toUInt, List(N), - Array(//JAL -> List(Y), - //JALR -> List(Y), - BEQ -> List(Y), - BNE -> List(Y), - BGE -> List(Y), - BGEU -> List(Y), - BLT -> List(Y), - BLTU -> List(Y))) - - val (is_br: Bool) :: Nil = signal - is_br - } -} - From cde7c9d869b55a6ae8a09ee66e407200d96e38a9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 3 Oct 2014 14:31:01 -0700 Subject: [PATCH 0776/1087] simplify CSR decoding code --- rocket/src/main/scala/csr.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index b1e76254..590752e5 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -113,10 +113,7 @@ class CSRFile extends Module val map = for ((v, i) <- CSRs.all.zipWithIndex) yield v -> UInt(BigInt(1) << i) val out = ROM(map)(addr) - val a = Array.fill(CSRs.all.max+1)(null.asInstanceOf[Bool]) - for (i <- 0 until CSRs.all.size) - a(CSRs.all(i)) = out(i) - a + Map((CSRs.all zip out.toBools):_*) } val wen = cpu_req_valid || host_pcr_req_fire && host_pcr_bits.rw From 59eb7d194dfd7f004a8c8837adce7f83186e2cb4 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Fri, 3 Oct 2014 16:08:08 -0700 Subject: [PATCH 0777/1087] Finalize superscalar btb. --- rocket/src/main/scala/btb.scala | 41 +++++++++++++-------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 81a6d459..94f7a6ac 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -51,45 +51,34 @@ class BHTResp extends Bundle with BTBParameters { // - updated speculatively in fetch (if there's a BTB hit). // - on a mispredict, the history register is reset (again, only if BTB hit). 
// The counter table: -// - each PC has its own counter, updated when a branch resolves (and BTB hit). -// - the BTB provides the predicted branch PC, allowing us to properly index -// the counter table and provide the prediction for that specific branch. -// Critical path concerns may require only providing a single counter for -// the entire fetch packet, but that complicates how multiple branches -// update that line. -class BHT(nbht: Int, fetchwidth: Int) { +// - each counter corresponds with the "fetch pc" (not the PC of the branch). +// - updated when a branch resolves (and BTB was a hit for that branch). +// The updating branch must provide its "fetch pc" in addition to its own PC. +class BHT(nbht: Int) { val nbhtbits = log2Up(nbht) - private val logfw = if (fetchwidth == 1) 0 else log2Up(fetchwidth) - - def get(fetch_addr: UInt, bridx: UInt, update: Bool): BHTResp = { + def get(addr: UInt, bridx: UInt, update: Bool): BHTResp = { val res = new BHTResp - val aligned_addr = fetch_addr >> UInt(logfw + 2) - val index = aligned_addr ^ history - val counters = table(index) - res.value := (counters >> (bridx<<1)) & Bits(0x3) + val index = addr(nbhtbits+1,2) ^ history + res.value := table(index) res.history := history val taken = res.value(0) when (update) { history := Cat(taken, history(nbhtbits-1,1)) } res } def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = { - val aligned_addr = addr >> UInt(logfw + 2) - val index = aligned_addr ^ d.history - val new_cntr = Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) - var bridx: UInt = null - if (logfw == 0) bridx = UInt(0) else bridx = addr(logfw+1,2) - val mask = Bits(0x3) << (bridx<<1) - table.write(index, new_cntr, mask) + val index = addr(nbhtbits+1,2) ^ d.history + table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } } - private val table = Mem(UInt(width = 
2*fetchwidth), nbht) + private val table = Mem(UInt(width = 2), nbht) val history = Reg(UInt(width = nbhtbits)) } // BTB update occurs during branch resolution. // - "pc" is what future fetch PCs will tag match against. // - "br_pc" is the PC of the branch instruction. +// - "bridx" is the low-order PC bits of the predicted branch. // - "resp.mask" provides a mask of valid instructions (instructions are // masked off by the predicted taken branch). class BTBUpdate extends Bundle with BTBParameters { @@ -107,7 +96,8 @@ class BTBUpdate extends Bundle with BTBParameters { class BTBResp extends Bundle with BTBParameters { val taken = Bool() - val mask = Bits(width = log2Up(params(FetchWidth))) + val mask = Bits(width = params(FetchWidth)) + val bridx = Bits(width = log2Up(params(FetchWidth))) val target = UInt(width = vaddrBits) val entry = UInt(width = opaqueBits) val bht = new BHTResp @@ -232,13 +222,14 @@ class BTB extends Module with BTBParameters { io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) + io.resp.bits.bridx := brIdx(io.resp.bits.entry) if (nBHT > 0) { - val bht = new BHT(nBHT, params(FetchWidth)) + val bht = new BHT(nBHT) val res = bht.get(io.req.bits.addr, brIdx(io.resp.bits.entry), io.req.valid && hits.orR && !Mux1H(hits, isJump)) val update_btb_hit = io.update.bits.prediction.valid when (io.update.valid && update_btb_hit && !io.update.bits.isJump) { - bht.update(io.update.bits.br_pc, io.update.bits.prediction.bits.bht, + bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, io.update.bits.taken, io.update.bits.incorrectTarget) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } From 7bb729901883c871c82dffe564ced3af879695d3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 14 Oct 2014 17:28:37 -0700 Subject: [PATCH 0778/1087] Don't pollute BTB with PC+4 target 
predictions --- rocket/src/main/scala/btb.scala | 8 ++++---- rocket/src/main/scala/ctrl.scala | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 0f1ee962..dae63215 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -75,7 +75,7 @@ class BTBUpdate extends Bundle with BTBParameters { val isJump = Bool() val isCall = Bool() val isReturn = Bool() - val incorrectTarget = Bool() + val mispredict = Bool() } class BTBResp extends Bundle with BTBParameters { @@ -138,8 +138,8 @@ class BTB extends Module with BTBParameters { } val updateHit = r_update.bits.prediction.valid - val updateValid = r_update.bits.incorrectTarget || updateHit && Bool(nBHT > 0) - val updateTarget = updateValid && r_update.bits.incorrectTarget + val updateValid = r_update.bits.mispredict || updateHit && Bool(nBHT > 0) + val updateTarget = updateValid && r_update.bits.mispredict && r_update.bits.taken val useUpdatePageHit = updatePageHit.orR val doIdxPageRepl = updateTarget && !useUpdatePageHit @@ -208,7 +208,7 @@ class BTB extends Module with BTBParameters { val update_btb_hit = io.update.bits.prediction.valid when (io.update.valid && update_btb_hit && !io.update.bits.isJump) { bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, - io.update.bits.taken, io.update.bits.incorrectTarget) + io.update.bits.taken, io.update.bits.mispredict) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } io.resp.bits.bht := res diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index d72eba45..6ea50107 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -652,11 +652,11 @@ class Control extends Module Mux(replay_wb, PC_WB, // replay PC_MEM))) - io.imem.btb_update.valid := mem_reg_branch || mem_reg_jal || mem_reg_jalr + io.imem.btb_update.valid := (mem_reg_branch || 
io.imem.btb_update.bits.isJump) && !take_pc_wb io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp - io.imem.btb_update.bits.taken := mem_reg_jal || mem_reg_branch && io.dpath.mem_br_taken - io.imem.btb_update.bits.incorrectTarget := take_pc_mem + io.imem.btb_update.bits.taken := mem_reg_branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump + io.imem.btb_update.bits.mispredict := take_pc_mem io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr io.imem.btb_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra From 08d2c1333041712a65edd84b1291c1de196c0f08 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Mon, 20 Oct 2014 18:45:23 -0700 Subject: [PATCH 0779/1087] Fixed btb/icache bugs regarding resp mask, fw==1 --- rocket/src/main/scala/btb.scala | 14 ++++++++++---- rocket/src/main/scala/icache.scala | 8 +++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index b323ef98..c5e7636d 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -79,7 +79,7 @@ class BHT(nbht: Int) { // - "pc" is what future fetch PCs will tag match against. // - "br_pc" is the PC of the branch instruction. // - "bridx" is the low-order PC bits of the predicted branch (after -// shifting off the lowest log(inst_bytes) bits off). +// shifting off the lowest log(inst_bytes) bits off). // - "resp.mask" provides a mask of valid instructions (instructions are // masked off by the predicted taken branch). 
class BTBUpdate extends Bundle with BTBParameters { @@ -196,9 +196,9 @@ class BTB extends Module with BTBParameters { useRAS(waddr) := r_update.bits.isReturn isJump(waddr) := r_update.bits.isJump if (params(FetchWidth) == 1) { - brIdx(waddr) := UInt(0) + brIdx(waddr) := UInt(0) } else { - brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) + brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) } } @@ -226,8 +226,14 @@ class BTB extends Module with BTBParameters { io.resp.bits.taken := io.resp.valid io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) - io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) io.resp.bits.bridx := brIdx(io.resp.bits.entry) + if (params(FetchWidth) == 1) { + io.resp.bits.mask := UInt(1) + } else { + io.resp.bits.mask := Mux(io.resp.valid, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)), + ((UInt(1) << UInt(params(FetchWidth)))-UInt(1))) +// val all_ones = UInt((1 << coreFetchWidth)-1) + } if (nBHT > 0) { val bht = new BHT(nBHT) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 0c7fbaba..c854760b 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -58,9 +58,10 @@ class Frontend extends FrontendModule val s2_xcpt_if = Reg(init=Bool(false)) val msb = vaddrBits-1 + val lsb = log2Up(coreFetchWidth*coreInstBytes) val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) - val ntpc_0 = s1_pc + UInt(coreInstBytes) - val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,0)) + val ntpc_0 = s1_pc + UInt(coreInstBytes*coreFetchWidth) + val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure val icmiss = s2_valid && !icache.io.resp.valid val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt @@ -115,7 +116,8 @@ class Frontend extends 
FrontendModule val all_ones = UInt((1 << coreFetchWidth)-1) val msk_pc = if (coreFetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(coreFetchWidth) -1+2,2) - io.cpu.resp.bits.mask := msk_pc & btb.io.resp.bits.mask + // TODO what is the best way to handle the clock-gating of s2_btb_resp_bits? + io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc) io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(coreInstBytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if From 3be3cd7731ec5bbc582d03545b8dfa45596ba1a8 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Mon, 3 Nov 2014 01:13:22 -0800 Subject: [PATCH 0780/1087] Fixed error with icache/btb resp mask. --- rocket/src/main/scala/btb.scala | 5 ++--- rocket/src/main/scala/icache.scala | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index c5e7636d..d0506601 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -230,9 +230,8 @@ class BTB extends Module with BTBParameters { if (params(FetchWidth) == 1) { io.resp.bits.mask := UInt(1) } else { - io.resp.bits.mask := Mux(io.resp.valid, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)), - ((UInt(1) << UInt(params(FetchWidth)))-UInt(1))) -// val all_ones = UInt((1 << coreFetchWidth)-1) + // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case + io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) } if (nBHT > 0) { diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index c854760b..cdf21bca 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -114,9 +114,8 @@ class Frontend extends FrontendModule io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) } - val all_ones = UInt((1 << coreFetchWidth)-1) + val all_ones = UInt((1 << (coreFetchWidth+1))-1) val msk_pc = 
if (coreFetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(coreFetchWidth) -1+2,2) - // TODO what is the best way to handle the clock-gating of s2_btb_resp_bits? io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc) io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(coreInstBytes)-1,0) != UInt(0) From bf901e4bcae099d549a73b366bcbfd02fe847170 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 5 Nov 2014 13:01:26 -0800 Subject: [PATCH 0781/1087] Remove master_xact_id from Release --- rocket/src/main/scala/nbdcache.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3d9bc150..80d1a6bc 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -146,7 +146,6 @@ class WritebackReq extends L1HellaCacheBundle { val idx = Bits(width = idxBits) val way_en = Bits(width = nWays) val client_xact_id = Bits(width = params(TLClientXactIdBits)) - val master_xact_id = Bits(width = params(TLMasterXactIdBits)) val r_type = UInt(width = co.releaseTypeWidth) } @@ -286,7 +285,6 @@ class MSHR(id: Int) extends L1HellaCacheModule { io.wb_req.bits.idx := req_idx io.wb_req.bits.way_en := req.way_en io.wb_req.bits.client_xact_id := Bits(id) - io.wb_req.bits.master_xact_id := Bits(0) // DNC io.wb_req.bits.r_type := co.getReleaseTypeOnVoluntaryWriteback() io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready @@ -476,7 +474,6 @@ class WritebackUnit extends L1HellaCacheModule { io.release.bits.r_type := req.r_type io.release.bits.addr := Cat(req.tag, req.idx).toUInt io.release.bits.client_xact_id := req.client_xact_id - io.release.bits.master_xact_id := req.master_xact_id if(refillCycles > 1) { val data_buf = Reg(Bits()) when(active && r2_data_req_fired) { @@ -545,7 +542,7 @@ class ProbeUnit extends L1HellaCacheModule { io.req.ready := state === s_invalid io.rep.valid := state === s_release && !(hit && 
co.needsWriteback(line_state)) - io.rep.bits := Release(co.getReleaseTypeOnProbe(req, Mux(hit, line_state, co.clientMetadataOnFlush)), req.addr, req.client_xact_id, req.master_xact_id) + io.rep.bits := Release(co.getReleaseTypeOnProbe(req, Mux(hit, line_state, co.clientMetadataOnFlush)), req.addr, req.client_xact_id) io.meta_read.valid := state === s_meta_read io.meta_read.bits.idx := req.addr @@ -563,7 +560,6 @@ class ProbeUnit extends L1HellaCacheModule { io.wb_req.bits.tag := req.addr >> UInt(idxBits) io.wb_req.bits.r_type := co.getReleaseTypeOnProbe(req, Mux(hit, line_state, co.clientMetadataOnFlush)) io.wb_req.bits.client_xact_id := req.client_xact_id - io.wb_req.bits.master_xact_id := req.master_xact_id } class DataArray extends L1HellaCacheModule { From fea31d2167f00a4842827b9c06122c18d79eb9e4 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Tue, 11 Nov 2014 03:34:05 -0800 Subject: [PATCH 0782/1087] Significant changes and fixes to BTB for superscalar fetch. - BTBUpdate only occurs on mispredicts now. - RASUpdate broken out from BTBUpdate (allows RASUpdate to be performed in Decode). - Added optional 2nd CAM port to BTB for updates (for when updates to the BTB may occur out-of-order). - Fixed resp.mask bit logic. 
--- rocket/src/main/scala/btb.scala | 89 +++++++++++++++++------------- rocket/src/main/scala/ctrl.scala | 11 ++-- rocket/src/main/scala/dpath.scala | 2 +- rocket/src/main/scala/icache.scala | 6 +- 4 files changed, 63 insertions(+), 45 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index d0506601..5614561d 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -65,36 +65,40 @@ class BHT(nbht: Int) { when (update) { history := Cat(taken, history(nbhtbits-1,1)) } res } - def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = { + def update(addr: UInt, d: BHTResp, taken: Bool): Unit = { val index = addr(nbhtbits+1,2) ^ d.history table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) - when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } + history := Cat(taken, d.history(nbhtbits-1,1)) } private val table = Mem(UInt(width = 2), nbht) val history = Reg(UInt(width = nbhtbits)) } -// BTB update occurs during branch resolution. +// BTB update occurs during branch resolution (and only on a mispredict). // - "pc" is what future fetch PCs will tag match against. // - "br_pc" is the PC of the branch instruction. -// - "bridx" is the low-order PC bits of the predicted branch (after -// shifting off the lowest log(inst_bytes) bits off). -// - "resp.mask" provides a mask of valid instructions (instructions are -// masked off by the predicted taken branch). 
class BTBUpdate extends Bundle with BTBParameters { val prediction = Valid(new BTBResp) val pc = UInt(width = vaddrBits) val target = UInt(width = vaddrBits) - val returnAddr = UInt(width = vaddrBits) val taken = Bool() val isJump = Bool() - val isCall = Bool() val isReturn = Bool() val br_pc = UInt(width = vaddrBits) - val mispredict = Bool() } +class RASUpdate extends Bundle with BTBParameters { + val isCall = Bool() + val isReturn = Bool() + val returnAddr = UInt(width = vaddrBits) + val prediction = Valid(new BTBResp) +} + +// - "bridx" is the low-order PC bits of the predicted branch (after +// shifting off the lowest log(inst_bytes) bits off). +// - "resp.mask" provides a mask of valid instructions (instructions are +// masked off by the predicted taken branch). class BTBResp extends Bundle with BTBParameters { val taken = Bool() val mask = Bits(width = params(FetchWidth)) @@ -109,11 +113,15 @@ class BTBReq extends Bundle with BTBParameters { } // fully-associative branch target buffer -class BTB extends Module with BTBParameters { +// Higher-performance processors may cause BTB updates to occur out-of-order, +// which requires an extra CAM port for updates (to ensure no duplicates get +// placed in BTB). 
+class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParameters { val io = new Bundle { val req = Valid(new BTBReq).flip val resp = Valid(new BTBResp) val update = Valid(new BTBUpdate).flip + val ras_update = Valid(new RASUpdate).flip val invalidate = Bool(INPUT) } @@ -158,8 +166,7 @@ class BTB extends Module with BTBParameters { } val updateHit = r_update.bits.prediction.valid - val updateValid = r_update.bits.mispredict || updateHit && Bool(nBHT > 0) - val updateTarget = updateValid && r_update.bits.mispredict && r_update.bits.taken + val updateTarget = r_update.bits.taken val useUpdatePageHit = updatePageHit.orR val doIdxPageRepl = updateTarget && !useUpdatePageHit @@ -179,27 +186,32 @@ class BTB extends Module with BTBParameters { val pageReplEn = idxPageReplEn | tgtPageReplEn idxPageRepl := UIntToOH(Counter(r_update.valid && doPageRepl, nPages)._1) - when (r_update.valid && !(updateValid && !updateTarget)) { - val nextRepl = Counter(!updateHit && updateValid, entries)._1 - val waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl) + when (r_update.valid && updateTarget) { + assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target") + + val nextRepl = Counter(!updateHit, entries)._1 + var waddr:UInt = null + if (!updates_out_of_order) { + waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl) + } else { + println(" BTB accepts out-of-order updates.") + waddr = Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) + } // invalidate entries if we stomp on pages they depend upon idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits - idxValid(waddr) := updateValid - when (updateTarget) { - assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target") - idxs(waddr) := r_update.bits.pc - tgts(waddr) := update_target - idxPages(waddr) := idxPageUpdate - tgtPages(waddr) := tgtPageUpdate - useRAS(waddr) := r_update.bits.isReturn - 
isJump(waddr) := r_update.bits.isJump - if (params(FetchWidth) == 1) { - brIdx(waddr) := UInt(0) - } else { - brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) - } + idxValid(waddr) := Bool(true) + idxs(waddr) := r_update.bits.pc + tgts(waddr) := update_target + idxPages(waddr) := idxPageUpdate + tgtPages(waddr) := tgtPageUpdate + useRAS(waddr) := r_update.bits.isReturn + isJump(waddr) := r_update.bits.isJump + if (params(FetchWidth) == 1) { + brIdx(waddr) := UInt(0) + } else { + brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) } require(nPages % 2 == 0) @@ -231,7 +243,9 @@ class BTB extends Module with BTBParameters { io.resp.bits.mask := UInt(1) } else { // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case - io.resp.bits.mask := Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)) + val all_ones = UInt((1 << (params(FetchWidth)+1))-1) + io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)), + all_ones) } if (nBHT > 0) { @@ -239,8 +253,7 @@ class BTB extends Module with BTBParameters { val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump)) val update_btb_hit = io.update.bits.prediction.valid when (io.update.valid && update_btb_hit && !io.update.bits.isJump) { - bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, - io.update.bits.taken, io.update.bits.mispredict) + bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, io.update.bits.taken) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } io.resp.bits.bht := res @@ -252,13 +265,13 @@ class BTB extends Module with BTBParameters { when (!ras.isEmpty && doPeek) { io.resp.bits.target := ras.peek } - when (io.update.valid) { - when (io.update.bits.isCall) { - ras.push(io.update.bits.returnAddr) + when (io.ras_update.valid) { + when (io.ras_update.bits.isCall) { + ras.push(io.ras_update.bits.returnAddr) when 
(doPeek) { - io.resp.bits.target := io.update.bits.returnAddr + io.resp.bits.target := io.ras_update.bits.returnAddr } - }.elsewhen (io.update.bits.isReturn && io.update.bits.prediction.valid) { + }.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) { ras.pop } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6ea50107..f3aff610 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -652,15 +652,18 @@ class Control extends Module Mux(replay_wb, PC_WB, // replay PC_MEM))) - io.imem.btb_update.valid := (mem_reg_branch || io.imem.btb_update.bits.isJump) && !take_pc_wb + io.imem.btb_update.valid := take_pc_mem && !take_pc_wb io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp io.imem.btb_update.bits.taken := mem_reg_branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump - io.imem.btb_update.bits.mispredict := take_pc_mem io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr - io.imem.btb_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra - io.imem.req.valid := take_pc + io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !take_pc_wb + io.imem.ras_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) + io.imem.ras_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra + io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit + io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp + io.imem.req.valid := take_pc val bypassDst = Array(id_raddr1, id_raddr2) val bypassSrc = Array.fill(NBYP)((Bool(true), UInt(0))) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 08e9d53f..4e05f50c 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -286,8 +286,8 @@ class Datapath extends Module wb_reg_pc)).toUInt // PC_WB 
io.imem.btb_update.bits.pc := mem_reg_pc io.imem.btb_update.bits.target := io.imem.req.bits.pc - io.imem.btb_update.bits.returnAddr := mem_int_wdata io.imem.btb_update.bits.br_pc := mem_reg_pc + io.imem.ras_update.bits.returnAddr := mem_int_wdata // for hazard/bypass opportunity detection io.ctrl.ex_waddr := ex_reg_inst(11,7) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index cdf21bca..7f94a64f 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -33,18 +33,19 @@ class CPUFrontendIO extends CoreBundle { val resp = Decoupled(new FrontendResp).flip val btb_resp = Valid(new BTBResp).flip val btb_update = Valid(new BTBUpdate) + val ras_update = Valid(new RASUpdate) val ptw = new TLBPTWIO().flip val invalidate = Bool(OUTPUT) } -class Frontend extends FrontendModule +class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule { val io = new Bundle { val cpu = new CPUFrontendIO().flip val mem = new UncachedTileLinkIO } - val btb = Module(new BTB) + val btb = Module(new BTB(btb_updates_out_of_order)) val icache = Module(new ICache) val tlb = Module(new TLB(params(NITLBEntries))) @@ -88,6 +89,7 @@ class Frontend extends FrontendModule btb.io.req.valid := !stall && !icmiss btb.io.req.bits.addr := s1_pc & SInt(-coreInstBytes) btb.io.update := io.cpu.btb_update + btb.io.ras_update := io.cpu.ras_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate tlb.io.ptw <> io.cpu.ptw From c9e787481891085db413eca9a15f1596e5fa83a2 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 11 Nov 2014 17:36:48 -0800 Subject: [PATCH 0783/1087] Major tilelink revision for uncached message types --- rocket/src/main/scala/icache.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 45327e46..d046b858 100644 --- 
a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -258,14 +258,14 @@ class ICache extends FrontendModule io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) val ack_q = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) - ack_q.io.enq.valid := refill_done && co.requiresAckForGrant(refill_bits.payload.g_type) + ack_q.io.enq.valid := refill_done && co.requiresAckForGrant(refill_bits.payload) ack_q.io.enq.bits.payload.master_xact_id := refill_bits.payload.master_xact_id ack_q.io.enq.bits.header.dst := refill_bits.header.src // output signals io.resp.valid := s2_hit io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready - io.mem.acquire.bits.payload := Acquire(co.getUncachedReadAcquireType, s2_addr >> UInt(blockOffBits), UInt(0)) + io.mem.acquire.bits.payload := UncachedRead(s2_addr >> UInt(blockOffBits)) io.mem.finish <> ack_q.io.deq // control state machine diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 80d1a6bc..3cc3d5e1 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -254,7 +254,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { } val ackq = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) - ackq.io.enq.valid := (wb_done || refill_done) && co.requiresAckForGrant(io.mem_grant.bits.payload.g_type) + ackq.io.enq.valid := (wb_done || refill_done) && co.requiresAckForGrant(io.mem_grant.bits.payload) ackq.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp @@ -288,9 +288,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { io.wb_req.bits.r_type := co.getReleaseTypeOnVoluntaryWriteback() io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready - io.mem_req.bits.a_type := acquire_type - io.mem_req.bits.addr := Cat(io.tag, req_idx).toUInt - 
io.mem_req.bits.client_xact_id := Bits(id) + io.mem_req.bits := Acquire(acquire_type, Cat(io.tag, req_idx).toUInt, Bits(id)) io.mem_finish <> ackq.io.deq io.meta_read.valid := state === s_drain_rpq From b7b2923bff2cd33df4824fb99c624dba51e00531 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 11 Nov 2014 18:18:35 -0800 Subject: [PATCH 0784/1087] Cleanup MSHR internal bundles --- rocket/src/main/scala/nbdcache.scala | 46 ++++++++++++++++++---------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3cc3d5e1..864db3f0 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -57,22 +57,28 @@ class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool) val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift) } -class HellaCacheReq extends CoreBundle { +trait HasCoreData extends CoreBundle { + val data = Bits(width = coreDataBits) +} + +class HellaCacheReqInternal extends CoreBundle { val kill = Bool() val typ = Bits(width = MT_SZ) val phys = Bool() val addr = UInt(width = coreMaxAddrBits) - val data = Bits(width = coreDataBits) val tag = Bits(width = coreDCacheReqTagBits) val cmd = Bits(width = M_SZ) } -class HellaCacheResp extends CoreBundle { +class HellaCacheReq extends HellaCacheReqInternal + with HasCoreData + +class HellaCacheResp extends CoreBundle + with HasCoreData { val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() val typ = Bits(width = 3) val has_data = Bool() - val data = Bits(width = coreDataBits) val data_subword = Bits(width = coreDataBits) val tag = Bits(width = coreDCacheReqTagBits) val cmd = Bits(width = 4) @@ -100,15 +106,23 @@ class HellaCacheIO extends CoreBundle { val ordered = Bool(INPUT) } -class MSHRReq extends HellaCacheReq with L1HellaCacheParameters { +trait HasSDQId extends CoreBundle { + val sdq_id = UInt(width = log2Up(params(StoreDataQueueDepth))) 
+} + +trait HasMissInfo extends CoreBundle with L1HellaCacheParameters { val tag_match = Bool() val old_meta = new L1Metadata val way_en = Bits(width = nWays) } -class Replay extends HellaCacheReq with L1HellaCacheParameters { - val sdq_id = UInt(width = log2Up(params(StoreDataQueueDepth))) -} +class MSHRReq extends HellaCacheReqInternal with HasMissInfo with HasCoreData + +class MSHRReqInternal extends HellaCacheReqInternal with HasMissInfo with HasSDQId + +class Replay extends HellaCacheReqInternal with L1HellaCacheParameters with HasCoreData + +class ReplayInternal extends HellaCacheReqInternal with L1HellaCacheParameters with HasSDQId class DataReadReq extends L1HellaCacheBundle { val way_en = Bits(width = nWays) @@ -155,8 +169,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { val req_pri_rdy = Bool(OUTPUT) val req_sec_val = Bool(INPUT) val req_sec_rdy = Bool(OUTPUT) - val req_bits = new MSHRReq().asInput - val req_sdq_id = UInt(INPUT, log2Up(params(StoreDataQueueDepth))) + val req_bits = new MSHRReqInternal().asInput val idx_match = Bool(OUTPUT) val tag = Bits(OUTPUT, tagBits) @@ -165,7 +178,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { val mem_resp = new DataWriteReq().asOutput val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) - val replay = Decoupled(new Replay) + val replay = Decoupled(new ReplayInternal) val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip val mem_finish = Decoupled(new LogicalNetworkIO(new Finish)) val wb_req = Decoupled(new WritebackReq) @@ -179,7 +192,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { val release_type = Reg(UInt()) val line_state = Reg(new ClientMetadata()(co)) val refill_count = Reg(UInt(width = log2Up(refillCycles))) // TODO: zero-width wire - val req = Reg(new MSHRReq()) + val req = Reg(new MSHRReqInternal()) val req_cmd = io.req_bits.cmd val req_idx = req.addr(untagBits-1,blockOffBits) @@ -195,10 +208,9 @@ class MSHR(id: Int) extends L1HellaCacheModule 
{ val meta_on_grant = co.clientMetadataOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) val meta_on_hit = co.clientMetadataOnHit(req_cmd, io.req_bits.old_meta.coh) - val rpq = Module(new Queue(new Replay, params(ReplayQueueDepth))) + val rpq = Module(new Queue(new ReplayInternal, params(ReplayQueueDepth))) rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(req_cmd) rpq.io.enq.bits := io.req_bits - rpq.io.enq.bits.sdq_id := io.req_sdq_id rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid when (state === s_drain_rpq && !rpq.io.deq.valid) { @@ -342,7 +354,7 @@ class MSHRFile extends L1HellaCacheModule { val mem_req_arb = Module(new Arbiter(new Acquire, params(NMSHRs))) val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), params(NMSHRs))) val wb_req_arb = Module(new Arbiter(new WritebackReq, params(NMSHRs))) - val replay_arb = Module(new Arbiter(new Replay, params(NMSHRs))) + val replay_arb = Module(new Arbiter(new ReplayInternal, params(NMSHRs))) val alloc_arb = Module(new Arbiter(Bool(), params(NMSHRs))) var idx_match = Bool(false) @@ -364,7 +376,7 @@ class MSHRFile extends L1HellaCacheModule { mshr.io.req_sec_val := io.req.valid && sdq_rdy && tag_match mshr.io.req_bits := io.req.bits - mshr.io.req_sdq_id := sdq_alloc_id + mshr.io.req_bits.sdq_id := sdq_alloc_id mshr.io.meta_read <> meta_read_arb.io.in(i) mshr.io.meta_write <> meta_write_arb.io.in(i) @@ -401,7 +413,7 @@ class MSHRFile extends L1HellaCacheModule { io.replay <> replay_arb.io.out when (io.replay.valid || sdq_enq) { - sdq_val := sdq_val & ~(UIntToOH(io.replay.bits.sdq_id) & Fill(params(StoreDataQueueDepth), free_sdq)) | + sdq_val := sdq_val & ~(UIntToOH(replay_arb.io.out.bits.sdq_id) & Fill(params(StoreDataQueueDepth), free_sdq)) | PriorityEncoderOH(~sdq_val(params(StoreDataQueueDepth)-1,0)) & Fill(params(StoreDataQueueDepth), sdq_enq) } } From 6749f67b7ff2b75665e3db277ab15b5e4d0c181e Mon Sep 17 
00:00:00 2001 From: Christopher Celio Date: Sun, 16 Nov 2014 22:02:27 -0800 Subject: [PATCH 0785/1087] Fixed BHT update error. - separated out BTB/BHT update - BHT updates counters on every branch - BTB update only on mispredicted and taken branches --- rocket/src/main/scala/btb.scala | 78 ++++++++++++++++-------------- rocket/src/main/scala/ctrl.scala | 11 ++++- rocket/src/main/scala/dpath.scala | 1 + rocket/src/main/scala/icache.scala | 4 +- 4 files changed, 54 insertions(+), 40 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 5614561d..62a5daf8 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -51,7 +51,7 @@ class BHTResp extends Bundle with BTBParameters { // - updated speculatively in fetch (if there's a BTB hit). // - on a mispredict, the history register is reset (again, only if BTB hit). // The counter table: -// - each counter corresponds with the "fetch pc" (not the PC of the branch). +// - each counter corresponds with the address of the fetch packet ("fetch pc"). // - updated when a branch resolves (and BTB was a hit for that branch). // The updating branch must provide its "fetch pc". class BHT(nbht: Int) { @@ -65,10 +65,10 @@ class BHT(nbht: Int) { when (update) { history := Cat(taken, history(nbhtbits-1,1)) } res } - def update(addr: UInt, d: BHTResp, taken: Bool): Unit = { + def update(addr: UInt, d: BHTResp, taken: Bool, mispredict: Bool): Unit = { val index = addr(nbhtbits+1,2) ^ d.history table(index) := Cat(taken, (d.value(1) & d.value(0)) | ((d.value(1) | d.value(0)) & taken)) - history := Cat(taken, d.history(nbhtbits-1,1)) + when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } } private val table = Mem(UInt(width = 2), nbht) @@ -88,6 +88,15 @@ class BTBUpdate extends Bundle with BTBParameters { val br_pc = UInt(width = vaddrBits) } +// BHT update occurs during branch resolution on all conditional branches. 
+// - "pc" is what future fetch PCs will tag match against. +class BHTUpdate extends Bundle with BTBParameters { + val prediction = Valid(new BTBResp) + val pc = UInt(width = vaddrBits) + val taken = Bool() + val mispredict = Bool() +} + class RASUpdate extends Bundle with BTBParameters { val isCall = Bool() val isReturn = Bool() @@ -96,9 +105,9 @@ class RASUpdate extends Bundle with BTBParameters { } // - "bridx" is the low-order PC bits of the predicted branch (after -// shifting off the lowest log(inst_bytes) bits off). +// shifting off the lowest log(inst_bytes) bits off). // - "resp.mask" provides a mask of valid instructions (instructions are -// masked off by the predicted taken branch). +// masked off by the predicted taken branch). class BTBResp extends Bundle with BTBParameters { val taken = Bool() val mask = Bits(width = params(FetchWidth)) @@ -120,7 +129,8 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val io = new Bundle { val req = Valid(new BTBReq).flip val resp = Valid(new BTBResp) - val update = Valid(new BTBUpdate).flip + val btb_update = Valid(new BTBUpdate).flip + val bht_update = Valid(new BHTUpdate).flip val ras_update = Valid(new RASUpdate).flip val invalidate = Bool(INPUT) } @@ -151,67 +161,62 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete idxValid & idxMatch & idxPageMatch } - val r_update = Pipe(io.update) + val r_btb_update = Pipe(io.btb_update) val update_target = io.req.bits.addr val pageHit = pageMatch(io.req.bits.addr) val hits = tagMatch(io.req.bits.addr, pageHit) - val updatePageHit = pageMatch(r_update.bits.pc) - val updateHits = tagMatch(r_update.bits.pc, updatePageHit) + val updatePageHit = pageMatch(r_btb_update.bits.pc) + val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit) - private var lfsr = LFSR16(r_update.valid) + private var lfsr = LFSR16(r_btb_update.valid) def rand(width: Int) = { lfsr = lfsr(lfsr.getWidth-1,1) Random.oneHot(width, lfsr) 
} - val updateHit = r_update.bits.prediction.valid - val updateTarget = r_update.bits.taken + val updateHit = r_btb_update.bits.prediction.valid val useUpdatePageHit = updatePageHit.orR - val doIdxPageRepl = updateTarget && !useUpdatePageHit + val doIdxPageRepl = !useUpdatePageHit val idxPageRepl = UInt() val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) - val samePage = page(r_update.bits.pc) === page(update_target) + val samePage = page(r_btb_update.bits.pc) === page(update_target) val usePageHit = (pageHit & ~idxPageReplEn).orR - val doTgtPageRepl = updateTarget && !samePage && !usePageHit + val doTgtPageRepl = !samePage && !usePageHit val tgtPageRepl = Mux(samePage, idxPageUpdateOH, idxPageUpdateOH(nPages-2,0) << 1 | idxPageUpdateOH(nPages-1)) val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl)) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) val doPageRepl = doIdxPageRepl || doTgtPageRepl val pageReplEn = idxPageReplEn | tgtPageReplEn - idxPageRepl := UIntToOH(Counter(r_update.valid && doPageRepl, nPages)._1) + idxPageRepl := UIntToOH(Counter(r_btb_update.valid && doPageRepl, nPages)._1) - when (r_update.valid && updateTarget) { - assert(io.req.bits.addr === r_update.bits.target, "BTB request != I$ target") + when (r_btb_update.valid) { + assert(io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target") val nextRepl = Counter(!updateHit, entries)._1 - var waddr:UInt = null - if (!updates_out_of_order) { - waddr = Mux(updateHit, r_update.bits.prediction.bits.entry, nextRepl) - } else { - println(" BTB accepts out-of-order updates.") - waddr = Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) - } + val waddr = + if (updates_out_of_order) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) + else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl) // invalidate entries if we 
stomp on pages they depend upon idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits - idxValid(waddr) := Bool(true) - idxs(waddr) := r_update.bits.pc + idxValid(waddr) := Bool(true) + idxs(waddr) := r_btb_update.bits.pc tgts(waddr) := update_target idxPages(waddr) := idxPageUpdate tgtPages(waddr) := tgtPageUpdate - useRAS(waddr) := r_update.bits.isReturn - isJump(waddr) := r_update.bits.isJump + useRAS(waddr) := r_btb_update.bits.isReturn + isJump(waddr) := r_btb_update.bits.isJump if (params(FetchWidth) == 1) { brIdx(waddr) := UInt(0) } else { - brIdx(waddr) := r_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) + brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) } require(nPages % 2 == 0) @@ -222,9 +227,9 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete when (en && pageReplEn(i)) { pages(i) := data } writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), - Mux(idxWritesEven, page(r_update.bits.pc), page(update_target))) + Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target))) writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl), - Mux(idxWritesEven, page(update_target), page(r_update.bits.pc))) + Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc))) when (doPageRepl) { pageValid := pageValid | pageReplEn } } @@ -243,17 +248,16 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete io.resp.bits.mask := UInt(1) } else { // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case - val all_ones = UInt((1 << (params(FetchWidth)+1))-1) io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)), - all_ones) + SInt(-1)) } if (nBHT > 0) { val bht = new BHT(nBHT) val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump)) - val update_btb_hit = io.update.bits.prediction.valid - 
when (io.update.valid && update_btb_hit && !io.update.bits.isJump) { - bht.update(io.update.bits.pc, io.update.bits.prediction.bits.bht, io.update.bits.taken) + val update_btb_hit = io.bht_update.bits.prediction.valid + when (io.bht_update.valid && update_btb_hit) { + bht.update(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken, io.bht_update.bits.mispredict) } when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } io.resp.bits.bht := res diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index f3aff610..18ec1b25 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -652,17 +652,24 @@ class Control extends Module Mux(replay_wb, PC_WB, // replay PC_MEM))) - io.imem.btb_update.valid := take_pc_mem && !take_pc_wb + io.imem.btb_update.valid := io.dpath.mem_misprediction && ((mem_reg_branch && io.dpath.mem_br_taken) || mem_reg_jalr || mem_reg_jal) && !take_pc_wb io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp - io.imem.btb_update.bits.taken := mem_reg_branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra + + io.imem.bht_update.valid := mem_reg_branch && !take_pc_wb + io.imem.bht_update.bits.taken := io.dpath.mem_br_taken + io.imem.bht_update.bits.mispredict := io.dpath.mem_misprediction + io.imem.bht_update.bits.prediction.valid := mem_reg_btb_hit + io.imem.bht_update.bits.prediction.bits := mem_reg_btb_resp + io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !take_pc_wb io.imem.ras_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) io.imem.ras_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit io.imem.ras_update.bits.prediction.bits := 
mem_reg_btb_resp + io.imem.req.valid := take_pc val bypassDst = Array(id_raddr1, id_raddr2) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 4e05f50c..beff52fb 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -287,6 +287,7 @@ class Datapath extends Module io.imem.btb_update.bits.pc := mem_reg_pc io.imem.btb_update.bits.target := io.imem.req.bits.pc io.imem.btb_update.bits.br_pc := mem_reg_pc + io.imem.bht_update.bits.pc := mem_reg_pc io.imem.ras_update.bits.returnAddr := mem_int_wdata // for hazard/bypass opportunity detection diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 7f94a64f..7b4cf57e 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -33,6 +33,7 @@ class CPUFrontendIO extends CoreBundle { val resp = Decoupled(new FrontendResp).flip val btb_resp = Valid(new BTBResp).flip val btb_update = Valid(new BTBUpdate) + val bht_update = Valid(new BHTUpdate) val ras_update = Valid(new RASUpdate) val ptw = new TLBPTWIO().flip val invalidate = Bool(OUTPUT) @@ -88,7 +89,8 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule btb.io.req.valid := !stall && !icmiss btb.io.req.bits.addr := s1_pc & SInt(-coreInstBytes) - btb.io.update := io.cpu.btb_update + btb.io.btb_update := io.cpu.btb_update + btb.io.bht_update := io.cpu.bht_update btb.io.ras_update := io.cpu.ras_update btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate From f19b3ca43e5375956bec18b8621749ad28d8d2b7 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Sun, 16 Nov 2014 22:04:33 -0800 Subject: [PATCH 0786/1087] Deleted extra spaces at EOL in ctrl.scala --- rocket/src/main/scala/ctrl.scala | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 18ec1b25..247a12b9 100644 --- 
a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -79,7 +79,7 @@ abstract trait DecodeConstants // | | | | | | | | | | | | | | | | | | | | | | | | | amo // | | | | | | | | | | | | | | | | | | | | | | | | | | List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,CSR.X,N,X,X,X,X,X) - + val table: Array[(UInt, List[UInt])] } @@ -162,7 +162,7 @@ object XDecode extends DecodeConstants SRL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), SRA-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), SLLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), SRLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), SRAIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), @@ -322,12 +322,12 @@ class Control extends Module if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) - + val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_rocc_val: Bool) :: (id_branch: Bool) :: (id_jal: Bool) :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs val id_sel_alu2 :: id_sel_alu1 :: id_sel_imm :: (id_fn_dw: Bool) :: id_fn_alu :: cs1 = cs0 val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: cs2 = cs1 val id_csr :: (id_fence_i: Bool) :: (id_sret: Bool) :: (id_syscall: Bool) :: (id_replay_next: Bool) :: (id_fence: Bool) :: (id_amo: Bool) :: Nil = cs2 - + val ex_reg_xcpt_interrupt = Reg(Bool()) val ex_reg_valid = Reg(Bool()) 
val ex_reg_branch = Reg(Bool()) @@ -470,14 +470,14 @@ class Control extends Module ex_reg_wen := Bool(false) ex_reg_fp_wen := Bool(false) ex_reg_sret := Bool(false) - ex_reg_flush_inst := Bool(false) + ex_reg_flush_inst := Bool(false) ex_reg_fp_val := Bool(false) ex_reg_rocc_val := Bool(false) ex_reg_replay_next := Bool(false) ex_reg_load_use := Bool(false) ex_reg_csr := CSR.N ex_reg_xcpt := Bool(false) - } + } .otherwise { ex_reg_branch := id_branch ex_reg_jal := id_jal @@ -514,7 +514,7 @@ class Control extends Module val (ex_xcpt, ex_cause) = checkExceptions(List( (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), (ex_reg_fp_val && io.fpu.illegal_rm, UInt(Causes.illegal_instruction)))) - + mem_reg_replay := !take_pc_mem_wb && replay_ex mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt && !mem_reg_replay_next when (ex_xcpt) { mem_reg_cause := ex_cause } @@ -599,7 +599,7 @@ class Control extends Module } val wb_set_sboard = wb_reg_div_mul_val || wb_dcache_miss || wb_reg_rocc_val - val replay_wb_common = + val replay_wb_common = io.dmem.resp.bits.nack || wb_reg_replay || io.dpath.csr_replay val wb_rocc_val = wb_reg_rocc_val && !replay_wb_common val replay_wb = replay_wb_common || wb_reg_rocc_val && !io.rocc.cmd.ready @@ -699,7 +699,7 @@ class Control extends Module io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) val id_ex_hazard = data_hazard_ex && (ex_reg_csr != CSR.N || ex_reg_jalr || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val || ex_reg_rocc_val) || fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) - + // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage. 
val mem_mem_cmd_bh = if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass From 72ea24283b7d7a5214ff591358c1f224d0aa7a3b Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 7 Dec 2014 03:09:54 -0800 Subject: [PATCH 0787/1087] multibeat TL; passes all tests --- rocket/src/main/scala/icache.scala | 28 ++++----- rocket/src/main/scala/nbdcache.scala | 90 +++++++++++++--------------- rocket/src/main/scala/util.scala | 47 --------------- 3 files changed, 52 insertions(+), 113 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index d046b858..ca12fa06 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -12,6 +12,7 @@ case object ECCCode extends Field[Option[Code]] abstract trait L1CacheParameters extends CacheParameters with CoreParameters { val co = params(TLCoherence) val code = params(ECCCode).getOrElse(new IdentityCode) + val outerDataBeats = params(TLDataBeats) } abstract trait FrontendParameters extends L1CacheParameters @@ -173,22 +174,13 @@ class ICache extends FrontendModule val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss - var refill_cnt = UInt(0) - var refill_done = state === s_refill - var refill_valid = io.mem.grant.valid - var refill_bits = io.mem.grant.bits - def doRefill(g: Grant): Bool = Bool(true) - if(refillCycles > 1) { - val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCycles, doRefill)) - ser.io.in <> io.mem.grant - refill_cnt = ser.io.cnt - refill_done = ser.io.done - refill_valid = ser.io.out.valid - refill_bits = ser.io.out.bits - ser.io.out.ready := Bool(true) - } else { - io.mem.grant.ready := Bool(true) - } + val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCyclesPerBeat, (g: Grant) => co.messageUpdatesDataArray(g))) + ser.io.in <> io.mem.grant + val (refill_cnt, refill_wrap) = Counter(ser.io.out.fire(), refillCycles) //TODO Zero width wire + val refill_done = state 
=== s_refill && refill_wrap + val refill_valid = ser.io.out.valid + val refill_bits = ser.io.out.bits + ser.io.out.ready := Bool(true) //assert(!c.tlco.isVoluntary(refill_bits.payload) || !refill_valid, "UncachedRequestors shouldn't get voluntary grants.") val repl_way = if (isDM) UInt(0) else LFSR16(s2_miss)(log2Up(nWays)-1,0) @@ -243,8 +235,8 @@ class ICache extends FrontendModule val s1_raddr = Reg(UInt()) when (refill_valid && repl_way === UInt(i)) { val e_d = code.encode(refill_bits.payload.data) - if(refillCycles > 1) data_array(Cat(s2_idx,refill_cnt)) := e_d - else data_array(s2_idx) := e_d + if(refillCycles > 1) data_array(Cat(s2_idx, refill_cnt)) := e_d + else data_array(s2_idx) := e_d } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM .elsewhen (s0_valid) { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 864db3f0..b16d3f50 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -191,7 +191,6 @@ class MSHR(id: Int) extends L1HellaCacheModule { val acquire_type = Reg(UInt()) val release_type = Reg(UInt()) val line_state = Reg(new ClientMetadata()(co)) - val refill_count = Reg(UInt(width = log2Up(refillCycles))) // TODO: zero-width wire val req = Reg(new MSHRReqInternal()) val req_cmd = io.req_bits.cmd @@ -199,9 +198,8 @@ class MSHR(id: Int) extends L1HellaCacheModule { val idx_match = req_idx === io.req_bits.addr(untagBits-1,blockOffBits) val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) - require(isPow2(refillCycles)) val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UInt(id) - val refill_done = reply && (if(refillCycles > 1) refill_count.andR else Bool(true)) + val (refill_cnt, refill_done) = Counter(reply && 
co.messageUpdatesDataArray(io.mem_grant.bits.payload), refillCycles) // TODO: Zero width? val wb_done = reply && (state === s_wb_resp) val meta_on_flush = co.clientMetadataOnFlush @@ -226,7 +224,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { when (state === s_refill_resp) { when (refill_done) { state := s_meta_write_req } when (reply) { - if(refillCycles > 1) refill_count := refill_count + UInt(1) + if(refillCycles > 1) refill_cnt := refill_cnt + UInt(1) line_state := meta_on_grant } } @@ -242,13 +240,12 @@ class MSHR(id: Int) extends L1HellaCacheModule { when (io.wb_req.fire()) { // s_wb_req state := s_wb_resp } - when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req acquire_type := co.getAcquireTypeOnSecondaryMiss(req_cmd, meta_on_flush, io.mem_req.bits) } when (io.req_pri_val && io.req_pri_rdy) { line_state := meta_on_flush - refill_count := UInt(0) + refill_cnt := UInt(0) acquire_type := co.getAcquireTypeOnPrimaryMiss(req_cmd, meta_on_flush) release_type := co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc req := io.req_bits @@ -276,7 +273,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { io.idx_match := (state != s_invalid) && idx_match io.mem_resp := req - io.mem_resp.addr := (if(refillCycles > 1) Cat(req_idx, refill_count) else req_idx) << rowOffBits + io.mem_resp.addr := (if(refillCycles > 1) Cat(req_idx, refill_cnt) else req_idx) << rowOffBits io.tag := req.addr >> untagBits io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready @@ -351,7 +348,7 @@ class MSHRFile extends L1HellaCacheModule { val memRespMux = Vec.fill(params(NMSHRs)){new DataWriteReq} val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, params(NMSHRs))) val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, params(NMSHRs))) - val mem_req_arb = Module(new Arbiter(new Acquire, params(NMSHRs))) + val mem_req_arb = Module(new LockingArbiter(new Acquire, params(NMSHRs), outerDataBeats, co.messageHasData _)) 
val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), params(NMSHRs))) val wb_req_arb = Module(new Arbiter(new WritebackReq, params(NMSHRs))) val replay_arb = Module(new Arbiter(new ReplayInternal, params(NMSHRs))) @@ -430,7 +427,9 @@ class WritebackUnit extends L1HellaCacheModule { val active = Reg(init=Bool(false)) val r1_data_req_fired = Reg(init=Bool(false)) val r2_data_req_fired = Reg(init=Bool(false)) - val cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1))) + val cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1))) //TODO Zero width + val buf_v = (if(refillCyclesPerBeat > 1) Reg(init=Bits(0, width = refillCyclesPerBeat-1)) else Bits(1)) + val beat_done = buf_v.andR val req = Reg(new WritebackReq) io.release.valid := false @@ -441,27 +440,22 @@ class WritebackUnit extends L1HellaCacheModule { r1_data_req_fired := true cnt := cnt + 1 } - if(refillCycles > 1) { // Coalescing buffer inserted - when (!r1_data_req_fired && !r2_data_req_fired && cnt === refillCycles) { - io.release.valid := true - active := !io.release.ready - } - } else { // No buffer, data released a cycle earlier - when (r2_data_req_fired) { - io.release.valid := true - when(!io.release.ready) { - r1_data_req_fired := false - r2_data_req_fired := false - cnt := UInt(0) - } .otherwise { - active := false - } + when (r2_data_req_fired) { + io.release.valid := beat_done + when(!io.release.ready) { + r1_data_req_fired := false + r2_data_req_fired := false + cnt := cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1) + } .elsewhen(beat_done) { if(refillCyclesPerBeat > 1) buf_v := 0 } + when(!r1_data_req_fired) { + active := cnt < UInt(refillCycles) } } } when (io.req.fire()) { active := true cnt := 0 + if(refillCyclesPerBeat > 1) buf_v := 0 req := io.req.bits } @@ -475,25 +469,23 @@ class WritebackUnit extends L1HellaCacheModule { io.data_req.valid := fire io.data_req.bits.way_en := req.way_en - if(refillCycles > 1) { - io.data_req.bits.addr := 
Cat(req.idx, cnt(log2Up(refillCycles)-1,0)) << rowOffBits - } else { - io.data_req.bits.addr := req.idx << rowOffBits - } + io.data_req.bits.addr := (if(refillCycles > 1) Cat(req.idx, cnt(log2Up(refillCycles)-1,0)) + else req.idx) << rowOffBits io.release.bits.r_type := req.r_type io.release.bits.addr := Cat(req.tag, req.idx).toUInt io.release.bits.client_xact_id := req.client_xact_id - if(refillCycles > 1) { - val data_buf = Reg(Bits()) - when(active && r2_data_req_fired) { - data_buf := Cat(io.data_resp, data_buf(refillCycles*encRowBits-1, encRowBits)) - } - io.release.bits.data := data_buf - } else { - io.release.bits.data := io.data_resp - } - + io.release.bits.data := + (if(refillCyclesPerBeat > 1) { + val data_buf = Reg(Bits()) + when(active && r2_data_req_fired && !beat_done) { + data_buf := Cat(io.data_resp, data_buf((refillCyclesPerBeat-1)*encRowBits-1, encRowBits)) + buf_v := (if(refillCyclesPerBeat > 2) + Cat(UInt(1), buf_v(refillCyclesPerBeat-2,1)) + else UInt(1)) + } + Cat(io.data_resp, data_buf) + } else { io.data_resp }) } class ProbeUnit extends L1HellaCacheModule { @@ -551,8 +543,12 @@ class ProbeUnit extends L1HellaCacheModule { } io.req.ready := state === s_invalid - io.rep.valid := state === s_release && !(hit && co.needsWriteback(line_state)) - io.rep.bits := Release(co.getReleaseTypeOnProbe(req, Mux(hit, line_state, co.clientMetadataOnFlush)), req.addr, req.client_xact_id) + io.rep.valid := state === s_release && + !(hit && co.needsWriteback(line_state)) // Otherwise WBU will issue release + io.rep.bits := Release(co.getReleaseTypeOnProbe(req, + Mux(hit, line_state, co.clientMetadataOnFlush)), + req.addr, + req.client_xact_id) io.meta_read.valid := state === s_meta_read io.meta_read.bits.idx := req.addr @@ -878,8 +874,8 @@ class HellaCache extends L1HellaCacheModule { metaReadArb.io.in(1) <> mshrs.io.meta_read metaWriteArb.io.in(0) <> mshrs.io.meta_write - // probes - val releaseArb = Module(new Arbiter(new Release, 2)) + // probes and 
releases + val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, co.messageHasData _)) DecoupledLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release val probe = DecoupledLogicalNetworkIOUnwrapper(io.mem.probe) @@ -895,11 +891,9 @@ class HellaCache extends L1HellaCacheModule { // refills def doRefill(g: Grant): Bool = co.messageUpdatesDataArray(g) - val refill = if(refillCycles > 1) { - val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCycles, doRefill)) - ser.io.in <> io.mem.grant - ser.io.out - } else io.mem.grant + val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCyclesPerBeat, doRefill)) + ser.io.in <> io.mem.grant + val refill = ser.io.out mshrs.io.mem_grant.valid := refill.fire() mshrs.io.mem_grant.bits := refill.bits refill.ready := writeArb.io.in(1).ready || !doRefill(refill.bits.payload) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 485dc57f..464fdbb2 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -158,50 +158,3 @@ object Random private def partition(value: UInt, slices: Int) = Vec.tabulate(slices)(i => value < round((i << value.getWidth).toDouble / slices)) } - -class FlowThroughSerializer[T <: HasTileLinkData](gen: LogicalNetworkIO[T], n: Int, doSer: T => Bool) extends Module { - val io = new Bundle { - val in = Decoupled(gen.clone).flip - val out = Decoupled(gen.clone) - val cnt = UInt(OUTPUT, log2Up(n)) - val done = Bool(OUTPUT) - } - require(io.in.bits.payload.data.getWidth % n == 0) - val narrowWidth = io.in.bits.payload.data.getWidth / n - val cnt = Reg(init=UInt(0, width = log2Up(n))) - val wrap = cnt === UInt(n-1) - val rbits = Reg(init=io.in.bits) - val active = Reg(init=Bool(false)) - - val shifter = Vec.fill(n){Bits(width = narrowWidth)} - (0 until n).foreach { - i => shifter(i) := rbits.payload.data((i+1)*narrowWidth-1,i*narrowWidth) - } - - io.done := Bool(false) - io.cnt := cnt - io.in.ready := 
!active - io.out.valid := active || io.in.valid - io.out.bits := io.in.bits - when(!active && io.in.valid) { - when(doSer(io.in.bits.payload)) { - cnt := Mux(io.out.ready, UInt(1), UInt(0)) - rbits := io.in.bits - active := Bool(true) - } - io.done := !doSer(io.in.bits.payload) - } - when(active) { - io.out.bits := rbits - io.out.bits.payload.data := shifter(cnt) - when(io.out.ready) { - cnt := cnt + UInt(1) - when(wrap) { - cnt := UInt(0) - io.done := Bool(true) - active := Bool(false) - } - } - } -} - From c9320862aee58797cd9f144c523464a00c1b03f5 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 12 Dec 2014 12:05:41 -0800 Subject: [PATCH 0788/1087] add l2 dmem signal to rocc --- rocket/src/main/scala/rocc.scala | 6 ++++++ rocket/src/main/scala/tile.scala | 23 ++++++----------------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index d3099043..68cef693 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -43,6 +43,7 @@ class RoCCInterface extends Bundle // These should be handled differently, eventually val imem = new UncachedTileLinkIO + val dmem = new TileLinkIO val iptw = new TLBPTWIO val dptw = new TLBPTWIO val pptw = new TLBPTWIO @@ -124,6 +125,11 @@ class AccumulatorExample extends RoCC io.imem.acquire.valid := false io.imem.grant.ready := false io.imem.finish.valid := false + io.dmem.acquire.valid := false + io.dmem.release.valid := false + io.dmem.finish.valid := false + io.dmem.probe.ready := false + io.dmem.grant.ready := false io.iptw.req.valid := false io.dptw.req.valid := false io.pptw.req.valid := false diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 04c7753d..4ad0897d 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -38,10 +38,10 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { core.io.imem <> icache.io.cpu core.io.ptw <> 
ptw.io.dpath - val memArb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(params(NTilePorts))) - val dcPortId = 0 - memArb.io.in(dcPortId) <> dcache.io.mem - memArb.io.in(1) <> icache.io.mem + val memArb = Module(new TileLinkIOArbiterThatAppendsArbiterId(params(NTilePorts))) + io.tilelink <> memArb.io.out + memArb.io.in(0) <> dcache.io.mem + memArb.io.in(1) <> TileLinkIOWrapper(icache.io.mem) //If so specified, build an RoCC module and wire it in params(BuildRoCC) @@ -51,21 +51,10 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { core.io.rocc <> rocc.io dcIF.io.requestor <> rocc.io.mem dcArb.io.requestor(2) <> dcIF.io.cache - memArb.io.in(2) <> rocc.io.imem + memArb.io.in(2) <> TileLinkIOWrapper(rocc.io.imem) + memArb.io.in(3) <> rocc.io.dmem ptw.io.requestor(2) <> rocc.io.iptw ptw.io.requestor(3) <> rocc.io.dptw ptw.io.requestor(4) <> rocc.io.pptw } - - io.tilelink.acquire <> memArb.io.out.acquire - io.tilelink.grant <> memArb.io.out.grant - io.tilelink.finish <> memArb.io.out.finish - // Probes and releases routed directly to coherent dcache - io.tilelink.probe <> dcache.io.mem.probe - // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client) - io.tilelink.release.valid := dcache.io.mem.release.valid - dcache.io.mem.release.ready := io.tilelink.release.ready - io.tilelink.release.bits := dcache.io.mem.release.bits - io.tilelink.release.bits.payload.client_xact_id := Cat(dcache.io.mem.release.bits.payload.client_xact_id, UInt(dcPortId, log2Up(params(NTilePorts)))) - } From d29793d1f7bef43872dafd94d3ea109a1dac5a9d Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 15 Dec 2014 19:23:38 -0800 Subject: [PATCH 0789/1087] cleanup CoherenceMetadata and coherence params --- rocket/src/main/scala/nbdcache.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index b16d3f50..26cc4bbe 100644 --- 
a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -190,7 +190,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { val acquire_type = Reg(UInt()) val release_type = Reg(UInt()) - val line_state = Reg(new ClientMetadata()(co)) + val line_state = Reg(new ClientMetadata) val req = Reg(new MSHRReqInternal()) val req_cmd = io.req_bits.cmd @@ -497,7 +497,7 @@ class ProbeUnit extends L1HellaCacheModule { val wb_req = Decoupled(new WritebackReq) val way_en = Bits(INPUT, nWays) val mshr_rdy = Bool(INPUT) - val line_state = new ClientMetadata()(co).asInput + val line_state = new ClientMetadata().asInput } val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(UInt(), 9) @@ -749,7 +749,7 @@ class HellaCache extends L1HellaCacheModule { io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st // tags - def onReset = L1Metadata(UInt(0), ClientMetadata(UInt(0))(co)) + def onReset = L1Metadata(UInt(0), ClientMetadata(UInt(0))) val meta = Module(new MetadataArray(onReset _)) val metaReadArb = Module(new Arbiter(new MetaReadReq, 5)) val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2)) From 08dcf4c6ca89e6468e240643f802723a64aa38f1 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 17 Dec 2014 14:28:05 -0800 Subject: [PATCH 0790/1087] refactor cache params --- rocket/src/main/scala/icache.scala | 2 + rocket/src/main/scala/nbdcache.scala | 59 ++++++++++++++-------------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index ca12fa06..e9b1671c 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -13,6 +13,8 @@ abstract trait L1CacheParameters extends CacheParameters with CoreParameters { val co = params(TLCoherence) val code = params(ECCCode).getOrElse(new IdentityCode) val outerDataBeats = params(TLDataBeats) + val 
refillCyclesPerBeat = params(TLDataBits)/rowBits + val refillCycles = refillCyclesPerBeat*outerDataBeats } abstract trait FrontendParameters extends L1CacheParameters diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 26cc4bbe..287742e1 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -13,13 +13,15 @@ case object LRSCCycles extends Field[Int] case object NDTLBEntries extends Field[Int] abstract trait L1HellaCacheParameters extends L1CacheParameters { - val indexmsb = untagBits-1 - val indexlsb = blockOffBits - val offsetmsb = indexlsb-1 + val idxMSB = untagBits-1 + val idxLSB = blockOffBits + val offsetmsb = idxLSB-1 val offsetlsb = wordOffBits val doNarrowRead = coreDataBits * nWays % rowBits == 0 val encDataBits = code.width(coreDataBits) val encRowBits = encDataBits*rowWords + val sdqDepth = params(StoreDataQueueDepth) + val nMSHRs = params(NMSHRs) } abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters @@ -106,8 +108,8 @@ class HellaCacheIO extends CoreBundle { val ordered = Bool(INPUT) } -trait HasSDQId extends CoreBundle { - val sdq_id = UInt(width = log2Up(params(StoreDataQueueDepth))) +trait HasSDQId extends CoreBundle with L1HellaCacheParameters { + val sdq_id = UInt(width = log2Up(sdqDepth)) } trait HasMissInfo extends CoreBundle with L1HellaCacheParameters { @@ -116,13 +118,10 @@ trait HasMissInfo extends CoreBundle with L1HellaCacheParameters { val way_en = Bits(width = nWays) } -class MSHRReq extends HellaCacheReqInternal with HasMissInfo with HasCoreData - -class MSHRReqInternal extends HellaCacheReqInternal with HasMissInfo with HasSDQId - -class Replay extends HellaCacheReqInternal with L1HellaCacheParameters with HasCoreData - -class ReplayInternal extends HellaCacheReqInternal with L1HellaCacheParameters with HasSDQId +class Replay extends HellaCacheReqInternal with HasCoreData +class ReplayInternal extends HellaCacheReqInternal with 
HasSDQId +class MSHRReq extends Replay with HasMissInfo +class MSHRReqInternal extends ReplayInternal with HasMissInfo class DataReadReq extends L1HellaCacheBundle { val way_en = Bits(width = nWays) @@ -333,26 +332,26 @@ class MSHRFile extends L1HellaCacheModule { val fence_rdy = Bool(OUTPUT) } - val sdq_val = Reg(init=Bits(0, params(StoreDataQueueDepth))) - val sdq_alloc_id = PriorityEncoder(~sdq_val(params(StoreDataQueueDepth)-1,0)) + val sdq_val = Reg(init=Bits(0, sdqDepth)) + val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0)) val sdq_rdy = !sdq_val.andR val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd) - val sdq = Mem(io.req.bits.data, params(StoreDataQueueDepth)) + val sdq = Mem(io.req.bits.data, sdqDepth) when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } - val idxMatch = Vec.fill(params(NMSHRs)){Bool()} - val tagList = Vec.fill(params(NMSHRs)){Bits()} + val idxMatch = Vec.fill(nMSHRs){Bool()} + val tagList = Vec.fill(nMSHRs){Bits()} val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits - val wbTagList = Vec.fill(params(NMSHRs)){Bits()} - val memRespMux = Vec.fill(params(NMSHRs)){new DataWriteReq} - val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, params(NMSHRs))) - val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, params(NMSHRs))) - val mem_req_arb = Module(new LockingArbiter(new Acquire, params(NMSHRs), outerDataBeats, co.messageHasData _)) - val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), params(NMSHRs))) - val wb_req_arb = Module(new Arbiter(new WritebackReq, params(NMSHRs))) - val replay_arb = Module(new Arbiter(new ReplayInternal, params(NMSHRs))) - val alloc_arb = Module(new Arbiter(Bool(), params(NMSHRs))) + val wbTagList = Vec.fill(nMSHRs){Bits()} + val memRespMux = Vec.fill(nMSHRs){new DataWriteReq} + val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs)) + val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) + val 
mem_req_arb = Module(new LockingArbiter(new Acquire, nMSHRs, outerDataBeats, co.messageHasData _)) + val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), nMSHRs)) + val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs)) + val replay_arb = Module(new Arbiter(new ReplayInternal, nMSHRs)) + val alloc_arb = Module(new Arbiter(Bool(), nMSHRs)) var idx_match = Bool(false) var pri_rdy = Bool(false) @@ -361,7 +360,7 @@ class MSHRFile extends L1HellaCacheModule { io.fence_rdy := true io.probe_rdy := true - for (i <- 0 until params(NMSHRs)) { + for (i <- 0 until nMSHRs) { val mshr = Module(new MSHR(i)) idxMatch(i) := mshr.io.idx_match @@ -410,8 +409,8 @@ class MSHRFile extends L1HellaCacheModule { io.replay <> replay_arb.io.out when (io.replay.valid || sdq_enq) { - sdq_val := sdq_val & ~(UIntToOH(replay_arb.io.out.bits.sdq_id) & Fill(params(StoreDataQueueDepth), free_sdq)) | - PriorityEncoderOH(~sdq_val(params(StoreDataQueueDepth)-1,0)) & Fill(params(StoreDataQueueDepth), sdq_enq) + sdq_val := sdq_val & ~(UIntToOH(replay_arb.io.out.bits.sdq_id) & Fill(sdqDepth, free_sdq)) | + PriorityEncoderOH(~sdq_val(sdqDepth-1,0)) & Fill(sdqDepth, sdq_enq) } } @@ -943,7 +942,7 @@ class HellaCache extends L1HellaCacheModule { // nack it like it's hot val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || - s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready + s1_req.addr(idxMSB,idxLSB) === prober.io.meta_write.bits.idx && !prober.io.req.ready val s2_nack_hit = RegEnable(s1_nack, s1_valid || s1_replay) when (s2_nack_hit) { mshrs.io.req.valid := Bool(false) } val s2_nack_victim = s2_hit && mshrs.io.secondary_miss From 77e5e6b5617a86afd10ee631ba91ea415ab070a4 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 17 Dec 2014 19:29:28 -0800 Subject: [PATCH 0791/1087] refill bug --- rocket/src/main/scala/nbdcache.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala 
b/rocket/src/main/scala/nbdcache.scala index 287742e1..dccdf7a4 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -221,11 +221,8 @@ class MSHR(id: Int) extends L1HellaCacheModule { state := s_meta_write_resp } when (state === s_refill_resp) { + when (reply) { line_state := meta_on_grant } when (refill_done) { state := s_meta_write_req } - when (reply) { - if(refillCycles > 1) refill_cnt := refill_cnt + UInt(1) - line_state := meta_on_grant - } } when (io.mem_req.fire()) { // s_refill_req state := s_refill_resp From 1cb65d5ec1075f70dd68c21bd4b4e8eecee86b02 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 29 Dec 2014 22:56:18 -0800 Subject: [PATCH 0792/1087] %s/master/manager/g --- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index e9b1671c..63b1ec74 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -253,7 +253,7 @@ class ICache extends FrontendModule val ack_q = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) ack_q.io.enq.valid := refill_done && co.requiresAckForGrant(refill_bits.payload) - ack_q.io.enq.bits.payload.master_xact_id := refill_bits.payload.master_xact_id + ack_q.io.enq.bits.payload.manager_xact_id := refill_bits.payload.manager_xact_id ack_q.io.enq.bits.header.dst := refill_bits.header.src // output signals diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index dccdf7a4..7fa3044c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -260,7 +260,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { val ackq = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) ackq.io.enq.valid := (wb_done || refill_done) && co.requiresAckForGrant(io.mem_grant.bits.payload) - ackq.io.enq.bits.payload.master_xact_id := 
io.mem_grant.bits.payload.master_xact_id + ackq.io.enq.bits.payload.manager_xact_id := io.mem_grant.bits.payload.manager_xact_id ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp io.mem_finish.valid := ackq.io.deq.valid && can_finish From 6181de4cc952fed21f7754ebbfc70c5153a1ca60 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 3 Jan 2015 13:34:38 -0800 Subject: [PATCH 0793/1087] Much refactor, so control --- rocket/src/main/scala/ctrl.scala | 126 ++++++++++++++++++------------ rocket/src/main/scala/dpath.scala | 24 ++---- 2 files changed, 83 insertions(+), 67 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 6ea50107..1d1c7cfb 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -14,11 +14,7 @@ class CtrlDpathIO extends Bundle val sel_pc = UInt(OUTPUT, 3) val killd = Bool(OUTPUT) val ren = Vec.fill(2)(Bool(OUTPUT)) - val sel_alu2 = UInt(OUTPUT, 3) - val sel_alu1 = UInt(OUTPUT, 2) - val sel_imm = UInt(OUTPUT, 3) - val fn_dw = Bool(OUTPUT) - val fn_alu = UInt(OUTPUT, SZ_ALU_FN) + val ex_ctrl = new IntCtrlSigs().asOutput val div_mul_val = Bool(OUTPUT) val div_mul_kill = Bool(OUTPUT) val div_val = Bool(OUTPUT) @@ -83,6 +79,43 @@ abstract trait DecodeConstants val table: Array[(UInt, List[UInt])] } +class IntCtrlSigs extends Bundle { + val legal = Bool() + val fp = Bool() + val rocc = Bool() + val branch = Bool() + val jal = Bool() + val jalr = Bool() + val rrs2 = Bool() + val rrs1 = Bool() + val sel_alu2 = Bits(width = A2_X.getWidth) + val sel_alu1 = Bits(width = A1_X.getWidth) + val sel_imm = Bits(width = IMM_X.getWidth) + val alu_dw = Bool() + val alu_fn = Bits(width = FN_X.getWidth) + val mem = Bool() + val mem_cmd = Bits(width = M_SZ) + val mem_type = Bits(width = MT_SZ) + val mul = Bool() + val div = Bool() + val wrd = Bool() + val csr = Bits(width = CSR.SZ) + val fence_i = 
Bool() + val sret = Bool() + val scall = Bool() + val replay_next = Bool() + val fence = Bool() + val amo = Bool() + + def decode(inst: UInt, table: Iterable[(UInt, List[UInt])]) = { + val decoder = DecodeLogic(inst, XDecode.decode_default, table) + Vec(legal, fp, rocc, branch, jal, jalr, rrs2, rrs1, sel_alu2, sel_alu1, + sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type, mul, div, wrd, csr, + fence_i, sret, scall, replay_next, fence, amo) := decoder + this + } +} + object XDecode extends DecodeConstants { val table = Array( @@ -321,13 +354,9 @@ class Control extends Module if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table - val cs = DecodeLogic(io.dpath.inst, XDecode.decode_default, decode_table) - - val (id_int_val: Bool) :: (id_fp_val: Bool) :: (id_rocc_val: Bool) :: (id_branch: Bool) :: (id_jal: Bool) :: (id_jalr: Bool) :: (id_renx2: Bool) :: (id_renx1: Bool) :: cs0 = cs - val id_sel_alu2 :: id_sel_alu1 :: id_sel_imm :: (id_fn_dw: Bool) :: id_fn_alu :: cs1 = cs0 - val (id_mem_val: Bool) :: id_mem_cmd :: id_mem_type :: (id_mul_val: Bool) :: (id_div_val: Bool) :: (id_wen: Bool) :: cs2 = cs1 - val id_csr :: (id_fence_i: Bool) :: (id_sret: Bool) :: (id_syscall: Bool) :: (id_replay_next: Bool) :: (id_fence: Bool) :: (id_amo: Bool) :: Nil = cs2 - + val id_ctrl = new IntCtrlSigs().decode(io.dpath.inst, decode_table) + val ex_ctrl = Reg(new IntCtrlSigs) + val ex_reg_xcpt_interrupt = Reg(Bool()) val ex_reg_valid = Reg(Bool()) val ex_reg_branch = Reg(Bool()) @@ -418,9 +447,9 @@ class Control extends Module val id_csr_addr = io.dpath.inst(31,20) val isLegalCSR = Vec.tabulate(1 << id_csr_addr.getWidth)(i => Bool(legal_csrs contains i)) - val id_csr_en = id_csr != CSR.N + val id_csr_en = id_ctrl.csr != CSR.N val id_csr_fp = Bool(!params(BuildFPU).isEmpty) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) - val id_csr_wen = id_raddr1 != UInt(0) || !Vec(CSR.S, 
CSR.C).contains(id_csr) + val id_csr_wen = id_raddr1 != UInt(0) || !Vec(CSR.S, CSR.C).contains(id_ctrl.csr) val id_csr_invalid = id_csr_en && !isLegalCSR(id_csr_addr) val id_csr_privileged = id_csr_en && (id_csr_addr(11,10) === UInt(3) && id_csr_wen || @@ -437,24 +466,24 @@ class Control extends Module // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) val id_amo_aq = io.dpath.inst(26) val id_amo_rl = io.dpath.inst(25) - val id_fence_next = id_fence || id_amo && id_amo_rl + val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl val id_mem_busy = !io.dmem.ordered || ex_reg_mem_val val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) && (io.rocc.busy || ex_reg_rocc_val || mem_reg_rocc_val || wb_reg_rocc_val) id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy - val id_do_fence = id_rocc_busy && id_fence || - id_mem_busy && (id_amo && id_amo_aq || id_fence_i || id_reg_fence && (id_mem_val || id_rocc_val) || id_csr_flush) + val id_do_fence = id_rocc_busy && id_ctrl.fence || + id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_flush) val (id_xcpt, id_cause) = checkExceptions(List( (id_interrupt, id_interrupt_cause), (io.imem.resp.bits.xcpt_ma, UInt(Causes.misaligned_fetch)), (io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)), - (!id_int_val || id_csr_invalid, UInt(Causes.illegal_instruction)), + (!id_ctrl.legal || id_csr_invalid, UInt(Causes.illegal_instruction)), (id_csr_privileged, UInt(Causes.privileged_instruction)), - (id_sret && !io.dpath.status.s, UInt(Causes.privileged_instruction)), - ((id_fp_val || id_csr_fp) && !io.dpath.status.ef, UInt(Causes.fp_disabled)), - (id_syscall, UInt(Causes.syscall)), - (id_rocc_val && !io.dpath.status.er, UInt(Causes.accelerator_disabled)))) + (id_ctrl.sret && !io.dpath.status.s, UInt(Causes.privileged_instruction)), + ((id_ctrl.fp || id_csr_fp) && !io.dpath.status.ef,UInt(Causes.fp_disabled)), + (id_ctrl.scall, 
UInt(Causes.syscall)), + (id_ctrl.rocc && !io.dpath.status.er, UInt(Causes.accelerator_disabled)))) ex_reg_xcpt_interrupt := id_interrupt && !take_pc && io.imem.resp.valid when (id_xcpt) { ex_reg_cause := id_cause } @@ -479,25 +508,26 @@ class Control extends Module ex_reg_xcpt := Bool(false) } .otherwise { - ex_reg_branch := id_branch - ex_reg_jal := id_jal - ex_reg_jalr := id_jalr + ex_ctrl := id_ctrl + ex_reg_branch := id_ctrl.branch + ex_reg_jal := id_ctrl.jal + ex_reg_jalr := id_ctrl.jalr ex_reg_btb_hit := io.imem.btb_resp.valid when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits } - ex_reg_div_mul_val := id_mul_val || id_div_val - ex_reg_mem_val := id_mem_val.toBool + ex_reg_div_mul_val := id_ctrl.mul || id_ctrl.div + ex_reg_mem_val := id_ctrl.mem ex_reg_valid := Bool(true) - ex_reg_csr := id_csr - ex_reg_wen := id_wen - ex_reg_fp_wen := id_fp_val && io.fpu.dec.wen - ex_reg_sret := id_sret - ex_reg_flush_inst := id_fence_i - ex_reg_fp_val := id_fp_val - ex_reg_rocc_val := id_rocc_val.toBool - ex_reg_replay_next := id_replay_next || id_csr_flush + ex_reg_csr := id_ctrl.csr + ex_reg_wen := id_ctrl.wrd + ex_reg_fp_wen := id_ctrl.fp && io.fpu.dec.wen + ex_reg_sret := id_ctrl.sret + ex_reg_flush_inst := id_ctrl.fence_i + ex_reg_fp_val := id_ctrl.fp + ex_reg_rocc_val := id_ctrl.rocc + ex_reg_replay_next := id_ctrl.replay_next || id_csr_flush ex_reg_load_use := id_load_use - ex_reg_mem_cmd := id_mem_cmd - ex_reg_mem_type := id_mem_type.toUInt + ex_reg_mem_cmd := id_ctrl.mem_cmd + ex_reg_mem_type := id_ctrl.mem_type ex_reg_xcpt := id_xcpt } @@ -675,9 +705,9 @@ class Control extends Module } // stall for RAW/WAW hazards on PCRs, loads, AMOs, and mul/div in execute stage. 
- val id_renx1_not0 = id_renx1 && id_raddr1 != UInt(0) - val id_renx2_not0 = id_renx2 && id_raddr2 != UInt(0) - val id_wen_not0 = id_wen && id_waddr != UInt(0) + val id_renx1_not0 = id_ctrl.rrs1 && id_raddr1 != UInt(0) + val id_renx2_not0 = id_ctrl.rrs2 && id_raddr2 != UInt(0) + val id_wen_not0 = id_ctrl.wrd && id_waddr != UInt(0) val data_hazard_ex = ex_reg_wen && (id_renx1_not0 && id_raddr1 === io.dpath.ex_waddr || id_renx2_not0 && id_raddr2 === io.dpath.ex_waddr || @@ -729,8 +759,8 @@ class Control extends Module val ctrl_stalld = id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || - id_fp_val && id_stall_fpu || - id_mem_val && !io.dmem.req.ready || + id_ctrl.fp && id_stall_fpu || + id_ctrl.mem && !io.dmem.req.ready || id_do_fence val ctrl_draind = id_interrupt || ex_reg_replay_next ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind @@ -741,13 +771,9 @@ class Control extends Module io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen io.dpath.wb_load := wb_reg_mem_val && wb_reg_wen - io.dpath.ren(1) := id_renx2 - io.dpath.ren(0) := id_renx1 - io.dpath.sel_alu2 := id_sel_alu2.toUInt - io.dpath.sel_alu1 := id_sel_alu1.toUInt - io.dpath.sel_imm := id_sel_imm.toUInt - io.dpath.fn_dw := id_fn_dw.toBool - io.dpath.fn_alu := id_fn_alu.toUInt + io.dpath.ren(1) := id_ctrl.rrs2 + io.dpath.ren(0) := id_ctrl.rrs1 + io.dpath.ex_ctrl := ex_ctrl io.dpath.div_mul_val := ex_reg_div_mul_val io.dpath.div_mul_kill := mem_reg_div_mul_val && killm_common io.dpath.ex_fp_val:= ex_reg_fp_val @@ -767,7 +793,7 @@ class Control extends Module io.dpath.ex_rocc_val := ex_reg_rocc_val io.dpath.mem_rocc_val := mem_reg_rocc_val - io.fpu.valid := !ctrl_killd && id_fp_val + io.fpu.valid := !ctrl_killd && id_ctrl.fp io.fpu.killx := ctrl_killx io.fpu.killm := killm_common diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 7df1a8d3..c2758e87 100644 --- a/rocket/src/main/scala/dpath.scala +++ 
b/rocket/src/main/scala/dpath.scala @@ -22,11 +22,6 @@ class Datapath extends Module // execute definitions val ex_reg_pc = Reg(UInt()) val ex_reg_inst = Reg(Bits()) - val ex_reg_ctrl_fn_dw = Reg(UInt()) - val ex_reg_ctrl_fn_alu = Reg(UInt()) - val ex_reg_sel_alu2 = Reg(UInt()) - val ex_reg_sel_alu1 = Reg(UInt()) - val ex_reg_sel_imm = Reg(UInt()) val ex_reg_kill = Reg(Bool()) val ex_reg_rs_bypass = Vec.fill(2)(Reg(Bool())) val ex_reg_rs_lsb = Vec.fill(2)(Reg(Bits())) @@ -102,11 +97,6 @@ class Datapath extends Module when (!io.ctrl.killd) { ex_reg_pc := id_pc ex_reg_inst := id_inst - ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUInt - ex_reg_ctrl_fn_alu := io.ctrl.fn_alu - ex_reg_sel_alu2 := io.ctrl.sel_alu2 - ex_reg_sel_alu1 := io.ctrl.sel_alu1 - ex_reg_sel_imm := io.ctrl.sel_imm ex_reg_rs_bypass := io.ctrl.bypass for (i <- 0 until id_rs.size) { when (io.ctrl.ren(i)) { @@ -129,18 +119,18 @@ class Datapath extends Module val ex_rs = for (i <- 0 until id_rs.size) yield Mux(ex_reg_rs_bypass(i), bypass(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) - val ex_imm = imm(ex_reg_sel_imm, ex_reg_inst) - val ex_op1 = MuxLookup(ex_reg_sel_alu1, SInt(0), Seq( + val ex_imm = imm(io.ctrl.ex_ctrl.sel_imm, ex_reg_inst) + val ex_op1 = MuxLookup(io.ctrl.ex_ctrl.sel_alu1, SInt(0), Seq( A1_RS1 -> ex_rs(0).toSInt, A1_PC -> ex_reg_pc.toSInt)) - val ex_op2 = MuxLookup(ex_reg_sel_alu2, SInt(0), Seq( + val ex_op2 = MuxLookup(io.ctrl.ex_ctrl.sel_alu2, SInt(0), Seq( A2_RS2 -> ex_rs(1).toSInt, A2_IMM -> ex_imm, A2_FOUR -> SInt(4))) val alu = Module(new ALU) - alu.io.dw := ex_reg_ctrl_fn_dw - alu.io.fn := ex_reg_ctrl_fn_alu + alu.io.dw := io.ctrl.ex_ctrl.alu_dw + alu.io.fn := io.ctrl.ex_ctrl.alu_fn alu.io.in2 := ex_op2.toUInt alu.io.in1 := ex_op1 @@ -148,8 +138,8 @@ class Datapath extends Module val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1, earlyOut = params(FastMulDiv))) div.io.req.valid := io.ctrl.div_mul_val - div.io.req.bits.dw := ex_reg_ctrl_fn_dw - 
div.io.req.bits.fn := ex_reg_ctrl_fn_alu + div.io.req.bits.dw := io.ctrl.ex_ctrl.alu_dw + div.io.req.bits.fn := io.ctrl.ex_ctrl.alu_fn div.io.req.bits.in1 := ex_rs(0) div.io.req.bits.in2 := ex_rs(1) div.io.req.bits.tag := io.ctrl.ex_waddr From 94b75c7cb161c978c336fdd88a396cd7592e26f5 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 4 Jan 2015 15:21:17 -0800 Subject: [PATCH 0794/1087] Continue refactoring control --- rocket/src/main/scala/ctrl.scala | 705 ++++++++++++------------------ rocket/src/main/scala/dpath.scala | 30 +- 2 files changed, 305 insertions(+), 430 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 1d1c7cfb..7e9667ac 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -12,29 +12,15 @@ class CtrlDpathIO extends Bundle { // outputs to datapath val sel_pc = UInt(OUTPUT, 3) - val killd = Bool(OUTPUT) + val killd = Bool(OUTPUT) + val killm = Bool(OUTPUT) val ren = Vec.fill(2)(Bool(OUTPUT)) val ex_ctrl = new IntCtrlSigs().asOutput - val div_mul_val = Bool(OUTPUT) - val div_mul_kill = Bool(OUTPUT) - val div_val = Bool(OUTPUT) - val div_kill = Bool(OUTPUT) + val mem_ctrl = new IntCtrlSigs().asOutput val csr = UInt(OUTPUT, 3) val sret = Bool(OUTPUT) - val mem_load = Bool(OUTPUT) - val wb_load = Bool(OUTPUT) - val ex_fp_val= Bool(OUTPUT) - val mem_fp_val= Bool(OUTPUT) - val ex_wen = Bool(OUTPUT) val ex_valid = Bool(OUTPUT) - val mem_jalr = Bool(OUTPUT) - val mem_branch = Bool(OUTPUT) - val mem_wen = Bool(OUTPUT) val wb_wen = Bool(OUTPUT) - val ex_mem_type = Bits(OUTPUT, 3) - val ex_rs2_val = Bool(OUTPUT) - val ex_rocc_val = Bool(OUTPUT) - val mem_rocc_val = Bool(OUTPUT) val bypass = Vec.fill(2)(Bool(OUTPUT)) val bypass_src = Vec.fill(2)(Bits(OUTPUT, SZ_BYP)) val ll_ready = Bool(OUTPUT) @@ -66,15 +52,15 @@ abstract trait DecodeConstants val xpr64 = Y val decode_default = - // jal fence.i - // | jalr mul_val | sret - // fp_val| | renx2 | div_val | | syscall - // | 
rocc| | | renx1 s_alu1 mem_val | | wen | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,CSR.X,N,X,X,X,X,X) + // jal renf1 fence.i + // | jalr | renf2 | sret + // fp_val| | renx2 | | renf3 | | syscall + // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | wxd | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,N,X,X,X,X,X) val table: Array[(UInt, List[UInt])] } @@ -86,8 +72,8 @@ class IntCtrlSigs extends Bundle { val branch = Bool() val jal = Bool() val jalr = Bool() - val rrs2 = Bool() - val rrs1 = Bool() + val rxs2 = Bool() + val rxs1 = Bool() val sel_alu2 = Bits(width = A2_X.getWidth) val sel_alu1 = Bits(width = A1_X.getWidth) val sel_imm = Bits(width = IMM_X.getWidth) @@ -96,9 +82,12 @@ class IntCtrlSigs extends Bundle { val mem = Bool() val mem_cmd = Bits(width = M_SZ) val mem_type = Bits(width = MT_SZ) - val mul = Bool() + val rfs1 = Bool() + val rfs2 = Bool() + val rfs3 = Bool() + val wfd = Bool() val div = Bool() - val wrd = Bool() + val wxd = Bool() val csr = Bits(width = CSR.SZ) val fence_i = Bool() val sret = Bool() @@ -109,9 +98,10 @@ class IntCtrlSigs extends Bundle { def decode(inst: UInt, table: Iterable[(UInt, List[UInt])]) = { val decoder = DecodeLogic(inst, XDecode.decode_default, table) - Vec(legal, fp, rocc, branch, jal, jalr, rrs2, rrs1, sel_alu2, sel_alu1, - sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type, mul, div, wrd, csr, - fence_i, sret, 
scall, replay_next, fence, amo) := decoder + Vec(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2, sel_alu1, + sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type, + rfs1, rfs2, rfs3, wfd, div, wxd, + csr, fence_i, sret, scall, replay_next, fence, amo) := decoder this } } @@ -119,225 +109,225 @@ class IntCtrlSigs extends Bundle { object XDecode extends DecodeConstants { val table = Array( - // jal fence.i - // | jalr mul_val | sret - // fp_val| | renx2 | div_val | | syscall - // | rocc| | | renx1 s_alu1 mem_val | | wen | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BEQ-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BLT-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BLTU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BGE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - BGEU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), + // jal renf1 fence.i + // | jalr | renf2 | sret + // fp_val| | renx2 | | renf3 | | syscall + // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | wxd | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + BEQ-> List(Y, 
N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + BLT-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + BLTU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + BGE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + BGEU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - JAL-> List(Y, N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - JALR-> List(Y, N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - AUIPC-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + JAL-> List(Y, N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + JALR-> List(Y, N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + AUIPC-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - LB-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,Y,CSR.N,N,N,N,N,N,N), - LH-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,Y,CSR.N,N,N,N,N,N,N), - LW-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,N), - LD-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,N), - LBU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,Y,CSR.N,N,N,N,N,N,N), - LHU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,Y,CSR.N,N,N,N,N,N,N), - LWU-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,Y,CSR.N,N,N,N,N,N,N), - SB-> List(Y, 
N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,CSR.N,N,N,N,N,N,N), - SH-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,CSR.N,N,N,N,N,N,N), - SW-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), - SD-> List(xpr64,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), + LB-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + LH-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + LW-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + LD-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + LBU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + LHU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + LWU-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SB-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + SH-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + SW-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + SD-> List(xpr64,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - AMOADD_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOXOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOSWAP_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, 
Y,M_XA_SWAP,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOAND_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMIN_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMINU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAX_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAXU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOADD_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOSWAP_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOXOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOAND_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMIN_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMINU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAX_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAXU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOADD_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOXOR_W-> List(Y, 
N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOSWAP_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOAND_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMIN_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMINU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAX_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAXU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOADD_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOSWAP_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOXOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOAND_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMIN_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMINU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAX_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOMAXU_D-> 
List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - LR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - LR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), - SC_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,Y,CSR.N,N,N,N,N,N,Y), - SC_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,Y,CSR.N,N,N,N,N,N,Y), + LR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + LR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + SC_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + SC_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - LUI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLTI -> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLTIU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ANDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - XORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), 
- SRAI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADD-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SUB-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLT-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLTU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - AND-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - OR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - XOR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRA-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + LUI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + ADDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SLTI -> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SLTIU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + ANDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + ORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + XORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SLLI-> List(Y, 
N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SRLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SRAI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + ADD-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SUB-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SLT-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SLTU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + AND-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + OR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + XOR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SLL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SRL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SRA-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRAIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - ADDW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, 
N,N,Y,CSR.N,N,N,N,N,N,N), - SUBW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SLLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - SRAW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), + ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SLLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SRLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SRAIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + ADDW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SUBW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SLLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SRLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + SRAW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - MUL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULH-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULHU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULHSU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), - MULW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, 
IMM_X, DW_32, FN_MUL, N,M_X, MT_X, Y,N,Y,CSR.N,N,N,N,N,N,N), + MUL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + MULH-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + MULHU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + MULHSU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + MULW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - DIV-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REM-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REMU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REMW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), - REMUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,Y,Y,CSR.N,N,N,N,N,N,N), + DIV-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + REM-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + REMU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVW-> 
List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + DIVUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + REMW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + REMUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - SCALL-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,Y,N,N,N), - SRET-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,Y,N,N,N,N), - FENCE-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,Y,N), - FENCE_I-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,Y,N,N,Y,N,N), - CSRRW-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), - CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), - CSRRC-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N), - CSRRWI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.W,N,N,N,N,N,N), - CSRRSI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.S,N,N,N,N,N,N), - CSRRCI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.C,N,N,N,N,N,N)) + SCALL-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,Y,N,N,N), + SRET-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N,N,N,N), + FENCE-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,Y,N), + FENCE_I-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,Y,N,N,Y,N,N), + CSRRW-> List(Y, 
N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N,N,N,N), + CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N,N,N,N), + CSRRC-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N,N,N,N), + CSRRWI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N,N,N,N), + CSRRSI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N,N,N,N), + CSRRCI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N,N,N,N)) } object FDecode extends DecodeConstants { val table = Array( - // jal fence.i - // | jalr mul_val | sret - // fp_val| | renx2 | div_val | | syscall - // | rocc| | | renx1 s_alu1 mem_val | | wen | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSGNJN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMIN_S-> List(Y, 
Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMIN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMAX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMAX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMUL_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMUL_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FNMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCLASS_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCLASS_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, 
IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_X_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_X_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_W_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_W_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_WU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_WU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_L_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_L_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_LU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_LU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FEQ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FEQ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLT_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLE_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FLE_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_S_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FMV_D_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, 
FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - FLW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), - FLD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,CSR.N,N,N,N,N,N,N), - FSW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,CSR.N,N,N,N,N,N,N), - FSD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,CSR.N,N,N,N,N,N,N)) + // jal renf1 fence.i + // | jalr | renf2 | sret + // fp_val| | renx2 | | renf3 | | syscall + // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | wxd | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FSGNJ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), 
+ FSGNJ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FSGNJX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FSGNJX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FSGNJN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FSGNJN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FMIN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FMIN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FMAX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FMAX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FMUL_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FMUL_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), + FMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), + FMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), + FMSUB_D-> List(Y, 
Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), + FNMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), + FNMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), + FNMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), + FNMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), + FCLASS_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FCLASS_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FMV_X_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FMV_X_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_W_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_W_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_WU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_WU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_L_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_L_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_LU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FCVT_LU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FEQ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FEQ_D-> List(Y, 
Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FLT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FLT_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FLE_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FLE_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), + FMV_S_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FMV_D_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FCVT_S_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FCVT_D_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FLW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FLD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), + FSW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N,N,N,N), 
+ FSD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N,N,N,N)) } object RoCCDecode extends DecodeConstants { val table = Array( - // jal fence.i - // | jalr mul_val | sret - // fp_val| | renx2 | div_val | | syscall - // | rocc| | | renx1 s_alu1 mem_val | | wen | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | csr | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - CUSTOM0-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM0_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM0_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM1_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM1_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2-> List(Y, 
N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,CSR.N,N,N,N,N,N,N)) + // jal renf1 fence.i + // | jalr | renf2 | sret + // fp_val| | renx2 | | renf3 | | syscall + // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | replay_next + // | | | | | | | | | | | | | | | | | | | | | wxd | | | | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | | | + CUSTOM0-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, 
DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM0_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM1-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM1_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM1_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM1_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM2_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N)) } class Control extends Module @@ -356,68 +346,36 @@ class Control extends Module val id_ctrl = new IntCtrlSigs().decode(io.dpath.inst, decode_table) val ex_ctrl = Reg(new IntCtrlSigs) + val mem_ctrl = Reg(new IntCtrlSigs) + val wb_ctrl = Reg(new IntCtrlSigs) val ex_reg_xcpt_interrupt = Reg(Bool()) val ex_reg_valid = Reg(Bool()) - val ex_reg_branch = Reg(Bool()) - val ex_reg_jal = Reg(Bool()) - val ex_reg_jalr = Reg(Bool()) val ex_reg_btb_hit = Reg(Bool()) val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits.clone) - val ex_reg_sret = Reg(Bool()) - val ex_reg_wen = Reg(Bool()) - val ex_reg_fp_wen = Reg(Bool()) - val ex_reg_flush_inst = Reg(Bool()) - val ex_reg_div_mul_val = Reg(Bool()) - val ex_reg_mem_val = Reg(Bool()) val ex_reg_xcpt = Reg(Bool()) - val ex_reg_fp_val = Reg(Bool()) - val ex_reg_rocc_val = Reg(Bool()) val ex_reg_replay_next = Reg(Bool()) val ex_reg_load_use = Reg(Bool()) - val ex_reg_csr = Reg(UInt()) - val ex_reg_mem_cmd = Reg(Bits()) - val ex_reg_mem_type = Reg(Bits()) val ex_reg_cause = Reg(UInt()) val mem_reg_xcpt_interrupt = Reg(Bool()) val mem_reg_valid = Reg(Bool()) - val mem_reg_branch = Reg(Bool()) - val mem_reg_jal = Reg(Bool()) - val mem_reg_jalr = Reg(Bool()) val mem_reg_btb_hit = 
Reg(Bool()) val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits.clone) - val mem_reg_sret = Reg(Bool()) - val mem_reg_wen = Reg(Bool()) - val mem_reg_fp_wen = Reg(Bool()) - val mem_reg_flush_inst = Reg(Bool()) - val mem_reg_div_mul_val = Reg(Bool()) - val mem_reg_mem_val = Reg(Bool()) val mem_reg_xcpt = Reg(Bool()) - val mem_reg_fp_val = Reg(Bool()) - val mem_reg_rocc_val = Reg(Bool()) val mem_reg_replay = Reg(Bool()) val mem_reg_replay_next = Reg(Bool()) - val mem_reg_csr = Reg(UInt()) val mem_reg_cause = Reg(UInt()) val mem_reg_slow_bypass = Reg(Bool()) val wb_reg_valid = Reg(Bool()) - val wb_reg_csr = Reg(UInt()) - val wb_reg_wen = Reg(Bool()) - val wb_reg_fp_wen = Reg(Bool()) - val wb_reg_rocc_val = Reg(Bool()) - val wb_reg_flush_inst = Reg(Bool()) - val wb_reg_mem_val = Reg(Bool()) - val wb_reg_sret = Reg(Bool()) val wb_reg_xcpt = Reg(Bool()) val wb_reg_replay = Reg(Bool()) val wb_reg_cause = Reg(UInt()) val wb_reg_fp_val = Reg(Bool()) - val wb_reg_div_mul_val = Reg(Bool()) val take_pc_wb = Bool() - val take_pc_mem = io.dpath.mem_misprediction && (mem_reg_branch || mem_reg_jalr || mem_reg_jal) + val take_pc_mem = io.dpath.mem_misprediction && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) val take_pc_mem_wb = take_pc_wb || take_pc_mem val take_pc = take_pc_mem_wb val ctrl_killd = Bool() @@ -467,9 +425,10 @@ class Control extends Module val id_amo_aq = io.dpath.inst(26) val id_amo_rl = io.dpath.inst(25) val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl - val id_mem_busy = !io.dmem.ordered || ex_reg_mem_val + val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) && - (io.rocc.busy || ex_reg_rocc_val || mem_reg_rocc_val || wb_reg_rocc_val) + (io.rocc.busy || ex_reg_valid && ex_ctrl.rocc || + mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc) id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy val id_do_fence = id_rocc_busy && id_ctrl.fence || id_mem_busy && 
(id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_flush) @@ -489,150 +448,79 @@ class Control extends Module when (id_xcpt) { ex_reg_cause := id_cause } when (ctrl_killd) { - ex_reg_branch := false - ex_reg_jal := false - ex_reg_jalr := false ex_reg_btb_hit := false - ex_reg_div_mul_val := Bool(false) - ex_reg_mem_val := Bool(false) ex_reg_valid := Bool(false) - ex_reg_wen := Bool(false) - ex_reg_fp_wen := Bool(false) - ex_reg_sret := Bool(false) - ex_reg_flush_inst := Bool(false) - ex_reg_fp_val := Bool(false) - ex_reg_rocc_val := Bool(false) ex_reg_replay_next := Bool(false) ex_reg_load_use := Bool(false) - ex_reg_csr := CSR.N ex_reg_xcpt := Bool(false) } .otherwise { ex_ctrl := id_ctrl - ex_reg_branch := id_ctrl.branch - ex_reg_jal := id_ctrl.jal - ex_reg_jalr := id_ctrl.jalr ex_reg_btb_hit := io.imem.btb_resp.valid when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits } - ex_reg_div_mul_val := id_ctrl.mul || id_ctrl.div - ex_reg_mem_val := id_ctrl.mem ex_reg_valid := Bool(true) - ex_reg_csr := id_ctrl.csr - ex_reg_wen := id_ctrl.wrd - ex_reg_fp_wen := id_ctrl.fp && io.fpu.dec.wen - ex_reg_sret := id_ctrl.sret - ex_reg_flush_inst := id_ctrl.fence_i - ex_reg_fp_val := id_ctrl.fp - ex_reg_rocc_val := id_ctrl.rocc ex_reg_replay_next := id_ctrl.replay_next || id_csr_flush ex_reg_load_use := id_load_use - ex_reg_mem_cmd := id_ctrl.mem_cmd - ex_reg_mem_type := id_ctrl.mem_type ex_reg_xcpt := id_xcpt } // replay inst in ex stage - val wb_dcache_miss = wb_reg_mem_val && !io.dmem.resp.valid - val replay_ex_structural = ex_reg_mem_val && !io.dmem.req.ready || - ex_reg_div_mul_val && !io.dpath.div_mul_rdy + val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid + val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready || + ex_ctrl.div && !io.dpath.div_mul_rdy val replay_ex_other = wb_dcache_miss && ex_reg_load_use || mem_reg_replay_next - val replay_ex = replay_ex_structural || replay_ex_other + 
val replay_ex = ex_reg_valid && replay_ex_structural || replay_ex_other ctrl_killx := take_pc_mem_wb || replay_ex // detect 2-cycle load-use delay for LB/LH/SC - val ex_slow_bypass = ex_reg_mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_reg_mem_type) + val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type) val (ex_xcpt, ex_cause) = checkExceptions(List( (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), - (ex_reg_fp_val && io.fpu.illegal_rm, UInt(Causes.illegal_instruction)))) + (ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction)))) mem_reg_replay := !take_pc_mem_wb && replay_ex mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt && !mem_reg_replay_next when (ex_xcpt) { mem_reg_cause := ex_cause } - mem_reg_div_mul_val := ex_reg_div_mul_val && io.dpath.div_mul_rdy when (ctrl_killx) { mem_reg_valid := false - mem_reg_branch := false - mem_reg_jal := false - mem_reg_jalr := false - mem_reg_csr := CSR.N - mem_reg_wen := Bool(false) - mem_reg_fp_wen := Bool(false) - mem_reg_sret := Bool(false) - mem_reg_mem_val := Bool(false) - mem_reg_flush_inst := Bool(false) - mem_reg_fp_val := Bool(false) - mem_reg_rocc_val := Bool(false) mem_reg_replay_next := Bool(false) mem_reg_xcpt := Bool(false) } .otherwise { + mem_ctrl := ex_ctrl mem_reg_valid := ex_reg_valid - mem_reg_branch := ex_reg_branch - mem_reg_jal := ex_reg_jal - mem_reg_jalr := ex_reg_jalr mem_reg_btb_hit := ex_reg_btb_hit when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp } - mem_reg_csr := ex_reg_csr - mem_reg_wen := ex_reg_wen - mem_reg_fp_wen := ex_reg_fp_wen - mem_reg_sret := ex_reg_sret - mem_reg_mem_val := ex_reg_mem_val - mem_reg_flush_inst := ex_reg_flush_inst - mem_reg_fp_val := ex_reg_fp_val - mem_reg_rocc_val := ex_reg_rocc_val mem_reg_replay_next := ex_reg_replay_next mem_reg_slow_bypass := ex_slow_bypass mem_reg_xcpt := ex_xcpt } val (mem_xcpt, mem_cause) = checkExceptions(List( - 
(mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), - (mem_reg_mem_val && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)), - (mem_reg_mem_val && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)), - (mem_reg_mem_val && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load)), - (mem_reg_mem_val && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)))) + (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), + (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)), + (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)), + (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)), + (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load)))) - val dcache_kill_mem = mem_reg_wen && io.dmem.replay_next.valid // structural hazard on writeback port - val fpu_kill_mem = mem_reg_fp_val && io.fpu.nack_mem + val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next.valid // structural hazard on writeback port + val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem + wb_reg_valid := !ctrl_killm + when (!ctrl_killm) { wb_ctrl := mem_ctrl } wb_reg_replay := replay_mem && !take_pc_wb wb_reg_xcpt := mem_xcpt && !take_pc_wb when (mem_xcpt) { wb_reg_cause := mem_cause } - when (ctrl_killm) { - wb_reg_valid := Bool(false) - wb_reg_csr := CSR.N - wb_reg_wen := Bool(false) - wb_reg_fp_wen := Bool(false) - wb_reg_sret := Bool(false) - wb_reg_flush_inst := Bool(false) - wb_reg_mem_val := Bool(false) - wb_reg_div_mul_val := Bool(false) - wb_reg_fp_val := Bool(false) - wb_reg_rocc_val := Bool(false) - } - .otherwise { - wb_reg_valid := mem_reg_valid - wb_reg_csr := mem_reg_csr - wb_reg_wen := mem_reg_wen - wb_reg_fp_wen := mem_reg_fp_wen - wb_reg_sret := mem_reg_sret 
&& !mem_reg_replay - wb_reg_flush_inst := mem_reg_flush_inst - wb_reg_mem_val := mem_reg_mem_val - wb_reg_div_mul_val := mem_reg_div_mul_val - wb_reg_fp_val := mem_reg_fp_val - wb_reg_rocc_val := mem_reg_rocc_val - } - - val wb_set_sboard = wb_reg_div_mul_val || wb_dcache_miss || wb_reg_rocc_val + val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc val replay_wb_common = io.dmem.resp.bits.nack || wb_reg_replay || io.dpath.csr_replay - val wb_rocc_val = wb_reg_rocc_val && !replay_wb_common - val replay_wb = replay_wb_common || wb_reg_rocc_val && !io.rocc.cmd.ready + val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common + val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready class Scoreboard(n: Int) { @@ -657,7 +545,7 @@ class Control extends Module val id_stall_fpu = if (!params(BuildFPU).isEmpty) { val fp_sboard = new Scoreboard(32) - fp_sboard.set((wb_dcache_miss && wb_reg_fp_wen || io.fpu.sboard_set) && !replay_wb, io.dpath.wb_waddr) + fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && io.dpath.retire, io.dpath.wb_waddr) fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) @@ -674,29 +562,29 @@ class Control extends Module io.dpath.badvaddr_wen := wb_reg_xcpt // don't care for non-memory exceptions // control transfer from ex/wb - take_pc_wb := replay_wb || wb_reg_xcpt || wb_reg_sret + take_pc_wb := replay_wb || wb_reg_xcpt || io.dpath.sret io.dpath.sel_pc := - Mux(wb_reg_xcpt, PC_PCR, // exception - Mux(wb_reg_sret, PC_PCR, // sret instruction - Mux(replay_wb, PC_WB, // replay - PC_MEM))) + Mux(wb_reg_xcpt, PC_PCR, // exception + Mux(replay_wb, PC_WB, // replay + Mux(wb_reg_valid && wb_ctrl.sret, PC_PCR, // sret instruction + PC_MEM))) - io.imem.btb_update.valid := (mem_reg_branch || io.imem.btb_update.bits.isJump) && !take_pc_wb + io.imem.btb_update.valid := mem_reg_valid && (mem_ctrl.branch || 
io.imem.btb_update.bits.isJump) && !take_pc_wb io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp - io.imem.btb_update.bits.taken := mem_reg_branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump + io.imem.btb_update.bits.taken := mem_ctrl.branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump io.imem.btb_update.bits.mispredict := take_pc_mem - io.imem.btb_update.bits.isJump := mem_reg_jal || mem_reg_jalr - io.imem.btb_update.bits.isCall := mem_reg_wen && io.dpath.mem_waddr(0) - io.imem.btb_update.bits.isReturn := mem_reg_jalr && io.dpath.mem_rs1_ra - io.imem.req.valid := take_pc + io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr + io.imem.btb_update.bits.isCall := mem_ctrl.wxd && io.dpath.mem_waddr(0) + io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && io.dpath.mem_rs1_ra + io.imem.req.valid := take_pc val bypassDst = Array(id_raddr1, id_raddr2) val bypassSrc = Array.fill(NBYP)((Bool(true), UInt(0))) - bypassSrc(BYP_EX) = (ex_reg_wen, io.dpath.ex_waddr) - bypassSrc(BYP_MEM) = (mem_reg_wen && !mem_reg_mem_val, io.dpath.mem_waddr) - bypassSrc(BYP_DC) = (mem_reg_wen, io.dpath.mem_waddr) + bypassSrc(BYP_EX) = (ex_reg_valid && ex_ctrl.wxd, io.dpath.ex_waddr) + bypassSrc(BYP_MEM) = (mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, io.dpath.mem_waddr) + bypassSrc(BYP_DC) = (mem_reg_valid && mem_ctrl.wxd, io.dpath.mem_waddr) val doBypass = bypassDst.map(d => bypassSrc.map(s => s._1 && s._2 === d)) for (i <- 0 until io.dpath.bypass.size) { @@ -705,50 +593,49 @@ class Control extends Module } // stall for RAW/WAW hazards on PCRs, loads, AMOs, and mul/div in execute stage. 
- val id_renx1_not0 = id_ctrl.rrs1 && id_raddr1 != UInt(0) - val id_renx2_not0 = id_ctrl.rrs2 && id_raddr2 != UInt(0) - val id_wen_not0 = id_ctrl.wrd && id_waddr != UInt(0) - val data_hazard_ex = ex_reg_wen && + val id_renx1_not0 = id_ctrl.rxs1 && id_raddr1 != UInt(0) + val id_renx2_not0 = id_ctrl.rxs2 && id_raddr2 != UInt(0) + val id_wen_not0 = id_ctrl.wxd && id_waddr != UInt(0) + val ex_cannot_bypass = ex_ctrl.csr != CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc + val data_hazard_ex = ex_ctrl.wxd && (id_renx1_not0 && id_raddr1 === io.dpath.ex_waddr || id_renx2_not0 && id_raddr2 === io.dpath.ex_waddr || id_wen_not0 && id_waddr === io.dpath.ex_waddr) - val fp_data_hazard_ex = ex_reg_fp_wen && + val fp_data_hazard_ex = ex_ctrl.wfd && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.ex_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr || io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) - val id_ex_hazard = data_hazard_ex && (ex_reg_csr != CSR.N || ex_reg_jalr || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val || ex_reg_rocc_val) || - fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val) + val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex) // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage. 
val mem_mem_cmd_bh = if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass else Bool(true) - val data_hazard_mem = mem_reg_wen && + val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc + val data_hazard_mem = mem_ctrl.wxd && (id_renx1_not0 && id_raddr1 === io.dpath.mem_waddr || id_renx2_not0 && id_raddr2 === io.dpath.mem_waddr || id_wen_not0 && id_waddr === io.dpath.mem_waddr) - val fp_data_hazard_mem = mem_reg_fp_wen && + val fp_data_hazard_mem = mem_ctrl.wfd && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.mem_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = data_hazard_mem && (mem_reg_csr != CSR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val || mem_reg_rocc_val) || - fp_data_hazard_mem && mem_reg_fp_val - id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem) + val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem) + id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
- val data_hazard_wb = wb_reg_wen && + val data_hazard_wb = wb_ctrl.wxd && (id_renx1_not0 && id_raddr1 === io.dpath.wb_waddr || id_renx2_not0 && id_raddr2 === io.dpath.wb_waddr || id_wen_not0 && id_waddr === io.dpath.wb_waddr) - val fp_data_hazard_wb = wb_reg_fp_wen && + val fp_data_hazard_wb = wb_ctrl.wfd && (io.fpu.dec.ren1 && id_raddr1 === io.dpath.wb_waddr || io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr || io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr || io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr) - val id_wb_hazard = data_hazard_wb && wb_set_sboard || - fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val) + val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb) val id_sboard_hazard = (id_renx1_not0 && sboard.readBypassed(id_raddr1) || @@ -767,40 +654,28 @@ class Control extends Module io.dpath.killd := take_pc || ctrl_stalld && !ctrl_draind io.imem.resp.ready := !ctrl_stalld || ctrl_draind - io.imem.invalidate := wb_reg_flush_inst + io.imem.invalidate := wb_reg_valid && wb_ctrl.fence_i - io.dpath.mem_load := mem_reg_mem_val && mem_reg_wen - io.dpath.wb_load := wb_reg_mem_val && wb_reg_wen - io.dpath.ren(1) := id_ctrl.rrs2 - io.dpath.ren(0) := id_ctrl.rrs1 + io.dpath.ren(1) := id_ctrl.rxs2 + io.dpath.ren(0) := id_ctrl.rxs1 io.dpath.ex_ctrl := ex_ctrl - io.dpath.div_mul_val := ex_reg_div_mul_val - io.dpath.div_mul_kill := mem_reg_div_mul_val && killm_common - io.dpath.ex_fp_val:= ex_reg_fp_val - io.dpath.mem_fp_val:= mem_reg_fp_val - io.dpath.mem_jalr := mem_reg_jalr - io.dpath.mem_branch := mem_reg_branch - io.dpath.ex_wen := ex_reg_wen + io.dpath.mem_ctrl := mem_ctrl io.dpath.ex_valid := ex_reg_valid - io.dpath.mem_wen := mem_reg_wen - io.dpath.ll_ready := !wb_reg_wen - io.dpath.wb_wen := wb_reg_wen && !replay_wb + io.dpath.ll_ready := !(wb_reg_valid && wb_ctrl.wxd) io.dpath.retire := wb_reg_valid && !replay_wb - io.dpath.csr := wb_reg_csr - io.dpath.sret := wb_reg_sret - io.dpath.ex_mem_type := 
ex_reg_mem_type - io.dpath.ex_rs2_val := ex_reg_mem_val && isWrite(ex_reg_mem_cmd) || ex_reg_rocc_val - io.dpath.ex_rocc_val := ex_reg_rocc_val - io.dpath.mem_rocc_val := mem_reg_rocc_val + io.dpath.wb_wen := io.dpath.retire && wb_ctrl.wxd + io.dpath.csr := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N) + io.dpath.sret := wb_reg_valid && wb_ctrl.sret && !replay_wb + io.dpath.killm := killm_common io.fpu.valid := !ctrl_killd && id_ctrl.fp io.fpu.killx := ctrl_killx io.fpu.killm := killm_common - io.dmem.req.valid := ex_reg_mem_val + io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem io.dmem.req.bits.kill := killm_common || mem_xcpt - io.dmem.req.bits.cmd := ex_reg_mem_cmd - io.dmem.req.bits.typ := ex_reg_mem_type + io.dmem.req.bits.cmd := ex_ctrl.mem_cmd + io.dmem.req.bits.typ := ex_ctrl.mem_type io.dmem.req.bits.phys := Bool(false) io.rocc.cmd.valid := wb_rocc_val diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index c2758e87..d5949e55 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -137,13 +137,13 @@ class Datapath extends Module // multiplier and divider val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1, earlyOut = params(FastMulDiv))) - div.io.req.valid := io.ctrl.div_mul_val + div.io.req.valid := io.ctrl.ex_valid && io.ctrl.ex_ctrl.div div.io.req.bits.dw := io.ctrl.ex_ctrl.alu_dw div.io.req.bits.fn := io.ctrl.ex_ctrl.alu_fn div.io.req.bits.in1 := ex_rs(0) div.io.req.bits.in2 := ex_rs(1) div.io.req.bits.tag := io.ctrl.ex_waddr - div.io.kill := io.ctrl.div_mul_kill + div.io.kill := io.ctrl.killm && Reg(next = div.io.req.fire()) io.ctrl.div_mul_rdy := div.io.req.ready io.fpu.fromint_data := ex_rs(0) @@ -161,7 +161,7 @@ class Datapath extends Module // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(params(VAddrBits)-1,0)).toUInt - 
io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_fp_val) + io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_ctrl.fp) require(io.dmem.req.bits.tag.getWidth >= 6) require(params(CoreDCacheReqTagBits) >= 6) @@ -186,12 +186,12 @@ class Datapath extends Module mem_reg_pc := ex_reg_pc mem_reg_inst := ex_reg_inst mem_reg_wdata := alu.io.out - } - when (io.ctrl.ex_rs2_val) { - mem_reg_rs2 := ex_rs(1) + when (io.ctrl.ex_ctrl.rxs2 && (io.ctrl.ex_ctrl.mem || io.ctrl.ex_ctrl.rocc)) { + mem_reg_rs2 := ex_rs(1) + } } - io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2) + io.dmem.req.bits.data := Mux(io.ctrl.mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool @@ -229,21 +229,21 @@ class Datapath extends Module io.ctrl.mem_br_taken := mem_reg_wdata(0) val mem_br_target = mem_reg_pc + - Mux(io.ctrl.mem_branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), - Mux(!io.ctrl.mem_jalr && !io.ctrl.mem_branch, imm(IMM_UJ, mem_reg_inst), SInt(4))) - val mem_npc = Mux(io.ctrl.mem_jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(params(VAddrBits)-1,0)), mem_br_target) + Mux(io.ctrl.mem_ctrl.branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), + Mux(io.ctrl.mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4))) + val mem_npc = Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(params(VAddrBits)-1,0)), mem_br_target) io.ctrl.mem_misprediction := mem_npc != ex_reg_pc || !io.ctrl.ex_valid io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1 - val mem_int_wdata = Mux(io.ctrl.mem_jalr, mem_br_target, mem_reg_wdata) + val mem_int_wdata = Mux(io.ctrl.mem_ctrl.jalr, mem_br_target, mem_reg_wdata) // writeback stage when (!mem_reg_kill) { wb_reg_pc := mem_reg_pc wb_reg_inst := mem_reg_inst - wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_int_wdata) - } - when (io.ctrl.mem_rocc_val) { - wb_reg_rs2 := 
mem_reg_rs2 + wb_reg_wdata := Mux(io.ctrl.mem_ctrl.fp && io.ctrl.mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata) + when (io.ctrl.mem_ctrl.rocc) { + wb_reg_rs2 := mem_reg_rs2 + } } wb_wdata := Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword, Mux(io.ctrl.ll_wen, ll_wdata, From 2aee85cb111866cbb1c107e8a60607e89ac5f59f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 4 Jan 2015 16:40:16 -0800 Subject: [PATCH 0795/1087] Flush pipeline from MEM stage This means we no longer have to rely on the instruction behind a serializing instruction being valid, simplifying the control. But we have to be a little more cautious when flushing the I$/ITLB/BTB. --- rocket/src/main/scala/ctrl.scala | 426 ++++++++++++++--------------- rocket/src/main/scala/icache.scala | 4 +- 2 files changed, 213 insertions(+), 217 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 7e9667ac..b7bb99d4 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -56,11 +56,11 @@ abstract trait DecodeConstants // | jalr | renf2 | sret // fp_val| | renx2 | | renf3 | | syscall // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | wxd | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,N,X,X,X,X,X) + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | + // | | | | | | | | | | | | | | | | | | | | | wxd | | | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X,X,X) val table: Array[(UInt, List[UInt])] } @@
-92,7 +92,6 @@ class IntCtrlSigs extends Bundle { val fence_i = Bool() val sret = Bool() val scall = Bool() - val replay_next = Bool() val fence = Bool() val amo = Bool() @@ -101,7 +100,7 @@ class IntCtrlSigs extends Bundle { Vec(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2, sel_alu1, sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type, rfs1, rfs2, rfs3, wfd, div, wxd, - csr, fence_i, sret, scall, replay_next, fence, amo) := decoder + csr, fence_i, sret, scall, fence, amo) := decoder this } } @@ -113,113 +112,113 @@ object XDecode extends DecodeConstants // | jalr | renf2 | sret // fp_val| | renx2 | | renf3 | | syscall // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | wxd | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - BEQ-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - BLT-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - BLTU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - BGE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - BGEU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | + // | | | | | | | | | | | | | | | | | | | | | wxd | | | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, 
N,N,N,N,N,N,CSR.N,N,N,N,N,N), + BEQ-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + BLT-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + BLTU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + BGE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + BGEU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - JAL-> List(Y, N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - JALR-> List(Y, N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - AUIPC-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + JAL-> List(Y, N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + JALR-> List(Y, N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + AUIPC-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - LB-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - LH-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - LW-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - LD-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - LBU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - LHU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - LWU-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, 
IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SB-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - SH-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - SW-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - SD-> List(xpr64,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), + LB-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + LH-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + LW-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + LD-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + LBU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + LHU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + LWU-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SB-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + SH-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + SW-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + SD-> List(xpr64,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - AMOADD_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOXOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, 
N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOSWAP_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOAND_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMIN_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMINU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAX_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAXU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOADD_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOSWAP_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOXOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOAND_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMIN_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMINU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAX_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - AMOMAXU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, 
N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + AMOADD_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOXOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOSWAP_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOAND_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOMIN_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOMINU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOMAX_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOMAXU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOADD_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOSWAP_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOXOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOAND_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOMIN_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOMINU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + 
AMOMAX_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOMAXU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - LR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - LR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - SC_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), - SC_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,Y), + LR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + LR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + SC_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + SC_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - LUI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - ADDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SLTI -> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SLTIU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - ANDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - ORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - XORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SLLI-> List(Y, 
N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SRLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SRAI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - ADD-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SUB-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SLT-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SLTU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - AND-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - OR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - XOR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SLL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SRL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SRA-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + LUI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + ADDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SLTI -> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SLTIU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + ANDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, 
N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + ORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + XORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SLLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SRLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SRAI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + ADD-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SUB-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SLT-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SLTU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + AND-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + OR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + XOR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SLL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SRL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SRA-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SLLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SRLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, 
N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SRAIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - ADDW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SUBW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SLLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SRLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - SRAW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), + ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SLLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SRLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SRAIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + ADDW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SUBW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SLLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SRLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + SRAW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - MUL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - MULH-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - 
MULHU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - MULHSU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - MULW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + MUL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + MULH-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + MULHU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + MULHSU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + MULW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - DIV-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - REM-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - REMU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - DIVUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - REMW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), - REMUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N,N), + DIV-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + DIVU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, 
A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + REM-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + REMU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + DIVW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + DIVUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + REMW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + REMUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - SCALL-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,Y,N,N,N), - SRET-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N,N,N,N), - FENCE-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,Y,N), - FENCE_I-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,Y,N,N,Y,N,N), - CSRRW-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N,N,N,N), - CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N,N,N,N), - CSRRC-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N,N,N,N), - CSRRWI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N,N,N,N), - CSRRSI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N,N,N,N), - CSRRCI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N,N,N,N)) + SCALL-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,Y,N,N), + 
SRET-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N,N,N), + FENCE-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,Y,N), + FENCE_I-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,Y,N,N,N,N), + CSRRW-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N,N,N), + CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N,N,N), + CSRRC-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N,N,N), + CSRRWI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N,N,N), + CSRRSI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N,N,N), + CSRRCI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N,N,N)) } object FDecode extends DecodeConstants @@ -229,68 +228,68 @@ object FDecode extends DecodeConstants // | jalr | renf2 | sret // fp_val| | renx2 | | renf3 | | syscall // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | wxd | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FSGNJ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FSGNJ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FSGNJX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, 
DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FSGNJX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FSGNJN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FSGNJN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FMIN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FMIN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FMAX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FMAX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FMUL_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FMUL_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), - FMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), - FMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), - FMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), - FNMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), - FNMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), - FNMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), - FNMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N,N), - FCLASS_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FCLASS_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_X_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_X_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_W_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_W_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_WU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_WU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_L_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_L_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_LU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FCVT_LU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FEQ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FEQ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FLT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FLT_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FLE_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FLE_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N,N), - FMV_S_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FMV_D_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FCVT_S_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FCVT_D_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FLW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FLD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N,N,N,N), - FSW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N,N,N,N), - FSD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N,N,N,N)) + // val | | br| | | | s_alu2 | imm dw alu | 
mem_cmd mem_type| | | | div | | | + // | | | | | | | | | | | | | | | | | | | | | wxd | | | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FCVT_D_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FSGNJ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FSGNJ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FSGNJX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FSGNJX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FSGNJN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FSGNJN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FMIN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FMIN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FMAX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FMAX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FMUL_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, 
IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FMUL_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), + FMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), + FMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), + FMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), + FMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), + FNMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), + FNMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), + FNMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), + FNMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), + FCLASS_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FCLASS_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FMV_X_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FMV_X_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FCVT_W_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FCVT_W_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FCVT_WU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FCVT_WU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FCVT_L_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FCVT_L_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FCVT_LU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FCVT_LU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), + FEQ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), + FEQ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), + FLT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), + FLT_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), + FLE_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), + FLE_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), + FMV_S_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FMV_D_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FCVT_S_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FCVT_D_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FCVT_S_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FCVT_D_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FCVT_S_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FCVT_D_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FCVT_S_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + 
FCVT_D_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FLW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FLD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), + FSW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N,N,N), + FSD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N,N,N)) } object RoCCDecode extends DecodeConstants @@ -300,34 +299,34 @@ object RoCCDecode extends DecodeConstants // | jalr | renf2 | sret // fp_val| | renx2 | | renf3 | | syscall // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | replay_next - // | | | | | | | | | | | | | | | | | | | | | wxd | | | | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | | | - CUSTOM0-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM0_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM0_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM0_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM1_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, 
DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM1_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM1_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM2_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N), - CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,N,N,Y,CSR.N,N,N,N,N,N,N)) + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | + // | | | | | | | | | | | | | | | | | | | | | wxd | | | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | | | + CUSTOM0-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM0_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM0_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM0_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + CUSTOM0_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + CUSTOM0_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + CUSTOM1-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM1_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM1_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM1_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + CUSTOM1_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + CUSTOM1_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + CUSTOM2-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM2_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM2_RS1_RS2-> 
List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM2_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + CUSTOM2_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + CUSTOM2_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + CUSTOM3-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM3_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM3_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + CUSTOM3_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N)) } class Control extends Module @@ -354,7 +353,7 @@ class Control extends Module val ex_reg_btb_hit = Reg(Bool()) val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits.clone) val ex_reg_xcpt = Reg(Bool()) - val ex_reg_replay_next = Reg(Bool()) + val ex_reg_flush_pipe = Reg(Bool()) val ex_reg_load_use = Reg(Bool()) val ex_reg_cause = Reg(UInt()) @@ -364,7 +363,7 @@ class Control extends Module val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits.clone) val mem_reg_xcpt = Reg(Bool()) val mem_reg_replay = Reg(Bool()) - val mem_reg_replay_next = Reg(Bool()) + val mem_reg_flush_pipe = Reg(Bool()) val mem_reg_cause = Reg(UInt()) val mem_reg_slow_bypass = Reg(Bool()) @@ -375,7 +374,8 @@ class Control extends Module val wb_reg_fp_val = Reg(Bool()) val take_pc_wb = Bool() - val take_pc_mem = io.dpath.mem_misprediction 
&& mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) + val mem_misprediction = io.dpath.mem_misprediction && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) + val take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) val take_pc_mem_wb = take_pc_wb || take_pc_mem val take_pc = take_pc_mem_wb val ctrl_killd = Bool() @@ -450,7 +450,7 @@ class Control extends Module when (ctrl_killd) { ex_reg_btb_hit := false ex_reg_valid := Bool(false) - ex_reg_replay_next := Bool(false) + ex_reg_flush_pipe := Bool(false) ex_reg_load_use := Bool(false) ex_reg_xcpt := Bool(false) } @@ -459,7 +459,7 @@ class Control extends Module ex_reg_btb_hit := io.imem.btb_resp.valid when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits } ex_reg_valid := Bool(true) - ex_reg_replay_next := id_ctrl.replay_next || id_csr_flush + ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush ex_reg_load_use := id_load_use ex_reg_xcpt := id_xcpt } @@ -468,31 +468,27 @@ class Control extends Module val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready || ex_ctrl.div && !io.dpath.div_mul_rdy - val replay_ex_other = wb_dcache_miss && ex_reg_load_use || mem_reg_replay_next - val replay_ex = ex_reg_valid && replay_ex_structural || replay_ex_other - ctrl_killx := take_pc_mem_wb || replay_ex + val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use + val replay_ex = ex_reg_valid && (replay_ex_structural || replay_ex_load_use) + ctrl_killx := take_pc_mem_wb || replay_ex || !ex_reg_valid // detect 2-cycle load-use delay for LB/LH/SC val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type) val (ex_xcpt, ex_cause) = checkExceptions(List( (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), (ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction)))) - + + mem_reg_valid := !ctrl_killx mem_reg_replay := !take_pc_mem_wb && 
replay_ex - mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt && !mem_reg_replay_next + mem_reg_xcpt := !ctrl_killx && ex_xcpt + mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt when (ex_xcpt) { mem_reg_cause := ex_cause } - when (ctrl_killx) { - mem_reg_valid := false - mem_reg_replay_next := Bool(false) - mem_reg_xcpt := Bool(false) - } - .otherwise { + when (!ctrl_killx) { mem_ctrl := ex_ctrl - mem_reg_valid := ex_reg_valid mem_reg_btb_hit := ex_reg_btb_hit when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp } - mem_reg_replay_next := ex_reg_replay_next + mem_reg_flush_pipe := ex_reg_flush_pipe mem_reg_slow_bypass := ex_slow_bypass mem_reg_xcpt := ex_xcpt } @@ -574,7 +570,7 @@ class Control extends Module io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp io.imem.btb_update.bits.taken := mem_ctrl.branch && io.dpath.mem_br_taken || io.imem.btb_update.bits.isJump - io.imem.btb_update.bits.mispredict := take_pc_mem + io.imem.btb_update.bits.mispredict := mem_misprediction io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr io.imem.btb_update.bits.isCall := mem_ctrl.wxd && io.dpath.mem_waddr(0) io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && io.dpath.mem_rs1_ra @@ -649,7 +645,7 @@ class Control extends Module id_ctrl.fp && id_stall_fpu || id_ctrl.mem && !io.dmem.req.ready || id_do_fence - val ctrl_draind = id_interrupt || ex_reg_replay_next + val ctrl_draind = id_interrupt ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind io.dpath.killd := take_pc || ctrl_stalld && !ctrl_draind diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 63b1ec74..f780b698 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -105,7 +105,7 @@ class Frontend extends FrontendModule icache.io.req.bits.idx := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) 
icache.io.invalidate := io.cpu.invalidate icache.io.req.bits.ppn := tlb.io.resp.ppn - icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss + icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || io.cpu.ptw.invalidate icache.io.resp.ready := !stall && !s1_same_block io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) @@ -216,7 +216,7 @@ class ICache extends FrontendModule val s2_dout = Vec.fill(nWays){Reg(Bits())} for (i <- 0 until nWays) { - val s1_vb = vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool + val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool val s2_vb = Reg(Bool()) val s2_tag_disparity = Reg(Bool()) val s2_tag_match = Reg(Bool()) From 87ad1a5703ae70ceb800f041d4b01844b573d930 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 4 Jan 2015 19:46:01 -0800 Subject: [PATCH 0796/1087] More control cleanup --- rocket/src/main/scala/ctrl.scala | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index b7bb99d4..49054d18 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -31,7 +31,6 @@ class CtrlDpathIO extends Bundle val badvaddr_wen = Bool(OUTPUT) // high for a load/store access fault // inputs from datapath val inst = Bits(INPUT, 32) - val jalr_eq = Bool(INPUT) val mem_br_taken = Bool(INPUT) val mem_misprediction = Bool(INPUT) val div_mul_rdy = Bool(INPUT) @@ -371,7 +370,6 @@ class Control extends Module val wb_reg_xcpt = Reg(Bool()) val wb_reg_replay = Reg(Bool()) val wb_reg_cause = Reg(UInt()) - val wb_reg_fp_val = Reg(Bool()) val take_pc_wb = Bool() val mem_misprediction = io.dpath.mem_misprediction && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) @@ -444,21 +442,15 @@ class Control extends Module (id_ctrl.scall, UInt(Causes.syscall)), (id_ctrl.rocc && !io.dpath.status.er, 
UInt(Causes.accelerator_disabled)))) + ex_reg_valid := !ctrl_killd + ex_reg_xcpt := !ctrl_killd && id_xcpt ex_reg_xcpt_interrupt := id_interrupt && !take_pc && io.imem.resp.valid when (id_xcpt) { ex_reg_cause := id_cause } - when (ctrl_killd) { - ex_reg_btb_hit := false - ex_reg_valid := Bool(false) - ex_reg_flush_pipe := Bool(false) - ex_reg_load_use := Bool(false) - ex_reg_xcpt := Bool(false) - } - .otherwise { + when (!ctrl_killd) { ex_ctrl := id_ctrl ex_reg_btb_hit := io.imem.btb_resp.valid when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits } - ex_reg_valid := Bool(true) ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush ex_reg_load_use := id_load_use ex_reg_xcpt := id_xcpt From 00e074cdd993395aba40d6a5b9da615527ec40b7 Mon Sep 17 00:00:00 2001 From: Scott Beamer Date: Thu, 29 Jan 2015 15:29:25 -0800 Subject: [PATCH 0797/1087] fixes slight bug for non-power of 2 number of ras entries --- rocket/src/main/scala/btb.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 62a5daf8..d5fefa1e 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -23,7 +23,7 @@ abstract trait BTBParameters extends UsesParameters { class RAS(nras: Int) { def push(addr: UInt): Unit = { when (count < nras) { count := count + 1 } - val nextPos = Mux(Bool(isPow2(nras)) || pos > 0, pos+1, UInt(0)) + val nextPos = Mux(Bool(isPow2(nras)) || pos < nras-1, pos+1, UInt(0)) stack(nextPos) := addr pos := nextPos } From 741e6b77adaefaa51eb3db7cd776cdb06c7bd9fe Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sun, 1 Feb 2015 20:04:13 -0800 Subject: [PATCH 0798/1087] Rename some params, use refactored TileLink --- rocket/src/main/scala/btb.scala | 3 +- rocket/src/main/scala/core.scala | 17 +- rocket/src/main/scala/csr.scala | 56 ++--- rocket/src/main/scala/ctrl.scala | 8 +- rocket/src/main/scala/dpath.scala | 14 +- rocket/src/main/scala/dpath_alu.scala | 10 +- 
rocket/src/main/scala/icache.scala | 24 +- rocket/src/main/scala/multiplier.scala | 10 +- rocket/src/main/scala/nbdcache.scala | 315 ++++++++++--------------- rocket/src/main/scala/ptw.scala | 28 +-- rocket/src/main/scala/rocc.scala | 14 +- rocket/src/main/scala/tlb.scala | 49 ++-- 12 files changed, 248 insertions(+), 300 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index d5fefa1e..86f5934b 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -10,8 +10,7 @@ import uncore._ case object NBTBEntries extends Field[Int] case object NRAS extends Field[Int] -abstract trait BTBParameters extends UsesParameters { - val vaddrBits = params(VAddrBits) +abstract trait BTBParameters extends CoreParameters { val matchBits = params(PgIdxBits) val entries = params(NBTBEntries) val nRAS = params(NRAS) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index a42c7721..51f7007b 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -7,7 +7,7 @@ import Util._ import uncore._ case object BuildFPU extends Field[Option[() => FPU]] -case object XprLen extends Field[Int] +case object XLen extends Field[Int] case object NMultXpr extends Field[Int] case object FetchWidth extends Field[Int] case object RetireWidth extends Field[Int] @@ -20,14 +20,23 @@ case object CoreDataBits extends Field[Int] case object CoreDCacheReqTagBits extends Field[Int] abstract trait CoreParameters extends UsesParameters { - val xprLen = params(XprLen) + val xLen = params(XLen) + val paddrBits = params(PAddrBits) + val vaddrBits = params(VAddrBits) + val pgIdxBits = params(PgIdxBits) + val ppnBits = params(PPNBits) + val vpnBits = params(VPNBits) + val permBits = params(PermBits) + val asIdBits = params(ASIdBits) + + val retireWidth = params(RetireWidth) val coreFetchWidth = params(FetchWidth) val coreInstBits = params(CoreInstBits) val coreInstBytes = coreInstBits/8 - val 
coreDataBits = xprLen + val coreDataBits = xLen val coreDataBytes = coreDataBits/8 val coreDCacheReqTagBits = params(CoreDCacheReqTagBits) - val coreMaxAddrBits = math.max(params(PPNBits),params(VPNBits)+1) + params(PgIdxBits) + val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits if(params(FastLoadByte)) require(params(FastLoadWord)) } diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 590752e5..ec288987 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -34,52 +34,52 @@ object CSR val C = Bits(3,2) } -class CSRFileIO extends Bundle { +class CSRFileIO extends CoreBundle { val host = new HTIFIO val rw = new Bundle { val addr = UInt(INPUT, 12) val cmd = Bits(INPUT, CSR.SZ) - val rdata = Bits(OUTPUT, params(XprLen)) - val wdata = Bits(INPUT, params(XprLen)) + val rdata = Bits(OUTPUT, xLen) + val wdata = Bits(INPUT, xLen) } val status = new Status().asOutput - val ptbr = UInt(OUTPUT, params(PAddrBits)) - val evec = UInt(OUTPUT, params(VAddrBits)+1) + val ptbr = UInt(OUTPUT, paddrBits) + val evec = UInt(OUTPUT, vaddrBits+1) val exception = Bool(INPUT) - val retire = UInt(INPUT, log2Up(1+params(RetireWidth))) - val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+params(RetireWidth)))) - val cause = UInt(INPUT, params(XprLen)) + val retire = UInt(INPUT, log2Up(1+retireWidth)) + val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+retireWidth))) + val cause = UInt(INPUT, xLen) val badvaddr_wen = Bool(INPUT) - val pc = UInt(INPUT, params(VAddrBits)+1) + val pc = UInt(INPUT, vaddrBits+1) val sret = Bool(INPUT) val fatc = Bool(OUTPUT) val replay = Bool(OUTPUT) - val time = UInt(OUTPUT, params(XprLen)) + val time = UInt(OUTPUT, xLen) val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip val rocc = new RoCCInterface().flip } -class CSRFile extends Module +class CSRFile extends CoreModule { val io = new CSRFileIO - val reg_epc = 
Reg(Bits(width = params(VAddrBits)+1)) - val reg_badvaddr = Reg(Bits(width = params(VAddrBits))) - val reg_evec = Reg(Bits(width = params(VAddrBits))) + val reg_epc = Reg(Bits(width = vaddrBits+1)) + val reg_badvaddr = Reg(Bits(width = vaddrBits)) + val reg_evec = Reg(Bits(width = vaddrBits)) val reg_compare = Reg(Bits(width = 32)) - val reg_cause = Reg(Bits(width = params(XprLen))) - val reg_tohost = Reg(init=Bits(0, params(XprLen))) - val reg_fromhost = Reg(init=Bits(0, params(XprLen))) - val reg_sup0 = Reg(Bits(width = params(XprLen))) - val reg_sup1 = Reg(Bits(width = params(XprLen))) - val reg_ptbr = Reg(UInt(width = params(PAddrBits))) + val reg_cause = Reg(Bits(width = xLen)) + val reg_tohost = Reg(init=Bits(0, xLen)) + val reg_fromhost = Reg(init=Bits(0, xLen)) + val reg_sup0 = Reg(Bits(width = xLen)) + val reg_sup1 = Reg(Bits(width = xLen)) + val reg_ptbr = Reg(UInt(width = paddrBits)) val reg_stats = Reg(init=Bool(false)) val reg_status = Reg(new Status) // reset down below - val reg_time = WideCounter(params(XprLen)) - val reg_instret = WideCounter(params(XprLen), io.retire) - val reg_uarch_counters = io.uarch_counters.map(WideCounter(params(XprLen), _)) + val reg_time = WideCounter(xLen) + val reg_instret = WideCounter(xLen, io.retire) + val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) @@ -128,7 +128,7 @@ class CSRFile extends Module when (io.badvaddr_wen) { val wdata = io.rw.wdata - val (upper, lower) = Split(wdata, params(VAddrBits)) + val (upper, lower) = Split(wdata, vaddrBits) val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) reg_badvaddr := Cat(sign, lower).toSInt } @@ -159,7 +159,7 @@ class CSRFile extends Module when (host_pcr_req_fire && !host_pcr_bits.rw && decoded_addr(CSRs.tohost)) { reg_tohost := UInt(0) } val read_impl = Bits(2) - val read_ptbr = reg_ptbr(params(PAddrBits)-1, params(PgIdxBits)) << UInt(params(PgIdxBits)) + val read_ptbr 
= reg_ptbr(paddrBits-1, pgIdxBits) << UInt(pgIdxBits) val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( CSRs.fflags -> (if (!params(BuildFPU).isEmpty) reg_fflags else UInt(0)), @@ -211,8 +211,8 @@ class CSRFile extends Module when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } - when (decoded_addr(CSRs.epc)) { reg_epc := wdata(params(VAddrBits),0).toSInt } - when (decoded_addr(CSRs.evec)) { reg_evec := wdata(params(VAddrBits)-1,0).toSInt } + when (decoded_addr(CSRs.epc)) { reg_epc := wdata(vaddrBits,0).toSInt } + when (decoded_addr(CSRs.evec)) { reg_evec := wdata(vaddrBits-1,0).toSInt } when (decoded_addr(CSRs.count)) { reg_time := wdata.toUInt } when (decoded_addr(CSRs.compare)) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false) } when (decoded_addr(CSRs.fromhost)) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } @@ -220,7 +220,7 @@ class CSRFile extends Module when (decoded_addr(CSRs.clear_ipi)){ r_irq_ipi := wdata(0) } when (decoded_addr(CSRs.sup0)) { reg_sup0 := wdata } when (decoded_addr(CSRs.sup1)) { reg_sup1 := wdata } - when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(params(PAddrBits)-1, params(PgIdxBits)), Bits(0, params(PgIdxBits))).toUInt } + when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)).toUInt } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 5ef47a26..1811e2bc 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -8,7 +8,7 @@ import uncore.constants.MemoryOpConstants._ import ALU._ import Util._ -class CtrlDpathIO extends Bundle +class CtrlDpathIO extends CoreBundle { // outputs to datapath val sel_pc = UInt(OUTPUT, 3) @@ -27,7 +27,7 @@ class 
CtrlDpathIO extends Bundle // exception handling val retire = Bool(OUTPUT) val exception = Bool(OUTPUT) - val cause = UInt(OUTPUT, params(XprLen)) + val cause = UInt(OUTPUT, xLen) val badvaddr_wen = Bool(OUTPUT) // high for a load/store access fault // inputs from datapath val inst = Bits(INPUT, 32) @@ -328,7 +328,7 @@ object RoCCDecode extends DecodeConstants CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N)) } -class Control extends Module +class Control extends CoreModule { val io = new Bundle { val dpath = new CtrlDpathIO @@ -388,7 +388,7 @@ class Control extends Module val id_reg_fence = Reg(init=Bool(false)) val sr = io.dpath.status - var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(BigInt(1) << (params(XprLen)-1) | i))) + var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(BigInt(1) << (xLen-1) | i))) val (id_interrupt_unmasked, id_interrupt_cause) = checkExceptions(id_interrupts) val id_interrupt = io.dpath.status.ei && id_interrupt_unmasked diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f60ec7bf..0ea675c0 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -7,7 +7,7 @@ import Instructions._ import Util._ import uncore._ -class Datapath extends Module +class Datapath extends CoreModule { val io = new Bundle { val host = new HTIFIO @@ -149,10 +149,10 @@ class Datapath extends Module io.fpu.fromint_data := ex_rs(0) def vaSign(a0: UInt, ea: Bits) = { - // efficient means to compress 64-bit VA into params(VAddrBits)+1 bits - // (VA is bad if VA(params(VAddrBits)) != VA(params(VAddrBits)-1)) - val a = a0 >> params(VAddrBits)-1 - val e = ea(params(VAddrBits),params(VAddrBits)-1) + // efficient means to compress 64-bit VA into vaddrBits+1 bits + // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) + val a = a0 >> vaddrBits-1 + val e = 
ea(vaddrBits,vaddrBits-1) Mux(a === UInt(0) || a === UInt(1), e != UInt(0), Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1), e(0))) @@ -160,7 +160,7 @@ class Datapath extends Module // D$ request interface (registered inside D$ module) // other signals (req_val, req_rdy) connect to control module - io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(params(VAddrBits)-1,0)).toUInt + io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_ctrl.fp) require(io.dmem.req.bits.tag.getWidth >= 6) require(params(CoreDCacheReqTagBits) >= 6) @@ -231,7 +231,7 @@ class Datapath extends Module val mem_br_target = mem_reg_pc + Mux(io.ctrl.mem_ctrl.branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), Mux(io.ctrl.mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4))) - val mem_npc = Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(params(VAddrBits)-1,0)), mem_br_target) + val mem_npc = Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)), mem_br_target) io.ctrl.mem_misprediction := mem_npc != ex_reg_pc || !io.ctrl.ex_valid io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1 val mem_int_wdata = Mux(io.ctrl.mem_ctrl.jalr, mem_br_target, mem_reg_wdata) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 4f0edc87..a6b258f7 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -43,13 +43,13 @@ object ALU } import ALU._ -class ALUIO extends Bundle { +class ALUIO extends CoreBundle { val dw = Bits(INPUT, SZ_DW) val fn = Bits(INPUT, SZ_ALU_FN) - val in2 = UInt(INPUT, params(XprLen)) - val in1 = UInt(INPUT, params(XprLen)) - val out = UInt(OUTPUT, params(XprLen)) - val adder_out = UInt(OUTPUT, params(XprLen)) + val in2 = UInt(INPUT, xLen) + val in1 = UInt(INPUT, xLen) + val out = 
UInt(OUTPUT, xLen) + val adder_out = UInt(OUTPUT, xLen) } class ALU extends Module diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index bb28c7de..4c35bde2 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -4,7 +4,6 @@ import Chisel._ import uncore._ import Util._ -case object NITLBEntries extends Field[Int] case object ECCCode extends Field[Option[Code]] abstract trait L1CacheParameters extends CacheParameters with CoreParameters { @@ -20,18 +19,18 @@ abstract class FrontendBundle extends Bundle with FrontendParameters abstract class FrontendModule extends Module with FrontendParameters class FrontendReq extends CoreBundle { - val pc = UInt(width = params(VAddrBits)+1) + val pc = UInt(width = vaddrBits+1) } class FrontendResp extends CoreBundle { - val pc = UInt(width = params(VAddrBits)+1) // ID stage PC + val pc = UInt(width = vaddrBits+1) // ID stage PC val data = Vec.fill(coreFetchWidth) (Bits(width = coreInstBits)) val mask = Bits(width = coreFetchWidth) val xcpt_ma = Bool() val xcpt_if = Bool() } -class CPUFrontendIO extends CoreBundle { +class CPUFrontendIO extends Bundle { val req = Valid(new FrontendReq) val resp = Decoupled(new FrontendResp).flip val btb_resp = Valid(new BTBResp).flip @@ -51,7 +50,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule val btb = Module(new BTB(btb_updates_out_of_order)) val icache = Module(new ICache) - val tlb = Module(new TLB(params(NITLBEntries))) + val tlb = Module(new TLB) val s1_pc_ = Reg(UInt()) val s1_pc = s1_pc_ & SInt(-2) // discard LSB of PC (throughout the pipeline) @@ -134,7 +133,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule class ICacheReq extends FrontendBundle { val idx = UInt(width = pgIdxBits) - val ppn = UInt(width = params(PPNBits)) // delayed one cycle + val ppn = UInt(width = ppnBits) // delayed one cycle val kill = Bool() // delayed one cycle } @@ 
-190,14 +189,15 @@ class ICache extends FrontendModule val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss - val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCyclesPerBeat, (g: Grant) => co.messageUpdatesDataArray(g))) + val ser = Module(new FlowThroughSerializer( + io.mem.grant.bits, + refillCyclesPerBeat)) ser.io.in <> io.mem.grant val (refill_cnt, refill_wrap) = Counter(ser.io.out.fire(), refillCycles) //TODO Zero width wire val refill_done = state === s_refill && refill_wrap val refill_valid = ser.io.out.valid val refill_bits = ser.io.out.bits ser.io.out.ready := Bool(true) - //assert(!c.tlco.isVoluntary(refill_bits.payload) || !refill_valid, "UncachedRequestors shouldn't get voluntary grants.") val repl_way = if (isDM) UInt(0) else LFSR16(s2_miss)(log2Up(nWays)-1,0) val entagbits = code.width(tagBits) @@ -251,7 +251,7 @@ class ICache extends FrontendModule val s1_raddr = Reg(UInt()) when (refill_valid && repl_way === UInt(i)) { val e_d = code.encode(refill_bits.payload.data) - if(refillCycles > 1) data_array(Cat(s2_idx, refill_cnt)) := e_d + if(refillCycles > 1) data_array(Cat(s2_idx, refill_bits.payload.addr_beat)) := e_d else data_array(s2_idx) := e_d } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM @@ -266,14 +266,14 @@ class ICache extends FrontendModule io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) val ack_q = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) - ack_q.io.enq.valid := refill_done && co.requiresAckForGrant(refill_bits.payload) - ack_q.io.enq.bits.payload.manager_xact_id := refill_bits.payload.manager_xact_id + ack_q.io.enq.valid := refill_done && refill_bits.payload.requiresAck() + ack_q.io.enq.bits.payload := refill_bits.payload.makeFinish() ack_q.io.enq.bits.header.dst := refill_bits.header.src // output signals io.resp.valid := s2_hit io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready - io.mem.acquire.bits.payload := UncachedRead(s2_addr >> 
UInt(blockOffBits)) + io.mem.acquire.bits.payload := UncachedReadBlock(addr_block = s2_addr >> UInt(blockOffBits)) io.mem.finish <> ack_q.io.deq // control state machine diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index a09f8586..2b0ca819 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -6,16 +6,16 @@ import Chisel._ import ALU._ import Util._ -class MultiplierReq extends Bundle { +class MultiplierReq extends CoreBundle { val fn = Bits(width = SZ_ALU_FN) val dw = Bits(width = SZ_DW) - val in1 = Bits(width = params(XprLen)) - val in2 = Bits(width = params(XprLen)) + val in1 = Bits(width = xLen) + val in2 = Bits(width = xLen) val tag = UInt(width = log2Up(params(NMultXpr))) } -class MultiplierResp extends Bundle { - val data = Bits(width = params(XprLen)) +class MultiplierResp extends CoreBundle { + val data = Bits(width = xLen) val tag = UInt(width = log2Up(params(NMultXpr))) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 7fa3044c..c991713f 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -6,17 +6,21 @@ import Chisel._ import uncore._ import Util._ +case object WordBits extends Field[Int] case object StoreDataQueueDepth extends Field[Int] case object ReplayQueueDepth extends Field[Int] case object NMSHRs extends Field[Int] case object LRSCCycles extends Field[Int] -case object NDTLBEntries extends Field[Int] abstract trait L1HellaCacheParameters extends L1CacheParameters { + val wordBits = params(WordBits) + val wordBytes = wordBits/8 + val wordOffBits = log2Up(wordBytes) val idxMSB = untagBits-1 val idxLSB = blockOffBits val offsetmsb = idxLSB-1 val offsetlsb = wordOffBits + val rowWords = rowBits/wordBits val doNarrowRead = coreDataBits * nWays % rowBits == 0 val encDataBits = code.width(coreDataBits) val encRowBits = encDataBits*rowWords @@ -27,64 +31,39 @@ abstract 
trait L1HellaCacheParameters extends L1CacheParameters { abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters abstract class L1HellaCacheModule extends Module with L1HellaCacheParameters -class StoreGen(typ: Bits, addr: Bits, dat: Bits) -{ - val byte = typ === MT_B || typ === MT_BU - val half = typ === MT_H || typ === MT_HU - val word = typ === MT_W || typ === MT_WU - def mask = - Mux(byte, Bits( 1) << addr(2,0), - Mux(half, Bits( 3) << Cat(addr(2,1), Bits(0,1)), - Mux(word, Bits( 15) << Cat(addr(2), Bits(0,2)), - Bits(255)))) - def data = - Mux(byte, Fill(8, dat( 7,0)), - Mux(half, Fill(4, dat(15,0)), - wordData)) - lazy val wordData = - Mux(word, Fill(2, dat(31,0)), - dat) -} - -class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool) -{ - val t = new StoreGen(typ, addr, dat) - val sign = typ === MT_B || typ === MT_H || typ === MT_W || typ === MT_D - - val wordShift = Mux(addr(2), dat(63,32), dat(31,0)) - val word = Cat(Mux(t.word, Fill(32, sign && wordShift(31)), dat(63,32)), wordShift) - val halfShift = Mux(addr(1), word(31,16), word(15,0)) - val half = Cat(Mux(t.half, Fill(48, sign && halfShift(15)), word(63,16)), halfShift) - val byteShift = Mux(zero, UInt(0), Mux(addr(0), half(15,8), half(7,0))) - val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift) +trait HasCoreMemOp extends CoreBundle { + val addr = UInt(width = coreMaxAddrBits) + val tag = Bits(width = coreDCacheReqTagBits) + val cmd = Bits(width = M_SZ) + val typ = Bits(width = MT_SZ) } trait HasCoreData extends CoreBundle { val data = Bits(width = coreDataBits) } -class HellaCacheReqInternal extends CoreBundle { - val kill = Bool() - val typ = Bits(width = MT_SZ) - val phys = Bool() - val addr = UInt(width = coreMaxAddrBits) - val tag = Bits(width = coreDCacheReqTagBits) - val cmd = Bits(width = M_SZ) +trait HasSDQId extends CoreBundle with L1HellaCacheParameters { + val sdq_id = UInt(width = log2Up(sdqDepth)) } -class HellaCacheReq 
extends HellaCacheReqInternal - with HasCoreData +trait HasMissInfo extends CoreBundle with L1HellaCacheParameters { + val tag_match = Bool() + val old_meta = new L1Metadata + val way_en = Bits(width = nWays) +} -class HellaCacheResp extends CoreBundle - with HasCoreData { +class HellaCacheReqInternal extends HasCoreMemOp { + val kill = Bool() + val phys = Bool() +} + +class HellaCacheReq extends HellaCacheReqInternal with HasCoreData + +class HellaCacheResp extends HasCoreMemOp with HasCoreData { val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() - val typ = Bits(width = 3) val has_data = Bool() val data_subword = Bits(width = coreDataBits) - val tag = Bits(width = coreDCacheReqTagBits) - val cmd = Bits(width = 4) - val addr = UInt(width = coreMaxAddrBits) val store_data = Bits(width = coreDataBits) } @@ -108,27 +87,12 @@ class HellaCacheIO extends CoreBundle { val ordered = Bool(INPUT) } -trait HasSDQId extends CoreBundle with L1HellaCacheParameters { - val sdq_id = UInt(width = log2Up(sdqDepth)) -} - -trait HasMissInfo extends CoreBundle with L1HellaCacheParameters { - val tag_match = Bool() - val old_meta = new L1Metadata - val way_en = Bits(width = nWays) -} - -class Replay extends HellaCacheReqInternal with HasCoreData -class ReplayInternal extends HellaCacheReqInternal with HasSDQId -class MSHRReq extends Replay with HasMissInfo -class MSHRReqInternal extends ReplayInternal with HasMissInfo - -class DataReadReq extends L1HellaCacheBundle { +class L1DataReadReq extends L1HellaCacheBundle { val way_en = Bits(width = nWays) val addr = Bits(width = untagBits) } -class DataWriteReq extends DataReadReq { +class L1DataWriteReq extends L1DataReadReq { val wmask = Bits(width = rowWords) val data = Bits(width = encRowBits) } @@ -152,14 +116,16 @@ class L1Metadata extends Metadata with L1HellaCacheParameters { val coh = co.clientMetadataOnFlush.clone } -class InternalProbe extends Probe with HasClientTransactionId +class Replay extends 
HellaCacheReqInternal with HasCoreData +class ReplayInternal extends HellaCacheReqInternal with HasSDQId -class WritebackReq extends L1HellaCacheBundle { - val tag = Bits(width = tagBits) - val idx = Bits(width = idxBits) +class MSHRReq extends Replay with HasMissInfo +class MSHRReqInternal extends ReplayInternal with HasMissInfo + +class ProbeInternal extends Probe with HasClientTransactionId + +class WritebackReq extends Release with CacheParameters { val way_en = Bits(width = nWays) - val client_xact_id = Bits(width = params(TLClientXactIdBits)) - val r_type = UInt(width = co.releaseTypeWidth) } class MSHR(id: Int) extends L1HellaCacheModule { @@ -174,7 +140,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { val tag = Bits(OUTPUT, tagBits) val mem_req = Decoupled(new Acquire) - val mem_resp = new DataWriteReq().asOutput + val mem_resp = new L1DataWriteReq().asOutput val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new ReplayInternal) @@ -188,17 +154,19 @@ class MSHR(id: Int) extends L1HellaCacheModule { val state = Reg(init=s_invalid) val acquire_type = Reg(UInt()) - val release_type = Reg(UInt()) val line_state = Reg(new ClientMetadata) val req = Reg(new MSHRReqInternal()) val req_cmd = io.req_bits.cmd val req_idx = req.addr(untagBits-1,blockOffBits) val idx_match = req_idx === io.req_bits.addr(untagBits-1,blockOffBits) - val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits)) + val sec_rdy = idx_match && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits) && + Vec(s_wb_req, s_wb_resp, s_meta_clear, s_refill_req, s_refill_resp).contains(state) val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UInt(id) - val (refill_cnt, refill_done) = Counter(reply && 
co.messageUpdatesDataArray(io.mem_grant.bits.payload), refillCycles) // TODO: Zero width? + val gnt_multi_data = io.mem_grant.bits.payload.hasMultibeatData() + val (refill_cnt, refill_count_done) = Counter(reply && gnt_multi_data, refillCycles) // TODO: Zero width? + val refill_done = reply && (!gnt_multi_data || refill_count_done) val wb_done = reply && (state === s_wb_resp) val meta_on_flush = co.clientMetadataOnFlush @@ -234,16 +202,14 @@ class MSHR(id: Int) extends L1HellaCacheModule { state := s_meta_clear } when (io.wb_req.fire()) { // s_wb_req - state := s_wb_resp + state := Mux(io.wb_req.bits.requiresAck(), s_wb_resp, s_meta_clear) } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req acquire_type := co.getAcquireTypeOnSecondaryMiss(req_cmd, meta_on_flush, io.mem_req.bits) } when (io.req_pri_val && io.req_pri_rdy) { line_state := meta_on_flush - refill_cnt := UInt(0) acquire_type := co.getAcquireTypeOnPrimaryMiss(req_cmd, meta_on_flush) - release_type := co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc req := io.req_bits when (io.req_bits.tag_match) { @@ -259,8 +225,8 @@ class MSHR(id: Int) extends L1HellaCacheModule { } val ackq = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) - ackq.io.enq.valid := (wb_done || refill_done) && co.requiresAckForGrant(io.mem_grant.bits.payload) - ackq.io.enq.bits.payload.manager_xact_id := io.mem_grant.bits.payload.manager_xact_id + ackq.io.enq.valid := (wb_done || refill_done) && io.mem_grant.bits.payload.requiresAck() + ackq.io.enq.bits.payload := io.mem_grant.bits.payload.makeFinish() ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp io.mem_finish.valid := ackq.io.deq.valid && can_finish @@ -286,14 +252,17 @@ class MSHR(id: Int) extends L1HellaCacheModule { io.meta_write.bits.way_en := req.way_en io.wb_req.valid := state === s_wb_req && ackq.io.enq.ready - io.wb_req.bits.tag 
:= req.old_meta.tag - io.wb_req.bits.idx := req_idx + io.wb_req.bits := Release.makeVoluntaryWriteback( + meta = req.old_meta.coh, + client_xact_id = UInt(id), + addr_block = Cat(req.old_meta.tag, req_idx)) io.wb_req.bits.way_en := req.way_en - io.wb_req.bits.client_xact_id := Bits(id) - io.wb_req.bits.r_type := co.getReleaseTypeOnVoluntaryWriteback() io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready - io.mem_req.bits := Acquire(acquire_type, Cat(io.tag, req_idx).toUInt, Bits(id)) + io.mem_req.bits := Acquire( + a_type = acquire_type, + addr_block = Cat(io.tag, req_idx).toUInt, + client_xact_id = Bits(id)) io.mem_finish <> ackq.io.deq io.meta_read.valid := state === s_drain_rpq @@ -317,7 +286,7 @@ class MSHRFile extends L1HellaCacheModule { val secondary_miss = Bool(OUTPUT) val mem_req = Decoupled(new Acquire) - val mem_resp = new DataWriteReq().asOutput + val mem_resp = new L1DataWriteReq().asOutput val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new Replay) @@ -341,10 +310,14 @@ class MSHRFile extends L1HellaCacheModule { val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits val wbTagList = Vec.fill(nMSHRs){Bits()} - val memRespMux = Vec.fill(nMSHRs){new DataWriteReq} + val memRespMux = Vec.fill(nMSHRs){new L1DataWriteReq} val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs)) val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) - val mem_req_arb = Module(new LockingArbiter(new Acquire, nMSHRs, outerDataBeats, co.messageHasData _)) + val mem_req_arb = Module(new LockingArbiter( + new Acquire, + nMSHRs, + outerDataBeats, + (a: Acquire) => a.hasMultibeatData())) val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), nMSHRs)) val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs)) val replay_arb = Module(new Arbiter(new ReplayInternal, nMSHRs)) @@ -362,7 +335,7 @@ class MSHRFile extends L1HellaCacheModule { 
idxMatch(i) := mshr.io.idx_match tagList(i) := mshr.io.tag - wbTagList(i) := mshr.io.wb_req.bits.tag + wbTagList(i) := mshr.io.wb_req.bits.addr_block >> UInt(idxBits) alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy mshr.io.req_pri_val := alloc_arb.io.in(i).ready @@ -413,9 +386,9 @@ class MSHRFile extends L1HellaCacheModule { class WritebackUnit extends L1HellaCacheModule { val io = new Bundle { - val req = Decoupled(new WritebackReq()).flip + val req = Decoupled(new WritebackReq).flip val meta_read = Decoupled(new L1MetaReadReq) - val data_req = Decoupled(new DataReadReq()) + val data_req = Decoupled(new L1DataReadReq) val data_resp = Bits(INPUT, encRowBits) val release = Decoupled(new Release) } @@ -423,9 +396,10 @@ class WritebackUnit extends L1HellaCacheModule { val active = Reg(init=Bool(false)) val r1_data_req_fired = Reg(init=Bool(false)) val r2_data_req_fired = Reg(init=Bool(false)) - val cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1))) //TODO Zero width + val data_req_cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1))) //TODO Zero width val buf_v = (if(refillCyclesPerBeat > 1) Reg(init=Bits(0, width = refillCyclesPerBeat-1)) else Bits(1)) val beat_done = buf_v.andR + val (beat_cnt, all_beats_done) = Counter(io.release.fire(), outerDataBeats) val req = Reg(new WritebackReq) io.release.valid := false @@ -434,59 +408,62 @@ class WritebackUnit extends L1HellaCacheModule { r2_data_req_fired := r1_data_req_fired when (io.data_req.fire() && io.meta_read.fire()) { r1_data_req_fired := true - cnt := cnt + 1 + data_req_cnt := data_req_cnt + 1 } when (r2_data_req_fired) { io.release.valid := beat_done when(!io.release.ready) { r1_data_req_fired := false r2_data_req_fired := false - cnt := cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1) + data_req_cnt := data_req_cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1) } .elsewhen(beat_done) { if(refillCyclesPerBeat > 1) buf_v := 0 } when(!r1_data_req_fired) { - active := cnt 
< UInt(refillCycles) + active := data_req_cnt < UInt(refillCycles) } } } when (io.req.fire()) { active := true - cnt := 0 + data_req_cnt := 0 if(refillCyclesPerBeat > 1) buf_v := 0 req := io.req.bits } - val fire = active && cnt < UInt(refillCycles) io.req.ready := !active - // We reissue the meta read as it sets up the muxing for s2_data_muxed + val req_idx = req.addr_block(idxBits-1, 0) + val fire = active && data_req_cnt < UInt(refillCycles) + + // We reissue the meta read as it sets up the mux ctrl for s2_data_muxed io.meta_read.valid := fire - io.meta_read.bits.idx := req.idx - io.meta_read.bits.tag := req.tag + io.meta_read.bits.idx := req_idx + io.meta_read.bits.tag := req.addr_block >> UInt(idxBits) io.data_req.valid := fire io.data_req.bits.way_en := req.way_en - io.data_req.bits.addr := (if(refillCycles > 1) Cat(req.idx, cnt(log2Up(refillCycles)-1,0)) - else req.idx) << rowOffBits + io.data_req.bits.addr := (if(refillCycles > 1) + Cat(req_idx, data_req_cnt(log2Up(refillCycles)-1,0)) + else req_idx) << rowOffBits - io.release.bits.r_type := req.r_type - io.release.bits.addr := Cat(req.tag, req.idx).toUInt - io.release.bits.client_xact_id := req.client_xact_id - io.release.bits.data := - (if(refillCyclesPerBeat > 1) { - val data_buf = Reg(Bits()) - when(active && r2_data_req_fired && !beat_done) { - data_buf := Cat(io.data_resp, data_buf((refillCyclesPerBeat-1)*encRowBits-1, encRowBits)) - buf_v := (if(refillCyclesPerBeat > 2) - Cat(UInt(1), buf_v(refillCyclesPerBeat-2,1)) - else UInt(1)) - } - Cat(io.data_resp, data_buf) - } else { io.data_resp }) + io.release.bits := req + io.release.bits.addr_beat := beat_cnt + io.release.bits.data := (if(refillCyclesPerBeat > 1) { + // If the cache rows are narrower than a TLDataBeat, + // then buffer enough data_resps to make a whole beat + val data_buf = Reg(Bits()) + when(active && r2_data_req_fired && !beat_done) { + data_buf := Cat(io.data_resp, data_buf((refillCyclesPerBeat-1)*encRowBits-1, encRowBits)) + buf_v := 
(if(refillCyclesPerBeat > 2) + Cat(UInt(1), buf_v(refillCyclesPerBeat-2,1)) + else UInt(1)) + } + Cat(io.data_resp, data_buf) + } else { io.data_resp }) } class ProbeUnit extends L1HellaCacheModule { val io = new Bundle { - val req = Decoupled(new InternalProbe).flip + val req = Decoupled(new ProbeInternal).flip val rep = Decoupled(new Release) val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) @@ -500,7 +477,7 @@ class ProbeUnit extends L1HellaCacheModule { val state = Reg(init=s_invalid) val line_state = Reg(co.clientMetadataOnFlush.clone) val way_en = Reg(Bits()) - val req = Reg(new InternalProbe) + val req = Reg(new ProbeInternal) val hit = way_en.orR when (state === s_meta_write && io.meta_write.ready) { @@ -538,36 +515,32 @@ class ProbeUnit extends L1HellaCacheModule { state := s_invalid } + val reply = Mux(hit, req.makeRelease(req.client_xact_id, line_state), + req.makeRelease(req.client_xact_id)) io.req.ready := state === s_invalid io.rep.valid := state === s_release && !(hit && co.needsWriteback(line_state)) // Otherwise WBU will issue release - io.rep.bits := Release(co.getReleaseTypeOnProbe(req, - Mux(hit, line_state, co.clientMetadataOnFlush)), - req.addr, - req.client_xact_id) + io.rep.bits := reply io.meta_read.valid := state === s_meta_read - io.meta_read.bits.idx := req.addr - io.meta_read.bits.tag := req.addr >> idxBits + io.meta_read.bits.idx := req.addr_block + io.meta_read.bits.tag := req.addr_block >> idxBits io.meta_write.valid := state === s_meta_write io.meta_write.bits.way_en := way_en - io.meta_write.bits.idx := req.addr + io.meta_write.bits.idx := req.addr_block + io.meta_write.bits.data.tag := req.addr_block >> idxBits io.meta_write.bits.data.coh := co.clientMetadataOnProbe(req, line_state) - io.meta_write.bits.data.tag := req.addr >> UInt(idxBits) io.wb_req.valid := state === s_writeback_req + io.wb_req.bits := reply io.wb_req.bits.way_en := way_en - io.wb_req.bits.idx := req.addr - 
io.wb_req.bits.tag := req.addr >> UInt(idxBits) - io.wb_req.bits.r_type := co.getReleaseTypeOnProbe(req, Mux(hit, line_state, co.clientMetadataOnFlush)) - io.wb_req.bits.client_xact_id := req.client_xact_id } class DataArray extends L1HellaCacheModule { val io = new Bundle { - val read = Decoupled(new DataReadReq).flip - val write = Decoupled(new DataWriteReq).flip + val read = Decoupled(new L1DataReadReq).flip + val write = Decoupled(new L1DataWriteReq).flip val resp = Vec.fill(nWays){Bits(OUTPUT, encRowBits)} } @@ -612,47 +585,6 @@ class DataArray extends L1HellaCacheModule { io.write.ready := Bool(true) } -class AMOALU extends L1HellaCacheModule { - val io = new Bundle { - val addr = Bits(INPUT, blockOffBits) - val cmd = Bits(INPUT, 4) - val typ = Bits(INPUT, 3) - val lhs = Bits(INPUT, coreDataBits) - val rhs = Bits(INPUT, coreDataBits) - val out = Bits(OUTPUT, coreDataBits) - } - - require(coreDataBits == 64) - val storegen = new StoreGen(io.typ, io.addr, io.rhs) - val rhs = storegen.wordData - - val sgned = io.cmd === M_XA_MIN || io.cmd === M_XA_MAX - val max = io.cmd === M_XA_MAX || io.cmd === M_XA_MAXU - val min = io.cmd === M_XA_MIN || io.cmd === M_XA_MINU - val word = io.typ === MT_W || io.typ === MT_WU || io.typ === MT_B || io.typ === MT_BU - - val mask = SInt(-1,64) ^ (io.addr(2) << 31) - val adder_out = (io.lhs & mask).toUInt + (rhs & mask) - - val cmp_lhs = Mux(word && !io.addr(2), io.lhs(31), io.lhs(63)) - val cmp_rhs = Mux(word && !io.addr(2), rhs(31), rhs(63)) - val lt_lo = io.lhs(31,0) < rhs(31,0) - val lt_hi = io.lhs(63,32) < rhs(63,32) - val eq_hi = io.lhs(63,32) === rhs(63,32) - val lt = Mux(word, Mux(io.addr(2), lt_hi, lt_lo), lt_hi || eq_hi && lt_lo) - val less = Mux(cmp_lhs === cmp_rhs, lt, Mux(sgned, cmp_lhs, cmp_rhs)) - - val out = Mux(io.cmd === M_XA_ADD, adder_out, - Mux(io.cmd === M_XA_AND, io.lhs & rhs, - Mux(io.cmd === M_XA_OR, io.lhs | rhs, - Mux(io.cmd === M_XA_XOR, io.lhs ^ rhs, - Mux(Mux(less, min, max), io.lhs, - 
storegen.data))))) - - val wmask = FillInterleaved(8, storegen.mask) - io.out := wmask & out | ~wmask & io.lhs -} - class HellaCache extends L1HellaCacheModule { val io = new Bundle { val cpu = (new HellaCacheIO).flip @@ -693,7 +625,7 @@ class HellaCache extends L1HellaCacheModule { val s1_sc = s1_req.cmd === M_XSC val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) - val dtlb = Module(new TLB(params(NDTLBEntries))) + val dtlb = Module(new TLB) dtlb.io.ptw <> io.cpu.ptw dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys dtlb.io.req.bits.passthrough := s1_req.phys @@ -754,8 +686,8 @@ class HellaCache extends L1HellaCacheModule { // data val data = Module(new DataArray) - val readArb = Module(new Arbiter(new DataReadReq, 4)) - val writeArb = Module(new Arbiter(new DataWriteReq, 2)) + val readArb = Module(new Arbiter(new L1DataReadReq, 4)) + val writeArb = Module(new Arbiter(new L1DataWriteReq, 2)) data.io.write.valid := writeArb.io.out.valid writeArb.io.out.ready := data.io.write.ready data.io.write.bits := writeArb.io.out.bits @@ -837,9 +769,9 @@ class HellaCache extends L1HellaCacheModule { } writeArb.io.in(0).bits.addr := s3_req.addr - writeArb.io.in(0).bits.wmask := UInt(1) << (if(rowOffBits > offsetlsb) - s3_req.addr(rowOffBits-1,offsetlsb).toUInt - else UInt(0)) + val rowIdx = s3_req.addr(rowOffBits-1,offsetlsb).toUInt + val rowWMask = UInt(1) << (if(rowOffBits > offsetlsb) rowIdx else UInt(0)) + writeArb.io.in(0).bits.wmask := rowWMask writeArb.io.in(0).bits.data := Fill(rowWords, s3_req.data) writeArb.io.in(0).valid := s3_valid writeArb.io.in(0).bits.way_en := s3_way @@ -871,7 +803,7 @@ class HellaCache extends L1HellaCacheModule { metaWriteArb.io.in(0) <> mshrs.io.meta_write // probes and releases - val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, co.messageHasData _)) + val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, (r: Release) => r.hasMultibeatData())) 
DecoupledLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release val probe = DecoupledLogicalNetworkIOUnwrapper(io.mem.probe) @@ -886,14 +818,15 @@ class HellaCache extends L1HellaCacheModule { prober.io.mshr_rdy := mshrs.io.probe_rdy // refills - def doRefill(g: Grant): Bool = co.messageUpdatesDataArray(g) - val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCyclesPerBeat, doRefill)) + val ser = Module(new FlowThroughSerializer( + io.mem.grant.bits, + refillCyclesPerBeat)) ser.io.in <> io.mem.grant val refill = ser.io.out mshrs.io.mem_grant.valid := refill.fire() mshrs.io.mem_grant.bits := refill.bits - refill.ready := writeArb.io.in(1).ready || !doRefill(refill.bits.payload) - writeArb.io.in(1).valid := refill.valid && doRefill(refill.bits.payload) + refill.ready := writeArb.io.in(1).ready || !refill.bits.payload.hasData() + writeArb.io.in(1).valid := refill.valid && refill.bits.payload.hasData() writeArb.io.in(1).bits := mshrs.io.mem_resp writeArb.io.in(1).bits.wmask := SInt(-1) writeArb.io.in(1).bits.data := refill.bits.payload.data(encRowBits-1,0) @@ -1008,15 +941,15 @@ class SimpleHellaCacheIF extends Module io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) io.cache.req <> req_arb.io.out - // replay queues - // replayq1 holds the older request - // replayq2 holds the newer request (for the first nack) - // we need to split the queues like this for the case where the older request - // goes through but gets nacked, while the newer request stalls - // if this happens, the newer request will go through before the older - // request - // we don't need to check replayq1.io.enq.ready and replayq2.io.enq.ready as - // there will only be two requests going through at most +/* replay queues: + replayq1 holds the older request. + replayq2 holds the newer request (for the first nack). + We need to split the queues like this for the case where the older request + goes through but gets nacked, while the newer request stalls. 
+ If this happens, the newer request will go through before the older one. + We don't need to check replayq1.io.enq.ready and replayq2.io.enq.ready as + there will only be two requests going through at most. +*/ // stash d$ request in stage 2 if nacked (older request) replayq1.io.enq.valid := Bool(false) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 3663a6b4..cba40021 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -6,28 +6,28 @@ import Chisel._ import uncore._ import Util._ -class PTWResp extends Bundle { +class PTWResp extends CoreBundle { val error = Bool() - val ppn = UInt(width = params(PPNBits)) - val perm = Bits(width = params(PermBits)) + val ppn = UInt(width = ppnBits) + val perm = Bits(width = permBits) } -class TLBPTWIO extends Bundle { - val req = Decoupled(UInt(width = params(VPNBits))) +class TLBPTWIO extends CoreBundle { + val req = Decoupled(UInt(width = vpnBits)) val resp = Valid(new PTWResp).flip val status = new Status().asInput val invalidate = Bool(INPUT) val sret = Bool(INPUT) } -class DatapathPTWIO extends Bundle { - val ptbr = UInt(INPUT, params(PAddrBits)) +class DatapathPTWIO extends CoreBundle { + val ptbr = UInt(INPUT, paddrBits) val invalidate = Bool(INPUT) val sret = Bool(INPUT) val status = new Status().asInput } -class PTW(n: Int) extends Module +class PTW(n: Int) extends CoreModule { val io = new Bundle { val requestor = Vec.fill(n){new TLBPTWIO}.flip @@ -36,8 +36,8 @@ class PTW(n: Int) extends Module } val levels = 3 - val bitsPerLevel = params(VPNBits)/levels - require(params(VPNBits) == levels * bitsPerLevel) + val bitsPerLevel = vpnBits/levels + require(vpnBits == levels * bitsPerLevel) val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(UInt(), 5) val state = Reg(init=s_ready) @@ -49,14 +49,14 @@ class PTW(n: Int) extends Module val vpn_idx = Vec((0 until levels).map(i => (r_req_vpn >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count) 
- val arb = Module(new RRArbiter(UInt(width = params(VPNBits)), n)) + val arb = Module(new RRArbiter(UInt(width = vpnBits), n)) arb.io.in <> io.requestor.map(_.req) arb.io.out.ready := state === s_ready when (arb.io.out.fire()) { r_req_vpn := arb.io.out.bits r_req_dest := arb.io.chosen - r_pte := Cat(io.dpath.ptbr(params(PAddrBits)-1,params(PgIdxBits)), io.mem.resp.bits.data(params(PgIdxBits)-1,0)) + r_pte := Cat(io.dpath.ptbr(paddrBits-1,pgIdxBits), io.mem.resp.bits.data(pgIdxBits-1,0)) } when (io.mem.resp.valid) { @@ -67,13 +67,13 @@ class PTW(n: Int) extends Module io.mem.req.bits.phys := Bool(true) io.mem.req.bits.cmd := M_XRD io.mem.req.bits.typ := MT_D - io.mem.req.bits.addr := Cat(r_pte(params(PAddrBits)-1,params(PgIdxBits)), vpn_idx).toUInt << log2Up(params(XprLen)/8) + io.mem.req.bits.addr := Cat(r_pte(paddrBits-1,pgIdxBits), vpn_idx).toUInt << log2Up(xLen/8) io.mem.req.bits.kill := Bool(false) val resp_val = state === s_done || state === s_error val resp_err = state === s_error || state === s_wait - val r_resp_ppn = io.mem.req.bits.addr >> UInt(params(PgIdxBits)) + val r_resp_ppn = io.mem.req.bits.addr >> UInt(pgIdxBits) val resp_ppn = Vec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), r_req_vpn(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) for (i <- 0 until io.requestor.size) { diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 68cef693..49d8d332 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -19,17 +19,17 @@ class RoCCInstruction extends Bundle val opcode = Bits(width = 7) } -class RoCCCommand extends Bundle +class RoCCCommand extends CoreBundle { val inst = new RoCCInstruction - val rs1 = Bits(width = params(XprLen)) - val rs2 = Bits(width = params(XprLen)) + val rs1 = Bits(width = xLen) + val rs2 = Bits(width = xLen) } -class RoCCResponse extends Bundle +class RoCCResponse extends CoreBundle { val rd = Bits(width = 5) - val data = Bits(width 
= params(XprLen)) + val data = Bits(width = xLen) } class RoCCInterface extends Bundle @@ -50,7 +50,7 @@ class RoCCInterface extends Bundle val exception = Bool(INPUT) } -abstract class RoCC extends Module +abstract class RoCC extends CoreModule { val io = new RoCCInterface io.mem.req.bits.phys := Bool(true) // don't perform address translation @@ -59,7 +59,7 @@ abstract class RoCC extends Module class AccumulatorExample extends RoCC { val n = 4 - val regfile = Mem(UInt(width = params(XprLen)), n) + val regfile = Mem(UInt(width = xLen), n) val busy = Vec.fill(n){Reg(init=Bool(false))} val cmd = Queue(io.cmd) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index c5acad45..8c369c9b 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -6,23 +6,33 @@ import Chisel._ import uncore._ import scala.math._ -class CAMIO(entries: Int, addr_bits: Int, tag_bits: Int) extends Bundle { +case object NTLBEntries extends Field[Int] + +abstract trait TLBParameters extends CoreParameters { + val entries = params(NTLBEntries) + val camAddrBits = ceil(log(entries)/log(2)).toInt + val camTagBits = asIdBits + vpnBits +} + +abstract class TLBBundle extends Bundle with TLBParameters +abstract class TLBModule extends Module with TLBParameters + +class CAMIO extends TLBBundle { val clear = Bool(INPUT) val clear_hit = Bool(INPUT) - val tag = Bits(INPUT, tag_bits) + val tag = Bits(INPUT, camTagBits) val hit = Bool(OUTPUT) val hits = UInt(OUTPUT, entries) val valid_bits = Bits(OUTPUT, entries) val write = Bool(INPUT) - val write_tag = Bits(INPUT, tag_bits) - val write_addr = UInt(INPUT, addr_bits) + val write_tag = Bits(INPUT, camTagBits) + val write_addr = UInt(INPUT, camAddrBits) } -class RocketCAM(entries: Int, tag_bits: Int) extends Module { - val addr_bits = ceil(log(entries)/log(2)).toInt - val io = new CAMIO(entries, addr_bits, tag_bits) - val cam_tags = Mem(Bits(width = tag_bits), entries) +class RocketCAM extends TLBModule 
{ + val io = new CAMIO + val cam_tags = Mem(Bits(width = camTagBits), entries) val vb_array = Reg(init=Bits(0, entries)) when (io.write) { @@ -66,30 +76,27 @@ class PseudoLRU(n: Int) } } -class TLBReq extends Bundle -{ - val asid = UInt(width = params(ASIdBits)) - val vpn = UInt(width = params(VPNBits)+1) +class TLBReq extends TLBBundle { + val asid = UInt(width = asIdBits) + val vpn = UInt(width = vpnBits+1) val passthrough = Bool() val instruction = Bool() } -class TLBResp(entries: Int) extends Bundle -{ +class TLBResp(cnt: Option[Int] = None) extends TLBBundle { // lookup responses val miss = Bool(OUTPUT) - val hit_idx = UInt(OUTPUT, entries) - val ppn = UInt(OUTPUT, params(PPNBits)) + val hit_idx = UInt(OUTPUT, cnt.getOrElse(entries)) + val ppn = UInt(OUTPUT, ppnBits) val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) val xcpt_if = Bool(OUTPUT) } -class TLB(entries: Int) extends Module -{ +class TLB extends TLBModule { val io = new Bundle { val req = Decoupled(new TLBReq).flip - val resp = new TLBResp(entries) + val resp = new TLBResp val ptw = new TLBPTWIO } @@ -98,7 +105,7 @@ class TLB(entries: Int) extends Module val r_refill_tag = Reg(UInt()) val r_refill_waddr = Reg(UInt()) - val tag_cam = Module(new RocketCAM(entries, params(ASIdBits)+params(VPNBits))) + val tag_cam = Module(new RocketCAM) val tag_ram = Mem(io.ptw.resp.bits.ppn.clone, entries) val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt @@ -135,7 +142,7 @@ class TLB(entries: Int) extends Module val plru = new PseudoLRU(entries) val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) - val bad_va = io.req.bits.vpn(params(VPNBits)) != io.req.bits.vpn(params(VPNBits)-1) + val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1) val tlb_hit = io.ptw.status.vm && tag_hit val tlb_miss = io.ptw.status.vm && !tag_hit && !bad_va From 3d35ccd4013950e752a178fc9eebf55aa6475380 Mon Sep 17 00:00:00 2001 From: Stephen Twigg Date: Tue, 3 Feb 2015 18:10:54 -0800 Subject: 
[PATCH 0799/1087] Explicitly convert results of Bits Muxes to UInt Chisel updated to emit SInt result instead of UInt so this commit addresses this change. --- rocket/src/main/scala/dpath.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 0ea675c0..d9013379 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -231,10 +231,10 @@ class Datapath extends CoreModule val mem_br_target = mem_reg_pc + Mux(io.ctrl.mem_ctrl.branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), Mux(io.ctrl.mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4))) - val mem_npc = Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)), mem_br_target) + val mem_npc = Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)), mem_br_target).toUInt io.ctrl.mem_misprediction := mem_npc != ex_reg_pc || !io.ctrl.ex_valid io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1 - val mem_int_wdata = Mux(io.ctrl.mem_ctrl.jalr, mem_br_target, mem_reg_wdata) + val mem_int_wdata = Mux(io.ctrl.mem_ctrl.jalr, mem_br_target, mem_reg_wdata).toUInt // writeback stage when (!mem_reg_kill) { From aa46b8b72d97c3e98c92e1f074c56c99959bc8f2 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 3 Feb 2015 19:32:08 -0800 Subject: [PATCH 0800/1087] Slightly refactor TLBResp --- rocket/src/main/scala/tlb.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 8c369c9b..78254acb 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -76,23 +76,26 @@ class PseudoLRU(n: Int) } } -class TLBReq extends TLBBundle { +class TLBReq extends CoreBundle { val asid = UInt(width = asIdBits) val vpn = UInt(width = vpnBits+1) val passthrough = Bool() val instruction = Bool() } -class TLBResp(cnt: 
Option[Int] = None) extends TLBBundle { +class TLBRespNoHitIndex extends CoreBundle { // lookup responses val miss = Bool(OUTPUT) - val hit_idx = UInt(OUTPUT, cnt.getOrElse(entries)) val ppn = UInt(OUTPUT, ppnBits) val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) val xcpt_if = Bool(OUTPUT) } +class TLBResp extends TLBRespNoHitIndex with TLBParameters { + val hit_idx = UInt(OUTPUT, entries) +} + class TLB extends TLBModule { val io = new Bundle { val req = Decoupled(new TLBReq).flip From 0b131173e683588d8aedf51f4ab9bfe33ba8ea6d Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 16 Feb 2015 10:59:57 -0800 Subject: [PATCH 0801/1087] WritebackUnit multibeat control logic bugfix --- rocket/src/main/scala/nbdcache.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c991713f..1c75e176 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -418,7 +418,8 @@ class WritebackUnit extends L1HellaCacheModule { data_req_cnt := data_req_cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1) } .elsewhen(beat_done) { if(refillCyclesPerBeat > 1) buf_v := 0 } when(!r1_data_req_fired) { - active := data_req_cnt < UInt(refillCycles) + // We're done if this is the final data request and the Release can be sent + active := data_req_cnt < UInt(refillCycles) || !io.release.ready } } } From 1e0c16c557ceef1442badb1d4871f18fa52f2a23 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Sat, 28 Feb 2015 17:00:05 -0800 Subject: [PATCH 0802/1087] new metadata api --- rocket/src/main/scala/icache.scala | 8 +- rocket/src/main/scala/nbdcache.scala | 105 +++++++++++++++------------ 2 files changed, 61 insertions(+), 52 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 4c35bde2..80d1ff02 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -7,10 +7,10 @@ 
import Util._ case object ECCCode extends Field[Option[Code]] abstract trait L1CacheParameters extends CacheParameters with CoreParameters { - val co = params(TLCoherence) - val code = params(ECCCode).getOrElse(new IdentityCode) val outerDataBeats = params(TLDataBeats) - val refillCyclesPerBeat = params(TLDataBits)/rowBits + val outerDataBits = params(TLDataBits) + val code = params(ECCCode).getOrElse(new IdentityCode) + val refillCyclesPerBeat = outerDataBits/rowBits val refillCycles = refillCyclesPerBeat*outerDataBeats } @@ -273,7 +273,7 @@ class ICache extends FrontendModule // output signals io.resp.valid := s2_hit io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready - io.mem.acquire.bits.payload := UncachedReadBlock(addr_block = s2_addr >> UInt(blockOffBits)) + io.mem.acquire.bits.payload := GetBlock(addr_block = s2_addr >> UInt(blockOffBits)) io.mem.finish <> ack_q.io.deq // control state machine diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 1c75e176..e4d370b1 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -113,7 +113,7 @@ object L1Metadata { } } class L1Metadata extends Metadata with L1HellaCacheParameters { - val coh = co.clientMetadataOnFlush.clone + val coh = new ClientMetadata } class Replay extends HellaCacheReqInternal with HasCoreData @@ -153,31 +153,37 @@ class MSHR(id: Int) extends L1HellaCacheModule { val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(UInt(), 9) val state = Reg(init=s_invalid) - val acquire_type = Reg(UInt()) - val line_state = Reg(new ClientMetadata) + val new_coh_state = Reg(init=ClientMetadata.onReset) val req = Reg(new MSHRReqInternal()) - - val req_cmd = io.req_bits.cmd val req_idx = req.addr(untagBits-1,blockOffBits) val idx_match = req_idx === io.req_bits.addr(untagBits-1,blockOffBits) - val sec_rdy = 
idx_match && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits) && - Vec(s_wb_req, s_wb_resp, s_meta_clear, s_refill_req, s_refill_resp).contains(state) - + // We only accept secondary misses if we haven't yet sent an Acquire to outer memory + // or if the Acquire that was sent will obtain a Grant with sufficient permissions + // to let us replay this new request. I.e. we don't handle multiple outstanding + // Acquires on the same block for now. + val cmd_requires_second_acquire = + req.old_meta.coh.requiresAcquireOnSecondaryMiss(req.cmd, io.req_bits.cmd) + val states_before_refill = Vec(s_wb_req, s_wb_resp, s_meta_clear) + val sec_rdy = idx_match && + (states_before_refill.contains(state) || + (Vec(s_refill_req, s_refill_resp).contains(state) && + !cmd_requires_second_acquire)) val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UInt(id) val gnt_multi_data = io.mem_grant.bits.payload.hasMultibeatData() val (refill_cnt, refill_count_done) = Counter(reply && gnt_multi_data, refillCycles) // TODO: Zero width? 
val refill_done = reply && (!gnt_multi_data || refill_count_done) val wb_done = reply && (state === s_wb_resp) - val meta_on_flush = co.clientMetadataOnFlush - val meta_on_grant = co.clientMetadataOnGrant(io.mem_grant.bits.payload, io.mem_req.bits) - val meta_on_hit = co.clientMetadataOnHit(req_cmd, io.req_bits.old_meta.coh) - val rpq = Module(new Queue(new ReplayInternal, params(ReplayQueueDepth))) - rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(req_cmd) + rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(io.req_bits.cmd) rpq.io.enq.bits := io.req_bits rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid + val coh_on_grant = req.old_meta.coh.onGrant( + incoming = io.mem_grant.bits.payload, + pending = req.cmd) + val coh_on_hit = io.req_bits.old_meta.coh.onHit(io.req_bits.cmd) + when (state === s_drain_rpq && !rpq.io.deq.valid) { state := s_invalid } @@ -189,7 +195,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { state := s_meta_write_resp } when (state === s_refill_resp) { - when (reply) { line_state := meta_on_grant } + when (reply) { new_coh_state := coh_on_grant } when (refill_done) { state := s_meta_write_req } } when (io.mem_req.fire()) { // s_refill_req @@ -205,22 +211,25 @@ class MSHR(id: Int) extends L1HellaCacheModule { state := Mux(io.wb_req.bits.requiresAck(), s_wb_resp, s_meta_clear) } when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req - acquire_type := co.getAcquireTypeOnSecondaryMiss(req_cmd, meta_on_flush, io.mem_req.bits) + //If we get a secondary miss that needs more permissions before we've sent + // out the primary miss's Acquire, we can upgrade the permissions we're + // going to ask for in s_refill_req + when(cmd_requires_second_acquire) { + req.cmd := io.req_bits.cmd + } } when (io.req_pri_val && io.req_pri_rdy) { - line_state := meta_on_flush - acquire_type := 
co.getAcquireTypeOnPrimaryMiss(req_cmd, meta_on_flush) + val coh = io.req_bits.old_meta.coh req := io.req_bits - when (io.req_bits.tag_match) { - when (co.isHit(req_cmd, io.req_bits.old_meta.coh)) { // set dirty bit + when(coh.isHit(io.req_bits.cmd)) { // set dirty bit state := s_meta_write_req - line_state := meta_on_hit + new_coh_state := coh_on_hit }.otherwise { // upgrade permissions state := s_refill_req } }.otherwise { // writback if necessary and refill - state := Mux(co.needsWriteback(io.req_bits.old_meta.coh), s_wb_req, s_meta_clear) + state := Mux(coh.requiresVoluntaryWriteback(), s_wb_req, s_meta_clear) } } @@ -243,26 +252,27 @@ class MSHR(id: Int) extends L1HellaCacheModule { val meta_hazard = Reg(init=UInt(0,2)) when (meta_hazard != UInt(0)) { meta_hazard := meta_hazard + 1 } when (io.meta_write.fire()) { meta_hazard := 1 } - io.probe_rdy := !idx_match || (state != s_wb_req && state != s_wb_resp && state != s_meta_clear && meta_hazard === 0) + io.probe_rdy := !idx_match || (!states_before_refill.contains(state) && meta_hazard === 0) io.meta_write.valid := state === s_meta_write_req || state === s_meta_clear io.meta_write.bits.idx := req_idx - io.meta_write.bits.data.coh := Mux(state === s_meta_clear, meta_on_flush, line_state) + io.meta_write.bits.data.coh := Mux(state === s_meta_clear, + req.old_meta.coh.onCacheControl(M_FLUSH), + new_coh_state) io.meta_write.bits.data.tag := io.tag io.meta_write.bits.way_en := req.way_en io.wb_req.valid := state === s_wb_req && ackq.io.enq.ready - io.wb_req.bits := Release.makeVoluntaryWriteback( - meta = req.old_meta.coh, + io.wb_req.bits := req.old_meta.coh.makeVoluntaryWriteback( client_xact_id = UInt(id), addr_block = Cat(req.old_meta.tag, req_idx)) io.wb_req.bits.way_en := req.way_en io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready - io.mem_req.bits := Acquire( - a_type = acquire_type, + io.mem_req.bits := req.old_meta.coh.makeAcquire( addr_block = Cat(io.tag, req_idx).toUInt, - client_xact_id = 
Bits(id)) + client_xact_id = Bits(id), + op_code = req.cmd) io.mem_finish <> ackq.io.deq io.meta_read.valid := state === s_drain_rpq @@ -285,7 +295,7 @@ class MSHRFile extends L1HellaCacheModule { val req = Decoupled(new MSHRReq).flip val secondary_miss = Bool(OUTPUT) - val mem_req = Decoupled(new Acquire) + val mem_req = Decoupled(new Acquire) //TODO make sure TLParameters are correct ????? val mem_resp = new L1DataWriteReq().asOutput val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) @@ -471,15 +481,15 @@ class ProbeUnit extends L1HellaCacheModule { val wb_req = Decoupled(new WritebackReq) val way_en = Bits(INPUT, nWays) val mshr_rdy = Bool(INPUT) - val line_state = new ClientMetadata().asInput + val block_state = new ClientMetadata().asInput } - val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(UInt(), 9) + val s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(UInt(), 8) val state = Reg(init=s_invalid) - val line_state = Reg(co.clientMetadataOnFlush.clone) + val old_coh = Reg(new ClientMetadata) val way_en = Reg(Bits()) val req = Reg(new ProbeInternal) - val hit = way_en.orR + val tag_matches = way_en.orR when (state === s_meta_write && io.meta_write.ready) { state := s_invalid @@ -492,13 +502,14 @@ class ProbeUnit extends L1HellaCacheModule { } when (state === s_release && io.rep.ready) { state := s_invalid - when (hit) { - state := Mux(co.needsWriteback(line_state), s_writeback_req, s_meta_write) + when (tag_matches) { + state := Mux(old_coh.requiresVoluntaryWriteback(), + s_writeback_req, s_meta_write) } } when (state === s_mshr_req) { state := s_release - line_state := io.line_state + old_coh := io.block_state way_en := io.way_en when (!io.mshr_rdy) { state := s_meta_read } } @@ -512,15 +523,11 @@ class ProbeUnit extends 
L1HellaCacheModule { state := s_meta_read req := io.req.bits } - when (state === s_reset) { - state := s_invalid - } - val reply = Mux(hit, req.makeRelease(req.client_xact_id, line_state), - req.makeRelease(req.client_xact_id)) + val reply = old_coh.makeRelease(req, req.client_xact_id) io.req.ready := state === s_invalid io.rep.valid := state === s_release && - !(hit && co.needsWriteback(line_state)) // Otherwise WBU will issue release + !(tag_matches && old_coh.requiresVoluntaryWriteback()) // Otherwise WBU will issue release io.rep.bits := reply io.meta_read.valid := state === s_meta_read @@ -531,7 +538,7 @@ class ProbeUnit extends L1HellaCacheModule { io.meta_write.bits.way_en := way_en io.meta_write.bits.idx := req.addr_block io.meta_write.bits.data.tag := req.addr_block >> idxBits - io.meta_write.bits.data.coh := co.clientMetadataOnProbe(req, line_state) + io.meta_write.bits.data.coh := old_coh.onProbe(req) io.wb_req.valid := state === s_writeback_req io.wb_req.bits := reply @@ -596,7 +603,7 @@ class HellaCache extends L1HellaCacheModule { require(isPow2(nSets)) require(isPow2(nWays)) // TODO: relax this require(params(RowBits) <= params(TLDataBits)) - require(paddrBits-blockOffBits == params(TLAddrBits) ) + require(paddrBits-blockOffBits == params(TLBlockAddrBits) ) require(untagBits <= pgIdxBits) val wb = Module(new WritebackUnit) @@ -678,7 +685,7 @@ class HellaCache extends L1HellaCacheModule { io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st // tags - def onReset = L1Metadata(UInt(0), ClientMetadata(UInt(0))) + def onReset = L1Metadata(UInt(0), ClientMetadata.onReset) val meta = Module(new MetadataArray(onReset _)) val metaReadArb = Module(new Arbiter(new MetaReadReq, 5)) val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2)) @@ -716,13 +723,15 @@ class HellaCache extends L1HellaCacheModule { // tag check and way muxing def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f)) val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag 
=== (s1_addr >> untagBits)).toBits - val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && co.isValid(meta.io.resp(w).coh)).toBits + val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta.io.resp(w).coh.isValid()).toBits s1_clk_en := metaReadArb.io.out.valid //TODO: should be metaReadArb.io.out.fire(), but triggers Verilog backend bug val s1_writeback = s1_clk_en && !s1_valid && !s1_replay val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en) val s2_tag_match = s2_tag_match_way.orR val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en))) - val s2_hit = s2_tag_match && co.isHit(s2_req.cmd, s2_hit_state) && s2_hit_state === co.clientMetadataOnHit(s2_req.cmd, s2_hit_state) + val s2_hit = s2_tag_match && + s2_hit_state.isHit(s2_req.cmd) && + s2_hit_state === s2_hit_state.onHit(s2_req.cmd) // load-reserved/store-conditional val lrsc_count = Reg(init=UInt(0)) @@ -813,7 +822,7 @@ class HellaCache extends L1HellaCacheModule { prober.io.req.bits := probe.bits prober.io.rep <> releaseArb.io.in(1) prober.io.way_en := s2_tag_match_way - prober.io.line_state := s2_hit_state + prober.io.block_state := s2_hit_state prober.io.meta_read <> metaReadArb.io.in(2) prober.io.meta_write <> metaWriteArb.io.in(1) prober.io.mshr_rdy := mshrs.io.probe_rdy From 5d07733057183359757d25883a5a05b1d3ca0dcc Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Tue, 3 Mar 2015 16:40:39 -0800 Subject: [PATCH 0803/1087] Removed TLBPTWIO from the io.cpu bundle for icache/dcache --- rocket/src/main/scala/icache.scala | 8 ++++---- rocket/src/main/scala/nbdcache.scala | 6 +++--- rocket/src/main/scala/tile.scala | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 80d1ff02..ffdcd9ee 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -37,7 +37,6 @@ class CPUFrontendIO extends Bundle { val 
btb_update = Valid(new BTBUpdate) val bht_update = Valid(new BHTUpdate) val ras_update = Valid(new RASUpdate) - val ptw = new TLBPTWIO().flip val invalidate = Bool(OUTPUT) } @@ -45,6 +44,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule { val io = new Bundle { val cpu = new CPUFrontendIO().flip + val ptw = new TLBPTWIO() val mem = new UncachedTileLinkIO } @@ -94,9 +94,9 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule btb.io.btb_update := io.cpu.btb_update btb.io.bht_update := io.cpu.bht_update btb.io.ras_update := io.cpu.ras_update - btb.io.invalidate := io.cpu.invalidate || io.cpu.ptw.invalidate + btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate - tlb.io.ptw <> io.cpu.ptw + tlb.io.ptw <> io.ptw tlb.io.req.valid := !stall && !icmiss tlb.io.req.bits.vpn := s1_pc >> UInt(pgIdxBits) tlb.io.req.bits.asid := UInt(0) @@ -108,7 +108,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule icache.io.req.bits.idx := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) icache.io.invalidate := io.cpu.invalidate icache.io.req.bits.ppn := tlb.io.resp.ppn - icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || io.cpu.ptw.invalidate + icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || io.ptw.invalidate icache.io.resp.ready := !stall && !s1_same_block io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index e4d370b1..72829979 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -83,7 +83,6 @@ class HellaCacheIO extends CoreBundle { val resp = Valid(new HellaCacheResp).flip val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip val xcpt = (new HellaCacheExceptions).asInput - val ptw = new TLBPTWIO().flip val ordered = Bool(INPUT) } @@ -596,6 +595,7 @@ class DataArray 
extends L1HellaCacheModule { class HellaCache extends L1HellaCacheModule { val io = new Bundle { val cpu = (new HellaCacheIO).flip + val ptw = new TLBPTWIO() val mem = new TileLinkIO } @@ -634,7 +634,7 @@ class HellaCache extends L1HellaCacheModule { val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) val dtlb = Module(new TLB) - dtlb.io.ptw <> io.cpu.ptw + dtlb.io.ptw <> io.ptw dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys dtlb.io.req.bits.passthrough := s1_req.phys dtlb.io.req.bits.asid := UInt(0) @@ -750,7 +750,7 @@ class HellaCache extends L1HellaCacheModule { lrsc_count := 0 } } - when (io.cpu.ptw.sret) { lrsc_count := 0 } + when (io.ptw.sret) { lrsc_count := 0 } val s2_data = Vec.fill(nWays){Bits(width = encRowBits)} for (w <- 0 until nWays) { diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 4ad0897d..de188bdf 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -31,8 +31,8 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { dcArb.io.requestor(1) <> core.io.dmem dcArb.io.mem <> dcache.io.cpu - ptw.io.requestor(0) <> icache.io.cpu.ptw - ptw.io.requestor(1) <> dcache.io.cpu.ptw + ptw.io.requestor(0) <> icache.io.ptw + ptw.io.requestor(1) <> dcache.io.ptw core.io.host <> io.host core.io.imem <> icache.io.cpu From 06dea3790a9a6b54ad514d8e0074f557e2e00a45 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Tue, 3 Mar 2015 16:50:41 -0800 Subject: [PATCH 0804/1087] Removed sret from ptw; sret now comes thru io.cpu to dcache --- rocket/src/main/scala/ctrl.scala | 1 + rocket/src/main/scala/dpath.scala | 1 - rocket/src/main/scala/nbdcache.scala | 3 ++- rocket/src/main/scala/ptw.scala | 3 --- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 1811e2bc..bb29c2ac 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -675,6 
+675,7 @@ class Control extends CoreModule io.dmem.req.bits.cmd := ex_ctrl.mem_cmd io.dmem.req.bits.typ := ex_ctrl.mem_type io.dmem.req.bits.phys := Bool(false) + io.dmem.sret := io.dpath.sret io.rocc.cmd.valid := wb_rocc_val io.rocc.exception := wb_reg_xcpt && sr.er diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index d9013379..f5346b3b 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -177,7 +177,6 @@ class Datapath extends CoreModule io.ptw.ptbr := pcr.io.ptbr io.ptw.invalidate := pcr.io.fatc - io.ptw.sret := io.ctrl.sret io.ptw.status := pcr.io.status // memory stage diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 72829979..9d3ca5b8 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -83,6 +83,7 @@ class HellaCacheIO extends CoreBundle { val resp = Valid(new HellaCacheResp).flip val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip val xcpt = (new HellaCacheExceptions).asInput + val sret = Bool(OUTPUT) val ordered = Bool(INPUT) } @@ -750,7 +751,7 @@ class HellaCache extends L1HellaCacheModule { lrsc_count := 0 } } - when (io.ptw.sret) { lrsc_count := 0 } + when (io.cpu.sret) { lrsc_count := 0 } val s2_data = Vec.fill(nWays){Bits(width = encRowBits)} for (w <- 0 until nWays) { diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index cba40021..a165cfc0 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -17,13 +17,11 @@ class TLBPTWIO extends CoreBundle { val resp = Valid(new PTWResp).flip val status = new Status().asInput val invalidate = Bool(INPUT) - val sret = Bool(INPUT) } class DatapathPTWIO extends CoreBundle { val ptbr = UInt(INPUT, paddrBits) val invalidate = Bool(INPUT) - val sret = Bool(INPUT) val status = new Status().asInput } @@ -83,7 +81,6 @@ class PTW(n: Int) extends CoreModule io.requestor(i).resp.bits.perm := 
r_pte(8,3) io.requestor(i).resp.bits.ppn := resp_ppn.toUInt io.requestor(i).invalidate := io.dpath.invalidate - io.requestor(i).sret := io.dpath.sret io.requestor(i).status := io.dpath.status } From b36d751250730e13b9c56a7fd0faeeead7e07e26 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 5 Mar 2015 13:14:16 -0800 Subject: [PATCH 0805/1087] sret bugfix: bypass arbiter --- rocket/src/main/scala/rocc.scala | 1 + rocket/src/main/scala/tile.scala | 1 + 2 files changed, 2 insertions(+) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 49d8d332..425c96b5 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -121,6 +121,7 @@ class AccumulatorExample extends RoCC io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores) io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1 io.mem.req.bits.data := Bits(0) // we're not performing any stores... + io.mem.sret := false io.imem.acquire.valid := false io.imem.grant.ready := false diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index de188bdf..bb5356ee 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -26,6 +26,7 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { val ptw = Module(new PTW(params(NPTWPorts))) val core = Module(new Core, { case CoreName => "Rocket" }) + dcache.io.cpu.sret := core.io.dmem.sret val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) dcArb.io.requestor(0) <> ptw.io.mem dcArb.io.requestor(1) <> core.io.dmem From 95aa295c39e0f25253fcdac408517517e9fb6589 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 9 Mar 2015 16:34:43 -0700 Subject: [PATCH 0806/1087] Use HeaderlessTileLinkIO to cut down on unconnected port errors in VCS --- rocket/src/main/scala/arbiter.scala | 13 +++++++++++++ rocket/src/main/scala/icache.scala | 6 +++--- rocket/src/main/scala/nbdcache.scala | 6 +++--- rocket/src/main/scala/rocc.scala | 4 
++-- rocket/src/main/scala/tile.scala | 8 ++++---- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index b2a8482f..a097b8b1 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -52,3 +52,16 @@ class HellaCacheArbiter(n: Int) extends Module io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> UInt(log2Up(n)) } } + +class RocketTileLinkIOArbiter(n: Int) extends TileLinkArbiterLike(n) + with AppendsArbiterId { + val io = new Bundle { + val in = Vec.fill(n){new HeaderlessTileLinkIO}.flip + val out = new HeaderlessTileLinkIO + } + hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire) + hookupClientSourceHeaderless(io.in.map(_.release), io.out.release) + hookupFinish(io.in.map(_.finish), io.out.finish) + hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe) + hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) +} diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index ffdcd9ee..b3d40b09 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -45,7 +45,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule val io = new Bundle { val cpu = new CPUFrontendIO().flip val ptw = new TLBPTWIO() - val mem = new UncachedTileLinkIO + val mem = new HeaderlessUncachedTileLinkIO } val btb = Module(new BTB(btb_updates_out_of_order)) @@ -148,7 +148,7 @@ class ICache extends FrontendModule val req = Valid(new ICacheReq).flip val resp = Decoupled(new ICacheResp) val invalidate = Bool(INPUT) - val mem = new UncachedTileLinkIO + val mem = new HeaderlessUncachedTileLinkIO } require(isPow2(nSets) && isPow2(nWays)) require(isPow2(coreInstBytes)) @@ -273,7 +273,7 @@ class ICache extends FrontendModule // output signals io.resp.valid := s2_hit io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready - 
io.mem.acquire.bits.payload := GetBlock(addr_block = s2_addr >> UInt(blockOffBits)) + io.mem.acquire.bits := GetBlock(addr_block = s2_addr >> UInt(blockOffBits)) io.mem.finish <> ack_q.io.deq // control state machine diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9d3ca5b8..7ea62577 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -597,7 +597,7 @@ class HellaCache extends L1HellaCacheModule { val io = new Bundle { val cpu = (new HellaCacheIO).flip val ptw = new TLBPTWIO() - val mem = new TileLinkIO + val mem = new HeaderlessTileLinkIO } require(params(LRSCCycles) >= 32) // ISA requires 16-insn LRSC sequences to succeed @@ -802,7 +802,7 @@ class HellaCache extends L1HellaCacheModule { mshrs.io.req.bits.data := s2_req.data when (mshrs.io.req.fire()) { replacer.miss } - io.mem.acquire <> DecoupledLogicalNetworkIOWrapper(mshrs.io.mem_req) + io.mem.acquire <> mshrs.io.mem_req // replays readArb.io.in(1).valid := mshrs.io.replay.valid @@ -815,7 +815,7 @@ class HellaCache extends L1HellaCacheModule { // probes and releases val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, (r: Release) => r.hasMultibeatData())) - DecoupledLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release + releaseArb.io.out <> io.mem.release val probe = DecoupledLogicalNetworkIOUnwrapper(io.mem.probe) prober.io.req.valid := probe.valid && !lrsc_valid diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 425c96b5..f1132383 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -42,8 +42,8 @@ class RoCCInterface extends Bundle val interrupt = Bool(OUTPUT) // These should be handled differently, eventually - val imem = new UncachedTileLinkIO - val dmem = new TileLinkIO + val imem = new HeaderlessUncachedTileLinkIO + val dmem = new HeaderlessTileLinkIO val iptw = new TLBPTWIO val dptw = new TLBPTWIO val pptw = new 
TLBPTWIO diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index bb5356ee..cac5e2e4 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -14,7 +14,7 @@ case object BuildRoCC extends Field[Option[() => RoCC]] abstract class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { val io = new Bundle { - val tilelink = new TileLinkIO + val tilelink = new HeaderlessTileLinkIO val host = new HTIFIO } } @@ -39,10 +39,10 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { core.io.imem <> icache.io.cpu core.io.ptw <> ptw.io.dpath - val memArb = Module(new TileLinkIOArbiterThatAppendsArbiterId(params(NTilePorts))) + val memArb = Module(new RocketTileLinkIOArbiter(params(NTilePorts))) io.tilelink <> memArb.io.out memArb.io.in(0) <> dcache.io.mem - memArb.io.in(1) <> TileLinkIOWrapper(icache.io.mem) + memArb.io.in(1) <> HeaderlessTileLinkIOWrapper(icache.io.mem) //If so specified, build an RoCC module and wire it in params(BuildRoCC) @@ -52,7 +52,7 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { core.io.rocc <> rocc.io dcIF.io.requestor <> rocc.io.mem dcArb.io.requestor(2) <> dcIF.io.cache - memArb.io.in(2) <> TileLinkIOWrapper(rocc.io.imem) + memArb.io.in(2) <> HeaderlessTileLinkIOWrapper(rocc.io.imem) memArb.io.in(3) <> rocc.io.dmem ptw.io.requestor(2) <> rocc.io.iptw ptw.io.requestor(3) <> rocc.io.dptw From e293d890351e39cc5f4079552026a16128e81de4 Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Tue, 10 Mar 2015 10:28:05 -0700 Subject: [PATCH 0807/1087] fix decodelogic bug for bitwidths >= 64 s/1L/BigInt(1)/ --- rocket/src/main/scala/decode.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index f86a63b4..716b6bdf 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -159,10 +159,10 @@ object SimplifyDC def 
getImplicitDC(maxterms: Seq[Term], term: Term, bits: Int, above: Boolean): Term = { for (i <- 0 until bits) { var t: Term = null - if (above && ((term.value | term.mask) & (1L << i)) == 0) - t = new Term(term.value | (1L << i), term.mask) - else if (!above && (term.value & (1L << i)) != 0) - t = new Term(term.value & ~(1L << i), term.mask) + if (above && ((term.value | term.mask) & (BigInt(1) << i)) == 0) + t = new Term(term.value | (BigInt(1) << i), term.mask) + else if (!above && (term.value & (BigInt(1) << i)) != 0) + t = new Term(term.value & ~(BigInt(1) << i), term.mask) if (t != null && !maxterms.exists(_.intersects(t))) return t } From ea018b3d846dce1fe2c72d2a1067b75b95ac1d08 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 11 Mar 2015 22:33:03 -0700 Subject: [PATCH 0808/1087] stall rocket decode when not rocc ready --- rocket/src/main/scala/ctrl.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index bb29c2ac..590dc1e2 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -370,6 +370,7 @@ class Control extends CoreModule val wb_reg_xcpt = Reg(Bool()) val wb_reg_replay = Reg(Bool()) val wb_reg_cause = Reg(UInt()) + val wb_reg_rocc_pending = Reg(init=Bool(false)) val take_pc_wb = Bool() val mem_misprediction = io.dpath.mem_misprediction && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) @@ -510,6 +511,9 @@ class Control extends CoreModule val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready + when (wb_rocc_val) { wb_reg_rocc_pending := !io.rocc.cmd.ready } + when (wb_reg_xcpt) { wb_reg_rocc_pending := Bool(false) } + class Scoreboard(n: Int) { def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr)) @@ -646,6 +650,7 @@ class Control extends CoreModule id_ex_hazard || id_mem_hazard || id_wb_hazard || 
id_sboard_hazard || id_ctrl.fp && id_stall_fpu || id_ctrl.mem && !io.dmem.req.ready || + Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || id_do_fence val ctrl_draind = id_interrupt ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind From 51e4cd7616b527714a0623263a78c877d7d6899a Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 12 Mar 2015 16:27:40 -0700 Subject: [PATCH 0809/1087] Added UncachedTileLinkIO port to RocketTile, simplify arbitration --- rocket/src/main/scala/arbiter.scala | 11 +++++++ rocket/src/main/scala/rocc.scala | 8 ++--- rocket/src/main/scala/tile.scala | 45 ++++++++++++++--------------- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index a097b8b1..5aa931f4 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -53,6 +53,17 @@ class HellaCacheArbiter(n: Int) extends Module } } +class RocketUncachedTileLinkIOArbiter(n: Int) extends TileLinkArbiterLike(n) + with AppendsArbiterId { + val io = new Bundle { + val in = Vec.fill(n){new HeaderlessUncachedTileLinkIO}.flip + val out = new HeaderlessUncachedTileLinkIO + } + hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire) + hookupFinish(io.in.map(_.finish), io.out.finish) + hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) +} + class RocketTileLinkIOArbiter(n: Int) extends TileLinkArbiterLike(n) with AppendsArbiterId { val io = new Bundle { diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index f1132383..0f044dae 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -7,6 +7,8 @@ import Node._ import uncore._ import Util._ +case object RoCCMemTagBits extends Field[Int] + class RoCCInstruction extends Bundle { val funct = Bits(width = 7) @@ -43,7 +45,7 @@ class RoCCInterface extends Bundle // These should be handled 
differently, eventually val imem = new HeaderlessUncachedTileLinkIO - val dmem = new HeaderlessTileLinkIO + val dmem = new HeaderlessUncachedTileLinkIO val iptw = new TLBPTWIO val dptw = new TLBPTWIO val pptw = new TLBPTWIO @@ -127,10 +129,8 @@ class AccumulatorExample extends RoCC io.imem.grant.ready := false io.imem.finish.valid := false io.dmem.acquire.valid := false - io.dmem.release.valid := false - io.dmem.finish.valid := false - io.dmem.probe.ready := false io.dmem.grant.ready := false + io.dmem.finish.valid := false io.iptw.req.valid := false io.dptw.req.valid := false io.pptw.req.valid := false diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index cac5e2e4..fb3c3633 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -8,25 +8,24 @@ import Util._ case object CoreName extends Field[String] case object NDCachePorts extends Field[Int] -case object NTilePorts extends Field[Int] case object NPTWPorts extends Field[Int] case object BuildRoCC extends Field[Option[() => RoCC]] abstract class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { val io = new Bundle { - val tilelink = new HeaderlessTileLinkIO + val cached = new HeaderlessTileLinkIO + val uncached = new HeaderlessTileLinkIO val host = new HTIFIO } } class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { - val icache = Module(new Frontend, { case CacheName => "L1I"; case CoreName => "Rocket" }) val dcache = Module(new HellaCache, { case CacheName => "L1D" }) val ptw = Module(new PTW(params(NPTWPorts))) val core = Module(new Core, { case CoreName => "Rocket" }) - dcache.io.cpu.sret := core.io.dmem.sret + dcache.io.cpu.sret := core.io.dmem.sret // Bypass sret to dcache val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) dcArb.io.requestor(0) <> ptw.io.mem dcArb.io.requestor(1) <> core.io.dmem @@ -39,23 +38,23 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { 
core.io.imem <> icache.io.cpu core.io.ptw <> ptw.io.dpath - val memArb = Module(new RocketTileLinkIOArbiter(params(NTilePorts))) - io.tilelink <> memArb.io.out - memArb.io.in(0) <> dcache.io.mem - memArb.io.in(1) <> HeaderlessTileLinkIOWrapper(icache.io.mem) - - //If so specified, build an RoCC module and wire it in - params(BuildRoCC) - .map { br => br() } - .foreach { rocc => - val dcIF = Module(new SimpleHellaCacheIF) - core.io.rocc <> rocc.io - dcIF.io.requestor <> rocc.io.mem - dcArb.io.requestor(2) <> dcIF.io.cache - memArb.io.in(2) <> HeaderlessTileLinkIOWrapper(rocc.io.imem) - memArb.io.in(3) <> rocc.io.dmem - ptw.io.requestor(2) <> rocc.io.iptw - ptw.io.requestor(3) <> rocc.io.dptw - ptw.io.requestor(4) <> rocc.io.pptw - } + // Connect the caches and ROCC to the outer memory system + io.cached <> dcache.io.mem + // If so specified, build an RoCC module and wire it in + // otherwise, just hookup the icache + io.uncached <> params(BuildRoCC).map { buildItHere => + val rocc = buildItHere() + val memArb = Module(new RocketUncachedTileLinkIOArbiter(3)) + val dcIF = Module(new SimpleHellaCacheIF) + core.io.rocc <> rocc.io + dcIF.io.requestor <> rocc.io.mem + dcArb.io.requestor(2) <> dcIF.io.cache + memArb.io.in(0) <> icache.io.mem + memArb.io.in(1) <> rocc.io.imem + memArb.io.in(2) <> rocc.io.dmem + ptw.io.requestor(2) <> rocc.io.iptw + ptw.io.requestor(3) <> rocc.io.dptw + ptw.io.requestor(4) <> rocc.io.pptw + memArb.io.out + }.getOrElse(icache.io.mem) } From ebbd14254ce7eb6c595d562e03f6c083265873f5 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Fri, 13 Mar 2015 02:12:23 -0700 Subject: [PATCH 0810/1087] uncached port should be a HeaderlessUncachedTileLinkIO type --- rocket/src/main/scala/tile.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index fb3c3633..def17452 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -14,7 +14,7 @@ case 
object BuildRoCC extends Field[Option[() => RoCC]] abstract class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { val io = new Bundle { val cached = new HeaderlessTileLinkIO - val uncached = new HeaderlessTileLinkIO + val uncached = new HeaderlessUncachedTileLinkIO val host = new HTIFIO } } From e85c54cc4b08cf78e86f752733ef3ef931e96371 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 14 Mar 2015 02:49:07 -0700 Subject: [PATCH 0811/1087] New privileged ISA implementation --- rocket/src/main/scala/consts.scala | 6 + rocket/src/main/scala/csr.scala | 385 ++++++++++++----- rocket/src/main/scala/ctrl.scala | 521 +++++++++++------------ rocket/src/main/scala/dpath.scala | 12 +- rocket/src/main/scala/icache.scala | 1 + rocket/src/main/scala/instructions.scala | 121 +++--- rocket/src/main/scala/nbdcache.scala | 1 + rocket/src/main/scala/package.scala | 2 +- rocket/src/main/scala/ptw.scala | 63 ++- rocket/src/main/scala/tlb.scala | 60 ++- 10 files changed, 690 insertions(+), 482 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 4c601d81..030753d0 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -59,5 +59,11 @@ trait ScalarOpConstants { val DW_64 = Y val DW_XPR = Y + val SZ_PRV = 2 + val PRV_U = 0 + val PRV_S = 1 + val PRV_H = 2 + val PRV_M = 3 + val RA = UInt(1, 5) } diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index ec288987..7dec9a09 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -4,34 +4,71 @@ package rocket import Chisel._ import Util._ +import Instructions._ import Node._ import uncore._ import scala.math._ -class Status extends Bundle { - val ip = Bits(width = 8) - val im = Bits(width = 8) - val zero = Bits(width = 7) - val er = Bool() - val vm = Bool() - val s64 = Bool() - val u64 = Bool() - val ef = Bool() - val pei = Bool() - val ei = Bool() +class MStatus extends 
Bundle { + val sd = Bool() + val zero6 = UInt(width = 19) + val ha = UInt(width = 4) + val sa = UInt(width = 4) + val ua = UInt(width = 4) + val zero5 = UInt(width = 1) + val xs = UInt(width = 2) + val fs = UInt(width = 2) + val mtie = Bool() + val htie = Bool() + val stie = Bool() + val zero4 = UInt(width = 1) + val vm = UInt(width = 4) + val zero3 = UInt(width = 1) + val mprv = UInt(width = 2) + val zero2 = UInt(width = 3) + val prv2 = UInt(width = 2) + val ie2 = Bool() + val prv1 = UInt(width = 2) + val ie1 = Bool() + val prv = UInt(width = 2) + val ie = Bool() + val msip = Bool() + val hsip = Bool() + val ssip = Bool() + val zero1 = UInt(width = 1) +} + +class SStatus extends Bundle { + val sd = Bool() + val zero6 = UInt(width = 32) + val xs = UInt(width = 2) + val fs = UInt(width = 2) + val tip = Bool() + val zero5 = UInt(width = 1) + val tie = Bool() + val zero4 = UInt(width = 4) + val ua = UInt(width = 4) + val zero3 = UInt(width = 7) val ps = Bool() - val s = Bool() + val pie = UInt(width = 1) + val zero2 = UInt(width = 2) + val ie = Bool() + val zero1 = UInt(width = 2) + val sip = Bool() + val zero0 = UInt(width = 1) } object CSR { // commands - val SZ = 2 - val X = Bits("b??", 2) - val N = Bits(0,2) - val W = Bits(1,2) - val S = Bits(2,2) - val C = Bits(3,2) + val SZ = 3 + val X = UInt.DC(SZ) + val N = UInt(0,SZ) + val W = UInt(1,SZ) + val S = UInt(2,SZ) + val C = UInt(3,SZ) + val I = UInt(4,SZ) + val R = UInt(5,SZ) } class CSRFileIO extends CoreBundle { @@ -43,40 +80,49 @@ class CSRFileIO extends CoreBundle { val wdata = Bits(INPUT, xLen) } - val status = new Status().asOutput + val csr_replay = Bool(OUTPUT) + val csr_xcpt = Bool(OUTPUT) + + val status = new MStatus().asOutput val ptbr = UInt(OUTPUT, paddrBits) val evec = UInt(OUTPUT, vaddrBits+1) val exception = Bool(INPUT) val retire = UInt(INPUT, log2Up(1+retireWidth)) val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+retireWidth))) val cause = UInt(INPUT, xLen) - val badvaddr_wen = Bool(INPUT) - 
val pc = UInt(INPUT, vaddrBits+1) + val mbadaddr_wen = Bool(INPUT) + val pc = SInt(INPUT, vaddrBits+1) val sret = Bool(INPUT) val fatc = Bool(OUTPUT) - val replay = Bool(OUTPUT) val time = UInt(OUTPUT, xLen) val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip val rocc = new RoCCInterface().flip + val interrupt = Bool(OUTPUT) + val interrupt_cause = UInt(OUTPUT, xLen) } class CSRFile extends CoreModule { val io = new CSRFileIO - val reg_epc = Reg(Bits(width = vaddrBits+1)) - val reg_badvaddr = Reg(Bits(width = vaddrBits)) - val reg_evec = Reg(Bits(width = vaddrBits)) - val reg_compare = Reg(Bits(width = 32)) - val reg_cause = Reg(Bits(width = xLen)) + val reg_mstatus = Reg(new MStatus) + val reg_mepc = Reg(SInt(width = vaddrBits+1)) + val reg_mcause = Reg(Bits(width = xLen)) + val reg_mbadaddr = Reg(SInt(width = vaddrBits+1)) + val reg_mscratch = Reg(Bits(width = xLen)) + + val reg_sepc = Reg(SInt(width = vaddrBits+1)) + val reg_scause = Reg(Bits(width = xLen)) + val reg_sbadaddr = Reg(SInt(width = vaddrBits+1)) + val reg_sscratch = Reg(Bits(width = xLen)) + val reg_stvec = Reg(SInt(width = vaddrBits)) + val reg_stimecmp = Reg(Bits(width = 32)) + val reg_sptbr = Reg(UInt(width = paddrBits)) + val reg_tohost = Reg(init=Bits(0, xLen)) val reg_fromhost = Reg(init=Bits(0, xLen)) - val reg_sup0 = Reg(Bits(width = xLen)) - val reg_sup1 = Reg(Bits(width = xLen)) - val reg_ptbr = Reg(UInt(width = paddrBits)) val reg_stats = Reg(init=Bool(false)) - val reg_status = Reg(new Status) // reset down below val reg_time = WideCounter(xLen) val reg_instret = WideCounter(xLen, io.retire) val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) @@ -84,12 +130,27 @@ class CSRFile extends CoreModule val reg_frm = Reg(UInt(width = 3)) val r_irq_timer = Reg(init=Bool(false)) - val r_irq_ipi = Reg(init=Bool(true)) val irq_rocc = Bool(!params(BuildRoCC).isEmpty) && io.rocc.interrupt - val cpu_req_valid = io.rw.cmd != 
CSR.N + io.interrupt_cause := 0 + io.interrupt := io.interrupt_cause(xLen-1) + def checkInterrupt(max_priv: UInt, cond: Bool, num: Int) = { + when (cond && (reg_mstatus.prv < max_priv || reg_mstatus.prv === max_priv && reg_mstatus.ie)) { + io.interrupt_cause := UInt((BigInt(1) << (xLen-1)) + num) + } + } + + checkInterrupt(PRV_S, r_irq_timer, 0) + checkInterrupt(PRV_S, reg_mstatus.ssip, 1) + checkInterrupt(PRV_M, reg_mstatus.msip, 1) + checkInterrupt(PRV_M, reg_fromhost != 0, 2) + checkInterrupt(PRV_M, irq_rocc, 3) + + val system_insn = io.rw.cmd === CSR.I + val cpu_ren = io.rw.cmd != CSR.N && !system_insn + val host_pcr_req_valid = Reg(Bool()) // don't reset - val host_pcr_req_fire = host_pcr_req_valid && !cpu_req_valid + val host_pcr_req_fire = host_pcr_req_valid && !cpu_ren val host_pcr_rep_valid = Reg(Bool()) // don't reset val host_pcr_bits = Reg(io.host.pcr_req.bits) io.host.pcr_req.ready := !host_pcr_req_valid && !host_pcr_rep_valid @@ -108,58 +169,116 @@ class CSRFile extends CoreModule io.host.debug_stats_pcr := reg_stats // direct export up the hierarchy - val addr = Mux(cpu_req_valid, io.rw.addr, host_pcr_bits.addr | 0x500) - val decoded_addr = { - val map = for ((v, i) <- CSRs.all.zipWithIndex) - yield v -> UInt(BigInt(1) << i) - val out = ROM(map)(addr) - Map((CSRs.all zip out.toBools):_*) - } + val addr = Mux(cpu_ren, io.rw.addr, host_pcr_bits.addr) + val decoded_addr = Map(( + for ((v, i) <- CSRs.all.zipWithIndex) + yield v -> (addr === CSRs.all(i))):_*) - val wen = cpu_req_valid || host_pcr_req_fire && host_pcr_bits.rw - val wdata = Mux(cpu_req_valid, io.rw.wdata, host_pcr_bits.data) + val addr_valid = decoded_addr.values.reduce(_||_) + val fp_csr = decoded_addr(CSRs.fflags) || decoded_addr(CSRs.frm) || decoded_addr(CSRs.fcsr) + val csr_addr_priv = io.rw.addr(9,8) + val priv_sufficient = reg_mstatus.prv >= csr_addr_priv + val read_only = io.rw.addr(11,10).andR + val cpu_wen = cpu_ren && io.rw.cmd != CSR.R && priv_sufficient + val wen = cpu_wen && 
!read_only || host_pcr_req_fire && host_pcr_bits.rw + val wdata = Mux(io.rw.cmd === CSR.W, io.rw.wdata, + Mux(io.rw.cmd === CSR.C, io.rw.rdata & ~io.rw.wdata, + Mux(io.rw.cmd === CSR.S, io.rw.rdata | io.rw.wdata, + host_pcr_bits.data))) - io.status := reg_status - io.status.ip := Cat(r_irq_timer, reg_fromhost.orR, r_irq_ipi, Bool(false), - Bool(false), irq_rocc, Bool(false), Bool(false)) - io.fatc := wen && decoded_addr(CSRs.fatc) - io.evec := Mux(io.exception, reg_evec.toSInt, reg_epc).toUInt - io.ptbr := reg_ptbr + val opcode = io.rw.addr(3,0) + // The following comparison is meant to be opcode === SFENCE_VM(23,20). But + // FOR SOME FUCKING REASON, extracting SFENCE_VM(23,20) gives 3, not 4. + val insn_sfence_vm = opcode === 4 && system_insn && priv_sufficient + val insn_redirect_trap = opcode === MRTS(23,20) && system_insn && priv_sufficient + val insn_ret = opcode === SRET(23,20) /* or H/MRET */ && io.rw.addr(1) && system_insn && priv_sufficient + val insn_break = opcode === SBREAK(23,20) && io.rw.addr(0) && system_insn && priv_sufficient + val insn_call = opcode === SCALL(23,20) /* or H/MCALL */ && system_insn && priv_sufficient - when (io.badvaddr_wen) { - val wdata = io.rw.wdata - val (upper, lower) = Split(wdata, vaddrBits) - val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) - reg_badvaddr := Cat(sign, lower).toSInt - } + val csr_xcpt = (cpu_wen && read_only) || + (cpu_ren && (!priv_sufficient || !addr_valid || fp_csr && !io.status.fs.orR)) || + (system_insn && !priv_sufficient) || + insn_call || insn_break - when (io.exception) { - reg_status.s := true - reg_status.ps := reg_status.s - reg_status.ei := false - reg_status.pei := reg_status.ei - reg_epc := io.pc.toSInt - reg_cause := io.cause + val mtvec = reg_mstatus.prv << 6 + io.fatc := insn_sfence_vm + io.evec := Mux(io.exception || csr_xcpt, mtvec.zext, + Mux(insn_redirect_trap, reg_stvec, + Mux(reg_mstatus.prv(1), reg_mepc, reg_sepc))).toUInt + io.ptbr := reg_sptbr + io.csr_xcpt := 
csr_xcpt || insn_redirect_trap || insn_ret /* sort of a lie */ + io.status := reg_mstatus + io.status.fs := reg_mstatus.fs.orR.toSInt // either off or dirty (no clean/initial support yet) + io.status.xs := reg_mstatus.xs.orR.toSInt // either off or dirty (no clean/initial support yet) + io.status.sd := reg_mstatus.xs.orR || reg_mstatus.fs.orR + + when (io.exception || csr_xcpt) { + reg_mstatus.ie := false + reg_mstatus.prv := PRV_M + reg_mstatus.mprv := PRV_M + reg_mstatus.prv1 := reg_mstatus.prv + reg_mstatus.ie1 := reg_mstatus.ie + reg_mstatus.prv2 := reg_mstatus.prv1 + reg_mstatus.ie2 := reg_mstatus.ie1 + + reg_mepc := io.pc + reg_mcause := io.cause + when (csr_xcpt) { + reg_mcause := Causes.illegal_instruction + when (insn_break) { reg_mcause := Causes.breakpoint } + when (insn_call) { reg_mcause := Causes.scall + csr_addr_priv } + } + + reg_mbadaddr := io.pc + when (io.cause === Causes.fault_load || io.cause === Causes.misaligned_load || + io.cause === Causes.fault_store || io.cause === Causes.misaligned_store) { + val wdata = io.rw.wdata + val (upper, lower) = Split(wdata, vaddrBits) + val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) + reg_mbadaddr := Cat(sign, lower).toSInt + } } - when (io.sret) { - reg_status.s := reg_status.ps - reg_status.ei := reg_status.pei + when (insn_ret) { + reg_mstatus.ie := reg_mstatus.ie1 + reg_mstatus.prv := reg_mstatus.prv1 + reg_mstatus.prv1 := reg_mstatus.prv2 + reg_mstatus.ie1 := reg_mstatus.ie2 + reg_mstatus.prv2 := PRV_U + reg_mstatus.ie2 := true } - when (reg_time(reg_compare.getWidth-1,0) === reg_compare) { + when (insn_redirect_trap) { + reg_mstatus.prv := PRV_S + reg_sbadaddr := reg_mbadaddr + reg_scause := reg_mcause + reg_sepc := reg_mepc + } + + assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: io.csr_replay :: Nil) <= 1, "these conditions must be mutually exclusive") + + when (reg_time(reg_stimecmp.getWidth-1,0) === reg_stimecmp) { r_irq_timer := true } io.time := reg_time 
- io.host.ipi_req.valid := cpu_req_valid && decoded_addr(CSRs.send_ipi) + io.host.ipi_req.valid := cpu_wen && decoded_addr(CSRs.send_ipi) io.host.ipi_req.bits := io.rw.wdata - io.replay := io.host.ipi_req.valid && !io.host.ipi_req.ready + io.csr_replay := io.host.ipi_req.valid && !io.host.ipi_req.ready when (host_pcr_req_fire && !host_pcr_bits.rw && decoded_addr(CSRs.tohost)) { reg_tohost := UInt(0) } - val read_impl = Bits(2) - val read_ptbr = reg_ptbr(paddrBits-1, pgIdxBits) << UInt(pgIdxBits) + val read_mstatus = io.status.toBits + val read_sstatus = new SStatus + read_sstatus := new SStatus().fromBits(read_mstatus) // sstatus mostly overlaps mstatus + read_sstatus.zero0 := 0 + read_sstatus.zero1 := 0 + read_sstatus.zero2 := 0 + read_sstatus.zero3 := 0 + read_sstatus.zero4 := 0 + read_sstatus.zero5 := 0 + read_sstatus.ua := io.status.ua + read_sstatus.tip := r_irq_timer val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( CSRs.fflags -> (if (!params(BuildFPU).isEmpty) reg_fflags else UInt(0)), @@ -167,23 +286,25 @@ class CSRFile extends CoreModule CSRs.fcsr -> (if (!params(BuildFPU).isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), CSRs.cycle -> reg_time, CSRs.time -> reg_time, + CSRs.scycle -> reg_time, + CSRs.stime -> reg_time, CSRs.instret -> reg_instret, - CSRs.sup0 -> reg_sup0, - CSRs.sup1 -> reg_sup1, - CSRs.epc -> reg_epc, - CSRs.badvaddr -> reg_badvaddr, - CSRs.ptbr -> read_ptbr, - CSRs.asid -> UInt(0), - CSRs.count -> reg_time, - CSRs.compare -> reg_compare, - CSRs.evec -> reg_evec, - CSRs.cause -> reg_cause, - CSRs.status -> io.status.toBits, + CSRs.sinstret -> reg_instret, + CSRs.mstatus -> read_mstatus, + CSRs.mscratch -> reg_mscratch, + CSRs.mepc -> reg_mepc, + CSRs.mbadaddr -> reg_mbadaddr, + CSRs.mcause -> reg_mcause, + CSRs.sstatus -> read_sstatus.toBits, + CSRs.sscratch -> reg_sscratch, + CSRs.sepc -> reg_sepc, + CSRs.scause -> reg_scause, + CSRs.sbadaddr -> reg_sbadaddr, + CSRs.sptbr -> reg_sptbr, + CSRs.sasid -> UInt(0), + 
CSRs.stimecmp -> reg_stimecmp, + CSRs.stvec -> reg_stvec, CSRs.hartid -> io.host.id, - CSRs.impl -> read_impl, - CSRs.fatc -> read_impl, // don't care - CSRs.send_ipi -> read_impl, // don't care - CSRs.clear_ipi -> read_impl, // don't care CSRs.stats -> reg_stats, CSRs.tohost -> reg_tohost, CSRs.fromhost -> reg_fromhost) @@ -199,46 +320,80 @@ class CSRFile extends CoreModule } when (wen) { - when (decoded_addr(CSRs.status)) { - reg_status := new Status().fromBits(wdata) - reg_status.s64 := true - reg_status.u64 := true - reg_status.zero := 0 - if (!params(UseVM)) reg_status.vm := false - if (params(BuildRoCC).isEmpty) reg_status.er := false - if (params(BuildFPU).isEmpty) reg_status.ef := false + when (decoded_addr(CSRs.mstatus)) { + val new_mstatus = new MStatus().fromBits(wdata) + reg_mstatus.ssip := new_mstatus.ssip + reg_mstatus.msip := new_mstatus.msip + reg_mstatus.stie := new_mstatus.stie + reg_mstatus.ie := new_mstatus.ie + reg_mstatus.ie1 := new_mstatus.ie1 + reg_mstatus.ie2 := new_mstatus.ie2 + when (new_mstatus.mprv != PRV_H) { reg_mstatus.mprv := new_mstatus.mprv } + when (new_mstatus.prv != PRV_H) { reg_mstatus.prv := new_mstatus.prv } + when (new_mstatus.prv1 != PRV_H) { reg_mstatus.prv1 := new_mstatus.prv1 } + when (new_mstatus.prv2 != PRV_H) { reg_mstatus.prv2 := new_mstatus.prv2 } + if (params(UseVM)) when (new_mstatus.vm === 0 || new_mstatus.vm === 5) { reg_mstatus.vm := new_mstatus.vm } + if (!params(BuildFPU).isEmpty) reg_mstatus.fs := new_mstatus.fs + if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_mstatus.xs + } + when (decoded_addr(CSRs.sstatus)) { + val new_sstatus = new SStatus().fromBits(wdata) + reg_mstatus.ssip := new_sstatus.sip + reg_mstatus.stie := new_sstatus.tie + reg_mstatus.ie := new_sstatus.ie + reg_mstatus.ie1 := new_sstatus.pie + reg_mstatus.prv1 := Mux(new_sstatus.ps, PRV_S, PRV_U) + if (!params(BuildFPU).isEmpty) reg_mstatus.fs := new_sstatus.fs + if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_sstatus.xs } when 
(decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } - when (decoded_addr(CSRs.epc)) { reg_epc := wdata(vaddrBits,0).toSInt } - when (decoded_addr(CSRs.evec)) { reg_evec := wdata(vaddrBits-1,0).toSInt } - when (decoded_addr(CSRs.count)) { reg_time := wdata.toUInt } - when (decoded_addr(CSRs.compare)) { reg_compare := wdata(31,0).toUInt; r_irq_timer := Bool(false) } + when (decoded_addr(CSRs.mepc)) { reg_mepc := wdata(vaddrBits,0).toSInt } + when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } + when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } + when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata } + when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata(vaddrBits,0).toSInt } + when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata(vaddrBits-1,0).toSInt } + when (decoded_addr(CSRs.scycle)) { reg_time := wdata.toUInt } + when (decoded_addr(CSRs.stime)) { reg_time := wdata.toUInt } + when (decoded_addr(CSRs.sinstret)) { reg_instret := wdata.toUInt } + when (decoded_addr(CSRs.stimecmp)) { reg_stimecmp := wdata(31,0).toUInt; r_irq_timer := Bool(false) } when (decoded_addr(CSRs.fromhost)) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } when (decoded_addr(CSRs.tohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } - when (decoded_addr(CSRs.clear_ipi)){ r_irq_ipi := wdata(0) } - when (decoded_addr(CSRs.sup0)) { reg_sup0 := wdata } - when (decoded_addr(CSRs.sup1)) { reg_sup1 := wdata } - when (decoded_addr(CSRs.ptbr)) { reg_ptbr := Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)).toUInt } + when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } + when (decoded_addr(CSRs.sptbr)) { reg_sptbr := Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)).toUInt } when 
(decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } } - io.host.ipi_rep.ready := Bool(true) - when (io.host.ipi_rep.valid) { r_irq_ipi := Bool(true) } + io.host.ipi_rep.ready := true + when (io.host.ipi_rep.valid) { reg_mstatus.msip := true } when(this.reset) { - reg_status.ei := false - reg_status.pei := false - reg_status.ef := false - reg_status.er := false - reg_status.ps := false - reg_status.s := true - reg_status.u64 := true - reg_status.s64 := true - reg_status.vm := false - reg_status.zero := 0 - reg_status.im := 0 - reg_status.ip := 0 + reg_mstatus.zero1 := 0 + reg_mstatus.ssip := false + reg_mstatus.hsip := false + reg_mstatus.msip := false + reg_mstatus.ie := false + reg_mstatus.prv := PRV_M + reg_mstatus.ie1 := false + reg_mstatus.prv1 := PRV_S + reg_mstatus.ie2 := false + reg_mstatus.prv2 := PRV_S + reg_mstatus.mprv := PRV_M + reg_mstatus.zero2 := 0 + reg_mstatus.vm := 0 + reg_mstatus.zero3 := 0 + reg_mstatus.stie := false + reg_mstatus.htie := false + reg_mstatus.mtie := false + reg_mstatus.fs := 0 + reg_mstatus.xs := 0 + reg_mstatus.zero4 := 0 + reg_mstatus.ua := 4 + reg_mstatus.sa := 4 + reg_mstatus.ha := 0 + reg_mstatus.zero5 := 0 + reg_mstatus.sd := false } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 590dc1e2..450a8817 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -17,8 +17,7 @@ class CtrlDpathIO extends CoreBundle val ren = Vec.fill(2)(Bool(OUTPUT)) val ex_ctrl = new IntCtrlSigs().asOutput val mem_ctrl = new IntCtrlSigs().asOutput - val csr = UInt(OUTPUT, 3) - val sret = Bool(OUTPUT) + val csr_cmd = UInt(OUTPUT, CSR.SZ) val ex_valid = Bool(OUTPUT) val wb_wen = Bool(OUTPUT) val bypass = Vec.fill(2)(Bool(OUTPUT)) @@ -28,11 +27,11 @@ class CtrlDpathIO extends CoreBundle val retire = Bool(OUTPUT) val exception = Bool(OUTPUT) val cause = UInt(OUTPUT, xLen) - val badvaddr_wen = Bool(OUTPUT) // high for a load/store access fault // inputs from datapath val inst = 
Bits(INPUT, 32) val mem_br_taken = Bool(INPUT) val mem_misprediction = Bool(INPUT) + val mem_npc_misaligned = Bool(INPUT) val div_mul_rdy = Bool(INPUT) val ll_wen = Bool(INPUT) val ll_waddr = UInt(INPUT, 5) @@ -40,10 +39,14 @@ class CtrlDpathIO extends CoreBundle val mem_rs1_ra = Bool(INPUT) val mem_waddr = UInt(INPUT, 5) val wb_waddr = UInt(INPUT, 5) - val status = new Status().asInput + val status = new MStatus().asInput val fp_sboard_clr = Bool(INPUT) val fp_sboard_clra = UInt(INPUT, 5) + // inputs from csr file val csr_replay = Bool(INPUT) + val csr_xcpt = Bool(INPUT) + val interrupt = Bool(INPUT) + val interrupt_cause = UInt(INPUT, xLen) } abstract trait DecodeConstants @@ -52,14 +55,14 @@ abstract trait DecodeConstants val decode_default = // jal renf1 fence.i - // | jalr | renf2 | sret - // fp_val| | renx2 | | renf3 | | syscall - // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | - // | | | | | | | | | | | | | | | | | | | | | wxd | | | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X,X,X) + // | jalr | renf2 | + // fp_val| | renx2 | | renf3 | + // | rocc| | | renx1 s_alu1 mem_val | | | wfd | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | + // | | | | | | | | | | | | | | | | | | | | | wxd | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X) val table: Array[(UInt, List[UInt])] } @@ -89,8 +92,6 @@ class IntCtrlSigs extends Bundle { val wxd = Bool() val csr = Bits(width = CSR.SZ) val fence_i = Bool() - val sret = Bool() - val scall = Bool() val fence = Bool() val amo = Bool() @@ -98,8 +99,7 @@ class IntCtrlSigs extends 
Bundle { val decoder = DecodeLogic(inst, XDecode.decode_default, table) Vec(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2, sel_alu1, sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type, - rfs1, rfs2, rfs3, wfd, div, wxd, - csr, fence_i, sret, scall, fence, amo) := decoder + rfs1, rfs2, rfs3, wfd, div, wxd, csr, fence_i, fence, amo) := decoder this } } @@ -108,224 +108,230 @@ object XDecode extends DecodeConstants { val table = Array( // jal renf1 fence.i - // | jalr | renf2 | sret - // fp_val| | renx2 | | renf3 | | syscall - // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | - // | | | | | | | | | | | | | | | | | | | | | wxd | | | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - BEQ-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - BLT-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - BLTU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - BGE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - BGEU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + // | jalr | renf2 | + // fp_val| | renx2 | | renf3 | + // | rocc| | | renx1 s_alu1 mem_val | | | wfd | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | + // | | | | | | | | | | | | | | | | | | | | | wxd | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + BNE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BEQ-> 
List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BLT-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BLTU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BGE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BGEU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - JAL-> List(Y, N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - JALR-> List(Y, N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - AUIPC-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + JAL-> List(Y, N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + JALR-> List(Y, N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + AUIPC-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - LB-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - LH-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - LW-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - LD-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - LBU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - LHU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - LWU-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SB-> List(Y, 
N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - SH-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - SW-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - SD-> List(xpr64,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N,N,N), + LB-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N), + LH-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N), + LW-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N), + LD-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N), + LBU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N), + LHU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N), + LWU-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N), + SB-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N), + SH-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N), + SW-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N), + SD-> List(xpr64,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N), - AMOADD_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOXOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOSWAP_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), 
- AMOAND_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOMIN_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOMINU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOMAX_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOMAXU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOADD_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOSWAP_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOXOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOAND_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOMIN_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOMINU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOMAX_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - AMOMAXU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + AMOADD_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOXOR_W-> List(Y, 
N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOSWAP_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOAND_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMIN_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMINU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAX_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAXU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOADD_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOSWAP_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOXOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOAND_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMIN_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMINU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAX_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAXU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - 
LR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - LR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - SC_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), - SC_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N,N,Y), + LR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + LR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + SC_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + SC_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - LUI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - ADDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SLTI -> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SLTIU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - ANDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - ORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - XORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SLLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SRLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SRAI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, 
N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - ADD-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SUB-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SLT-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SLTU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - AND-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - OR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - XOR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SLL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SRL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SRA-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + LUI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ADDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLTI -> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLTIU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ANDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + XORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, 
N,N,N,N,N,Y,CSR.N,N,N,N), + SRLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRAI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ADD-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SUB-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLT-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLTU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + AND-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + OR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + XOR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRA-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SLLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SRLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SRAIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - ADDW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SUBW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - 
SLLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SRLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - SRAW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), + ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRAIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ADDW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SUBW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRAW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - MUL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - MULH-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - MULHU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - MULHSU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - MULW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + MUL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, 
N,N,N,N,Y,Y,CSR.N,N,N,N), + MULH-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + MULHU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + MULHSU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + MULW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - DIV-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - DIVU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - REM-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - REMU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - DIVW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - DIVUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - REMW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), - REMUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N,N,N), + DIV-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + DIVU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REM-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REMU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + DIVW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + DIVUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, 
N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REMW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REMUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - SCALL-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,Y,N,N), - SRET-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N,N,N), - FENCE-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,Y,N), - FENCE_I-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,Y,N,N,N,N), - CSRRW-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N,N,N), - CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N,N,N), - CSRRC-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N,N,N), - CSRRWI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N,N,N), - CSRRSI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N,N,N), - CSRRCI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N,N,N)) + FENCE-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N), + FENCE_I-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,Y,N,N), + + SFENCE_VM-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SCALL-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SBREAK-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SRET-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + HCALL-> 
List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + MRET-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + MRTS-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + CSRRW-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), + CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), + CSRRC-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N), + CSRRWI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), + CSRRSI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), + CSRRCI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N)) } object FDecode extends DecodeConstants { val table = Array( // jal renf1 fence.i - // | jalr | renf2 | sret - // fp_val| | renx2 | | renf3 | | syscall - // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | - // | | | | | | | | | | | | | | | | | | | | | wxd | | | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FCVT_D_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FSGNJ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FSGNJ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FSGNJX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FSGNJX_D-> List(Y, 
Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FSGNJN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FSGNJN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FMIN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FMIN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FMAX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FMAX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FMUL_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FMUL_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N,N,N), - FMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), - FMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), - FMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), - FMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), - FNMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), - FNMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), - FNMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), - FNMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N,N,N), - FCLASS_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FCLASS_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FMV_X_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FMV_X_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FCVT_W_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FCVT_W_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FCVT_WU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FCVT_WU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FCVT_L_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FCVT_L_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FCVT_LU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FCVT_LU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N,N,N), - FEQ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), - FEQ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), - FLT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), - FLT_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), - FLE_S-> List(Y, 
Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), - FLE_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N,N,N), - FMV_S_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FMV_D_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FCVT_S_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FCVT_D_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FCVT_S_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FCVT_D_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FCVT_S_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FCVT_D_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FCVT_S_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FCVT_D_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FLW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FLD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N,N,N), - FSW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N,N,N), - FSD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N,N,N)) + // | jalr | renf2 | + // fp_val| | renx2 | | renf3 | + // | rocc| | | renx1 s_alu1 mem_val | | | wfd | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | + // | | | | | | | | | | | | | | | | | | | | | wxd | fence + 
// | | | | | | | | | | | | | | | | | | | | | | csr | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + FCVT_S_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N), + FSGNJ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMIN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMIN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMAX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMAX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMUL_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMUL_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMADD_S-> 
List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FCLASS_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCLASS_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FMV_X_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FMV_X_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_W_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_W_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_WU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_WU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_L_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_L_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_LU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_LU_D-> 
List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FEQ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FEQ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLT_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLE_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLE_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FMV_S_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FMV_D_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FLW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N), + FLD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N), + 
FSW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N), + FSD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N)) } object RoCCDecode extends DecodeConstants { val table = Array( // jal renf1 fence.i - // | jalr | renf2 | sret - // fp_val| | renx2 | | renf3 | | syscall - // | rocc| | | renx1 s_alu1 mem_val | | | wfd | | | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | | | - // | | | | | | | | | | | | | | | | | | | | | wxd | | | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | | | - CUSTOM0-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM0_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM0_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM0_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - CUSTOM0_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - CUSTOM0_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - CUSTOM1-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM1_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM1_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM1_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - CUSTOM1_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - CUSTOM1_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - CUSTOM2-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM2_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM2_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM2_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - CUSTOM2_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - CUSTOM2_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - CUSTOM3-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM3_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM3_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N,N,N), - CUSTOM3_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N), - CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N,N,N)) + // | jalr | renf2 | + // fp_val| | renx2 | | renf3 | + // | rocc| | | renx1 s_alu1 mem_val | | | wfd | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | + // | | | | | | | | | | | | | | | | | | | | | wxd | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + CUSTOM0-> List(Y, 
N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM0_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM0_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM0_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM0_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM0_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM1-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM1_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM1_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM1_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM1_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM1_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM2-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM2_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM2_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM2_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM2_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM2_RD_RS1_RS2->List(Y, 
N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM3-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM3_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM3_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM3_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N)) } class Control extends CoreModule @@ -374,7 +380,8 @@ class Control extends CoreModule val take_pc_wb = Bool() val mem_misprediction = io.dpath.mem_misprediction && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) - val take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) + val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) + val take_pc_mem = want_take_pc_mem && !io.dpath.mem_npc_misaligned val take_pc_mem_wb = take_pc_wb || take_pc_mem val take_pc = take_pc_mem_wb val ctrl_killd = Bool() @@ -388,38 +395,19 @@ class Control extends CoreModule val id_load_use = Bool() val id_reg_fence = Reg(init=Bool(false)) - val sr = io.dpath.status - var id_interrupts = (0 until sr.ip.getWidth).map(i => (sr.im(i) && sr.ip(i), UInt(BigInt(1) << (xLen-1) | i))) - - val (id_interrupt_unmasked, id_interrupt_cause) = checkExceptions(id_interrupts) - val id_interrupt = io.dpath.status.ei && id_interrupt_unmasked - - def checkExceptions(x: Seq[(Bool, UInt)]) = - (x.map(_._1).reduce(_||_), PriorityMux(x)) - - val fp_csrs = CSRs.fcsr :: CSRs.frm :: CSRs.fflags :: Nil - val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*) - 
if(params(BuildFPU).isEmpty) - legal_csrs --= fp_csrs - - val id_csr_addr = io.dpath.inst(31,20) - val isLegalCSR = Vec.tabulate(1 << id_csr_addr.getWidth)(i => Bool(legal_csrs contains i)) val id_csr_en = id_ctrl.csr != CSR.N - val id_csr_fp = Bool(!params(BuildFPU).isEmpty) && id_csr_en && DecodeLogic(id_csr_addr, fp_csrs, CSRs.all.toSet -- fp_csrs) - val id_csr_wen = id_raddr1 != UInt(0) || !Vec(CSR.S, CSR.C).contains(id_ctrl.csr) - val id_csr_invalid = id_csr_en && !isLegalCSR(id_csr_addr) - val id_csr_privileged = id_csr_en && - (id_csr_addr(11,10) === UInt(3) && id_csr_wen || - id_csr_addr(11,10) === UInt(2) || - id_csr_addr(11,10) === UInt(1) && !io.dpath.status.s || - id_csr_addr(9,8) >= UInt(2) || - id_csr_addr(9,8) === UInt(1) && !io.dpath.status.s && id_csr_wen) - // flush pipeline on CSR writes that may have side effects - val id_csr_flush = { - val safe_csrs = CSRs.sup0 :: CSRs.sup1 :: CSRs.epc :: Nil - id_csr_en && id_csr_wen && !DecodeLogic(id_csr_addr, safe_csrs, legal_csrs -- safe_csrs) - } + val id_system_insn = id_ctrl.csr === CSR.I + val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0) + val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr) + val id_csr_addr = io.dpath.inst(31,20) + // this is overly conservative + val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil + val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*) + val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs, legal_csrs -- safe_csrs)) + val id_illegal_insn = !id_ctrl.legal || + id_ctrl.fp && !io.dpath.status.fs.orR || + id_ctrl.rocc && !io.dpath.status.xs.orR // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) val id_amo_aq = io.dpath.inst(26) val id_amo_rl = io.dpath.inst(25) @@ -430,26 +418,24 @@ class Control extends CoreModule mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc) id_reg_fence := 
id_fence_next || id_reg_fence && id_mem_busy val id_do_fence = id_rocc_busy && id_ctrl.fence || - id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_flush) + id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en) + + def checkExceptions(x: Seq[(Bool, UInt)]) = + (x.map(_._1).reduce(_||_), PriorityMux(x)) val (id_xcpt, id_cause) = checkExceptions(List( - (id_interrupt, id_interrupt_cause), - (io.imem.resp.bits.xcpt_ma, UInt(Causes.misaligned_fetch)), + (io.dpath.interrupt, io.dpath.interrupt_cause), (io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)), - (!id_ctrl.legal || id_csr_invalid, UInt(Causes.illegal_instruction)), - (id_csr_privileged, UInt(Causes.privileged_instruction)), - (id_ctrl.sret && !io.dpath.status.s, UInt(Causes.privileged_instruction)), - ((id_ctrl.fp || id_csr_fp) && !io.dpath.status.ef,UInt(Causes.fp_disabled)), - (id_ctrl.scall, UInt(Causes.syscall)), - (id_ctrl.rocc && !io.dpath.status.er, UInt(Causes.accelerator_disabled)))) + (id_illegal_insn, UInt(Causes.illegal_instruction)))) ex_reg_valid := !ctrl_killd ex_reg_xcpt := !ctrl_killd && id_xcpt - ex_reg_xcpt_interrupt := id_interrupt && !take_pc && io.imem.resp.valid + ex_reg_xcpt_interrupt := io.dpath.interrupt && !take_pc && io.imem.resp.valid when (id_xcpt) { ex_reg_cause := id_cause } when (!ctrl_killd) { ex_ctrl := id_ctrl + ex_ctrl.csr := id_csr ex_reg_btb_hit := io.imem.btb_resp.valid when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits } ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush @@ -488,6 +474,7 @@ class Control extends CoreModule val (mem_xcpt, mem_cause) = checkExceptions(List( (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), + (want_take_pc_mem && io.dpath.mem_npc_misaligned, UInt(Causes.misaligned_fetch)), (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)), (mem_reg_valid 
&& mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)), (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)), @@ -551,18 +538,17 @@ class Control extends CoreModule // write CAUSE CSR on an exception io.dpath.exception := wb_reg_xcpt io.dpath.cause := wb_reg_cause - io.dpath.badvaddr_wen := wb_reg_xcpt // don't care for non-memory exceptions + val wb_xcpt = wb_reg_xcpt || io.dpath.csr_xcpt // control transfer from ex/wb - take_pc_wb := replay_wb || wb_reg_xcpt || io.dpath.sret + take_pc_wb := replay_wb || wb_xcpt io.dpath.sel_pc := - Mux(wb_reg_xcpt, PC_PCR, // exception + Mux(wb_xcpt, PC_PCR, // exception or [m|s]ret Mux(replay_wb, PC_WB, // replay - Mux(wb_reg_valid && wb_ctrl.sret, PC_PCR, // sret instruction - PC_MEM))) + PC_MEM)) - io.imem.btb_update.valid := mem_reg_valid && io.dpath.mem_misprediction && ((mem_ctrl.branch && io.dpath.mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb + io.imem.btb_update.valid := mem_reg_valid && !io.dpath.mem_npc_misaligned && io.dpath.mem_misprediction && ((mem_ctrl.branch && io.dpath.mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr @@ -574,7 +560,7 @@ class Control extends CoreModule io.imem.bht_update.bits.prediction.valid := mem_reg_btb_hit io.imem.bht_update.bits.prediction.bits := mem_reg_btb_resp - io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !take_pc_wb + io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !io.dpath.mem_npc_misaligned && !take_pc_wb io.imem.ras_update.bits.isCall := mem_ctrl.wxd && io.dpath.mem_waddr(0) io.imem.ras_update.bits.isReturn := mem_ctrl.jalr && io.dpath.mem_rs1_ra io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit @@ -652,7 +638,7 @@ class Control extends CoreModule id_ctrl.mem && !io.dmem.req.ready || 
Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || id_do_fence - val ctrl_draind = id_interrupt + val ctrl_draind = io.dpath.interrupt ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind io.dpath.killd := take_pc || ctrl_stalld && !ctrl_draind @@ -665,10 +651,9 @@ class Control extends CoreModule io.dpath.mem_ctrl := mem_ctrl io.dpath.ex_valid := ex_reg_valid io.dpath.ll_ready := !(wb_reg_valid && wb_ctrl.wxd) - io.dpath.retire := wb_reg_valid && !replay_wb + io.dpath.retire := wb_reg_valid && !replay_wb && !io.dpath.csr_xcpt io.dpath.wb_wen := io.dpath.retire && wb_ctrl.wxd - io.dpath.csr := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N) - io.dpath.sret := wb_reg_valid && wb_ctrl.sret && !replay_wb + io.dpath.csr_cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N) io.dpath.killm := killm_common io.fpu.valid := !ctrl_killd && id_ctrl.fp @@ -680,9 +665,9 @@ class Control extends CoreModule io.dmem.req.bits.cmd := ex_ctrl.mem_cmd io.dmem.req.bits.typ := ex_ctrl.mem_type io.dmem.req.bits.phys := Bool(false) - io.dmem.sret := io.dpath.sret + io.dmem.sret := wb_xcpt // obviously not an sret, but sufficient io.rocc.cmd.valid := wb_rocc_val - io.rocc.exception := wb_reg_xcpt && sr.er - io.rocc.s := sr.s + io.rocc.exception := wb_xcpt && io.dpath.status.xs != 0 + io.rocc.s := io.dpath.status.prv != 0 // should we just pass all of mstatus? 
} diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index f5346b3b..57021306 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -72,7 +72,7 @@ class Datapath extends CoreModule // immediate generation def imm(sel: Bits, inst: Bits) = { - val sign = inst(31).toSInt + val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt) val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt) val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0), @@ -172,7 +172,6 @@ class Datapath extends CoreModule pcr.io <> io.fpu pcr.io.rocc <> io.rocc pcr.io.pc := wb_reg_pc - io.ctrl.csr_replay := pcr.io.replay pcr.io.uarch_counters.foreach(_ := Bool(false)) io.ptw.ptbr := pcr.io.ptbr @@ -232,6 +231,7 @@ class Datapath extends CoreModule Mux(io.ctrl.mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4))) val mem_npc = Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)), mem_br_target).toUInt io.ctrl.mem_misprediction := mem_npc != ex_reg_pc || !io.ctrl.ex_valid + io.ctrl.mem_npc_misaligned := mem_npc(1) io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1 val mem_int_wdata = Mux(io.ctrl.mem_ctrl.jalr, mem_br_target, mem_reg_wdata).toUInt @@ -246,7 +246,7 @@ class Datapath extends CoreModule } wb_wdata := Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword, Mux(io.ctrl.ll_wen, ll_wdata, - Mux(io.ctrl.csr != CSR.N, pcr.io.rw.rdata, + Mux(io.ctrl.csr_cmd != CSR.N, pcr.io.rw.rdata, wb_reg_wdata))) val wb_wen = io.ctrl.ll_wen || io.ctrl.wb_wen @@ -259,10 +259,8 @@ class Datapath extends CoreModule // processor control regfile write pcr.io.rw.addr := wb_reg_inst(31,20) - pcr.io.rw.cmd := io.ctrl.csr - pcr.io.rw.wdata := Mux(io.ctrl.csr === CSR.S, pcr.io.rw.rdata | wb_reg_wdata, - Mux(io.ctrl.csr === CSR.C, pcr.io.rw.rdata & ~wb_reg_wdata, - wb_reg_wdata)) + pcr.io.rw.cmd := io.ctrl.csr_cmd + 
pcr.io.rw.wdata := wb_reg_wdata io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst) io.rocc.cmd.bits.rs1 := wb_reg_wdata diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index b3d40b09..39c3c9fd 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -102,6 +102,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule tlb.io.req.bits.asid := UInt(0) tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.instruction := Bool(true) + tlb.io.req.bits.store := Bool(false) icache.io.mem <> io.mem icache.io.req.valid := !stall && !s0_same_block diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 56ebf3c1..c3c015c7 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -95,7 +95,12 @@ object Instructions { def SC_D = Bits("b00011????????????011?????0101111") def SCALL = Bits("b00000000000000000000000001110011") def SBREAK = Bits("b00000000000100000000000001110011") - def SRET = Bits("b10000000000000000000000001110011") + def SRET = Bits("b00010000001000000000000001110011") + def SFENCE_VM = Bits("b000100000100?????000000001110011") + def HCALL = Bits("b00010000000000000000000001110011") + def MCALL = Bits("b00100000000000000000000001110011") + def MRET = Bits("b00110000001000000000000001110011") + def MRTS = Bits("b00110000100100000000000001110011") def CSRRW = Bits("b?????????????????001?????1110011") def CSRRS = Bits("b?????????????????010?????1110011") def CSRRC = Bits("b?????????????????011?????1110011") @@ -193,29 +198,27 @@ object Causes { val misaligned_fetch = 0x0 val fault_fetch = 0x1 val illegal_instruction = 0x2 - val privileged_instruction = 0x3 - val fp_disabled = 0x4 - val syscall = 0x6 + val scall = 0x4 + val hcall = 0x5 + val mcall = 0x6 val breakpoint = 0x7 val misaligned_load = 0x8 - val misaligned_store = 0x9 - val fault_load = 0xa + 
val fault_load = 0x9 + val misaligned_store = 0xa val fault_store = 0xb - val accelerator_disabled = 0xc val all = { val res = collection.mutable.ArrayBuffer[Int]() res += misaligned_fetch res += fault_fetch res += illegal_instruction - res += privileged_instruction - res += fp_disabled - res += syscall + res += scall + res += hcall + res += mcall res += breakpoint res += misaligned_load - res += misaligned_store res += fault_load + res += misaligned_store res += fault_store - res += accelerator_disabled res.toArray } } @@ -223,29 +226,10 @@ object CSRs { val fflags = 0x1 val frm = 0x2 val fcsr = 0x3 - val stats = 0xc0 - val sup0 = 0x500 - val sup1 = 0x501 - val epc = 0x502 - val badvaddr = 0x503 - val ptbr = 0x504 - val asid = 0x505 - val count = 0x506 - val compare = 0x507 - val evec = 0x508 - val cause = 0x509 - val status = 0x50a - val hartid = 0x50b - val impl = 0x50c - val fatc = 0x50d - val send_ipi = 0x50e - val clear_ipi = 0x50f - val reset = 0x51d - val tohost = 0x51e - val fromhost = 0x51f val cycle = 0xc00 val time = 0xc01 val instret = 0xc02 + val stats = 0xc0 val uarch0 = 0xcc0 val uarch1 = 0xcc1 val uarch2 = 0xcc2 @@ -262,38 +246,43 @@ object CSRs { val uarch13 = 0xccd val uarch14 = 0xcce val uarch15 = 0xccf - val counth = 0x586 + val sstatus = 0x100 + val stvec = 0x101 + val stimecmp = 0x121 + val sscratch = 0x140 + val sepc = 0x141 + val sptbr = 0x188 + val sasid = 0x189 + val scycle = 0x900 + val stime = 0x901 + val sinstret = 0x902 + val scause = 0xd40 + val sbadaddr = 0xd41 + val mstatus = 0x300 + val mscratch = 0x340 + val mepc = 0x341 + val mcause = 0x342 + val mbadaddr = 0x343 + val reset = 0x780 + val tohost = 0x781 + val fromhost = 0x782 + val send_ipi = 0x783 + val hartid = 0xfc0 val cycleh = 0xc80 val timeh = 0xc81 val instreth = 0xc82 + val scycleh = 0x980 + val stimeh = 0x981 + val sinstreth = 0x982 val all = { val res = collection.mutable.ArrayBuffer[Int]() res += fflags res += frm res += fcsr - res += stats - res += sup0 - res += sup1 
- res += epc - res += badvaddr - res += ptbr - res += asid - res += count - res += compare - res += evec - res += cause - res += status - res += hartid - res += impl - res += fatc - res += send_ipi - res += clear_ipi - res += reset - res += tohost - res += fromhost res += cycle res += time res += instret + res += stats res += uarch0 res += uarch1 res += uarch2 @@ -310,14 +299,38 @@ object CSRs { res += uarch13 res += uarch14 res += uarch15 + res += sstatus + res += stvec + res += stimecmp + res += sscratch + res += sepc + res += sptbr + res += sasid + res += scycle + res += stime + res += sinstret + res += scause + res += sbadaddr + res += mstatus + res += mscratch + res += mepc + res += mcause + res += mbadaddr + res += reset + res += tohost + res += fromhost + res += send_ipi + res += hartid res.toArray } val all32 = { val res = collection.mutable.ArrayBuffer(all:_*) - res += counth res += cycleh res += timeh res += instreth + res += scycleh + res += stimeh + res += sinstreth res.toArray } } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 7ea62577..7d124d92 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -641,6 +641,7 @@ class HellaCache extends L1HellaCacheModule { dtlb.io.req.bits.asid := UInt(0) dtlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits dtlb.io.req.bits.instruction := Bool(false) + dtlb.io.req.bits.store := s1_write when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) } when (io.cpu.req.valid) { diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index e24c646f..75c065be 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -3,5 +3,5 @@ package object rocket extends rocket.constants.ScalarOpConstants { - val START_ADDR = 0x2000 + val START_ADDR = 0x100 } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 
a165cfc0..5ef014c7 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -6,23 +6,29 @@ import Chisel._ import uncore._ import Util._ +class PTWReq extends CoreBundle { + val addr = UInt(width = vpnBits) + val perm = Bits(width = permBits) +} + class PTWResp extends CoreBundle { val error = Bool() val ppn = UInt(width = ppnBits) val perm = Bits(width = permBits) + val dirty = Bool() } class TLBPTWIO extends CoreBundle { - val req = Decoupled(UInt(width = vpnBits)) + val req = Decoupled(new PTWReq) val resp = Valid(new PTWResp).flip - val status = new Status().asInput + val status = new MStatus().asInput val invalidate = Bool(INPUT) } class DatapathPTWIO extends CoreBundle { val ptbr = UInt(INPUT, paddrBits) val invalidate = Bool(INPUT) - val status = new Status().asInput + val status = new MStatus().asInput } class PTW(n: Int) extends CoreModule @@ -37,48 +43,54 @@ class PTW(n: Int) extends CoreModule val bitsPerLevel = vpnBits/levels require(vpnBits == levels * bitsPerLevel) - val s_ready :: s_req :: s_wait :: s_done :: s_error :: Nil = Enum(UInt(), 5) + val s_ready :: s_req :: s_wait :: s_set_dirty :: s_wait_dirty :: s_done :: s_error :: Nil = Enum(UInt(), 7) val state = Reg(init=s_ready) val count = Reg(UInt(width = log2Up(levels))) - val r_req_vpn = Reg(Bits()) + val r_req = Reg(new PTWReq) val r_req_dest = Reg(Bits()) val r_pte = Reg(Bits()) - val vpn_idx = Vec((0 until levels).map(i => (r_req_vpn >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count) + val vpn_idx = Vec((0 until levels).map(i => (r_req.addr >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count) - val arb = Module(new RRArbiter(UInt(width = vpnBits), n)) + val arb = Module(new RRArbiter(new PTWReq, n)) arb.io.in <> io.requestor.map(_.req) arb.io.out.ready := state === s_ready + val pte = io.mem.resp.bits.data when (arb.io.out.fire()) { - r_req_vpn := arb.io.out.bits + r_req := arb.io.out.bits r_req_dest := arb.io.chosen - r_pte := 
Cat(io.dpath.ptbr(paddrBits-1,pgIdxBits), io.mem.resp.bits.data(pgIdxBits-1,0)) + r_pte := Cat(io.dpath.ptbr(paddrBits-1,pgIdxBits), pte(pgIdxBits-1,0)) } - when (io.mem.resp.valid) { - r_pte := io.mem.resp.bits.data + val perm_ok = (pte(8,3) & r_req.perm).orR + val is_store = r_req.perm(1) || r_req.perm(4) + val set_dirty_bit = perm_ok && !pte(1) && (!pte(9) || (is_store && !pte(10))) + when (io.mem.resp.valid && state === s_wait && !set_dirty_bit) { + r_pte := pte } - io.mem.req.valid := state === s_req + io.mem.req.valid := state === s_req || state === s_set_dirty io.mem.req.bits.phys := Bool(true) - io.mem.req.bits.cmd := M_XRD + io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD) io.mem.req.bits.typ := MT_D io.mem.req.bits.addr := Cat(r_pte(paddrBits-1,pgIdxBits), vpn_idx).toUInt << log2Up(xLen/8) io.mem.req.bits.kill := Bool(false) + io.mem.req.bits.data := UInt(1<<9) | Mux(is_store, UInt(1<<10), UInt(0)) - val resp_val = state === s_done || state === s_error - val resp_err = state === s_error || state === s_wait + val resp_err = state === s_error + val resp_val = state === s_done || resp_err val r_resp_ppn = io.mem.req.bits.addr >> UInt(pgIdxBits) - val resp_ppn = Vec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), r_req_vpn(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) + val resp_ppn = Vec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), r_req.addr(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) for (i <- 0 until io.requestor.size) { val me = r_req_dest === UInt(i) io.requestor(i).resp.valid := resp_val && me io.requestor(i).resp.bits.error := resp_err io.requestor(i).resp.bits.perm := r_pte(8,3) + io.requestor(i).resp.bits.dirty := r_pte(10) io.requestor(i).resp.bits.ppn := resp_ppn.toUInt io.requestor(i).invalidate := io.dpath.invalidate io.requestor(i).status := io.dpath.status @@ -103,8 +115,10 @@ class PTW(n: Int) extends CoreModule } when (io.mem.resp.valid) { state := 
s_error - when (io.mem.resp.bits.data(0)) { - when (!io.mem.resp.bits.data(1)) { + when (pte(0)) { + when (set_dirty_bit) { + state := s_set_dirty + }.elsewhen (!pte(1)) { state := s_done }.elsewhen (count < levels-1) { state := s_req @@ -113,6 +127,19 @@ class PTW(n: Int) extends CoreModule } } } + is (s_set_dirty) { + when (io.mem.req.ready) { + state := s_wait_dirty + } + } + is (s_wait_dirty) { + when (io.mem.resp.bits.nack) { + state := s_set_dirty + } + when (io.mem.resp.valid) { + state := s_req + } + } is (s_done) { state := s_ready } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 78254acb..f209002b 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -3,6 +3,7 @@ package rocket import Chisel._ +import Util._ import uncore._ import scala.math._ @@ -19,7 +20,7 @@ abstract class TLBModule extends Module with TLBParameters class CAMIO extends TLBBundle { val clear = Bool(INPUT) - val clear_hit = Bool(INPUT) + val clear_mask = Bits(INPUT, entries) val tag = Bits(INPUT, camTagBits) val hit = Bool(OUTPUT) val hits = UInt(OUTPUT, entries) @@ -40,10 +41,7 @@ class RocketCAM extends TLBModule { cam_tags(io.write_addr) := io.write_tag } when (io.clear) { - vb_array := Bits(0, entries) - } - .elsewhen (io.clear_hit) { - vb_array := vb_array & ~io.hits + vb_array := vb_array & ~io.clear_mask } val hits = (0 until entries).map(i => vb_array(i) && cam_tags(i) === io.tag) @@ -81,6 +79,7 @@ class TLBReq extends CoreBundle { val vpn = UInt(width = vpnBits+1) val passthrough = Bool() val instruction = Bool() + val store = Bool() } class TLBRespNoHitIndex extends CoreBundle { @@ -107,36 +106,38 @@ class TLB extends TLBModule { val state = Reg(init=s_ready) val r_refill_tag = Reg(UInt()) val r_refill_waddr = Reg(UInt()) + val r_req = Reg(new TLBReq) val tag_cam = Module(new RocketCAM) val tag_ram = Mem(io.ptw.resp.bits.ppn.clone, entries) val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt - 
tag_cam.io.clear := io.ptw.invalidate - tag_cam.io.clear_hit := io.req.fire() && Mux(io.req.bits.instruction, io.resp.xcpt_if, io.resp.xcpt_ld && io.resp.xcpt_st) tag_cam.io.tag := lookup_tag tag_cam.io.write := state === s_wait && io.ptw.resp.valid tag_cam.io.write_tag := r_refill_tag tag_cam.io.write_addr := r_refill_waddr - val tag_hit = tag_cam.io.hit val tag_hit_addr = OHToUInt(tag_cam.io.hits) // permission bit arrays + val valid_array = Reg(Bits()) // V bit of PTE (not equivalent to CAM tag valid bit!) val ur_array = Reg(Bits()) // user read permission val uw_array = Reg(Bits()) // user write permission val ux_array = Reg(Bits()) // user execute permission val sr_array = Reg(Bits()) // supervisor read permission val sw_array = Reg(Bits()) // supervisor write permission val sx_array = Reg(Bits()) // supervisor execute permission + val dirty_array = Reg(Bits()) // PTE dirty bit when (io.ptw.resp.valid) { + val perm = io.ptw.resp.bits.perm & ~io.ptw.resp.bits.error.toSInt tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn - val perm = (!io.ptw.resp.bits.error).toSInt & io.ptw.resp.bits.perm - ur_array := ur_array.bitSet(r_refill_waddr, perm(0)) + valid_array := valid_array.bitSet(r_refill_waddr, !io.ptw.resp.bits.error) + ur_array := ur_array.bitSet(r_refill_waddr, perm(0) || perm(2)) uw_array := uw_array.bitSet(r_refill_waddr, perm(1)) ux_array := ux_array.bitSet(r_refill_waddr, perm(2)) - sr_array := sr_array.bitSet(r_refill_waddr, perm(3)) + sr_array := sr_array.bitSet(r_refill_waddr, perm(3) || perm(5)) sw_array := sw_array.bitSet(r_refill_waddr, perm(4)) sx_array := sx_array.bitSet(r_refill_waddr, perm(5)) + dirty_array := dirty_array.bitSet(r_refill_waddr, io.ptw.resp.bits.dirty) } // high if there are any unused (invalid) entries in the TLB @@ -144,30 +145,51 @@ class TLB extends TLBModule { val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) val plru = new PseudoLRU(entries) val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) - + + 
val priv = Mux(io.ptw.status.prv === PRV_M && !io.req.bits.instruction, io.ptw.status.mprv, io.ptw.status.prv) + val priv_s = priv === PRV_S + val priv_uses_vm = priv <= PRV_S + val req_xwr = Cat(!r_req.store, r_req.store, !(r_req.instruction || r_req.store)) + val req_perm = Cat(req_xwr & priv_s.toSInt, req_xwr & ~priv_s.toSInt) + + val r_array = Mux(priv_s, sr_array, ur_array) + val w_array = Mux(priv_s, sw_array, uw_array) + val x_array = Mux(priv_s, sx_array, ux_array) + + val vm_enabled = io.ptw.status.vm(2) && priv_uses_vm val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1) - val tlb_hit = io.ptw.status.vm && tag_hit - val tlb_miss = io.ptw.status.vm && !tag_hit && !bad_va + // it's only a store hit if the dirty bit is set + val tag_hits = tag_cam.io.hits & (dirty_array | ~(io.req.bits.store.toSInt & w_array)) + val tag_hit = tag_hits.orR + val tlb_hit = vm_enabled && tag_hit + val tlb_miss = vm_enabled && !tag_hit && !bad_va when (io.req.valid && tlb_hit) { plru.access(OHToUInt(tag_cam.io.hits)) } io.req.ready := state === s_ready - io.resp.xcpt_ld := bad_va || tlb_hit && !Mux(io.ptw.status.s, (sr_array & tag_cam.io.hits).orR, (ur_array & tag_cam.io.hits).orR) - io.resp.xcpt_st := bad_va || tlb_hit && !Mux(io.ptw.status.s, (sw_array & tag_cam.io.hits).orR, (uw_array & tag_cam.io.hits).orR) - io.resp.xcpt_if := bad_va || tlb_hit && !Mux(io.ptw.status.s, (sx_array & tag_cam.io.hits).orR, (ux_array & tag_cam.io.hits).orR) + io.resp.xcpt_ld := bad_va || tlb_hit && !(r_array & tag_cam.io.hits).orR + io.resp.xcpt_st := bad_va || tlb_hit && !(w_array & tag_cam.io.hits).orR + io.resp.xcpt_if := bad_va || tlb_hit && !(x_array & tag_cam.io.hits).orR io.resp.miss := tlb_miss - io.resp.ppn := Mux(io.ptw.status.vm && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(params(PPNBits)-1,0)) + io.resp.ppn := Mux(vm_enabled && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(params(PPNBits)-1,0)) 
io.resp.hit_idx := tag_cam.io.hits + + // clear invalid entries on access, or all entries on a TLB flush + tag_cam.io.clear := io.ptw.invalidate || io.req.fire() + tag_cam.io.clear_mask := ~valid_array | (tag_cam.io.hits & ~tag_hits) + when (io.ptw.invalidate) { tag_cam.io.clear_mask := SInt(-1) } io.ptw.req.valid := state === s_request - io.ptw.req.bits := r_refill_tag + io.ptw.req.bits.addr := r_refill_tag + io.ptw.req.bits.perm := req_perm when (io.req.fire() && tlb_miss) { state := s_request r_refill_tag := lookup_tag r_refill_waddr := repl_waddr + r_req := io.req.bits } when (state === s_request) { when (io.ptw.invalidate) { From 2c875555a2df6e60e602d77e951e46b591ed2151 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 17 Mar 2015 00:14:32 -0700 Subject: [PATCH 0812/1087] Separate exception return control from exception control --- rocket/src/main/scala/csr.scala | 4 +++- rocket/src/main/scala/ctrl.scala | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 7dec9a09..a0d2589b 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -82,6 +82,7 @@ class CSRFileIO extends CoreBundle { val csr_replay = Bool(OUTPUT) val csr_xcpt = Bool(OUTPUT) + val eret = Bool(OUTPUT) val status = new MStatus().asOutput val ptbr = UInt(OUTPUT, paddrBits) @@ -206,7 +207,8 @@ class CSRFile extends CoreModule Mux(insn_redirect_trap, reg_stvec, Mux(reg_mstatus.prv(1), reg_mepc, reg_sepc))).toUInt io.ptbr := reg_sptbr - io.csr_xcpt := csr_xcpt || insn_redirect_trap || insn_ret /* sort of a lie */ + io.csr_xcpt := csr_xcpt + io.eret := insn_ret || insn_redirect_trap io.status := reg_mstatus io.status.fs := reg_mstatus.fs.orR.toSInt // either off or dirty (no clean/initial support yet) io.status.xs := reg_mstatus.xs.orR.toSInt // either off or dirty (no clean/initial support yet) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala 
index 450a8817..14cff632 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -45,6 +45,7 @@ class CtrlDpathIO extends CoreBundle // inputs from csr file val csr_replay = Bool(INPUT) val csr_xcpt = Bool(INPUT) + val eret = Bool(INPUT) val interrupt = Bool(INPUT) val interrupt_cause = UInt(INPUT, xLen) } @@ -541,10 +542,10 @@ class Control extends CoreModule val wb_xcpt = wb_reg_xcpt || io.dpath.csr_xcpt // control transfer from ex/wb - take_pc_wb := replay_wb || wb_xcpt + take_pc_wb := replay_wb || wb_xcpt || io.dpath.eret io.dpath.sel_pc := - Mux(wb_xcpt, PC_PCR, // exception or [m|s]ret + Mux(wb_xcpt || io.dpath.eret, PC_PCR, // exception or [m|s]ret Mux(replay_wb, PC_WB, // replay PC_MEM)) From 66388be1ce681830e818cddbf3c8ce4e5191326b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 17 Mar 2015 02:24:41 -0700 Subject: [PATCH 0813/1087] Merge [shm]call into ecall, [shm]ret into eret --- rocket/src/main/scala/csr.scala | 16 ++++++------- rocket/src/main/scala/ctrl.scala | 2 -- rocket/src/main/scala/instructions.scala | 29 +++++++++--------------- 3 files changed, 18 insertions(+), 29 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index a0d2589b..76a2a736 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -187,14 +187,12 @@ class CSRFile extends CoreModule Mux(io.rw.cmd === CSR.S, io.rw.rdata | io.rw.wdata, host_pcr_bits.data))) - val opcode = io.rw.addr(3,0) - // The following comparison is meant to be opcode === SFENCE_VM(23,20). But - // FOR SOME FUCKING REASON, extracting SFENCE_VM(23,20) gives 3, not 4. 
- val insn_sfence_vm = opcode === 4 && system_insn && priv_sufficient - val insn_redirect_trap = opcode === MRTS(23,20) && system_insn && priv_sufficient - val insn_ret = opcode === SRET(23,20) /* or H/MRET */ && io.rw.addr(1) && system_insn && priv_sufficient - val insn_break = opcode === SBREAK(23,20) && io.rw.addr(0) && system_insn && priv_sufficient - val insn_call = opcode === SCALL(23,20) /* or H/MCALL */ && system_insn && priv_sufficient + val opcode = io.rw.addr + val insn_call = !opcode(8) && !opcode(0) && system_insn && priv_sufficient + val insn_break = !opcode(8) && opcode(0) && system_insn && priv_sufficient + val insn_ret = opcode(8) && !opcode(0) && system_insn && priv_sufficient + val insn_sfence_vm = opcode(8) && opcode(0) && system_insn && priv_sufficient + val insn_redirect_trap = opcode(2) && system_insn && priv_sufficient val csr_xcpt = (cpu_wen && read_only) || (cpu_ren && (!priv_sufficient || !addr_valid || fp_csr && !io.status.fs.orR)) || @@ -228,7 +226,7 @@ class CSRFile extends CoreModule when (csr_xcpt) { reg_mcause := Causes.illegal_instruction when (insn_break) { reg_mcause := Causes.breakpoint } - when (insn_call) { reg_mcause := Causes.scall + csr_addr_priv } + when (insn_call) { reg_mcause := Causes.ecall } } reg_mbadaddr := io.pc diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 14cff632..e118b6cc 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -216,8 +216,6 @@ object XDecode extends DecodeConstants SCALL-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), SBREAK-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), SRET-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - HCALL-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - MRET-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, 
N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), MRTS-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), CSRRW-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index c3c015c7..d9b433c8 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -95,12 +95,9 @@ object Instructions { def SC_D = Bits("b00011????????????011?????0101111") def SCALL = Bits("b00000000000000000000000001110011") def SBREAK = Bits("b00000000000100000000000001110011") - def SRET = Bits("b00010000001000000000000001110011") - def SFENCE_VM = Bits("b000100000100?????000000001110011") - def HCALL = Bits("b00010000000000000000000001110011") - def MCALL = Bits("b00100000000000000000000001110011") - def MRET = Bits("b00110000001000000000000001110011") - def MRTS = Bits("b00110000100100000000000001110011") + def SRET = Bits("b00010000000000000000000001110011") + def SFENCE_VM = Bits("b000100000001?????000000001110011") + def MRTS = Bits("b00110000010100000000000001110011") def CSRRW = Bits("b?????????????????001?????1110011") def CSRRS = Bits("b?????????????????010?????1110011") def CSRRC = Bits("b?????????????????011?????1110011") @@ -198,27 +195,23 @@ object Causes { val misaligned_fetch = 0x0 val fault_fetch = 0x1 val illegal_instruction = 0x2 - val scall = 0x4 - val hcall = 0x5 - val mcall = 0x6 - val breakpoint = 0x7 - val misaligned_load = 0x8 - val fault_load = 0x9 - val misaligned_store = 0xa - val fault_store = 0xb + val misaligned_load = 0x4 + val fault_load = 0x5 + val misaligned_store = 0x6 + val fault_store = 0x7 + val ecall = 0x8 + val breakpoint = 0x9 val all = { val res = collection.mutable.ArrayBuffer[Int]() res += misaligned_fetch res += fault_fetch res += 
illegal_instruction - res += scall - res += hcall - res += mcall - res += breakpoint res += misaligned_load res += fault_load res += misaligned_store res += fault_store + res += ecall + res += breakpoint res.toArray } } From 5b4653b621b6cdc14d2ecfcec6d5df45729ce4f2 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 17 Mar 2015 05:08:19 -0700 Subject: [PATCH 0814/1087] fix rocc exception/s bit --- rocket/src/main/scala/ctrl.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index e118b6cc..16ce5956 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -667,6 +667,6 @@ class Control extends CoreModule io.dmem.sret := wb_xcpt // obviously not an sret, but sufficient io.rocc.cmd.valid := wb_rocc_val - io.rocc.exception := wb_xcpt && io.dpath.status.xs != 0 - io.rocc.s := io.dpath.status.prv != 0 // should we just pass all of mstatus? + io.rocc.exception := wb_xcpt && io.dpath.status.xs.orR + io.rocc.s := io.dpath.status.prv.orR // should we just pass all of mstatus? 
} From 53617d6df58402f32221f4345acafc1a7fd571f3 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Tue, 17 Mar 2015 21:45:17 -0700 Subject: [PATCH 0815/1087] fix long-standing dcache bug have to initialize register, if it is used the same cycle it is being written --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 7d124d92..3f24ba4c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -628,7 +628,7 @@ class HellaCache extends L1HellaCacheModule { val s3_req = Reg(io.cpu.req.bits.clone) val s3_way = Reg(Bits()) - val s1_recycled = RegEnable(s2_recycle, s1_clk_en) + val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en) val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) val s1_sc = s1_req.cmd === M_XSC From 31d17cbf8623528917f02ed16fe68f3aa3c51e7a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 21 Mar 2015 00:16:08 -0700 Subject: [PATCH 0816/1087] Hard-wire LSB of JALR to 0, as sent to BTB --- rocket/src/main/scala/dpath.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 57021306..bc25a1bd 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -229,7 +229,7 @@ class Datapath extends CoreModule val mem_br_target = mem_reg_pc + Mux(io.ctrl.mem_ctrl.branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), Mux(io.ctrl.mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4))) - val mem_npc = Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)), mem_br_target).toUInt + val mem_npc = (Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)), mem_br_target) & SInt(-2)).toUInt io.ctrl.mem_misprediction := mem_npc != ex_reg_pc || !io.ctrl.ex_valid
io.ctrl.mem_npc_misaligned := mem_npc(1) io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1 From 0332c1e7fe4c5dc1c967a6ec0b303619fc0a83a0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 21 Mar 2015 20:12:25 -0700 Subject: [PATCH 0817/1087] Reduce latency of page table walks A small cache in the PTW caches non-leaf PTEs, reducing latency and D$ misses. --- rocket/src/main/scala/ptw.scala | 66 +++++++++++++++++++++++++-------- rocket/src/main/scala/tlb.scala | 8 ++-- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 5ef014c7..a156b1a9 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -13,9 +13,7 @@ class PTWReq extends CoreBundle { class PTWResp extends CoreBundle { val error = Bool() - val ppn = UInt(width = ppnBits) - val perm = Bits(width = permBits) - val dirty = Bool() + val pte = new PTE } class TLBPTWIO extends CoreBundle { @@ -31,6 +29,17 @@ class DatapathPTWIO extends CoreBundle { val status = new MStatus().asInput } +class PTE extends CoreBundle { + val ppn = Bits(width = ppnBits) + val reserved = Bits(width = 2) + val d = Bool() + val r = Bool() + val perm = Bits(width = 6) + val g = Bool() + val t = Bool() + val v = Bool() +} + class PTW(n: Int) extends CoreModule { val io = new Bundle { @@ -49,7 +58,7 @@ class PTW(n: Int) extends CoreModule val r_req = Reg(new PTWReq) val r_req_dest = Reg(Bits()) - val r_pte = Reg(Bits()) + val r_pte = Reg(new PTE) val vpn_idx = Vec((0 until levels).map(i => (r_req.addr >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count) @@ -57,16 +66,39 @@ class PTW(n: Int) extends CoreModule arb.io.in <> io.requestor.map(_.req) arb.io.out.ready := state === s_ready - val pte = io.mem.resp.bits.data + val pte = new PTE().fromBits(io.mem.resp.bits.data) + val pte_addr = Cat(r_pte.ppn, vpn_idx).toUInt << log2Up(xLen/8) + when (arb.io.out.fire()) { r_req := arb.io.out.bits r_req_dest := arb.io.chosen - r_pte 
:= Cat(io.dpath.ptbr(paddrBits-1,pgIdxBits), pte(pgIdxBits-1,0)) + r_pte.ppn := io.dpath.ptbr(paddrBits-1,pgIdxBits) } - val perm_ok = (pte(8,3) & r_req.perm).orR + val (pte_cache_hit, pte_cache_data) = { + val size = log2Up(levels * 2) + val plru = new PseudoLRU(size) + val valid = Reg(init = Bits(0, size)) + val tags = Mem(UInt(width = paddrBits), size) + val data = Mem(UInt(width = paddrBits - pgIdxBits), size) + + val hits = Vec(tags.map(_ === pte_addr)).toBits & valid + val hit = hits.orR + when (io.mem.resp.valid && io.mem.resp.bits.data(1,0).andR && !hit) { + val r = Mux(valid.andR, plru.replace, PriorityEncoder(~valid)) + valid(r) := true + tags(r) := pte_addr + data(r) := io.mem.resp.bits.data(paddrBits-1,pgIdxBits) + } + when (hit && state === s_req) { plru.access(OHToUInt(hits)) } + when (io.dpath.invalidate) { valid := 0 } + + (hit, Mux1H(hits, data)) + } + + val perm_ok = (pte.perm & r_req.perm).orR val is_store = r_req.perm(1) || r_req.perm(4) - val set_dirty_bit = perm_ok && !pte(1) && (!pte(9) || (is_store && !pte(10))) + val set_dirty_bit = perm_ok && !pte.t && (!pte.r || (is_store && !pte.d)) when (io.mem.resp.valid && state === s_wait && !set_dirty_bit) { r_pte := pte } @@ -75,7 +107,7 @@ class PTW(n: Int) extends CoreModule io.mem.req.bits.phys := Bool(true) io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD) io.mem.req.bits.typ := MT_D - io.mem.req.bits.addr := Cat(r_pte(paddrBits-1,pgIdxBits), vpn_idx).toUInt << log2Up(xLen/8) + io.mem.req.bits.addr := pte_addr io.mem.req.bits.kill := Bool(false) io.mem.req.bits.data := UInt(1<<9) | Mux(is_store, UInt(1<<10), UInt(0)) @@ -89,9 +121,8 @@ class PTW(n: Int) extends CoreModule val me = r_req_dest === UInt(i) io.requestor(i).resp.valid := resp_val && me io.requestor(i).resp.bits.error := resp_err - io.requestor(i).resp.bits.perm := r_pte(8,3) - io.requestor(i).resp.bits.dirty := r_pte(10) - io.requestor(i).resp.bits.ppn := resp_ppn.toUInt + io.requestor(i).resp.bits.pte := r_pte + 
io.requestor(i).resp.bits.pte.ppn := resp_ppn io.requestor(i).invalidate := io.dpath.invalidate io.requestor(i).status := io.dpath.status } @@ -105,7 +136,12 @@ class PTW(n: Int) extends CoreModule count := UInt(0) } is (s_req) { - when (io.mem.req.ready) { + when (pte_cache_hit && count < levels-1) { + io.mem.req.valid := false + state := s_req + count := count + 1 + r_pte.ppn := pte_cache_data + }.elsewhen (io.mem.req.ready) { state := s_wait } } @@ -115,10 +151,10 @@ class PTW(n: Int) extends CoreModule } when (io.mem.resp.valid) { state := s_error - when (pte(0)) { + when (pte.v) { when (set_dirty_bit) { state := s_set_dirty - }.elsewhen (!pte(1)) { + }.elsewhen (!pte.t) { state := s_done }.elsewhen (count < levels-1) { state := s_req diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index f209002b..4f276f56 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -109,7 +109,7 @@ class TLB extends TLBModule { val r_req = Reg(new TLBReq) val tag_cam = Module(new RocketCAM) - val tag_ram = Mem(io.ptw.resp.bits.ppn.clone, entries) + val tag_ram = Mem(io.ptw.resp.bits.pte.ppn.clone, entries) val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt tag_cam.io.tag := lookup_tag @@ -128,8 +128,8 @@ class TLB extends TLBModule { val sx_array = Reg(Bits()) // supervisor execute permission val dirty_array = Reg(Bits()) // PTE dirty bit when (io.ptw.resp.valid) { - val perm = io.ptw.resp.bits.perm & ~io.ptw.resp.bits.error.toSInt - tag_ram(r_refill_waddr) := io.ptw.resp.bits.ppn + val perm = io.ptw.resp.bits.pte.perm & ~io.ptw.resp.bits.error.toSInt + tag_ram(r_refill_waddr) := io.ptw.resp.bits.pte.ppn valid_array := valid_array.bitSet(r_refill_waddr, !io.ptw.resp.bits.error) ur_array := ur_array.bitSet(r_refill_waddr, perm(0) || perm(2)) uw_array := uw_array.bitSet(r_refill_waddr, perm(1)) @@ -137,7 +137,7 @@ class TLB extends TLBModule { sr_array := sr_array.bitSet(r_refill_waddr, perm(3) || perm(5)) 
sw_array := sw_array.bitSet(r_refill_waddr, perm(4)) sx_array := sx_array.bitSet(r_refill_waddr, perm(5)) - dirty_array := dirty_array.bitSet(r_refill_waddr, io.ptw.resp.bits.dirty) + dirty_array := dirty_array.bitSet(r_refill_waddr, io.ptw.resp.bits.pte.d) } // high if there are any unused (invalid) entries in the TLB From 822698b567fb0d04b0d21db742f6d64fb32d09cb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Mar 2015 19:32:45 -0700 Subject: [PATCH 0818/1087] support disabling supervisor mode (via UseVM parameter) --- rocket/src/main/scala/csr.scala | 149 +++++++++++++++++--------------- 1 file changed, 80 insertions(+), 69 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 76a2a736..805e2512 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -170,10 +170,56 @@ class CSRFile extends CoreModule io.host.debug_stats_pcr := reg_stats // direct export up the hierarchy + val read_mstatus = io.status.toBits + val read_sstatus = new SStatus + read_sstatus := new SStatus().fromBits(read_mstatus) // sstatus mostly overlaps mstatus + read_sstatus.zero0 := 0 + read_sstatus.zero1 := 0 + read_sstatus.zero2 := 0 + read_sstatus.zero3 := 0 + read_sstatus.zero4 := 0 + read_sstatus.zero5 := 0 + read_sstatus.ua := io.status.ua + read_sstatus.tip := r_irq_timer + + val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( + CSRs.fflags -> (if (!params(BuildFPU).isEmpty) reg_fflags else UInt(0)), + CSRs.frm -> (if (!params(BuildFPU).isEmpty) reg_frm else UInt(0)), + CSRs.fcsr -> (if (!params(BuildFPU).isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), + CSRs.cycle -> reg_time, + CSRs.scycle -> reg_time, + CSRs.time -> reg_time, + CSRs.stime -> reg_time, + CSRs.instret -> reg_instret, + CSRs.sinstret -> reg_instret, + CSRs.mstatus -> read_mstatus, + CSRs.mscratch -> reg_mscratch, + CSRs.mepc -> reg_mepc, + CSRs.mbadaddr -> reg_mbadaddr, + CSRs.mcause -> reg_mcause, + CSRs.stimecmp -> 
reg_stimecmp, + CSRs.stvec -> reg_stvec, + CSRs.hartid -> io.host.id, + CSRs.send_ipi -> io.host.id, /* don't care */ + CSRs.stats -> reg_stats, + CSRs.tohost -> reg_tohost, + CSRs.fromhost -> reg_fromhost) + + if (params(UseVM)) { + read_mapping += CSRs.sstatus -> read_sstatus.toBits + read_mapping += CSRs.sscratch -> reg_sscratch + read_mapping += CSRs.scause -> reg_scause + read_mapping += CSRs.sbadaddr -> reg_sbadaddr + read_mapping += CSRs.sptbr -> reg_sptbr + read_mapping += CSRs.sasid -> UInt(0) + read_mapping += CSRs.sepc -> reg_sepc + } + + for (i <- 0 until reg_uarch_counters.size) + read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) + val addr = Mux(cpu_ren, io.rw.addr, host_pcr_bits.addr) - val decoded_addr = Map(( - for ((v, i) <- CSRs.all.zipWithIndex) - yield v -> (addr === CSRs.all(i))):_*) + val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) } val addr_valid = decoded_addr.values.reduce(_||_) val fp_csr = decoded_addr(CSRs.fflags) || decoded_addr(CSRs.frm) || decoded_addr(CSRs.fcsr) @@ -268,50 +314,6 @@ class CSRFile extends CoreModule when (host_pcr_req_fire && !host_pcr_bits.rw && decoded_addr(CSRs.tohost)) { reg_tohost := UInt(0) } - val read_mstatus = io.status.toBits - val read_sstatus = new SStatus - read_sstatus := new SStatus().fromBits(read_mstatus) // sstatus mostly overlaps mstatus - read_sstatus.zero0 := 0 - read_sstatus.zero1 := 0 - read_sstatus.zero2 := 0 - read_sstatus.zero3 := 0 - read_sstatus.zero4 := 0 - read_sstatus.zero5 := 0 - read_sstatus.ua := io.status.ua - read_sstatus.tip := r_irq_timer - - val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( - CSRs.fflags -> (if (!params(BuildFPU).isEmpty) reg_fflags else UInt(0)), - CSRs.frm -> (if (!params(BuildFPU).isEmpty) reg_frm else UInt(0)), - CSRs.fcsr -> (if (!params(BuildFPU).isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), - CSRs.cycle -> reg_time, - CSRs.time -> reg_time, - CSRs.scycle -> reg_time, - CSRs.stime -> reg_time, - CSRs.instret 
-> reg_instret, - CSRs.sinstret -> reg_instret, - CSRs.mstatus -> read_mstatus, - CSRs.mscratch -> reg_mscratch, - CSRs.mepc -> reg_mepc, - CSRs.mbadaddr -> reg_mbadaddr, - CSRs.mcause -> reg_mcause, - CSRs.sstatus -> read_sstatus.toBits, - CSRs.sscratch -> reg_sscratch, - CSRs.sepc -> reg_sepc, - CSRs.scause -> reg_scause, - CSRs.sbadaddr -> reg_sbadaddr, - CSRs.sptbr -> reg_sptbr, - CSRs.sasid -> UInt(0), - CSRs.stimecmp -> reg_stimecmp, - CSRs.stvec -> reg_stvec, - CSRs.hartid -> io.host.id, - CSRs.stats -> reg_stats, - CSRs.tohost -> reg_tohost, - CSRs.fromhost -> reg_fromhost) - - for (i <- 0 until reg_uarch_counters.size) - read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) - io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) io.fcsr_rm := reg_frm @@ -326,26 +328,23 @@ class CSRFile extends CoreModule reg_mstatus.msip := new_mstatus.msip reg_mstatus.stie := new_mstatus.stie reg_mstatus.ie := new_mstatus.ie - reg_mstatus.ie1 := new_mstatus.ie1 - reg_mstatus.ie2 := new_mstatus.ie2 - when (new_mstatus.mprv != PRV_H) { reg_mstatus.mprv := new_mstatus.mprv } - when (new_mstatus.prv != PRV_H) { reg_mstatus.prv := new_mstatus.prv } - when (new_mstatus.prv1 != PRV_H) { reg_mstatus.prv1 := new_mstatus.prv1 } - when (new_mstatus.prv2 != PRV_H) { reg_mstatus.prv2 := new_mstatus.prv2 } + + val supportedModes = Vec((PRV_M :: PRV_U :: (if (params(UseVM)) List(PRV_S) else Nil)).map(UInt(_))) + if (supportedModes.size > 1) { + when (supportedModes contains new_mstatus.mprv) { reg_mstatus.mprv := new_mstatus.mprv } + when (supportedModes contains new_mstatus.prv) { reg_mstatus.prv := new_mstatus.prv } + when (supportedModes contains new_mstatus.prv1) { reg_mstatus.prv1 := new_mstatus.prv1 } + reg_mstatus.ie1 := new_mstatus.ie1 + if (supportedModes.size > 2) { + when (supportedModes contains new_mstatus.prv2) { reg_mstatus.prv2 := new_mstatus.prv2 } + reg_mstatus.ie2 := new_mstatus.ie2 + } + } + if (params(UseVM)) when (new_mstatus.vm === 0 
|| new_mstatus.vm === 5) { reg_mstatus.vm := new_mstatus.vm } - if (!params(BuildFPU).isEmpty) reg_mstatus.fs := new_mstatus.fs + if (params(UseVM) || !params(BuildFPU).isEmpty) reg_mstatus.fs := new_mstatus.fs if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_mstatus.xs } - when (decoded_addr(CSRs.sstatus)) { - val new_sstatus = new SStatus().fromBits(wdata) - reg_mstatus.ssip := new_sstatus.sip - reg_mstatus.stie := new_sstatus.tie - reg_mstatus.ie := new_sstatus.ie - reg_mstatus.ie1 := new_sstatus.pie - reg_mstatus.prv1 := Mux(new_sstatus.ps, PRV_S, PRV_U) - if (!params(BuildFPU).isEmpty) reg_mstatus.fs := new_sstatus.fs - if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_sstatus.xs - } when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } @@ -353,7 +352,6 @@ class CSRFile extends CoreModule when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata } - when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata(vaddrBits,0).toSInt } when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata(vaddrBits-1,0).toSInt } when (decoded_addr(CSRs.scycle)) { reg_time := wdata.toUInt } when (decoded_addr(CSRs.stime)) { reg_time := wdata.toUInt } @@ -361,9 +359,22 @@ class CSRFile extends CoreModule when (decoded_addr(CSRs.stimecmp)) { reg_stimecmp := wdata(31,0).toUInt; r_irq_timer := Bool(false) } when (decoded_addr(CSRs.fromhost)) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } when (decoded_addr(CSRs.tohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } - when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } - when (decoded_addr(CSRs.sptbr)) { reg_sptbr := 
Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)).toUInt } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } + if (params(UseVM)) { + when (decoded_addr(CSRs.sstatus)) { + val new_sstatus = new SStatus().fromBits(wdata) + reg_mstatus.ssip := new_sstatus.sip + reg_mstatus.stie := new_sstatus.tie + reg_mstatus.ie := new_sstatus.ie + reg_mstatus.ie1 := new_sstatus.pie + reg_mstatus.prv1 := Mux(new_sstatus.ps, PRV_S, PRV_U) + if (!params(BuildFPU).isEmpty) reg_mstatus.fs := new_sstatus.fs + if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_sstatus.xs + } + when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } + when (decoded_addr(CSRs.sptbr)) { reg_sptbr := Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)).toUInt } + when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata(vaddrBits,0).toSInt } + } } io.host.ipi_rep.ready := true @@ -377,9 +388,9 @@ class CSRFile extends CoreModule reg_mstatus.ie := false reg_mstatus.prv := PRV_M reg_mstatus.ie1 := false - reg_mstatus.prv1 := PRV_S + reg_mstatus.prv1 := PRV_U /* hard-wired to 0 when missing user mode */ reg_mstatus.ie2 := false - reg_mstatus.prv2 := PRV_S + reg_mstatus.prv2 := PRV_U /* hard-wired to 0 when missing supervisor mode */ reg_mstatus.mprv := PRV_M reg_mstatus.zero2 := 0 reg_mstatus.vm := 0 From 90b31586ffcf3b67f34f174e49ed2dfcc2ad3542 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Mar 2015 23:48:47 -0700 Subject: [PATCH 0819/1087] Misc. 
CSR fixes/improvements - Support RV32 mstatus register - Don't ignore mstatus.stie bit - Support custom M-mode R/W CSRs for Raven chip --- rocket/src/main/scala/core.scala | 1 + rocket/src/main/scala/csr.scala | 34 +++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 51f7007b..6b64ac18 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -18,6 +18,7 @@ case object FastMulDiv extends Field[Boolean] case object CoreInstBits extends Field[Int] case object CoreDataBits extends Field[Int] case object CoreDCacheReqTagBits extends Field[Int] +case object NCustomMRWCSRs extends Field[Int] abstract trait CoreParameters extends UsesParameters { val xLen = params(XLen) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 805e2512..3e131281 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -11,21 +11,22 @@ import scala.math._ class MStatus extends Bundle { val sd = Bool() - val zero6 = UInt(width = 19) + val zero4 = UInt(width = 19) val ha = UInt(width = 4) val sa = UInt(width = 4) val ua = UInt(width = 4) - val zero5 = UInt(width = 1) + val sd_rv32 = UInt(width = 1) val xs = UInt(width = 2) val fs = UInt(width = 2) val mtie = Bool() val htie = Bool() val stie = Bool() - val zero4 = UInt(width = 1) - val vm = UInt(width = 4) val zero3 = UInt(width = 1) + val vm = UInt(width = 4) + val zero2 = UInt(width = 1) val mprv = UInt(width = 2) - val zero2 = UInt(width = 3) + val prv3 = UInt(width = 2) + val ie3 = Bool() val prv2 = UInt(width = 2) val ie2 = Bool() val prv1 = UInt(width = 2) @@ -90,10 +91,10 @@ class CSRFileIO extends CoreBundle { val exception = Bool(INPUT) val retire = UInt(INPUT, log2Up(1+retireWidth)) val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+retireWidth))) + val custom_mrw_csrs = Vec.fill(params(NCustomMRWCSRs))(UInt(INPUT, xLen)) val 
cause = UInt(INPUT, xLen) val mbadaddr_wen = Bool(INPUT) val pc = SInt(INPUT, vaddrBits+1) - val sret = Bool(INPUT) val fatc = Bool(OUTPUT) val time = UInt(OUTPUT, xLen) val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) @@ -141,7 +142,7 @@ class CSRFile extends CoreModule } } - checkInterrupt(PRV_S, r_irq_timer, 0) + checkInterrupt(PRV_S, reg_mstatus.stie && r_irq_timer, 0) checkInterrupt(PRV_S, reg_mstatus.ssip, 1) checkInterrupt(PRV_M, reg_mstatus.msip, 1) checkInterrupt(PRV_M, reg_fromhost != 0, 2) @@ -198,7 +199,6 @@ class CSRFile extends CoreModule CSRs.mbadaddr -> reg_mbadaddr, CSRs.mcause -> reg_mcause, CSRs.stimecmp -> reg_stimecmp, - CSRs.stvec -> reg_stvec, CSRs.hartid -> io.host.id, CSRs.send_ipi -> io.host.id, /* don't care */ CSRs.stats -> reg_stats, @@ -213,11 +213,19 @@ class CSRFile extends CoreModule read_mapping += CSRs.sptbr -> reg_sptbr read_mapping += CSRs.sasid -> UInt(0) read_mapping += CSRs.sepc -> reg_sepc + read_mapping += CSRs.stvec -> reg_stvec } for (i <- 0 until reg_uarch_counters.size) read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) + for (i <- 0 until params(NCustomMRWCSRs)) { + val addr = 0x790 + i // turn 0x790 into parameter CustomMRWCSRBase? 
+ require(addr >= 0x780 && addr <= 0x7ff, "custom MRW CSR address " + i + " is out of range") + require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use") + read_mapping += addr -> io.custom_mrw_csrs(i) + } + val addr = Mux(cpu_ren, io.rw.addr, host_pcr_bits.addr) val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) } @@ -257,6 +265,8 @@ class CSRFile extends CoreModule io.status.fs := reg_mstatus.fs.orR.toSInt // either off or dirty (no clean/initial support yet) io.status.xs := reg_mstatus.xs.orR.toSInt // either off or dirty (no clean/initial support yet) io.status.sd := reg_mstatus.xs.orR || reg_mstatus.fs.orR + if (xLen == 32) + io.status.sd_rv32 := io.status.sd when (io.exception || csr_xcpt) { reg_mstatus.ie := false @@ -352,7 +362,6 @@ class CSRFile extends CoreModule when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata } - when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata(vaddrBits-1,0).toSInt } when (decoded_addr(CSRs.scycle)) { reg_time := wdata.toUInt } when (decoded_addr(CSRs.stime)) { reg_time := wdata.toUInt } when (decoded_addr(CSRs.sinstret)) { reg_instret := wdata.toUInt } @@ -374,6 +383,7 @@ class CSRFile extends CoreModule when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } when (decoded_addr(CSRs.sptbr)) { reg_sptbr := Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)).toUInt } when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata(vaddrBits,0).toSInt } + when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata(vaddrBits-1,0).toSInt } } } @@ -391,6 +401,8 @@ class CSRFile extends CoreModule reg_mstatus.prv1 := PRV_U /* hard-wired to 0 when missing user mode */ reg_mstatus.ie2 := false reg_mstatus.prv2 := PRV_U /* hard-wired to 0 when missing supervisor mode */ + reg_mstatus.ie3 := false + 
reg_mstatus.prv3 := PRV_U /* hard-wired to 0 when missing hypervisor mode */ reg_mstatus.mprv := PRV_M reg_mstatus.zero2 := 0 reg_mstatus.vm := 0 @@ -400,11 +412,11 @@ class CSRFile extends CoreModule reg_mstatus.mtie := false reg_mstatus.fs := 0 reg_mstatus.xs := 0 - reg_mstatus.zero4 := 0 + reg_mstatus.sd_rv32 := false reg_mstatus.ua := 4 reg_mstatus.sa := 4 reg_mstatus.ha := 0 - reg_mstatus.zero5 := 0 + reg_mstatus.zero4 := 0 reg_mstatus.sd := false } } From 543ac91cf278a61481786a03d36ce1c27a1c3ae7 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Mar 2015 23:55:43 -0700 Subject: [PATCH 0820/1087] Misaligned fetches can't happen at the I$ anymore They are caught before the I$ ever sees them, so leverage that fact. --- rocket/src/main/scala/icache.scala | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 39c3c9fd..d7a820db 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -26,7 +26,6 @@ class FrontendResp extends CoreBundle { val pc = UInt(width = vaddrBits+1) // ID stage PC val data = Vec.fill(coreFetchWidth) (Bits(width = coreInstBits)) val mask = Bits(width = coreFetchWidth) - val xcpt_ma = Bool() val xcpt_if = Bool() } @@ -53,7 +52,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule val tlb = Module(new TLB) val s1_pc_ = Reg(UInt()) - val s1_pc = s1_pc_ & SInt(-2) // discard LSB of PC (throughout the pipeline) + val s1_pc = s1_pc_ & SInt(-coreInstBytes) // discard PC LSBS (this propagates down the pipeline) val s1_same_block = Reg(Bool()) val s2_valid = Reg(init=Bool(true)) val s2_pc = Reg(init=UInt(START_ADDR)) @@ -90,7 +89,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule } btb.io.req.valid := !stall && !icmiss - btb.io.req.bits.addr := s1_pc & SInt(-coreInstBytes) + btb.io.req.bits.addr := s1_pc btb.io.btb_update := io.cpu.btb_update 
btb.io.bht_update := io.cpu.bht_update btb.io.ras_update := io.cpu.ras_update @@ -113,7 +112,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule icache.io.resp.ready := !stall && !s1_same_block io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) - io.cpu.resp.bits.pc := s2_pc & SInt(-coreInstBytes) // discard PC LSBs + io.cpu.resp.bits.pc := s2_pc val fetch_data = icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) @@ -124,8 +123,6 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule val all_ones = UInt((1 << (coreFetchWidth+1))-1) val msk_pc = if (coreFetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(coreFetchWidth) -1+2,2) io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc) - - io.cpu.resp.bits.xcpt_ma := s2_pc(log2Up(coreInstBytes)-1,0) != UInt(0) io.cpu.resp.bits.xcpt_if := s2_xcpt_if io.cpu.btb_resp.valid := s2_btb_resp_valid From faada5f11004056c67fc7f989efdcee4b48d79e7 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 Mar 2015 00:20:58 -0700 Subject: [PATCH 0821/1087] Mask off LSBs of sepc/mepc/stvec Therefore, they cannot generate misaligned instruction exceptions. When a misaligned instruction exception does occur, mbadaddr retains the misaligned PC bits, so no information is actually lost. 
--- rocket/src/main/scala/csr.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 3e131281..76f54775 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -277,7 +277,7 @@ class CSRFile extends CoreModule reg_mstatus.prv2 := reg_mstatus.prv1 reg_mstatus.ie2 := reg_mstatus.ie1 - reg_mepc := io.pc + reg_mepc := io.pc & SInt(-coreInstBytes) reg_mcause := io.cause when (csr_xcpt) { reg_mcause := Causes.illegal_instruction @@ -358,7 +358,7 @@ class CSRFile extends CoreModule when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } - when (decoded_addr(CSRs.mepc)) { reg_mepc := wdata(vaddrBits,0).toSInt } + when (decoded_addr(CSRs.mepc)) { reg_mepc := wdata(vaddrBits,0).toSInt & SInt(-coreInstBytes) } when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata } @@ -382,8 +382,8 @@ class CSRFile extends CoreModule } when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } when (decoded_addr(CSRs.sptbr)) { reg_sptbr := Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)).toUInt } - when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata(vaddrBits,0).toSInt } - when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata(vaddrBits-1,0).toSInt } + when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata(vaddrBits,0).toSInt & SInt(-coreInstBytes) } + when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata(vaddrBits-1,0).toSInt & SInt(-coreInstBytes) } } } From d912ea265ede6d1c77b5defc50faa19cab3e07b1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 27 Mar 2015 16:20:59 -0700 Subject: [PATCH 0822/1087] New virtual memory 
implementation (Sv39) --- rocket/src/main/scala/core.scala | 3 +- rocket/src/main/scala/ptw.scala | 67 ++++++++++++++++++-------------- rocket/src/main/scala/tlb.scala | 25 ++++++------ 3 files changed, 53 insertions(+), 42 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 6b64ac18..6c1b7373 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -27,7 +27,8 @@ abstract trait CoreParameters extends UsesParameters { val pgIdxBits = params(PgIdxBits) val ppnBits = params(PPNBits) val vpnBits = params(VPNBits) - val permBits = params(PermBits) + val pgLevels = params(PgLevels) + val pgLevelBits = params(PgLevelBits) val asIdBits = params(ASIdBits) val retireWidth = params(RetireWidth) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index a156b1a9..4ee45779 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -8,7 +8,9 @@ import Util._ class PTWReq extends CoreBundle { val addr = UInt(width = vpnBits) - val perm = Bits(width = permBits) + val prv = Bits(width = 2) + val store = Bool() + val fetch = Bool() } class PTWResp extends CoreBundle { @@ -34,10 +36,20 @@ class PTE extends CoreBundle { val reserved = Bits(width = 2) val d = Bool() val r = Bool() - val perm = Bits(width = 6) val g = Bool() - val t = Bool() - val v = Bool() + val perm = Bits(width = 2) + val typ = Bits(width = 3) + + def table(dummy: Int = 0) = typ === 1 + def leaf(dummy: Int = 0) = typ >= 2 + def ur(dummy: Int = 0) = typ === 2 || typ >= 4 + def uw(dummy: Int = 0) = ur() && perm(0) + def ux(dummy: Int = 0) = ur() && perm(1) + def sr(dummy: Int = 0) = typ >= 3 + def sw(dummy: Int = 0) = Mux(typ >= 4, typ(0), typ === 3 && perm(0)) + def sx(dummy: Int = 0) = Mux(typ >= 4, typ(1), typ === 3 && perm(1)) + def access_ok(prv: Bits, store: Bool, fetch: Bool) = + Mux(prv(0), Mux(fetch, sx(), Mux(store, sw(), sr())), Mux(fetch, ux(), Mux(store, uw(), ur()))) } 
class PTW(n: Int) extends CoreModule @@ -48,19 +60,15 @@ class PTW(n: Int) extends CoreModule val dpath = new DatapathPTWIO } - val levels = 3 - val bitsPerLevel = vpnBits/levels - require(vpnBits == levels * bitsPerLevel) - val s_ready :: s_req :: s_wait :: s_set_dirty :: s_wait_dirty :: s_done :: s_error :: Nil = Enum(UInt(), 7) val state = Reg(init=s_ready) - val count = Reg(UInt(width = log2Up(levels))) + val count = Reg(UInt(width = log2Up(pgLevels))) val r_req = Reg(new PTWReq) val r_req_dest = Reg(Bits()) val r_pte = Reg(new PTE) - val vpn_idx = Vec((0 until levels).map(i => (r_req.addr >> (levels-i-1)*bitsPerLevel)(bitsPerLevel-1,0)))(count) + val vpn_idx = Vec((0 until pgLevels).map(i => (r_req.addr >> (pgLevels-i-1)*pgLevelBits)(pgLevelBits-1,0)))(count) val arb = Module(new RRArbiter(new PTWReq, n)) arb.io.in <> io.requestor.map(_.req) @@ -76,19 +84,19 @@ class PTW(n: Int) extends CoreModule } val (pte_cache_hit, pte_cache_data) = { - val size = log2Up(levels * 2) + val size = log2Up(pgLevels * 2) val plru = new PseudoLRU(size) val valid = Reg(init = Bits(0, size)) val tags = Mem(UInt(width = paddrBits), size) - val data = Mem(UInt(width = paddrBits - pgIdxBits), size) + val data = Mem(UInt(width = ppnBits), size) val hits = Vec(tags.map(_ === pte_addr)).toBits & valid val hit = hits.orR - when (io.mem.resp.valid && io.mem.resp.bits.data(1,0).andR && !hit) { + when (io.mem.resp.valid && pte.table() && !hit) { val r = Mux(valid.andR, plru.replace, PriorityEncoder(~valid)) valid(r) := true tags(r) := pte_addr - data(r) := io.mem.resp.bits.data(paddrBits-1,pgIdxBits) + data(r) := pte.ppn } when (hit && state === s_req) { plru.access(OHToUInt(hits)) } when (io.dpath.invalidate) { valid := 0 } @@ -96,12 +104,16 @@ class PTW(n: Int) extends CoreModule (hit, Mux1H(hits, data)) } - val perm_ok = (pte.perm & r_req.perm).orR - val is_store = r_req.perm(1) || r_req.perm(4) - val set_dirty_bit = perm_ok && !pte.t && (!pte.r || (is_store && !pte.d)) + val perm_ok = 
pte.access_ok(r_req.prv, r_req.store, r_req.fetch) + val set_dirty_bit = perm_ok && (!pte.r || (r_req.store && !pte.d)) when (io.mem.resp.valid && state === s_wait && !set_dirty_bit) { r_pte := pte } + + val pte_wdata = new PTE + pte_wdata := new PTE().fromBits(0) + pte_wdata.r := true + pte_wdata.d := r_req.store io.mem.req.valid := state === s_req || state === s_set_dirty io.mem.req.bits.phys := Bool(true) @@ -109,13 +121,13 @@ class PTW(n: Int) extends CoreModule io.mem.req.bits.typ := MT_D io.mem.req.bits.addr := pte_addr io.mem.req.bits.kill := Bool(false) - io.mem.req.bits.data := UInt(1<<9) | Mux(is_store, UInt(1<<10), UInt(0)) + io.mem.req.bits.data := pte_wdata.toBits val resp_err = state === s_error val resp_val = state === s_done || resp_err val r_resp_ppn = io.mem.req.bits.addr >> UInt(pgIdxBits) - val resp_ppn = Vec((0 until levels-1).map(i => Cat(r_resp_ppn >> bitsPerLevel*(levels-i-1), r_req.addr(bitsPerLevel*(levels-i-1)-1,0))) :+ r_resp_ppn)(count) + val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count) for (i <- 0 until io.requestor.size) { val me = r_req_dest === UInt(i) @@ -136,7 +148,7 @@ class PTW(n: Int) extends CoreModule count := UInt(0) } is (s_req) { - when (pte_cache_hit && count < levels-1) { + when (pte_cache_hit && count < pgLevels-1) { io.mem.req.valid := false state := s_req count := count + 1 @@ -151,15 +163,12 @@ class PTW(n: Int) extends CoreModule } when (io.mem.resp.valid) { state := s_error - when (pte.v) { - when (set_dirty_bit) { - state := s_set_dirty - }.elsewhen (!pte.t) { - state := s_done - }.elsewhen (count < levels-1) { - state := s_req - count := count + 1 - } + when (pte.table() && count < pgLevels-1) { + state := s_req + count := count + 1 + } + when (pte.leaf()) { + state := Mux(set_dirty_bit, s_set_dirty, s_done) } } } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 
4f276f56..71bf0cbd 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -119,7 +119,7 @@ class TLB extends TLBModule { val tag_hit_addr = OHToUInt(tag_cam.io.hits) // permission bit arrays - val valid_array = Reg(Bits()) // V bit of PTE (not equivalent to CAM tag valid bit!) + val valid_array = Reg(Bits()) // PTE is valid (not equivalent to CAM tag valid bit!) val ur_array = Reg(Bits()) // user read permission val uw_array = Reg(Bits()) // user write permission val ux_array = Reg(Bits()) // user execute permission @@ -128,16 +128,16 @@ class TLB extends TLBModule { val sx_array = Reg(Bits()) // supervisor execute permission val dirty_array = Reg(Bits()) // PTE dirty bit when (io.ptw.resp.valid) { - val perm = io.ptw.resp.bits.pte.perm & ~io.ptw.resp.bits.error.toSInt - tag_ram(r_refill_waddr) := io.ptw.resp.bits.pte.ppn + val pte = io.ptw.resp.bits.pte + tag_ram(r_refill_waddr) := pte.ppn valid_array := valid_array.bitSet(r_refill_waddr, !io.ptw.resp.bits.error) - ur_array := ur_array.bitSet(r_refill_waddr, perm(0) || perm(2)) - uw_array := uw_array.bitSet(r_refill_waddr, perm(1)) - ux_array := ux_array.bitSet(r_refill_waddr, perm(2)) - sr_array := sr_array.bitSet(r_refill_waddr, perm(3) || perm(5)) - sw_array := sw_array.bitSet(r_refill_waddr, perm(4)) - sx_array := sx_array.bitSet(r_refill_waddr, perm(5)) - dirty_array := dirty_array.bitSet(r_refill_waddr, io.ptw.resp.bits.pte.d) + ur_array(r_refill_waddr) := pte.ur() && !io.ptw.resp.bits.error + uw_array(r_refill_waddr) := pte.uw() && !io.ptw.resp.bits.error + ux_array(r_refill_waddr) := pte.ux() && !io.ptw.resp.bits.error + sr_array(r_refill_waddr) := pte.sr() && !io.ptw.resp.bits.error + sw_array(r_refill_waddr) := pte.sw() && !io.ptw.resp.bits.error + sx_array(r_refill_waddr) := pte.sx() && !io.ptw.resp.bits.error + dirty_array(r_refill_waddr) := pte.d } // high if there are any unused (invalid) entries in the TLB @@ -150,7 +150,6 @@ class TLB extends TLBModule { val priv_s = 
priv === PRV_S val priv_uses_vm = priv <= PRV_S val req_xwr = Cat(!r_req.store, r_req.store, !(r_req.instruction || r_req.store)) - val req_perm = Cat(req_xwr & priv_s.toSInt, req_xwr & ~priv_s.toSInt) val r_array = Mux(priv_s, sr_array, ur_array) val w_array = Mux(priv_s, sw_array, uw_array) @@ -183,7 +182,9 @@ class TLB extends TLBModule { io.ptw.req.valid := state === s_request io.ptw.req.bits.addr := r_refill_tag - io.ptw.req.bits.perm := req_perm + io.ptw.req.bits.prv := io.ptw.status.prv + io.ptw.req.bits.store := r_req.store + io.ptw.req.bits.fetch := r_req.instruction when (io.req.fire() && tlb_miss) { state := s_request From a369d8f17fc95ed8076730f52449927cef1d453c Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Thu, 2 Apr 2015 01:30:11 -0700 Subject: [PATCH 0823/1087] Add fpu port to the rocc interface --- rocket/src/main/scala/core.scala | 6 +++ rocket/src/main/scala/fpu.scala | 67 ++++++++++++++++++++++---------- rocket/src/main/scala/rocc.scala | 2 + 3 files changed, 54 insertions(+), 21 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 6c1b7373..0586048c 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -74,6 +74,12 @@ class Core extends Module with CoreParameters .foreach { fpu => dpath.io.fpu <> fpu.io.dpath ctrl.io.fpu <> fpu.io.ctrl + if(!params(BuildRoCC).isEmpty) { + io.rocc.fpu_req <> fpu.io.cp_req + io.rocc.fpu_resp <> fpu.io.cp_resp + } else { + fpu.io.cp_req.valid := Bool(false) + } } ctrl.io.dpath <> dpath.io.ctrl diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 9f069b45..36216edb 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -348,21 +348,32 @@ class FPU extends Module val io = new Bundle { val ctrl = (new CtrlFPUIO).flip val dpath = (new DpathFPUIO).flip + val cp_req = Decoupled(new FPInput()).flip //cp doesn't pay attn to kill sigs + val cp_resp = Decoupled(new FPResult()) } 
val ex_reg_valid = Reg(next=io.ctrl.valid, init=Bool(false)) + val req_valid = ex_reg_valid || io.cp_req.valid val ex_reg_inst = RegEnable(io.dpath.inst, io.ctrl.valid) - val mem_reg_valid = Reg(next=ex_reg_valid && !io.ctrl.killx, init=Bool(false)) + val ex_cp_valid = io.cp_req.valid && !ex_reg_valid + val mem_reg_valid = Reg(next=ex_reg_valid && !io.ctrl.killx || ex_cp_valid, init=Bool(false)) val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid) - val killm = io.ctrl.killm || io.ctrl.nack_mem - val wb_reg_valid = Reg(next=mem_reg_valid && !killm, init=Bool(false)) + val mem_cp_valid = Reg(next=ex_cp_valid, init=Bool(false)) + val killm = (io.ctrl.killm || io.ctrl.nack_mem) && !mem_cp_valid + val wb_reg_valid = Reg(next=mem_reg_valid && (!killm || mem_cp_valid), init=Bool(false)) + val wb_cp_valid = Reg(next=mem_cp_valid, init=Bool(false)) val fp_decoder = Module(new FPUDecoder) fp_decoder.io.inst := io.dpath.inst + val cp_ctrl = new FPUCtrlSigs + cp_ctrl <> io.cp_req.bits + io.cp_resp.valid := Bool(false) + io.cp_resp.bits.data := UInt(0) + val id_ctrl = fp_decoder.io.sigs - val ex_ctrl = RegEnable(id_ctrl, io.ctrl.valid) - val mem_ctrl = RegEnable(ex_ctrl, ex_reg_valid) + val ex_ctrl = Mux(ex_reg_valid, RegEnable(id_ctrl, io.ctrl.valid), cp_ctrl) + val mem_ctrl = RegEnable(ex_ctrl, req_valid) val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid) // load response @@ -391,35 +402,43 @@ class FPU extends Module val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_)) val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.dpath.fcsr_rm, ex_reg_inst(14,12)) + val cp_rs1 = io.cp_req.bits.in1 + val cp_rs2 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in3, io.cp_req.bits.in2) + val cp_rs3 = Mux(io.cp_req.bits.swap23, io.cp_req.bits.in2, io.cp_req.bits.in3) + val req = new FPInput req := ex_ctrl - req.rm := ex_rm - req.in1 := ex_rs1 - req.in2 := ex_rs2 - req.in3 := ex_rs3 - req.typ := ex_reg_inst(21,20) + req.rm := Mux(ex_reg_valid, ex_rm, io.cp_req.bits.rm) 
+ req.in1 := Mux(ex_reg_valid, ex_rs1, cp_rs1) + req.in2 := Mux(ex_reg_valid, ex_rs2, cp_rs2) + req.in3 := Mux(ex_reg_valid, ex_rs3, cp_rs3) + req.typ := Mux(ex_reg_valid, ex_reg_inst(21,20), io.cp_req.bits.typ) val sfma = Module(new FPUFMAPipe(params(SFMALatency), 23, 9)) - sfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && ex_ctrl.single + sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single sfma.io.in.bits := req val dfma = Module(new FPUFMAPipe(params(DFMALatency), 52, 12)) - dfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && !ex_ctrl.single + dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single dfma.io.in.bits := req val fpiu = Module(new FPToInt) - fpiu.io.in.valid := ex_reg_valid && (ex_ctrl.toint || ex_ctrl.cmd === FCMD_MINMAX) + fpiu.io.in.valid := req_valid && (ex_ctrl.toint || ex_ctrl.cmd === FCMD_MINMAX) fpiu.io.in.bits := req io.dpath.store_data := fpiu.io.out.bits.store io.dpath.toint_data := fpiu.io.out.bits.toint + when(fpiu.io.out.valid){//COLIN FIXME: are there conflicts since we now share a port? 
+ io.cp_resp.bits.data := fpiu.io.out.bits.toint + io.cp_resp.valid := Bool(true) + } val ifpu = Module(new IntToFP(3)) - ifpu.io.in.valid := ex_reg_valid && ex_ctrl.fromint + ifpu.io.in.valid := req_valid && ex_ctrl.fromint ifpu.io.in.bits := req - ifpu.io.in.bits.in1 := io.dpath.fromint_data + ifpu.io.in.bits.in1 := Mux(ex_reg_valid, io.dpath.fromint_data, cp_rs1) val fpmu = Module(new FPToFP(2)) - fpmu.io.in.valid := ex_reg_valid && ex_ctrl.fastpipe + fpmu.io.in.valid := req_valid && ex_ctrl.fastpipe fpmu.io.in.bits := req fpmu.io.lt := fpiu.io.out.bits.lt @@ -441,8 +460,8 @@ class FPU extends Module val wen = Reg(init=Bits(0, maxLatency-1)) val winfo = Vec.fill(maxLatency-1){Reg(Bits())} val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) - val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, ex_reg_valid) - val mem_winfo = Cat(pipeid(mem_ctrl), mem_reg_inst(11,7)) + val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid) + val mem_winfo = Cat(mem_cp_valid, pipeid(mem_ctrl), mem_reg_inst(11,7)) for (i <- 0 until maxLatency-2) { when (wen(i+1)) { winfo(i) := winfo(i+1) } @@ -461,9 +480,15 @@ class FPU extends Module val waddr = winfo(0)(4,0).toUInt val wsrc = winfo(0) >> waddr.getWidth + val wcp = winfo(0)(waddr.getWidth+log2Up(pipes.size)) val wdata = Vec(pipes.map(_.wdata))(wsrc) val wexc = Vec(pipes.map(_.wexc))(wsrc) - when (wen(0)) { regfile(waddr(4,0)) := wdata } + when (wen(0) && !wcp) { regfile(waddr(4,0)) := wdata } + when (wen(0) && wcp) { + io.cp_resp.bits.data := wdata + io.cp_resp.valid := Bool(true) + } + io.cp_req.ready := !ex_reg_valid val wb_toint_valid = wb_reg_valid && wb_ctrl.toint val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint) @@ -478,8 +503,8 @@ class FPU extends Module io.ctrl.nack_mem := units_busy || write_port_busy 
io.ctrl.dec <> fp_decoder.io.sigs def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_) - io.ctrl.sboard_set := wb_reg_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl))) - io.ctrl.sboard_clr := wen(0) && useScoreboard(x => wsrc === UInt(x._2)) + io.ctrl.sboard_set := wb_reg_valid && !wb_cp_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl))) + io.ctrl.sboard_clr := wen(0) && !wb_cp_valid && useScoreboard(x => wsrc === UInt(x._2)) io.ctrl.sboard_clra := waddr // we don't currently support round-max-magnitude (rm=4) io.ctrl.illegal_rm := ex_rm(2) && ex_ctrl.round diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 0f044dae..bf49aca5 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -49,6 +49,8 @@ class RoCCInterface extends Bundle val iptw = new TLBPTWIO val dptw = new TLBPTWIO val pptw = new TLBPTWIO + val fpu_req = Decoupled(new FPInput) + val fpu_resp = Decoupled(new FPResult).flip val exception = Bool(INPUT) } From bce62d5774c98c0c4d03bd87b57c3525b11ff6d6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 4 Apr 2015 15:19:15 -0700 Subject: [PATCH 0824/1087] Update PTE format to reflect reserved bits --- rocket/src/main/scala/ptw.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 4ee45779..4632cca2 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -33,7 +33,8 @@ class DatapathPTWIO extends CoreBundle { class PTE extends CoreBundle { val ppn = Bits(width = ppnBits) - val reserved = Bits(width = 2) + val reserved = Bits(width = 16) + val reserved_for_software = Bits(width = 2) val d = Bool() val r = Bool() val g = Bool() From fe27b9b1b25fbcaf23ab5dc80e827b712dc9f7b3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 4 Apr 2015 15:20:18 -0700 Subject: [PATCH 0825/1087] Support 
writing sstatus.fs even without an FPU --- rocket/src/main/scala/csr.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 76f54775..8e0c875a 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -377,7 +377,7 @@ class CSRFile extends CoreModule reg_mstatus.ie := new_sstatus.ie reg_mstatus.ie1 := new_sstatus.pie reg_mstatus.prv1 := Mux(new_sstatus.ps, PRV_S, PRV_U) - if (!params(BuildFPU).isEmpty) reg_mstatus.fs := new_sstatus.fs + reg_mstatus.fs := new_sstatus.fs // even without an FPU if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_sstatus.xs } when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } From 9ade0e41cc343e996a1161b67007eaa16f549d9f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 4 Apr 2015 16:39:17 -0700 Subject: [PATCH 0826/1087] Integrate divide/sqrt unit --- rocket/src/main/scala/core.scala | 1 + rocket/src/main/scala/ctrl.scala | 10 ++ rocket/src/main/scala/fpu.scala | 219 +++++++++++++++++++------------ 3 files changed, 149 insertions(+), 81 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 6c1b7373..33bb617b 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -7,6 +7,7 @@ import Util._ import uncore._ case object BuildFPU extends Field[Option[() => FPU]] +case object FDivSqrt extends Field[Boolean] case object XLen extends Field[Int] case object NMultXpr extends Field[Int] case object FetchWidth extends Field[Int] diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 16ce5956..b083371c 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -296,6 +296,15 @@ object FDecode extends DecodeConstants FSD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N)) } +object FDivSqrtDecode extends DecodeConstants +{ + 
val table = Array( + FDIV_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FDIV_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSQRT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSQRT_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N)) +} + object RoCCDecode extends DecodeConstants { val table = Array( @@ -345,6 +354,7 @@ class Control extends CoreModule var decode_table = XDecode.table if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table + if (!params(BuildFPU).isEmpty && params(FDivSqrt)) decode_table ++= FDivSqrtDecode.table if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table val id_ctrl = new IntCtrlSigs().decode(io.dpath.inst, decode_table) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 9f069b45..91a488cb 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -20,8 +20,8 @@ object FPConstants val FCMD_MSUB = Bits("b1??01") val FCMD_NMSUB = Bits("b1??10") val FCMD_NMADD = Bits("b1??11") - val FCMD_DIV = Bits("b?0?11") - val FCMD_SQRT = Bits("b?1?11") + val FCMD_DIV = Bits("b?0011") + val FCMD_SQRT = Bits("b?1011") val FCMD_SGNJ = Bits("b??1?0") val FCMD_MINMAX = Bits("b?01?1") val FCMD_CVT_FF = Bits("b??0??") @@ -45,13 +45,17 @@ class FPUCtrlSigs extends Bundle val ren1 = Bool() val ren2 = Bool() val ren3 = Bool() + val swap12 = Bool() val swap23 = Bool() val single = Bool() val fromint = Bool() val toint = Bool() val fastpipe = Bool() val fma = Bool() + val div = Bool() + val sqrt = Bool() val round = Bool() + val wflags = Bool() } class FPUDecoder extends Module @@ -65,69 +69,73 @@ class FPUDecoder extends Module val Y = Bool(true) val X = Bool(false) val decoder = DecodeLogic(io.inst, - List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X), - Array(FLW -> List(FCMD_X, 
Y,Y,N,N,N,X,Y,N,N,N,N,N), - FLD -> List(FCMD_X, Y,Y,N,N,N,X,N,N,N,N,N,N), - FSW -> List(FCMD_MV_XF, Y,N,N,Y,N,X,Y,N,Y,N,N,N), - FSD -> List(FCMD_MV_XF, Y,N,N,Y,N,X,N,N,Y,N,N,N), - FMV_S_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,Y,Y,N,N,N,Y), - FMV_D_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,N,Y,N,N,N,Y), - FCVT_S_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,Y,Y,N,N,N,Y), - FCVT_S_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,Y,Y,N,N,N,Y), - FCVT_S_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,Y,Y,N,N,N,Y), - FCVT_S_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,Y,Y,N,N,N,Y), - FCVT_D_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,N,Y,N,N,N,Y), - FCVT_D_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,N,Y,N,N,N,Y), - FCVT_D_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,N,Y,N,N,N,Y), - FCVT_D_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,N,Y,N,N,N,Y), - FMV_X_S -> List(FCMD_MV_XF, N,N,Y,N,N,X,Y,N,Y,N,N,Y), - FMV_X_D -> List(FCMD_MV_XF, N,N,Y,N,N,X,N,N,Y,N,N,Y), - FCLASS_S -> List(FCMD_MV_XF, N,N,Y,N,N,X,Y,N,Y,N,N,Y), - FCLASS_D -> List(FCMD_MV_XF, N,N,Y,N,N,X,N,N,Y,N,N,Y), - FCVT_W_S -> List(FCMD_CVT_IF, N,N,Y,N,N,X,Y,N,Y,N,N,Y), - FCVT_WU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,X,Y,N,Y,N,N,Y), - FCVT_L_S -> List(FCMD_CVT_IF, N,N,Y,N,N,X,Y,N,Y,N,N,Y), - FCVT_LU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,X,Y,N,Y,N,N,Y), - FCVT_W_D -> List(FCMD_CVT_IF, N,N,Y,N,N,X,N,N,Y,N,N,Y), - FCVT_WU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,X,N,N,Y,N,N,Y), - FCVT_L_D -> List(FCMD_CVT_IF, N,N,Y,N,N,X,N,N,Y,N,N,Y), - FCVT_LU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,X,N,N,Y,N,N,Y), - FCVT_S_D -> List(FCMD_CVT_FF, N,Y,Y,N,N,X,Y,N,N,Y,N,Y), - FCVT_D_S -> List(FCMD_CVT_FF, N,Y,Y,N,N,X,N,N,N,Y,N,Y), - FEQ_S -> List(FCMD_CMP, N,N,Y,Y,N,N,Y,N,Y,N,N,N), - FLT_S -> List(FCMD_CMP, N,N,Y,Y,N,N,Y,N,Y,N,N,N), - FLE_S -> List(FCMD_CMP, N,N,Y,Y,N,N,Y,N,Y,N,N,N), - FEQ_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,Y,N,N,N), - FLT_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,Y,N,N,N), - FLE_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,Y,N,N,N), - FSGNJ_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,Y,N,N,Y,N,N), - FSGNJN_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,Y,N,N,Y,N,N), - FSGNJX_S -> 
List(FCMD_SGNJ, N,Y,Y,Y,N,N,Y,N,N,Y,N,N), - FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,Y,N,N), - FSGNJN_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,Y,N,N), - FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,Y,N,N), - FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,Y,N,N,Y,N,N), - FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,Y,N,N,Y,N,N), - FMIN_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,Y,N,N), - FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,Y,N,N), - FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,Y,Y,N,N,N,Y,Y), - FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,Y,Y,N,N,N,Y,Y), - FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,Y,N,N,N,Y,Y), - FADD_D -> List(FCMD_ADD, N,Y,Y,Y,N,Y,N,N,N,N,Y,Y), - FSUB_D -> List(FCMD_SUB, N,Y,Y,Y,N,Y,N,N,N,N,Y,Y), - FMUL_D -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,Y,Y), - FMADD_S -> List(FCMD_MADD, N,Y,Y,Y,Y,N,Y,N,N,N,Y,Y), - FMSUB_S -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,Y,N,N,N,Y,Y), - FNMADD_S -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,Y,N,N,N,Y,Y), - FNMSUB_S -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,Y,N,N,N,Y,Y), - FMADD_D -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,Y,Y), - FMSUB_D -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,Y,Y), - FNMADD_D -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,Y,Y), - FNMSUB_D -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,Y,Y) + List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X), + Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N,N), + FLD -> List(FCMD_X, Y,Y,N,N,N,X,X,N,N,N,N,N,N,N,N,N), + FSW -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,Y,N,Y,N,N,N,N,N,N), + FSD -> List(FCMD_MV_XF, Y,N,N,Y,N,Y,X,N,N,Y,N,N,N,N,N,N), + FMV_S_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,N), + FMV_D_X -> List(FCMD_MV_FX, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,N), + FCVT_S_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y), + FCVT_S_WU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y), + FCVT_S_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y), + FCVT_S_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,Y,Y,N,N,N,N,N,Y,Y), + FCVT_D_W -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y), + FCVT_D_WU-> List(FCMD_CVT_FI, 
N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y), + FCVT_D_L -> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y), + FCVT_D_LU-> List(FCMD_CVT_FI, N,Y,N,N,N,X,X,N,Y,N,N,N,N,N,Y,Y), + FMV_X_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N), + FMV_X_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N), + FCLASS_S -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,N), + FCLASS_D -> List(FCMD_MV_XF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,N), + FCVT_W_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y), + FCVT_WU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y), + FCVT_L_S -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y), + FCVT_LU_S-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,Y,N,Y,N,N,N,N,Y,Y), + FCVT_W_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y), + FCVT_WU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y), + FCVT_L_D -> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y), + FCVT_LU_D-> List(FCMD_CVT_IF, N,N,Y,N,N,N,X,N,N,Y,N,N,N,N,Y,Y), + FCVT_S_D -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,Y,N,N,Y,N,N,N,Y,Y), + FCVT_D_S -> List(FCMD_CVT_FF, N,Y,Y,N,N,N,X,N,N,N,Y,N,N,N,Y,Y), + FEQ_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y), + FLT_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y), + FLE_S -> List(FCMD_CMP, N,N,Y,Y,N,N,N,Y,N,Y,N,N,N,N,N,Y), + FEQ_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y), + FLT_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y), + FLE_D -> List(FCMD_CMP, N,N,Y,Y,N,N,N,N,N,Y,N,N,N,N,N,Y), + FSGNJ_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N), + FSGNJN_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N), + FSGNJX_S -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,N), + FSGNJ_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N), + FSGNJN_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N), + FSGNJX_D -> List(FCMD_SGNJ, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,N), + FMIN_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y), + FMAX_S -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,Y,N,N,Y,N,N,N,N,Y), + FMIN_D -> List(FCMD_MINMAX, 
N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y), + FMAX_D -> List(FCMD_MINMAX, N,Y,Y,Y,N,N,N,N,N,N,Y,N,N,N,N,Y), + FADD_S -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y), + FSUB_S -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,Y,N,N,N,Y,N,N,Y,Y), + FMUL_S -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,Y,N,N,N,Y,N,N,Y,Y), + FADD_D -> List(FCMD_ADD, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y), + FSUB_D -> List(FCMD_SUB, N,Y,Y,Y,N,N,Y,N,N,N,N,Y,N,N,Y,Y), + FMUL_D -> List(FCMD_MUL, N,Y,Y,Y,N,N,N,N,N,N,N,Y,N,N,Y,Y), + FMADD_S -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y), + FMSUB_S -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y), + FNMADD_S -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y), + FNMSUB_S -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,Y,N,N,N,Y,N,N,Y,Y), + FMADD_D -> List(FCMD_MADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y), + FMSUB_D -> List(FCMD_MSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y), + FNMADD_D -> List(FCMD_NMADD, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y), + FNMSUB_D -> List(FCMD_NMSUB, N,Y,Y,Y,Y,N,N,N,N,N,N,Y,N,N,Y,Y), + FDIV_S -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,Y,N,N,N,N,Y,N,Y,Y), + FSQRT_S -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,Y,N,N,N,N,N,Y,Y,Y), + FDIV_D -> List(FCMD_DIV, N,Y,Y,Y,N,N,N,N,N,N,N,N,Y,N,Y,Y), + FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y) )) val s = io.sigs - Vec(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap23, s.single, s.fromint, - s.toint, s.fastpipe, s.fma, s.round) := decoder + Vec(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, s.swap23, s.single, s.fromint, + s.toint, s.fastpipe, s.fma, s.div, s.sqrt, s.round, s.wflags) := decoder } class DpathFPUIO extends Bundle { @@ -177,6 +185,7 @@ class FPToInt extends Module { val io = new Bundle { val in = Valid(new FPInput).flip + val as_double = new FPInput().asOutput val out = Valid(new Bundle { val lt = Bool() val store = Bits(width = 64) @@ -227,6 +236,7 @@ class FPToInt extends Module io.out.valid := valid io.out.bits.lt := dcmp.io.a_lt_b + io.as_double := in } class IntToFP(val latency: Int) extends Module @@ -380,13 +390,16 @@ 
class FPU extends Module val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt())) when (io.ctrl.valid) { - when (id_ctrl.ren1) { ex_ra1 := io.dpath.inst(19,15) } - when (id_ctrl.ren3) { ex_ra3 := io.dpath.inst(31,27) } - when (id_ctrl.ren2) { - when ( id_ctrl.ldst) { ex_ra1 := io.dpath.inst(24,20) } - when (!id_ctrl.ldst && !id_ctrl.swap23) { ex_ra2 := io.dpath.inst(24,20) } - when (!id_ctrl.ldst && id_ctrl.swap23) { ex_ra3 := io.dpath.inst(24,20) } + when (id_ctrl.ren1) { + when (!id_ctrl.swap12) { ex_ra1 := io.dpath.inst(19,15) } + when (id_ctrl.swap12) { ex_ra2 := io.dpath.inst(19,15) } } + when (id_ctrl.ren2) { + when (id_ctrl.swap12) { ex_ra1 := io.dpath.inst(24,20) } + when (id_ctrl.swap23) { ex_ra3 := io.dpath.inst(24,20) } + when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.dpath.inst(24,20) } + } + when (id_ctrl.ren3) { ex_ra3 := io.dpath.inst(31,27) } } val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_)) val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.dpath.fcsr_rm, ex_reg_inst(14,12)) @@ -408,7 +421,7 @@ class FPU extends Module dfma.io.in.bits := req val fpiu = Module(new FPToInt) - fpiu.io.in.valid := ex_reg_valid && (ex_ctrl.toint || ex_ctrl.cmd === FCMD_MINMAX) + fpiu.io.in.valid := ex_reg_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX) fpiu.io.in.bits := req io.dpath.store_data := fpiu.io.out.bits.store io.dpath.toint_data := fpiu.io.out.bits.toint @@ -423,6 +436,15 @@ class FPU extends Module fpmu.io.in.bits := req fpmu.io.lt := fpiu.io.out.bits.lt + val divSqrt = Module(new hardfloat.divSqrtRecodedFloat64) + val divSqrt_inReady = Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div) + val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt + val divSqrt_wen = Reg(next=Bool(false)) + val divSqrt_waddr = Reg(Bits()) + val divSqrt_wdata = Bits() + val divSqrt_flags = Bits() + val divSqrt_in_flight = Reg(init=Bool(false)) + // writeback 
arbitration case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: Bits, wexc: Bits) val pipes = List( @@ -459,28 +481,63 @@ class FPU extends Module } } - val waddr = winfo(0)(4,0).toUInt - val wsrc = winfo(0) >> waddr.getWidth - val wdata = Vec(pipes.map(_.wdata))(wsrc) + val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt) + val wsrc = winfo(0) >> 5 + val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.wdata))(wsrc)) val wexc = Vec(pipes.map(_.wexc))(wsrc) - when (wen(0)) { regfile(waddr(4,0)) := wdata } + when (wen(0) || divSqrt_wen) { regfile(waddr) := wdata } val wb_toint_valid = wb_reg_valid && wb_ctrl.toint val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint) - io.dpath.fcsr_flags.valid := wb_toint_valid || wen(0) + io.dpath.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0) io.dpath.fcsr_flags.bits := Mux(wb_toint_valid, wb_toint_exc, UInt(0)) | + Mux(divSqrt_wen, divSqrt_flags, UInt(0)) | Mux(wen(0), wexc, UInt(0)) - val fp_inflight = wb_reg_valid && wb_ctrl.toint || wen.orR - val units_busy = Bool(false) //mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid)) - io.ctrl.fcsr_rdy := !fp_inflight - io.ctrl.nack_mem := units_busy || write_port_busy + val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR) // || mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid)) + io.ctrl.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight) + io.ctrl.nack_mem := units_busy || write_port_busy || divSqrt_in_flight io.ctrl.dec <> fp_decoder.io.sigs def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_) - io.ctrl.sboard_set := wb_reg_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl))) - io.ctrl.sboard_clr := wen(0) && useScoreboard(x 
=> wsrc === UInt(x._2)) + io.ctrl.sboard_set := wb_reg_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt) + io.ctrl.sboard_clr := divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2))) io.ctrl.sboard_clra := waddr // we don't currently support round-max-magnitude (rm=4) io.ctrl.illegal_rm := ex_rm(2) && ex_ctrl.round + + divSqrt_wdata := 0 + divSqrt_flags := 0 + if (params(FDivSqrt)) { + val divSqrt_single = Reg(Bool()) + val divSqrt_rm = Reg(Bits()) + val divSqrt_flags_double = Reg(Bits()) + val divSqrt_wdata_double = Reg(Bits()) + + def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1 + val divSqrt_wb_hazard = wen.orR + divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && !io.ctrl.killm && (mem_ctrl.div || mem_ctrl.sqrt) + divSqrt.io.sqrtOp := mem_ctrl.sqrt + divSqrt.io.a := fpiu.io.as_double.in1 + divSqrt.io.b := fpiu.io.as_double.in2 + divSqrt.io.roundingMode := fpiu.io.as_double.rm + + when (divSqrt.io.inValid && divSqrt_inReady) { + divSqrt_in_flight := true + divSqrt_single := mem_ctrl.single + divSqrt_waddr := mem_reg_inst(11,7) + divSqrt_rm := divSqrt.io.roundingMode + } + + when (divSqrt_outValid) { + divSqrt_wen := true + divSqrt_wdata_double := divSqrt.io.out + divSqrt_in_flight := false + divSqrt_flags_double := divSqrt.io.exceptionFlags + } + + val divSqrt_toSingle = hardfloat.recodedFloatNToRecodedFloatM(divSqrt_wdata_double, ex_rm, 52, 12, 23, 9) + divSqrt_wdata := Mux(divSqrt_single, divSqrt_toSingle._1, divSqrt_wdata_double) + divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle._2, Bits(0)) + } } From bd72db92c13db7512ef29b5118d42ef02bf395e5 Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Tue, 7 Apr 2015 15:02:02 -0700 Subject: [PATCH 0827/1087] update rocc port to use fdiv/sqrt --- rocket/src/main/scala/fpu.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git 
a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 29802b83..3b5a6c1f 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -440,7 +440,7 @@ class FPU extends Module fpiu.io.in.bits := req io.dpath.store_data := fpiu.io.out.bits.store io.dpath.toint_data := fpiu.io.out.bits.toint - when(fpiu.io.out.valid){//COLIN FIXME: are there conflicts since we now share a port? + when(fpiu.io.out.valid && mem_cp_valid && !(mem_ctrl.div || mem_ctrl.sqrt)){ io.cp_resp.bits.data := fpiu.io.out.bits.toint io.cp_resp.valid := Bool(true) } @@ -463,6 +463,7 @@ class FPU extends Module val divSqrt_wdata = Bits() val divSqrt_flags = Bits() val divSqrt_in_flight = Reg(init=Bool(false)) + val divSqrt_cp = Reg(init=Bool(false)) // writeback arbitration case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: Bits, wexc: Bits) @@ -505,8 +506,8 @@ class FPU extends Module val wcp = winfo(0)(5+log2Up(pipes.size)) val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.wdata))(wsrc)) val wexc = Vec(pipes.map(_.wexc))(wsrc) - when (!wcp && (wen(0) || divSqrt_wen)) { regfile(waddr) := wdata } - when (wcp && (wen(0) || divSqrt_wen)) { + when ((!wcp && wen(0)) || (!divSqrt_cp && divSqrt_wen)) { regfile(waddr) := wdata } + when ((wcp && wen(0)) || (divSqrt_cp && divSqrt_wen)) { io.cp_resp.bits.data := wdata io.cp_resp.valid := Bool(true) } @@ -541,7 +542,7 @@ class FPU extends Module def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1 val divSqrt_wb_hazard = wen.orR - divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && !io.ctrl.killm && (mem_ctrl.div || mem_ctrl.sqrt) + divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && (!io.ctrl.killm || mem_cp_valid) && (mem_ctrl.div || mem_ctrl.sqrt) divSqrt.io.sqrtOp := mem_ctrl.sqrt divSqrt.io.a := fpiu.io.as_double.in1 divSqrt.io.b := fpiu.io.as_double.in2 @@ -552,6 +553,7 @@ class 
FPU extends Module divSqrt_single := mem_ctrl.single divSqrt_waddr := mem_reg_inst(11,7) divSqrt_rm := divSqrt.io.roundingMode + divSqrt_cp := mem_cp_valid } when (divSqrt_outValid) { From 11dbd4221a0dd2af5bbf401069ef7254f773455c Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Fri, 10 Apr 2015 17:53:47 -0700 Subject: [PATCH 0828/1087] Fixed front-end to support four-wide fetch. --- rocket/src/main/scala/icache.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index d7a820db..b992dd5f 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -114,8 +114,13 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) io.cpu.resp.bits.pc := s2_pc - - val fetch_data = icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) + var fetch_data:Bits = null + require (coreFetchWidth <= 4) + if (coreFetchWidth == 4) { + fetch_data = icache.io.resp.bits.datablock + } else { + fetch_data = icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) + } for (i <- 0 until coreFetchWidth) { io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) } From 2f88c5ca9dcde6dd1dc9647c8dde22c2d70891da Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Sat, 11 Apr 2015 02:16:44 -0700 Subject: [PATCH 0829/1087] Renamed PCR to CSR --- rocket/src/main/scala/consts.scala | 2 +- rocket/src/main/scala/ctrl.scala | 6 +++--- rocket/src/main/scala/dpath.scala | 32 +++++++++++++++--------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 030753d0..4d7cb68d 100644 --- 
a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -21,7 +21,7 @@ trait ScalarOpConstants { val PC_EX = UInt(0, 2) val PC_MEM = UInt(1, 2) val PC_WB = UInt(2, 2) - val PC_PCR = UInt(3, 2) + val PC_CSR = UInt(3, 2) val A1_X = Bits("b??", 2) val A1_ZERO = UInt(0, 2) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index b083371c..182d326c 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -553,7 +553,7 @@ class Control extends CoreModule take_pc_wb := replay_wb || wb_xcpt || io.dpath.eret io.dpath.sel_pc := - Mux(wb_xcpt || io.dpath.eret, PC_PCR, // exception or [m|s]ret + Mux(wb_xcpt || io.dpath.eret, PC_CSR, // exception or [m|s]ret Mux(replay_wb, PC_WB, // replay PC_MEM)) @@ -589,7 +589,7 @@ class Control extends CoreModule io.dpath.bypass_src(i) := PriorityEncoder(doBypass(i)) } - // stall for RAW/WAW hazards on PCRs, loads, AMOs, and mul/div in execute stage. + // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage. val id_renx1_not0 = id_ctrl.rxs1 && id_raddr1 != UInt(0) val id_renx2_not0 = id_ctrl.rxs2 && id_raddr2 != UInt(0) val id_wen_not0 = id_ctrl.wxd && id_waddr != UInt(0) @@ -605,7 +605,7 @@ class Control extends CoreModule io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex) - // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage. + // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage. 
val mem_mem_cmd_bh = if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass else Bool(true) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index bc25a1bd..a60ff72f 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -166,17 +166,17 @@ class Datapath extends CoreModule require(params(CoreDCacheReqTagBits) >= 6) // processor control regfile read - val pcr = Module(new CSRFile) - pcr.io.host <> io.host - pcr.io <> io.ctrl - pcr.io <> io.fpu - pcr.io.rocc <> io.rocc - pcr.io.pc := wb_reg_pc - pcr.io.uarch_counters.foreach(_ := Bool(false)) + val csr = Module(new CSRFile) + csr.io.host <> io.host + csr.io <> io.ctrl + csr.io <> io.fpu + csr.io.rocc <> io.rocc + csr.io.pc := wb_reg_pc + csr.io.uarch_counters.foreach(_ := Bool(false)) - io.ptw.ptbr := pcr.io.ptbr - io.ptw.invalidate := pcr.io.fatc - io.ptw.status := pcr.io.status + io.ptw.ptbr := csr.io.ptbr + io.ptw.invalidate := csr.io.fatc + io.ptw.status := csr.io.status // memory stage mem_reg_kill := ex_reg_kill @@ -246,7 +246,7 @@ class Datapath extends CoreModule } wb_wdata := Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword, Mux(io.ctrl.ll_wen, ll_wdata, - Mux(io.ctrl.csr_cmd != CSR.N, pcr.io.rw.rdata, + Mux(io.ctrl.csr_cmd != CSR.N, csr.io.rw.rdata, wb_reg_wdata))) val wb_wen = io.ctrl.ll_wen || io.ctrl.wb_wen @@ -258,9 +258,9 @@ class Datapath extends CoreModule io.ctrl.fp_sboard_clra := dmem_resp_waddr // processor control regfile write - pcr.io.rw.addr := wb_reg_inst(31,20) - pcr.io.rw.cmd := io.ctrl.csr_cmd - pcr.io.rw.wdata := wb_reg_wdata + csr.io.rw.addr := wb_reg_inst(31,20) + csr.io.rw.cmd := io.ctrl.csr_cmd + csr.io.rw.wdata := wb_reg_wdata io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst) io.rocc.cmd.bits.rs1 := wb_reg_wdata @@ -269,7 +269,7 @@ class Datapath extends CoreModule // hook up I$ io.imem.req.bits.pc := Mux(io.ctrl.sel_pc === PC_MEM, mem_npc, - 
Mux(io.ctrl.sel_pc === PC_PCR, pcr.io.evec, + Mux(io.ctrl.sel_pc === PC_CSR, csr.io.evec, wb_reg_pc)).toUInt // PC_WB io.imem.btb_update.bits.pc := mem_reg_pc io.imem.btb_update.bits.target := io.imem.req.bits.pc @@ -283,7 +283,7 @@ class Datapath extends CoreModule io.ctrl.wb_waddr := wb_reg_inst(11,7) printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", - io.host.id, pcr.io.time(32,0), io.ctrl.retire, wb_reg_pc, + io.host.id, csr.io.time(32,0), io.ctrl.retire, wb_reg_pc, Mux(wb_wen, wb_waddr, UInt(0)), wb_wdata, wb_wen, wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), From 8fc2d38ca90093f11192ba8bd54f093b97ac242a Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Sat, 11 Apr 2015 02:20:34 -0700 Subject: [PATCH 0830/1087] Removed unnecessary signal in CSRIO --- rocket/src/main/scala/csr.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 8e0c875a..fa263d19 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -93,7 +93,6 @@ class CSRFileIO extends CoreBundle { val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+retireWidth))) val custom_mrw_csrs = Vec.fill(params(NCustomMRWCSRs))(UInt(INPUT, xLen)) val cause = UInt(INPUT, xLen) - val mbadaddr_wen = Bool(INPUT) val pc = SInt(INPUT, vaddrBits+1) val fatc = Bool(OUTPUT) val time = UInt(OUTPUT, xLen) From a564f08702593b59b17f3cac5636e9924ed7dcc4 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Sat, 11 Apr 2015 02:26:33 -0700 Subject: [PATCH 0831/1087] Rename dmem.sret signal to more accurate invalidate_lr --- rocket/src/main/scala/ctrl.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 4 ++-- rocket/src/main/scala/rocc.scala | 2 +- rocket/src/main/scala/tile.scala | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 
182d326c..3e0e3a8c 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -674,7 +674,7 @@ class Control extends CoreModule io.dmem.req.bits.cmd := ex_ctrl.mem_cmd io.dmem.req.bits.typ := ex_ctrl.mem_type io.dmem.req.bits.phys := Bool(false) - io.dmem.sret := wb_xcpt // obviously not an sret, but sufficient + io.dmem.invalidate_lr := wb_xcpt io.rocc.cmd.valid := wb_rocc_val io.rocc.exception := wb_xcpt && io.dpath.status.xs.orR diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3f24ba4c..fa82b485 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -83,7 +83,7 @@ class HellaCacheIO extends CoreBundle { val resp = Valid(new HellaCacheResp).flip val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip val xcpt = (new HellaCacheExceptions).asInput - val sret = Bool(OUTPUT) + val invalidate_lr = Bool(OUTPUT) val ordered = Bool(INPUT) } @@ -752,7 +752,7 @@ class HellaCache extends L1HellaCacheModule { lrsc_count := 0 } } - when (io.cpu.sret) { lrsc_count := 0 } + when (io.cpu.invalidate_lr) { lrsc_count := 0 } val s2_data = Vec.fill(nWays){Bits(width = encRowBits)} for (w <- 0 until nWays) { diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 0f044dae..79332c32 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -123,7 +123,7 @@ class AccumulatorExample extends RoCC io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores) io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1 io.mem.req.bits.data := Bits(0) // we're not performing any stores... 
- io.mem.sret := false + io.mem.invalidate_lr := false io.imem.acquire.valid := false io.imem.grant.ready := false diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index def17452..edb5566c 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -25,7 +25,7 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { val ptw = Module(new PTW(params(NPTWPorts))) val core = Module(new Core, { case CoreName => "Rocket" }) - dcache.io.cpu.sret := core.io.dmem.sret // Bypass sret to dcache + dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) dcArb.io.requestor(0) <> ptw.io.mem dcArb.io.requestor(1) <> core.io.dmem From 4d6ebded02f182e31912f7b696a3cdece1e198a4 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Sat, 11 Apr 2015 02:58:34 -0700 Subject: [PATCH 0832/1087] Added assert to nbdcache --- rocket/src/main/scala/nbdcache.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index fa82b485..3b55ec8c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -686,6 +686,10 @@ class HellaCache extends L1HellaCacheModule { io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st + assert (!(Reg(next= + (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) && + io.cpu.resp.valid), "DCache exception occurred - cache response not killed.") + // tags def onReset = L1Metadata(UInt(0), ClientMetadata.onReset) val meta = Module(new MetadataArray(onReset _)) From 517d0d4b897c7d5da8dbdc6ef14435696dc5a8f9 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Sun, 12 Apr 2015 18:44:03 -0700 Subject: [PATCH 0833/1087] feedback on PR --- rocket/src/main/scala/icache.scala | 12 +++++------- 1 file changed, 5 
insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index b992dd5f..1103f22d 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -114,13 +114,11 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) io.cpu.resp.bits.pc := s2_pc - var fetch_data:Bits = null - require (coreFetchWidth <= 4) - if (coreFetchWidth == 4) { - fetch_data = icache.io.resp.bits.datablock - } else { - fetch_data = icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) - } + require(coreFetchWidth * coreInstBytes <= rowBytes) + val fetch_data = + if (coreFetchWidth * coreInstBytes == rowBytes) icache.io.resp.bits.datablock + else icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) + for (i <- 0 until coreFetchWidth) { io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) } From 91e882e3f867b46c04584e0409e5f69bc7e28b6b Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 26 Mar 2015 23:26:03 -0700 Subject: [PATCH 0834/1087] Use HeaderlessTileLinkIO --- rocket/src/main/scala/arbiter.scala | 24 -------- rocket/src/main/scala/icache.scala | 27 +++------ rocket/src/main/scala/nbdcache.scala | 90 +++++++++++----------------- rocket/src/main/scala/rocc.scala | 4 +- rocket/src/main/scala/tile.scala | 2 +- 5 files changed, 45 insertions(+), 102 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 5aa931f4..b2a8482f 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -52,27 +52,3 @@ class HellaCacheArbiter(n: Int) extends Module io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> UInt(log2Up(n)) } } - 
-class RocketUncachedTileLinkIOArbiter(n: Int) extends TileLinkArbiterLike(n) - with AppendsArbiterId { - val io = new Bundle { - val in = Vec.fill(n){new HeaderlessUncachedTileLinkIO}.flip - val out = new HeaderlessUncachedTileLinkIO - } - hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire) - hookupFinish(io.in.map(_.finish), io.out.finish) - hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) -} - -class RocketTileLinkIOArbiter(n: Int) extends TileLinkArbiterLike(n) - with AppendsArbiterId { - val io = new Bundle { - val in = Vec.fill(n){new HeaderlessTileLinkIO}.flip - val out = new HeaderlessTileLinkIO - } - hookupClientSourceHeaderless(io.in.map(_.acquire), io.out.acquire) - hookupClientSourceHeaderless(io.in.map(_.release), io.out.release) - hookupFinish(io.in.map(_.finish), io.out.finish) - hookupManagerSourceBroadcast(io.in.map(_.probe), io.out.probe) - hookupManagerSourceWithId(io.in.map(_.grant), io.out.grant) -} diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 1103f22d..6c95008a 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -190,15 +190,10 @@ class ICache extends FrontendModule val s2_miss = s2_valid && !s2_any_tag_hit rdy := state === s_ready && !s2_miss - val ser = Module(new FlowThroughSerializer( - io.mem.grant.bits, - refillCyclesPerBeat)) - ser.io.in <> io.mem.grant - val (refill_cnt, refill_wrap) = Counter(ser.io.out.fire(), refillCycles) //TODO Zero width wire + val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat) + val (refill_cnt, refill_wrap) = Counter(narrow_grant.fire(), refillCycles) //TODO Zero width wire val refill_done = state === s_refill && refill_wrap - val refill_valid = ser.io.out.valid - val refill_bits = ser.io.out.bits - ser.io.out.ready := Bool(true) + narrow_grant.ready := Bool(true) val repl_way = if (isDM) UInt(0) else LFSR16(s2_miss)(log2Up(nWays)-1,0) val entagbits = code.width(tagBits) @@ 
-250,9 +245,9 @@ class ICache extends FrontendModule for (i <- 0 until nWays) { val data_array = Mem(Bits(width = code.width(rowBits)), nSets*refillCycles, seqRead = true) val s1_raddr = Reg(UInt()) - when (refill_valid && repl_way === UInt(i)) { - val e_d = code.encode(refill_bits.payload.data) - if(refillCycles > 1) data_array(Cat(s2_idx, refill_bits.payload.addr_beat)) := e_d + when (narrow_grant.valid && repl_way === UInt(i)) { + val e_d = code.encode(narrow_grant.bits.data) + if(refillCycles > 1) data_array(Cat(s2_idx, refill_cnt)) := e_d else data_array(s2_idx) := e_d } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM @@ -266,16 +261,10 @@ class ICache extends FrontendModule io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) - val ack_q = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) - ack_q.io.enq.valid := refill_done && refill_bits.payload.requiresAck() - ack_q.io.enq.bits.payload := refill_bits.payload.makeFinish() - ack_q.io.enq.bits.header.dst := refill_bits.header.src - // output signals io.resp.valid := s2_hit - io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready + io.mem.acquire.valid := (state === s_request) io.mem.acquire.bits := GetBlock(addr_block = s2_addr >> UInt(blockOffBits)) - io.mem.finish <> ack_q.io.deq // control state machine switch (state) { @@ -284,7 +273,7 @@ class ICache extends FrontendModule invalidated := Bool(false) } is (s_request) { - when (io.mem.acquire.ready && ack_q.io.enq.ready) { state := s_refill_wait } + when (io.mem.acquire.ready) { state := s_refill_wait } } is (s_refill_wait) { when (io.mem.grant.valid) { state := s_refill } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3b55ec8c..02fd750c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -97,6 +97,8 @@ class L1DataWriteReq extends L1DataReadReq { val data = Bits(width = 
encRowBits) } +class L1RefillReq extends L1DataReadReq + class L1MetaReadReq extends MetaReadReq { val tag = Bits(width = tagBits) } @@ -140,12 +142,11 @@ class MSHR(id: Int) extends L1HellaCacheModule { val tag = Bits(OUTPUT, tagBits) val mem_req = Decoupled(new Acquire) - val mem_resp = new L1DataWriteReq().asOutput + val refill = new L1RefillReq().asOutput // Data is bypassed val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new ReplayInternal) - val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip - val mem_finish = Decoupled(new LogicalNetworkIO(new Finish)) + val mem_grant = Valid(new Grant).flip val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) } @@ -168,11 +169,9 @@ class MSHR(id: Int) extends L1HellaCacheModule { (states_before_refill.contains(state) || (Vec(s_refill_req, s_refill_resp).contains(state) && !cmd_requires_second_acquire)) - val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UInt(id) - val gnt_multi_data = io.mem_grant.bits.payload.hasMultibeatData() - val (refill_cnt, refill_count_done) = Counter(reply && gnt_multi_data, refillCycles) // TODO: Zero width? - val refill_done = reply && (!gnt_multi_data || refill_count_done) - val wb_done = reply && (state === s_wb_resp) + val gnt_multi_data = io.mem_grant.bits.hasMultibeatData() + val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles) // TODO: Zero width? 
+ val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done) val rpq = Module(new Queue(new ReplayInternal, params(ReplayQueueDepth))) rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(io.req_bits.cmd) @@ -180,7 +179,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid val coh_on_grant = req.old_meta.coh.onGrant( - incoming = io.mem_grant.bits.payload, + incoming = io.mem_grant.bits, pending = req.cmd) val coh_on_hit = io.req_bits.old_meta.coh.onHit(io.req_bits.cmd) @@ -195,7 +194,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { state := s_meta_write_resp } when (state === s_refill_resp) { - when (reply) { new_coh_state := coh_on_grant } + when (io.mem_grant.valid) { new_coh_state := coh_on_grant } when (refill_done) { state := s_meta_write_req } } when (io.mem_req.fire()) { // s_refill_req @@ -204,7 +203,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { when (state === s_meta_clear && io.meta_write.ready) { state := s_refill_req } - when (state === s_wb_resp && reply) { + when (state === s_wb_resp && io.mem_grant.valid) { state := s_meta_clear } when (io.wb_req.fire()) { // s_wb_req @@ -233,18 +232,9 @@ class MSHR(id: Int) extends L1HellaCacheModule { } } - val ackq = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) - ackq.io.enq.valid := (wb_done || refill_done) && io.mem_grant.bits.payload.requiresAck() - ackq.io.enq.bits.payload := io.mem_grant.bits.payload.makeFinish() - ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src - val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp - io.mem_finish.valid := ackq.io.deq.valid && can_finish - ackq.io.deq.ready := io.mem_finish.ready && can_finish - io.mem_finish.bits := ackq.io.deq.bits - io.idx_match := (state != s_invalid) && idx_match - io.mem_resp := req - io.mem_resp.addr := (if(refillCycles > 1) 
Cat(req_idx, refill_cnt) else req_idx) << rowOffBits + io.refill.way_en := req.way_en + io.refill.addr := (if(refillCycles > 1) Cat(req_idx, refill_cnt) else req_idx) << rowOffBits io.tag := req.addr >> untagBits io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready @@ -262,18 +252,17 @@ class MSHR(id: Int) extends L1HellaCacheModule { io.meta_write.bits.data.tag := io.tag io.meta_write.bits.way_en := req.way_en - io.wb_req.valid := state === s_wb_req && ackq.io.enq.ready + io.wb_req.valid := state === s_wb_req io.wb_req.bits := req.old_meta.coh.makeVoluntaryWriteback( client_xact_id = UInt(id), addr_block = Cat(req.old_meta.tag, req_idx)) io.wb_req.bits.way_en := req.way_en - io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready + io.mem_req.valid := state === s_refill_req io.mem_req.bits := req.old_meta.coh.makeAcquire( addr_block = Cat(io.tag, req_idx).toUInt, client_xact_id = Bits(id), op_code = req.cmd) - io.mem_finish <> ackq.io.deq io.meta_read.valid := state === s_drain_rpq io.meta_read.bits.idx := req_idx @@ -295,13 +284,12 @@ class MSHRFile extends L1HellaCacheModule { val req = Decoupled(new MSHRReq).flip val secondary_miss = Bool(OUTPUT) - val mem_req = Decoupled(new Acquire) //TODO make sure TLParameters are correct ????? 
- val mem_resp = new L1DataWriteReq().asOutput + val mem_req = Decoupled(new Acquire) + val refill = new L1RefillReq().asOutput val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new Replay) - val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip - val mem_finish = Decoupled(new LogicalNetworkIO(new Finish)) + val mem_grant = Valid(new Grant).flip val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) @@ -320,7 +308,7 @@ class MSHRFile extends L1HellaCacheModule { val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits val wbTagList = Vec.fill(nMSHRs){Bits()} - val memRespMux = Vec.fill(nMSHRs){new L1DataWriteReq} + val refillMux = Vec.fill(nMSHRs){new L1RefillReq} val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs)) val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) val mem_req_arb = Module(new LockingArbiter( @@ -328,7 +316,6 @@ class MSHRFile extends L1HellaCacheModule { nMSHRs, outerDataBeats, (a: Acquire) => a.hasMultibeatData())) - val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), nMSHRs)) val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs)) val replay_arb = Module(new Arbiter(new ReplayInternal, nMSHRs)) val alloc_arb = Module(new Arbiter(Bool(), nMSHRs)) @@ -357,12 +344,13 @@ class MSHRFile extends L1HellaCacheModule { mshr.io.meta_read <> meta_read_arb.io.in(i) mshr.io.meta_write <> meta_write_arb.io.in(i) mshr.io.mem_req <> mem_req_arb.io.in(i) - mshr.io.mem_finish <> mem_finish_arb.io.in(i) mshr.io.wb_req <> wb_req_arb.io.in(i) mshr.io.replay <> replay_arb.io.in(i) - mshr.io.mem_grant <> io.mem_grant - memRespMux(i) := mshr.io.mem_resp + mshr.io.mem_grant.valid := io.mem_grant.valid && + io.mem_grant.bits.client_xact_id === UInt(i) + mshr.io.mem_grant.bits := io.mem_grant.bits + refillMux(i) := mshr.io.refill pri_rdy = pri_rdy || mshr.io.req_pri_rdy sec_rdy = sec_rdy || 
mshr.io.req_sec_rdy @@ -377,12 +365,11 @@ class MSHRFile extends L1HellaCacheModule { meta_read_arb.io.out <> io.meta_read meta_write_arb.io.out <> io.meta_write mem_req_arb.io.out <> io.mem_req - mem_finish_arb.io.out <> io.mem_finish wb_req_arb.io.out <> io.wb_req io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.secondary_miss := idx_match - io.mem_resp := memRespMux(io.mem_grant.bits.payload.client_xact_id) + io.refill := refillMux(io.mem_grant.bits.client_xact_id) val free_sdq = io.replay.fire() && isWrite(io.replay.bits.cmd) io.replay.bits.data := sdq(RegEnable(replay_arb.io.out.bits.sdq_id, free_sdq)) @@ -806,7 +793,6 @@ class HellaCache extends L1HellaCacheModule { mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) mshrs.io.req.bits.data := s2_req.data when (mshrs.io.req.fire()) { replacer.miss } - io.mem.acquire <> mshrs.io.mem_req // replays @@ -822,10 +808,9 @@ class HellaCache extends L1HellaCacheModule { val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, (r: Release) => r.hasMultibeatData())) releaseArb.io.out <> io.mem.release - val probe = DecoupledLogicalNetworkIOUnwrapper(io.mem.probe) - prober.io.req.valid := probe.valid && !lrsc_valid - probe.ready := prober.io.req.ready && !lrsc_valid - prober.io.req.bits := probe.bits + prober.io.req.valid := io.mem.probe.valid && !lrsc_valid + io.mem.probe.ready := prober.io.req.ready && !lrsc_valid + prober.io.req.bits := io.mem.probe.bits prober.io.rep <> releaseArb.io.in(1) prober.io.way_en := s2_tag_match_way prober.io.block_state := s2_hit_state @@ -834,19 +819,16 @@ class HellaCache extends L1HellaCacheModule { prober.io.mshr_rdy := mshrs.io.probe_rdy // refills - val ser = Module(new FlowThroughSerializer( - io.mem.grant.bits, - refillCyclesPerBeat)) - ser.io.in <> io.mem.grant - val refill = ser.io.out - mshrs.io.mem_grant.valid := refill.fire() - mshrs.io.mem_grant.bits := refill.bits - refill.ready := 
writeArb.io.in(1).ready || !refill.bits.payload.hasData() - writeArb.io.in(1).valid := refill.valid && refill.bits.payload.hasData() - writeArb.io.in(1).bits := mshrs.io.mem_resp + val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat) + mshrs.io.mem_grant.valid := narrow_grant.fire() + mshrs.io.mem_grant.bits := narrow_grant.bits + narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData() + writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() + writeArb.io.in(1).bits.addr := mshrs.io.refill.addr + writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en writeArb.io.in(1).bits.wmask := SInt(-1) - writeArb.io.in(1).bits.data := refill.bits.payload.data(encRowBits-1,0) - readArb.io.out.ready := !refill.valid || refill.ready // insert bubble if refill gets blocked + writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0) + readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked readArb.io.out <> data.io.read // writebacks @@ -920,8 +902,6 @@ class HellaCache extends L1HellaCacheModule { io.cpu.replay_next.valid := s1_replay && (s1_read || s1_sc) io.cpu.replay_next.bits := s1_req.tag - - io.mem.finish <> mshrs.io.mem_finish } // exposes a sane decoupled request interface diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 79332c32..caee76e5 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -7,7 +7,7 @@ import Node._ import uncore._ import Util._ -case object RoCCMemTagBits extends Field[Int] +case object RoCCMaxTaggedMemXacts extends Field[Int] class RoCCInstruction extends Bundle { @@ -127,10 +127,8 @@ class AccumulatorExample extends RoCC io.imem.acquire.valid := false io.imem.grant.ready := false - io.imem.finish.valid := false io.dmem.acquire.valid := false io.dmem.grant.ready := false - io.dmem.finish.valid := false io.iptw.req.valid := false io.dptw.req.valid := 
false io.pptw.req.valid := false diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index edb5566c..8d53836c 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -44,7 +44,7 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { // otherwise, just hookup the icache io.uncached <> params(BuildRoCC).map { buildItHere => val rocc = buildItHere() - val memArb = Module(new RocketUncachedTileLinkIOArbiter(3)) + val memArb = Module(new HeaderlessTileLinkIOArbiter(3)) val dcIF = Module(new SimpleHellaCacheIF) core.io.rocc <> rocc.io dcIF.io.requestor <> rocc.io.mem From 49f1c0aa7b7b63fefb650f07433ef30f4571b832 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 26 Mar 2015 23:26:03 -0700 Subject: [PATCH 0835/1087] moved ecc lib to uncore --- rocket/src/main/scala/ecc.scala | 148 ----------------------------- rocket/src/main/scala/icache.scala | 3 - rocket/src/main/scala/util.scala | 8 -- 3 files changed, 159 deletions(-) delete mode 100644 rocket/src/main/scala/ecc.scala diff --git a/rocket/src/main/scala/ecc.scala b/rocket/src/main/scala/ecc.scala deleted file mode 100644 index a0612a1d..00000000 --- a/rocket/src/main/scala/ecc.scala +++ /dev/null @@ -1,148 +0,0 @@ -// See LICENSE for license details. 
- -package rocket - -import Chisel._ -import uncore._ -import Util._ - -abstract class Decoding -{ - def uncorrected: Bits - def corrected: Bits - def correctable: Bool - def uncorrectable: Bool - def error = correctable || uncorrectable -} - -abstract class Code -{ - def width(w0: Int): Int - def encode(x: Bits): Bits - def decode(x: Bits): Decoding -} - -class IdentityCode extends Code -{ - def width(w0: Int) = w0 - def encode(x: Bits) = x - def decode(y: Bits) = new Decoding { - def uncorrected = y - def corrected = y - def correctable = Bool(false) - def uncorrectable = Bool(false) - } -} - -class ParityCode extends Code -{ - def width(w0: Int) = w0+1 - def encode(x: Bits) = Cat(x.xorR, x) - def decode(y: Bits) = new Decoding { - def uncorrected = y(y.getWidth-2,0) - def corrected = uncorrected - def correctable = Bool(false) - def uncorrectable = y.xorR - } -} - -class SECCode extends Code -{ - def width(k: Int) = { - val m = k.log2 + 1 - k + m + ((1 << m) < m+k+1).toInt - } - def encode(x: Bits) = { - val k = x.getWidth - require(k > 0) - val n = width(k) - - val y = for (i <- 1 to n) yield { - if (isPow2(i)) { - val r = for (j <- 1 to n; if j != i && (j & i) != 0) - yield x(mapping(j)) - r reduce (_^_) - } else - x(mapping(i)) - } - Vec(y).toBits - } - def decode(y: Bits) = new Decoding { - val n = y.getWidth - require(n > 0 && !isPow2(n)) - - val p2 = for (i <- 0 until log2Up(n)) yield 1 << i - val syndrome = p2 map { i => - val r = for (j <- 1 to n; if (j & i) != 0) - yield y(j-1) - r reduce (_^_) - } - val s = Vec(syndrome).toBits - - private def swizzle(z: Bits) = Vec((1 to n).filter(i => !isPow2(i)).map(i => z(i-1))).toBits - def uncorrected = swizzle(y) - def corrected = swizzle(((y.toUInt << 1) ^ UIntToOH(s)) >> 1) - def correctable = s.orR - def uncorrectable = Bool(false) - } - private def mapping(i: Int) = i-1-log2Up(i) -} - -class SECDEDCode extends Code -{ - private val sec = new SECCode - private val par = new ParityCode - - def width(k: Int) = 
sec.width(k)+1 - def encode(x: Bits) = par.encode(sec.encode(x)) - def decode(x: Bits) = new Decoding { - val secdec = sec.decode(x(x.getWidth-2,0)) - val pardec = par.decode(x) - - def uncorrected = secdec.uncorrected - def corrected = secdec.corrected - def correctable = pardec.uncorrectable - def uncorrectable = !pardec.uncorrectable && secdec.correctable - } -} - -object ErrGen -{ - // generate a 1-bit error with approximate probability 2^-f - def apply(width: Int, f: Int): Bits = { - require(width > 0 && f >= 0 && log2Up(width) + f <= 16) - UIntToOH(LFSR16()(log2Up(width)+f-1,0))(width-1,0) - } - def apply(x: Bits, f: Int): Bits = x ^ apply(x.getWidth, f) -} - -class SECDEDTest extends Module -{ - val code = new SECDEDCode - val k = 4 - val n = code.width(k) - - val io = new Bundle { - val original = Bits(OUTPUT, k) - val encoded = Bits(OUTPUT, n) - val injected = Bits(OUTPUT, n) - val uncorrected = Bits(OUTPUT, k) - val corrected = Bits(OUTPUT, k) - val correctable = Bool(OUTPUT) - val uncorrectable = Bool(OUTPUT) - } - - val c = Counter(Bool(true), 1 << k) - val numErrors = Counter(c._2, 3)._1 - val e = code.encode(c._1) - val i = e ^ Mux(numErrors < 1, 0, ErrGen(n, 1)) ^ Mux(numErrors < 2, 0, ErrGen(n, 1)) - val d = code.decode(i) - - io.original := c._1 - io.encoded := e - io.injected := i - io.uncorrected := d.uncorrected - io.corrected := d.corrected - io.correctable := d.correctable - io.uncorrectable := d.uncorrectable -} diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 6c95008a..4b5b9107 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -4,12 +4,9 @@ import Chisel._ import uncore._ import Util._ -case object ECCCode extends Field[Option[Code]] - abstract trait L1CacheParameters extends CacheParameters with CoreParameters { val outerDataBeats = params(TLDataBeats) val outerDataBits = params(TLDataBits) - val code = params(ECCCode).getOrElse(new IdentityCode) val 
refillCyclesPerBeat = outerDataBits/rowBits val refillCycles = refillCyclesPerBeat*outerDataBeats } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 464fdbb2..80543180 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -6,14 +6,6 @@ import Chisel._ import uncore._ import scala.math._ -class Unsigned(x: Int) { - require(x >= 0) - def clog2: Int = { require(x > 0); ceil(log(x)/log(2)).toInt } - def log2: Int = { require(x > 0); floor(log(x)/log(2)).toInt } - def isPow2: Boolean = x > 0 && (x & (x-1)) == 0 - def nextPow2: Int = if (x == 0) 1 else 1 << clog2 -} - object Util { implicit def intToUInt(x: Int): UInt = UInt(x) implicit def booleanToBool(x: Boolean): Bits = Bool(x) From 3048f4785aaad90766b11a34553c25e8cd6aaae5 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 17 Apr 2015 16:56:53 -0700 Subject: [PATCH 0836/1087] HeaderlessTileLinkIO -> ClientTileLinkIO --- rocket/src/main/scala/icache.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 2 +- rocket/src/main/scala/rocc.scala | 4 ++-- rocket/src/main/scala/tile.scala | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 4b5b9107..c07bd889 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -41,7 +41,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule val io = new Bundle { val cpu = new CPUFrontendIO().flip val ptw = new TLBPTWIO() - val mem = new HeaderlessUncachedTileLinkIO + val mem = new ClientUncachedTileLinkIO } val btb = Module(new BTB(btb_updates_out_of_order)) @@ -146,7 +146,7 @@ class ICache extends FrontendModule val req = Valid(new ICacheReq).flip val resp = Decoupled(new ICacheResp) val invalidate = Bool(INPUT) - val mem = new HeaderlessUncachedTileLinkIO + val mem = new ClientUncachedTileLinkIO } require(isPow2(nSets) && isPow2(nWays)) 
require(isPow2(coreInstBytes)) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 02fd750c..dc61ca04 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -584,7 +584,7 @@ class HellaCache extends L1HellaCacheModule { val io = new Bundle { val cpu = (new HellaCacheIO).flip val ptw = new TLBPTWIO() - val mem = new HeaderlessTileLinkIO + val mem = new ClientTileLinkIO } require(params(LRSCCycles) >= 32) // ISA requires 16-insn LRSC sequences to succeed diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index caee76e5..725a500b 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -44,8 +44,8 @@ class RoCCInterface extends Bundle val interrupt = Bool(OUTPUT) // These should be handled differently, eventually - val imem = new HeaderlessUncachedTileLinkIO - val dmem = new HeaderlessUncachedTileLinkIO + val imem = new ClientUncachedTileLinkIO + val dmem = new ClientUncachedTileLinkIO val iptw = new TLBPTWIO val dptw = new TLBPTWIO val pptw = new TLBPTWIO diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 8d53836c..ca6a86fb 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -13,8 +13,8 @@ case object BuildRoCC extends Field[Option[() => RoCC]] abstract class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { val io = new Bundle { - val cached = new HeaderlessTileLinkIO - val uncached = new HeaderlessUncachedTileLinkIO + val cached = new ClientTileLinkIO + val uncached = new ClientUncachedTileLinkIO val host = new HTIFIO } } @@ -44,7 +44,7 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { // otherwise, just hookup the icache io.uncached <> params(BuildRoCC).map { buildItHere => val rocc = buildItHere() - val memArb = Module(new HeaderlessTileLinkIOArbiter(3)) + val memArb = Module(new ClientTileLinkIOArbiter(3)) 
val dcIF = Module(new SimpleHellaCacheIF) core.io.rocc <> rocc.io dcIF.io.requestor <> rocc.io.mem From a315fe93c1e6e5343c4a4ed81e30e42fc80d26a8 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 20 Apr 2015 10:46:24 -0700 Subject: [PATCH 0837/1087] simplify ClientMetadata.makeRelease --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index dc61ca04..e3f7d650 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -511,7 +511,7 @@ class ProbeUnit extends L1HellaCacheModule { req := io.req.bits } - val reply = old_coh.makeRelease(req, req.client_xact_id) + val reply = old_coh.makeRelease(req) io.req.ready := state === s_invalid io.rep.valid := state === s_release && !(tag_matches && old_coh.requiresVoluntaryWriteback()) // Otherwise WBU will issue release From 1f410ac42cde16c30ec76ec130c5b2ab7fb0006d Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Wed, 22 Apr 2015 11:26:03 -0700 Subject: [PATCH 0838/1087] move fetch buffer into frontend to allow retiming --- rocket/src/main/scala/icache.scala | 85 ++++++++++++++---------------- 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 1103f22d..69f5d877 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -59,13 +59,14 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule val s2_btb_resp_valid = Reg(init=Bool(false)) val s2_btb_resp_bits = Reg(btb.io.resp.bits.clone) val s2_xcpt_if = Reg(init=Bool(false)) + val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true)) val msb = vaddrBits-1 val lsb = log2Up(coreFetchWidth*coreInstBytes) val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) val ntpc_0 = s1_pc + UInt(coreInstBytes*coreFetchWidth) val ntpc = Cat(s1_pc(msb) & 
ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure - val icmiss = s2_valid && !icache.io.resp.valid + val icmiss = s2_valid && !icbuf.io.deq.valid val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) @@ -109,15 +110,17 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule icache.io.invalidate := io.cpu.invalidate icache.io.req.bits.ppn := tlb.io.resp.ppn icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || io.ptw.invalidate - icache.io.resp.ready := !stall && !s1_same_block - io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) + io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icbuf.io.deq.valid) io.cpu.resp.bits.pc := s2_pc + icbuf.io.enq <> icache.io.resp + icbuf.io.deq.ready := !stall && !s1_same_block + require(coreFetchWidth * coreInstBytes <= rowBytes) val fetch_data = - if (coreFetchWidth * coreInstBytes == rowBytes) icache.io.resp.bits.datablock - else icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) + if (coreFetchWidth * coreInstBytes == rowBytes) icbuf.io.deq.bits.datablock + else icbuf.io.deq.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) for (i <- 0 until coreFetchWidth) { io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) @@ -139,7 +142,6 @@ class ICacheReq extends FrontendBundle { } class ICacheResp extends FrontendBundle { - val data = Bits(width = coreInstBits) val datablock = Bits(width = rowBits) } @@ -161,9 +163,8 @@ class ICache extends FrontendModule val stall = !io.resp.ready val rdy = Bool() - val s2_valid = Reg(init=Bool(false)) - val s2_addr = Reg(UInt(width = paddrBits)) - val s2_any_tag_hit 
= Bool() + val refill_addr = Reg(UInt(width = paddrBits)) + val s1_any_tag_hit = Bool() val s1_valid = Reg(init=Bool(false)) val s1_pgoff = Reg(UInt(width = pgIdxBits)) @@ -178,17 +179,17 @@ class ICache extends FrontendModule s1_pgoff := io.req.bits.idx } - s2_valid := s1_valid && rdy && !io.req.bits.kill || io.resp.valid && stall - when (s1_valid && rdy && !stall) { - s2_addr := s1_addr - } + val out_valid = s1_valid && !io.req.bits.kill && state === s_ready + val s1_idx = s1_addr(untagBits-1,blockOffBits) + val s1_offset = s1_addr(blockOffBits-1,0) + val s1_hit = out_valid && s1_any_tag_hit + val s1_miss = out_valid && !s1_any_tag_hit + rdy := state === s_ready && !s1_miss - val s2_tag = s2_addr(tagBits+untagBits-1,untagBits) - val s2_idx = s2_addr(untagBits-1,blockOffBits) - val s2_offset = s2_addr(blockOffBits-1,0) - val s2_hit = s2_valid && s2_any_tag_hit - val s2_miss = s2_valid && !s2_any_tag_hit - rdy := state === s_ready && !s2_miss + when (s1_valid && state === s_ready && s1_miss) { + refill_addr := s1_addr + } + val refill_tag = refill_addr(tagBits+untagBits-1,untagBits) val ser = Module(new FlowThroughSerializer( io.mem.grant.bits, @@ -200,14 +201,14 @@ class ICache extends FrontendModule val refill_bits = ser.io.out.bits ser.io.out.ready := Bool(true) - val repl_way = if (isDM) UInt(0) else LFSR16(s2_miss)(log2Up(nWays)-1,0) + val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0) val entagbits = code.width(tagBits) val tag_array = Mem(Bits(width = entagbits*nWays), nSets, seqRead = true) val tag_raddr = Reg(UInt()) when (refill_done) { val wmask = FillInterleaved(entagbits, if (isDM) Bits(1) else UIntToOH(repl_way)) - val tag = code.encode(s2_tag).toUInt - tag_array.write(s2_idx, Fill(nWays, tag), wmask) + val tag = code.encode(refill_tag).toUInt + tag_array.write(s1_idx, Fill(nWays, tag), wmask) } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM .elsewhen (s0_valid) { @@ -216,55 +217,49 @@ class ICache extends 
FrontendModule val vb_array = Reg(init=Bits(0, nSets*nWays)) when (refill_done && !invalidated) { - vb_array := vb_array.bitSet(Cat(repl_way, s2_idx), Bool(true)) + vb_array := vb_array.bitSet(Cat(repl_way, s1_idx), Bool(true)) } when (io.invalidate) { vb_array := Bits(0) invalidated := Bool(true) } - val s2_disparity = Vec.fill(nWays){Bool()} + val s1_disparity = Vec.fill(nWays){Bool()} for (i <- 0 until nWays) - when (s2_valid && s2_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s2_idx), Bool(false)) } + when (s1_valid && s1_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s1_idx), Bool(false)) } val s1_tag_match = Vec.fill(nWays){Bool()} - val s2_tag_hit = Vec.fill(nWays){Bool()} - val s2_dout = Vec.fill(nWays){Reg(Bits())} + val s1_tag_hit = Vec.fill(nWays){Bool()} + val s1_dout = Vec.fill(nWays){(Bits())} for (i <- 0 until nWays) { val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool - val s2_vb = Reg(Bool()) - val s2_tag_disparity = Reg(Bool()) - val s2_tag_match = Reg(Bool()) val tag_out = tag_array(tag_raddr)(entagbits*(i+1)-1, entagbits*i) + val s1_tag_disparity = code.decode(tag_out).error when (s1_valid && rdy && !stall) { - s2_vb := s1_vb - s2_tag_disparity := code.decode(tag_out).error - s2_tag_match := s1_tag_match(i) } s1_tag_match(i) := tag_out(tagBits-1,0) === s1_tag - s2_tag_hit(i) := s2_vb && s2_tag_match - s2_disparity(i) := s2_vb && (s2_tag_disparity || code.decode(s2_dout(i)).error) + s1_tag_hit(i) := s1_vb && s1_tag_match(i) + s1_disparity(i) := s1_vb && (s1_tag_disparity || code.decode(s1_dout(i)).error) } - s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_) + s1_any_tag_hit := s1_tag_hit.reduceLeft(_||_) && !s1_disparity.reduceLeft(_||_) for (i <- 0 until nWays) { val data_array = Mem(Bits(width = code.width(rowBits)), nSets*refillCycles, seqRead = true) val s1_raddr = Reg(UInt()) when (refill_valid && repl_way === UInt(i)) { val e_d = 
code.encode(refill_bits.payload.data) - if(refillCycles > 1) data_array(Cat(s2_idx, refill_bits.payload.addr_beat)) := e_d - else data_array(s2_idx) := e_d + if(refillCycles > 1) data_array(Cat(s1_idx, refill_bits.payload.addr_beat)) := e_d + else data_array(s1_idx) := e_d } // /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM .elsewhen (s0_valid) { s1_raddr := s0_pgoff(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0)) } // if s1_tag_match is critical, replace with partial tag check - when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s2_dout(i) := data_array(s1_raddr) } + s1_dout(i) := 0 + when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s1_dout(i) := data_array(s1_raddr) } } - val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(rowBytes)-1,log2Up(coreInstBytes)) << log2Up(coreInstBits)))(coreInstBits-1,0)) - io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) - io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) + io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout) val ack_q = Module(new Queue(new LogicalNetworkIO(new Finish), 1)) ack_q.io.enq.valid := refill_done && refill_bits.payload.requiresAck() @@ -272,15 +267,15 @@ class ICache extends FrontendModule ack_q.io.enq.bits.header.dst := refill_bits.header.src // output signals - io.resp.valid := s2_hit + io.resp.valid := s1_hit io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready - io.mem.acquire.bits := GetBlock(addr_block = s2_addr >> UInt(blockOffBits)) + io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> UInt(blockOffBits)) io.mem.finish <> ack_q.io.deq // control state machine switch (state) { is (s_ready) { - when (s2_miss) { state := s_request } + when (s1_miss) { state := s_request } invalidated := Bool(false) } is (s_request) { From c746ef8702fb424fe90b2df1e7d1740603cb09f6 Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Mon, 4 May 2015 11:20:55 -0700 Subject: [PATCH 0839/1087] fix bug 
in rocc port resp for FPtoInt instructions --- rocket/src/main/scala/fpu.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 3b5a6c1f..d26c9eda 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -440,7 +440,7 @@ class FPU extends Module fpiu.io.in.bits := req io.dpath.store_data := fpiu.io.out.bits.store io.dpath.toint_data := fpiu.io.out.bits.toint - when(fpiu.io.out.valid && mem_cp_valid && !(mem_ctrl.div || mem_ctrl.sqrt)){ + when(fpiu.io.out.valid && mem_cp_valid && mem_ctrl.toint){ io.cp_resp.bits.data := fpiu.io.out.bits.toint io.cp_resp.valid := Bool(true) } From b09832f1b591da5a137dd6c8173f1abba860e34d Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Thu, 7 May 2015 04:53:05 -0700 Subject: [PATCH 0840/1087] ICache now returns the "next PC" signal. useful for other modules that need access to the fetch PC on the cycle it is sent to the SRAM. --- rocket/src/main/scala/icache.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index c07bd889..8686d310 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -26,7 +26,7 @@ class FrontendResp extends CoreBundle { val xcpt_if = Bool() } -class CPUFrontendIO extends Bundle { +class CPUFrontendIO extends CoreBundle { val req = Valid(new FrontendReq) val resp = Decoupled(new FrontendResp).flip val btb_resp = Valid(new BTBResp).flip @@ -34,6 +34,7 @@ class CPUFrontendIO extends Bundle { val bht_update = Valid(new BHTUpdate) val ras_update = Valid(new RASUpdate) val invalidate = Bool(OUTPUT) + val npc = UInt(INPUT, width = vaddrBits+1) } class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule @@ -102,6 +103,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule icache.io.mem <> io.mem icache.io.req.valid 
:= !stall && !s0_same_block + io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) icache.io.req.bits.idx := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) icache.io.invalidate := io.cpu.invalidate icache.io.req.bits.ppn := tlb.io.resp.ppn From d31b26c342e7ef2f10fbe64597c1330d3c353269 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 18 May 2015 18:22:48 -0700 Subject: [PATCH 0841/1087] Clean up handling of icache's io.cpu.npc signal --- rocket/src/main/scala/core.scala | 1 + rocket/src/main/scala/icache.scala | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala index 33bb617b..d73e494d 100644 --- a/rocket/src/main/scala/core.scala +++ b/rocket/src/main/scala/core.scala @@ -40,6 +40,7 @@ abstract trait CoreParameters extends UsesParameters { val coreDataBytes = coreDataBits/8 val coreDCacheReqTagBits = params(CoreDCacheReqTagBits) val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits + val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt if(params(FastLoadByte)) require(params(FastLoadWord)) } diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 8686d310..79500260 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -16,11 +16,11 @@ abstract class FrontendBundle extends Bundle with FrontendParameters abstract class FrontendModule extends Module with FrontendParameters class FrontendReq extends CoreBundle { - val pc = UInt(width = vaddrBits+1) + val pc = UInt(width = vaddrBitsExtended) } class FrontendResp extends CoreBundle { - val pc = UInt(width = vaddrBits+1) // ID stage PC + val pc = UInt(width = vaddrBitsExtended) // ID stage PC val data = Vec.fill(coreFetchWidth) (Bits(width = coreInstBits)) val mask = Bits(width = coreFetchWidth) val xcpt_if = Bool() @@ -34,7 +34,7 @@ class CPUFrontendIO extends CoreBundle { val bht_update = Valid(new BHTUpdate) val ras_update 
= Valid(new RASUpdate) val invalidate = Bool(OUTPUT) - val npc = UInt(INPUT, width = vaddrBits+1) + val npc = UInt(INPUT, width = vaddrBitsExtended) } class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule @@ -103,8 +103,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule icache.io.mem <> io.mem icache.io.req.valid := !stall && !s0_same_block - io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) - icache.io.req.bits.idx := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) + icache.io.req.bits.idx := io.cpu.npc icache.io.invalidate := io.cpu.invalidate icache.io.req.bits.ppn := tlb.io.resp.ppn icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || io.ptw.invalidate @@ -112,6 +111,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) io.cpu.resp.bits.pc := s2_pc + io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) require(coreFetchWidth * coreInstBytes <= rowBytes) val fetch_data = From 254498042af8ba871b971055bc39b1e8e6ea71db Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 18 May 2015 18:23:17 -0700 Subject: [PATCH 0842/1087] Fix Split for 0-width wires --- rocket/src/main/scala/util.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 80543180..89bfddf5 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -97,7 +97,7 @@ object Split val w = x.getWidth def decreasing(x: Seq[Int]): Boolean = if (x.tail.isEmpty) true - else x.head > x.tail.head && decreasing(x.tail) + else x.head >= x.tail.head && decreasing(x.tail) require(decreasing(w :: n.toList)) w } From f460cb6c544f4e6ea5f6b41178e896ffe62a8e0f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 18 May 2015 18:23:58 -0700 Subject: [PATCH 0843/1087] Update to privileged 
architecture 1.7 --- rocket/src/main/scala/csr.scala | 270 +++++++++++++---------- rocket/src/main/scala/ctrl.scala | 15 +- rocket/src/main/scala/instructions.scala | 91 +++++--- rocket/src/main/scala/package.scala | 3 +- rocket/src/main/scala/ptw.scala | 24 +- rocket/src/main/scala/tlb.scala | 4 +- rocket/src/main/scala/util.scala | 3 + 7 files changed, 249 insertions(+), 161 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index fa263d19..ac63573c 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -11,20 +11,13 @@ import scala.math._ class MStatus extends Bundle { val sd = Bool() - val zero4 = UInt(width = 19) - val ha = UInt(width = 4) - val sa = UInt(width = 4) - val ua = UInt(width = 4) + val zero2 = UInt(width = 31) val sd_rv32 = UInt(width = 1) + val zero1 = UInt(width = 9) + val vm = UInt(width = 5) + val mprv = Bool() val xs = UInt(width = 2) val fs = UInt(width = 2) - val mtie = Bool() - val htie = Bool() - val stie = Bool() - val zero3 = UInt(width = 1) - val vm = UInt(width = 4) - val zero2 = UInt(width = 1) - val mprv = UInt(width = 2) val prv3 = UInt(width = 2) val ie3 = Bool() val prv2 = UInt(width = 2) @@ -33,30 +26,32 @@ class MStatus extends Bundle { val ie1 = Bool() val prv = UInt(width = 2) val ie = Bool() - val msip = Bool() - val hsip = Bool() - val ssip = Bool() - val zero1 = UInt(width = 1) } class SStatus extends Bundle { val sd = Bool() - val zero6 = UInt(width = 32) + val zero4 = UInt(width = 31) + val sd_rv32 = UInt(width = 1) + val zero3 = UInt(width = 14) + val mprv = Bool() val xs = UInt(width = 2) val fs = UInt(width = 2) - val tip = Bool() - val zero5 = UInt(width = 1) - val tie = Bool() - val zero4 = UInt(width = 4) - val ua = UInt(width = 4) - val zero3 = UInt(width = 7) + val zero2 = UInt(width = 7) val ps = Bool() - val pie = UInt(width = 1) - val zero2 = UInt(width = 2) - val ie = Bool() + val pie = Bool() val zero1 = UInt(width = 2) - val sip = 
Bool() - val zero0 = UInt(width = 1) + val ie = Bool() +} + +class MIP extends Bundle { + val mtip = Bool() + val htip = Bool() + val stip = Bool() + val utip = Bool() + val msip = Bool() + val hsip = Bool() + val ssip = Bool() + val usip = Bool() } object CSR @@ -82,18 +77,19 @@ class CSRFileIO extends CoreBundle { } val csr_replay = Bool(OUTPUT) + val csr_stall = Bool(OUTPUT) val csr_xcpt = Bool(OUTPUT) val eret = Bool(OUTPUT) val status = new MStatus().asOutput val ptbr = UInt(OUTPUT, paddrBits) - val evec = UInt(OUTPUT, vaddrBits+1) + val evec = UInt(OUTPUT, vaddrBitsExtended) val exception = Bool(INPUT) val retire = UInt(INPUT, log2Up(1+retireWidth)) val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+retireWidth))) val custom_mrw_csrs = Vec.fill(params(NCustomMRWCSRs))(UInt(INPUT, xLen)) val cause = UInt(INPUT, xLen) - val pc = SInt(INPUT, vaddrBits+1) + val pc = UInt(INPUT, vaddrBitsExtended) val fatc = Bool(OUTPUT) val time = UInt(OUTPUT, xLen) val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) @@ -106,20 +102,23 @@ class CSRFileIO extends CoreBundle { class CSRFile extends CoreModule { val io = new CSRFileIO - + val reg_mstatus = Reg(new MStatus) - val reg_mepc = Reg(SInt(width = vaddrBits+1)) + val reg_mie = Reg(init=new MIP().fromBits(0)) + val reg_mip = Reg(init=new MIP().fromBits(0)) + val reg_mepc = Reg(UInt(width = vaddrBitsExtended)) val reg_mcause = Reg(Bits(width = xLen)) - val reg_mbadaddr = Reg(SInt(width = vaddrBits+1)) + val reg_mbadaddr = Reg(UInt(width = vaddrBitsExtended)) val reg_mscratch = Reg(Bits(width = xLen)) - val reg_sepc = Reg(SInt(width = vaddrBits+1)) + val reg_sepc = Reg(UInt(width = vaddrBitsExtended)) val reg_scause = Reg(Bits(width = xLen)) - val reg_sbadaddr = Reg(SInt(width = vaddrBits+1)) + val reg_sbadaddr = Reg(UInt(width = vaddrBitsExtended)) val reg_sscratch = Reg(Bits(width = xLen)) - val reg_stvec = Reg(SInt(width = vaddrBits)) + val reg_stvec = Reg(UInt(width = vaddrBits)) val reg_stimecmp = Reg(Bits(width = 32)) val 
reg_sptbr = Reg(UInt(width = paddrBits)) + val reg_wfi = Reg(init=Bool(false)) val reg_tohost = Reg(init=Bits(0, xLen)) val reg_fromhost = Reg(init=Bits(0, xLen)) @@ -130,20 +129,23 @@ class CSRFile extends CoreModule val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) - val r_irq_timer = Reg(init=Bool(false)) val irq_rocc = Bool(!params(BuildRoCC).isEmpty) && io.rocc.interrupt io.interrupt_cause := 0 io.interrupt := io.interrupt_cause(xLen-1) + val some_interrupt_pending = Bool(); some_interrupt_pending := false def checkInterrupt(max_priv: UInt, cond: Bool, num: Int) = { when (cond && (reg_mstatus.prv < max_priv || reg_mstatus.prv === max_priv && reg_mstatus.ie)) { io.interrupt_cause := UInt((BigInt(1) << (xLen-1)) + num) } + when (cond && reg_mstatus.prv <= max_priv) { + some_interrupt_pending := true + } } - checkInterrupt(PRV_S, reg_mstatus.stie && r_irq_timer, 0) - checkInterrupt(PRV_S, reg_mstatus.ssip, 1) - checkInterrupt(PRV_M, reg_mstatus.msip, 1) + checkInterrupt(PRV_S, reg_mie.ssip && reg_mip.ssip, 0) + checkInterrupt(PRV_M, reg_mie.msip && reg_mip.msip, 0) + checkInterrupt(PRV_S, reg_mie.stip && reg_mip.stip, 1) checkInterrupt(PRV_M, reg_fromhost != 0, 2) checkInterrupt(PRV_M, irq_rocc, 3) @@ -171,48 +173,72 @@ class CSRFile extends CoreModule io.host.debug_stats_pcr := reg_stats // direct export up the hierarchy val read_mstatus = io.status.toBits - val read_sstatus = new SStatus - read_sstatus := new SStatus().fromBits(read_mstatus) // sstatus mostly overlaps mstatus - read_sstatus.zero0 := 0 - read_sstatus.zero1 := 0 - read_sstatus.zero2 := 0 - read_sstatus.zero3 := 0 - read_sstatus.zero4 := 0 - read_sstatus.zero5 := 0 - read_sstatus.ua := io.status.ua - read_sstatus.tip := r_irq_timer + val isa_string = "IMA" + + (if (params(UseVM)) "S" else "") + + (if (!params(BuildFPU).isEmpty) "FD" else "") + + (if (!params(BuildRoCC).isEmpty) "X" else "") + val cpuid = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) | + 
isa_string.map(x => 1 << (x - 'A')).reduce(_|_) + val impid = 1 val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( CSRs.fflags -> (if (!params(BuildFPU).isEmpty) reg_fflags else UInt(0)), CSRs.frm -> (if (!params(BuildFPU).isEmpty) reg_frm else UInt(0)), CSRs.fcsr -> (if (!params(BuildFPU).isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), CSRs.cycle -> reg_time, - CSRs.scycle -> reg_time, - CSRs.time -> reg_time, - CSRs.stime -> reg_time, + CSRs.cyclew -> reg_time, CSRs.instret -> reg_instret, - CSRs.sinstret -> reg_instret, + CSRs.instretw -> reg_instret, + CSRs.time -> reg_time, + CSRs.timew -> reg_time, + CSRs.stime -> reg_time, + CSRs.stimew -> reg_time, + CSRs.mcpuid -> UInt(cpuid), + CSRs.mimpid -> UInt(impid), CSRs.mstatus -> read_mstatus, + CSRs.mtdeleg -> UInt(0), + CSRs.mtvec -> UInt(MTVEC), + CSRs.mip -> reg_mip.toBits, + CSRs.mie -> reg_mie.toBits, CSRs.mscratch -> reg_mscratch, - CSRs.mepc -> reg_mepc, - CSRs.mbadaddr -> reg_mbadaddr, + CSRs.mepc -> reg_mepc.sextTo(xLen), + CSRs.mbadaddr -> reg_mbadaddr.sextTo(xLen), CSRs.mcause -> reg_mcause, CSRs.stimecmp -> reg_stimecmp, - CSRs.hartid -> io.host.id, + CSRs.mhartid -> io.host.id, CSRs.send_ipi -> io.host.id, /* don't care */ CSRs.stats -> reg_stats, - CSRs.tohost -> reg_tohost, - CSRs.fromhost -> reg_fromhost) + CSRs.mtohost -> reg_tohost, + CSRs.mfromhost -> reg_fromhost) if (params(UseVM)) { + val read_sstatus = new SStatus + read_sstatus := new SStatus().fromBits(read_mstatus) // sstatus mostly overlaps mstatus + read_sstatus.zero1 := 0 + read_sstatus.zero2 := 0 + read_sstatus.zero3 := 0 + read_sstatus.zero4 := 0 + + val read_sip = new MIP + read_sip := new MIP().fromBits(0) + read_sip.ssip := reg_mip.ssip + read_sip.stip := reg_mip.stip + + val read_sie = new MIP + read_sie := new MIP().fromBits(0) + read_sie.ssip := reg_mie.ssip + read_sie.stip := reg_mie.stip + read_mapping += CSRs.sstatus -> read_sstatus.toBits + read_mapping += CSRs.sip -> read_sip.toBits + read_mapping += CSRs.sie 
-> read_sie.toBits read_mapping += CSRs.sscratch -> reg_sscratch read_mapping += CSRs.scause -> reg_scause - read_mapping += CSRs.sbadaddr -> reg_sbadaddr + read_mapping += CSRs.sbadaddr -> reg_sbadaddr.sextTo(xLen) read_mapping += CSRs.sptbr -> reg_sptbr read_mapping += CSRs.sasid -> UInt(0) - read_mapping += CSRs.sepc -> reg_sepc - read_mapping += CSRs.stvec -> reg_stvec + read_mapping += CSRs.sepc -> reg_sepc.sextTo(xLen) + read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen) } for (i <- 0 until reg_uarch_counters.size) @@ -241,22 +267,26 @@ class CSRFile extends CoreModule host_pcr_bits.data))) val opcode = io.rw.addr - val insn_call = !opcode(8) && !opcode(0) && system_insn && priv_sufficient - val insn_break = !opcode(8) && opcode(0) && system_insn && priv_sufficient - val insn_ret = opcode(8) && !opcode(0) && system_insn && priv_sufficient - val insn_sfence_vm = opcode(8) && opcode(0) && system_insn && priv_sufficient - val insn_redirect_trap = opcode(2) && system_insn && priv_sufficient + val insn_call = !opcode(8) && !opcode(0) && system_insn + val insn_break = !opcode(8) && opcode(0) && system_insn + val insn_ret = opcode(8) && !opcode(1) && !opcode(0) && system_insn && priv_sufficient + val insn_sfence_vm = opcode(8) && !opcode(1) && opcode(0) && system_insn && priv_sufficient + val maybe_insn_redirect_trap = opcode(2) && system_insn + val insn_redirect_trap = maybe_insn_redirect_trap && priv_sufficient + val insn_wfi = opcode(8) && opcode(1) && !opcode(0) && system_insn && priv_sufficient val csr_xcpt = (cpu_wen && read_only) || (cpu_ren && (!priv_sufficient || !addr_valid || fp_csr && !io.status.fs.orR)) || (system_insn && !priv_sufficient) || insn_call || insn_break - val mtvec = reg_mstatus.prv << 6 + when (insn_wfi) { reg_wfi := true } + when (some_interrupt_pending) { reg_wfi := false } + io.fatc := insn_sfence_vm - io.evec := Mux(io.exception || csr_xcpt, mtvec.zext, - Mux(insn_redirect_trap, reg_stvec, - Mux(reg_mstatus.prv(1), reg_mepc, 
reg_sepc))).toUInt + io.evec := Mux(io.exception || csr_xcpt, (reg_mstatus.prv << 6) + MTVEC, + Mux(maybe_insn_redirect_trap, reg_stvec.sextTo(vaddrBitsExtended), + Mux(reg_mstatus.prv(1), reg_mepc, reg_sepc))) io.ptbr := reg_sptbr io.csr_xcpt := csr_xcpt io.eret := insn_ret || insn_redirect_trap @@ -270,7 +300,7 @@ class CSRFile extends CoreModule when (io.exception || csr_xcpt) { reg_mstatus.ie := false reg_mstatus.prv := PRV_M - reg_mstatus.mprv := PRV_M + reg_mstatus.mprv := false reg_mstatus.prv1 := reg_mstatus.prv reg_mstatus.ie1 := reg_mstatus.ie reg_mstatus.prv2 := reg_mstatus.prv1 @@ -281,16 +311,15 @@ class CSRFile extends CoreModule when (csr_xcpt) { reg_mcause := Causes.illegal_instruction when (insn_break) { reg_mcause := Causes.breakpoint } - when (insn_call) { reg_mcause := Causes.ecall } + when (insn_call) { reg_mcause := reg_mstatus.prv + Causes.user_ecall } } reg_mbadaddr := io.pc when (io.cause === Causes.fault_load || io.cause === Causes.misaligned_load || - io.cause === Causes.fault_store || io.cause === Causes.misaligned_store) { - val wdata = io.rw.wdata - val (upper, lower) = Split(wdata, vaddrBits) + io.cause === Causes.fault_store || io.cause === Causes.misaligned_store) { + val (upper, lower) = Split(io.rw.wdata, vaddrBits) val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) - reg_mbadaddr := Cat(sign, lower).toSInt + reg_mbadaddr := Cat(sign, lower) } } @@ -313,15 +342,16 @@ class CSRFile extends CoreModule assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: io.csr_replay :: Nil) <= 1, "these conditions must be mutually exclusive") when (reg_time(reg_stimecmp.getWidth-1,0) === reg_stimecmp) { - r_irq_timer := true + reg_mip.stip := true } io.time := reg_time io.host.ipi_req.valid := cpu_wen && decoded_addr(CSRs.send_ipi) io.host.ipi_req.bits := io.rw.wdata io.csr_replay := io.host.ipi_req.valid && !io.host.ipi_req.ready + io.csr_stall := reg_wfi - when (host_pcr_req_fire && !host_pcr_bits.rw && 
decoded_addr(CSRs.tohost)) { reg_tohost := UInt(0) } + when (host_pcr_req_fire && !host_pcr_bits.rw && decoded_addr(CSRs.mtohost)) { reg_tohost := UInt(0) } io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) @@ -333,89 +363,103 @@ class CSRFile extends CoreModule when (wen) { when (decoded_addr(CSRs.mstatus)) { val new_mstatus = new MStatus().fromBits(wdata) - reg_mstatus.ssip := new_mstatus.ssip - reg_mstatus.msip := new_mstatus.msip - reg_mstatus.stie := new_mstatus.stie reg_mstatus.ie := new_mstatus.ie + reg_mstatus.ie1 := new_mstatus.ie1 val supportedModes = Vec((PRV_M :: PRV_U :: (if (params(UseVM)) List(PRV_S) else Nil)).map(UInt(_))) if (supportedModes.size > 1) { - when (supportedModes contains new_mstatus.mprv) { reg_mstatus.mprv := new_mstatus.mprv } + reg_mstatus.mprv := new_mstatus.mprv when (supportedModes contains new_mstatus.prv) { reg_mstatus.prv := new_mstatus.prv } when (supportedModes contains new_mstatus.prv1) { reg_mstatus.prv1 := new_mstatus.prv1 } - reg_mstatus.ie1 := new_mstatus.ie1 if (supportedModes.size > 2) { when (supportedModes contains new_mstatus.prv2) { reg_mstatus.prv2 := new_mstatus.prv2 } reg_mstatus.ie2 := new_mstatus.ie2 } } - if (params(UseVM)) when (new_mstatus.vm === 0 || new_mstatus.vm === 5) { reg_mstatus.vm := new_mstatus.vm } + if (params(UseVM)) { + val vm_on = if (xLen == 32) 8 else 9 + when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 } + when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on } + } if (params(UseVM) || !params(BuildFPU).isEmpty) reg_mstatus.fs := new_mstatus.fs if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_mstatus.xs } + when (decoded_addr(CSRs.mip)) { + val new_mip = new MIP().fromBits(wdata) + if (params(UseVM)) + reg_mip.ssip := new_mip.ssip + reg_mip.msip := new_mip.msip + } + when (decoded_addr(CSRs.mie)) { + val new_mie = new MIP().fromBits(wdata) + if (params(UseVM)) { + reg_mie.ssip := new_mie.ssip + reg_mie.stip := new_mie.stip + } + reg_mie.msip := 
new_mie.msip + reg_mie.mtip := new_mie.mtip + } when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } - when (decoded_addr(CSRs.mepc)) { reg_mepc := wdata(vaddrBits,0).toSInt & SInt(-coreInstBytes) } + when (decoded_addr(CSRs.mepc)) { reg_mepc := wdata(vaddrBitsExtended-1,0).toSInt & SInt(-coreInstBytes) } when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } - when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata } - when (decoded_addr(CSRs.scycle)) { reg_time := wdata.toUInt } - when (decoded_addr(CSRs.stime)) { reg_time := wdata.toUInt } - when (decoded_addr(CSRs.sinstret)) { reg_instret := wdata.toUInt } - when (decoded_addr(CSRs.stimecmp)) { reg_stimecmp := wdata(31,0).toUInt; r_irq_timer := Bool(false) } - when (decoded_addr(CSRs.fromhost)) { when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } - when (decoded_addr(CSRs.tohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } + when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } + when (decoded_addr(CSRs.cyclew)) { reg_time := wdata } + when (decoded_addr(CSRs.instretw)) { reg_instret := wdata } + when (decoded_addr(CSRs.timew)) { reg_time := wdata } + when (decoded_addr(CSRs.stimew)) { reg_time := wdata } + when (decoded_addr(CSRs.stimecmp)) { reg_stimecmp := wdata(31,0); reg_mip.stip := false } + when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } + when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } if (params(UseVM)) { when 
(decoded_addr(CSRs.sstatus)) { val new_sstatus = new SStatus().fromBits(wdata) - reg_mstatus.ssip := new_sstatus.sip - reg_mstatus.stie := new_sstatus.tie reg_mstatus.ie := new_sstatus.ie reg_mstatus.ie1 := new_sstatus.pie reg_mstatus.prv1 := Mux(new_sstatus.ps, PRV_S, PRV_U) + reg_mstatus.mprv := new_sstatus.mprv reg_mstatus.fs := new_sstatus.fs // even without an FPU if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_sstatus.xs } + when (decoded_addr(CSRs.sip)) { + val new_sip = new MIP().fromBits(wdata) + reg_mip.ssip := new_sip.ssip + } + when (decoded_addr(CSRs.sie)) { + val new_sie = new MIP().fromBits(wdata) + reg_mie.ssip := new_sie.ssip + reg_mie.stip := new_sie.stip + } when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } - when (decoded_addr(CSRs.sptbr)) { reg_sptbr := Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)).toUInt } - when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata(vaddrBits,0).toSInt & SInt(-coreInstBytes) } + when (decoded_addr(CSRs.sptbr)) { reg_sptbr := Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)) } + when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata(vaddrBitsExtended-1,0).toSInt & SInt(-coreInstBytes) } when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata(vaddrBits-1,0).toSInt & SInt(-coreInstBytes) } } } io.host.ipi_rep.ready := true - when (io.host.ipi_rep.valid) { reg_mstatus.msip := true } + when (io.host.ipi_rep.valid) { reg_mip.msip := true } when(this.reset) { reg_mstatus.zero1 := 0 - reg_mstatus.ssip := false - reg_mstatus.hsip := false - reg_mstatus.msip := false + reg_mstatus.zero2 := 0 reg_mstatus.ie := false reg_mstatus.prv := PRV_M reg_mstatus.ie1 := false - reg_mstatus.prv1 := PRV_U /* hard-wired to 0 when missing user mode */ - reg_mstatus.ie2 := false + reg_mstatus.prv1 := PRV_M /* hard-wired to M when missing user mode */ + reg_mstatus.ie2 := false /* hard-wired to 0 when missing supervisor mode */ reg_mstatus.prv2 := PRV_U /* hard-wired to 0 when missing supervisor mode */ - reg_mstatus.ie3 
:= false + reg_mstatus.ie3 := false /* hard-wired to 0 when missing hypervisor mode */ reg_mstatus.prv3 := PRV_U /* hard-wired to 0 when missing hypervisor mode */ - reg_mstatus.mprv := PRV_M - reg_mstatus.zero2 := 0 + reg_mstatus.mprv := false reg_mstatus.vm := 0 - reg_mstatus.zero3 := 0 - reg_mstatus.stie := false - reg_mstatus.htie := false - reg_mstatus.mtie := false reg_mstatus.fs := 0 reg_mstatus.xs := 0 reg_mstatus.sd_rv32 := false - reg_mstatus.ua := 4 - reg_mstatus.sa := 4 - reg_mstatus.ha := 0 - reg_mstatus.zero4 := 0 reg_mstatus.sd := false } } diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 3e0e3a8c..d50c1103 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -44,6 +44,7 @@ class CtrlDpathIO extends CoreBundle val fp_sboard_clra = UInt(INPUT, 5) // inputs from csr file val csr_replay = Bool(INPUT) + val csr_stall = Bool(INPUT) val csr_xcpt = Bool(INPUT) val eret = Bool(INPUT) val interrupt = Bool(INPUT) @@ -212,11 +213,12 @@ object XDecode extends DecodeConstants FENCE-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N), FENCE_I-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,Y,N,N), - SFENCE_VM-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - SCALL-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - SBREAK-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - SRET-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - MRTS-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SFENCE_VM-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SCALL-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SBREAK-> 
List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SRET-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + MRTS-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + WFI-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), CSRRW-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), CSRRC-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N), @@ -646,7 +648,8 @@ class Control extends CoreModule id_ctrl.fp && id_stall_fpu || id_ctrl.mem && !io.dmem.req.ready || Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || - id_do_fence + id_do_fence || + io.dpath.csr_stall val ctrl_draind = io.dpath.interrupt ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index d9b433c8..215ddaba 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -97,7 +97,10 @@ object Instructions { def SBREAK = Bits("b00000000000100000000000001110011") def SRET = Bits("b00010000000000000000000001110011") def SFENCE_VM = Bits("b000100000001?????000000001110011") + def WFI = Bits("b00010000001000000000000001110011") + def MRTH = Bits("b00110000011000000000000001110011") def MRTS = Bits("b00110000010100000000000001110011") + def HRTS = Bits("b00100000010100000000000001110011") def CSRRW = Bits("b?????????????????001?????1110011") def CSRRS = Bits("b?????????????????010?????1110011") def CSRRC = Bits("b?????????????????011?????1110011") @@ -195,23 +198,29 @@ object Causes { val misaligned_fetch = 0x0 val fault_fetch = 0x1 val 
illegal_instruction = 0x2 + val breakpoint = 0x3 val misaligned_load = 0x4 val fault_load = 0x5 val misaligned_store = 0x6 val fault_store = 0x7 - val ecall = 0x8 - val breakpoint = 0x9 + val user_ecall = 0x8 + val supervisor_ecall = 0x9 + val hypervisor_ecall = 0xa + val machine_ecall = 0xb val all = { val res = collection.mutable.ArrayBuffer[Int]() res += misaligned_fetch res += fault_fetch res += illegal_instruction + res += breakpoint res += misaligned_load res += fault_load res += misaligned_store res += fault_store - res += ecall - res += breakpoint + res += user_ecall + res += supervisor_ecall + res += hypervisor_ecall + res += machine_ecall res.toArray } } @@ -241,32 +250,47 @@ object CSRs { val uarch15 = 0xccf val sstatus = 0x100 val stvec = 0x101 + val sie = 0x104 val stimecmp = 0x121 val sscratch = 0x140 val sepc = 0x141 - val sptbr = 0x188 - val sasid = 0x189 - val scycle = 0x900 - val stime = 0x901 - val sinstret = 0x902 - val scause = 0xd40 - val sbadaddr = 0xd41 + val sip = 0x144 + val sptbr = 0x180 + val sasid = 0x181 + val cyclew = 0x900 + val timew = 0x901 + val instretw = 0x902 + val stime = 0xd01 + val scause = 0xd42 + val sbadaddr = 0xd43 + val stimew = 0xa01 val mstatus = 0x300 + val mtvec = 0x301 + val mtdeleg = 0x302 + val mie = 0x304 + val mtimecmp = 0x321 val mscratch = 0x340 val mepc = 0x341 val mcause = 0x342 val mbadaddr = 0x343 - val reset = 0x780 - val tohost = 0x781 - val fromhost = 0x782 + val mip = 0x344 + val mtime = 0x701 + val mcpuid = 0xf00 + val mimpid = 0xf01 + val mhartid = 0xf10 + val mtohost = 0x780 + val mfromhost = 0x781 + val mreset = 0x782 val send_ipi = 0x783 - val hartid = 0xfc0 val cycleh = 0xc80 val timeh = 0xc81 val instreth = 0xc82 - val scycleh = 0x980 - val stimeh = 0x981 - val sinstreth = 0x982 + val cyclehw = 0x980 + val timehw = 0x981 + val instrethw = 0x982 + val stimeh = 0xd81 + val stimehw = 0xa81 + val mtimeh = 0x741 val all = { val res = collection.mutable.ArrayBuffer[Int]() res += fflags @@ -294,26 
+318,38 @@ object CSRs { res += uarch15 res += sstatus res += stvec + res += sie res += stimecmp res += sscratch res += sepc + res += sip res += sptbr res += sasid - res += scycle + res += cyclew + res += timew + res += instretw res += stime - res += sinstret res += scause res += sbadaddr + res += stimew res += mstatus + res += mtvec + res += mtdeleg + res += mie + res += mtimecmp res += mscratch res += mepc res += mcause res += mbadaddr - res += reset - res += tohost - res += fromhost + res += mip + res += mtime + res += mcpuid + res += mimpid + res += mhartid + res += mtohost + res += mfromhost + res += mreset res += send_ipi - res += hartid res.toArray } val all32 = { @@ -321,9 +357,12 @@ object CSRs { res += cycleh res += timeh res += instreth - res += scycleh + res += cyclehw + res += timehw + res += instrethw res += stimeh - res += sinstreth + res += stimehw + res += mtimeh res.toArray } } diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index 75c065be..e0b879ac 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -3,5 +3,6 @@ package object rocket extends rocket.constants.ScalarOpConstants { - val START_ADDR = 0x100 + val MTVEC = 0x100 + val START_ADDR = MTVEC + 0x100 } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 4632cca2..56cb484c 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -33,22 +33,20 @@ class DatapathPTWIO extends CoreBundle { class PTE extends CoreBundle { val ppn = Bits(width = ppnBits) - val reserved = Bits(width = 16) - val reserved_for_software = Bits(width = 2) + val reserved_for_software = Bits(width = 3) val d = Bool() val r = Bool() - val g = Bool() - val perm = Bits(width = 2) - val typ = Bits(width = 3) + val typ = Bits(width = 4) + val v = Bool() - def table(dummy: Int = 0) = typ === 1 - def leaf(dummy: Int = 0) = typ >= 2 - def ur(dummy: Int = 0) = typ === 2 || typ >= 4 - def 
uw(dummy: Int = 0) = ur() && perm(0) - def ux(dummy: Int = 0) = ur() && perm(1) - def sr(dummy: Int = 0) = typ >= 3 - def sw(dummy: Int = 0) = Mux(typ >= 4, typ(0), typ === 3 && perm(0)) - def sx(dummy: Int = 0) = Mux(typ >= 4, typ(1), typ === 3 && perm(1)) + def table(dummy: Int = 0) = v && typ < 2 + def leaf(dummy: Int = 0) = v && typ >= 2 + def ur(dummy: Int = 0) = leaf() && typ < 8 + def uw(dummy: Int = 0) = ur() && typ(0) + def ux(dummy: Int = 0) = ur() && typ(1) + def sr(dummy: Int = 0) = leaf() + def sw(dummy: Int = 0) = leaf() && typ(0) + def sx(dummy: Int = 0) = v && typ >= 4 && typ(1) def access_ok(prv: Bits, store: Bool, fetch: Bool) = Mux(prv(0), Mux(fetch, sx(), Mux(store, sw(), sr())), Mux(fetch, ux(), Mux(store, uw(), ur()))) } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 71bf0cbd..adb64960 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -146,7 +146,7 @@ class TLB extends TLBModule { val plru = new PseudoLRU(entries) val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) - val priv = Mux(io.ptw.status.prv === PRV_M && !io.req.bits.instruction, io.ptw.status.mprv, io.ptw.status.prv) + val priv = Mux(io.ptw.status.mprv && !io.req.bits.instruction, io.ptw.status.prv1, io.ptw.status.prv) val priv_s = priv === PRV_S val priv_uses_vm = priv <= PRV_S val req_xwr = Cat(!r_req.store, r_req.store, !(r_req.instruction || r_req.store)) @@ -155,7 +155,7 @@ class TLB extends TLBModule { val w_array = Mux(priv_s, sw_array, uw_array) val x_array = Mux(priv_s, sx_array, ux_array) - val vm_enabled = io.ptw.status.vm(2) && priv_uses_vm + val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1) // it's only a store hit if the dirty bit is set val tag_hits = tag_cam.io.hits & (dirty_array | ~(io.req.bits.store.toSInt & w_array)) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 
89bfddf5..c71e44f9 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -12,6 +12,9 @@ object Util { implicit def intSeqToUIntSeq(x: Iterable[Int]): Iterable[UInt] = x.map(UInt(_)) implicit def seqToVec[T <: Data](x: Iterable[T]): Vec[T] = Vec(x) implicit def wcToUInt(c: WideCounter): UInt = c.value + implicit def sextToConv(x: UInt) = new AnyRef { + def sextTo(n: Int): UInt = Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x) + } implicit def intToUnsigned(x: Int): Unsigned = new Unsigned(x) implicit def booleanToIntConv(x: Boolean) = new AnyRef { From 6a9390c50e11fbc9d1f39d5f2ed7eff3aaa6353c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 19 May 2015 02:56:20 -0700 Subject: [PATCH 0844/1087] Avoid spurious D$ assertion failures For the Rocket pipeline, this fix is needless and the problem is that the assertion is too conservative, but I solved it this way to avoid problems for other plausible use cases where physical and virtual accesses are intermixed. 
--- rocket/src/main/scala/nbdcache.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index e3f7d650..95cde564 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -670,8 +670,8 @@ class HellaCache extends L1HellaCacheModule { io.cpu.xcpt.ma.ld := s1_read && misaligned io.cpu.xcpt.ma.st := s1_write && misaligned - io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld - io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st + io.cpu.xcpt.pf.ld := !s1_req.phys && s1_read && dtlb.io.resp.xcpt_ld + io.cpu.xcpt.pf.st := !s1_req.phys && s1_write && dtlb.io.resp.xcpt_st assert (!(Reg(next= (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) && From b6e68773fd1ad828043945471feda64424380032 Mon Sep 17 00:00:00 2001 From: Wei Song Date: Sat, 30 May 2015 16:25:27 +0100 Subject: [PATCH 0845/1087] nbdcache, writeback unit: when release is not ready and data is not ready for a beat too, no need to re-read data array. 
--- rocket/src/main/scala/nbdcache.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 95cde564..29af64c0 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -409,11 +409,13 @@ class WritebackUnit extends L1HellaCacheModule { } when (r2_data_req_fired) { io.release.valid := beat_done - when(!io.release.ready) { - r1_data_req_fired := false - r2_data_req_fired := false - data_req_cnt := data_req_cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1) - } .elsewhen(beat_done) { if(refillCyclesPerBeat > 1) buf_v := 0 } + when(beat_done) { + when(!io.release.ready) { + r1_data_req_fired := false + r2_data_req_fired := false + data_req_cnt := data_req_cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1) + } .otherwise { if(refillCyclesPerBeat > 1) buf_v := 0 } + } when(!r1_data_req_fired) { // We're done if this is the final data request and the Release can be sent active := data_req_cnt < UInt(refillCycles) || !io.release.ready From 4db60d9e9d28be5d9116272b64a5d2e32a6106cc Mon Sep 17 00:00:00 2001 From: Wei Song Date: Tue, 2 Jun 2015 22:06:12 +0100 Subject: [PATCH 0846/1087] code clean in dcache, no need to check the condition twice. 
--- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 29af64c0..806a0074 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -882,7 +882,7 @@ class HellaCache extends L1HellaCacheModule { val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable val s2_recycle_next = Reg(init=Bool(false)) - when (s1_valid || s1_replay) { s2_recycle_next := (s1_valid || s1_replay) && s2_recycle_ecc } + when (s1_valid || s1_replay) { s2_recycle_next := s2_recycle_ecc } s2_recycle := s2_recycle_ecc || s2_recycle_next // after a nack, block until nack condition resolves to save energy From 5e009ecc758f5d17a46fd80f16352265bc0e5d6a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 11 Jun 2015 16:08:18 -0700 Subject: [PATCH 0847/1087] Fix an apparently benign PC sign-extension bug --- rocket/src/main/scala/dpath.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index a60ff72f..b03d0722 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -226,7 +226,7 @@ class Datapath extends CoreModule io.fpu.dmem_resp_tag := dmem_resp_waddr io.ctrl.mem_br_taken := mem_reg_wdata(0) - val mem_br_target = mem_reg_pc + + val mem_br_target = mem_reg_pc.toSInt + Mux(io.ctrl.mem_ctrl.branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), Mux(io.ctrl.mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4))) val mem_npc = (Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)), mem_br_target) & SInt(-2)).toUInt From 5362e2bbbdc3c07ad1b0d028e2e935b3c20dd2c3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 5 Jul 2015 16:38:49 -0700 Subject: [PATCH 0848/1087] New machine-mode timer facility --- rocket/src/main/scala/csr.scala | 30 
++++++++++++++---------- rocket/src/main/scala/instructions.scala | 4 ++-- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index ac63573c..3483453c 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -116,14 +116,15 @@ class CSRFile extends CoreModule val reg_sbadaddr = Reg(UInt(width = vaddrBitsExtended)) val reg_sscratch = Reg(Bits(width = xLen)) val reg_stvec = Reg(UInt(width = vaddrBits)) - val reg_stimecmp = Reg(Bits(width = 32)) + val reg_mtimecmp = Reg(Bits(width = xLen)) val reg_sptbr = Reg(UInt(width = paddrBits)) val reg_wfi = Reg(init=Bool(false)) val reg_tohost = Reg(init=Bits(0, xLen)) val reg_fromhost = Reg(init=Bits(0, xLen)) val reg_stats = Reg(init=Bool(false)) - val reg_time = WideCounter(xLen) + val reg_time = Reg(UInt(width = xLen)) + val reg_cycle = WideCounter(xLen) val reg_instret = WideCounter(xLen, io.retire) val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) val reg_fflags = Reg(UInt(width = 5)) @@ -146,6 +147,7 @@ class CSRFile extends CoreModule checkInterrupt(PRV_S, reg_mie.ssip && reg_mip.ssip, 0) checkInterrupt(PRV_M, reg_mie.msip && reg_mip.msip, 0) checkInterrupt(PRV_S, reg_mie.stip && reg_mip.stip, 1) + checkInterrupt(PRV_M, reg_mie.mtip && reg_mip.mtip, 1) checkInterrupt(PRV_M, reg_fromhost != 0, 2) checkInterrupt(PRV_M, irq_rocc, 3) @@ -185,18 +187,20 @@ class CSRFile extends CoreModule CSRs.fflags -> (if (!params(BuildFPU).isEmpty) reg_fflags else UInt(0)), CSRs.frm -> (if (!params(BuildFPU).isEmpty) reg_frm else UInt(0)), CSRs.fcsr -> (if (!params(BuildFPU).isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), - CSRs.cycle -> reg_time, - CSRs.cyclew -> reg_time, + CSRs.cycle -> reg_cycle, + CSRs.cyclew -> reg_cycle, CSRs.instret -> reg_instret, CSRs.instretw -> reg_instret, CSRs.time -> reg_time, CSRs.timew -> reg_time, CSRs.stime -> reg_time, CSRs.stimew -> reg_time, + CSRs.mtime -> reg_time, 
CSRs.mcpuid -> UInt(cpuid), CSRs.mimpid -> UInt(impid), CSRs.mstatus -> read_mstatus, CSRs.mtdeleg -> UInt(0), + CSRs.mreset -> UInt(0), CSRs.mtvec -> UInt(MTVEC), CSRs.mip -> reg_mip.toBits, CSRs.mie -> reg_mie.toBits, @@ -204,7 +208,7 @@ class CSRFile extends CoreModule CSRs.mepc -> reg_mepc.sextTo(xLen), CSRs.mbadaddr -> reg_mbadaddr.sextTo(xLen), CSRs.mcause -> reg_mcause, - CSRs.stimecmp -> reg_stimecmp, + CSRs.mtimecmp -> reg_mtimecmp, CSRs.mhartid -> io.host.id, CSRs.send_ipi -> io.host.id, /* don't care */ CSRs.stats -> reg_stats, @@ -341,11 +345,11 @@ class CSRFile extends CoreModule assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: io.csr_replay :: Nil) <= 1, "these conditions must be mutually exclusive") - when (reg_time(reg_stimecmp.getWidth-1,0) === reg_stimecmp) { - reg_mip.stip := true + when (reg_time >= reg_mtimecmp) { + reg_mip.mtip := true } - io.time := reg_time + io.time := reg_cycle io.host.ipi_req.valid := cpu_wen && decoded_addr(CSRs.send_ipi) io.host.ipi_req.bits := io.rw.wdata io.csr_replay := io.host.ipi_req.valid && !io.host.ipi_req.ready @@ -387,8 +391,10 @@ class CSRFile extends CoreModule } when (decoded_addr(CSRs.mip)) { val new_mip = new MIP().fromBits(wdata) - if (params(UseVM)) + if (params(UseVM)) { reg_mip.ssip := new_mip.ssip + reg_mip.stip := new_mip.stip + } reg_mip.msip := new_mip.msip } when (decoded_addr(CSRs.mie)) { @@ -407,11 +413,9 @@ class CSRFile extends CoreModule when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } - when (decoded_addr(CSRs.cyclew)) { reg_time := wdata } when (decoded_addr(CSRs.instretw)) { reg_instret := wdata } - when (decoded_addr(CSRs.timew)) { reg_time := wdata } - when (decoded_addr(CSRs.stimew)) { reg_time := wdata } - when 
(decoded_addr(CSRs.stimecmp)) { reg_stimecmp := wdata(31,0); reg_mip.stip := false } + when (decoded_addr(CSRs.mtimecmp)) { reg_mtimecmp := wdata; reg_mip.mtip := false } + when (decoded_addr(CSRs.mreset) /* XXX used by HTIF to write mtime */) { reg_time := wdata } when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 215ddaba..9b94792e 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -251,7 +251,6 @@ object CSRs { val sstatus = 0x100 val stvec = 0x101 val sie = 0x104 - val stimecmp = 0x121 val sscratch = 0x140 val sepc = 0x141 val sip = 0x144 @@ -290,6 +289,7 @@ object CSRs { val instrethw = 0x982 val stimeh = 0xd81 val stimehw = 0xa81 + val mtimecmph = 0x361 val mtimeh = 0x741 val all = { val res = collection.mutable.ArrayBuffer[Int]() @@ -319,7 +319,6 @@ object CSRs { res += sstatus res += stvec res += sie - res += stimecmp res += sscratch res += sepc res += sip @@ -362,6 +361,7 @@ object CSRs { res += instrethw res += stimeh res += stimehw + res += mtimecmph res += mtimeh res.toArray } From 323386739080a3b1d2f8357822fa765fbef2fa76 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 11 Jul 2015 13:32:45 -0700 Subject: [PATCH 0849/1087] Use Chisel3 SeqMem construct --- rocket/src/main/scala/icache.scala | 30 +++++++++++----------------- rocket/src/main/scala/nbdcache.scala | 8 ++++---- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 79500260..6da75d36 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -196,17 +196,13 @@ class ICache 
extends FrontendModule val repl_way = if (isDM) UInt(0) else LFSR16(s2_miss)(log2Up(nWays)-1,0) val entagbits = code.width(tagBits) - val tag_array = Mem(Bits(width = entagbits*nWays), nSets, seqRead = true) - val tag_raddr = Reg(UInt()) + val tag_array = SeqMem(Bits(width = entagbits*nWays), nSets) + val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid) when (refill_done) { val wmask = FillInterleaved(entagbits, if (isDM) Bits(1) else UIntToOH(repl_way)) val tag = code.encode(s2_tag).toUInt tag_array.write(s2_idx, Fill(nWays, tag), wmask) } -// /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM - .elsewhen (s0_valid) { - tag_raddr := s0_pgoff(untagBits-1,blockOffBits) - } val vb_array = Reg(init=Bits(0, nSets*nWays)) when (refill_done && !invalidated) { @@ -229,7 +225,7 @@ class ICache extends FrontendModule val s2_vb = Reg(Bool()) val s2_tag_disparity = Reg(Bool()) val s2_tag_match = Reg(Bool()) - val tag_out = tag_array(tag_raddr)(entagbits*(i+1)-1, entagbits*i) + val tag_out = tag_rdata(entagbits*(i+1)-1, entagbits*i) when (s1_valid && rdy && !stall) { s2_vb := s1_vb s2_tag_disparity := code.decode(tag_out).error @@ -242,19 +238,17 @@ class ICache extends FrontendModule s2_any_tag_hit := s2_tag_hit.reduceLeft(_||_) && !s2_disparity.reduceLeft(_||_) for (i <- 0 until nWays) { - val data_array = Mem(Bits(width = code.width(rowBits)), nSets*refillCycles, seqRead = true) - val s1_raddr = Reg(UInt()) - when (narrow_grant.valid && repl_way === UInt(i)) { - val e_d = code.encode(narrow_grant.bits.data) - if(refillCycles > 1) data_array(Cat(s2_idx, refill_cnt)) := e_d - else data_array(s2_idx) := e_d - } -// /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM - .elsewhen (s0_valid) { - s1_raddr := s0_pgoff(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0)) + val data_array = SeqMem(Bits(width = code.width(rowBits)), nSets*refillCycles) + val wen = narrow_grant.valid && repl_way 
=== UInt(i) + when (wen) { + val e_d = code.encode(narrow_grant.bits.data).toUInt + if(refillCycles > 1) data_array.write(Cat(s2_idx, refill_cnt), e_d) + else data_array.write(s2_idx, e_d) } + val s0_raddr = s0_pgoff(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0)) + val s1_rdata = data_array.read(s0_raddr, !wen && s0_valid) // if s1_tag_match is critical, replace with partial tag check - when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s2_dout(i) := data_array(s1_raddr) } + when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s2_dout(i) := s1_rdata } } val s2_dout_word = s2_dout.map(x => (x >> (s2_offset(log2Up(rowBytes)-1,log2Up(coreInstBytes)) << log2Up(coreInstBits)))(coreInstBits-1,0)) io.resp.bits.data := Mux1H(s2_tag_hit, s2_dout_word) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 806a0074..4a24c6e5 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -551,13 +551,13 @@ class DataArray extends L1HellaCacheModule { val resp = Vec.fill(rowWords){Bits(width = encRowBits)} val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { - val array = Mem(Bits(width=encRowBits), nSets*refillCycles, seqRead = true) + val array = SeqMem(Bits(width=encRowBits), nSets*refillCycles) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { val data = Fill(rowWords, io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) val mask = FillInterleaved(encDataBits, wway_en) array.write(waddr, data, mask) } - resp(p) := array(RegEnable(raddr, rway_en.orR && io.read.valid)) + resp(p) := array.read(raddr, rway_en.orR && io.read.valid) } for (dw <- 0 until rowWords) { val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw))) @@ -570,11 +570,11 @@ class DataArray extends L1HellaCacheModule { } else { val wmask = FillInterleaved(encDataBits, io.write.bits.wmask) for (w <- 0 until nWays) { - 
val array = Mem(Bits(width=encRowBits), nSets*refillCycles, seqRead = true) + val array = SeqMem(Bits(width=encRowBits), nSets*refillCycles) when (io.write.bits.way_en(w) && io.write.valid) { array.write(waddr, io.write.bits.data, wmask) } - io.resp(w) := array(RegEnable(raddr, io.read.bits.way_en(w) && io.read.valid)) + io.resp(w) := array.read(raddr, io.read.bits.way_en(w) && io.read.valid) } } From a78e28523c4f59a5b4ec27a4123cc9ea244bc39a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 11 Jul 2015 14:06:08 -0700 Subject: [PATCH 0850/1087] Chisel3: Don't mix Mux types --- rocket/src/main/scala/dpath.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index b03d0722..53e23a60 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -229,11 +229,11 @@ class Datapath extends CoreModule val mem_br_target = mem_reg_pc.toSInt + Mux(io.ctrl.mem_ctrl.branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), Mux(io.ctrl.mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4))) - val mem_npc = (Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)), mem_br_target) & SInt(-2)).toUInt + val mem_npc = (Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)).toSInt, mem_br_target) & SInt(-2)).toUInt io.ctrl.mem_misprediction := mem_npc != ex_reg_pc || !io.ctrl.ex_valid io.ctrl.mem_npc_misaligned := mem_npc(1) io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1 - val mem_int_wdata = Mux(io.ctrl.mem_ctrl.jalr, mem_br_target, mem_reg_wdata).toUInt + val mem_int_wdata = Mux(io.ctrl.mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt // writeback stage when (!mem_reg_kill) { From be2ff6dec7a9515f6afce7fad4b9bb563d2c8f2c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 15 Jul 2015 12:33:46 -0700 Subject: [PATCH 0851/1087] Vec(Reg) -> Reg(Vec) --- 
rocket/src/main/scala/btb.scala | 2 +- rocket/src/main/scala/dpath.scala | 6 +++--- rocket/src/main/scala/fpu.scala | 2 +- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 2 +- rocket/src/main/scala/rocc.scala | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 86f5934b..878b7e09 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -36,7 +36,7 @@ class RAS(nras: Int) { private val count = Reg(init=UInt(0,log2Up(nras+1))) private val pos = Reg(init=UInt(0,log2Up(nras))) - private val stack = Vec.fill(nras){Reg(UInt())} + private val stack = Reg(Vec.fill(nras){UInt()}) } class BHTResp extends Bundle with BTBParameters { diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 53e23a60..0161da34 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -23,9 +23,9 @@ class Datapath extends CoreModule val ex_reg_pc = Reg(UInt()) val ex_reg_inst = Reg(Bits()) val ex_reg_kill = Reg(Bool()) - val ex_reg_rs_bypass = Vec.fill(2)(Reg(Bool())) - val ex_reg_rs_lsb = Vec.fill(2)(Reg(Bits())) - val ex_reg_rs_msb = Vec.fill(2)(Reg(Bits())) + val ex_reg_rs_bypass = Reg(Vec.fill(2)(Bool())) + val ex_reg_rs_lsb = Reg(Vec.fill(2)(Bits())) + val ex_reg_rs_msb = Reg(Vec.fill(2)(Bits())) // memory definitions val mem_reg_pc = Reg(UInt()) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 91a488cb..0194ba4c 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -461,7 +461,7 @@ class FPU extends Module val memLatencyMask = latencyMask(mem_ctrl, 2) val wen = Reg(init=Bits(0, maxLatency-1)) - val winfo = Vec.fill(maxLatency-1){Reg(Bits())} + val winfo = Reg(Vec.fill(maxLatency-1){Bits()}) val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) val write_port_busy = 
RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, ex_reg_valid) val mem_winfo = Cat(pipeid(mem_ctrl), mem_reg_inst(11,7)) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 6da75d36..43a30ef5 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -218,7 +218,7 @@ class ICache extends FrontendModule val s1_tag_match = Vec.fill(nWays){Bool()} val s2_tag_hit = Vec.fill(nWays){Bool()} - val s2_dout = Vec.fill(nWays){Reg(Bits())} + val s2_dout = Reg(Vec.fill(nWays){Bits()}) for (i <- 0 until nWays) { val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 4a24c6e5..0ad6f301 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -749,7 +749,7 @@ class HellaCache extends L1HellaCacheModule { val s2_data = Vec.fill(nWays){Bits(width = encRowBits)} for (w <- 0 until nWays) { - val regs = Vec.fill(rowWords){Reg(Bits(width = encDataBits))} + val regs = Reg(Vec.fill(rowWords){Bits(width = encDataBits)}) val en1 = s1_clk_en && s1_tag_eq_way(w) for (i <- 0 until regs.size) { val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 725a500b..c8838c69 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -62,7 +62,7 @@ class AccumulatorExample extends RoCC { val n = 4 val regfile = Mem(UInt(width = xLen), n) - val busy = Vec.fill(n){Reg(init=Bool(false))} + val busy = Reg(init=Vec.fill(n){Bool(false)}) val cmd = Queue(io.cmd) val funct = cmd.bits.inst.funct From 5b7f3c3006e0349a39005c9181f703fcb27204a3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 15 Jul 2015 17:30:50 -0700 Subject: [PATCH 0852/1087] Don't use clone --- 
rocket/src/main/scala/ctrl.scala | 4 ++-- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 6 +++--- rocket/src/main/scala/rocc.scala | 2 +- rocket/src/main/scala/tlb.scala | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index d50c1103..991ed4b5 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -367,7 +367,7 @@ class Control extends CoreModule val ex_reg_xcpt_interrupt = Reg(Bool()) val ex_reg_valid = Reg(Bool()) val ex_reg_btb_hit = Reg(Bool()) - val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits.clone) + val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits) val ex_reg_xcpt = Reg(Bool()) val ex_reg_flush_pipe = Reg(Bool()) val ex_reg_load_use = Reg(Bool()) @@ -376,7 +376,7 @@ class Control extends CoreModule val mem_reg_xcpt_interrupt = Reg(Bool()) val mem_reg_valid = Reg(Bool()) val mem_reg_btb_hit = Reg(Bool()) - val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits.clone) + val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits) val mem_reg_xcpt = Reg(Bool()) val mem_reg_replay = Reg(Bool()) val mem_reg_flush_pipe = Reg(Bool()) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 43a30ef5..59fc041c 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -55,7 +55,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule val s2_valid = Reg(init=Bool(true)) val s2_pc = Reg(init=UInt(START_ADDR)) val s2_btb_resp_valid = Reg(init=Bool(false)) - val s2_btb_resp_bits = Reg(btb.io.resp.bits.clone) + val s2_btb_resp_bits = Reg(btb.io.resp.bits) val s2_xcpt_if = Reg(init=Bool(false)) val msb = vaddrBits-1 diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 0ad6f301..9c822d50 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -602,19 +602,19 @@ 
class HellaCache extends L1HellaCacheModule { io.cpu.req.ready := Bool(true) val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) - val s1_req = Reg(io.cpu.req.bits.clone) + val s1_req = Reg(io.cpu.req.bits) val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill val s1_replay = Reg(init=Bool(false)) val s1_clk_en = Reg(Bool()) val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) - val s2_req = Reg(io.cpu.req.bits.clone) + val s2_req = Reg(io.cpu.req.bits) val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd != M_NOP val s2_recycle = Bool() val s2_valid_masked = Bool() val s3_valid = Reg(init=Bool(false)) - val s3_req = Reg(io.cpu.req.bits.clone) + val s3_req = Reg(io.cpu.req.bits) val s3_way = Reg(Bits()) val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index c8838c69..61a4b000 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -62,7 +62,7 @@ class AccumulatorExample extends RoCC { val n = 4 val regfile = Mem(UInt(width = xLen), n) - val busy = Reg(init=Vec.fill(n){Bool(false)}) + val busy = Reg(init=Vec(Bool(false), n)) val cmd = Queue(io.cmd) val funct = cmd.bits.inst.funct diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index adb64960..a5d87943 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -109,7 +109,7 @@ class TLB extends TLBModule { val r_req = Reg(new TLBReq) val tag_cam = Module(new RocketCAM) - val tag_ram = Mem(io.ptw.resp.bits.pte.ppn.clone, entries) + val tag_ram = Mem(io.ptw.resp.bits.pte.ppn, entries) val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt tag_cam.io.tag := lookup_tag From ac6e73e317d8472cb8fb6c4efc68efffb0021d44 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 15 Jul 2015 20:24:18 -0700 Subject: [PATCH 0853/1087] Add Wire() wrap --- rocket/src/main/scala/btb.scala | 4 ++-- 
rocket/src/main/scala/csr.scala | 11 ++++------- rocket/src/main/scala/ctrl.scala | 12 ++++++------ rocket/src/main/scala/dpath.scala | 8 ++++---- rocket/src/main/scala/fpu.scala | 12 ++++++------ rocket/src/main/scala/icache.scala | 12 ++++++------ rocket/src/main/scala/nbdcache.scala | 18 +++++++++--------- rocket/src/main/scala/ptw.scala | 3 +-- 8 files changed, 38 insertions(+), 42 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 878b7e09..cb372746 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -56,7 +56,7 @@ class BHTResp extends Bundle with BTBParameters { class BHT(nbht: Int) { val nbhtbits = log2Up(nbht) def get(addr: UInt, update: Bool): BHTResp = { - val res = new BHTResp + val res = Wire(new BHTResp) val index = addr(nbhtbits+1,2) ^ history res.value := table(index) res.history := history @@ -178,7 +178,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val useUpdatePageHit = updatePageHit.orR val doIdxPageRepl = !useUpdatePageHit - val idxPageRepl = UInt() + val idxPageRepl = Wire(UInt()) val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 3483453c..ab064dad 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -134,7 +134,7 @@ class CSRFile extends CoreModule io.interrupt_cause := 0 io.interrupt := io.interrupt_cause(xLen-1) - val some_interrupt_pending = Bool(); some_interrupt_pending := false + val some_interrupt_pending = Wire(init=Bool(false)) def checkInterrupt(max_priv: UInt, cond: Bool, num: Int) = { when (cond && (reg_mstatus.prv < max_priv || reg_mstatus.prv === max_priv && reg_mstatus.ie)) { io.interrupt_cause := UInt((BigInt(1) << (xLen-1)) + num) @@ -216,20 +216,17 @@ class 
CSRFile extends CoreModule CSRs.mfromhost -> reg_fromhost) if (params(UseVM)) { - val read_sstatus = new SStatus - read_sstatus := new SStatus().fromBits(read_mstatus) // sstatus mostly overlaps mstatus + val read_sstatus = Wire(init=new SStatus().fromBits(read_mstatus)) read_sstatus.zero1 := 0 read_sstatus.zero2 := 0 read_sstatus.zero3 := 0 read_sstatus.zero4 := 0 - val read_sip = new MIP - read_sip := new MIP().fromBits(0) + val read_sip = Wire(init=new MIP().fromBits(0)) read_sip.ssip := reg_mip.ssip read_sip.stip := reg_mip.stip - val read_sie = new MIP - read_sie := new MIP().fromBits(0) + val read_sie = Wire(init=new MIP().fromBits(0)) read_sie.ssip := reg_mie.ssip read_sie.stip := reg_mie.stip diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/ctrl.scala index 991ed4b5..df580ca1 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/ctrl.scala @@ -359,7 +359,7 @@ class Control extends CoreModule if (!params(BuildFPU).isEmpty && params(FDivSqrt)) decode_table ++= FDivSqrtDecode.table if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table - val id_ctrl = new IntCtrlSigs().decode(io.dpath.inst, decode_table) + val id_ctrl = Wire(new IntCtrlSigs()).decode(io.dpath.inst, decode_table) val ex_ctrl = Reg(new IntCtrlSigs) val mem_ctrl = Reg(new IntCtrlSigs) val wb_ctrl = Reg(new IntCtrlSigs) @@ -389,21 +389,21 @@ class Control extends CoreModule val wb_reg_cause = Reg(UInt()) val wb_reg_rocc_pending = Reg(init=Bool(false)) - val take_pc_wb = Bool() + val take_pc_wb = Wire(Bool()) val mem_misprediction = io.dpath.mem_misprediction && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) val take_pc_mem = want_take_pc_mem && !io.dpath.mem_npc_misaligned val take_pc_mem_wb = take_pc_wb || take_pc_mem val take_pc = take_pc_mem_wb - val ctrl_killd = Bool() - val ctrl_killx = Bool() - val ctrl_killm = Bool() + val ctrl_killd = 
Wire(Bool()) + val ctrl_killx = Wire(Bool()) + val ctrl_killm = Wire(Bool()) val id_raddr3 = io.dpath.inst(31,27) val id_raddr2 = io.dpath.inst(24,20) val id_raddr1 = io.dpath.inst(19,15) val id_waddr = io.dpath.inst(11,7) - val id_load_use = Bool() + val id_load_use = Wire(Bool()) val id_reg_fence = Reg(init=Bool(false)) val id_csr_en = id_ctrl.csr != CSR.N diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala index 0161da34..c7caa4f7 100644 --- a/rocket/src/main/scala/dpath.scala +++ b/rocket/src/main/scala/dpath.scala @@ -38,7 +38,7 @@ class Datapath extends CoreModule val wb_reg_pc = Reg(UInt()) val wb_reg_inst = Reg(Bits()) val wb_reg_wdata = Reg(Bits()) - val wb_wdata = Bits() + val wb_wdata = Wire(Bits()) val wb_reg_rs2 = Reg(Bits()) // instruction decode stage @@ -51,7 +51,7 @@ class Datapath extends CoreModule private var canRead = true def read(addr: UInt) = { require(canRead) - reads += addr -> UInt() + reads += addr -> Wire(UInt()) reads.last._2 := rf(~addr) reads.last._2 } @@ -109,7 +109,7 @@ class Datapath extends CoreModule } } - val bypass = Vec.fill(NBYP)(Bits()) + val bypass = Wire(Vec(Bits(), NBYP)) bypass(BYP_0) := Bits(0) bypass(BYP_EX) := mem_reg_wdata bypass(BYP_MEM) := wb_reg_wdata @@ -198,7 +198,7 @@ class Datapath extends CoreModule val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data - val ll_wdata = Bits() + val ll_wdata = Wire(Bits()) div.io.resp.ready := io.ctrl.ll_ready ll_wdata := div.io.resp.bits.data io.ctrl.ll_waddr := div.io.resp.bits.tag diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 0194ba4c..9a444fb9 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -248,7 +248,7 @@ class IntToFP(val latency: Int) extends Module val in = Pipe(io.in) - val mux = new FPResult + val mux = Wire(new FPResult) mux.exc := Bits(0) mux.data := 
hardfloat.floatNToRecodedFloatN(in.bits.in1, 52, 12) when (in.bits.single) { @@ -299,7 +299,7 @@ class FPToFP(val latency: Int) extends Module val isMax = in.bits.rm(0) val isLHS = isnan2 || isMax != io.lt && !isnan1 - val mux = new FPResult + val mux = Wire(new FPResult) mux.exc := minmax_exc mux.data := in.bits.in2 @@ -347,7 +347,7 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module fma.io.b := in.in2 fma.io.c := in.in3 - val res = new FPResult + val res = Wire(new FPResult) res.data := fma.io.out res.exc := fma.io.exceptionFlags io.out := Pipe(valid, res, latency-1) @@ -404,7 +404,7 @@ class FPU extends Module val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_)) val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.dpath.fcsr_rm, ex_reg_inst(14,12)) - val req = new FPInput + val req = Wire(new FPInput) req := ex_ctrl req.rm := ex_rm req.in1 := ex_rs1 @@ -441,8 +441,8 @@ class FPU extends Module val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt val divSqrt_wen = Reg(next=Bool(false)) val divSqrt_waddr = Reg(Bits()) - val divSqrt_wdata = Bits() - val divSqrt_flags = Bits() + val divSqrt_wdata = Wire(Bits()) + val divSqrt_flags = Wire(Bits()) val divSqrt_in_flight = Reg(init=Bool(false)) // writeback arbitration diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 59fc041c..4a4436a6 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -158,11 +158,11 @@ class ICache extends FrontendModule val state = Reg(init=s_ready) val invalidated = Reg(Bool()) val stall = !io.resp.ready - val rdy = Bool() + val rdy = Wire(Bool()) val s2_valid = Reg(init=Bool(false)) val s2_addr = Reg(UInt(width = paddrBits)) - val s2_any_tag_hit = Bool() + val s2_any_tag_hit = Wire(Bool()) val s1_valid = Reg(init=Bool(false)) val s1_pgoff = Reg(UInt(width = pgIdxBits)) @@ -212,13 +212,13 @@ class ICache extends FrontendModule vb_array := 
Bits(0) invalidated := Bool(true) } - val s2_disparity = Vec.fill(nWays){Bool()} + val s2_disparity = Wire(Vec(Bool(), nWays)) for (i <- 0 until nWays) when (s2_valid && s2_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s2_idx), Bool(false)) } - val s1_tag_match = Vec.fill(nWays){Bool()} - val s2_tag_hit = Vec.fill(nWays){Bool()} - val s2_dout = Reg(Vec.fill(nWays){Bits()}) + val s1_tag_match = Wire(Vec(Bool(), nWays)) + val s2_tag_hit = Wire(Vec(Bool(), nWays)) + val s2_dout = Reg(Vec(Bits(), nWays)) for (i <- 0 until nWays) { val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9c822d50..4b191db6 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -108,7 +108,7 @@ class L1MetaWriteReq extends object L1Metadata { def apply(tag: Bits, coh: ClientMetadata) = { - val meta = new L1Metadata + val meta = Wire(new L1Metadata) meta.tag := tag meta.coh := coh meta @@ -303,12 +303,12 @@ class MSHRFile extends L1HellaCacheModule { val sdq = Mem(io.req.bits.data, sdqDepth) when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } - val idxMatch = Vec.fill(nMSHRs){Bool()} - val tagList = Vec.fill(nMSHRs){Bits()} + val idxMatch = Wire(Vec(Bool(), nMSHRs)) + val tagList = Wire(Vec(Bits(), nMSHRs)) val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits - val wbTagList = Vec.fill(nMSHRs){Bits()} - val refillMux = Vec.fill(nMSHRs){new L1RefillReq} + val wbTagList = Wire(Vec(Bits(), nMSHRs)) + val refillMux = Wire(Vec(new L1RefillReq, nMSHRs)) val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs)) val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) val mem_req_arb = Module(new LockingArbiter( @@ -548,7 +548,7 @@ class DataArray extends L1HellaCacheModule { for (w <- 0 until nWays by rowWords) { val wway_en = io.write.bits.way_en(w+rowWords-1,w) 
val rway_en = io.read.bits.way_en(w+rowWords-1,w) - val resp = Vec.fill(rowWords){Bits(width = encRowBits)} + val resp = Wire(Vec(Bits(width = encRowBits), rowWords)) val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { val array = SeqMem(Bits(width=encRowBits), nSets*refillCycles) @@ -610,8 +610,8 @@ class HellaCache extends L1HellaCacheModule { val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) val s2_req = Reg(io.cpu.req.bits) val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd != M_NOP - val s2_recycle = Bool() - val s2_valid_masked = Bool() + val s2_recycle = Wire(Bool()) + val s2_valid_masked = Wire(Bool()) val s3_valid = Reg(init=Bool(false)) val s3_req = Reg(io.cpu.req.bits) @@ -747,7 +747,7 @@ class HellaCache extends L1HellaCacheModule { } when (io.cpu.invalidate_lr) { lrsc_count := 0 } - val s2_data = Vec.fill(nWays){Bits(width = encRowBits)} + val s2_data = Wire(Vec(Bits(width=encRowBits), nWays)) for (w <- 0 until nWays) { val regs = Reg(Vec.fill(rowWords){Bits(width = encDataBits)}) val en1 = s1_clk_en && s1_tag_eq_way(w) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 56cb484c..69fbf57e 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -109,8 +109,7 @@ class PTW(n: Int) extends CoreModule r_pte := pte } - val pte_wdata = new PTE - pte_wdata := new PTE().fromBits(0) + val pte_wdata = Wire(init=new PTE().fromBits(0)) pte_wdata.r := true pte_wdata.d := r_req.store From cc447c8110e1fb8674fda9aec510aed3fe88b3a0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 21 Jul 2015 17:10:56 -0700 Subject: [PATCH 0854/1087] Refactor pipeline RTL (merge ctrl + dpath into rocket) --- rocket/src/main/scala/consts.scala | 12 - rocket/src/main/scala/core.scala | 94 --- rocket/src/main/scala/dpath.scala | 291 --------- rocket/src/main/scala/fpu.scala | 110 ++-- .../main/scala/{ctrl.scala => idecode.scala} | 386 +---------- 
rocket/src/main/scala/rocket.scala | 599 ++++++++++++++++++ rocket/src/main/scala/tile.scala | 9 +- 7 files changed, 662 insertions(+), 839 deletions(-) delete mode 100644 rocket/src/main/scala/core.scala delete mode 100644 rocket/src/main/scala/dpath.scala rename rocket/src/main/scala/{ctrl.scala => idecode.scala} (62%) create mode 100644 rocket/src/main/scala/rocket.scala diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 4d7cb68d..1fd8bc30 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -18,11 +18,6 @@ trait ScalarOpConstants { val BR_LTU = Bits(6, 3) val BR_GEU = Bits(7, 3) - val PC_EX = UInt(0, 2) - val PC_MEM = UInt(1, 2) - val PC_WB = UInt(2, 2) - val PC_CSR = UInt(3, 2) - val A1_X = Bits("b??", 2) val A1_ZERO = UInt(0, 2) val A1_RS1 = UInt(1, 2) @@ -46,13 +41,6 @@ trait ScalarOpConstants { val N = Bool(false) val Y = Bool(true) - val NBYP = 4 - val SZ_BYP = log2Up(NBYP) - val BYP_0 = 0 - val BYP_EX = 1 - val BYP_MEM = 2 - val BYP_DC = 3 - val SZ_DW = 1 val DW_X = X val DW_32 = N diff --git a/rocket/src/main/scala/core.scala b/rocket/src/main/scala/core.scala deleted file mode 100644 index d73e494d..00000000 --- a/rocket/src/main/scala/core.scala +++ /dev/null @@ -1,94 +0,0 @@ -// See LICENSE for license details. 
- -package rocket - -import Chisel._ -import Util._ -import uncore._ - -case object BuildFPU extends Field[Option[() => FPU]] -case object FDivSqrt extends Field[Boolean] -case object XLen extends Field[Int] -case object NMultXpr extends Field[Int] -case object FetchWidth extends Field[Int] -case object RetireWidth extends Field[Int] -case object UseVM extends Field[Boolean] -case object FastLoadWord extends Field[Boolean] -case object FastLoadByte extends Field[Boolean] -case object FastMulDiv extends Field[Boolean] -case object CoreInstBits extends Field[Int] -case object CoreDataBits extends Field[Int] -case object CoreDCacheReqTagBits extends Field[Int] -case object NCustomMRWCSRs extends Field[Int] - -abstract trait CoreParameters extends UsesParameters { - val xLen = params(XLen) - val paddrBits = params(PAddrBits) - val vaddrBits = params(VAddrBits) - val pgIdxBits = params(PgIdxBits) - val ppnBits = params(PPNBits) - val vpnBits = params(VPNBits) - val pgLevels = params(PgLevels) - val pgLevelBits = params(PgLevelBits) - val asIdBits = params(ASIdBits) - - val retireWidth = params(RetireWidth) - val coreFetchWidth = params(FetchWidth) - val coreInstBits = params(CoreInstBits) - val coreInstBytes = coreInstBits/8 - val coreDataBits = xLen - val coreDataBytes = coreDataBits/8 - val coreDCacheReqTagBits = params(CoreDCacheReqTagBits) - val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits - val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt - - if(params(FastLoadByte)) require(params(FastLoadWord)) -} - -abstract trait RocketCoreParameters extends CoreParameters -{ - require(params(FetchWidth) == 1) // for now... - require(params(RetireWidth) == 1) // for now... 
-} - -abstract class CoreBundle extends Bundle with CoreParameters -abstract class CoreModule extends Module with CoreParameters - -class RocketIO extends Bundle -{ - val host = new HTIFIO - val imem = new CPUFrontendIO - val dmem = new HellaCacheIO - val ptw = new DatapathPTWIO().flip - val rocc = new RoCCInterface().flip -} - -class Core extends Module with CoreParameters -{ - val io = new RocketIO - - val ctrl = Module(new Control) - val dpath = Module(new Datapath) - - //If so specified, build an FPU module and wire it in - params(BuildFPU) - .map { bf => bf() } - .foreach { fpu => - dpath.io.fpu <> fpu.io.dpath - ctrl.io.fpu <> fpu.io.ctrl - } - - ctrl.io.dpath <> dpath.io.ctrl - dpath.io.host <> io.host - - ctrl.io.imem <> io.imem - dpath.io.imem <> io.imem - - ctrl.io.dmem <> io.dmem - dpath.io.dmem <> io.dmem - - dpath.io.ptw <> io.ptw - - ctrl.io.rocc <> io.rocc - dpath.io.rocc <> io.rocc -} diff --git a/rocket/src/main/scala/dpath.scala b/rocket/src/main/scala/dpath.scala deleted file mode 100644 index c7caa4f7..00000000 --- a/rocket/src/main/scala/dpath.scala +++ /dev/null @@ -1,291 +0,0 @@ -// See LICENSE for license details. 
- -package rocket - -import Chisel._ -import Instructions._ -import Util._ -import uncore._ - -class Datapath extends CoreModule -{ - val io = new Bundle { - val host = new HTIFIO - val ctrl = new CtrlDpathIO().flip - val dmem = new HellaCacheIO - val ptw = new DatapathPTWIO().flip - val imem = new CPUFrontendIO - val fpu = new DpathFPUIO - val rocc = new RoCCInterface().flip - } - - // execute definitions - val ex_reg_pc = Reg(UInt()) - val ex_reg_inst = Reg(Bits()) - val ex_reg_kill = Reg(Bool()) - val ex_reg_rs_bypass = Reg(Vec.fill(2)(Bool())) - val ex_reg_rs_lsb = Reg(Vec.fill(2)(Bits())) - val ex_reg_rs_msb = Reg(Vec.fill(2)(Bits())) - - // memory definitions - val mem_reg_pc = Reg(UInt()) - val mem_reg_inst = Reg(Bits()) - val mem_reg_wdata = Reg(Bits()) - val mem_reg_kill = Reg(Bool()) - val mem_reg_rs2 = Reg(Bits()) - - // writeback definitions - val wb_reg_pc = Reg(UInt()) - val wb_reg_inst = Reg(Bits()) - val wb_reg_wdata = Reg(Bits()) - val wb_wdata = Wire(Bits()) - val wb_reg_rs2 = Reg(Bits()) - - // instruction decode stage - val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1) - val id_pc = io.imem.resp.bits.pc - - class RegFile { - private val rf = Mem(UInt(width = 64), 31) - private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() - private var canRead = true - def read(addr: UInt) = { - require(canRead) - reads += addr -> Wire(UInt()) - reads.last._2 := rf(~addr) - reads.last._2 - } - def write(addr: UInt, data: UInt) = { - canRead = false - when (addr != UInt(0)) { - rf(~addr) := data - for ((raddr, rdata) <- reads) - when (addr === raddr) { rdata := data } - } - } - } - val rf = new RegFile - - // RF read ports + bypass from WB stage - val id_raddr = Vec(id_inst(19,15), id_inst(24,20)) - val id_rs = id_raddr.map(rf.read _) - - // immediate generation - def imm(sel: Bits, inst: Bits) = { - val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt) - val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) - val 
b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt) - val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0), - Mux(sel === IMM_UJ, inst(20).toSInt, - Mux(sel === IMM_SB, inst(7).toSInt, sign))) - val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25)) - val b4_1 = Mux(sel === IMM_U, Bits(0), - Mux(sel === IMM_S || sel === IMM_SB, inst(11,8), - Mux(sel === IMM_Z, inst(19,16), inst(24,21)))) - val b0 = Mux(sel === IMM_S, inst(7), - Mux(sel === IMM_I, inst(20), - Mux(sel === IMM_Z, inst(15), Bits(0)))) - - Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt - } - - io.ctrl.inst := id_inst - io.fpu.inst := id_inst - - // execute stage - ex_reg_kill := io.ctrl.killd - when (!io.ctrl.killd) { - ex_reg_pc := id_pc - ex_reg_inst := id_inst - ex_reg_rs_bypass := io.ctrl.bypass - for (i <- 0 until id_rs.size) { - when (io.ctrl.ren(i)) { - ex_reg_rs_lsb(i) := id_rs(i)(SZ_BYP-1,0) - when (!io.ctrl.bypass(i)) { - ex_reg_rs_msb(i) := id_rs(i) >> SZ_BYP - } - } - when (io.ctrl.bypass(i)) { ex_reg_rs_lsb(i) := io.ctrl.bypass_src(i) } - } - } - - val bypass = Wire(Vec(Bits(), NBYP)) - bypass(BYP_0) := Bits(0) - bypass(BYP_EX) := mem_reg_wdata - bypass(BYP_MEM) := wb_reg_wdata - bypass(BYP_DC) := (if(params(FastLoadByte)) io.dmem.resp.bits.data_subword - else if(params(FastLoadWord)) io.dmem.resp.bits.data - else wb_reg_wdata) - - val ex_rs = for (i <- 0 until id_rs.size) - yield Mux(ex_reg_rs_bypass(i), bypass(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) - val ex_imm = imm(io.ctrl.ex_ctrl.sel_imm, ex_reg_inst) - val ex_op1 = MuxLookup(io.ctrl.ex_ctrl.sel_alu1, SInt(0), Seq( - A1_RS1 -> ex_rs(0).toSInt, - A1_PC -> ex_reg_pc.toSInt)) - val ex_op2 = MuxLookup(io.ctrl.ex_ctrl.sel_alu2, SInt(0), Seq( - A2_RS2 -> ex_rs(1).toSInt, - A2_IMM -> ex_imm, - A2_FOUR -> SInt(4))) - - val alu = Module(new ALU) - alu.io.dw := io.ctrl.ex_ctrl.alu_dw - alu.io.fn := io.ctrl.ex_ctrl.alu_fn - alu.io.in2 := ex_op2.toUInt - alu.io.in1 := ex_op1 - - // 
multiplier and divider - val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1, - earlyOut = params(FastMulDiv))) - div.io.req.valid := io.ctrl.ex_valid && io.ctrl.ex_ctrl.div - div.io.req.bits.dw := io.ctrl.ex_ctrl.alu_dw - div.io.req.bits.fn := io.ctrl.ex_ctrl.alu_fn - div.io.req.bits.in1 := ex_rs(0) - div.io.req.bits.in2 := ex_rs(1) - div.io.req.bits.tag := io.ctrl.ex_waddr - div.io.kill := io.ctrl.killm && Reg(next = div.io.req.fire()) - io.ctrl.div_mul_rdy := div.io.req.ready - - io.fpu.fromint_data := ex_rs(0) - - def vaSign(a0: UInt, ea: Bits) = { - // efficient means to compress 64-bit VA into vaddrBits+1 bits - // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) - val a = a0 >> vaddrBits-1 - val e = ea(vaddrBits,vaddrBits-1) - Mux(a === UInt(0) || a === UInt(1), e != UInt(0), - Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1), - e(0))) - } - - // D$ request interface (registered inside D$ module) - // other signals (req_val, req_rdy) connect to control module - io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt - io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_ctrl.fp) - require(io.dmem.req.bits.tag.getWidth >= 6) - require(params(CoreDCacheReqTagBits) >= 6) - - // processor control regfile read - val csr = Module(new CSRFile) - csr.io.host <> io.host - csr.io <> io.ctrl - csr.io <> io.fpu - csr.io.rocc <> io.rocc - csr.io.pc := wb_reg_pc - csr.io.uarch_counters.foreach(_ := Bool(false)) - - io.ptw.ptbr := csr.io.ptbr - io.ptw.invalidate := csr.io.fatc - io.ptw.status := csr.io.status - - // memory stage - mem_reg_kill := ex_reg_kill - when (!ex_reg_kill) { - mem_reg_pc := ex_reg_pc - mem_reg_inst := ex_reg_inst - mem_reg_wdata := alu.io.out - when (io.ctrl.ex_ctrl.rxs2 && (io.ctrl.ex_ctrl.mem || io.ctrl.ex_ctrl.rocc)) { - mem_reg_rs2 := ex_rs(1) - } - } - - io.dmem.req.bits.data := Mux(io.ctrl.mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) - - // writeback arbitration - val 
dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool - val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool - val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1) - val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data - val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data - - val ll_wdata = Wire(Bits()) - div.io.resp.ready := io.ctrl.ll_ready - ll_wdata := div.io.resp.bits.data - io.ctrl.ll_waddr := div.io.resp.bits.tag - io.ctrl.ll_wen := div.io.resp.fire() - if (!params(BuildRoCC).isEmpty) { - io.rocc.resp.ready := io.ctrl.ll_ready - when (io.rocc.resp.fire()) { - div.io.resp.ready := Bool(false) - ll_wdata := io.rocc.resp.bits.data - io.ctrl.ll_waddr := io.rocc.resp.bits.rd - io.ctrl.ll_wen := Bool(true) - } - } - when (dmem_resp_replay && dmem_resp_xpu) { - div.io.resp.ready := Bool(false) - if (!params(BuildRoCC).isEmpty) - io.rocc.resp.ready := Bool(false) - io.ctrl.ll_waddr := dmem_resp_waddr - io.ctrl.ll_wen := Bool(true) - } - - io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu - io.fpu.dmem_resp_data := io.dmem.resp.bits.data - io.fpu.dmem_resp_type := io.dmem.resp.bits.typ - io.fpu.dmem_resp_tag := dmem_resp_waddr - - io.ctrl.mem_br_taken := mem_reg_wdata(0) - val mem_br_target = mem_reg_pc.toSInt + - Mux(io.ctrl.mem_ctrl.branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst), - Mux(io.ctrl.mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4))) - val mem_npc = (Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)).toSInt, mem_br_target) & SInt(-2)).toUInt - io.ctrl.mem_misprediction := mem_npc != ex_reg_pc || !io.ctrl.ex_valid - io.ctrl.mem_npc_misaligned := mem_npc(1) - io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1 - val mem_int_wdata = Mux(io.ctrl.mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt - - // writeback stage - when (!mem_reg_kill) { - wb_reg_pc := mem_reg_pc - wb_reg_inst := mem_reg_inst - wb_reg_wdata := Mux(io.ctrl.mem_ctrl.fp && 
io.ctrl.mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata) - when (io.ctrl.mem_ctrl.rocc) { - wb_reg_rs2 := mem_reg_rs2 - } - } - wb_wdata := Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword, - Mux(io.ctrl.ll_wen, ll_wdata, - Mux(io.ctrl.csr_cmd != CSR.N, csr.io.rw.rdata, - wb_reg_wdata))) - - val wb_wen = io.ctrl.ll_wen || io.ctrl.wb_wen - val wb_waddr = Mux(io.ctrl.ll_wen, io.ctrl.ll_waddr, io.ctrl.wb_waddr) - when (wb_wen) { rf.write(wb_waddr, wb_wdata) } - - // scoreboard clear (for div/mul and D$ load miss writebacks) - io.ctrl.fp_sboard_clr := dmem_resp_replay && dmem_resp_fpu - io.ctrl.fp_sboard_clra := dmem_resp_waddr - - // processor control regfile write - csr.io.rw.addr := wb_reg_inst(31,20) - csr.io.rw.cmd := io.ctrl.csr_cmd - csr.io.rw.wdata := wb_reg_wdata - - io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst) - io.rocc.cmd.bits.rs1 := wb_reg_wdata - io.rocc.cmd.bits.rs2 := wb_reg_rs2 - - // hook up I$ - io.imem.req.bits.pc := - Mux(io.ctrl.sel_pc === PC_MEM, mem_npc, - Mux(io.ctrl.sel_pc === PC_CSR, csr.io.evec, - wb_reg_pc)).toUInt // PC_WB - io.imem.btb_update.bits.pc := mem_reg_pc - io.imem.btb_update.bits.target := io.imem.req.bits.pc - io.imem.btb_update.bits.br_pc := mem_reg_pc - io.imem.bht_update.bits.pc := mem_reg_pc - io.imem.ras_update.bits.returnAddr := mem_int_wdata - - // for hazard/bypass opportunity detection - io.ctrl.ex_waddr := ex_reg_inst(11,7) - io.ctrl.mem_waddr := mem_reg_inst(11,7) - io.ctrl.wb_waddr := wb_reg_inst(11,7) - - printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", - io.host.id, csr.io.time(32,0), io.ctrl.retire, wb_reg_pc, - Mux(wb_wen, wb_waddr, UInt(0)), wb_wdata, wb_wen, - wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), - wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), - wb_reg_inst, wb_reg_inst) -} diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 9a444fb9..2c3a8590 100644 --- 
a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -138,33 +138,34 @@ class FPUDecoder extends Module s.toint, s.fastpipe, s.fma, s.div, s.sqrt, s.round, s.wflags) := decoder } -class DpathFPUIO extends Bundle { - val inst = Bits(OUTPUT, 32) - val fromint_data = Bits(OUTPUT, 64) +class FPUIO extends Bundle { + val inst = Bits(INPUT, 32) + val fromint_data = Bits(INPUT, 64) - val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) - val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)).flip + val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ) + val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)) - val store_data = Bits(INPUT, 64) - val toint_data = Bits(INPUT, 64) + val store_data = Bits(OUTPUT, 64) + val toint_data = Bits(OUTPUT, 64) - val dmem_resp_val = Bool(OUTPUT) - val dmem_resp_type = Bits(OUTPUT, 3) - val dmem_resp_tag = UInt(OUTPUT, 5) - val dmem_resp_data = Bits(OUTPUT, 64) + val dmem_resp_val = Bool(INPUT) + val dmem_resp_type = Bits(INPUT, 3) + val dmem_resp_tag = UInt(INPUT, 5) + val dmem_resp_data = Bits(INPUT, 64) + + val valid = Bool(INPUT) + val fcsr_rdy = Bool(OUTPUT) + val nack_mem = Bool(OUTPUT) + val illegal_rm = Bool(OUTPUT) + val killx = Bool(INPUT) + val killm = Bool(INPUT) + val dec = new FPUCtrlSigs().asOutput + val sboard_set = Bool(OUTPUT) + val sboard_clr = Bool(OUTPUT) + val sboard_clra = UInt(OUTPUT, 5) } class CtrlFPUIO extends Bundle { - val valid = Bool(OUTPUT) - val fcsr_rdy = Bool(INPUT) - val nack_mem = Bool(INPUT) - val illegal_rm = Bool(INPUT) - val killx = Bool(OUTPUT) - val killm = Bool(OUTPUT) - val dec = new FPUCtrlSigs().asInput - val sboard_set = Bool(INPUT) - val sboard_clr = Bool(INPUT) - val sboard_clra = UInt(INPUT, 5) } class FPResult extends Bundle @@ -355,31 +356,28 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module class FPU extends Module { - val io = new Bundle { - val ctrl = (new CtrlFPUIO).flip - val dpath = (new DpathFPUIO).flip - } + val io = new FPUIO - 
val ex_reg_valid = Reg(next=io.ctrl.valid, init=Bool(false)) - val ex_reg_inst = RegEnable(io.dpath.inst, io.ctrl.valid) - val mem_reg_valid = Reg(next=ex_reg_valid && !io.ctrl.killx, init=Bool(false)) + val ex_reg_valid = Reg(next=io.valid, init=Bool(false)) + val ex_reg_inst = RegEnable(io.inst, io.valid) + val mem_reg_valid = Reg(next=ex_reg_valid && !io.killx, init=Bool(false)) val mem_reg_inst = RegEnable(ex_reg_inst, ex_reg_valid) - val killm = io.ctrl.killm || io.ctrl.nack_mem + val killm = io.killm || io.nack_mem val wb_reg_valid = Reg(next=mem_reg_valid && !killm, init=Bool(false)) val fp_decoder = Module(new FPUDecoder) - fp_decoder.io.inst := io.dpath.inst + fp_decoder.io.inst := io.inst val id_ctrl = fp_decoder.io.sigs - val ex_ctrl = RegEnable(id_ctrl, io.ctrl.valid) + val ex_ctrl = RegEnable(id_ctrl, io.valid) val mem_ctrl = RegEnable(ex_ctrl, ex_reg_valid) val wb_ctrl = RegEnable(mem_ctrl, mem_reg_valid) // load response - val load_wb = Reg(next=io.dpath.dmem_resp_val) - val load_wb_single = RegEnable(io.dpath.dmem_resp_type === MT_W || io.dpath.dmem_resp_type === MT_WU, io.dpath.dmem_resp_val) - val load_wb_data = RegEnable(io.dpath.dmem_resp_data, io.dpath.dmem_resp_val) - val load_wb_tag = RegEnable(io.dpath.dmem_resp_tag, io.dpath.dmem_resp_val) + val load_wb = Reg(next=io.dmem_resp_val) + val load_wb_single = RegEnable(io.dmem_resp_type === MT_W || io.dmem_resp_type === MT_WU, io.dmem_resp_val) + val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val) + val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val) val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d) @@ -389,20 +387,20 @@ class FPU extends Module when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded } val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt())) - when (io.ctrl.valid) { + when (io.valid) { 
when (id_ctrl.ren1) { - when (!id_ctrl.swap12) { ex_ra1 := io.dpath.inst(19,15) } - when (id_ctrl.swap12) { ex_ra2 := io.dpath.inst(19,15) } + when (!id_ctrl.swap12) { ex_ra1 := io.inst(19,15) } + when (id_ctrl.swap12) { ex_ra2 := io.inst(19,15) } } when (id_ctrl.ren2) { - when (id_ctrl.swap12) { ex_ra1 := io.dpath.inst(24,20) } - when (id_ctrl.swap23) { ex_ra3 := io.dpath.inst(24,20) } - when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.dpath.inst(24,20) } + when (id_ctrl.swap12) { ex_ra1 := io.inst(24,20) } + when (id_ctrl.swap23) { ex_ra3 := io.inst(24,20) } + when (!id_ctrl.swap12 && !id_ctrl.swap23) { ex_ra2 := io.inst(24,20) } } - when (id_ctrl.ren3) { ex_ra3 := io.dpath.inst(31,27) } + when (id_ctrl.ren3) { ex_ra3 := io.inst(31,27) } } val ex_rs1::ex_rs2::ex_rs3::Nil = Seq(ex_ra1, ex_ra2, ex_ra3).map(regfile(_)) - val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.dpath.fcsr_rm, ex_reg_inst(14,12)) + val ex_rm = Mux(ex_reg_inst(14,12) === Bits(7), io.fcsr_rm, ex_reg_inst(14,12)) val req = Wire(new FPInput) req := ex_ctrl @@ -423,13 +421,13 @@ class FPU extends Module val fpiu = Module(new FPToInt) fpiu.io.in.valid := ex_reg_valid && (ex_ctrl.toint || ex_ctrl.div || ex_ctrl.sqrt || ex_ctrl.cmd === FCMD_MINMAX) fpiu.io.in.bits := req - io.dpath.store_data := fpiu.io.out.bits.store - io.dpath.toint_data := fpiu.io.out.bits.toint + io.store_data := fpiu.io.out.bits.store + io.toint_data := fpiu.io.out.bits.toint val ifpu = Module(new IntToFP(3)) ifpu.io.in.valid := ex_reg_valid && ex_ctrl.fromint ifpu.io.in.bits := req - ifpu.io.in.bits.in1 := io.dpath.fromint_data + ifpu.io.in.bits.in1 := io.fromint_data val fpmu = Module(new FPToFP(2)) fpmu.io.in.valid := ex_reg_valid && ex_ctrl.fastpipe @@ -489,22 +487,22 @@ class FPU extends Module val wb_toint_valid = wb_reg_valid && wb_ctrl.toint val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint) - io.dpath.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0) - io.dpath.fcsr_flags.bits := + 
io.fcsr_flags.valid := wb_toint_valid || divSqrt_wen || wen(0) + io.fcsr_flags.bits := Mux(wb_toint_valid, wb_toint_exc, UInt(0)) | Mux(divSqrt_wen, divSqrt_flags, UInt(0)) | Mux(wen(0), wexc, UInt(0)) val units_busy = mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && (!divSqrt_inReady || wen.orR) // || mem_reg_valid && mem_ctrl.fma && Reg(next=Mux(ex_ctrl.single, io.sfma.valid, io.dfma.valid)) - io.ctrl.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight) - io.ctrl.nack_mem := units_busy || write_port_busy || divSqrt_in_flight - io.ctrl.dec <> fp_decoder.io.sigs + io.fcsr_rdy := !(ex_reg_valid && ex_ctrl.wflags || mem_reg_valid && mem_ctrl.wflags || wb_reg_valid && wb_ctrl.toint || wen.orR || divSqrt_in_flight) + io.nack_mem := units_busy || write_port_busy || divSqrt_in_flight + io.dec <> fp_decoder.io.sigs def useScoreboard(f: ((Pipe, Int)) => Bool) = pipes.zipWithIndex.filter(_._1.lat > 3).map(x => f(x)).fold(Bool(false))(_||_) - io.ctrl.sboard_set := wb_reg_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt) - io.ctrl.sboard_clr := divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2))) - io.ctrl.sboard_clra := waddr + io.sboard_set := wb_reg_valid && Reg(next=useScoreboard(_._1.cond(mem_ctrl)) || mem_ctrl.div || mem_ctrl.sqrt) + io.sboard_clr := divSqrt_wen || (wen(0) && useScoreboard(x => wsrc === UInt(x._2))) + io.sboard_clra := waddr // we don't currently support round-max-magnitude (rm=4) - io.ctrl.illegal_rm := ex_rm(2) && ex_ctrl.round + io.illegal_rm := ex_rm(2) && ex_ctrl.round divSqrt_wdata := 0 divSqrt_flags := 0 @@ -516,7 +514,7 @@ class FPU extends Module def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1 val divSqrt_wb_hazard = wen.orR - divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && !io.ctrl.killm && (mem_ctrl.div || 
mem_ctrl.sqrt) + divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && !io.killm && (mem_ctrl.div || mem_ctrl.sqrt) divSqrt.io.sqrtOp := mem_ctrl.sqrt divSqrt.io.a := fpiu.io.as_double.in1 divSqrt.io.b := fpiu.io.as_double.in2 diff --git a/rocket/src/main/scala/ctrl.scala b/rocket/src/main/scala/idecode.scala similarity index 62% rename from rocket/src/main/scala/ctrl.scala rename to rocket/src/main/scala/idecode.scala index df580ca1..18a24616 100644 --- a/rocket/src/main/scala/ctrl.scala +++ b/rocket/src/main/scala/idecode.scala @@ -1,4 +1,4 @@ -// See LICENSE for license details. +// See LICENSE for license details package rocket @@ -6,50 +6,6 @@ import Chisel._ import Instructions._ import uncore.constants.MemoryOpConstants._ import ALU._ -import Util._ - -class CtrlDpathIO extends CoreBundle -{ - // outputs to datapath - val sel_pc = UInt(OUTPUT, 3) - val killd = Bool(OUTPUT) - val killm = Bool(OUTPUT) - val ren = Vec.fill(2)(Bool(OUTPUT)) - val ex_ctrl = new IntCtrlSigs().asOutput - val mem_ctrl = new IntCtrlSigs().asOutput - val csr_cmd = UInt(OUTPUT, CSR.SZ) - val ex_valid = Bool(OUTPUT) - val wb_wen = Bool(OUTPUT) - val bypass = Vec.fill(2)(Bool(OUTPUT)) - val bypass_src = Vec.fill(2)(Bits(OUTPUT, SZ_BYP)) - val ll_ready = Bool(OUTPUT) - // exception handling - val retire = Bool(OUTPUT) - val exception = Bool(OUTPUT) - val cause = UInt(OUTPUT, xLen) - // inputs from datapath - val inst = Bits(INPUT, 32) - val mem_br_taken = Bool(INPUT) - val mem_misprediction = Bool(INPUT) - val mem_npc_misaligned = Bool(INPUT) - val div_mul_rdy = Bool(INPUT) - val ll_wen = Bool(INPUT) - val ll_waddr = UInt(INPUT, 5) - val ex_waddr = UInt(INPUT, 5) - val mem_rs1_ra = Bool(INPUT) - val mem_waddr = UInt(INPUT, 5) - val wb_waddr = UInt(INPUT, 5) - val status = new MStatus().asInput - val fp_sboard_clr = Bool(INPUT) - val fp_sboard_clra = UInt(INPUT, 5) - // inputs from csr file - val csr_replay = Bool(INPUT) - val csr_stall = Bool(INPUT) - val 
csr_xcpt = Bool(INPUT) - val eret = Bool(INPUT) - val interrupt = Bool(INPUT) - val interrupt_cause = UInt(INPUT, xLen) -} abstract trait DecodeConstants { @@ -343,343 +299,3 @@ object RoCCDecode extends DecodeConstants CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N)) } - -class Control extends CoreModule -{ - val io = new Bundle { - val dpath = new CtrlDpathIO - val imem = new CPUFrontendIO - val dmem = new HellaCacheIO - val fpu = new CtrlFPUIO - val rocc = new RoCCInterface().flip - } - - var decode_table = XDecode.table - if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table - if (!params(BuildFPU).isEmpty && params(FDivSqrt)) decode_table ++= FDivSqrtDecode.table - if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table - - val id_ctrl = Wire(new IntCtrlSigs()).decode(io.dpath.inst, decode_table) - val ex_ctrl = Reg(new IntCtrlSigs) - val mem_ctrl = Reg(new IntCtrlSigs) - val wb_ctrl = Reg(new IntCtrlSigs) - - val ex_reg_xcpt_interrupt = Reg(Bool()) - val ex_reg_valid = Reg(Bool()) - val ex_reg_btb_hit = Reg(Bool()) - val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits) - val ex_reg_xcpt = Reg(Bool()) - val ex_reg_flush_pipe = Reg(Bool()) - val ex_reg_load_use = Reg(Bool()) - val ex_reg_cause = Reg(UInt()) - - val mem_reg_xcpt_interrupt = Reg(Bool()) - val mem_reg_valid = Reg(Bool()) - val mem_reg_btb_hit = Reg(Bool()) - val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits) - val mem_reg_xcpt = Reg(Bool()) - val mem_reg_replay = Reg(Bool()) - val mem_reg_flush_pipe = Reg(Bool()) - val mem_reg_cause = Reg(UInt()) - val mem_reg_slow_bypass = Reg(Bool()) - - val wb_reg_valid = Reg(Bool()) - val wb_reg_xcpt = Reg(Bool()) - val wb_reg_replay = Reg(Bool()) - val wb_reg_cause = Reg(UInt()) - val wb_reg_rocc_pending = Reg(init=Bool(false)) - - val take_pc_wb = Wire(Bool()) - 
val mem_misprediction = io.dpath.mem_misprediction && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) - val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) - val take_pc_mem = want_take_pc_mem && !io.dpath.mem_npc_misaligned - val take_pc_mem_wb = take_pc_wb || take_pc_mem - val take_pc = take_pc_mem_wb - val ctrl_killd = Wire(Bool()) - val ctrl_killx = Wire(Bool()) - val ctrl_killm = Wire(Bool()) - - val id_raddr3 = io.dpath.inst(31,27) - val id_raddr2 = io.dpath.inst(24,20) - val id_raddr1 = io.dpath.inst(19,15) - val id_waddr = io.dpath.inst(11,7) - val id_load_use = Wire(Bool()) - val id_reg_fence = Reg(init=Bool(false)) - - val id_csr_en = id_ctrl.csr != CSR.N - val id_system_insn = id_ctrl.csr === CSR.I - val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0) - val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr) - val id_csr_addr = io.dpath.inst(31,20) - // this is overly conservative - val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil - val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*) - val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs, legal_csrs -- safe_csrs)) - - val id_illegal_insn = !id_ctrl.legal || - id_ctrl.fp && !io.dpath.status.fs.orR || - id_ctrl.rocc && !io.dpath.status.xs.orR - // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) - val id_amo_aq = io.dpath.inst(26) - val id_amo_rl = io.dpath.inst(25) - val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl - val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid - val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) && - (io.rocc.busy || ex_reg_valid && ex_ctrl.rocc || - mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc) - id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy - val id_do_fence = id_rocc_busy && id_ctrl.fence || - id_mem_busy 
&& (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en) - - def checkExceptions(x: Seq[(Bool, UInt)]) = - (x.map(_._1).reduce(_||_), PriorityMux(x)) - - val (id_xcpt, id_cause) = checkExceptions(List( - (io.dpath.interrupt, io.dpath.interrupt_cause), - (io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)), - (id_illegal_insn, UInt(Causes.illegal_instruction)))) - - ex_reg_valid := !ctrl_killd - ex_reg_xcpt := !ctrl_killd && id_xcpt - ex_reg_xcpt_interrupt := io.dpath.interrupt && !take_pc && io.imem.resp.valid - when (id_xcpt) { ex_reg_cause := id_cause } - - when (!ctrl_killd) { - ex_ctrl := id_ctrl - ex_ctrl.csr := id_csr - ex_reg_btb_hit := io.imem.btb_resp.valid - when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits } - ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush - ex_reg_load_use := id_load_use - ex_reg_xcpt := id_xcpt - } - - // replay inst in ex stage - val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid - val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready || - ex_ctrl.div && !io.dpath.div_mul_rdy - val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use - val replay_ex = ex_reg_valid && (replay_ex_structural || replay_ex_load_use) - ctrl_killx := take_pc_mem_wb || replay_ex || !ex_reg_valid - // detect 2-cycle load-use delay for LB/LH/SC - val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type) - - val (ex_xcpt, ex_cause) = checkExceptions(List( - (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), - (ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction)))) - - mem_reg_valid := !ctrl_killx - mem_reg_replay := !take_pc_mem_wb && replay_ex - mem_reg_xcpt := !ctrl_killx && ex_xcpt - mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt - when (ex_xcpt) { mem_reg_cause := ex_cause } - - when (!ctrl_killx) { - mem_ctrl := ex_ctrl - mem_reg_btb_hit := ex_reg_btb_hit - when 
(ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp } - mem_reg_flush_pipe := ex_reg_flush_pipe - mem_reg_slow_bypass := ex_slow_bypass - mem_reg_xcpt := ex_xcpt - } - - val (mem_xcpt, mem_cause) = checkExceptions(List( - (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), - (want_take_pc_mem && io.dpath.mem_npc_misaligned, UInt(Causes.misaligned_fetch)), - (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)), - (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)), - (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)), - (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load)))) - - val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next.valid // structural hazard on writeback port - val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem - val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem - val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid - ctrl_killm := killm_common || mem_xcpt || fpu_kill_mem - - wb_reg_valid := !ctrl_killm - when (!ctrl_killm) { wb_ctrl := mem_ctrl } - wb_reg_replay := replay_mem && !take_pc_wb - wb_reg_xcpt := mem_xcpt && !take_pc_wb - when (mem_xcpt) { wb_reg_cause := mem_cause } - - val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc - val replay_wb_common = - io.dmem.resp.bits.nack || wb_reg_replay || io.dpath.csr_replay - val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common - val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready - - when (wb_rocc_val) { wb_reg_rocc_pending := !io.rocc.cmd.ready } - when (wb_reg_xcpt) { wb_reg_rocc_pending := Bool(false) } - - class Scoreboard(n: Int) - { - def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr)) - def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr)) - def read(addr: UInt): Bool = r(addr) - 
def readBypassed(addr: UInt): Bool = _next(addr) - - private val r = Reg(init=Bits(0, n)) - private var _next = r - private var ens = Bool(false) - private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0)) - private def update(en: Bool, update: UInt) = { - _next = update - ens = ens || en - when (ens) { r := _next } - } - } - - val sboard = new Scoreboard(32) - sboard.clear(io.dpath.ll_wen, io.dpath.ll_waddr) - - val id_stall_fpu = if (!params(BuildFPU).isEmpty) { - val fp_sboard = new Scoreboard(32) - fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && io.dpath.retire, io.dpath.wb_waddr) - fp_sboard.clear(io.dpath.fp_sboard_clr, io.dpath.fp_sboard_clra) - fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) - - id_csr_en && !io.fpu.fcsr_rdy || - io.fpu.dec.ren1 && fp_sboard.read(id_raddr1) || - io.fpu.dec.ren2 && fp_sboard.read(id_raddr2) || - io.fpu.dec.ren3 && fp_sboard.read(id_raddr3) || - io.fpu.dec.wen && fp_sboard.read(id_waddr) - } else Bool(false) - - // write CAUSE CSR on an exception - io.dpath.exception := wb_reg_xcpt - io.dpath.cause := wb_reg_cause - val wb_xcpt = wb_reg_xcpt || io.dpath.csr_xcpt - - // control transfer from ex/wb - take_pc_wb := replay_wb || wb_xcpt || io.dpath.eret - - io.dpath.sel_pc := - Mux(wb_xcpt || io.dpath.eret, PC_CSR, // exception or [m|s]ret - Mux(replay_wb, PC_WB, // replay - PC_MEM)) - - io.imem.btb_update.valid := mem_reg_valid && !io.dpath.mem_npc_misaligned && io.dpath.mem_misprediction && ((mem_ctrl.branch && io.dpath.mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb - io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit - io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp - io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr - io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && io.dpath.mem_rs1_ra - - io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb - io.imem.bht_update.bits.taken := io.dpath.mem_br_taken - 
io.imem.bht_update.bits.mispredict := io.dpath.mem_misprediction - io.imem.bht_update.bits.prediction.valid := mem_reg_btb_hit - io.imem.bht_update.bits.prediction.bits := mem_reg_btb_resp - - io.imem.ras_update.valid := io.imem.btb_update.bits.isJump && !io.dpath.mem_npc_misaligned && !take_pc_wb - io.imem.ras_update.bits.isCall := mem_ctrl.wxd && io.dpath.mem_waddr(0) - io.imem.ras_update.bits.isReturn := mem_ctrl.jalr && io.dpath.mem_rs1_ra - io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit - io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp - - io.imem.req.valid := take_pc - - val bypassDst = Array(id_raddr1, id_raddr2) - val bypassSrc = Array.fill(NBYP)((Bool(true), UInt(0))) - bypassSrc(BYP_EX) = (ex_reg_valid && ex_ctrl.wxd, io.dpath.ex_waddr) - bypassSrc(BYP_MEM) = (mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, io.dpath.mem_waddr) - bypassSrc(BYP_DC) = (mem_reg_valid && mem_ctrl.wxd, io.dpath.mem_waddr) - - val doBypass = bypassDst.map(d => bypassSrc.map(s => s._1 && s._2 === d)) - for (i <- 0 until io.dpath.bypass.size) { - io.dpath.bypass(i) := doBypass(i).reduce(_||_) - io.dpath.bypass_src(i) := PriorityEncoder(doBypass(i)) - } - - // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage. 
- val id_renx1_not0 = id_ctrl.rxs1 && id_raddr1 != UInt(0) - val id_renx2_not0 = id_ctrl.rxs2 && id_raddr2 != UInt(0) - val id_wen_not0 = id_ctrl.wxd && id_waddr != UInt(0) - val ex_cannot_bypass = ex_ctrl.csr != CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc - val data_hazard_ex = ex_ctrl.wxd && - (id_renx1_not0 && id_raddr1 === io.dpath.ex_waddr || - id_renx2_not0 && id_raddr2 === io.dpath.ex_waddr || - id_wen_not0 && id_waddr === io.dpath.ex_waddr) - val fp_data_hazard_ex = ex_ctrl.wfd && - (io.fpu.dec.ren1 && id_raddr1 === io.dpath.ex_waddr || - io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr || - io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr || - io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr) - val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex) - - // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage. - val mem_mem_cmd_bh = - if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass - else Bool(true) - val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc - val data_hazard_mem = mem_ctrl.wxd && - (id_renx1_not0 && id_raddr1 === io.dpath.mem_waddr || - id_renx2_not0 && id_raddr2 === io.dpath.mem_waddr || - id_wen_not0 && id_waddr === io.dpath.mem_waddr) - val fp_data_hazard_mem = mem_ctrl.wfd && - (io.fpu.dec.ren1 && id_raddr1 === io.dpath.mem_waddr || - io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr || - io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr || - io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr) - val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem) - id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem - - // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
- val data_hazard_wb = wb_ctrl.wxd && - (id_renx1_not0 && id_raddr1 === io.dpath.wb_waddr || - id_renx2_not0 && id_raddr2 === io.dpath.wb_waddr || - id_wen_not0 && id_waddr === io.dpath.wb_waddr) - val fp_data_hazard_wb = wb_ctrl.wfd && - (io.fpu.dec.ren1 && id_raddr1 === io.dpath.wb_waddr || - io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr || - io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr || - io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr) - val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb) - - val id_sboard_hazard = - (id_renx1_not0 && sboard.readBypassed(id_raddr1) || - id_renx2_not0 && sboard.readBypassed(id_raddr2) || - id_wen_not0 && sboard.readBypassed(id_waddr)) - - sboard.set(wb_set_sboard && io.dpath.wb_wen, io.dpath.wb_waddr) - - val ctrl_stalld = - id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || - id_ctrl.fp && id_stall_fpu || - id_ctrl.mem && !io.dmem.req.ready || - Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || - id_do_fence || - io.dpath.csr_stall - val ctrl_draind = io.dpath.interrupt - ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || ctrl_draind - - io.dpath.killd := take_pc || ctrl_stalld && !ctrl_draind - io.imem.resp.ready := !ctrl_stalld || ctrl_draind - io.imem.invalidate := wb_reg_valid && wb_ctrl.fence_i - - io.dpath.ren(1) := id_ctrl.rxs2 - io.dpath.ren(0) := id_ctrl.rxs1 - io.dpath.ex_ctrl := ex_ctrl - io.dpath.mem_ctrl := mem_ctrl - io.dpath.ex_valid := ex_reg_valid - io.dpath.ll_ready := !(wb_reg_valid && wb_ctrl.wxd) - io.dpath.retire := wb_reg_valid && !replay_wb && !io.dpath.csr_xcpt - io.dpath.wb_wen := io.dpath.retire && wb_ctrl.wxd - io.dpath.csr_cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N) - io.dpath.killm := killm_common - - io.fpu.valid := !ctrl_killd && id_ctrl.fp - io.fpu.killx := ctrl_killx - io.fpu.killm := killm_common - - io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem - 
io.dmem.req.bits.kill := killm_common || mem_xcpt - io.dmem.req.bits.cmd := ex_ctrl.mem_cmd - io.dmem.req.bits.typ := ex_ctrl.mem_type - io.dmem.req.bits.phys := Bool(false) - io.dmem.invalidate_lr := wb_xcpt - - io.rocc.cmd.valid := wb_rocc_val - io.rocc.exception := wb_xcpt && io.dpath.status.xs.orR - io.rocc.s := io.dpath.status.prv.orR // should we just pass all of mstatus? -} diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala new file mode 100644 index 00000000..482c7dd9 --- /dev/null +++ b/rocket/src/main/scala/rocket.scala @@ -0,0 +1,599 @@ +// See LICENSE for license details. + +package rocket + +import Chisel._ +import uncore._ +import Util._ + +case object BuildFPU extends Field[Option[() => FPU]] +case object FDivSqrt extends Field[Boolean] +case object XLen extends Field[Int] +case object NMultXpr extends Field[Int] +case object FetchWidth extends Field[Int] +case object RetireWidth extends Field[Int] +case object UseVM extends Field[Boolean] +case object FastLoadWord extends Field[Boolean] +case object FastLoadByte extends Field[Boolean] +case object FastMulDiv extends Field[Boolean] +case object CoreInstBits extends Field[Int] +case object CoreDataBits extends Field[Int] +case object CoreDCacheReqTagBits extends Field[Int] +case object NCustomMRWCSRs extends Field[Int] + +abstract trait CoreParameters extends UsesParameters { + val xLen = params(XLen) + val paddrBits = params(PAddrBits) + val vaddrBits = params(VAddrBits) + val pgIdxBits = params(PgIdxBits) + val ppnBits = params(PPNBits) + val vpnBits = params(VPNBits) + val pgLevels = params(PgLevels) + val pgLevelBits = params(PgLevelBits) + val asIdBits = params(ASIdBits) + + val retireWidth = params(RetireWidth) + val coreFetchWidth = params(FetchWidth) + val coreInstBits = params(CoreInstBits) + val coreInstBytes = coreInstBits/8 + val coreDataBits = xLen + val coreDataBytes = coreDataBits/8 + val coreDCacheReqTagBits = params(CoreDCacheReqTagBits) + val 
coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits + val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt + + if(params(FastLoadByte)) require(params(FastLoadWord)) +} + +abstract trait RocketCoreParameters extends CoreParameters +{ + require(params(FetchWidth) == 1) // for now... + require(params(RetireWidth) == 1) // for now... +} + +abstract class CoreBundle extends Bundle with CoreParameters +abstract class CoreModule extends Module with CoreParameters + +class Rocket extends CoreModule +{ + val io = new Bundle { + val host = new HTIFIO + val imem = new CPUFrontendIO + val dmem = new HellaCacheIO + val ptw = new DatapathPTWIO().flip + val fpu = new FPUIO().flip + val rocc = new RoCCInterface().flip + } + + var decode_table = XDecode.table + if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table + if (!params(BuildFPU).isEmpty && params(FDivSqrt)) decode_table ++= FDivSqrtDecode.table + if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table + + val ex_ctrl = Reg(new IntCtrlSigs) + val mem_ctrl = Reg(new IntCtrlSigs) + val wb_ctrl = Reg(new IntCtrlSigs) + + val ex_reg_xcpt_interrupt = Reg(Bool()) + val ex_reg_valid = Reg(Bool()) + val ex_reg_btb_hit = Reg(Bool()) + val ex_reg_btb_resp = Reg(io.imem.btb_resp.bits) + val ex_reg_xcpt = Reg(Bool()) + val ex_reg_flush_pipe = Reg(Bool()) + val ex_reg_load_use = Reg(Bool()) + val ex_reg_cause = Reg(UInt()) + val ex_reg_pc = Reg(UInt()) + val ex_reg_inst = Reg(Bits()) + + val mem_reg_xcpt_interrupt = Reg(Bool()) + val mem_reg_valid = Reg(Bool()) + val mem_reg_btb_hit = Reg(Bool()) + val mem_reg_btb_resp = Reg(io.imem.btb_resp.bits) + val mem_reg_xcpt = Reg(Bool()) + val mem_reg_replay = Reg(Bool()) + val mem_reg_flush_pipe = Reg(Bool()) + val mem_reg_cause = Reg(UInt()) + val mem_reg_slow_bypass = Reg(Bool()) + val mem_reg_pc = Reg(UInt()) + val mem_reg_inst = Reg(Bits()) + val mem_reg_wdata = Reg(Bits()) + val mem_reg_rs2 = Reg(Bits()) + val take_pc_mem = Wire(Bool()) + + val 
wb_reg_valid = Reg(Bool()) + val wb_reg_xcpt = Reg(Bool()) + val wb_reg_replay = Reg(Bool()) + val wb_reg_cause = Reg(UInt()) + val wb_reg_rocc_pending = Reg(init=Bool(false)) + val wb_reg_pc = Reg(UInt()) + val wb_reg_inst = Reg(Bits()) + val wb_reg_wdata = Reg(Bits()) + val wb_reg_rs2 = Reg(Bits()) + val take_pc_wb = Wire(Bool()) + + val take_pc_mem_wb = take_pc_wb || take_pc_mem + val take_pc = take_pc_mem_wb + + // decode stage + val id_pc = io.imem.resp.bits.pc + val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1) + val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table) + val id_raddr3 = id_inst(31,27) + val id_raddr2 = id_inst(24,20) + val id_raddr1 = id_inst(19,15) + val id_waddr = id_inst(11,7) + val id_load_use = Wire(Bool()) + val id_reg_fence = Reg(init=Bool(false)) + val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2) + val id_raddr = IndexedSeq(id_raddr1, id_raddr2) + val rf = new RegFile + val id_rs = id_raddr.map(rf.read _) + val ctrl_killd = Wire(Bool()) + + val csr = Module(new CSRFile) + val id_csr_en = id_ctrl.csr != CSR.N + val id_system_insn = id_ctrl.csr === CSR.I + val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0) + val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr) + val id_csr_addr = id_inst(31,20) + // this is overly conservative + val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil + val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*) + val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs, legal_csrs -- safe_csrs)) + + val id_illegal_insn = !id_ctrl.legal || + id_ctrl.fp && !csr.io.status.fs.orR || + id_ctrl.rocc && !csr.io.status.xs.orR + // stall decode for fences (now, for AMO.aq; later, for AMO.rl and FENCE) + val id_amo_aq = id_inst(26) + val id_amo_rl = id_inst(25) + val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl + val 
id_mem_busy = !io.dmem.ordered || io.dmem.req.valid + val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) && + (io.rocc.busy || ex_reg_valid && ex_ctrl.rocc || + mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc) + id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy + val id_do_fence = id_rocc_busy && id_ctrl.fence || + id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en) + + val (id_xcpt, id_cause) = checkExceptions(List( + (csr.io.interrupt, csr.io.interrupt_cause), + (io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)), + (id_illegal_insn, UInt(Causes.illegal_instruction)))) + + val dcache_bypass_data = + if(params(FastLoadByte)) io.dmem.resp.bits.data_subword + else if(params(FastLoadWord)) io.dmem.resp.bits.data + else wb_reg_wdata + + // detect bypass opportunities + val ex_waddr = ex_reg_inst(11,7) + val mem_waddr = mem_reg_inst(11,7) + val wb_waddr = wb_reg_inst(11,7) + val bypass_sources = IndexedSeq( + (Bool(true), UInt(0), UInt(0)), // treat reading x0 as a bypass + (ex_reg_valid && ex_ctrl.wxd, ex_waddr, mem_reg_wdata), + (mem_reg_valid && mem_ctrl.wxd && !mem_ctrl.mem, mem_waddr, wb_reg_wdata), + (mem_reg_valid && mem_ctrl.wxd, mem_waddr, dcache_bypass_data)) + val id_bypass_src = id_raddr.map(raddr => bypass_sources.map(s => s._1 && s._2 === raddr)) + + // execute stage + val bypass_mux = Vec(bypass_sources.map(_._3)) + val ex_reg_rs_bypass = Reg(Vec.fill(id_raddr.size)(Bool())) + val ex_reg_rs_lsb = Reg(Vec.fill(id_raddr.size)(Bits())) + val ex_reg_rs_msb = Reg(Vec.fill(id_raddr.size)(Bits())) + val ex_rs = for (i <- 0 until id_raddr.size) + yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) + val ex_imm = imm(ex_ctrl.sel_imm, ex_reg_inst) + val ex_op1 = MuxLookup(ex_ctrl.sel_alu1, SInt(0), Seq( + A1_RS1 -> ex_rs(0).toSInt, + A1_PC -> ex_reg_pc.toSInt)) + val ex_op2 = MuxLookup(ex_ctrl.sel_alu2, SInt(0), Seq( 
+ A2_RS2 -> ex_rs(1).toSInt, + A2_IMM -> ex_imm, + A2_FOUR -> SInt(4))) + + val alu = Module(new ALU) + alu.io.dw := ex_ctrl.alu_dw + alu.io.fn := ex_ctrl.alu_fn + alu.io.in2 := ex_op2.toUInt + alu.io.in1 := ex_op1 + + // multiplier and divider + val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1, + earlyOut = params(FastMulDiv))) + div.io.req.valid := ex_reg_valid && ex_ctrl.div + div.io.req.bits.dw := ex_ctrl.alu_dw + div.io.req.bits.fn := ex_ctrl.alu_fn + div.io.req.bits.in1 := ex_rs(0) + div.io.req.bits.in2 := ex_rs(1) + div.io.req.bits.tag := ex_waddr + + ex_reg_valid := !ctrl_killd + ex_reg_xcpt := !ctrl_killd && id_xcpt + ex_reg_xcpt_interrupt := csr.io.interrupt && !take_pc && io.imem.resp.valid + when (id_xcpt) { ex_reg_cause := id_cause } + + when (!ctrl_killd) { + ex_ctrl := id_ctrl + ex_ctrl.csr := id_csr + ex_reg_btb_hit := io.imem.btb_resp.valid + when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits } + ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush + ex_reg_load_use := id_load_use + + for (i <- 0 until id_raddr.size) { + val do_bypass = id_bypass_src(i).reduce(_||_) + val bypass_src = PriorityEncoder(id_bypass_src(i)) + ex_reg_rs_bypass(i) := do_bypass + ex_reg_rs_lsb(i) := bypass_src + when (id_ren(i) && !do_bypass) { + ex_reg_rs_lsb(i) := id_rs(i)(bypass_src.getWidth-1,0) + ex_reg_rs_msb(i) := id_rs(i) >> bypass_src.getWidth + } + } + } + when (!ctrl_killd || csr.io.interrupt) { + ex_reg_inst := id_inst + ex_reg_pc := id_pc + } + + // replay inst in ex stage? 
+ val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid + val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready || + ex_ctrl.div && !div.io.req.ready + val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use + val replay_ex = ex_reg_valid && (replay_ex_structural || replay_ex_load_use) + val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid + // detect 2-cycle load-use delay for LB/LH/SC + val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type) + + val (ex_xcpt, ex_cause) = checkExceptions(List( + (ex_reg_xcpt_interrupt || ex_reg_xcpt, ex_reg_cause), + (ex_ctrl.fp && io.fpu.illegal_rm, UInt(Causes.illegal_instruction)))) + + // memory stage + val mem_br_taken = mem_reg_wdata(0) + val mem_br_target = mem_reg_pc.toSInt + + Mux(mem_ctrl.branch && mem_br_taken, imm(IMM_SB, mem_reg_inst), + Mux(mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4))) + val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt + val mem_npc = (Mux(mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)).toSInt, mem_br_target) & SInt(-2)).toUInt + val mem_wrong_npc = mem_npc != ex_reg_pc || !ex_reg_valid + val mem_npc_misaligned = mem_npc(1) + val mem_misprediction = mem_wrong_npc && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) + val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) + take_pc_mem := want_take_pc_mem && !mem_npc_misaligned + + mem_reg_valid := !ctrl_killx + mem_reg_replay := !take_pc_mem_wb && replay_ex + mem_reg_xcpt := !ctrl_killx && ex_xcpt + mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt + when (ex_xcpt) { mem_reg_cause := ex_cause } + + when (ex_reg_valid || ex_reg_xcpt_interrupt) { + mem_ctrl := ex_ctrl + mem_reg_btb_hit := ex_reg_btb_hit + when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp } + mem_reg_flush_pipe := ex_reg_flush_pipe + mem_reg_slow_bypass := 
ex_slow_bypass + + mem_reg_inst := ex_reg_inst + mem_reg_pc := ex_reg_pc + mem_reg_wdata := alu.io.out + when (ex_ctrl.rxs2 && (ex_ctrl.mem || ex_ctrl.rocc)) { + mem_reg_rs2 := ex_rs(1) + } + } + + val (mem_xcpt, mem_cause) = checkExceptions(List( + (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), + (want_take_pc_mem && mem_npc_misaligned, UInt(Causes.misaligned_fetch)), + (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)), + (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)), + (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)), + (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load)))) + + val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next.valid // structural hazard on writeback port + val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem + val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem + val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid + div.io.kill := killm_common && Reg(next = div.io.req.fire()) + val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem + + wb_reg_valid := !ctrl_killm + wb_reg_replay := replay_mem && !take_pc_wb + wb_reg_xcpt := mem_xcpt && !take_pc_wb + when (mem_xcpt) { wb_reg_cause := mem_cause } + when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) { + wb_ctrl := mem_ctrl + wb_reg_wdata := Mux(mem_ctrl.fp && mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata) + when (mem_ctrl.rocc) { + wb_reg_rs2 := mem_reg_rs2 + } + wb_reg_inst := mem_reg_inst + wb_reg_pc := mem_reg_pc + } + + val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc + val replay_wb_common = + io.dmem.resp.bits.nack || wb_reg_replay || csr.io.csr_replay + val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common + val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready + + when (wb_rocc_val) { 
wb_reg_rocc_pending := !io.rocc.cmd.ready } + when (wb_reg_xcpt) { wb_reg_rocc_pending := Bool(false) } + + // writeback arbitration + val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool + val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool + val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1) + val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data + val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data + + div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd) + val ll_wdata = Wire(init = div.io.resp.bits.data) + val ll_waddr = Wire(init = div.io.resp.bits.tag) + val ll_wen = Wire(init = div.io.resp.fire()) + if (!params(BuildRoCC).isEmpty) { + io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd) + when (io.rocc.resp.fire()) { + div.io.resp.ready := Bool(false) + ll_wdata := io.rocc.resp.bits.data + ll_waddr := io.rocc.resp.bits.rd + ll_wen := Bool(true) + } + } + when (dmem_resp_replay && dmem_resp_xpu) { + div.io.resp.ready := Bool(false) + if (!params(BuildRoCC).isEmpty) + io.rocc.resp.ready := Bool(false) + ll_waddr := dmem_resp_waddr + ll_wen := Bool(true) + } + + val wb_valid = wb_reg_valid && !replay_wb && !csr.io.csr_xcpt + val wb_wen = wb_valid && wb_ctrl.wxd + val rf_wen = wb_wen || ll_wen + val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr) + val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword, + Mux(ll_wen, ll_wdata, + Mux(wb_ctrl.csr != CSR.N, csr.io.rw.rdata, + wb_reg_wdata))) + when (rf_wen) { rf.write(rf_waddr, rf_wdata) } + + // hook up control/status regfile + csr.io.exception := wb_reg_xcpt + csr.io.cause := wb_reg_cause + csr.io.retire := wb_valid + csr.io.host <> io.host + io.fpu.fcsr_rm := csr.io.fcsr_rm + csr.io.fcsr_flags := io.fpu.fcsr_flags + csr.io.rocc <> io.rocc + csr.io.pc := wb_reg_pc + csr.io.uarch_counters.foreach(_ := Bool(false)) + io.ptw.ptbr := csr.io.ptbr + io.ptw.invalidate := csr.io.fatc + io.ptw.status := csr.io.status + csr.io.rw.addr := 
wb_reg_inst(31,20) + csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N) + csr.io.rw.wdata := wb_reg_wdata + + val sboard = new Scoreboard(32) + sboard.clear(ll_wen, ll_waddr) + + // control transfer from ex/wb + val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt + take_pc_wb := replay_wb || wb_xcpt || csr.io.eret + + io.imem.req.bits.pc := + Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret + Mux(replay_wb, wb_reg_pc, // replay + mem_npc)).toUInt // mispredicted branch + + io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && ((mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb + io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit + io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp + io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr + io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === Bits("b00??1") + + io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb + io.imem.bht_update.bits.taken := mem_br_taken + io.imem.bht_update.bits.mispredict := mem_wrong_npc + io.imem.bht_update.bits.prediction.valid := mem_reg_btb_hit + io.imem.bht_update.bits.prediction.bits := mem_reg_btb_resp + + io.imem.ras_update.valid := mem_reg_valid && io.imem.btb_update.bits.isJump && !mem_npc_misaligned && !take_pc_wb + io.imem.ras_update.bits.isCall := mem_ctrl.wxd && mem_waddr(0) + io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn + io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit + io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp + + io.imem.req.valid := take_pc + io.imem.btb_update.bits.pc := mem_reg_pc + io.imem.btb_update.bits.target := io.imem.req.bits.pc + io.imem.btb_update.bits.br_pc := mem_reg_pc + io.imem.bht_update.bits.pc := mem_reg_pc + io.imem.ras_update.bits.returnAddr := mem_int_wdata + + // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage. 
+ val id_renx1_not0 = id_ctrl.rxs1 && id_raddr1 != UInt(0) + val id_renx2_not0 = id_ctrl.rxs2 && id_raddr2 != UInt(0) + val id_wen_not0 = id_ctrl.wxd && id_waddr != UInt(0) + val ex_cannot_bypass = ex_ctrl.csr != CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc + val data_hazard_ex = ex_ctrl.wxd && + (id_renx1_not0 && id_raddr1 === ex_waddr || + id_renx2_not0 && id_raddr2 === ex_waddr || + id_wen_not0 && id_waddr === ex_waddr) + val fp_data_hazard_ex = ex_ctrl.wfd && + (io.fpu.dec.ren1 && id_raddr1 === ex_waddr || + io.fpu.dec.ren2 && id_raddr2 === ex_waddr || + io.fpu.dec.ren3 && id_raddr3 === ex_waddr || + io.fpu.dec.wen && id_waddr === ex_waddr) + val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex) + + // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage. + val mem_mem_cmd_bh = + if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass + else Bool(true) + val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc + val data_hazard_mem = mem_ctrl.wxd && + (id_renx1_not0 && id_raddr1 === mem_waddr || + id_renx2_not0 && id_raddr2 === mem_waddr || + id_wen_not0 && id_waddr === mem_waddr) + val fp_data_hazard_mem = mem_ctrl.wfd && + (io.fpu.dec.ren1 && id_raddr1 === mem_waddr || + io.fpu.dec.ren2 && id_raddr2 === mem_waddr || + io.fpu.dec.ren3 && id_raddr3 === mem_waddr || + io.fpu.dec.wen && id_waddr === mem_waddr) + val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem) + id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem + + // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
+ val data_hazard_wb = wb_ctrl.wxd && + (id_renx1_not0 && id_raddr1 === wb_waddr || + id_renx2_not0 && id_raddr2 === wb_waddr || + id_wen_not0 && id_waddr === wb_waddr) + val fp_data_hazard_wb = wb_ctrl.wfd && + (io.fpu.dec.ren1 && id_raddr1 === wb_waddr || + io.fpu.dec.ren2 && id_raddr2 === wb_waddr || + io.fpu.dec.ren3 && id_raddr3 === wb_waddr || + io.fpu.dec.wen && id_waddr === wb_waddr) + val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb) + + val id_sboard_hazard = + (id_renx1_not0 && sboard.readBypassed(id_raddr1) || + id_renx2_not0 && sboard.readBypassed(id_raddr2) || + id_wen_not0 && sboard.readBypassed(id_waddr)) + + sboard.set(wb_set_sboard && wb_wen, wb_waddr) + + val id_stall_fpu = if (!params(BuildFPU).isEmpty) { + val fp_sboard = new Scoreboard(32) + fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr) + fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr) + fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) + + id_csr_en && !io.fpu.fcsr_rdy || + io.fpu.dec.ren1 && fp_sboard.read(id_raddr1) || + io.fpu.dec.ren2 && fp_sboard.read(id_raddr2) || + io.fpu.dec.ren3 && fp_sboard.read(id_raddr3) || + io.fpu.dec.wen && fp_sboard.read(id_waddr) + } else Bool(false) + + val ctrl_stalld = + id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || + id_ctrl.fp && id_stall_fpu || + id_ctrl.mem && !io.dmem.req.ready || + Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || + id_do_fence || + csr.io.csr_stall + ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt + + io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt + io.imem.invalidate := wb_reg_valid && wb_ctrl.fence_i + + io.fpu.valid := !ctrl_killd && id_ctrl.fp + io.fpu.killx := ctrl_killx + io.fpu.killm := killm_common + io.fpu.inst := id_inst + io.fpu.fromint_data := ex_rs(0) + io.fpu.dmem_resp_val := dmem_resp_valid && 
dmem_resp_fpu + io.fpu.dmem_resp_data := io.dmem.resp.bits.data + io.fpu.dmem_resp_type := io.dmem.resp.bits.typ + io.fpu.dmem_resp_tag := dmem_resp_waddr + + io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem + io.dmem.req.bits.kill := killm_common || mem_xcpt + io.dmem.req.bits.cmd := ex_ctrl.mem_cmd + io.dmem.req.bits.typ := ex_ctrl.mem_type + io.dmem.req.bits.phys := Bool(false) + io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt + io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp) + io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) + require(params(CoreDCacheReqTagBits) >= 6) + io.dmem.invalidate_lr := wb_xcpt + + io.rocc.cmd.valid := wb_rocc_val + io.rocc.exception := wb_xcpt && csr.io.status.xs.orR + io.rocc.s := csr.io.status.prv.orR // should we just pass all of mstatus? + io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst) + io.rocc.cmd.bits.rs1 := wb_reg_wdata + io.rocc.cmd.bits.rs2 := wb_reg_rs2 + + printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", + io.host.id, csr.io.time(32,0), wb_valid, wb_reg_pc, + Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen, + wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), + wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), + wb_reg_inst, wb_reg_inst) + + def checkExceptions(x: Seq[(Bool, UInt)]) = + (x.map(_._1).reduce(_||_), PriorityMux(x)) + + def imm(sel: Bits, inst: Bits) = { + val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt) + val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) + val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt) + val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0), + Mux(sel === IMM_UJ, inst(20).toSInt, + Mux(sel === IMM_SB, inst(7).toSInt, sign))) + val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25)) + val b4_1 = Mux(sel === IMM_U, Bits(0), + Mux(sel === IMM_S || sel === IMM_SB, inst(11,8), + Mux(sel === IMM_Z, 
inst(19,16), inst(24,21)))) + val b0 = Mux(sel === IMM_S, inst(7), + Mux(sel === IMM_I, inst(20), + Mux(sel === IMM_Z, inst(15), Bits(0)))) + + Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt + } + + def vaSign(a0: UInt, ea: Bits) = { + // efficient means to compress 64-bit VA into vaddrBits+1 bits + // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) + val a = a0 >> vaddrBits-1 + val e = ea(vaddrBits,vaddrBits-1) + Mux(a === UInt(0) || a === UInt(1), e != UInt(0), + Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1), + e(0))) + } + + class RegFile { + private val rf = Mem(UInt(width = 64), 31) + private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() + private var canRead = true + def read(addr: UInt) = { + require(canRead) + reads += addr -> Wire(UInt()) + reads.last._2 := rf(~addr) + reads.last._2 + } + def write(addr: UInt, data: UInt) = { + canRead = false + when (addr != UInt(0)) { + rf(~addr) := data + for ((raddr, rdata) <- reads) + when (addr === raddr) { rdata := data } + } + } + } + + class Scoreboard(n: Int) + { + def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr)) + def clear(en: Bool, addr: UInt): Unit = update(en, _next & ~mask(en, addr)) + def read(addr: UInt): Bool = r(addr) + def readBypassed(addr: UInt): Bool = _next(addr) + + private val r = Reg(init=Bits(0, n)) + private var _next = r + private var ens = Bool(false) + private def mask(en: Bool, addr: UInt) = Mux(en, UInt(1) << addr, UInt(0)) + private def update(en: Bool, update: UInt) = { + _next = update + ens = ens || en + when (ens) { r := _next } + } + } +} diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index ca6a86fb..b4c19276 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -23,7 +23,7 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { val icache = Module(new Frontend, { case CacheName => "L1I"; case CoreName => "Rocket" }) val dcache = Module(new 
HellaCache, { case CacheName => "L1D" }) val ptw = Module(new PTW(params(NPTWPorts))) - val core = Module(new Core, { case CoreName => "Rocket" }) + val core = Module(new Rocket, { case CoreName => "Rocket" }) dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) @@ -38,6 +38,13 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { core.io.imem <> icache.io.cpu core.io.ptw <> ptw.io.dpath + //If so specified, build an FPU module and wire it in + params(BuildFPU) + .map { bf => bf() } + .foreach { fpu => + fpu.io <> core.io.fpu + } + // Connect the caches and ROCC to the outer memory system io.cached <> dcache.io.mem // If so specified, build an RoCC module and wire it in From bd785e7d1996e4ac5e2d7839bf41e5bb3ad45aa4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 22 Jul 2015 15:46:20 -0700 Subject: [PATCH 0855/1087] Factor out common hazard detection code --- rocket/src/main/scala/rocket.scala | 53 ++++++++++-------------------- 1 file changed, 18 insertions(+), 35 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 482c7dd9..013fc2a8 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -417,19 +417,18 @@ class Rocket extends CoreModule io.imem.ras_update.bits.returnAddr := mem_int_wdata // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage. 
- val id_renx1_not0 = id_ctrl.rxs1 && id_raddr1 != UInt(0) - val id_renx2_not0 = id_ctrl.rxs2 && id_raddr2 != UInt(0) - val id_wen_not0 = id_ctrl.wxd && id_waddr != UInt(0) + val hazard_targets = Seq((id_ctrl.rxs1 && id_raddr1 != UInt(0), id_raddr1), + (id_ctrl.rxs2 && id_raddr2 != UInt(0), id_raddr2), + (id_ctrl.wxd && id_waddr != UInt(0), id_waddr)) + val fp_hazard_targets = Seq((io.fpu.dec.ren1, id_raddr1), + (io.fpu.dec.ren2, id_raddr2), + (io.fpu.dec.ren3, id_raddr3), + (io.fpu.dec.wen, id_waddr)) + + val id_sboard_hazard = checkHazards(hazard_targets, sboard.readBypassed _) val ex_cannot_bypass = ex_ctrl.csr != CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc - val data_hazard_ex = ex_ctrl.wxd && - (id_renx1_not0 && id_raddr1 === ex_waddr || - id_renx2_not0 && id_raddr2 === ex_waddr || - id_wen_not0 && id_waddr === ex_waddr) - val fp_data_hazard_ex = ex_ctrl.wfd && - (io.fpu.dec.ren1 && id_raddr1 === ex_waddr || - io.fpu.dec.ren2 && id_raddr2 === ex_waddr || - io.fpu.dec.ren3 && id_raddr3 === ex_waddr || - io.fpu.dec.wen && id_waddr === ex_waddr) + val data_hazard_ex = ex_ctrl.wxd && checkHazards(hazard_targets, _ === ex_waddr) + val fp_data_hazard_ex = ex_ctrl.wfd && checkHazards(fp_hazard_targets, _ === ex_waddr) val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex) // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage. 
@@ -437,35 +436,16 @@ class Rocket extends CoreModule if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass else Bool(true) val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc - val data_hazard_mem = mem_ctrl.wxd && - (id_renx1_not0 && id_raddr1 === mem_waddr || - id_renx2_not0 && id_raddr2 === mem_waddr || - id_wen_not0 && id_waddr === mem_waddr) - val fp_data_hazard_mem = mem_ctrl.wfd && - (io.fpu.dec.ren1 && id_raddr1 === mem_waddr || - io.fpu.dec.ren2 && id_raddr2 === mem_waddr || - io.fpu.dec.ren3 && id_raddr3 === mem_waddr || - io.fpu.dec.wen && id_waddr === mem_waddr) + val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr) + val fp_data_hazard_mem = mem_ctrl.wfd && checkHazards(fp_hazard_targets, _ === mem_waddr) val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem) id_load_use := mem_reg_valid && data_hazard_mem && mem_ctrl.mem // stall for RAW/WAW hazards on load/AMO misses and mul/div in writeback. 
- val data_hazard_wb = wb_ctrl.wxd && - (id_renx1_not0 && id_raddr1 === wb_waddr || - id_renx2_not0 && id_raddr2 === wb_waddr || - id_wen_not0 && id_waddr === wb_waddr) - val fp_data_hazard_wb = wb_ctrl.wfd && - (io.fpu.dec.ren1 && id_raddr1 === wb_waddr || - io.fpu.dec.ren2 && id_raddr2 === wb_waddr || - io.fpu.dec.ren3 && id_raddr3 === wb_waddr || - io.fpu.dec.wen && id_waddr === wb_waddr) + val data_hazard_wb = wb_ctrl.wxd && checkHazards(hazard_targets, _ === wb_waddr) + val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr) val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb) - val id_sboard_hazard = - (id_renx1_not0 && sboard.readBypassed(id_raddr1) || - id_renx2_not0 && sboard.readBypassed(id_raddr2) || - id_wen_not0 && sboard.readBypassed(id_waddr)) - sboard.set(wb_set_sboard && wb_wen, wb_waddr) val id_stall_fpu = if (!params(BuildFPU).isEmpty) { @@ -531,6 +511,9 @@ class Rocket extends CoreModule def checkExceptions(x: Seq[(Bool, UInt)]) = (x.map(_._1).reduce(_||_), PriorityMux(x)) + def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) = + targets.map(h => h._1 && cond(h._2)).reduce(_||_) + def imm(sel: Bits, inst: Bits) = { val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt) val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) From b4e4ceed3dfaa413cbe97d7d4e593b3646205d6d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 22 Jul 2015 15:52:13 -0700 Subject: [PATCH 0856/1087] Factor out some more hazard detection code --- rocket/src/main/scala/rocket.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 013fc2a8..3d9cf54b 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -454,11 +454,7 @@ class Rocket extends CoreModule fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr) 
fp_sboard.clear(io.fpu.sboard_clr, io.fpu.sboard_clra) - id_csr_en && !io.fpu.fcsr_rdy || - io.fpu.dec.ren1 && fp_sboard.read(id_raddr1) || - io.fpu.dec.ren2 && fp_sboard.read(id_raddr2) || - io.fpu.dec.ren3 && fp_sboard.read(id_raddr3) || - io.fpu.dec.wen && fp_sboard.read(id_waddr) + id_csr_en && !io.fpu.fcsr_rdy || checkHazards(fp_hazard_targets, fp_sboard.read _) } else Bool(false) val ctrl_stalld = From e9433ee01eb2704bcff3d3607096d3d3b6b18050 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 22 Jul 2015 17:32:44 -0700 Subject: [PATCH 0857/1087] Minor cleanup --- rocket/src/main/scala/rocket.scala | 76 ++++++++++++++---------------- 1 file changed, 35 insertions(+), 41 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 3d9cf54b..7fb5410d 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -301,6 +301,7 @@ class Rocket extends CoreModule div.io.kill := killm_common && Reg(next = div.io.req.fire()) val ctrl_killm = killm_common || mem_xcpt || fpu_kill_mem + // writeback stage wb_reg_valid := !ctrl_killm wb_reg_replay := replay_mem && !take_pc_wb wb_reg_xcpt := mem_xcpt && !take_pc_wb @@ -320,6 +321,8 @@ class Rocket extends CoreModule io.dmem.resp.bits.nack || wb_reg_replay || csr.io.csr_replay val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready + val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt + take_pc_wb := replay_wb || wb_xcpt || csr.io.eret when (wb_rocc_val) { wb_reg_rocc_pending := !io.rocc.cmd.ready } when (wb_reg_xcpt) { wb_reg_rocc_pending := Bool(false) } @@ -379,44 +382,6 @@ class Rocket extends CoreModule csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N) csr.io.rw.wdata := wb_reg_wdata - val sboard = new Scoreboard(32) - sboard.clear(ll_wen, ll_waddr) - - // control transfer from ex/wb - val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt - take_pc_wb := 
replay_wb || wb_xcpt || csr.io.eret - - io.imem.req.bits.pc := - Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret - Mux(replay_wb, wb_reg_pc, // replay - mem_npc)).toUInt // mispredicted branch - - io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && ((mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb - io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit - io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp - io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr - io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === Bits("b00??1") - - io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb - io.imem.bht_update.bits.taken := mem_br_taken - io.imem.bht_update.bits.mispredict := mem_wrong_npc - io.imem.bht_update.bits.prediction.valid := mem_reg_btb_hit - io.imem.bht_update.bits.prediction.bits := mem_reg_btb_resp - - io.imem.ras_update.valid := mem_reg_valid && io.imem.btb_update.bits.isJump && !mem_npc_misaligned && !take_pc_wb - io.imem.ras_update.bits.isCall := mem_ctrl.wxd && mem_waddr(0) - io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn - io.imem.ras_update.bits.prediction.valid := mem_reg_btb_hit - io.imem.ras_update.bits.prediction.bits := mem_reg_btb_resp - - io.imem.req.valid := take_pc - io.imem.btb_update.bits.pc := mem_reg_pc - io.imem.btb_update.bits.target := io.imem.req.bits.pc - io.imem.btb_update.bits.br_pc := mem_reg_pc - io.imem.bht_update.bits.pc := mem_reg_pc - io.imem.ras_update.bits.returnAddr := mem_int_wdata - - // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage. 
val hazard_targets = Seq((id_ctrl.rxs1 && id_raddr1 != UInt(0), id_raddr1), (id_ctrl.rxs2 && id_raddr2 != UInt(0), id_raddr2), (id_ctrl.wxd && id_waddr != UInt(0), id_waddr)) @@ -425,7 +390,12 @@ class Rocket extends CoreModule (io.fpu.dec.ren3, id_raddr3), (io.fpu.dec.wen, id_waddr)) + val sboard = new Scoreboard(32) + sboard.clear(ll_wen, ll_waddr) val id_sboard_hazard = checkHazards(hazard_targets, sboard.readBypassed _) + sboard.set(wb_set_sboard && wb_wen, wb_waddr) + + // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage. val ex_cannot_bypass = ex_ctrl.csr != CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc val data_hazard_ex = ex_ctrl.wxd && checkHazards(hazard_targets, _ === ex_waddr) val fp_data_hazard_ex = ex_ctrl.wfd && checkHazards(fp_hazard_targets, _ === ex_waddr) @@ -446,8 +416,6 @@ class Rocket extends CoreModule val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr) val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb) - sboard.set(wb_set_sboard && wb_wen, wb_waddr) - val id_stall_fpu = if (!params(BuildFPU).isEmpty) { val fp_sboard = new Scoreboard(32) fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr) @@ -466,8 +434,34 @@ class Rocket extends CoreModule csr.io.csr_stall ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt - io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt + io.imem.req.valid := take_pc + io.imem.req.bits.pc := + Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret + Mux(replay_wb, wb_reg_pc, // replay + mem_npc)).toUInt // mispredicted branch io.imem.invalidate := wb_reg_valid && wb_ctrl.fence_i + io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt + + io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && ((mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb 
+ io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr + io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === Bits("b00??1") + io.imem.btb_update.bits.pc := mem_reg_pc + io.imem.btb_update.bits.target := io.imem.req.bits.pc + io.imem.btb_update.bits.br_pc := mem_reg_pc + io.imem.btb_update.bits.prediction.valid := mem_reg_btb_hit + io.imem.btb_update.bits.prediction.bits := mem_reg_btb_resp + + io.imem.bht_update.valid := mem_reg_valid && mem_ctrl.branch && !take_pc_wb + io.imem.bht_update.bits.pc := mem_reg_pc + io.imem.bht_update.bits.taken := mem_br_taken + io.imem.bht_update.bits.mispredict := mem_wrong_npc + io.imem.bht_update.bits.prediction := io.imem.btb_update.bits.prediction + + io.imem.ras_update.valid := mem_reg_valid && io.imem.btb_update.bits.isJump && !mem_npc_misaligned && !take_pc_wb + io.imem.ras_update.bits.returnAddr := mem_int_wdata + io.imem.ras_update.bits.isCall := mem_ctrl.wxd && mem_waddr(0) + io.imem.ras_update.bits.isReturn := io.imem.btb_update.bits.isReturn + io.imem.ras_update.bits.prediction := io.imem.btb_update.bits.prediction io.fpu.valid := !ctrl_killd && id_ctrl.fp io.fpu.killx := ctrl_killx From ae73e3a997c3dedc44a2f720cb794777004d8903 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 22 Jul 2015 22:17:26 -0700 Subject: [PATCH 0858/1087] Only instantiate div/sqrt unit if requested --- rocket/src/main/scala/fpu.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 2c3a8590..a6b32b49 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -434,10 +434,8 @@ class FPU extends Module fpmu.io.in.bits := req fpmu.io.lt := fpiu.io.out.bits.lt - val divSqrt = Module(new hardfloat.divSqrtRecodedFloat64) - val divSqrt_inReady = Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div) - val divSqrt_outValid = divSqrt.io.outValid_div || 
divSqrt.io.outValid_sqrt val divSqrt_wen = Reg(next=Bool(false)) + val divSqrt_inReady = Wire(init=Bool(false)) val divSqrt_waddr = Reg(Bits()) val divSqrt_wdata = Wire(Bits()) val divSqrt_flags = Wire(Bits()) @@ -512,7 +510,9 @@ class FPU extends Module val divSqrt_flags_double = Reg(Bits()) val divSqrt_wdata_double = Reg(Bits()) - def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1 + val divSqrt = Module(new hardfloat.divSqrtRecodedFloat64) + divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div) + val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt val divSqrt_wb_hazard = wen.orR divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && !io.killm && (mem_ctrl.div || mem_ctrl.sqrt) divSqrt.io.sqrtOp := mem_ctrl.sqrt From f2dcc40e67f45c2fe5d0e40a8faafca08c3d6f81 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 27 Jul 2015 12:42:20 -0700 Subject: [PATCH 0859/1087] Chisel3 compatibility changes --- rocket/src/main/scala/btb.scala | 1 - rocket/src/main/scala/csr.scala | 3 +-- rocket/src/main/scala/decode.scala | 1 - rocket/src/main/scala/dpath_alu.scala | 1 - rocket/src/main/scala/instructions.scala | 1 - rocket/src/main/scala/rocc.scala | 1 - 6 files changed, 1 insertion(+), 7 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index cb372746..06a6c53c 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -4,7 +4,6 @@ package rocket import Chisel._ import Util._ -import Node._ import uncore._ case object NBTBEntries extends Field[Int] diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index ab064dad..a81ee0e8 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -5,7 +5,6 @@ package rocket import Chisel._ import Util._ import Instructions._ -import Node._ import uncore._ import scala.math._ @@ -421,7 
+420,7 @@ class CSRFile extends CoreModule val new_sstatus = new SStatus().fromBits(wdata) reg_mstatus.ie := new_sstatus.ie reg_mstatus.ie1 := new_sstatus.pie - reg_mstatus.prv1 := Mux(new_sstatus.ps, PRV_S, PRV_U) + reg_mstatus.prv1 := Mux[UInt](new_sstatus.ps, PRV_S, PRV_U) reg_mstatus.mprv := new_sstatus.mprv reg_mstatus.fs := new_sstatus.fs // even without an FPU if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_sstatus.xs diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 716b6bdf..421ef66d 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -3,7 +3,6 @@ package rocket import Chisel._ -import Node._ object DecodeLogic { diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index a6b258f7..16de87eb 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -3,7 +3,6 @@ package rocket import Chisel._ -import Node._ import Instructions._ object ALU diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 9b94792e..170e7416 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -3,7 +3,6 @@ package rocket import Chisel._ -import Node._ /* Automatically generated by parse-opcodes */ object Instructions { diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 61a4b000..ed962e26 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -3,7 +3,6 @@ package rocket import Chisel._ -import Node._ import uncore._ import Util._ From 049fc8dc24b947e68072f38e78ac036a1a8648f0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 28 Jul 2015 02:48:49 -0700 Subject: [PATCH 0860/1087] Chisel3 compatibility: use BitPat for don't-cares This one's hella ugly, but for the time being, idgaf. 
--- rocket/src/main/scala/consts.scala | 32 +- rocket/src/main/scala/csr.scala | 2 +- rocket/src/main/scala/decode.scala | 26 +- rocket/src/main/scala/dpath_alu.scala | 30 +- rocket/src/main/scala/fpu.scala | 39 ++- rocket/src/main/scala/idecode.scala | 14 +- rocket/src/main/scala/instructions.scala | 372 +++++++++++------------ rocket/src/main/scala/multiplier.scala | 2 +- rocket/src/main/scala/rocket.scala | 2 +- 9 files changed, 253 insertions(+), 266 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index 1fd8bc30..fda38fc2 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -8,22 +8,22 @@ import scala.math._ trait ScalarOpConstants { val SZ_BR = 3 - val BR_X = Bits("b???", 3) - val BR_EQ = Bits(0, 3) - val BR_NE = Bits(1, 3) - val BR_J = Bits(2, 3) - val BR_N = Bits(3, 3) - val BR_LT = Bits(4, 3) - val BR_GE = Bits(5, 3) - val BR_LTU = Bits(6, 3) - val BR_GEU = Bits(7, 3) + val BR_X = BitPat("b???") + val BR_EQ = UInt(0, 3) + val BR_NE = UInt(1, 3) + val BR_J = UInt(2, 3) + val BR_N = UInt(3, 3) + val BR_LT = UInt(4, 3) + val BR_GE = UInt(5, 3) + val BR_LTU = UInt(6, 3) + val BR_GEU = UInt(7, 3) - val A1_X = Bits("b??", 2) + val A1_X = BitPat("b??") val A1_ZERO = UInt(0, 2) val A1_RS1 = UInt(1, 2) val A1_PC = UInt(2, 2) - val IMM_X = Bits("b???", 3) + val IMM_X = BitPat("b???") val IMM_S = UInt(0, 3) val IMM_SB = UInt(1, 3) val IMM_U = UInt(2, 3) @@ -31,15 +31,15 @@ trait ScalarOpConstants { val IMM_I = UInt(4, 3) val IMM_Z = UInt(5, 3) - val A2_X = Bits("b??", 2) + val A2_X = BitPat("b??") val A2_ZERO = UInt(0, 2) val A2_FOUR = UInt(1, 2) val A2_RS2 = UInt(2, 2) val A2_IMM = UInt(3, 2) - val X = Bool.DC - val N = Bool(false) - val Y = Bool(true) + val X = BitPat("b?") + val N = BitPat("b0") + val Y = BitPat("b1") val SZ_DW = 1 val DW_X = X @@ -52,6 +52,4 @@ trait ScalarOpConstants { val PRV_S = 1 val PRV_H = 2 val PRV_M = 3 - - val RA = UInt(1, 5) } diff --git 
a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index a81ee0e8..8fb09605 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -57,7 +57,7 @@ object CSR { // commands val SZ = 3 - val X = UInt.DC(SZ) + val X = BitPat.DC(SZ) val N = UInt(0,SZ) val W = UInt(1,SZ) val S = UInt(2,SZ) diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index 421ef66d..db65f8ff 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -6,21 +6,14 @@ import Chisel._ object DecodeLogic { - def term(b: Bits) = { - val lit = b.litOf - if (lit.isZ) { - var (bits, mask, swidth) = Literal.parseLit(lit.toString) - new Term(BigInt(bits, 2), BigInt(2).pow(lit.getWidth)-(BigInt(mask, 2)+1)) - } else { - new Term(lit.value) - } - } + def term(lit: BitPat) = + new Term(lit.value, BigInt(2).pow(lit.getWidth)-(lit.mask+1)) def logic(addr: UInt, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bool], terms: Seq[Term]) = { terms.map { t => cache.getOrElseUpdate(t, (if (t.mask == 0) addr else addr & Bits(BigInt(2).pow(addrWidth)-(t.mask+1), addrWidth)) === Bits(t.value, addrWidth)) }.foldLeft(Bool(false))(_||_) } - def apply[T <: Bits](addr: UInt, default: T, mapping: Iterable[(UInt, T)]): T = { + def apply(addr: UInt, default: BitPat, mapping: Iterable[(BitPat, BitPat)]): UInt = { val cache = caches.getOrElseUpdate(addr, collection.mutable.Map[Term,Bool]()) val dterm = term(default) val (keys, values) = mapping.unzip @@ -32,7 +25,7 @@ object DecodeLogic for (u <- t.tail) assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap") - val result = (0 until default.litOf.getWidth.max(values.map(_.litOf.getWidth).max)).map({ case (i: Int) => + (0 until default.getWidth.max(values.map(_.getWidth).max)).map({ case (i: Int) => val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1) val maxt = 
termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1) val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1) @@ -46,20 +39,19 @@ object DecodeLogic if (defbit == 0) bit else ~bit } }).reverse.reduceRight(Cat(_,_)) - default.fromBits(result) } - def apply[T <: Bits](addr: UInt, default: Iterable[T], mappingIn: Iterable[(UInt, Iterable[T])]): Iterable[T] = { - val mapping = collection.mutable.ArrayBuffer.fill(default.size)(collection.mutable.ArrayBuffer[(UInt, T)]()) + def apply(addr: UInt, default: Iterable[BitPat], mappingIn: Iterable[(BitPat, Iterable[BitPat])]): Iterable[UInt] = { + val mapping = collection.mutable.ArrayBuffer.fill(default.size)(collection.mutable.ArrayBuffer[(BitPat, BitPat)]()) for ((key, values) <- mappingIn) for ((value, i) <- values zipWithIndex) mapping(i) += key -> value for ((thisDefault, thisMapping) <- default zip mapping) yield apply(addr, thisDefault, thisMapping) } + def apply(addr: UInt, default: Iterable[BitPat], mappingIn: List[(UInt, Iterable[BitPat])]): Iterable[UInt] = + apply(addr, default, mappingIn.map(m => (BitPat(m._1), m._2)).asInstanceOf[Iterable[(BitPat, Iterable[BitPat])]]) def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool = - apply(addr, Bool.DC, trues.map(_ -> Bool(true)) ++ falses.map(_ -> Bool(false))) - def apply(addr: UInt, tru: UInt, fals: UInt): Bool = - apply(addr, Seq(tru), Seq(fals)) + apply(addr, BitPat.DC(1), trues.map(BitPat(_) -> BitPat("b1")) ++ falses.map(BitPat(_) -> BitPat("b0"))).toBool private val caches = collection.mutable.Map[UInt,collection.mutable.Map[Term,Bool]]() } diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 16de87eb..354934ca 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -8,21 +8,21 @@ import Instructions._ object ALU { val SZ_ALU_FN = 4 - val FN_X = Bits("b????") - val FN_ADD = Bits(0) - val 
FN_SL = Bits(1) - val FN_XOR = Bits(4) - val FN_OR = Bits(6) - val FN_AND = Bits(7) - val FN_SR = Bits(5) - val FN_SEQ = Bits(8) - val FN_SNE = Bits(9) - val FN_SUB = Bits(10) - val FN_SRA = Bits(11) - val FN_SLT = Bits(12) - val FN_SGE = Bits(13) - val FN_SLTU = Bits(14) - val FN_SGEU = Bits(15) + val FN_X = BitPat("b????") + val FN_ADD = UInt(0) + val FN_SL = UInt(1) + val FN_XOR = UInt(4) + val FN_OR = UInt(6) + val FN_AND = UInt(7) + val FN_SR = UInt(5) + val FN_SEQ = UInt(8) + val FN_SNE = UInt(9) + val FN_SUB = UInt(10) + val FN_SRA = UInt(11) + val FN_SLT = UInt(12) + val FN_SGE = UInt(13) + val FN_SLTU = UInt(14) + val FN_SGEU = UInt(15) val FN_DIV = FN_XOR val FN_DIVU = FN_SR diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index a6b32b49..c1c9e1e5 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -13,24 +13,24 @@ case object DFMALatency object FPConstants { - val FCMD_ADD = Bits("b0??00") - val FCMD_SUB = Bits("b0??01") - val FCMD_MUL = Bits("b0??10") - val FCMD_MADD = Bits("b1??00") - val FCMD_MSUB = Bits("b1??01") - val FCMD_NMSUB = Bits("b1??10") - val FCMD_NMADD = Bits("b1??11") - val FCMD_DIV = Bits("b?0011") - val FCMD_SQRT = Bits("b?1011") - val FCMD_SGNJ = Bits("b??1?0") - val FCMD_MINMAX = Bits("b?01?1") - val FCMD_CVT_FF = Bits("b??0??") - val FCMD_CVT_IF = Bits("b?10??") - val FCMD_CMP = Bits("b?01??") - val FCMD_MV_XF = Bits("b?11??") - val FCMD_CVT_FI = Bits("b??0??") - val FCMD_MV_FX = Bits("b??1??") - val FCMD_X = Bits("b?????") + val FCMD_ADD = BitPat("b0??00") + val FCMD_SUB = BitPat("b0??01") + val FCMD_MUL = BitPat("b0??10") + val FCMD_MADD = BitPat("b1??00") + val FCMD_MSUB = BitPat("b1??01") + val FCMD_NMSUB = BitPat("b1??10") + val FCMD_NMADD = BitPat("b1??11") + val FCMD_DIV = BitPat("b?0011") + val FCMD_SQRT = BitPat("b?1011") + val FCMD_SGNJ = BitPat("b??1?0") + val FCMD_MINMAX = BitPat("b?01?1") + val FCMD_CVT_FF = BitPat("b??0??") + val FCMD_CVT_IF = 
BitPat("b?10??") + val FCMD_CMP = BitPat("b?01??") + val FCMD_MV_XF = BitPat("b?11??") + val FCMD_CVT_FI = BitPat("b??0??") + val FCMD_MV_FX = BitPat("b??1??") + val FCMD_X = BitPat("b?????") val FCMD_WIDTH = 5 val RM_SZ = 3 @@ -65,9 +65,6 @@ class FPUDecoder extends Module val sigs = new FPUCtrlSigs().asOutput } - val N = Bool(false) - val Y = Bool(true) - val X = Bool(false) val decoder = DecodeLogic(io.inst, List (FCMD_X, X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X), Array(FLW -> List(FCMD_X, Y,Y,N,N,N,X,X,Y,N,N,N,N,N,N,N,N), diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index 18a24616..97058bd4 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -11,7 +11,7 @@ abstract trait DecodeConstants { val xpr64 = Y - val decode_default = + val decode_default: List[BitPat] = // jal renf1 fence.i // | jalr | renf2 | // fp_val| | renx2 | | renf3 | @@ -22,7 +22,7 @@ abstract trait DecodeConstants // | | | | | | | | | | | | | | | | | | | | | | | | | | List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X) - val table: Array[(UInt, List[UInt])] + val table: Array[(BitPat, List[BitPat])] } class IntCtrlSigs extends Bundle { @@ -53,7 +53,7 @@ class IntCtrlSigs extends Bundle { val fence = Bool() val amo = Bool() - def decode(inst: UInt, table: Iterable[(UInt, List[UInt])]) = { + def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = { val decoder = DecodeLogic(inst, XDecode.decode_default, table) Vec(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2, sel_alu1, sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type, @@ -64,7 +64,7 @@ class IntCtrlSigs extends Bundle { object XDecode extends DecodeConstants { - val table = Array( + val table: Array[(BitPat, List[BitPat])] = Array( // jal renf1 fence.i // | jalr | renf2 | // fp_val| | renx2 | | renf3 | @@ -185,7 +185,7 @@ object XDecode extends DecodeConstants object FDecode extends DecodeConstants { - val table = 
Array( + val table: Array[(BitPat, List[BitPat])] = Array( // jal renf1 fence.i // | jalr | renf2 | // fp_val| | renx2 | | renf3 | @@ -256,7 +256,7 @@ object FDecode extends DecodeConstants object FDivSqrtDecode extends DecodeConstants { - val table = Array( + val table: Array[(BitPat, List[BitPat])] = Array( FDIV_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), FDIV_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), FSQRT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), @@ -265,7 +265,7 @@ object FDivSqrtDecode extends DecodeConstants object RoCCDecode extends DecodeConstants { - val table = Array( + val table: Array[(BitPat, List[BitPat])] = Array( // jal renf1 fence.i // | jalr | renf2 | // fp_val| | renx2 | | renf3 | diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 170e7416..f007ebc4 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -6,192 +6,192 @@ import Chisel._ /* Automatically generated by parse-opcodes */ object Instructions { - def BEQ = Bits("b?????????????????000?????1100011") - def BNE = Bits("b?????????????????001?????1100011") - def BLT = Bits("b?????????????????100?????1100011") - def BGE = Bits("b?????????????????101?????1100011") - def BLTU = Bits("b?????????????????110?????1100011") - def BGEU = Bits("b?????????????????111?????1100011") - def JALR = Bits("b?????????????????000?????1100111") - def JAL = Bits("b?????????????????????????1101111") - def LUI = Bits("b?????????????????????????0110111") - def AUIPC = Bits("b?????????????????????????0010111") - def ADDI = Bits("b?????????????????000?????0010011") - def SLLI = Bits("b000000???????????001?????0010011") - def SLTI = Bits("b?????????????????010?????0010011") - def SLTIU = Bits("b?????????????????011?????0010011") - def XORI = 
Bits("b?????????????????100?????0010011") - def SRLI = Bits("b000000???????????101?????0010011") - def SRAI = Bits("b010000???????????101?????0010011") - def ORI = Bits("b?????????????????110?????0010011") - def ANDI = Bits("b?????????????????111?????0010011") - def ADD = Bits("b0000000??????????000?????0110011") - def SUB = Bits("b0100000??????????000?????0110011") - def SLL = Bits("b0000000??????????001?????0110011") - def SLT = Bits("b0000000??????????010?????0110011") - def SLTU = Bits("b0000000??????????011?????0110011") - def XOR = Bits("b0000000??????????100?????0110011") - def SRL = Bits("b0000000??????????101?????0110011") - def SRA = Bits("b0100000??????????101?????0110011") - def OR = Bits("b0000000??????????110?????0110011") - def AND = Bits("b0000000??????????111?????0110011") - def ADDIW = Bits("b?????????????????000?????0011011") - def SLLIW = Bits("b0000000??????????001?????0011011") - def SRLIW = Bits("b0000000??????????101?????0011011") - def SRAIW = Bits("b0100000??????????101?????0011011") - def ADDW = Bits("b0000000??????????000?????0111011") - def SUBW = Bits("b0100000??????????000?????0111011") - def SLLW = Bits("b0000000??????????001?????0111011") - def SRLW = Bits("b0000000??????????101?????0111011") - def SRAW = Bits("b0100000??????????101?????0111011") - def LB = Bits("b?????????????????000?????0000011") - def LH = Bits("b?????????????????001?????0000011") - def LW = Bits("b?????????????????010?????0000011") - def LD = Bits("b?????????????????011?????0000011") - def LBU = Bits("b?????????????????100?????0000011") - def LHU = Bits("b?????????????????101?????0000011") - def LWU = Bits("b?????????????????110?????0000011") - def SB = Bits("b?????????????????000?????0100011") - def SH = Bits("b?????????????????001?????0100011") - def SW = Bits("b?????????????????010?????0100011") - def SD = Bits("b?????????????????011?????0100011") - def FENCE = Bits("b?????????????????000?????0001111") - def FENCE_I = Bits("b?????????????????001?????0001111") 
- def MUL = Bits("b0000001??????????000?????0110011") - def MULH = Bits("b0000001??????????001?????0110011") - def MULHSU = Bits("b0000001??????????010?????0110011") - def MULHU = Bits("b0000001??????????011?????0110011") - def DIV = Bits("b0000001??????????100?????0110011") - def DIVU = Bits("b0000001??????????101?????0110011") - def REM = Bits("b0000001??????????110?????0110011") - def REMU = Bits("b0000001??????????111?????0110011") - def MULW = Bits("b0000001??????????000?????0111011") - def DIVW = Bits("b0000001??????????100?????0111011") - def DIVUW = Bits("b0000001??????????101?????0111011") - def REMW = Bits("b0000001??????????110?????0111011") - def REMUW = Bits("b0000001??????????111?????0111011") - def AMOADD_W = Bits("b00000????????????010?????0101111") - def AMOXOR_W = Bits("b00100????????????010?????0101111") - def AMOOR_W = Bits("b01000????????????010?????0101111") - def AMOAND_W = Bits("b01100????????????010?????0101111") - def AMOMIN_W = Bits("b10000????????????010?????0101111") - def AMOMAX_W = Bits("b10100????????????010?????0101111") - def AMOMINU_W = Bits("b11000????????????010?????0101111") - def AMOMAXU_W = Bits("b11100????????????010?????0101111") - def AMOSWAP_W = Bits("b00001????????????010?????0101111") - def LR_W = Bits("b00010??00000?????010?????0101111") - def SC_W = Bits("b00011????????????010?????0101111") - def AMOADD_D = Bits("b00000????????????011?????0101111") - def AMOXOR_D = Bits("b00100????????????011?????0101111") - def AMOOR_D = Bits("b01000????????????011?????0101111") - def AMOAND_D = Bits("b01100????????????011?????0101111") - def AMOMIN_D = Bits("b10000????????????011?????0101111") - def AMOMAX_D = Bits("b10100????????????011?????0101111") - def AMOMINU_D = Bits("b11000????????????011?????0101111") - def AMOMAXU_D = Bits("b11100????????????011?????0101111") - def AMOSWAP_D = Bits("b00001????????????011?????0101111") - def LR_D = Bits("b00010??00000?????011?????0101111") - def SC_D = 
Bits("b00011????????????011?????0101111") - def SCALL = Bits("b00000000000000000000000001110011") - def SBREAK = Bits("b00000000000100000000000001110011") - def SRET = Bits("b00010000000000000000000001110011") - def SFENCE_VM = Bits("b000100000001?????000000001110011") - def WFI = Bits("b00010000001000000000000001110011") - def MRTH = Bits("b00110000011000000000000001110011") - def MRTS = Bits("b00110000010100000000000001110011") - def HRTS = Bits("b00100000010100000000000001110011") - def CSRRW = Bits("b?????????????????001?????1110011") - def CSRRS = Bits("b?????????????????010?????1110011") - def CSRRC = Bits("b?????????????????011?????1110011") - def CSRRWI = Bits("b?????????????????101?????1110011") - def CSRRSI = Bits("b?????????????????110?????1110011") - def CSRRCI = Bits("b?????????????????111?????1110011") - def FADD_S = Bits("b0000000??????????????????1010011") - def FSUB_S = Bits("b0000100??????????????????1010011") - def FMUL_S = Bits("b0001000??????????????????1010011") - def FDIV_S = Bits("b0001100??????????????????1010011") - def FSGNJ_S = Bits("b0010000??????????000?????1010011") - def FSGNJN_S = Bits("b0010000??????????001?????1010011") - def FSGNJX_S = Bits("b0010000??????????010?????1010011") - def FMIN_S = Bits("b0010100??????????000?????1010011") - def FMAX_S = Bits("b0010100??????????001?????1010011") - def FSQRT_S = Bits("b010110000000?????????????1010011") - def FADD_D = Bits("b0000001??????????????????1010011") - def FSUB_D = Bits("b0000101??????????????????1010011") - def FMUL_D = Bits("b0001001??????????????????1010011") - def FDIV_D = Bits("b0001101??????????????????1010011") - def FSGNJ_D = Bits("b0010001??????????000?????1010011") - def FSGNJN_D = Bits("b0010001??????????001?????1010011") - def FSGNJX_D = Bits("b0010001??????????010?????1010011") - def FMIN_D = Bits("b0010101??????????000?????1010011") - def FMAX_D = Bits("b0010101??????????001?????1010011") - def FCVT_S_D = Bits("b010000000001?????????????1010011") - def FCVT_D_S = 
Bits("b010000100000?????????????1010011") - def FSQRT_D = Bits("b010110100000?????????????1010011") - def FLE_S = Bits("b1010000??????????000?????1010011") - def FLT_S = Bits("b1010000??????????001?????1010011") - def FEQ_S = Bits("b1010000??????????010?????1010011") - def FLE_D = Bits("b1010001??????????000?????1010011") - def FLT_D = Bits("b1010001??????????001?????1010011") - def FEQ_D = Bits("b1010001??????????010?????1010011") - def FCVT_W_S = Bits("b110000000000?????????????1010011") - def FCVT_WU_S = Bits("b110000000001?????????????1010011") - def FCVT_L_S = Bits("b110000000010?????????????1010011") - def FCVT_LU_S = Bits("b110000000011?????????????1010011") - def FMV_X_S = Bits("b111000000000?????000?????1010011") - def FCLASS_S = Bits("b111000000000?????001?????1010011") - def FCVT_W_D = Bits("b110000100000?????????????1010011") - def FCVT_WU_D = Bits("b110000100001?????????????1010011") - def FCVT_L_D = Bits("b110000100010?????????????1010011") - def FCVT_LU_D = Bits("b110000100011?????????????1010011") - def FMV_X_D = Bits("b111000100000?????000?????1010011") - def FCLASS_D = Bits("b111000100000?????001?????1010011") - def FCVT_S_W = Bits("b110100000000?????????????1010011") - def FCVT_S_WU = Bits("b110100000001?????????????1010011") - def FCVT_S_L = Bits("b110100000010?????????????1010011") - def FCVT_S_LU = Bits("b110100000011?????????????1010011") - def FMV_S_X = Bits("b111100000000?????000?????1010011") - def FCVT_D_W = Bits("b110100100000?????????????1010011") - def FCVT_D_WU = Bits("b110100100001?????????????1010011") - def FCVT_D_L = Bits("b110100100010?????????????1010011") - def FCVT_D_LU = Bits("b110100100011?????????????1010011") - def FMV_D_X = Bits("b111100100000?????000?????1010011") - def FLW = Bits("b?????????????????010?????0000111") - def FLD = Bits("b?????????????????011?????0000111") - def FSW = Bits("b?????????????????010?????0100111") - def FSD = Bits("b?????????????????011?????0100111") - def FMADD_S = 
Bits("b?????00??????????????????1000011") - def FMSUB_S = Bits("b?????00??????????????????1000111") - def FNMSUB_S = Bits("b?????00??????????????????1001011") - def FNMADD_S = Bits("b?????00??????????????????1001111") - def FMADD_D = Bits("b?????01??????????????????1000011") - def FMSUB_D = Bits("b?????01??????????????????1000111") - def FNMSUB_D = Bits("b?????01??????????????????1001011") - def FNMADD_D = Bits("b?????01??????????????????1001111") - def CUSTOM0 = Bits("b?????????????????000?????0001011") - def CUSTOM0_RS1 = Bits("b?????????????????010?????0001011") - def CUSTOM0_RS1_RS2 = Bits("b?????????????????011?????0001011") - def CUSTOM0_RD = Bits("b?????????????????100?????0001011") - def CUSTOM0_RD_RS1 = Bits("b?????????????????110?????0001011") - def CUSTOM0_RD_RS1_RS2 = Bits("b?????????????????111?????0001011") - def CUSTOM1 = Bits("b?????????????????000?????0101011") - def CUSTOM1_RS1 = Bits("b?????????????????010?????0101011") - def CUSTOM1_RS1_RS2 = Bits("b?????????????????011?????0101011") - def CUSTOM1_RD = Bits("b?????????????????100?????0101011") - def CUSTOM1_RD_RS1 = Bits("b?????????????????110?????0101011") - def CUSTOM1_RD_RS1_RS2 = Bits("b?????????????????111?????0101011") - def CUSTOM2 = Bits("b?????????????????000?????1011011") - def CUSTOM2_RS1 = Bits("b?????????????????010?????1011011") - def CUSTOM2_RS1_RS2 = Bits("b?????????????????011?????1011011") - def CUSTOM2_RD = Bits("b?????????????????100?????1011011") - def CUSTOM2_RD_RS1 = Bits("b?????????????????110?????1011011") - def CUSTOM2_RD_RS1_RS2 = Bits("b?????????????????111?????1011011") - def CUSTOM3 = Bits("b?????????????????000?????1111011") - def CUSTOM3_RS1 = Bits("b?????????????????010?????1111011") - def CUSTOM3_RS1_RS2 = Bits("b?????????????????011?????1111011") - def CUSTOM3_RD = Bits("b?????????????????100?????1111011") - def CUSTOM3_RD_RS1 = Bits("b?????????????????110?????1111011") - def CUSTOM3_RD_RS1_RS2 = Bits("b?????????????????111?????1111011") + def BEQ = 
BitPat("b?????????????????000?????1100011") + def BNE = BitPat("b?????????????????001?????1100011") + def BLT = BitPat("b?????????????????100?????1100011") + def BGE = BitPat("b?????????????????101?????1100011") + def BLTU = BitPat("b?????????????????110?????1100011") + def BGEU = BitPat("b?????????????????111?????1100011") + def JALR = BitPat("b?????????????????000?????1100111") + def JAL = BitPat("b?????????????????????????1101111") + def LUI = BitPat("b?????????????????????????0110111") + def AUIPC = BitPat("b?????????????????????????0010111") + def ADDI = BitPat("b?????????????????000?????0010011") + def SLLI = BitPat("b000000???????????001?????0010011") + def SLTI = BitPat("b?????????????????010?????0010011") + def SLTIU = BitPat("b?????????????????011?????0010011") + def XORI = BitPat("b?????????????????100?????0010011") + def SRLI = BitPat("b000000???????????101?????0010011") + def SRAI = BitPat("b010000???????????101?????0010011") + def ORI = BitPat("b?????????????????110?????0010011") + def ANDI = BitPat("b?????????????????111?????0010011") + def ADD = BitPat("b0000000??????????000?????0110011") + def SUB = BitPat("b0100000??????????000?????0110011") + def SLL = BitPat("b0000000??????????001?????0110011") + def SLT = BitPat("b0000000??????????010?????0110011") + def SLTU = BitPat("b0000000??????????011?????0110011") + def XOR = BitPat("b0000000??????????100?????0110011") + def SRL = BitPat("b0000000??????????101?????0110011") + def SRA = BitPat("b0100000??????????101?????0110011") + def OR = BitPat("b0000000??????????110?????0110011") + def AND = BitPat("b0000000??????????111?????0110011") + def ADDIW = BitPat("b?????????????????000?????0011011") + def SLLIW = BitPat("b0000000??????????001?????0011011") + def SRLIW = BitPat("b0000000??????????101?????0011011") + def SRAIW = BitPat("b0100000??????????101?????0011011") + def ADDW = BitPat("b0000000??????????000?????0111011") + def SUBW = BitPat("b0100000??????????000?????0111011") + def SLLW = 
BitPat("b0000000??????????001?????0111011") + def SRLW = BitPat("b0000000??????????101?????0111011") + def SRAW = BitPat("b0100000??????????101?????0111011") + def LB = BitPat("b?????????????????000?????0000011") + def LH = BitPat("b?????????????????001?????0000011") + def LW = BitPat("b?????????????????010?????0000011") + def LD = BitPat("b?????????????????011?????0000011") + def LBU = BitPat("b?????????????????100?????0000011") + def LHU = BitPat("b?????????????????101?????0000011") + def LWU = BitPat("b?????????????????110?????0000011") + def SB = BitPat("b?????????????????000?????0100011") + def SH = BitPat("b?????????????????001?????0100011") + def SW = BitPat("b?????????????????010?????0100011") + def SD = BitPat("b?????????????????011?????0100011") + def FENCE = BitPat("b?????????????????000?????0001111") + def FENCE_I = BitPat("b?????????????????001?????0001111") + def MUL = BitPat("b0000001??????????000?????0110011") + def MULH = BitPat("b0000001??????????001?????0110011") + def MULHSU = BitPat("b0000001??????????010?????0110011") + def MULHU = BitPat("b0000001??????????011?????0110011") + def DIV = BitPat("b0000001??????????100?????0110011") + def DIVU = BitPat("b0000001??????????101?????0110011") + def REM = BitPat("b0000001??????????110?????0110011") + def REMU = BitPat("b0000001??????????111?????0110011") + def MULW = BitPat("b0000001??????????000?????0111011") + def DIVW = BitPat("b0000001??????????100?????0111011") + def DIVUW = BitPat("b0000001??????????101?????0111011") + def REMW = BitPat("b0000001??????????110?????0111011") + def REMUW = BitPat("b0000001??????????111?????0111011") + def AMOADD_W = BitPat("b00000????????????010?????0101111") + def AMOXOR_W = BitPat("b00100????????????010?????0101111") + def AMOOR_W = BitPat("b01000????????????010?????0101111") + def AMOAND_W = BitPat("b01100????????????010?????0101111") + def AMOMIN_W = BitPat("b10000????????????010?????0101111") + def AMOMAX_W = BitPat("b10100????????????010?????0101111") + def 
AMOMINU_W = BitPat("b11000????????????010?????0101111") + def AMOMAXU_W = BitPat("b11100????????????010?????0101111") + def AMOSWAP_W = BitPat("b00001????????????010?????0101111") + def LR_W = BitPat("b00010??00000?????010?????0101111") + def SC_W = BitPat("b00011????????????010?????0101111") + def AMOADD_D = BitPat("b00000????????????011?????0101111") + def AMOXOR_D = BitPat("b00100????????????011?????0101111") + def AMOOR_D = BitPat("b01000????????????011?????0101111") + def AMOAND_D = BitPat("b01100????????????011?????0101111") + def AMOMIN_D = BitPat("b10000????????????011?????0101111") + def AMOMAX_D = BitPat("b10100????????????011?????0101111") + def AMOMINU_D = BitPat("b11000????????????011?????0101111") + def AMOMAXU_D = BitPat("b11100????????????011?????0101111") + def AMOSWAP_D = BitPat("b00001????????????011?????0101111") + def LR_D = BitPat("b00010??00000?????011?????0101111") + def SC_D = BitPat("b00011????????????011?????0101111") + def SCALL = BitPat("b00000000000000000000000001110011") + def SBREAK = BitPat("b00000000000100000000000001110011") + def SRET = BitPat("b00010000000000000000000001110011") + def SFENCE_VM = BitPat("b000100000001?????000000001110011") + def WFI = BitPat("b00010000001000000000000001110011") + def MRTH = BitPat("b00110000011000000000000001110011") + def MRTS = BitPat("b00110000010100000000000001110011") + def HRTS = BitPat("b00100000010100000000000001110011") + def CSRRW = BitPat("b?????????????????001?????1110011") + def CSRRS = BitPat("b?????????????????010?????1110011") + def CSRRC = BitPat("b?????????????????011?????1110011") + def CSRRWI = BitPat("b?????????????????101?????1110011") + def CSRRSI = BitPat("b?????????????????110?????1110011") + def CSRRCI = BitPat("b?????????????????111?????1110011") + def FADD_S = BitPat("b0000000??????????????????1010011") + def FSUB_S = BitPat("b0000100??????????????????1010011") + def FMUL_S = BitPat("b0001000??????????????????1010011") + def FDIV_S = 
BitPat("b0001100??????????????????1010011") + def FSGNJ_S = BitPat("b0010000??????????000?????1010011") + def FSGNJN_S = BitPat("b0010000??????????001?????1010011") + def FSGNJX_S = BitPat("b0010000??????????010?????1010011") + def FMIN_S = BitPat("b0010100??????????000?????1010011") + def FMAX_S = BitPat("b0010100??????????001?????1010011") + def FSQRT_S = BitPat("b010110000000?????????????1010011") + def FADD_D = BitPat("b0000001??????????????????1010011") + def FSUB_D = BitPat("b0000101??????????????????1010011") + def FMUL_D = BitPat("b0001001??????????????????1010011") + def FDIV_D = BitPat("b0001101??????????????????1010011") + def FSGNJ_D = BitPat("b0010001??????????000?????1010011") + def FSGNJN_D = BitPat("b0010001??????????001?????1010011") + def FSGNJX_D = BitPat("b0010001??????????010?????1010011") + def FMIN_D = BitPat("b0010101??????????000?????1010011") + def FMAX_D = BitPat("b0010101??????????001?????1010011") + def FCVT_S_D = BitPat("b010000000001?????????????1010011") + def FCVT_D_S = BitPat("b010000100000?????????????1010011") + def FSQRT_D = BitPat("b010110100000?????????????1010011") + def FLE_S = BitPat("b1010000??????????000?????1010011") + def FLT_S = BitPat("b1010000??????????001?????1010011") + def FEQ_S = BitPat("b1010000??????????010?????1010011") + def FLE_D = BitPat("b1010001??????????000?????1010011") + def FLT_D = BitPat("b1010001??????????001?????1010011") + def FEQ_D = BitPat("b1010001??????????010?????1010011") + def FCVT_W_S = BitPat("b110000000000?????????????1010011") + def FCVT_WU_S = BitPat("b110000000001?????????????1010011") + def FCVT_L_S = BitPat("b110000000010?????????????1010011") + def FCVT_LU_S = BitPat("b110000000011?????????????1010011") + def FMV_X_S = BitPat("b111000000000?????000?????1010011") + def FCLASS_S = BitPat("b111000000000?????001?????1010011") + def FCVT_W_D = BitPat("b110000100000?????????????1010011") + def FCVT_WU_D = BitPat("b110000100001?????????????1010011") + def FCVT_L_D = 
BitPat("b110000100010?????????????1010011") + def FCVT_LU_D = BitPat("b110000100011?????????????1010011") + def FMV_X_D = BitPat("b111000100000?????000?????1010011") + def FCLASS_D = BitPat("b111000100000?????001?????1010011") + def FCVT_S_W = BitPat("b110100000000?????????????1010011") + def FCVT_S_WU = BitPat("b110100000001?????????????1010011") + def FCVT_S_L = BitPat("b110100000010?????????????1010011") + def FCVT_S_LU = BitPat("b110100000011?????????????1010011") + def FMV_S_X = BitPat("b111100000000?????000?????1010011") + def FCVT_D_W = BitPat("b110100100000?????????????1010011") + def FCVT_D_WU = BitPat("b110100100001?????????????1010011") + def FCVT_D_L = BitPat("b110100100010?????????????1010011") + def FCVT_D_LU = BitPat("b110100100011?????????????1010011") + def FMV_D_X = BitPat("b111100100000?????000?????1010011") + def FLW = BitPat("b?????????????????010?????0000111") + def FLD = BitPat("b?????????????????011?????0000111") + def FSW = BitPat("b?????????????????010?????0100111") + def FSD = BitPat("b?????????????????011?????0100111") + def FMADD_S = BitPat("b?????00??????????????????1000011") + def FMSUB_S = BitPat("b?????00??????????????????1000111") + def FNMSUB_S = BitPat("b?????00??????????????????1001011") + def FNMADD_S = BitPat("b?????00??????????????????1001111") + def FMADD_D = BitPat("b?????01??????????????????1000011") + def FMSUB_D = BitPat("b?????01??????????????????1000111") + def FNMSUB_D = BitPat("b?????01??????????????????1001011") + def FNMADD_D = BitPat("b?????01??????????????????1001111") + def CUSTOM0 = BitPat("b?????????????????000?????0001011") + def CUSTOM0_RS1 = BitPat("b?????????????????010?????0001011") + def CUSTOM0_RS1_RS2 = BitPat("b?????????????????011?????0001011") + def CUSTOM0_RD = BitPat("b?????????????????100?????0001011") + def CUSTOM0_RD_RS1 = BitPat("b?????????????????110?????0001011") + def CUSTOM0_RD_RS1_RS2 = BitPat("b?????????????????111?????0001011") + def CUSTOM1 = BitPat("b?????????????????000?????0101011") 
+ def CUSTOM1_RS1 = BitPat("b?????????????????010?????0101011") + def CUSTOM1_RS1_RS2 = BitPat("b?????????????????011?????0101011") + def CUSTOM1_RD = BitPat("b?????????????????100?????0101011") + def CUSTOM1_RD_RS1 = BitPat("b?????????????????110?????0101011") + def CUSTOM1_RD_RS1_RS2 = BitPat("b?????????????????111?????0101011") + def CUSTOM2 = BitPat("b?????????????????000?????1011011") + def CUSTOM2_RS1 = BitPat("b?????????????????010?????1011011") + def CUSTOM2_RS1_RS2 = BitPat("b?????????????????011?????1011011") + def CUSTOM2_RD = BitPat("b?????????????????100?????1011011") + def CUSTOM2_RD_RS1 = BitPat("b?????????????????110?????1011011") + def CUSTOM2_RD_RS1_RS2 = BitPat("b?????????????????111?????1011011") + def CUSTOM3 = BitPat("b?????????????????000?????1111011") + def CUSTOM3_RS1 = BitPat("b?????????????????010?????1111011") + def CUSTOM3_RS1_RS2 = BitPat("b?????????????????011?????1111011") + def CUSTOM3_RD = BitPat("b?????????????????100?????1111011") + def CUSTOM3_RD_RS1 = BitPat("b?????????????????110?????1111011") + def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011") } object Causes { val misaligned_fetch = 0x0 diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 2b0ca819..b76851f9 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -50,7 +50,7 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false) extends Module { FN_MUL -> List(Y, N, X, X), FN_MULH -> List(Y, Y, Y, Y), FN_MULHU -> List(Y, Y, N, N), - FN_MULHSU -> List(Y, Y, Y, N))) + FN_MULHSU -> List(Y, Y, Y, N))).map(_ toBool) def sext(x: Bits, signed: Bool) = { val sign = signed && Mux(io.req.bits.dw === DW_64, x(w-1), x(w/2-1)) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 7fb5410d..0f65e0cf 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -139,7 +139,7 @@ class Rocket extends CoreModule 
// this is overly conservative val safe_csrs = CSRs.sscratch :: CSRs.sepc :: CSRs.mscratch :: CSRs.mepc :: CSRs.mcause :: CSRs.mbadaddr :: Nil val legal_csrs = collection.mutable.LinkedHashSet(CSRs.all:_*) - val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs, legal_csrs -- safe_csrs)) + val id_csr_flush = id_system_insn || (id_csr_en && !id_csr_ren && !DecodeLogic(id_csr_addr, safe_csrs.map(UInt(_)), (legal_csrs -- safe_csrs).toList.map(UInt(_)))) val id_illegal_insn = !id_ctrl.legal || id_ctrl.fp && !csr.io.status.fs.orR || From 431dd2219b41c7bc44c74e6883a1490fc26d839b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 28 Jul 2015 20:13:56 -0700 Subject: [PATCH 0861/1087] Another Bits -> BitPat --- rocket/src/main/scala/rocket.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 0f65e0cf..1b87348b 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -444,7 +444,7 @@ class Rocket extends CoreModule io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && ((mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr - io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === Bits("b00??1") + io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00??1") io.imem.btb_update.bits.pc := mem_reg_pc io.imem.btb_update.bits.target := io.imem.req.bits.pc io.imem.btb_update.bits.br_pc := mem_reg_pc From a2fdcdcaef446a145cee01794b0948247f404fd3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 29 Jul 2015 00:24:58 -0700 Subject: [PATCH 0862/1087] Use Seq, not Iterable, when traversal order matters --- rocket/src/main/scala/decode.scala | 6 +++--- rocket/src/main/scala/util.scala | 4 ++-- 2 files changed, 5 
insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/decode.scala b/rocket/src/main/scala/decode.scala index db65f8ff..07cdc1d6 100644 --- a/rocket/src/main/scala/decode.scala +++ b/rocket/src/main/scala/decode.scala @@ -40,7 +40,7 @@ object DecodeLogic } }).reverse.reduceRight(Cat(_,_)) } - def apply(addr: UInt, default: Iterable[BitPat], mappingIn: Iterable[(BitPat, Iterable[BitPat])]): Iterable[UInt] = { + def apply(addr: UInt, default: Seq[BitPat], mappingIn: Iterable[(BitPat, Seq[BitPat])]): Seq[UInt] = { val mapping = collection.mutable.ArrayBuffer.fill(default.size)(collection.mutable.ArrayBuffer[(BitPat, BitPat)]()) for ((key, values) <- mappingIn) for ((value, i) <- values zipWithIndex) @@ -48,8 +48,8 @@ object DecodeLogic for ((thisDefault, thisMapping) <- default zip mapping) yield apply(addr, thisDefault, thisMapping) } - def apply(addr: UInt, default: Iterable[BitPat], mappingIn: List[(UInt, Iterable[BitPat])]): Iterable[UInt] = - apply(addr, default, mappingIn.map(m => (BitPat(m._1), m._2)).asInstanceOf[Iterable[(BitPat, Iterable[BitPat])]]) + def apply(addr: UInt, default: Seq[BitPat], mappingIn: List[(UInt, Seq[BitPat])]): Seq[UInt] = + apply(addr, default, mappingIn.map(m => (BitPat(m._1), m._2)).asInstanceOf[Iterable[(BitPat, Seq[BitPat])]]) def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool = apply(addr, BitPat.DC(1), trues.map(BitPat(_) -> BitPat("b1")) ++ falses.map(BitPat(_) -> BitPat("b0"))).toBool private val caches = collection.mutable.Map[UInt,collection.mutable.Map[Term,Bool]]() diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index c71e44f9..2ac3a0b8 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -9,8 +9,8 @@ import scala.math._ object Util { implicit def intToUInt(x: Int): UInt = UInt(x) implicit def booleanToBool(x: Boolean): Bits = Bool(x) - implicit def intSeqToUIntSeq(x: Iterable[Int]): Iterable[UInt] = x.map(UInt(_)) - 
implicit def seqToVec[T <: Data](x: Iterable[T]): Vec[T] = Vec(x) + implicit def intSeqToUIntSeq(x: Seq[Int]): Seq[UInt] = x.map(UInt(_)) + implicit def seqToVec[T <: Data](x: Seq[T]): Vec[T] = Vec(x) implicit def wcToUInt(c: WideCounter): UInt = c.value implicit def sextToConv(x: UInt) = new AnyRef { def sextTo(n: Int): UInt = Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x) From c8c312e860b5f100e43bb1b1a91503c759270f92 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 29 Jul 2015 15:03:01 -0700 Subject: [PATCH 0863/1087] minor btb cleanup --- rocket/src/main/scala/btb.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 06a6c53c..33f5b468 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -252,12 +252,13 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete if (nBHT > 0) { val bht = new BHT(nBHT) - val res = bht.get(io.req.bits.addr, io.req.valid && hits.orR && !Mux1H(hits, isJump)) + val isBranch = !Mux1H(hits, isJump) + val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch) val update_btb_hit = io.bht_update.bits.prediction.valid when (io.bht_update.valid && update_btb_hit) { bht.update(io.bht_update.bits.pc, io.bht_update.bits.prediction.bits.bht, io.bht_update.bits.taken, io.bht_update.bits.mispredict) } - when (!res.value(0) && !Mux1H(hits, isJump)) { io.resp.bits.taken := false } + when (!res.value(0) && isBranch) { io.resp.bits.taken := false } io.resp.bits.bht := res } From ce161b83e31bd6e1f1e703e595b796b80168538a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 29 Jul 2015 15:03:13 -0700 Subject: [PATCH 0864/1087] Chisel3 compatibility: avoid subword assignment --- rocket/src/main/scala/btb.scala | 18 ++++++++++-------- rocket/src/main/scala/ptw.scala | 9 +++++---- rocket/src/main/scala/tlb.scala | 28 ++++++++++++++-------------- 3 files changed, 29 
insertions(+), 26 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 33f5b468..3d1108a3 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -133,7 +133,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val invalidate = Bool(INPUT) } - val idxValid = Reg(init=UInt(0, entries)) + val idxValid = Reg(Vec(Bool(), entries)) val idxs = Mem(UInt(width=matchBits), entries) val idxPages = Mem(UInt(width=log2Up(nPages)), entries) val tgts = Mem(UInt(width=matchBits), entries) @@ -143,8 +143,8 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0)) val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0)) - val useRAS = Reg(UInt(width = entries)) - val isJump = Reg(UInt(width = entries)) + val useRAS = Reg(Vec(Bool(), entries)) + val isJump = Reg(Vec(Bool(), entries)) val brIdx = Mem(UInt(width=log2Up(params(FetchWidth))), entries) private def page(addr: UInt) = addr >> matchBits @@ -152,11 +152,12 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val p = page(addr) Vec(pages.map(_ === p)).toBits & pageValid } - private def tagMatch(addr: UInt, pgMatch: UInt): UInt = { + private def tagMatch(addr: UInt, pgMatch: UInt): Vec[Bool] = { val idx = addr(matchBits-1,0) val idxMatch = idxs.map(_ === idx).toBits val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits - idxValid & idxMatch & idxPageMatch + Vec(for (i <- 0 until entries) + yield idxValid(i) && idxMatch(i) && idxPageMatch(i)) } val r_btb_update = Pipe(io.btb_update) @@ -198,11 +199,12 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val nextRepl = Counter(!updateHit, entries)._1 val waddr = - if (updates_out_of_order) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) + if (updates_out_of_order) Mux(updateHits.reduce(_||_), OHToUInt(updateHits), 
nextRepl) else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl) // invalidate entries if we stomp on pages they depend upon - idxValid := idxValid & ~Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits + for (i <- 0 until idxValid.size) + when ((pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR) { idxValid(i) := false } idxValid(waddr) := Bool(true) idxs(waddr) := r_btb_update.bits.pc @@ -237,7 +239,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete pageValid := 0 } - io.resp.valid := hits.orR + io.resp.valid := hits.reduce(_||_) io.resp.bits.taken := io.resp.valid io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 69fbf57e..518656a4 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -85,20 +85,21 @@ class PTW(n: Int) extends CoreModule val (pte_cache_hit, pte_cache_data) = { val size = log2Up(pgLevels * 2) val plru = new PseudoLRU(size) - val valid = Reg(init = Bits(0, size)) + val valid = Reg(init=Vec(Bool(), size)) + val validBits = valid.toBits val tags = Mem(UInt(width = paddrBits), size) val data = Mem(UInt(width = ppnBits), size) - val hits = Vec(tags.map(_ === pte_addr)).toBits & valid + val hits = Vec(tags.map(_ === pte_addr)).toBits & validBits val hit = hits.orR when (io.mem.resp.valid && pte.table() && !hit) { - val r = Mux(valid.andR, plru.replace, PriorityEncoder(~valid)) + val r = Mux(validBits.andR, plru.replace, PriorityEncoder(~validBits)) valid(r) := true tags(r) := pte_addr data(r) := pte.ppn } when (hit && state === s_req) { plru.access(OHToUInt(hits)) } - when (io.dpath.invalidate) { valid := 0 } + when (reset || io.dpath.invalidate) { valid.foreach(_ := false) } (hit, Mux1H(hits, data)) } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala 
index a5d87943..cd357fbb 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -119,18 +119,18 @@ class TLB extends TLBModule { val tag_hit_addr = OHToUInt(tag_cam.io.hits) // permission bit arrays - val valid_array = Reg(Bits()) // PTE is valid (not equivalent to CAM tag valid bit!) - val ur_array = Reg(Bits()) // user read permission - val uw_array = Reg(Bits()) // user write permission - val ux_array = Reg(Bits()) // user execute permission - val sr_array = Reg(Bits()) // supervisor read permission - val sw_array = Reg(Bits()) // supervisor write permission - val sx_array = Reg(Bits()) // supervisor execute permission - val dirty_array = Reg(Bits()) // PTE dirty bit + val valid_array = Reg(Vec(Bool(), entries)) // PTE is valid (not equivalent to CAM tag valid bit!) + val ur_array = Reg(Vec(Bool(), entries)) // user read permission + val uw_array = Reg(Vec(Bool(), entries)) // user write permission + val ux_array = Reg(Vec(Bool(), entries)) // user execute permission + val sr_array = Reg(Vec(Bool(), entries)) // supervisor read permission + val sw_array = Reg(Vec(Bool(), entries)) // supervisor write permission + val sx_array = Reg(Vec(Bool(), entries)) // supervisor execute permission + val dirty_array = Reg(Vec(Bool(), entries)) // PTE dirty bit when (io.ptw.resp.valid) { val pte = io.ptw.resp.bits.pte tag_ram(r_refill_waddr) := pte.ppn - valid_array := valid_array.bitSet(r_refill_waddr, !io.ptw.resp.bits.error) + valid_array(r_refill_waddr) := !io.ptw.resp.bits.error ur_array(r_refill_waddr) := pte.ur() && !io.ptw.resp.bits.error uw_array(r_refill_waddr) := pte.uw() && !io.ptw.resp.bits.error ux_array(r_refill_waddr) := pte.ux() && !io.ptw.resp.bits.error @@ -151,14 +151,14 @@ class TLB extends TLBModule { val priv_uses_vm = priv <= PRV_S val req_xwr = Cat(!r_req.store, r_req.store, !(r_req.instruction || r_req.store)) - val r_array = Mux(priv_s, sr_array, ur_array) - val w_array = Mux(priv_s, sw_array, uw_array) - val x_array = 
Mux(priv_s, sx_array, ux_array) + val r_array = Mux(priv_s, sr_array.toBits, ur_array.toBits) + val w_array = Mux(priv_s, sw_array.toBits, uw_array.toBits) + val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits) val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1) // it's only a store hit if the dirty bit is set - val tag_hits = tag_cam.io.hits & (dirty_array | ~(io.req.bits.store.toSInt & w_array)) + val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~(io.req.bits.store.toSInt & w_array)) val tag_hit = tag_hits.orR val tlb_hit = vm_enabled && tag_hit val tlb_miss = vm_enabled && !tag_hit && !bad_va @@ -177,7 +177,7 @@ class TLB extends TLBModule { // clear invalid entries on access, or all entries on a TLB flush tag_cam.io.clear := io.ptw.invalidate || io.req.fire() - tag_cam.io.clear_mask := ~valid_array | (tag_cam.io.hits & ~tag_hits) + tag_cam.io.clear_mask := ~valid_array.toBits | (tag_cam.io.hits & ~tag_hits) when (io.ptw.invalidate) { tag_cam.io.clear_mask := SInt(-1) } io.ptw.req.valid := state === s_request From 9d67ef4ee225da36843bb4694c36a1f6fa173a03 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 28 Jul 2015 14:35:26 -0700 Subject: [PATCH 0865/1087] simplify .sbt files --- rocket/build.sbt | 3 +++ rocket/chisel-dependent.sbt | 10 ---------- rocket/hardfloat-dependent.sbt | 8 -------- rocket/uncore-dependent.sbt | 8 -------- 4 files changed, 3 insertions(+), 26 deletions(-) delete mode 100644 rocket/chisel-dependent.sbt delete mode 100644 rocket/hardfloat-dependent.sbt delete mode 100644 rocket/uncore-dependent.sbt diff --git a/rocket/build.sbt b/rocket/build.sbt index d9150787..33b49e0a 100644 --- a/rocket/build.sbt +++ b/rocket/build.sbt @@ -5,3 +5,6 @@ version := "1.2" name := "rocket" scalaVersion := "2.10.2" + +libraryDependencies ++= (Seq("chisel", "hardfloat", "uncore").map { + dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ 
}}).flatten diff --git a/rocket/chisel-dependent.sbt b/rocket/chisel-dependent.sbt deleted file mode 100644 index 1681364e..00000000 --- a/rocket/chisel-dependent.sbt +++ /dev/null @@ -1,10 +0,0 @@ -// Provide a managed dependency on chisel if -DchiselVersion="" is -// supplied on the command line. - -val chiselVersion_r = System.getProperty("chiselVersion", "None") - -// _r a temporary fix until sbt 13.6 https://github.com/sbt/sbt/issues/1465 - -libraryDependencies ++= ( if (chiselVersion_r != "None" ) ( - "edu.berkeley.cs" %% "chisel" % chiselVersion_r -) :: Nil; else Nil) diff --git a/rocket/hardfloat-dependent.sbt b/rocket/hardfloat-dependent.sbt deleted file mode 100644 index e6cc8f7e..00000000 --- a/rocket/hardfloat-dependent.sbt +++ /dev/null @@ -1,8 +0,0 @@ -// Provide a managed dependency on chisel if -DhardfloatVersion="" is -// supplied on the command line. - -val hardfloatVersion = System.getProperty("hardfloatVersion", "None") - -libraryDependencies ++= ( if (hardfloatVersion != "None" ) ( - "edu.berkeley.cs" %% "hardfloat" % hardfloatVersion -) :: Nil; else Nil) diff --git a/rocket/uncore-dependent.sbt b/rocket/uncore-dependent.sbt deleted file mode 100644 index 9526f621..00000000 --- a/rocket/uncore-dependent.sbt +++ /dev/null @@ -1,8 +0,0 @@ -// Provide a managed dependency on chisel if -DuncoreVersion="" is -// supplied on the command line. 
- -val uncoreVersion = System.getProperty("uncoreVersion", "None") - -libraryDependencies ++= ( if (uncoreVersion != "None" ) ( - "edu.berkeley.cs" %% "uncore" % uncoreVersion -) :: Nil; else Nil) From d2a594fb577b652c64a4d8ed972e2d037cf5d5ab Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 29 Jul 2015 17:22:22 -0700 Subject: [PATCH 0866/1087] new junctions repo has mem size constants --- rocket/src/main/scala/btb.scala | 2 +- rocket/src/main/scala/rocket.scala | 1 + rocket/src/main/scala/tlb.scala | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 3d1108a3..b37a5c67 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -3,8 +3,8 @@ package rocket import Chisel._ +import junctions._ import Util._ -import uncore._ case object NBTBEntries extends Field[Int] case object NRAS extends Field[Int] diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 1b87348b..c7747525 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -3,6 +3,7 @@ package rocket import Chisel._ +import junctions._ import uncore._ import Util._ diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index cd357fbb..faf18bde 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -4,7 +4,7 @@ package rocket import Chisel._ import Util._ -import uncore._ +import junctions._ import scala.math._ case object NTLBEntries extends Field[Int] From db7258f8871537b821c3909dd539f35dbf70e7d4 Mon Sep 17 00:00:00 2001 From: Jim Lawson Date: Thu, 30 Jul 2015 15:11:23 -0700 Subject: [PATCH 0867/1087] Add junctions to the possible managed dependency list. 
--- rocket/build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/build.sbt b/rocket/build.sbt index 33b49e0a..4a8e9378 100644 --- a/rocket/build.sbt +++ b/rocket/build.sbt @@ -6,5 +6,5 @@ name := "rocket" scalaVersion := "2.10.2" -libraryDependencies ++= (Seq("chisel", "hardfloat", "uncore").map { +libraryDependencies ++= (Seq("chisel", "hardfloat", "uncore", "junctions").map { dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ }}).flatten From 57930e8a26fca0e12eec350a1f2ce916dee5b93e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 30 Jul 2015 23:52:42 -0700 Subject: [PATCH 0868/1087] Chisel3 compatibility potpourri --- rocket/src/main/scala/csr.scala | 2 +- rocket/src/main/scala/fpu.scala | 2 +- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/ptw.scala | 2 +- rocket/src/main/scala/tlb.scala | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 8fb09605..234a4a2f 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -306,7 +306,7 @@ class CSRFile extends CoreModule reg_mstatus.prv2 := reg_mstatus.prv1 reg_mstatus.ie2 := reg_mstatus.ie1 - reg_mepc := io.pc & SInt(-coreInstBytes) + reg_mepc := ~(~io.pc | (coreInstBytes-1)) reg_mcause := io.cause when (csr_xcpt) { reg_mcause := Causes.illegal_instruction diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index c1c9e1e5..8b7ae4e4 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -439,7 +439,7 @@ class FPU extends Module val divSqrt_in_flight = Reg(init=Bool(false)) // writeback arbitration - case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: Bits, wexc: Bits) + case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: UInt, wexc: UInt) val pipes = List( Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, 
fpmu.io.out.bits.data, fpmu.io.out.bits.exc), Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits.data, ifpu.io.out.bits.exc), diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 4a4436a6..d30d09d5 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -50,7 +50,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule val tlb = Module(new TLB) val s1_pc_ = Reg(UInt()) - val s1_pc = s1_pc_ & SInt(-coreInstBytes) // discard PC LSBS (this propagates down the pipeline) + val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline) val s1_same_block = Reg(Bool()) val s2_valid = Reg(init=Bool(true)) val s2_pc = Reg(init=UInt(START_ADDR)) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 518656a4..0346f843 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -85,7 +85,7 @@ class PTW(n: Int) extends CoreModule val (pte_cache_hit, pte_cache_data) = { val size = log2Up(pgLevels * 2) val plru = new PseudoLRU(size) - val valid = Reg(init=Vec(Bool(), size)) + val valid = Reg(Vec(Bool(), size)) val validBits = valid.toBits val tags = Mem(UInt(width = paddrBits), size) val data = Mem(UInt(width = ppnBits), size) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index faf18bde..226dcf52 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -158,7 +158,7 @@ class TLB extends TLBModule { val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1) // it's only a store hit if the dirty bit is set - val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~(io.req.bits.store.toSInt & w_array)) + val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0))) val tag_hit = tag_hits.orR val tlb_hit = 
vm_enabled && tag_hit val tlb_miss = vm_enabled && !tag_hit && !bad_va From 45cf64dbd732a00c3faba90fb2b0a081d1041795 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 31 Jul 2015 04:59:45 -0700 Subject: [PATCH 0869/1087] Use UInt instead of Vec[Bool] --- rocket/src/main/scala/btb.scala | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index b37a5c67..a10c55f5 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -133,7 +133,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val invalidate = Bool(INPUT) } - val idxValid = Reg(Vec(Bool(), entries)) + val idxValid = Reg(init=UInt(0, entries)) val idxs = Mem(UInt(width=matchBits), entries) val idxPages = Mem(UInt(width=log2Up(nPages)), entries) val tgts = Mem(UInt(width=matchBits), entries) @@ -152,12 +152,11 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val p = page(addr) Vec(pages.map(_ === p)).toBits & pageValid } - private def tagMatch(addr: UInt, pgMatch: UInt): Vec[Bool] = { + private def tagMatch(addr: UInt, pgMatch: UInt) = { val idx = addr(matchBits-1,0) val idxMatch = idxs.map(_ === idx).toBits val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits - Vec(for (i <- 0 until entries) - yield idxValid(i) && idxMatch(i) && idxPageMatch(i)) + idxValid & idxMatch & idxPageMatch } val r_btb_update = Pipe(io.btb_update) @@ -199,14 +198,14 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val nextRepl = Counter(!updateHit, entries)._1 val waddr = - if (updates_out_of_order) Mux(updateHits.reduce(_||_), OHToUInt(updateHits), nextRepl) + if (updates_out_of_order) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl) // invalidate entries if we stomp on pages they depend upon - for (i <- 0 until 
idxValid.size) - when ((pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR) { idxValid(i) := false } + val invalidateMask = Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits + val validateMask = UIntToOH(waddr) + idxValid := (idxValid & ~invalidateMask) | validateMask - idxValid(waddr) := Bool(true) idxs(waddr) := r_btb_update.bits.pc tgts(waddr) := update_target idxPages(waddr) := idxPageUpdate @@ -239,7 +238,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete pageValid := 0 } - io.resp.valid := hits.reduce(_||_) + io.resp.valid := hits.orR io.resp.bits.taken := io.resp.valid io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) From 6d7cc37e87c2270173e2329967ce3672c7b1ff94 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 31 Jul 2015 14:23:52 -0700 Subject: [PATCH 0870/1087] Specify some uninferrable widths It's really scary that Chisel2 passed this stuff. 
--- rocket/src/main/scala/btb.scala | 2 +- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index a10c55f5..5e0adb68 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -177,7 +177,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val useUpdatePageHit = updatePageHit.orR val doIdxPageRepl = !useUpdatePageHit - val idxPageRepl = Wire(UInt()) + val idxPageRepl = Wire(UInt(width = nPages)) val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index d30d09d5..6b79aec3 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -218,7 +218,7 @@ class ICache extends FrontendModule val s1_tag_match = Wire(Vec(Bool(), nWays)) val s2_tag_hit = Wire(Vec(Bool(), nWays)) - val s2_dout = Reg(Vec(Bits(), nWays)) + val s2_dout = Reg(Vec(Bits(width = code.width(rowBits)), nWays)) for (i <- 0 until nWays) { val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 4b191db6..d12ad13b 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -304,7 +304,7 @@ class MSHRFile extends L1HellaCacheModule { when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } val idxMatch = Wire(Vec(Bool(), nMSHRs)) - val tagList = Wire(Vec(Bits(), nMSHRs)) + val tagList = Wire(Vec(Bits(width = tagBits), nMSHRs)) val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits val wbTagList = Wire(Vec(Bits(), nMSHRs)) From 
6c0e1e33ab5835849a4685482e44654ced52def3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 31 Jul 2015 15:42:10 -0700 Subject: [PATCH 0871/1087] Purge UInt := SInt assignments --- rocket/src/main/scala/csr.scala | 12 ++++++------ rocket/src/main/scala/fpu.scala | 2 +- rocket/src/main/scala/multiplier.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 8 ++++---- rocket/src/main/scala/rocket.scala | 2 +- rocket/src/main/scala/tlb.scala | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 234a4a2f..5a5fc3ca 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -291,9 +291,9 @@ class CSRFile extends CoreModule io.csr_xcpt := csr_xcpt io.eret := insn_ret || insn_redirect_trap io.status := reg_mstatus - io.status.fs := reg_mstatus.fs.orR.toSInt // either off or dirty (no clean/initial support yet) - io.status.xs := reg_mstatus.xs.orR.toSInt // either off or dirty (no clean/initial support yet) - io.status.sd := reg_mstatus.xs.orR || reg_mstatus.fs.orR + io.status.fs := Fill(2, reg_mstatus.fs.orR) // either off or dirty (no clean/initial support yet) + io.status.xs := Fill(2, reg_mstatus.xs.orR) // either off or dirty (no clean/initial support yet) + io.status.sd := io.status.fs.andR || io.status.xs.andR if (xLen == 32) io.status.sd_rv32 := io.status.sd @@ -405,7 +405,7 @@ class CSRFile extends CoreModule when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } - when (decoded_addr(CSRs.mepc)) { reg_mepc := wdata(vaddrBitsExtended-1,0).toSInt & SInt(-coreInstBytes) } + when (decoded_addr(CSRs.mepc)) { reg_mepc := ~(~wdata | (coreInstBytes-1)) } when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* 
only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } @@ -436,8 +436,8 @@ class CSRFile extends CoreModule } when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } when (decoded_addr(CSRs.sptbr)) { reg_sptbr := Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)) } - when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata(vaddrBitsExtended-1,0).toSInt & SInt(-coreInstBytes) } - when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata(vaddrBits-1,0).toSInt & SInt(-coreInstBytes) } + when (decoded_addr(CSRs.sepc)) { reg_sepc := ~(~wdata | (coreInstBytes-1)) } + when (decoded_addr(CSRs.stvec)) { reg_stvec := ~(~wdata | (coreInstBytes-1)) } } } diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 8b7ae4e4..aba5d61b 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -228,7 +228,7 @@ class FPToInt extends Module io.out.bits.exc := dcmp_exc } when (in.cmd === FCMD_CVT_IF) { - io.out.bits.toint := Mux(in.typ(1), d2i._1, d2i._1(31,0).toSInt) + io.out.bits.toint := Mux(in.typ(1), d2i._1, d2i._1(31,0).toSInt).toUInt io.out.bits.exc := d2i._2 } diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index b76851f9..6498aa2d 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -95,7 +95,7 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false) extends Module { !isHi && (mplier & ~eOutMask) === UInt(0) val eOutRes = (mulReg >> (mulw - count * mulUnroll)(log2Up(mulw)-1,0)) val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0)) - remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0)).toSInt + remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0)) count := count + 1 when (eOut || count === mulw/mulUnroll-1) { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 
d12ad13b..e43f1a70 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -705,7 +705,7 @@ class HellaCache extends L1HellaCacheModule { // data read for new requests readArb.io.in(3).valid := io.cpu.req.valid readArb.io.in(3).bits.addr := io.cpu.req.bits.addr - readArb.io.in(3).bits.way_en := SInt(-1) + readArb.io.in(3).bits.way_en := ~UInt(0, nWays) when (!readArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) } // recycled requests @@ -713,7 +713,7 @@ class HellaCache extends L1HellaCacheModule { metaReadArb.io.in(0).bits.idx := s2_req.addr >> blockOffBits readArb.io.in(0).valid := s2_recycle readArb.io.in(0).bits.addr := s2_req.addr - readArb.io.in(0).bits.way_en := SInt(-1) + readArb.io.in(0).bits.way_en := ~UInt(0, nWays) // tag check and way muxing def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f)) @@ -800,7 +800,7 @@ class HellaCache extends L1HellaCacheModule { // replays readArb.io.in(1).valid := mshrs.io.replay.valid readArb.io.in(1).bits := mshrs.io.replay.bits - readArb.io.in(1).bits.way_en := SInt(-1) + readArb.io.in(1).bits.way_en := ~UInt(0, nWays) mshrs.io.replay.ready := readArb.io.in(1).ready s1_replay := mshrs.io.replay.valid && readArb.io.in(1).ready metaReadArb.io.in(1) <> mshrs.io.meta_read @@ -828,7 +828,7 @@ class HellaCache extends L1HellaCacheModule { writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() writeArb.io.in(1).bits.addr := mshrs.io.refill.addr writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en - writeArb.io.in(1).bits.wmask := SInt(-1) + writeArb.io.in(1).bits.wmask := ~UInt(0, nWays) writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0) readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked readArb.io.out <> data.io.read diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index c7747525..b77dcf43 100644 --- a/rocket/src/main/scala/rocket.scala 
+++ b/rocket/src/main/scala/rocket.scala @@ -198,7 +198,7 @@ class Rocket extends CoreModule alu.io.dw := ex_ctrl.alu_dw alu.io.fn := ex_ctrl.alu_fn alu.io.in2 := ex_op2.toUInt - alu.io.in1 := ex_op1 + alu.io.in1 := ex_op1.toUInt // multiplier and divider val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1, diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 226dcf52..87569e10 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -178,7 +178,7 @@ class TLB extends TLBModule { // clear invalid entries on access, or all entries on a TLB flush tag_cam.io.clear := io.ptw.invalidate || io.req.fire() tag_cam.io.clear_mask := ~valid_array.toBits | (tag_cam.io.hits & ~tag_hits) - when (io.ptw.invalidate) { tag_cam.io.clear_mask := SInt(-1) } + when (io.ptw.invalidate) { tag_cam.io.clear_mask := ~UInt(0, entries) } io.ptw.req.valid := state === s_request io.ptw.req.bits.addr := r_refill_tag From 52fc34a1382a429f6640aff3fa0f6f8a98459393 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 1 Aug 2015 21:11:25 -0700 Subject: [PATCH 0872/1087] Chisel3: bulk connect is not commutative We haven't decided if this is a FIRRTL limitation that we should relax, or a backwards incompatibility we're forced to live with. Should make for lively debate. 
--- rocket/src/main/scala/icache.scala | 4 +-- rocket/src/main/scala/nbdcache.scala | 52 +++++++++++++++------------- rocket/src/main/scala/rocket.scala | 2 +- rocket/src/main/scala/tile.scala | 8 ++--- 4 files changed, 34 insertions(+), 32 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 6b79aec3..18e55b86 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -93,7 +93,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule btb.io.ras_update := io.cpu.ras_update btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate - tlb.io.ptw <> io.ptw + io.ptw <> tlb.io.ptw tlb.io.req.valid := !stall && !icmiss tlb.io.req.bits.vpn := s1_pc >> UInt(pgIdxBits) tlb.io.req.bits.asid := UInt(0) @@ -101,7 +101,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule tlb.io.req.bits.instruction := Bool(true) tlb.io.req.bits.store := Bool(false) - icache.io.mem <> io.mem + io.mem <> icache.io.mem icache.io.req.valid := !stall && !s0_same_block icache.io.req.bits.idx := io.cpu.npc icache.io.invalidate := io.cpu.invalidate diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index e43f1a70..0ff9a50a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -341,11 +341,11 @@ class MSHRFile extends L1HellaCacheModule { mshr.io.req_bits := io.req.bits mshr.io.req_bits.sdq_id := sdq_alloc_id - mshr.io.meta_read <> meta_read_arb.io.in(i) - mshr.io.meta_write <> meta_write_arb.io.in(i) - mshr.io.mem_req <> mem_req_arb.io.in(i) - mshr.io.wb_req <> wb_req_arb.io.in(i) - mshr.io.replay <> replay_arb.io.in(i) + meta_read_arb.io.in(i) <> mshr.io.meta_read + meta_write_arb.io.in(i) <> mshr.io.meta_write + mem_req_arb.io.in(i) <> mshr.io.mem_req + wb_req_arb.io.in(i) <> mshr.io.wb_req + replay_arb.io.in(i) <> mshr.io.replay mshr.io.mem_grant.valid := io.mem_grant.valid && 
io.mem_grant.bits.client_xact_id === UInt(i) @@ -362,10 +362,10 @@ class MSHRFile extends L1HellaCacheModule { alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match - meta_read_arb.io.out <> io.meta_read - meta_write_arb.io.out <> io.meta_write - mem_req_arb.io.out <> io.mem_req - wb_req_arb.io.out <> io.wb_req + io.meta_read <> meta_read_arb.io.out + io.meta_write <> meta_write_arb.io.out + io.mem_req <> mem_req_arb.io.out + io.wb_req <> wb_req_arb.io.out io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy io.secondary_miss := idx_match @@ -624,7 +624,7 @@ class HellaCache extends L1HellaCacheModule { val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) val dtlb = Module(new TLB) - dtlb.io.ptw <> io.ptw + io.ptw <> dtlb.io.ptw dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys dtlb.io.req.bits.passthrough := s1_req.phys dtlb.io.req.bits.asid := UInt(0) @@ -684,8 +684,8 @@ class HellaCache extends L1HellaCacheModule { val meta = Module(new MetadataArray(onReset _)) val metaReadArb = Module(new Arbiter(new MetaReadReq, 5)) val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2)) - metaReadArb.io.out <> meta.io.read - metaWriteArb.io.out <> meta.io.write + meta.io.read <> metaReadArb.io.out + meta.io.write <> metaWriteArb.io.out // data val data = Module(new DataArray) @@ -763,7 +763,7 @@ class HellaCache extends L1HellaCacheModule { val s2_data_uncorrected = Vec(s2_data_decoded.map(_.uncorrected)).toBits val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,3) val s2_data_correctable = Vec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) - + // store/amo hits s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && isWrite(s2_req.cmd) val amoalu = Module(new AMOALU) @@ -808,16 +808,16 @@ class HellaCache extends L1HellaCacheModule { // probes and releases val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, (r: 
Release) => r.hasMultibeatData())) - releaseArb.io.out <> io.mem.release + io.mem.release <> releaseArb.io.out prober.io.req.valid := io.mem.probe.valid && !lrsc_valid io.mem.probe.ready := prober.io.req.ready && !lrsc_valid prober.io.req.bits := io.mem.probe.bits - prober.io.rep <> releaseArb.io.in(1) + releaseArb.io.in(1) <> prober.io.rep prober.io.way_en := s2_tag_match_way prober.io.block_state := s2_hit_state - prober.io.meta_read <> metaReadArb.io.in(2) - prober.io.meta_write <> metaWriteArb.io.in(1) + metaReadArb.io.in(2) <> prober.io.meta_read + metaWriteArb.io.in(1) <> prober.io.meta_write prober.io.mshr_rdy := mshrs.io.probe_rdy // refills @@ -831,15 +831,15 @@ class HellaCache extends L1HellaCacheModule { writeArb.io.in(1).bits.wmask := ~UInt(0, nWays) writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0) readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked - readArb.io.out <> data.io.read + data.io.read <> readArb.io.out // writebacks val wbArb = Module(new Arbiter(new WritebackReq, 2)) - prober.io.wb_req <> wbArb.io.in(0) - mshrs.io.wb_req <> wbArb.io.in(1) - wbArb.io.out <> wb.io.req - wb.io.meta_read <> metaReadArb.io.in(3) - wb.io.data_req <> readArb.io.in(2) + wbArb.io.in(0) <> prober.io.wb_req + wbArb.io.in(1) <> mshrs.io.wb_req + wb.io.req <> wbArb.io.out + metaReadArb.io.in(3) <> wb.io.meta_read + readArb.io.in(2) <> wb.io.data_req wb.io.data_resp := s2_data_corrected releaseArb.io.in(0) <> wb.io.release @@ -865,8 +865,10 @@ class HellaCache extends L1HellaCacheModule { val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc) - - amoalu.io := s2_req + + amoalu.io.addr := s2_req.addr + amoalu.io.cmd := s2_req.cmd + amoalu.io.typ := s2_req.typ amoalu.io.lhs := s2_data_word 
amoalu.io.rhs := s2_req.data diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index b77dcf43..c85dd56e 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -370,7 +370,7 @@ class Rocket extends CoreModule csr.io.exception := wb_reg_xcpt csr.io.cause := wb_reg_cause csr.io.retire := wb_valid - csr.io.host <> io.host + io.host <> csr.io.host io.fpu.fcsr_rm := csr.io.fcsr_rm csr.io.fcsr_flags := io.fpu.fcsr_flags csr.io.rocc <> io.rocc diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index b4c19276..bfd8ebd8 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -29,20 +29,20 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) dcArb.io.requestor(0) <> ptw.io.mem dcArb.io.requestor(1) <> core.io.dmem - dcArb.io.mem <> dcache.io.cpu + dcache.io.cpu <> dcArb.io.mem ptw.io.requestor(0) <> icache.io.ptw ptw.io.requestor(1) <> dcache.io.ptw - core.io.host <> io.host - core.io.imem <> icache.io.cpu + io.host <> core.io.host + icache.io.cpu <> core.io.imem core.io.ptw <> ptw.io.dpath //If so specified, build an FPU module and wire it in params(BuildFPU) .map { bf => bf() } .foreach { fpu => - fpu.io <> core.io.fpu + core.io.fpu <> fpu.io } // Connect the caches and ROCC to the outer memory system From ef319edc848debb4174e6861d20cd9f2deaabe71 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 2 Aug 2015 21:03:42 -0700 Subject: [PATCH 0873/1087] Bits -> UInt --- rocket/src/main/scala/rocket.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index c85dd56e..36d140a7 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -505,7 +505,7 @@ class Rocket extends CoreModule def checkHazards(targets: Seq[(Bool, UInt)], 
cond: UInt => Bool) = targets.map(h => h._1 && cond(h._2)).reduce(_||_) - def imm(sel: Bits, inst: Bits) = { + def imm(sel: UInt, inst: UInt) = { val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt) val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt) @@ -523,13 +523,13 @@ class Rocket extends CoreModule Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt } - def vaSign(a0: UInt, ea: Bits) = { + def vaSign(a0: UInt, ea: UInt) = { // efficient means to compress 64-bit VA into vaddrBits+1 bits // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) val a = a0 >> vaddrBits-1 val e = ea(vaddrBits,vaddrBits-1) Mux(a === UInt(0) || a === UInt(1), e != UInt(0), - Mux(a === SInt(-1) || a === SInt(-2), e === SInt(-1), + Mux(a.toSInt === SInt(-1) || a.toSInt === SInt(-2), e.toSInt === SInt(-1), e(0))) } From c345d72af4bfdf490b1d552ab19ed79b1a351e0c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 3 Aug 2015 18:52:59 -0700 Subject: [PATCH 0874/1087] Chisel3: Flip order of := and <> --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 0ff9a50a..ac48b7b3 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -830,8 +830,8 @@ class HellaCache extends L1HellaCacheModule { writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en writeArb.io.in(1).bits.wmask := ~UInt(0, nWays) writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0) - readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked data.io.read <> readArb.io.out + readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked // writebacks val wbArb = Module(new Arbiter(new WritebackReq, 2)) From d4c94c6566111b34b632d65eaeaad47fb5f8f3d3 Mon Sep 17 00:00:00 
2001 From: Andrew Waterman Date: Mon, 3 Aug 2015 19:08:00 -0700 Subject: [PATCH 0875/1087] Chisel3 has different Vec semantics Vec(a, b) := c doesn't modify a and b in chisel3. --- rocket/src/main/scala/idecode.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index 97058bd4..2464b817 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -55,9 +55,10 @@ class IntCtrlSigs extends Bundle { def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = { val decoder = DecodeLogic(inst, XDecode.decode_default, table) - Vec(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2, sel_alu1, - sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type, - rfs1, rfs2, rfs3, wfd, div, wxd, csr, fence_i, fence, amo) := decoder + val sigs = Seq(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2, + sel_alu1, sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type, + rfs1, rfs2, rfs3, wfd, div, wxd, csr, fence_i, fence, amo) + sigs zip decoder map {case(s,d) => s := d} this } } From fb5524372d08223efcb51e36312bea091578ac90 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 3 Aug 2015 19:51:08 -0700 Subject: [PATCH 0876/1087] bump scala to 2.11.6 --- rocket/build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/build.sbt b/rocket/build.sbt index 4a8e9378..bfc36cc5 100644 --- a/rocket/build.sbt +++ b/rocket/build.sbt @@ -4,7 +4,7 @@ version := "1.2" name := "rocket" -scalaVersion := "2.10.2" +scalaVersion := "2.11.6" libraryDependencies ++= (Seq("chisel", "hardfloat", "uncore", "junctions").map { dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ }}).flatten From 546205b174bb9e1ccc38eb0f8dc33b769cfc1a0a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 5 Aug 2015 15:28:31 -0700 Subject: [PATCH 0877/1087] Chisel3 compatibility: use >>Int instead of >>UInt --- 
rocket/src/main/scala/arbiter.scala | 4 ++-- rocket/src/main/scala/fpu.scala | 2 +- rocket/src/main/scala/icache.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 4 ++-- rocket/src/main/scala/ptw.scala | 2 +- rocket/src/main/scala/rocket.scala | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index b2a8482f..01d592a5 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -43,12 +43,12 @@ class HellaCacheArbiter(n: Int) extends Module io.requestor(i).xcpt := io.mem.xcpt io.requestor(i).ordered := io.mem.ordered resp.bits := io.mem.resp.bits - resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n)) + resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) resp.bits.nack := io.mem.resp.bits.nack && tag_hit resp.bits.replay := io.mem.resp.bits.replay && tag_hit io.requestor(i).replay_next.valid := io.mem.replay_next.valid && io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(i) - io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> UInt(log2Up(n)) + io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> log2Up(n) } } diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index aba5d61b..3b5810b8 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -215,7 +215,7 @@ class FPToInt extends Module dcmp.io.a := in.in1 dcmp.io.b := in.in2 val dcmp_out = (~in.rm & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR - val dcmp_exc = (~in.rm & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UInt(4) + val dcmp_exc = (~in.rm & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << 4 val d2i = hardfloat.recodedFloatNToAny(in.in1, in.rm, in.typ ^ 1, 52, 12, 64) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 18e55b86..50294282 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -95,7 +95,7 @@ 
class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule io.ptw <> tlb.io.ptw tlb.io.req.valid := !stall && !icmiss - tlb.io.req.bits.vpn := s1_pc >> UInt(pgIdxBits) + tlb.io.req.bits.vpn := s1_pc >> pgIdxBits tlb.io.req.bits.asid := UInt(0) tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.instruction := Bool(true) @@ -257,7 +257,7 @@ class ICache extends FrontendModule // output signals io.resp.valid := s2_hit io.mem.acquire.valid := (state === s_request) - io.mem.acquire.bits := GetBlock(addr_block = s2_addr >> UInt(blockOffBits)) + io.mem.acquire.bits := GetBlock(addr_block = s2_addr >> blockOffBits) // control state machine switch (state) { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index ac48b7b3..f58cc29e 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -332,7 +332,7 @@ class MSHRFile extends L1HellaCacheModule { idxMatch(i) := mshr.io.idx_match tagList(i) := mshr.io.tag - wbTagList(i) := mshr.io.wb_req.bits.addr_block >> UInt(idxBits) + wbTagList(i) := mshr.io.wb_req.bits.addr_block >> idxBits alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy mshr.io.req_pri_val := alloc_arb.io.in(i).ready @@ -437,7 +437,7 @@ class WritebackUnit extends L1HellaCacheModule { // We reissue the meta read as it sets up the mux ctrl for s2_data_muxed io.meta_read.valid := fire io.meta_read.bits.idx := req_idx - io.meta_read.bits.tag := req.addr_block >> UInt(idxBits) + io.meta_read.bits.tag := req.addr_block >> idxBits io.data_req.valid := fire io.data_req.bits.way_en := req.way_en diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 0346f843..1b08ce87 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -125,7 +125,7 @@ class PTW(n: Int) extends CoreModule val resp_err = state === s_error val resp_val = state === s_done || resp_err - val r_resp_ppn = io.mem.req.bits.addr >> UInt(pgIdxBits) + 
val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count) for (i <- 0 until io.requestor.size) { diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 36d140a7..25df6265 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -331,7 +331,7 @@ class Rocket extends CoreModule // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool - val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1) + val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt()(5,1) val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data From 1718333f8375e116d733a35fdb37b71039c58dde Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 5 Aug 2015 15:29:33 -0700 Subject: [PATCH 0878/1087] Don't use Vec as lvalue --- rocket/src/main/scala/fpu.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 3b5810b8..e41278b6 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -131,8 +131,10 @@ class FPUDecoder extends Module FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y) )) val s = io.sigs - Vec(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, s.swap23, s.single, s.fromint, - s.toint, s.fastpipe, s.fma, s.div, s.sqrt, s.round, s.wflags) := decoder + val sigs = Seq(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, + s.swap23, s.single, s.fromint, s.toint, s.fastpipe, s.fma, + s.div, s.sqrt, s.round, s.wflags) + sigs zip decoder map {case(s,d) => s := d} } class FPUIO extends Bundle { From 3d6a060dc37ad0059e808278b668e5d0a0761d1d Mon Sep 17 00:00:00 2001 From: Albert 
Ou Date: Mon, 10 Aug 2015 23:52:58 -0700 Subject: [PATCH 0879/1087] Bump Scala to 2.11.6 This change, originally part of commit b978083, was excluded from the merge at commit 47494ec. --- rocket/build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/build.sbt b/rocket/build.sbt index 4a8e9378..bfc36cc5 100644 --- a/rocket/build.sbt +++ b/rocket/build.sbt @@ -4,7 +4,7 @@ version := "1.2" name := "rocket" -scalaVersion := "2.10.2" +scalaVersion := "2.11.6" libraryDependencies ++= (Seq("chisel", "hardfloat", "uncore", "junctions").map { dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ }}).flatten From d292b6cb1300fe0c705402fb60de503816f391fb Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Tue, 8 Sep 2015 14:42:34 -0700 Subject: [PATCH 0880/1087] don't connect rocc-fpu-port without rocc accel --- rocket/src/main/scala/rocket.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index cd9b8a26..42daa58e 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -492,7 +492,7 @@ class Rocket extends CoreModule io.rocc.cmd.bits.rs1 := wb_reg_wdata io.rocc.cmd.bits.rs2 := wb_reg_rs2 - if (!params(BuildFPU).isEmpty) { + if (!params(BuildFPU).isEmpty && !params(BuildRoCC).isEmpty) { io.fpu.cp_req <> io.rocc.fpu_req io.fpu.cp_resp <> io.rocc.fpu_resp } else { From 78b2e947de12c30f136ac4d0c80d2870ef9a0f4d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 11 Sep 2015 15:43:07 -0700 Subject: [PATCH 0881/1087] Chisel3 compatibility fixes --- rocket/src/main/scala/arbiter.scala | 2 +- rocket/src/main/scala/btb.scala | 2 +- rocket/src/main/scala/csr.scala | 4 ++-- rocket/src/main/scala/fpu.scala | 2 +- rocket/src/main/scala/icache.scala | 9 ++++----- rocket/src/main/scala/nbdcache.scala | 21 ++++++++++----------- rocket/src/main/scala/ptw.scala | 2 +- rocket/src/main/scala/rocket.scala | 6 
+++--- 8 files changed, 23 insertions(+), 25 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 01d592a5..d995ff5f 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -8,7 +8,7 @@ import uncore._ class HellaCacheArbiter(n: Int) extends Module { val io = new Bundle { - val requestor = Vec.fill(n){new HellaCacheIO}.flip + val requestor = Vec(new HellaCacheIO, n).flip val mem = new HellaCacheIO } diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 5e0adb68..b0ab7b11 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -35,7 +35,7 @@ class RAS(nras: Int) { private val count = Reg(init=UInt(0,log2Up(nras+1))) private val pos = Reg(init=UInt(0,log2Up(nras))) - private val stack = Reg(Vec.fill(nras){UInt()}) + private val stack = Reg(Vec(UInt(), nras)) } class BHTResp extends Bundle with BTBParameters { diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 5a5fc3ca..d906955a 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -85,8 +85,8 @@ class CSRFileIO extends CoreBundle { val evec = UInt(OUTPUT, vaddrBitsExtended) val exception = Bool(INPUT) val retire = UInt(INPUT, log2Up(1+retireWidth)) - val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+retireWidth))) - val custom_mrw_csrs = Vec.fill(params(NCustomMRWCSRs))(UInt(INPUT, xLen)) + val uarch_counters = Vec(UInt(INPUT, log2Up(1+retireWidth)), 16) + val custom_mrw_csrs = Vec(UInt(INPUT, xLen), params(NCustomMRWCSRs)) val cause = UInt(INPUT, xLen) val pc = UInt(INPUT, vaddrBitsExtended) val fatc = Bool(OUTPUT) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index e41278b6..ba91096f 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -456,7 +456,7 @@ class FPU extends Module val memLatencyMask = latencyMask(mem_ctrl, 2) val wen = 
Reg(init=Bits(0, maxLatency-1)) - val winfo = Reg(Vec.fill(maxLatency-1){Bits()}) + val winfo = Reg(Vec(Bits(), maxLatency-1)) val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, ex_reg_valid) val mem_winfo = Cat(pipeid(mem_ctrl), mem_reg_inst(11,7)) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 50294282..b6ea99ff 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -21,7 +21,7 @@ class FrontendReq extends CoreBundle { class FrontendResp extends CoreBundle { val pc = UInt(width = vaddrBitsExtended) // ID stage PC - val data = Vec.fill(coreFetchWidth) (Bits(width = coreInstBits)) + val data = Vec(Bits(width = coreInstBits), coreFetchWidth) val mask = Bits(width = coreFetchWidth) val xcpt_if = Bool() } @@ -196,12 +196,11 @@ class ICache extends FrontendModule val repl_way = if (isDM) UInt(0) else LFSR16(s2_miss)(log2Up(nWays)-1,0) val entagbits = code.width(tagBits) - val tag_array = SeqMem(Bits(width = entagbits*nWays), nSets) + val tag_array = SeqMem(Vec(Bits(width = entagbits), nWays), nSets) val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid) when (refill_done) { - val wmask = FillInterleaved(entagbits, if (isDM) Bits(1) else UIntToOH(repl_way)) val tag = code.encode(s2_tag).toUInt - tag_array.write(s2_idx, Fill(nWays, tag), wmask) + tag_array.write(s2_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _)) } val vb_array = Reg(init=Bits(0, nSets*nWays)) @@ -225,7 +224,7 @@ class ICache extends FrontendModule val s2_vb = Reg(Bool()) val s2_tag_disparity = Reg(Bool()) val s2_tag_match = Reg(Bool()) - val tag_out = tag_rdata(entagbits*(i+1)-1, entagbits*i) + val tag_out = tag_rdata(i) when (s1_valid && rdy && !stall) { s2_vb := s1_vb s2_tag_disparity := 
code.decode(tag_out).error diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index f58cc29e..0c3360bb 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -538,7 +538,7 @@ class DataArray extends L1HellaCacheModule { val io = new Bundle { val read = Decoupled(new L1DataReadReq).flip val write = Decoupled(new L1DataWriteReq).flip - val resp = Vec.fill(nWays){Bits(OUTPUT, encRowBits)} + val resp = Vec(Bits(OUTPUT, encRowBits), nWays) } val waddr = io.write.bits.addr >> rowOffBits @@ -551,13 +551,12 @@ class DataArray extends L1HellaCacheModule { val resp = Wire(Vec(Bits(width = encRowBits), rowWords)) val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { - val array = SeqMem(Bits(width=encRowBits), nSets*refillCycles) + val array = SeqMem(Vec(Bits(width=encDataBits), rowWords), nSets*refillCycles) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { - val data = Fill(rowWords, io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) - val mask = FillInterleaved(encDataBits, wway_en) - array.write(waddr, data, mask) + val data = Vec.fill(rowWords)(io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) + array.write(waddr, data, wway_en.toBools) } - resp(p) := array.read(raddr, rway_en.orR && io.read.valid) + resp(p) := array.read(raddr, rway_en.orR && io.read.valid).toBits } for (dw <- 0 until rowWords) { val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw))) @@ -568,13 +567,13 @@ class DataArray extends L1HellaCacheModule { } } } else { - val wmask = FillInterleaved(encDataBits, io.write.bits.wmask) for (w <- 0 until nWays) { - val array = SeqMem(Bits(width=encRowBits), nSets*refillCycles) + val array = SeqMem(Vec(Bits(width=encDataBits), rowWords), nSets*refillCycles) when (io.write.bits.way_en(w) && io.write.valid) { - array.write(waddr, io.write.bits.data, wmask) + val data = Vec.tabulate(rowWords)(i => 
io.write.bits.data(encDataBits*(i+1)-1,encDataBits*i)) + array.write(waddr, data, io.write.bits.wmask.toBools) } - io.resp(w) := array.read(raddr, io.read.bits.way_en(w) && io.read.valid) + io.resp(w) := array.read(raddr, io.read.bits.way_en(w) && io.read.valid).toBits } } @@ -749,7 +748,7 @@ class HellaCache extends L1HellaCacheModule { val s2_data = Wire(Vec(Bits(width=encRowBits), nWays)) for (w <- 0 until nWays) { - val regs = Reg(Vec.fill(rowWords){Bits(width = encDataBits)}) + val regs = Reg(Vec(Bits(width = encDataBits), rowWords)) val en1 = s1_clk_en && s1_tag_eq_way(w) for (i <- 0 until regs.size) { val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 1b08ce87..1a49cee0 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -54,7 +54,7 @@ class PTE extends CoreBundle { class PTW(n: Int) extends CoreModule { val io = new Bundle { - val requestor = Vec.fill(n){new TLBPTWIO}.flip + val requestor = Vec(new TLBPTWIO, n).flip val mem = new HellaCacheIO val dpath = new DatapathPTWIO } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 25df6265..d4b5984e 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -180,9 +180,9 @@ class Rocket extends CoreModule // execute stage val bypass_mux = Vec(bypass_sources.map(_._3)) - val ex_reg_rs_bypass = Reg(Vec.fill(id_raddr.size)(Bool())) - val ex_reg_rs_lsb = Reg(Vec.fill(id_raddr.size)(Bits())) - val ex_reg_rs_msb = Reg(Vec.fill(id_raddr.size)(Bits())) + val ex_reg_rs_bypass = Reg(Vec(Bool(), id_raddr.size)) + val ex_reg_rs_lsb = Reg(Vec(UInt(), id_raddr.size)) + val ex_reg_rs_msb = Reg(Vec(UInt(), id_raddr.size)) val ex_rs = for (i <- 0 until id_raddr.size) yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) val ex_imm = imm(ex_ctrl.sel_imm, ex_reg_inst) From 
91458bef1c54adc0237b70073b81abc8b41b4663 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Thu, 10 Sep 2015 18:12:23 -0700 Subject: [PATCH 0882/1087] [commitlog] Initial commit log for integer working --- rocket/src/main/scala/rocket.scala | 36 +++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index d4b5984e..4c81d8e4 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -492,12 +492,46 @@ class Rocket extends CoreModule io.rocc.cmd.bits.rs1 := wb_reg_wdata io.rocc.cmd.bits.rs2 := wb_reg_rs2 - printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", + val COMMITLOG = true + + if (COMMITLOG) { + val pc = Wire(SInt(width=64)) + pc := wb_reg_pc//.toSInt() + val inst = wb_reg_inst + val rd = RegNext(RegNext(RegNext(id_waddr))) + val wfd = wb_ctrl.wfd + val wxd = wb_ctrl.wxd + val has_data = wb_wen && !wb_set_sboard + + when (wb_valid) { + // TODO add privileged level + when (wfd) { + printf ("0x%x (0x%x) f%d\n", pc, inst, rd) + } + .elsewhen (wxd && rd != UInt(0) && has_data) { + printf ("0x%x (0x%x) x%d 0x%x\n", pc, inst, rd, rf_wdata) + } + .elsewhen (wxd && rd != UInt(0) && !has_data) { + printf ("0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", pc, inst, rd, rd) + } + .otherwise { // !wxd || (wxd && rd == 0) + printf ("0x%x (0x%x)\n", pc, inst) + } + } + + // ll write data + when (ll_wen) { + printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata) + } + } + else { + printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", io.host.id, csr.io.time(32,0), wb_valid, wb_reg_pc, Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen, wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), wb_reg_inst, wb_reg_inst) + } def checkExceptions(x: Seq[(Bool, UInt)]) = (x.map(_._1).reduce(_||_), PriorityMux(x)) From 
d630a0385777cad1e87b80957443f5e1bc4ecfaf Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Fri, 11 Sep 2015 03:45:31 -0700 Subject: [PATCH 0883/1087] [commitlog] Added FP instructions to the commitlog --- rocket/src/main/scala/fpu.scala | 23 ++++++++++++++++++----- rocket/src/main/scala/rocket.scala | 10 +++++----- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index ba91096f..d435daca 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -353,7 +353,7 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module io.out := Pipe(valid, res, latency-1) } -class FPU extends Module +class FPU extends CoreModule { val io = new FPUIO @@ -383,7 +383,12 @@ class FPU extends Module // regfile val regfile = Mem(Bits(width = 65), 32) - when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded } + when (load_wb) { + regfile(load_wb_tag) := load_wb_data_recoded + if (EnableCommitLog) { + printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32), load_wb_data) + } + } val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt())) when (io.valid) { @@ -459,7 +464,7 @@ class FPU extends Module val winfo = Reg(Vec(Bits(), maxLatency-1)) val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, ex_reg_valid) - val mem_winfo = Cat(pipeid(mem_ctrl), mem_reg_inst(11,7)) + val mem_winfo = Cat(mem_ctrl.single, pipeid(mem_ctrl), mem_reg_inst(11,7)) for (i <- 0 until maxLatency-2) { when (wen(i+1)) { winfo(i) := winfo(i+1) } @@ -477,10 +482,18 @@ class FPU extends Module } val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt) - val wsrc = winfo(0) >> 5 + val wsrc = (winfo(0) >> 5)(1,0) // TODO: get rid of magic number on log(num_pipes) val wdata = Mux(divSqrt_wen, divSqrt_wdata, 
Vec(pipes.map(_.wdata))(wsrc)) val wexc = Vec(pipes.map(_.wexc))(wsrc) - when (wen(0) || divSqrt_wen) { regfile(waddr) := wdata } + when (wen(0) || divSqrt_wen) { + regfile(waddr) := wdata + if (EnableCommitLog) { + val wdata_unrec_s = hardfloat.recodedFloatNToFloatN(wdata(64,0), 23, 9) + val wdata_unrec_d = hardfloat.recodedFloatNToFloatN(wdata(64,0), 52, 12) + val wb_single = (winfo(0) >> 5)(2) // TODO: get rid of magic numbers + printf ("f%d p%d 0x%x\n", waddr, waddr+ UInt(32), Mux(wb_single, wdata_unrec_s, wdata_unrec_d)) + } + } val wb_toint_valid = wb_reg_valid && wb_ctrl.toint val wb_toint_exc = RegEnable(fpiu.io.out.bits.exc, mem_ctrl.toint) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 4c81d8e4..157b55ed 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -43,6 +43,8 @@ abstract trait CoreParameters extends UsesParameters { val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt + val EnableCommitLog = true + if(params(FastLoadByte)) require(params(FastLoadWord)) } @@ -492,11 +494,9 @@ class Rocket extends CoreModule io.rocc.cmd.bits.rs1 := wb_reg_wdata io.rocc.cmd.bits.rs2 := wb_reg_rs2 - val COMMITLOG = true - - if (COMMITLOG) { + if (EnableCommitLog) { val pc = Wire(SInt(width=64)) - pc := wb_reg_pc//.toSInt() + pc := wb_reg_pc val inst = wb_reg_inst val rd = RegNext(RegNext(RegNext(id_waddr))) val wfd = wb_ctrl.wfd @@ -506,7 +506,7 @@ class Rocket extends CoreModule when (wb_valid) { // TODO add privileged level when (wfd) { - printf ("0x%x (0x%x) f%d\n", pc, inst, rd) + printf ("0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", pc, inst, rd, rd+UInt(32)) } .elsewhen (wxd && rd != UInt(0) && has_data) { printf ("0x%x (0x%x) x%d 0x%x\n", pc, inst, rd, rf_wdata) From 53a02a62c8a85fc07fb660415776212095337c4b Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Fri, 11 Sep 2015 15:48:17 -0700 Subject: [PATCH 
0884/1087] [commitlog] Fix sp/dp bug in FPU writeback --- rocket/src/main/scala/fpu.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index d435daca..56755e42 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -386,7 +386,7 @@ class FPU extends CoreModule when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded if (EnableCommitLog) { - printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32), load_wb_data) + printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32), load_wb_data) // TODO see what happens, either change spike to sext, or us or whatever. } } @@ -464,7 +464,7 @@ class FPU extends CoreModule val winfo = Reg(Vec(Bits(), maxLatency-1)) val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, ex_reg_valid) - val mem_winfo = Cat(mem_ctrl.single, pipeid(mem_ctrl), mem_reg_inst(11,7)) + val mem_winfo = Cat(pipeid(mem_ctrl), mem_ctrl.single, mem_reg_inst(11,7)) //single only used for debugging for (i <- 0 until maxLatency-2) { when (wen(i+1)) { winfo(i) := winfo(i+1) } @@ -482,16 +482,17 @@ class FPU extends CoreModule } val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt) - val wsrc = (winfo(0) >> 5)(1,0) // TODO: get rid of magic number on log(num_pipes) + val wsrc = (winfo(0) >> 6) val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.wdata))(wsrc)) val wexc = Vec(pipes.map(_.wexc))(wsrc) - when (wen(0) || divSqrt_wen) { + when (wen(0) || divSqrt_wen) { regfile(waddr) := wdata if (EnableCommitLog) { val wdata_unrec_s = hardfloat.recodedFloatNToFloatN(wdata(64,0), 23, 9) val wdata_unrec_d = hardfloat.recodedFloatNToFloatN(wdata(64,0), 52, 12) - val wb_single = (winfo(0) >> 5)(2) // TODO: get rid of magic numbers - printf ("f%d p%d 
0x%x\n", waddr, waddr+ UInt(32), Mux(wb_single, wdata_unrec_s, wdata_unrec_d)) + val wb_single = (winfo(0) >> 5)(0) + printf ("f%d p%d 0x%x\n", waddr, waddr+ UInt(32), + Mux(wb_single, Cat(Fill(32, wdata_unrec_s(31)), wdata_unrec_s), wdata_unrec_d)) } } From 7d14abf26215563b32898acb6729b21fab083f42 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Fri, 11 Sep 2015 16:08:12 -0700 Subject: [PATCH 0885/1087] [commitlog] Added privilege-level to output --- rocket/src/main/scala/fpu.scala | 2 +- rocket/src/main/scala/rocket.scala | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 56755e42..ede8679c 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -386,7 +386,7 @@ class FPU extends CoreModule when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded if (EnableCommitLog) { - printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32), load_wb_data) // TODO see what happens, either change spike to sext, or us or whatever. 
+ printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32), load_wb_data) } } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 157b55ed..a04a64e6 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -502,20 +502,20 @@ class Rocket extends CoreModule val wfd = wb_ctrl.wfd val wxd = wb_ctrl.wxd val has_data = wb_wen && !wb_set_sboard + val priv = csr.io.status.prv when (wb_valid) { - // TODO add privileged level when (wfd) { - printf ("0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", pc, inst, rd, rd+UInt(32)) + printf ("%d 0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd+UInt(32)) } .elsewhen (wxd && rd != UInt(0) && has_data) { - printf ("0x%x (0x%x) x%d 0x%x\n", pc, inst, rd, rf_wdata) + printf ("%d 0x%x (0x%x) x%d 0x%x\n", priv, pc, inst, rd, rf_wdata) } .elsewhen (wxd && rd != UInt(0) && !has_data) { - printf ("0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", pc, inst, rd, rd) + printf ("%d 0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd) } .otherwise { // !wxd || (wxd && rd == 0) - printf ("0x%x (0x%x)\n", pc, inst) + printf ("%d 0x%x (0x%x)\n", priv, pc, inst) } } From e22bf02a80cd091f7b9a4cc4eeedeb1597bfa64e Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Fri, 11 Sep 2015 23:08:23 -0700 Subject: [PATCH 0886/1087] [commitlog] CSR's cycle optionally set to instret - Allows debugging Rocket against Spike by having timer interrupts occur in the same place in the instruction stream for both. 
--- rocket/src/main/scala/csr.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index d906955a..95f33aaf 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -123,8 +123,8 @@ class CSRFile extends CoreModule val reg_fromhost = Reg(init=Bits(0, xLen)) val reg_stats = Reg(init=Bool(false)) val reg_time = Reg(UInt(width = xLen)) - val reg_cycle = WideCounter(xLen) val reg_instret = WideCounter(xLen, io.retire) + val reg_cycle = if (EnableCommitLog) { reg_instret } else { WideCounter(xLen) } val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) From 3b48d8569cb27e0b35d42a37795a35c1f188f875 Mon Sep 17 00:00:00 2001 From: Scott Beamer Date: Mon, 14 Sep 2015 14:32:24 -0700 Subject: [PATCH 0887/1087] [commitlog] don't print out writebacks to x0 --- rocket/src/main/scala/rocket.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index a04a64e6..8b6e3e8f 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -520,7 +520,7 @@ class Rocket extends CoreModule } // ll write data - when (ll_wen) { + when (ll_wen && rf_waddr != UInt(0)) { printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata) } } From 76bf1da3109fb3dd11e53d3c84a2d1b7636a211e Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Tue, 15 Sep 2015 15:53:36 -0700 Subject: [PATCH 0888/1087] [commitlog] zero-extend SP write-back values --- rocket/src/main/scala/fpu.scala | 5 +++-- rocket/src/main/scala/rocket.scala | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index ede8679c..15ee0920 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -386,7 +386,8 @@ class 
FPU extends CoreModule when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded if (EnableCommitLog) { - printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32), load_wb_data) + printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32), + Mux(load_wb_single, load_wb_data(31,0), load_wb_data)) } } @@ -492,7 +493,7 @@ class FPU extends CoreModule val wdata_unrec_d = hardfloat.recodedFloatNToFloatN(wdata(64,0), 52, 12) val wb_single = (winfo(0) >> 5)(0) printf ("f%d p%d 0x%x\n", waddr, waddr+ UInt(32), - Mux(wb_single, Cat(Fill(32, wdata_unrec_s(31)), wdata_unrec_s), wdata_unrec_d)) + Mux(wb_single, Cat(UInt(0,32), wdata_unrec_s), wdata_unrec_d)) } } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 8b6e3e8f..9a336705 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -43,7 +43,9 @@ abstract trait CoreParameters extends UsesParameters { val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt - val EnableCommitLog = true + // Print out log of committed instructions and their writeback values. + // Requires post-processing due to out-of-order writebacks. 
+ val EnableCommitLog = false if(params(FastLoadByte)) require(params(FastLoadWord)) } @@ -514,12 +516,11 @@ class Rocket extends CoreModule .elsewhen (wxd && rd != UInt(0) && !has_data) { printf ("%d 0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd) } - .otherwise { // !wxd || (wxd && rd == 0) + .otherwise { printf ("%d 0x%x (0x%x)\n", priv, pc, inst) } } - // ll write data when (ll_wen && rf_waddr != UInt(0)) { printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata) } From e72e5a34b561bc6065698ae9814a99151a673401 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 21 Sep 2015 12:17:46 -0700 Subject: [PATCH 0889/1087] Fix storage of SP values in DP registers The SFMA was zero-extending the SP value to 65 bits, rather than filling the upper 32 bits with 1s. This meant that an FSD + FLD of that register would not restore the value properly. Also, minor code cleanup. --- rocket/src/main/scala/fpu.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 15ee0920..10d657a2 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -348,7 +348,7 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module fma.io.c := in.in3 val res = Wire(new FPResult) - res.data := fma.io.out + res.data := Cat(SInt(-1, 32), fma.io.out) res.exc := fma.io.exceptionFlags io.out := Pipe(valid, res, latency-1) } @@ -447,12 +447,12 @@ class FPU extends CoreModule val divSqrt_in_flight = Reg(init=Bool(false)) // writeback arbitration - case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: UInt, wexc: UInt) + case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult) val pipes = List( - Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits.data, fpmu.io.out.bits.exc), - Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits.data, 
ifpu.io.out.bits.exc), - Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, Cat(SInt(-1, 32), sfma.io.out.bits.data), sfma.io.out.bits.exc), - Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits.data, dfma.io.out.bits.exc)) + Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits), + Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits), + Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out.bits), + Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits)) def latencyMask(c: FPUCtrlSigs, offset: Int) = { require(pipes.forall(_.lat >= offset)) pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_) @@ -484,8 +484,8 @@ class FPU extends CoreModule val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt) val wsrc = (winfo(0) >> 6) - val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.wdata))(wsrc)) - val wexc = Vec(pipes.map(_.wexc))(wsrc) + val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.res.data))(wsrc)) + val wexc = Vec(pipes.map(_.res.exc))(wsrc) when (wen(0) || divSqrt_wen) { regfile(waddr) := wdata if (EnableCommitLog) { From 382faba4a68a5b008df456026dea6e69bb8c2347 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 5 Aug 2015 11:01:01 -0700 Subject: [PATCH 0890/1087] Implement bypassing L1 data cache for MMIO --- rocket/src/main/scala/nbdcache.scala | 159 ++++++++++++++++++++++++--- 1 file changed, 145 insertions(+), 14 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 0c3360bb..fe712f05 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -4,18 +4,23 @@ package rocket import Chisel._ import uncore._ +import junctions.MMIOBase import Util._ case object WordBits extends Field[Int] case object StoreDataQueueDepth extends Field[Int] case object ReplayQueueDepth extends Field[Int] case 
object NMSHRs extends Field[Int] +case object NIOMSHRs extends Field[Int] case object LRSCCycles extends Field[Int] abstract trait L1HellaCacheParameters extends L1CacheParameters { val wordBits = params(WordBits) val wordBytes = wordBits/8 val wordOffBits = log2Up(wordBytes) + val beatBytes = params(CacheBlockBytes) / params(TLDataBeats) + val beatWords = beatBytes / wordBytes + val beatOffBits = log2Up(beatBytes) val idxMSB = untagBits-1 val idxLSB = blockOffBits val offsetmsb = idxLSB-1 @@ -26,6 +31,8 @@ abstract trait L1HellaCacheParameters extends L1CacheParameters { val encRowBits = encDataBits*rowWords val sdqDepth = params(StoreDataQueueDepth) val nMSHRs = params(NMSHRs) + val nIOMSHRs = params(NIOMSHRs) + val mmioBase = params(MMIOBase) } abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters @@ -130,6 +137,83 @@ class WritebackReq extends Release with CacheParameters { val way_en = Bits(width = nWays) } +class IOMSHR(id: Int) extends L1HellaCacheModule { + val io = new Bundle { + val req = Decoupled(new HellaCacheReq).flip + val acquire = Decoupled(new Acquire) + val grant = Valid(new Grant).flip + val resp = Decoupled(new HellaCacheResp) + } + + def wordFromBeat(addr: UInt, dat: UInt) = { + val offset = addr(beatOffBits - 1, wordOffBits) + val shift = Cat(offset, UInt(0, wordOffBits + 3)) + (dat >> shift)(wordBits - 1, 0) + } + + val req = Reg(new HellaCacheReq) + val grant_word = Reg(UInt(width = wordBits)) + + val storegen = new StoreGen(req.typ, req.addr, req.data) + val loadgen = new LoadGen(req.typ, req.addr, grant_word, Bool(false)) + + val beat_offset = req.addr(beatOffBits - 1, wordOffBits) + val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) + val beat_data = Fill(beatWords, storegen.data) + + val addr_byte = req.addr(beatOffBits - 1, 0) + val a_type = Mux(isRead(req.cmd), Acquire.getType, Acquire.putType) + val union = Mux(isRead(req.cmd), + Cat(addr_byte, req.typ, M_XRD), beat_mask) + + val s_idle 
:: s_acquire :: s_grant :: s_resp :: Nil = Enum(Bits(), 4) + val state = Reg(init = s_idle) + + io.req.ready := (state === s_idle) + + io.acquire.valid := (state === s_acquire) + io.acquire.bits := Acquire( + is_builtin_type = Bool(true), + a_type = a_type, + client_xact_id = UInt(id), + addr_block = req.addr(paddrBits - 1, blockOffBits), + addr_beat = req.addr(blockOffBits - 1, beatOffBits), + data = beat_data, + // alloc bit should always be false + union = Cat(union, Bool(false))) + + io.resp.valid := (state === s_resp) + io.resp.bits := req + io.resp.bits.has_data := isRead(req.cmd) + io.resp.bits.data := loadgen.word + io.resp.bits.data_subword := loadgen.byte + io.resp.bits.store_data := req.data + io.resp.bits.nack := Bool(false) + io.resp.bits.replay := io.resp.valid + + when (io.req.fire()) { + req := io.req.bits + state := s_acquire + } + + when (io.acquire.fire()) { + state := s_grant + } + + when (state === s_grant && io.grant.valid) { + when (isRead(req.cmd)) { + grant_word := wordFromBeat(req.addr, io.grant.bits.data) + state := s_resp + } .otherwise { + state := s_idle + } + } + + when (io.resp.fire()) { + state := s_idle + } +} + class MSHR(id: Int) extends L1HellaCacheModule { val io = new Bundle { val req_pri_val = Bool(INPUT) @@ -282,6 +366,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { class MSHRFile extends L1HellaCacheModule { val io = new Bundle { val req = Decoupled(new MSHRReq).flip + val resp = Decoupled(new HellaCacheResp) val secondary_miss = Bool(OUTPUT) val mem_req = Decoupled(new Acquire) @@ -296,10 +381,13 @@ class MSHRFile extends L1HellaCacheModule { val fence_rdy = Bool(OUTPUT) } + // determine if the request is in the memory region or mmio region + val cacheable = io.req.bits.addr < UInt(mmioBase) + val sdq_val = Reg(init=Bits(0, sdqDepth)) val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0)) val sdq_rdy = !sdq_val.andR - val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd) + val sdq_enq = 
io.req.valid && io.req.ready && cacheable && isWrite(io.req.bits.cmd) val sdq = Mem(io.req.bits.data, sdqDepth) when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } @@ -313,7 +401,7 @@ class MSHRFile extends L1HellaCacheModule { val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) val mem_req_arb = Module(new LockingArbiter( new Acquire, - nMSHRs, + nMSHRs + nIOMSHRs, outerDataBeats, (a: Acquire) => a.hasMultibeatData())) val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs)) @@ -360,14 +448,44 @@ class MSHRFile extends L1HellaCacheModule { when (!mshr.io.probe_rdy) { io.probe_rdy := false } } - alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match + alloc_arb.io.out.ready := io.req.valid && sdq_rdy && cacheable && !idx_match io.meta_read <> meta_read_arb.io.out io.meta_write <> meta_write_arb.io.out io.mem_req <> mem_req_arb.io.out io.wb_req <> wb_req_arb.io.out - io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy + val mmio_alloc_arb = Module(new Arbiter(Bool(), nIOMSHRs)) + val resp_arb = Module(new Arbiter(new HellaCacheResp, nIOMSHRs)) + + var mmio_rdy = Bool(false) + + for (i <- 0 until nIOMSHRs) { + val id = nMSHRs + i + val mshr = Module(new IOMSHR(id)) + + mmio_alloc_arb.io.in(i).valid := mshr.io.req.ready + mshr.io.req.valid := mmio_alloc_arb.io.in(i).ready + mshr.io.req.bits := io.req.bits + + mmio_rdy = mmio_rdy || mshr.io.req.ready + + mem_req_arb.io.in(id) <> mshr.io.acquire + + mshr.io.grant.bits := io.mem_grant.bits + mshr.io.grant.valid := io.mem_grant.valid && + io.mem_grant.bits.client_xact_id === UInt(id) + + resp_arb.io.in(i) <> mshr.io.resp + + when (!mshr.io.req.ready) { io.fence_rdy := Bool(false) } + } + + mmio_alloc_arb.io.out.ready := io.req.valid && !cacheable + + io.resp <> resp_arb.io.out + io.req.ready := Mux(!cacheable, mmio_rdy, + Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy) io.secondary_miss := idx_match io.refill := refillMux(io.mem_grant.bits.client_xact_id) 
@@ -824,7 +942,11 @@ class HellaCache extends L1HellaCacheModule { mshrs.io.mem_grant.valid := narrow_grant.fire() mshrs.io.mem_grant.bits := narrow_grant.bits narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData() - writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() + /* The last clause here is necessary in order to prevent the responses for + * the IOMSHRs from being written into the data array. It works because the + * IOMSHR ids start right the ones for the regular MSHRs. */ + writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() && + narrow_grant.bits.client_xact_id < UInt(nMSHRs) writeArb.io.in(1).bits.addr := mshrs.io.refill.addr writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en writeArb.io.in(1).bits.wmask := ~UInt(0, nWays) @@ -893,16 +1015,25 @@ class HellaCache extends L1HellaCacheModule { io.cpu.req.ready := Bool(false) } - io.cpu.resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable - io.cpu.resp.bits.nack := s2_valid && s2_nack - io.cpu.resp.bits := s2_req - io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc - io.cpu.resp.bits.replay := s2_replay - io.cpu.resp.bits.data := loadgen.word - io.cpu.resp.bits.data_subword := loadgen.byte | s2_sc_fail - io.cpu.resp.bits.store_data := s2_req.data - io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid + val cache_resp = Wire(Valid(new HellaCacheResp)) + cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable + cache_resp.bits := s2_req + cache_resp.bits.has_data := isRead(s2_req.cmd) || s2_sc + cache_resp.bits.data := loadgen.word + cache_resp.bits.data_subword := loadgen.byte | s2_sc_fail + cache_resp.bits.store_data := s2_req.data + cache_resp.bits.nack := s2_valid && s2_nack + cache_resp.bits.replay := s2_replay + val uncache_resp = Wire(Valid(new HellaCacheResp)) + uncache_resp.bits := mshrs.io.resp.bits + uncache_resp.valid := mshrs.io.resp.valid + + val 
cache_pass = s2_valid || s2_replay + mshrs.io.resp.ready := !cache_pass + + io.cpu.resp := Mux(cache_pass, cache_resp, uncache_resp) + io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid io.cpu.replay_next.valid := s1_replay && (s1_read || s1_sc) io.cpu.replay_next.bits := s1_req.tag } From d89bcd3922aac84185bac188f6bc02bb0a0ebf54 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 12 Aug 2015 21:22:54 -0700 Subject: [PATCH 0891/1087] modify csr file to bring in line with HTIF changes --- rocket/src/main/scala/csr.scala | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 95f33aaf..fca520e0 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -156,21 +156,21 @@ class CSRFile extends CoreModule val host_pcr_req_valid = Reg(Bool()) // don't reset val host_pcr_req_fire = host_pcr_req_valid && !cpu_ren val host_pcr_rep_valid = Reg(Bool()) // don't reset - val host_pcr_bits = Reg(io.host.pcr_req.bits) - io.host.pcr_req.ready := !host_pcr_req_valid && !host_pcr_rep_valid - io.host.pcr_rep.valid := host_pcr_rep_valid - io.host.pcr_rep.bits := host_pcr_bits.data - when (io.host.pcr_req.fire()) { + val host_pcr_bits = Reg(io.host.pcr.req.bits) + io.host.pcr.req.ready := !host_pcr_req_valid && !host_pcr_rep_valid + io.host.pcr.resp.valid := host_pcr_rep_valid + io.host.pcr.resp.bits := host_pcr_bits.data + when (io.host.pcr.req.fire()) { host_pcr_req_valid := true - host_pcr_bits := io.host.pcr_req.bits + host_pcr_bits := io.host.pcr.req.bits } when (host_pcr_req_fire) { host_pcr_req_valid := false host_pcr_rep_valid := true host_pcr_bits.data := io.rw.rdata } - when (io.host.pcr_rep.fire()) { host_pcr_rep_valid := false } - + when (io.host.pcr.resp.fire()) { host_pcr_rep_valid := false } + io.host.debug_stats_pcr := reg_stats // direct export up the hierarchy val read_mstatus = io.status.toBits @@ -411,7 +411,7 @@ class CSRFile 
extends CoreModule when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } when (decoded_addr(CSRs.instretw)) { reg_instret := wdata } when (decoded_addr(CSRs.mtimecmp)) { reg_mtimecmp := wdata; reg_mip.mtip := false } - when (decoded_addr(CSRs.mreset) /* XXX used by HTIF to write mtime */) { reg_time := wdata } + when (decoded_addr(CSRs.mtime)) { reg_time := wdata } when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } From 16c748576a9c18b2b353c02b80deca2a922eba70 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 10 Sep 2015 17:57:03 -0700 Subject: [PATCH 0892/1087] don't mux data_word_bypass between IOMSHR and cache --- rocket/src/main/scala/nbdcache.scala | 9 ++++----- rocket/src/main/scala/rocket.scala | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index fe712f05..1a401f0f 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -70,7 +70,7 @@ class HellaCacheResp extends HasCoreMemOp with HasCoreData { val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() val has_data = Bool() - val data_subword = Bits(width = coreDataBits) + val data_word_bypass = Bits(width = coreDataBits) val store_data = Bits(width = coreDataBits) } @@ -185,8 +185,7 @@ class IOMSHR(id: Int) extends L1HellaCacheModule { io.resp.valid := (state === s_resp) io.resp.bits := req io.resp.bits.has_data := isRead(req.cmd) - io.resp.bits.data := loadgen.word - io.resp.bits.data_subword := loadgen.byte + io.resp.bits.data := loadgen.byte io.resp.bits.store_data := req.data io.resp.bits.nack := Bool(false) io.resp.bits.replay := io.resp.valid @@ -1019,8 +1018,7 @@ class HellaCache 
extends L1HellaCacheModule { cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable cache_resp.bits := s2_req cache_resp.bits.has_data := isRead(s2_req.cmd) || s2_sc - cache_resp.bits.data := loadgen.word - cache_resp.bits.data_subword := loadgen.byte | s2_sc_fail + cache_resp.bits.data := loadgen.byte | s2_sc_fail cache_resp.bits.store_data := s2_req.data cache_resp.bits.nack := s2_valid && s2_nack cache_resp.bits.replay := s2_replay @@ -1033,6 +1031,7 @@ class HellaCache extends L1HellaCacheModule { mshrs.io.resp.ready := !cache_pass io.cpu.resp := Mux(cache_pass, cache_resp, uncache_resp) + io.cpu.resp.bits.data_word_bypass := loadgen.word io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid io.cpu.replay_next.valid := s1_replay && (s1_read || s1_sc) io.cpu.replay_next.bits := s1_req.tag diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 9a336705..cead7ad2 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -167,8 +167,8 @@ class Rocket extends CoreModule (id_illegal_insn, UInt(Causes.illegal_instruction)))) val dcache_bypass_data = - if(params(FastLoadByte)) io.dmem.resp.bits.data_subword - else if(params(FastLoadWord)) io.dmem.resp.bits.data + if(params(FastLoadByte)) io.dmem.resp.bits.data + else if(params(FastLoadWord)) io.dmem.resp.bits.data_word_bypass else wb_reg_wdata // detect bypass opportunities @@ -364,7 +364,7 @@ class Rocket extends CoreModule val wb_wen = wb_valid && wb_ctrl.wxd val rf_wen = wb_wen || ll_wen val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr) - val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword, + val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data, Mux(ll_wen, ll_wdata, Mux(wb_ctrl.csr != CSR.N, csr.io.rw.rdata, wb_reg_wdata))) @@ -474,7 +474,7 @@ class Rocket extends CoreModule io.fpu.inst := id_inst io.fpu.fromint_data := ex_rs(0) io.fpu.dmem_resp_val := 
dmem_resp_valid && dmem_resp_fpu - io.fpu.dmem_resp_data := io.dmem.resp.bits.data + io.fpu.dmem_resp_data := io.dmem.resp.bits.data_word_bypass io.fpu.dmem_resp_type := io.dmem.resp.bits.typ io.fpu.dmem_resp_tag := dmem_resp_waddr From 9eb988a4c6bad33ede6106cbd76b0e826a13ece1 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 22 Sep 2015 09:42:27 -0700 Subject: [PATCH 0893/1087] make sure access to invalid physical address treated as exception --- rocket/src/main/scala/icache.scala | 4 +++- rocket/src/main/scala/nbdcache.scala | 5 ++--- rocket/src/main/scala/rocket.scala | 1 + rocket/src/main/scala/tlb.scala | 16 +++++++++++----- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index b6ea99ff..f9650b95 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -106,7 +106,9 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule icache.io.req.bits.idx := io.cpu.npc icache.io.invalidate := io.cpu.invalidate icache.io.req.bits.ppn := tlb.io.resp.ppn - icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || io.ptw.invalidate + icache.io.req.bits.kill := io.cpu.req.valid || + tlb.io.resp.miss || tlb.io.resp.xcpt_if || + icmiss || io.ptw.invalidate icache.io.resp.ready := !stall && !s1_same_block io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 1a401f0f..b7aff715 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -32,7 +32,6 @@ abstract trait L1HellaCacheParameters extends L1CacheParameters { val sdqDepth = params(StoreDataQueueDepth) val nMSHRs = params(NMSHRs) val nIOMSHRs = params(NIOMSHRs) - val mmioBase = params(MMIOBase) } abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters @@ -788,8 +787,8 @@ class HellaCache 
extends L1HellaCacheModule { io.cpu.xcpt.ma.ld := s1_read && misaligned io.cpu.xcpt.ma.st := s1_write && misaligned - io.cpu.xcpt.pf.ld := !s1_req.phys && s1_read && dtlb.io.resp.xcpt_ld - io.cpu.xcpt.pf.st := !s1_req.phys && s1_write && dtlb.io.resp.xcpt_st + io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld + io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st assert (!(Reg(next= (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) && diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index cead7ad2..f8d9e862 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -42,6 +42,7 @@ abstract trait CoreParameters extends UsesParameters { val coreDCacheReqTagBits = params(CoreDCacheReqTagBits) val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt + val mmioBase = params(MMIOBase) // Print out log of committed instructions and their writeback values. // Requires post-processing due to out-of-order writebacks. 
diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 87569e10..16f370f3 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -157,20 +157,26 @@ class TLB extends TLBModule { val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1) + val bad_pa = !vm_enabled && io.req.bits.vpn >= UInt(mmioBase >> vpnBits) // it's only a store hit if the dirty bit is set val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0))) val tag_hit = tag_hits.orR val tlb_hit = vm_enabled && tag_hit val tlb_miss = vm_enabled && !tag_hit && !bad_va - + when (io.req.valid && tlb_hit) { plru.access(OHToUInt(tag_cam.io.hits)) } + val addrMap = params(NASTIAddrHashMap) + val paddr = Cat(io.resp.ppn, UInt(0, pgIdxBits)) + val addr_ok = addrMap.isValid(paddr) + val addr_prot = addrMap.getProt(paddr) + io.req.ready := state === s_ready - io.resp.xcpt_ld := bad_va || tlb_hit && !(r_array & tag_cam.io.hits).orR - io.resp.xcpt_st := bad_va || tlb_hit && !(w_array & tag_cam.io.hits).orR - io.resp.xcpt_if := bad_va || tlb_hit && !(x_array & tag_cam.io.hits).orR + io.resp.xcpt_ld := !addr_ok || !addr_prot.r || bad_va || tlb_hit && !(r_array & tag_cam.io.hits).orR + io.resp.xcpt_st := !addr_ok || !addr_prot.w || bad_va || tlb_hit && !(w_array & tag_cam.io.hits).orR + io.resp.xcpt_if := !addr_ok || !addr_prot.x || bad_va || tlb_hit && !(x_array & tag_cam.io.hits).orR io.resp.miss := tlb_miss io.resp.ppn := Mux(vm_enabled && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(params(PPNBits)-1,0)) io.resp.hit_idx := tag_cam.io.hits @@ -186,7 +192,7 @@ class TLB extends TLBModule { io.ptw.req.bits.store := r_req.store io.ptw.req.bits.fetch := r_req.instruction - when (io.req.fire() && tlb_miss) { + when (io.req.fire() && tlb_miss && addr_ok) { state := s_request r_refill_tag := lookup_tag r_refill_waddr := repl_waddr From 
a66bdb19566b4c6ab33360c43adff9fe63a079b2 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 24 Sep 2015 17:53:26 -0700 Subject: [PATCH 0894/1087] replace remaining uses of Vec.fill --- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index f9650b95..65305f00 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -202,7 +202,7 @@ class ICache extends FrontendModule val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid) when (refill_done) { val tag = code.encode(s2_tag).toUInt - tag_array.write(s2_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _)) + tag_array.write(s2_idx, Vec(nWays, tag), Vec.tabulate(nWays)(repl_way === _)) } val vb_array = Reg(init=Bits(0, nSets*nWays)) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index b7aff715..c4b246e9 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -669,7 +669,7 @@ class DataArray extends L1HellaCacheModule { for (p <- 0 until resp.size) { val array = SeqMem(Vec(Bits(width=encDataBits), rowWords), nSets*refillCycles) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { - val data = Vec.fill(rowWords)(io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) + val data = Vec(rowWords, io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) array.write(waddr, data, wway_en.toBools) } resp(p) := array.read(raddr, rway_en.orR && io.read.valid).toBits From 0bfb2962a63e8be9b09b0770257b1ce841214550 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 25 Sep 2015 15:26:11 -0700 Subject: [PATCH 0895/1087] Assume coh.isRead returns true for store-conditional This requires an uncore update. 
--- rocket/src/main/scala/nbdcache.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c4b246e9..9596008d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -151,10 +151,11 @@ class IOMSHR(id: Int) extends L1HellaCacheModule { } val req = Reg(new HellaCacheReq) + val req_cmd_sc = req.cmd === M_XSC val grant_word = Reg(UInt(width = wordBits)) val storegen = new StoreGen(req.typ, req.addr, req.data) - val loadgen = new LoadGen(req.typ, req.addr, grant_word, Bool(false)) + val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc) val beat_offset = req.addr(beatOffBits - 1, wordOffBits) val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) @@ -184,7 +185,7 @@ class IOMSHR(id: Int) extends L1HellaCacheModule { io.resp.valid := (state === s_resp) io.resp.bits := req io.resp.bits.has_data := isRead(req.cmd) - io.resp.bits.data := loadgen.byte + io.resp.bits.data := loadgen.byte | req_cmd_sc io.resp.bits.store_data := req.data io.resp.bits.nack := Bool(false) io.resp.bits.replay := io.resp.valid @@ -735,7 +736,6 @@ class HellaCache extends L1HellaCacheModule { val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en) val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) - val s1_sc = s1_req.cmd === M_XSC val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) val dtlb = Module(new TLB) @@ -1032,7 +1032,7 @@ class HellaCache extends L1HellaCacheModule { io.cpu.resp := Mux(cache_pass, cache_resp, uncache_resp) io.cpu.resp.bits.data_word_bypass := loadgen.word io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid - io.cpu.replay_next.valid := s1_replay && (s1_read || s1_sc) + io.cpu.replay_next.valid := s1_replay && s1_read io.cpu.replay_next.bits := s1_req.tag } From c3fff12ff02e5c67e6d1c6b7b8348d8fda46ca51 Mon Sep 17 00:00:00 2001 From: Andrew Waterman 
Date: Fri, 25 Sep 2015 17:02:51 -0700 Subject: [PATCH 0896/1087] Revert "replace remaining uses of Vec.fill" This reverts commit f7a0d125e83f8ca59d9913bb1db79cef5a6d344a. --- rocket/src/main/scala/icache.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 65305f00..f9650b95 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -202,7 +202,7 @@ class ICache extends FrontendModule val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid) when (refill_done) { val tag = code.encode(s2_tag).toUInt - tag_array.write(s2_idx, Vec(nWays, tag), Vec.tabulate(nWays)(repl_way === _)) + tag_array.write(s2_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _)) } val vb_array = Reg(init=Bits(0, nSets*nWays)) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9596008d..a0bafb0a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -670,7 +670,7 @@ class DataArray extends L1HellaCacheModule { for (p <- 0 until resp.size) { val array = SeqMem(Vec(Bits(width=encDataBits), rowWords), nSets*refillCycles) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { - val data = Vec(rowWords, io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) + val data = Vec.fill(rowWords)(io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) array.write(waddr, data, wway_en.toBools) } resp(p) := array.read(raddr, rway_en.orR && io.read.valid).toBits From 6bf8f41cef5fba4baa9ba13942668ade2d06ca20 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Sat, 26 Sep 2015 20:29:51 -0700 Subject: [PATCH 0897/1087] make sure passthrough requests are treated as vm_enabled = false --- rocket/src/main/scala/tlb.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 16f370f3..1be0f2f0 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -155,9 +155,8 @@ class TLB extends TLBModule { val w_array = Mux(priv_s, sw_array.toBits, uw_array.toBits) val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits) - val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm + val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1) - val bad_pa = !vm_enabled && io.req.bits.vpn >= UInt(mmioBase >> vpnBits) // it's only a store hit if the dirty bit is set val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0))) val tag_hit = tag_hits.orR @@ -178,7 +177,7 @@ class TLB extends TLBModule { io.resp.xcpt_st := !addr_ok || !addr_prot.w || bad_va || tlb_hit && !(w_array & tag_cam.io.hits).orR io.resp.xcpt_if := !addr_ok || !addr_prot.x || bad_va || tlb_hit && !(x_array & tag_cam.io.hits).orR io.resp.miss := tlb_miss - io.resp.ppn := Mux(vm_enabled && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(params(PPNBits)-1,0)) + io.resp.ppn := Mux(vm_enabled, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(params(PPNBits)-1,0)) io.resp.hit_idx := tag_cam.io.hits // clear invalid entries on access, or all entries on a TLB flush From 4bda6b67575e0461721f55b328ae913fb53ee382 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Sat, 26 Sep 2015 21:27:36 -0700 Subject: [PATCH 0898/1087] fix bug in tlb refill --- rocket/src/main/scala/tlb.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 1be0f2f0..ce02f8ee 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -191,7 +191,7 @@ class TLB extends TLBModule { io.ptw.req.bits.store := r_req.store io.ptw.req.bits.fetch := r_req.instruction - 
when (io.req.fire() && tlb_miss && addr_ok) { + when (io.req.fire() && tlb_miss) { state := s_request r_refill_tag := lookup_tag r_refill_waddr := repl_waddr From b93a94597c47350487d4d32a08ea30f6e1f581fd Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 27 Sep 2015 13:31:52 -0700 Subject: [PATCH 0899/1087] Remove needless control logic --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index a0bafb0a..d2fd5c1a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1016,7 +1016,7 @@ class HellaCache extends L1HellaCacheModule { val cache_resp = Wire(Valid(new HellaCacheResp)) cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable cache_resp.bits := s2_req - cache_resp.bits.has_data := isRead(s2_req.cmd) || s2_sc + cache_resp.bits.has_data := isRead(s2_req.cmd) cache_resp.bits.data := loadgen.byte | s2_sc_fail cache_resp.bits.store_data := s2_req.data cache_resp.bits.nack := s2_valid && s2_nack From 5e88ead984e463c8b19408b300b7dbaed5ea86ed Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 28 Sep 2015 11:52:27 -0700 Subject: [PATCH 0900/1087] Add pseudo-ops to instructions.scala --- rocket/src/main/scala/instructions.scala | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index f007ebc4..b5ab974f 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -192,6 +192,26 @@ object Instructions { def CUSTOM3_RD = BitPat("b?????????????????100?????1111011") def CUSTOM3_RD_RS1 = BitPat("b?????????????????110?????1111011") def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011") + def SLLI_RV32 = BitPat("b0000000??????????001?????0010011") + def SRLI_RV32 = 
BitPat("b0000000??????????101?????0010011") + def SRAI_RV32 = BitPat("b0100000??????????101?????0010011") + def FRFLAGS = BitPat("b00000000000100000010?????1110011") + def FSFLAGS = BitPat("b000000000001?????001?????1110011") + def FSFLAGSI = BitPat("b000000000001?????101?????1110011") + def FRRM = BitPat("b00000000001000000010?????1110011") + def FSRM = BitPat("b000000000010?????001?????1110011") + def FSRMI = BitPat("b000000000010?????101?????1110011") + def FSCSR = BitPat("b000000000011?????001?????1110011") + def FRCSR = BitPat("b00000000001100000010?????1110011") + def RDCYCLE = BitPat("b11000000000000000010?????1110011") + def RDTIME = BitPat("b11000000000100000010?????1110011") + def RDINSTRET = BitPat("b11000000001000000010?????1110011") + def RDCYCLEH = BitPat("b11001000000000000010?????1110011") + def RDTIMEH = BitPat("b11001000000100000010?????1110011") + def RDINSTRETH = BitPat("b11001000001000000010?????1110011") + def ECALL = BitPat("b00000000000000000000000001110011") + def EBREAK = BitPat("b00000000000100000000000001110011") + def ERET = BitPat("b00010000000000000000000001110011") } object Causes { val misaligned_fetch = 0x0 From f8a7a806445ef1e41d0072c7059aea7e8a13711d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 28 Sep 2015 13:55:23 -0700 Subject: [PATCH 0901/1087] Make perf counters optional --- rocket/src/main/scala/csr.scala | 29 +++++++++++++++++------------ rocket/src/main/scala/rocket.scala | 1 + 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index fca520e0..6b7f6b79 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -173,6 +173,7 @@ class CSRFile extends CoreModule io.host.debug_stats_pcr := reg_stats // direct export up the hierarchy + val read_time = if (params(UsePerfCounters)) reg_time else (reg_cycle: UInt) val read_mstatus = io.status.toBits val isa_string = "IMA" + (if (params(UseVM)) "S" else "") + @@ 
-188,13 +189,11 @@ class CSRFile extends CoreModule CSRs.fcsr -> (if (!params(BuildFPU).isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), CSRs.cycle -> reg_cycle, CSRs.cyclew -> reg_cycle, - CSRs.instret -> reg_instret, - CSRs.instretw -> reg_instret, - CSRs.time -> reg_time, - CSRs.timew -> reg_time, - CSRs.stime -> reg_time, - CSRs.stimew -> reg_time, - CSRs.mtime -> reg_time, + CSRs.time -> read_time, + CSRs.timew -> read_time, + CSRs.stime -> read_time, + CSRs.stimew -> read_time, + CSRs.mtime -> read_time, CSRs.mcpuid -> UInt(cpuid), CSRs.mimpid -> UInt(impid), CSRs.mstatus -> read_mstatus, @@ -214,6 +213,14 @@ class CSRFile extends CoreModule CSRs.mtohost -> reg_tohost, CSRs.mfromhost -> reg_fromhost) + if (params(UsePerfCounters)) { + read_mapping += CSRs.instret -> reg_instret + read_mapping += CSRs.instretw -> reg_instret + + for (i <- 0 until reg_uarch_counters.size) + read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) + } + if (params(UseVM)) { val read_sstatus = Wire(init=new SStatus().fromBits(read_mstatus)) read_sstatus.zero1 := 0 @@ -241,9 +248,6 @@ class CSRFile extends CoreModule read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen) } - for (i <- 0 until reg_uarch_counters.size) - read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) - for (i <- 0 until params(NCustomMRWCSRs)) { val addr = 0x790 + i // turn 0x790 into parameter CustomMRWCSRBase? 
require(addr >= 0x780 && addr <= 0x7ff, "custom MRW CSR address " + i + " is out of range") @@ -341,7 +345,7 @@ class CSRFile extends CoreModule assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: io.csr_replay :: Nil) <= 1, "these conditions must be mutually exclusive") - when (reg_time >= reg_mtimecmp) { + when (read_time >= reg_mtimecmp) { reg_mip.mtip := true } @@ -409,7 +413,8 @@ class CSRFile extends CoreModule when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } - when (decoded_addr(CSRs.instretw)) { reg_instret := wdata } + if (params(UsePerfCounters)) + when (decoded_addr(CSRs.instretw)) { reg_instret := wdata } when (decoded_addr(CSRs.mtimecmp)) { reg_mtimecmp := wdata; reg_mip.mtip := false } when (decoded_addr(CSRs.mtime)) { reg_time := wdata } when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index f8d9e862..ccc3950e 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -14,6 +14,7 @@ case object NMultXpr extends Field[Int] case object FetchWidth extends Field[Int] case object RetireWidth extends Field[Int] case object UseVM extends Field[Boolean] +case object UsePerfCounters extends Field[Boolean] case object FastLoadWord extends Field[Boolean] case object FastLoadByte extends Field[Boolean] case object FastMulDiv extends Field[Boolean] From 2f3d15675ce7b131fabcfebf6497bdc136a9e7d5 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Mon, 28 Sep 2015 16:02:29 -0700 Subject: [PATCH 0902/1087] fix DataArray writemask in L1D --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index d2fd5c1a..d1560cde 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -947,7 +947,7 @@ class HellaCache extends L1HellaCacheModule { narrow_grant.bits.client_xact_id < UInt(nMSHRs) writeArb.io.in(1).bits.addr := mshrs.io.refill.addr writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en - writeArb.io.in(1).bits.wmask := ~UInt(0, nWays) + writeArb.io.in(1).bits.wmask := ~UInt(0, rowWords) writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0) data.io.read <> readArb.io.out readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked From a7c908cb8303fac9801293e394a804da72ab5145 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 30 Sep 2015 12:43:00 -0700 Subject: [PATCH 0903/1087] Don't declare Reg inside of when We haven't yet decided what the Chisel3 semantics for this will be. --- rocket/src/main/scala/btb.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index b0ab7b11..d687c22e 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -174,6 +174,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete } val updateHit = r_btb_update.bits.prediction.valid + val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1 val useUpdatePageHit = updatePageHit.orR val doIdxPageRepl = !useUpdatePageHit @@ -196,7 +197,6 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete when (r_btb_update.valid) { assert(io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target") - val nextRepl = Counter(!updateHit, entries)._1 val waddr = if (updates_out_of_order) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl) From 
833909a2b544b4413a6c4bb6121b789cc61ba705 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 30 Sep 2015 14:36:26 -0700 Subject: [PATCH 0904/1087] Chisel3 compatibility fixes --- rocket/src/main/scala/btb.scala | 18 +++++++++--------- rocket/src/main/scala/fpu.scala | 2 +- rocket/src/main/scala/nbdcache.scala | 2 +- rocket/src/main/scala/ptw.scala | 4 ++-- rocket/src/main/scala/rocket.scala | 2 +- rocket/src/main/scala/tlb.scala | 4 ++-- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index d687c22e..df6c433f 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -69,7 +69,7 @@ class BHT(nbht: Int) { when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } } - private val table = Mem(UInt(width = 2), nbht) + private val table = Mem(nbht, UInt(width = 2)) val history = Reg(UInt(width = nbhtbits)) } @@ -134,18 +134,18 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete } val idxValid = Reg(init=UInt(0, entries)) - val idxs = Mem(UInt(width=matchBits), entries) - val idxPages = Mem(UInt(width=log2Up(nPages)), entries) - val tgts = Mem(UInt(width=matchBits), entries) - val tgtPages = Mem(UInt(width=log2Up(nPages)), entries) - val pages = Mem(UInt(width=vaddrBits-matchBits), nPages) + val idxs = Mem(entries, UInt(width=matchBits)) + val idxPages = Mem(entries, UInt(width=log2Up(nPages))) + val tgts = Mem(entries, UInt(width=matchBits)) + val tgtPages = Mem(entries, UInt(width=log2Up(nPages))) + val pages = Mem(nPages, UInt(width=vaddrBits-matchBits)) val pageValid = Reg(init=UInt(0, nPages)) val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0)) val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0)) - val useRAS = Reg(Vec(Bool(), entries)) - val isJump = Reg(Vec(Bool(), entries)) - val brIdx = Mem(UInt(width=log2Up(params(FetchWidth))), entries) + val useRAS = Reg(Vec(entries, Bool())) + val isJump = 
Reg(Vec(entries, Bool())) + val brIdx = Mem(entries, UInt(width=log2Up(params(FetchWidth)))) private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 10d657a2..daecb199 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -382,7 +382,7 @@ class FPU extends CoreModule val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d) // regfile - val regfile = Mem(Bits(width = 65), 32) + val regfile = Mem(32, Bits(width = 65)) when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded if (EnableCommitLog) { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index d1560cde..bfbcdb11 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -387,7 +387,7 @@ class MSHRFile extends L1HellaCacheModule { val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0)) val sdq_rdy = !sdq_val.andR val sdq_enq = io.req.valid && io.req.ready && cacheable && isWrite(io.req.bits.cmd) - val sdq = Mem(io.req.bits.data, sdqDepth) + val sdq = Mem(sdqDepth, io.req.bits.data) when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } val idxMatch = Wire(Vec(Bool(), nMSHRs)) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 1a49cee0..084eee2f 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -87,8 +87,8 @@ class PTW(n: Int) extends CoreModule val plru = new PseudoLRU(size) val valid = Reg(Vec(Bool(), size)) val validBits = valid.toBits - val tags = Mem(UInt(width = paddrBits), size) - val data = Mem(UInt(width = ppnBits), size) + val tags = Mem(size, UInt(width = paddrBits)) + val data = Mem(size, UInt(width = ppnBits)) val hits = Vec(tags.map(_ === pte_addr)).toBits & validBits val hit = hits.orR diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 
ccc3950e..30e33d0e 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -571,7 +571,7 @@ class Rocket extends CoreModule } class RegFile { - private val rf = Mem(UInt(width = 64), 31) + private val rf = Mem(31, UInt(width = 64)) private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() private var canRead = true def read(addr: UInt) = { diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index ce02f8ee..330c5989 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -33,7 +33,7 @@ class CAMIO extends TLBBundle { class RocketCAM extends TLBModule { val io = new CAMIO - val cam_tags = Mem(Bits(width = camTagBits), entries) + val cam_tags = Mem(entries, Bits(width = camTagBits)) val vb_array = Reg(init=Bits(0, entries)) when (io.write) { @@ -109,7 +109,7 @@ class TLB extends TLBModule { val r_req = Reg(new TLBReq) val tag_cam = Module(new RocketCAM) - val tag_ram = Mem(io.ptw.resp.bits.pte.ppn, entries) + val tag_ram = Mem(entries, io.ptw.resp.bits.pte.ppn) val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt tag_cam.io.tag := lookup_tag From 19656e4abe3d3b776ae197027729d0f85855a28d Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 30 Sep 2015 16:58:10 -0700 Subject: [PATCH 0905/1087] make sure to generate release from clean coh state on probe miss --- rocket/src/main/scala/nbdcache.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index bfbcdb11..b06a8b89 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -630,7 +630,9 @@ class ProbeUnit extends L1HellaCacheModule { req := io.req.bits } - val reply = old_coh.makeRelease(req) + val miss_coh = ClientMetadata.onReset + val reply_coh = Mux(tag_matches, old_coh, miss_coh) + val reply = reply_coh.makeRelease(req) io.req.ready := state === s_invalid io.rep.valid := 
state === s_release && !(tag_matches && old_coh.requiresVoluntaryWriteback()) // Otherwise WBU will issue release From 69a4dd0a798b0d1086e0a5c91a2f09a582a71538 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 2 Oct 2015 14:20:47 -0700 Subject: [PATCH 0906/1087] refactor NASTI to not use param --- rocket/src/main/scala/tlb.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 330c5989..478dd8f2 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -10,6 +10,7 @@ import scala.math._ case object NTLBEntries extends Field[Int] abstract trait TLBParameters extends CoreParameters { + val addrMap = new AddrHashMap(params(NastiAddrMap)) val entries = params(NTLBEntries) val camAddrBits = ceil(log(entries)/log(2)).toInt val camTagBits = asIdBits + vpnBits @@ -167,7 +168,6 @@ class TLB extends TLBModule { plru.access(OHToUInt(tag_cam.io.hits)) } - val addrMap = params(NASTIAddrHashMap) val paddr = Cat(io.resp.ppn, UInt(0, pgIdxBits)) val addr_ok = addrMap.isValid(paddr) val addr_prot = addrMap.getProt(paddr) From 84576650b57133b28bd4e2d3a57cb2d512dae5a5 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Mon, 5 Oct 2015 21:48:05 -0700 Subject: [PATCH 0907/1087] Removed all traces of params --- rocket/src/main/scala/arbiter.scala | 2 +- rocket/src/main/scala/btb.scala | 47 +++++---- rocket/src/main/scala/csr.scala | 98 +++++++++--------- rocket/src/main/scala/dpath_alu.scala | 5 +- rocket/src/main/scala/fpu.scala | 13 ++- rocket/src/main/scala/frontend.scala | 122 ++++++++++++++++++++++ rocket/src/main/scala/icache.scala | 135 ++----------------------- rocket/src/main/scala/multiplier.scala | 44 ++++---- rocket/src/main/scala/nbdcache.scala | 92 +++++++++-------- rocket/src/main/scala/ptw.scala | 13 ++- rocket/src/main/scala/rocc.scala | 18 ++-- rocket/src/main/scala/rocket.scala | 102 ++++++++++--------- rocket/src/main/scala/tile.scala | 31 +++--- 
rocket/src/main/scala/tlb.scala | 26 ++--- 14 files changed, 383 insertions(+), 365 deletions(-) create mode 100644 rocket/src/main/scala/frontend.scala diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index d995ff5f..14f20735 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -5,7 +5,7 @@ package rocket import Chisel._ import uncore._ -class HellaCacheArbiter(n: Int) extends Module +class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module { val io = new Bundle { val requestor = Vec(new HellaCacheIO, n).flip diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index df6c433f..d0f51e6d 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -6,18 +6,23 @@ import Chisel._ import junctions._ import Util._ -case object NBTBEntries extends Field[Int] -case object NRAS extends Field[Int] +case object BtbKey extends Field[BtbParameters] +case class BtbParameters(nEntries: Int = 62, nRAS: Int = 2, updatesOutOfOrder: Boolean = false) -abstract trait BTBParameters extends CoreParameters { - val matchBits = params(PgIdxBits) - val entries = params(NBTBEntries) - val nRAS = params(NRAS) +abstract trait HasBtbParameters extends HasCoreParameters { + val matchBits = p(PgIdxBits) + val entries = p(BtbKey).nEntries + val nRAS = p(BtbKey).nRAS + val updatesOutOfOrder = p(BtbKey).updatesOutOfOrder val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages val opaqueBits = log2Up(entries) val nBHT = 1 << log2Up(entries*2) } +abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters +abstract class BtbBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) + with HasBtbParameters + class RAS(nras: Int) { def push(addr: UInt): Unit = { when (count < nras) { count := count + 1 } @@ -38,7 +43,7 @@ class RAS(nras: Int) { private val stack = Reg(Vec(UInt(), nras)) } 
-class BHTResp extends Bundle with BTBParameters { +class BHTResp(implicit p: Parameters) extends BtbBundle()(p) { val history = UInt(width = log2Up(nBHT).max(1)) val value = UInt(width = 2) } @@ -52,7 +57,7 @@ class BHTResp extends Bundle with BTBParameters { // - each counter corresponds with the address of the fetch packet ("fetch pc"). // - updated when a branch resolves (and BTB was a hit for that branch). // The updating branch must provide its "fetch pc". -class BHT(nbht: Int) { +class BHT(nbht: Int)(implicit p: Parameters) { val nbhtbits = log2Up(nbht) def get(addr: UInt, update: Bool): BHTResp = { val res = Wire(new BHTResp) @@ -76,7 +81,7 @@ class BHT(nbht: Int) { // BTB update occurs during branch resolution (and only on a mispredict). // - "pc" is what future fetch PCs will tag match against. // - "br_pc" is the PC of the branch instruction. -class BTBUpdate extends Bundle with BTBParameters { +class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) { val prediction = Valid(new BTBResp) val pc = UInt(width = vaddrBits) val target = UInt(width = vaddrBits) @@ -88,14 +93,14 @@ class BTBUpdate extends Bundle with BTBParameters { // BHT update occurs during branch resolution on all conditional branches. // - "pc" is what future fetch PCs will tag match against. -class BHTUpdate extends Bundle with BTBParameters { +class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) { val prediction = Valid(new BTBResp) val pc = UInt(width = vaddrBits) val taken = Bool() val mispredict = Bool() } -class RASUpdate extends Bundle with BTBParameters { +class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) { val isCall = Bool() val isReturn = Bool() val returnAddr = UInt(width = vaddrBits) @@ -106,16 +111,16 @@ class RASUpdate extends Bundle with BTBParameters { // shifting off the lowest log(inst_bytes) bits off). // - "resp.mask" provides a mask of valid instructions (instructions are // masked off by the predicted taken branch). 
-class BTBResp extends Bundle with BTBParameters { +class BTBResp(implicit p: Parameters) extends BtbBundle()(p) { val taken = Bool() - val mask = Bits(width = params(FetchWidth)) - val bridx = Bits(width = log2Up(params(FetchWidth))) + val mask = Bits(width = fetchWidth) + val bridx = Bits(width = log2Up(fetchWidth)) val target = UInt(width = vaddrBits) val entry = UInt(width = opaqueBits) val bht = new BHTResp } -class BTBReq extends Bundle with BTBParameters { +class BTBReq(implicit p: Parameters) extends BtbBundle()(p) { val addr = UInt(width = vaddrBits) } @@ -123,7 +128,7 @@ class BTBReq extends Bundle with BTBParameters { // Higher-performance processors may cause BTB updates to occur out-of-order, // which requires an extra CAM port for updates (to ensure no duplicates get // placed in BTB). -class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParameters { +class BTB(implicit p: Parameters) extends BtbModule { val io = new Bundle { val req = Valid(new BTBReq).flip val resp = Valid(new BTBResp) @@ -145,7 +150,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete val useRAS = Reg(Vec(entries, Bool())) val isJump = Reg(Vec(entries, Bool())) - val brIdx = Mem(entries, UInt(width=log2Up(params(FetchWidth)))) + val brIdx = Mem(entries, UInt(width=log2Up(fetchWidth))) private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { @@ -198,7 +203,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete assert(io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target") val waddr = - if (updates_out_of_order) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) + if (updatesOutOfOrder) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl) // invalidate entries if we stomp on pages they depend upon @@ -212,10 +217,10 @@ class BTB(updates_out_of_order: Boolean = false) extends Module 
with BTBParamete tgtPages(waddr) := tgtPageUpdate useRAS(waddr) := r_btb_update.bits.isReturn isJump(waddr) := r_btb_update.bits.isJump - if (params(FetchWidth) == 1) { + if (fetchWidth == 1) { brIdx(waddr) := UInt(0) } else { - brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) + brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes) } require(nPages % 2 == 0) @@ -243,7 +248,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.entry := OHToUInt(hits) io.resp.bits.bridx := brIdx(io.resp.bits.entry) - if (params(FetchWidth) == 1) { + if (fetchWidth == 1) { io.resp.bits.mask := UInt(1) } else { // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 6b7f6b79..cb9bb685 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -64,12 +64,14 @@ object CSR val C = UInt(3,SZ) val I = UInt(4,SZ) val R = UInt(5,SZ) + + val ADDRSZ = 12 } -class CSRFileIO extends CoreBundle { - val host = new HTIFIO +class CSRFileIO(implicit p: Parameters) extends CoreBundle { + val host = new HtifIO val rw = new Bundle { - val addr = UInt(INPUT, 12) + val addr = UInt(INPUT, CSR.ADDRSZ) val cmd = Bits(INPUT, CSR.SZ) val rdata = Bits(OUTPUT, xLen) val wdata = Bits(INPUT, xLen) @@ -86,7 +88,7 @@ class CSRFileIO extends CoreBundle { val exception = Bool(INPUT) val retire = UInt(INPUT, log2Up(1+retireWidth)) val uarch_counters = Vec(UInt(INPUT, log2Up(1+retireWidth)), 16) - val custom_mrw_csrs = Vec(UInt(INPUT, xLen), params(NCustomMRWCSRs)) + val custom_mrw_csrs = Vec(UInt(INPUT, xLen), nCustomMrwCsrs) val cause = UInt(INPUT, xLen) val pc = UInt(INPUT, vaddrBitsExtended) val fatc = Bool(OUTPUT) @@ -98,7 +100,7 @@ class CSRFileIO extends CoreBundle { val interrupt_cause = UInt(OUTPUT, xLen) 
} -class CSRFile extends CoreModule +class CSRFile(implicit p: Parameters) extends CoreModule()(p) { val io = new CSRFileIO @@ -124,12 +126,12 @@ class CSRFile extends CoreModule val reg_stats = Reg(init=Bool(false)) val reg_time = Reg(UInt(width = xLen)) val reg_instret = WideCounter(xLen, io.retire) - val reg_cycle = if (EnableCommitLog) { reg_instret } else { WideCounter(xLen) } + val reg_cycle = if (enableCommitLog) { reg_instret } else { WideCounter(xLen) } val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) - val irq_rocc = Bool(!params(BuildRoCC).isEmpty) && io.rocc.interrupt + val irq_rocc = Bool(!p(BuildRoCC).isEmpty) && io.rocc.interrupt io.interrupt_cause := 0 io.interrupt := io.interrupt_cause(xLen-1) @@ -153,40 +155,40 @@ class CSRFile extends CoreModule val system_insn = io.rw.cmd === CSR.I val cpu_ren = io.rw.cmd != CSR.N && !system_insn - val host_pcr_req_valid = Reg(Bool()) // don't reset - val host_pcr_req_fire = host_pcr_req_valid && !cpu_ren - val host_pcr_rep_valid = Reg(Bool()) // don't reset - val host_pcr_bits = Reg(io.host.pcr.req.bits) - io.host.pcr.req.ready := !host_pcr_req_valid && !host_pcr_rep_valid - io.host.pcr.resp.valid := host_pcr_rep_valid - io.host.pcr.resp.bits := host_pcr_bits.data - when (io.host.pcr.req.fire()) { - host_pcr_req_valid := true - host_pcr_bits := io.host.pcr.req.bits + val host_csr_req_valid = Reg(Bool()) // don't reset + val host_csr_req_fire = host_csr_req_valid && !cpu_ren + val host_csr_rep_valid = Reg(Bool()) // don't reset + val host_csr_bits = Reg(io.host.csr.req.bits) + io.host.csr.req.ready := !host_csr_req_valid && !host_csr_rep_valid + io.host.csr.resp.valid := host_csr_rep_valid + io.host.csr.resp.bits := host_csr_bits.data + when (io.host.csr.req.fire()) { + host_csr_req_valid := true + host_csr_bits := io.host.csr.req.bits } - when (host_pcr_req_fire) { - host_pcr_req_valid := false - host_pcr_rep_valid := 
true - host_pcr_bits.data := io.rw.rdata + when (host_csr_req_fire) { + host_csr_req_valid := false + host_csr_rep_valid := true + host_csr_bits.data := io.rw.rdata } - when (io.host.pcr.resp.fire()) { host_pcr_rep_valid := false } + when (io.host.csr.resp.fire()) { host_csr_rep_valid := false } - io.host.debug_stats_pcr := reg_stats // direct export up the hierarchy + io.host.debug_stats_csr := reg_stats // direct export up the hierarchy - val read_time = if (params(UsePerfCounters)) reg_time else (reg_cycle: UInt) + val read_time = if (usingPerfCounters) reg_time else (reg_cycle: UInt) val read_mstatus = io.status.toBits val isa_string = "IMA" + - (if (params(UseVM)) "S" else "") + - (if (!params(BuildFPU).isEmpty) "FD" else "") + - (if (!params(BuildRoCC).isEmpty) "X" else "") + (if (usingVM) "S" else "") + + (if (usingFPU) "FD" else "") + + (if (usingRoCC) "X" else "") val cpuid = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) | isa_string.map(x => 1 << (x - 'A')).reduce(_|_) val impid = 1 val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( - CSRs.fflags -> (if (!params(BuildFPU).isEmpty) reg_fflags else UInt(0)), - CSRs.frm -> (if (!params(BuildFPU).isEmpty) reg_frm else UInt(0)), - CSRs.fcsr -> (if (!params(BuildFPU).isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), + CSRs.fflags -> (if (usingFPU) reg_fflags else UInt(0)), + CSRs.frm -> (if (usingFPU) reg_frm else UInt(0)), + CSRs.fcsr -> (if (usingFPU) Cat(reg_frm, reg_fflags) else UInt(0)), CSRs.cycle -> reg_cycle, CSRs.cyclew -> reg_cycle, CSRs.time -> read_time, @@ -213,7 +215,7 @@ class CSRFile extends CoreModule CSRs.mtohost -> reg_tohost, CSRs.mfromhost -> reg_fromhost) - if (params(UsePerfCounters)) { + if (usingPerfCounters) { read_mapping += CSRs.instret -> reg_instret read_mapping += CSRs.instretw -> reg_instret @@ -221,7 +223,7 @@ class CSRFile extends CoreModule read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) } - if (params(UseVM)) { + if (usingVM) { val 
read_sstatus = Wire(init=new SStatus().fromBits(read_mstatus)) read_sstatus.zero1 := 0 read_sstatus.zero2 := 0 @@ -248,14 +250,14 @@ class CSRFile extends CoreModule read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen) } - for (i <- 0 until params(NCustomMRWCSRs)) { + for (i <- 0 until nCustomMrwCsrs) { val addr = 0x790 + i // turn 0x790 into parameter CustomMRWCSRBase? require(addr >= 0x780 && addr <= 0x7ff, "custom MRW CSR address " + i + " is out of range") require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use") read_mapping += addr -> io.custom_mrw_csrs(i) } - val addr = Mux(cpu_ren, io.rw.addr, host_pcr_bits.addr) + val addr = Mux(cpu_ren, io.rw.addr, host_csr_bits.addr) val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) } val addr_valid = decoded_addr.values.reduce(_||_) @@ -264,11 +266,11 @@ class CSRFile extends CoreModule val priv_sufficient = reg_mstatus.prv >= csr_addr_priv val read_only = io.rw.addr(11,10).andR val cpu_wen = cpu_ren && io.rw.cmd != CSR.R && priv_sufficient - val wen = cpu_wen && !read_only || host_pcr_req_fire && host_pcr_bits.rw + val wen = cpu_wen && !read_only || host_csr_req_fire && host_csr_bits.rw val wdata = Mux(io.rw.cmd === CSR.W, io.rw.wdata, Mux(io.rw.cmd === CSR.C, io.rw.rdata & ~io.rw.wdata, Mux(io.rw.cmd === CSR.S, io.rw.rdata | io.rw.wdata, - host_pcr_bits.data))) + host_csr_bits.data))) val opcode = io.rw.addr val insn_call = !opcode(8) && !opcode(0) && system_insn @@ -355,7 +357,7 @@ class CSRFile extends CoreModule io.csr_replay := io.host.ipi_req.valid && !io.host.ipi_req.ready io.csr_stall := reg_wfi - when (host_pcr_req_fire && !host_pcr_bits.rw && decoded_addr(CSRs.mtohost)) { reg_tohost := UInt(0) } + when (host_csr_req_fire && !host_csr_bits.rw && decoded_addr(CSRs.mtohost)) { reg_tohost := UInt(0) } io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) @@ -370,7 +372,7 @@ class CSRFile extends CoreModule reg_mstatus.ie := 
new_mstatus.ie reg_mstatus.ie1 := new_mstatus.ie1 - val supportedModes = Vec((PRV_M :: PRV_U :: (if (params(UseVM)) List(PRV_S) else Nil)).map(UInt(_))) + val supportedModes = Vec((PRV_M :: PRV_U :: (if (usingVM) List(PRV_S) else Nil)).map(UInt(_))) if (supportedModes.size > 1) { reg_mstatus.mprv := new_mstatus.mprv when (supportedModes contains new_mstatus.prv) { reg_mstatus.prv := new_mstatus.prv } @@ -381,17 +383,17 @@ class CSRFile extends CoreModule } } - if (params(UseVM)) { + if (usingVM) { val vm_on = if (xLen == 32) 8 else 9 when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 } when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on } } - if (params(UseVM) || !params(BuildFPU).isEmpty) reg_mstatus.fs := new_mstatus.fs - if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_mstatus.xs + if (usingVM || usingFPU) reg_mstatus.fs := new_mstatus.fs + if (usingRoCC) reg_mstatus.xs := new_mstatus.xs } when (decoded_addr(CSRs.mip)) { val new_mip = new MIP().fromBits(wdata) - if (params(UseVM)) { + if (usingVM) { reg_mip.ssip := new_mip.ssip reg_mip.stip := new_mip.stip } @@ -399,7 +401,7 @@ class CSRFile extends CoreModule } when (decoded_addr(CSRs.mie)) { val new_mie = new MIP().fromBits(wdata) - if (params(UseVM)) { + if (usingVM) { reg_mie.ssip := new_mie.ssip reg_mie.stip := new_mie.stip } @@ -413,14 +415,14 @@ class CSRFile extends CoreModule when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } - if (params(UsePerfCounters)) + if (usingPerfCounters) when (decoded_addr(CSRs.instretw)) { reg_instret := wdata } when (decoded_addr(CSRs.mtimecmp)) { reg_mtimecmp := wdata; reg_mip.mtip := false } when (decoded_addr(CSRs.mtime)) { reg_time := wdata } - when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { 
reg_fromhost := wdata } } - when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } + when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_csr_req_fire) { reg_fromhost := wdata } } + when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_csr_req_fire) { reg_tohost := wdata } } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } - if (params(UseVM)) { + if (usingVM) { when (decoded_addr(CSRs.sstatus)) { val new_sstatus = new SStatus().fromBits(wdata) reg_mstatus.ie := new_sstatus.ie @@ -428,7 +430,7 @@ class CSRFile extends CoreModule reg_mstatus.prv1 := Mux[UInt](new_sstatus.ps, PRV_S, PRV_U) reg_mstatus.mprv := new_sstatus.mprv reg_mstatus.fs := new_sstatus.fs // even without an FPU - if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_sstatus.xs + if (usingRoCC) reg_mstatus.xs := new_sstatus.xs } when (decoded_addr(CSRs.sip)) { val new_sip = new MIP().fromBits(wdata) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 354934ca..5142c611 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -42,7 +42,7 @@ object ALU } import ALU._ -class ALUIO extends CoreBundle { +class ALUIO(implicit p: Parameters) extends CoreBundle()(p) { val dw = Bits(INPUT, SZ_DW) val fn = Bits(INPUT, SZ_ALU_FN) val in2 = UInt(INPUT, xLen) @@ -51,8 +51,7 @@ class ALUIO extends CoreBundle { val adder_out = UInt(OUTPUT, xLen) } -class ALU extends Module -{ +class ALU(implicit p: Parameters) extends Module { val io = new ALUIO // ADD, SUB diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index daecb199..c456413c 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -353,8 +353,7 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module io.out := Pipe(valid, res, latency-1) } -class FPU extends CoreModule -{ +class 
FPU(implicit p: Parameters) extends CoreModule()(p) { val io = new FPUIO val ex_reg_valid = Reg(next=io.valid, init=Bool(false)) @@ -385,7 +384,7 @@ class FPU extends CoreModule val regfile = Mem(32, Bits(width = 65)) when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded - if (EnableCommitLog) { + if (enableCommitLog) { printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32), Mux(load_wb_single, load_wb_data(31,0), load_wb_data)) } @@ -415,11 +414,11 @@ class FPU extends CoreModule req.in3 := ex_rs3 req.typ := ex_reg_inst(21,20) - val sfma = Module(new FPUFMAPipe(params(SFMALatency), 23, 9)) + val sfma = Module(new FPUFMAPipe(p(SFMALatency), 23, 9)) sfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && ex_ctrl.single sfma.io.in.bits := req - val dfma = Module(new FPUFMAPipe(params(DFMALatency), 52, 12)) + val dfma = Module(new FPUFMAPipe(p(DFMALatency), 52, 12)) dfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && !ex_ctrl.single dfma.io.in.bits := req @@ -488,7 +487,7 @@ class FPU extends CoreModule val wexc = Vec(pipes.map(_.res.exc))(wsrc) when (wen(0) || divSqrt_wen) { regfile(waddr) := wdata - if (EnableCommitLog) { + if (enableCommitLog) { val wdata_unrec_s = hardfloat.recodedFloatNToFloatN(wdata(64,0), 23, 9) val wdata_unrec_d = hardfloat.recodedFloatNToFloatN(wdata(64,0), 52, 12) val wb_single = (winfo(0) >> 5)(0) @@ -518,7 +517,7 @@ class FPU extends CoreModule divSqrt_wdata := 0 divSqrt_flags := 0 - if (params(FDivSqrt)) { + if (p(FDivSqrt)) { val divSqrt_single = Reg(Bool()) val divSqrt_rm = Reg(Bits()) val divSqrt_flags_double = Reg(Bits()) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala new file mode 100644 index 00000000..15fb6266 --- /dev/null +++ b/rocket/src/main/scala/frontend.scala @@ -0,0 +1,122 @@ +package rocket + +import Chisel._ +import uncore._ +import Util._ + +class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) { + val pc = UInt(width = vaddrBitsExtended) +} + +class 
FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { + val pc = UInt(width = vaddrBitsExtended) // ID stage PC + val data = Vec(Bits(width = coreInstBits), fetchWidth) + val mask = Bits(width = fetchWidth) + val xcpt_if = Bool() +} + +class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { + val req = Valid(new FrontendReq) + val resp = Decoupled(new FrontendResp).flip + val btb_resp = Valid(new BTBResp).flip + val btb_update = Valid(new BTBUpdate) + val bht_update = Valid(new BHTUpdate) + val ras_update = Valid(new RASUpdate) + val invalidate = Bool(OUTPUT) + val npc = UInt(INPUT, width = vaddrBitsExtended) +} + +class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters { + val io = new Bundle { + val cpu = new FrontendIO().flip + val ptw = new TLBPTWIO() + val mem = new ClientUncachedTileLinkIO + } + + val btb = Module(new BTB) + val icache = Module(new ICache) + val tlb = Module(new TLB) + + val s1_pc_ = Reg(UInt()) + val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline) + val s1_same_block = Reg(Bool()) + val s2_valid = Reg(init=Bool(true)) + val s2_pc = Reg(init=UInt(START_ADDR)) + val s2_btb_resp_valid = Reg(init=Bool(false)) + val s2_btb_resp_bits = Reg(btb.io.resp.bits) + val s2_xcpt_if = Reg(init=Bool(false)) + + val msb = vaddrBits-1 + val lsb = log2Up(fetchWidth*coreInstBytes) + val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) + val ntpc_0 = s1_pc + UInt(coreInstBytes*fetchWidth) + val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure + val icmiss = s2_valid && !icache.io.resp.valid + val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) + val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt + val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) + + val stall = io.cpu.resp.valid && !io.cpu.resp.ready + when (!stall) { + 
s1_same_block := s0_same_block && !tlb.io.resp.miss + s1_pc_ := npc + s2_valid := !icmiss + when (!icmiss) { + s2_pc := s1_pc + s2_btb_resp_valid := btb.io.resp.valid + when (btb.io.resp.valid) { s2_btb_resp_bits := btb.io.resp.bits } + s2_xcpt_if := tlb.io.resp.xcpt_if + } + } + when (io.cpu.req.valid) { + s1_same_block := Bool(false) + s1_pc_ := io.cpu.req.bits.pc + s2_valid := Bool(false) + } + + btb.io.req.valid := !stall && !icmiss + btb.io.req.bits.addr := s1_pc + btb.io.btb_update := io.cpu.btb_update + btb.io.bht_update := io.cpu.bht_update + btb.io.ras_update := io.cpu.ras_update + btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate + + io.ptw <> tlb.io.ptw + tlb.io.req.valid := !stall && !icmiss + tlb.io.req.bits.vpn := s1_pc >> pgIdxBits + tlb.io.req.bits.asid := UInt(0) + tlb.io.req.bits.passthrough := Bool(false) + tlb.io.req.bits.instruction := Bool(true) + tlb.io.req.bits.store := Bool(false) + + io.mem <> icache.io.mem + icache.io.req.valid := !stall && !s0_same_block + icache.io.req.bits.idx := io.cpu.npc + icache.io.invalidate := io.cpu.invalidate + icache.io.req.bits.ppn := tlb.io.resp.ppn + icache.io.req.bits.kill := io.cpu.req.valid || + tlb.io.resp.miss || tlb.io.resp.xcpt_if || + icmiss || io.ptw.invalidate + icache.io.resp.ready := !stall && !s1_same_block + + io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) + io.cpu.resp.bits.pc := s2_pc + io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) + + require(fetchWidth * coreInstBytes <= rowBytes) + val fetch_data = + if (fetchWidth * coreInstBytes == rowBytes) icache.io.resp.bits.datablock + else icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) + + for (i <- 0 until fetchWidth) { + io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) + } + + val all_ones = UInt((1 << (fetchWidth+1))-1) + val msk_pc = if (fetchWidth == 1) all_ones else all_ones << 
s2_pc(log2Up(fetchWidth) -1+2,2) + io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc) + io.cpu.resp.bits.xcpt_if := s2_xcpt_if + + io.cpu.btb_resp.valid := s2_btb_resp_valid + io.cpu.btb_resp.bits := s2_btb_resp_bits +} diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index f9650b95..b7f2c565 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -4,148 +4,25 @@ import Chisel._ import uncore._ import Util._ -abstract trait L1CacheParameters extends CacheParameters with CoreParameters { - val outerDataBeats = params(TLDataBeats) - val outerDataBits = params(TLDataBits) +trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters { + val outerDataBeats = p(TLDataBeats) + val outerDataBits = p(TLDataBits) val refillCyclesPerBeat = outerDataBits/rowBits val refillCycles = refillCyclesPerBeat*outerDataBeats } -abstract trait FrontendParameters extends L1CacheParameters -abstract class FrontendBundle extends Bundle with FrontendParameters -abstract class FrontendModule extends Module with FrontendParameters - -class FrontendReq extends CoreBundle { - val pc = UInt(width = vaddrBitsExtended) -} - -class FrontendResp extends CoreBundle { - val pc = UInt(width = vaddrBitsExtended) // ID stage PC - val data = Vec(Bits(width = coreInstBits), coreFetchWidth) - val mask = Bits(width = coreFetchWidth) - val xcpt_if = Bool() -} - -class CPUFrontendIO extends CoreBundle { - val req = Valid(new FrontendReq) - val resp = Decoupled(new FrontendResp).flip - val btb_resp = Valid(new BTBResp).flip - val btb_update = Valid(new BTBUpdate) - val bht_update = Valid(new BHTUpdate) - val ras_update = Valid(new RASUpdate) - val invalidate = Bool(OUTPUT) - val npc = UInt(INPUT, width = vaddrBitsExtended) -} - -class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule -{ - val io = new Bundle { - val cpu = new CPUFrontendIO().flip - val ptw = new 
TLBPTWIO() - val mem = new ClientUncachedTileLinkIO - } - - val btb = Module(new BTB(btb_updates_out_of_order)) - val icache = Module(new ICache) - val tlb = Module(new TLB) - - val s1_pc_ = Reg(UInt()) - val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline) - val s1_same_block = Reg(Bool()) - val s2_valid = Reg(init=Bool(true)) - val s2_pc = Reg(init=UInt(START_ADDR)) - val s2_btb_resp_valid = Reg(init=Bool(false)) - val s2_btb_resp_bits = Reg(btb.io.resp.bits) - val s2_xcpt_if = Reg(init=Bool(false)) - - val msb = vaddrBits-1 - val lsb = log2Up(coreFetchWidth*coreInstBytes) - val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) - val ntpc_0 = s1_pc + UInt(coreInstBytes*coreFetchWidth) - val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure - val icmiss = s2_valid && !icache.io.resp.valid - val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) - val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt - val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) - - val stall = io.cpu.resp.valid && !io.cpu.resp.ready - when (!stall) { - s1_same_block := s0_same_block && !tlb.io.resp.miss - s1_pc_ := npc - s2_valid := !icmiss - when (!icmiss) { - s2_pc := s1_pc - s2_btb_resp_valid := btb.io.resp.valid - when (btb.io.resp.valid) { s2_btb_resp_bits := btb.io.resp.bits } - s2_xcpt_if := tlb.io.resp.xcpt_if - } - } - when (io.cpu.req.valid) { - s1_same_block := Bool(false) - s1_pc_ := io.cpu.req.bits.pc - s2_valid := Bool(false) - } - - btb.io.req.valid := !stall && !icmiss - btb.io.req.bits.addr := s1_pc - btb.io.btb_update := io.cpu.btb_update - btb.io.bht_update := io.cpu.bht_update - btb.io.ras_update := io.cpu.ras_update - btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate - - io.ptw <> tlb.io.ptw - tlb.io.req.valid := !stall && !icmiss - tlb.io.req.bits.vpn := s1_pc >> pgIdxBits - 
tlb.io.req.bits.asid := UInt(0) - tlb.io.req.bits.passthrough := Bool(false) - tlb.io.req.bits.instruction := Bool(true) - tlb.io.req.bits.store := Bool(false) - - io.mem <> icache.io.mem - icache.io.req.valid := !stall && !s0_same_block - icache.io.req.bits.idx := io.cpu.npc - icache.io.invalidate := io.cpu.invalidate - icache.io.req.bits.ppn := tlb.io.resp.ppn - icache.io.req.bits.kill := io.cpu.req.valid || - tlb.io.resp.miss || tlb.io.resp.xcpt_if || - icmiss || io.ptw.invalidate - icache.io.resp.ready := !stall && !s1_same_block - - io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid) - io.cpu.resp.bits.pc := s2_pc - io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) - - require(coreFetchWidth * coreInstBytes <= rowBytes) - val fetch_data = - if (coreFetchWidth * coreInstBytes == rowBytes) icache.io.resp.bits.datablock - else icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits)) - - for (i <- 0 until coreFetchWidth) { - io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) - } - - val all_ones = UInt((1 << (coreFetchWidth+1))-1) - val msk_pc = if (coreFetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(coreFetchWidth) -1+2,2) - io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc) - io.cpu.resp.bits.xcpt_if := s2_xcpt_if - - io.cpu.btb_resp.valid := s2_btb_resp_valid - io.cpu.btb_resp.bits := s2_btb_resp_bits -} - -class ICacheReq extends FrontendBundle { +class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) { val idx = UInt(width = pgIdxBits) val ppn = UInt(width = ppnBits) // delayed one cycle val kill = Bool() // delayed one cycle } -class ICacheResp extends FrontendBundle { +class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters { val data = Bits(width = coreInstBits) val datablock = Bits(width = rowBits) } -class ICache extends 
FrontendModule -{ +class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters { val io = new Bundle { val req = Valid(new ICacheReq).flip val resp = Decoupled(new ICacheResp) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index 6498aa2d..f6f4c9be 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -6,29 +6,35 @@ import Chisel._ import ALU._ import Util._ -class MultiplierReq extends CoreBundle { +class MultiplierReq(dataBits: Int, tagBits: Int) extends Bundle { val fn = Bits(width = SZ_ALU_FN) val dw = Bits(width = SZ_DW) - val in1 = Bits(width = xLen) - val in2 = Bits(width = xLen) - val tag = UInt(width = log2Up(params(NMultXpr))) + val in1 = Bits(width = dataBits) + val in2 = Bits(width = dataBits) + val tag = UInt(width = tagBits) + override def cloneType = new MultiplierReq(dataBits, tagBits).asInstanceOf[this.type] } -class MultiplierResp extends CoreBundle { - val data = Bits(width = xLen) - val tag = UInt(width = log2Up(params(NMultXpr))) +class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle { + val data = Bits(width = dataBits) + val tag = UInt(width = tagBits) + override def cloneType = new MultiplierResp(dataBits, tagBits).asInstanceOf[this.type] } -class MultiplierIO extends Bundle { - val req = Decoupled(new MultiplierReq).flip +class MultiplierIO(dataBits: Int, tagBits: Int) extends Bundle { + val req = Decoupled(new MultiplierReq(dataBits, tagBits)).flip val kill = Bool(INPUT) - val resp = Decoupled(new MultiplierResp) + val resp = Decoupled(new MultiplierResp(dataBits, tagBits)) } -class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false) extends Module { - val io = new MultiplierIO +class MulDiv( + width: Int, + nXpr: Int = 32, + unroll: Int = 1, + earlyOut: Boolean = false) extends Module { + val io = new MultiplierIO(width, log2Up(nXpr)) val w = io.req.bits.in1.getWidth - val mulw = 
(w+mulUnroll-1)/mulUnroll*mulUnroll + val mulw = (w+unroll-1)/unroll*unroll val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6) val state = Reg(init=s_ready) @@ -87,18 +93,18 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false) extends Module { val mplier = mulReg(mulw-1,0) val accum = mulReg(2*mulw,mulw).toSInt val mpcand = divisor.toSInt - val prod = mplier(mulUnroll-1,0) * mpcand + accum - val nextMulReg = Cat(prod, mplier(mulw-1,mulUnroll)).toUInt + val prod = mplier(unroll-1,0) * mpcand + accum + val nextMulReg = Cat(prod, mplier(mulw-1,unroll)).toUInt - val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * mulUnroll)(log2Up(mulw)-1,0))(mulw-1,0) - val eOut = Bool(earlyOut) && count != mulw/mulUnroll-1 && count != 0 && + val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * unroll)(log2Up(mulw)-1,0))(mulw-1,0) + val eOut = Bool(earlyOut) && count != mulw/unroll-1 && count != 0 && !isHi && (mplier & ~eOutMask) === UInt(0) - val eOutRes = (mulReg >> (mulw - count * mulUnroll)(log2Up(mulw)-1,0)) + val eOutRes = (mulReg >> (mulw - count * unroll)(log2Up(mulw)-1,0)) val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0)) remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0)) count := count + 1 - when (eOut || count === mulw/mulUnroll-1) { + when (eOut || count === mulw/unroll-1) { state := Mux(isHi, s_move_rem, s_done) } } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index b06a8b89..d71cbf8f 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -14,11 +14,11 @@ case object NMSHRs extends Field[Int] case object NIOMSHRs extends Field[Int] case object LRSCCycles extends Field[Int] -abstract trait L1HellaCacheParameters extends L1CacheParameters { - val wordBits = params(WordBits) +trait HasL1HellaCacheParameters extends HasL1CacheParameters { + val wordBits = p(WordBits) val 
wordBytes = wordBits/8 val wordOffBits = log2Up(wordBytes) - val beatBytes = params(CacheBlockBytes) / params(TLDataBeats) + val beatBytes = p(CacheBlockBytes) / p(TLDataBeats) val beatWords = beatBytes / wordBytes val beatOffBits = log2Up(beatBytes) val idxMSB = untagBits-1 @@ -29,43 +29,48 @@ abstract trait L1HellaCacheParameters extends L1CacheParameters { val doNarrowRead = coreDataBits * nWays % rowBits == 0 val encDataBits = code.width(coreDataBits) val encRowBits = encDataBits*rowWords - val sdqDepth = params(StoreDataQueueDepth) - val nMSHRs = params(NMSHRs) - val nIOMSHRs = params(NIOMSHRs) + val sdqDepth = p(StoreDataQueueDepth) + val nMSHRs = p(NMSHRs) + val nIOMSHRs = p(NIOMSHRs) } -abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters -abstract class L1HellaCacheModule extends Module with L1HellaCacheParameters +abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module + with HasL1HellaCacheParameters +abstract class L1HellaCacheBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p) + with HasL1HellaCacheParameters -trait HasCoreMemOp extends CoreBundle { +trait HasCoreMemOp extends HasCoreParameters { val addr = UInt(width = coreMaxAddrBits) val tag = Bits(width = coreDCacheReqTagBits) val cmd = Bits(width = M_SZ) val typ = Bits(width = MT_SZ) } -trait HasCoreData extends CoreBundle { +trait HasCoreData extends HasCoreParameters { val data = Bits(width = coreDataBits) } -trait HasSDQId extends CoreBundle with L1HellaCacheParameters { +trait HasSDQId extends HasL1HellaCacheParameters { val sdq_id = UInt(width = log2Up(sdqDepth)) } -trait HasMissInfo extends CoreBundle with L1HellaCacheParameters { +trait HasMissInfo extends HasL1HellaCacheParameters { val tag_match = Bool() val old_meta = new L1Metadata val way_en = Bits(width = nWays) } -class HellaCacheReqInternal extends HasCoreMemOp { +class HellaCacheReqInternal(implicit p: Parameters) extends L1HellaCacheBundle()(p) + with 
HasCoreMemOp { val kill = Bool() val phys = Bool() } -class HellaCacheReq extends HellaCacheReqInternal with HasCoreData +class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData -class HellaCacheResp extends HasCoreMemOp with HasCoreData { +class HellaCacheResp(implicit p: Parameters) extends L1HellaCacheBundle()(p) + with HasCoreMemOp + with HasCoreData { val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() val has_data = Bool() @@ -84,7 +89,7 @@ class HellaCacheExceptions extends Bundle { } // interface between D$ and processor/DTLB -class HellaCacheIO extends CoreBundle { +class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { val req = Decoupled(new HellaCacheReq) val resp = Valid(new HellaCacheResp).flip val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip @@ -93,50 +98,51 @@ class HellaCacheIO extends CoreBundle { val ordered = Bool(INPUT) } -class L1DataReadReq extends L1HellaCacheBundle { +class L1DataReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) { val way_en = Bits(width = nWays) val addr = Bits(width = untagBits) } -class L1DataWriteReq extends L1DataReadReq { +class L1DataWriteReq(implicit p: Parameters) extends L1DataReadReq()(p) { val wmask = Bits(width = rowWords) val data = Bits(width = encRowBits) } -class L1RefillReq extends L1DataReadReq +class L1RefillReq(implicit p: Parameters) extends L1DataReadReq()(p) -class L1MetaReadReq extends MetaReadReq { +class L1MetaReadReq(implicit p: Parameters) extends MetaReadReq { val tag = Bits(width = tagBits) + override def cloneType = new L1MetaReadReq()(p).asInstanceOf[this.type] //TODO remove } -class L1MetaWriteReq extends +class L1MetaWriteReq(implicit p: Parameters) extends MetaWriteReq[L1Metadata](new L1Metadata) object L1Metadata { - def apply(tag: Bits, coh: ClientMetadata) = { + def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = { val meta = Wire(new L1Metadata) meta.tag := tag 
meta.coh := coh meta } } -class L1Metadata extends Metadata with L1HellaCacheParameters { +class L1Metadata(implicit p: Parameters) extends Metadata()(p) with HasL1HellaCacheParameters { val coh = new ClientMetadata } -class Replay extends HellaCacheReqInternal with HasCoreData -class ReplayInternal extends HellaCacheReqInternal with HasSDQId +class Replay(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData +class ReplayInternal(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasSDQId -class MSHRReq extends Replay with HasMissInfo -class MSHRReqInternal extends ReplayInternal with HasMissInfo +class MSHRReq(implicit p: Parameters) extends Replay()(p) with HasMissInfo +class MSHRReqInternal(implicit p: Parameters) extends ReplayInternal()(p) with HasMissInfo -class ProbeInternal extends Probe with HasClientTransactionId +class ProbeInternal(implicit p: Parameters) extends Probe()(p) with HasClientTransactionId -class WritebackReq extends Release with CacheParameters { +class WritebackReq(implicit p: Parameters) extends Release()(p) with HasCacheParameters { val way_en = Bits(width = nWays) } -class IOMSHR(id: Int) extends L1HellaCacheModule { +class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req = Decoupled(new HellaCacheReq).flip val acquire = Decoupled(new Acquire) @@ -213,7 +219,7 @@ class IOMSHR(id: Int) extends L1HellaCacheModule { } } -class MSHR(id: Int) extends L1HellaCacheModule { +class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req_pri_val = Bool(INPUT) val req_pri_rdy = Bool(OUTPUT) @@ -256,7 +262,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles) // TODO: Zero width? 
val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done) - val rpq = Module(new Queue(new ReplayInternal, params(ReplayQueueDepth))) + val rpq = Module(new Queue(new ReplayInternal, p(ReplayQueueDepth))) rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(io.req_bits.cmd) rpq.io.enq.bits := io.req_bits rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid @@ -362,7 +368,7 @@ class MSHR(id: Int) extends L1HellaCacheModule { } } -class MSHRFile extends L1HellaCacheModule { +class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req = Decoupled(new MSHRReq).flip val resp = Decoupled(new HellaCacheResp) @@ -498,7 +504,7 @@ class MSHRFile extends L1HellaCacheModule { } } -class WritebackUnit extends L1HellaCacheModule { +class WritebackUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req = Decoupled(new WritebackReq).flip val meta_read = Decoupled(new L1MetaReadReq) @@ -578,7 +584,7 @@ class WritebackUnit extends L1HellaCacheModule { } else { io.data_resp }) } -class ProbeUnit extends L1HellaCacheModule { +class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req = Decoupled(new ProbeInternal).flip val rep = Decoupled(new Release) @@ -653,7 +659,7 @@ class ProbeUnit extends L1HellaCacheModule { io.wb_req.bits.way_en := way_en } -class DataArray extends L1HellaCacheModule { +class DataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val read = Decoupled(new L1DataReadReq).flip val write = Decoupled(new L1DataWriteReq).flip @@ -700,18 +706,18 @@ class DataArray extends L1HellaCacheModule { io.write.ready := Bool(true) } -class HellaCache extends L1HellaCacheModule { +class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val cpu = (new HellaCacheIO).flip val ptw = new 
TLBPTWIO() val mem = new ClientTileLinkIO } - require(params(LRSCCycles) >= 32) // ISA requires 16-insn LRSC sequences to succeed + require(p(LRSCCycles) >= 32) // ISA requires 16-insn LRSC sequences to succeed require(isPow2(nSets)) require(isPow2(nWays)) // TODO: relax this - require(params(RowBits) <= params(TLDataBits)) - require(paddrBits-blockOffBits == params(TLBlockAddrBits) ) + require(p(RowBits) <= p(TLDataBits)) + require(paddrBits-blockOffBits == p(TLBlockAddrBits) ) require(untagBits <= pgIdxBits) val wb = Module(new WritebackUnit) @@ -855,7 +861,7 @@ class HellaCache extends L1HellaCacheModule { when (lrsc_valid) { lrsc_count := lrsc_count - 1 } when (s2_valid_masked && s2_hit || s2_replay) { when (s2_lr) { - when (!lrsc_valid) { lrsc_count := params(LRSCCycles)-1 } + when (!lrsc_valid) { lrsc_count := p(LRSCCycles)-1 } lrsc_addr := s2_req.addr >> blockOffBits } when (s2_sc) { @@ -899,7 +905,7 @@ class HellaCache extends L1HellaCacheModule { writeArb.io.in(0).bits.way_en := s3_way // replacement policy - val replacer = params(Replacer)() + val replacer = p(Replacer)() val s1_replaced_way_en = UIntToOH(replacer.way) val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en)) val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq) @@ -1039,7 +1045,7 @@ class HellaCache extends L1HellaCacheModule { } // exposes a sane decoupled request interface -class SimpleHellaCacheIF extends Module +class SimpleHellaCacheIF(implicit p: Parameters) extends Module { val io = new Bundle { val requestor = new HellaCacheIO().flip diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 084eee2f..6f1d8534 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -6,32 +6,32 @@ import Chisel._ import uncore._ import Util._ -class PTWReq extends CoreBundle { +class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { val addr = 
UInt(width = vpnBits) val prv = Bits(width = 2) val store = Bool() val fetch = Bool() } -class PTWResp extends CoreBundle { +class PTWResp(implicit p: Parameters) extends CoreBundle()(p) { val error = Bool() val pte = new PTE } -class TLBPTWIO extends CoreBundle { +class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) { val req = Decoupled(new PTWReq) val resp = Valid(new PTWResp).flip val status = new MStatus().asInput val invalidate = Bool(INPUT) } -class DatapathPTWIO extends CoreBundle { +class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) { val ptbr = UInt(INPUT, paddrBits) val invalidate = Bool(INPUT) val status = new MStatus().asInput } -class PTE extends CoreBundle { +class PTE(implicit p: Parameters) extends CoreBundle()(p) { val ppn = Bits(width = ppnBits) val reserved_for_software = Bits(width = 3) val d = Bool() @@ -51,8 +51,7 @@ class PTE extends CoreBundle { Mux(prv(0), Mux(fetch, sx(), Mux(store, sw(), sr())), Mux(fetch, ux(), Mux(store, uw(), ur()))) } -class PTW(n: Int) extends CoreModule -{ +class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { val requestor = Vec(new TLBPTWIO, n).flip val mem = new HellaCacheIO diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index ed962e26..0cfa5b53 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -20,24 +20,21 @@ class RoCCInstruction extends Bundle val opcode = Bits(width = 7) } -class RoCCCommand extends CoreBundle -{ +class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) { val inst = new RoCCInstruction val rs1 = Bits(width = xLen) val rs2 = Bits(width = xLen) } -class RoCCResponse extends CoreBundle -{ +class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) { val rd = Bits(width = 5) val data = Bits(width = xLen) } -class RoCCInterface extends Bundle -{ +class RoCCInterface(implicit p: Parameters) extends Bundle { val cmd = Decoupled(new 
RoCCCommand).flip val resp = Decoupled(new RoCCResponse) - val mem = new HellaCacheIO + val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) val busy = Bool(OUTPUT) val s = Bool(INPUT) val interrupt = Bool(OUTPUT) @@ -51,15 +48,12 @@ class RoCCInterface extends Bundle val exception = Bool(INPUT) } -abstract class RoCC extends CoreModule -{ +abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) { val io = new RoCCInterface io.mem.req.bits.phys := Bool(true) // don't perform address translation } -class AccumulatorExample extends RoCC -{ - val n = 4 +class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) { val regfile = Mem(UInt(width = xLen), n) val busy = Reg(init=Vec(Bool(false), n)) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 30e33d0e..6ff5a414 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -7,10 +7,9 @@ import junctions._ import uncore._ import Util._ -case object BuildFPU extends Field[Option[() => FPU]] +case object BuildFPU extends Field[Option[Parameters => FPU]] case object FDivSqrt extends Field[Boolean] case object XLen extends Field[Int] -case object NMultXpr extends Field[Int] case object FetchWidth extends Field[Int] case object RetireWidth extends Field[Int] case object UseVM extends Field[Boolean] @@ -23,59 +22,65 @@ case object CoreDataBits extends Field[Int] case object CoreDCacheReqTagBits extends Field[Int] case object NCustomMRWCSRs extends Field[Int] -abstract trait CoreParameters extends UsesParameters { - val xLen = params(XLen) - val paddrBits = params(PAddrBits) - val vaddrBits = params(VAddrBits) - val pgIdxBits = params(PgIdxBits) - val ppnBits = params(PPNBits) - val vpnBits = params(VPNBits) - val pgLevels = params(PgLevels) - val pgLevelBits = params(PgLevelBits) - val asIdBits = params(ASIdBits) +trait HasCoreParameters { + implicit val p: Parameters + val xLen = p(XLen) + val paddrBits 
= p(PAddrBits) + val vaddrBits = p(VAddrBits) + val pgIdxBits = p(PgIdxBits) + val ppnBits = p(PPNBits) + val vpnBits = p(VPNBits) + val pgLevels = p(PgLevels) + val pgLevelBits = p(PgLevelBits) + val asIdBits = p(ASIdBits) - val retireWidth = params(RetireWidth) - val coreFetchWidth = params(FetchWidth) - val coreInstBits = params(CoreInstBits) + val retireWidth = p(RetireWidth) + val fetchWidth = p(FetchWidth) + val coreInstBits = p(CoreInstBits) val coreInstBytes = coreInstBits/8 val coreDataBits = xLen val coreDataBytes = coreDataBits/8 - val coreDCacheReqTagBits = params(CoreDCacheReqTagBits) + val coreDCacheReqTagBits = p(CoreDCacheReqTagBits) val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt - val mmioBase = params(MMIOBase) + val mmioBase = p(MMIOBase) + val nCustomMrwCsrs = p(NCustomMRWCSRs) + + val usingVM = p(UseVM) + val usingFPU = !p(BuildFPU).isEmpty + val usingFDivSqrt = p(FDivSqrt) + val usingRoCC = !p(BuildRoCC).isEmpty + val usingFastMulDiv = p(FastMulDiv) + val fastLoadWord = p(FastLoadWord) + val fastLoadByte = p(FastLoadByte) // Print out log of committed instructions and their writeback values. // Requires post-processing due to out-of-order writebacks. - val EnableCommitLog = false + val enableCommitLog = false + val usingPerfCounters = p(UsePerfCounters) - if(params(FastLoadByte)) require(params(FastLoadWord)) + if (fastLoadByte) require(fastLoadWord) } -abstract trait RocketCoreParameters extends CoreParameters -{ - require(params(FetchWidth) == 1) // for now... - require(params(RetireWidth) == 1) // for now... 
-} +abstract class CoreModule(implicit val p: Parameters) extends Module + with HasCoreParameters +abstract class CoreBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) + with HasCoreParameters -abstract class CoreBundle extends Bundle with CoreParameters -abstract class CoreModule extends Module with CoreParameters - -class Rocket extends CoreModule -{ +class Rocket(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { - val host = new HTIFIO - val imem = new CPUFrontendIO - val dmem = new HellaCacheIO + val host = new HtifIO + val imem = new FrontendIO()(p.alterPartial({case CacheName => "L1I" })) + val dmem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) val ptw = new DatapathPTWIO().flip val fpu = new FPUIO().flip val rocc = new RoCCInterface().flip } var decode_table = XDecode.table - if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table - if (!params(BuildFPU).isEmpty && params(FDivSqrt)) decode_table ++= FDivSqrtDecode.table - if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table + if (usingFPU) decode_table ++= FDecode.table + if (usingFPU && usingFDivSqrt) decode_table ++= FDivSqrtDecode.table + if (usingRoCC) decode_table ++= RoCCDecode.table val ex_ctrl = Reg(new IntCtrlSigs) val mem_ctrl = Reg(new IntCtrlSigs) @@ -123,7 +128,7 @@ class Rocket extends CoreModule // decode stage val id_pc = io.imem.resp.bits.pc - val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1) + val id_inst = io.imem.resp.bits.data(0).toBits; require(fetchWidth == 1) val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table) val id_raddr3 = id_inst(31,27) val id_raddr2 = id_inst(24,20) @@ -156,7 +161,7 @@ class Rocket extends CoreModule val id_amo_rl = id_inst(25) val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid - val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) && + val id_rocc_busy = Bool(usingRoCC) && 
(io.rocc.busy || ex_reg_valid && ex_ctrl.rocc || mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc) id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy @@ -169,8 +174,8 @@ class Rocket extends CoreModule (id_illegal_insn, UInt(Causes.illegal_instruction)))) val dcache_bypass_data = - if(params(FastLoadByte)) io.dmem.resp.bits.data - else if(params(FastLoadWord)) io.dmem.resp.bits.data_word_bypass + if (fastLoadByte) io.dmem.resp.bits.data + else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass else wb_reg_wdata // detect bypass opportunities @@ -207,8 +212,9 @@ class Rocket extends CoreModule alu.io.in1 := ex_op1.toUInt // multiplier and divider - val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1, - earlyOut = params(FastMulDiv))) + val div = Module(new MulDiv(width = xLen, + unroll = if(usingFastMulDiv) 8 else 1, + earlyOut = usingFastMulDiv)) div.io.req.valid := ex_reg_valid && ex_ctrl.div div.io.req.bits.dw := ex_ctrl.alu_dw div.io.req.bits.fn := ex_ctrl.alu_fn @@ -345,7 +351,7 @@ class Rocket extends CoreModule val ll_wdata = Wire(init = div.io.resp.bits.data) val ll_waddr = Wire(init = div.io.resp.bits.tag) val ll_wen = Wire(init = div.io.resp.fire()) - if (!params(BuildRoCC).isEmpty) { + if (usingRoCC) { io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd) when (io.rocc.resp.fire()) { div.io.resp.ready := Bool(false) @@ -356,7 +362,7 @@ class Rocket extends CoreModule } when (dmem_resp_replay && dmem_resp_xpu) { div.io.resp.ready := Bool(false) - if (!params(BuildRoCC).isEmpty) + if (usingRoCC) io.rocc.resp.ready := Bool(false) ll_waddr := dmem_resp_waddr ll_wen := Bool(true) @@ -410,7 +416,7 @@ class Rocket extends CoreModule // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage. 
val mem_mem_cmd_bh = - if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass + if (fastLoadWord) Bool(!fastLoadByte) && mem_reg_slow_bypass else Bool(true) val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr) @@ -423,7 +429,7 @@ class Rocket extends CoreModule val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr) val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb) - val id_stall_fpu = if (!params(BuildFPU).isEmpty) { + val id_stall_fpu = if (usingFPU) { val fp_sboard = new Scoreboard(32) fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr) fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr) @@ -436,7 +442,7 @@ class Rocket extends CoreModule id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || id_ctrl.fp && id_stall_fpu || id_ctrl.mem && !io.dmem.req.ready || - Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || + Bool(usingRoCC) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || id_do_fence || csr.io.csr_stall ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt @@ -488,7 +494,7 @@ class Rocket extends CoreModule io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp) io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) - require(params(CoreDCacheReqTagBits) >= 6) + require(p(CoreDCacheReqTagBits) >= 6) io.dmem.invalidate_lr := wb_xcpt io.rocc.cmd.valid := wb_rocc_val @@ -498,7 +504,7 @@ class Rocket extends CoreModule io.rocc.cmd.bits.rs1 := wb_reg_wdata io.rocc.cmd.bits.rs2 := wb_reg_rs2 - if (EnableCommitLog) { + if (enableCommitLog) { val pc = 
Wire(SInt(width=64)) pc := wb_reg_pc val inst = wb_reg_inst diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index bfd8ebd8..623c64fc 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -11,22 +11,27 @@ case object NDCachePorts extends Field[Int] case object NPTWPorts extends Field[Int] case object BuildRoCC extends Field[Option[() => RoCC]] -abstract class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { +abstract class Tile(resetSignal: Bool = null) + (implicit p: Parameters) extends Module(_reset = resetSignal) { val io = new Bundle { val cached = new ClientTileLinkIO val uncached = new ClientUncachedTileLinkIO - val host = new HTIFIO + val host = new HtifIO } } -class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { - val icache = Module(new Frontend, { case CacheName => "L1I"; case CoreName => "Rocket" }) - val dcache = Module(new HellaCache, { case CacheName => "L1D" }) - val ptw = Module(new PTW(params(NPTWPorts))) - val core = Module(new Rocket, { case CoreName => "Rocket" }) +class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile(resetSignal)(p) { + //TODO + val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) + val icache = Module(new Frontend()(p.alterPartial({ + case CacheName => "L1I" + case CoreName => "Rocket" }))) + val dcache = Module(new HellaCache()(dcacheParams)) + val ptw = Module(new PTW(p(NPTWPorts))(dcacheParams)) + val core = Module(new Rocket()(p.alterPartial({ case CoreName => "Rocket" }))) dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache - val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) + val dcArb = Module(new HellaCacheArbiter(p(NDCachePorts))(dcacheParams)) dcArb.io.requestor(0) <> ptw.io.mem dcArb.io.requestor(1) <> core.io.dmem dcache.io.cpu <> dcArb.io.mem @@ -39,20 +44,16 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) 
{ core.io.ptw <> ptw.io.dpath //If so specified, build an FPU module and wire it in - params(BuildFPU) - .map { bf => bf() } - .foreach { fpu => - core.io.fpu <> fpu.io - } + p(BuildFPU) foreach { fpu => core.io.fpu <> fpu(p).io } // Connect the caches and ROCC to the outer memory system io.cached <> dcache.io.mem // If so specified, build an RoCC module and wire it in // otherwise, just hookup the icache - io.uncached <> params(BuildRoCC).map { buildItHere => + io.uncached <> p(BuildRoCC).map { buildItHere => val rocc = buildItHere() val memArb = Module(new ClientTileLinkIOArbiter(3)) - val dcIF = Module(new SimpleHellaCacheIF) + val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) core.io.rocc <> rocc.io dcIF.io.requestor <> rocc.io.mem dcArb.io.requestor(2) <> dcIF.io.cache diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 478dd8f2..799c869b 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -9,17 +9,19 @@ import scala.math._ case object NTLBEntries extends Field[Int] -abstract trait TLBParameters extends CoreParameters { - val addrMap = new AddrHashMap(params(NastiAddrMap)) - val entries = params(NTLBEntries) +trait HasTLBParameters extends HasCoreParameters { + val addrMap = new AddrHashMap(p(NastiAddrMap)) + val entries = p(NTLBEntries) val camAddrBits = ceil(log(entries)/log(2)).toInt val camTagBits = asIdBits + vpnBits } -abstract class TLBBundle extends Bundle with TLBParameters -abstract class TLBModule extends Module with TLBParameters +abstract class TLBModule(implicit val p: Parameters) extends Module + with HasTLBParameters +abstract class TLBBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) + with HasTLBParameters -class CAMIO extends TLBBundle { +class CAMIO(implicit p: Parameters) extends TLBBundle()(p) { val clear = Bool(INPUT) val clear_mask = Bits(INPUT, entries) val tag = Bits(INPUT, camTagBits) @@ -32,7 +34,7 @@ class CAMIO extends TLBBundle { val 
write_addr = UInt(INPUT, camAddrBits) } -class RocketCAM extends TLBModule { +class RocketCAM(implicit p: Parameters) extends TLBModule()(p) { val io = new CAMIO val cam_tags = Mem(entries, Bits(width = camTagBits)) @@ -75,7 +77,7 @@ class PseudoLRU(n: Int) } } -class TLBReq extends CoreBundle { +class TLBReq(implicit p: Parameters) extends CoreBundle()(p) { val asid = UInt(width = asIdBits) val vpn = UInt(width = vpnBits+1) val passthrough = Bool() @@ -83,7 +85,7 @@ class TLBReq extends CoreBundle { val store = Bool() } -class TLBRespNoHitIndex extends CoreBundle { +class TLBRespNoHitIndex(implicit p: Parameters) extends CoreBundle()(p) { // lookup responses val miss = Bool(OUTPUT) val ppn = UInt(OUTPUT, ppnBits) @@ -92,11 +94,11 @@ class TLBRespNoHitIndex extends CoreBundle { val xcpt_if = Bool(OUTPUT) } -class TLBResp extends TLBRespNoHitIndex with TLBParameters { +class TLBResp(implicit p: Parameters) extends TLBRespNoHitIndex()(p) with HasTLBParameters { val hit_idx = UInt(OUTPUT, entries) } -class TLB extends TLBModule { +class TLB(implicit p: Parameters) extends TLBModule()(p) { val io = new Bundle { val req = Decoupled(new TLBReq).flip val resp = new TLBResp @@ -177,7 +179,7 @@ class TLB extends TLBModule { io.resp.xcpt_st := !addr_ok || !addr_prot.w || bad_va || tlb_hit && !(w_array & tag_cam.io.hits).orR io.resp.xcpt_if := !addr_ok || !addr_prot.x || bad_va || tlb_hit && !(x_array & tag_cam.io.hits).orR io.resp.miss := tlb_miss - io.resp.ppn := Mux(vm_enabled, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(params(PPNBits)-1,0)) + io.resp.ppn := Mux(vm_enabled, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(ppnBits-1,0)) io.resp.hit_idx := tag_cam.io.hits // clear invalid entries on access, or all entries on a TLB flush From 8173695800f9ae682189a5b14621d1dc0f68ac18 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 6 Oct 2015 18:20:19 -0700 Subject: [PATCH 0908/1087] added HasAddrMapParameters --- rocket/src/main/scala/btb.scala | 2 +- 
rocket/src/main/scala/rocket.scala | 10 +--------- rocket/src/main/scala/tlb.scala | 3 +-- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index d0f51e6d..8aa64112 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -10,7 +10,7 @@ case object BtbKey extends Field[BtbParameters] case class BtbParameters(nEntries: Int = 62, nRAS: Int = 2, updatesOutOfOrder: Boolean = false) abstract trait HasBtbParameters extends HasCoreParameters { - val matchBits = p(PgIdxBits) + val matchBits = pgIdxBits val entries = p(BtbKey).nEntries val nRAS = p(BtbKey).nRAS val updatesOutOfOrder = p(BtbKey).updatesOutOfOrder diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 6ff5a414..6f084aac 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -22,17 +22,9 @@ case object CoreDataBits extends Field[Int] case object CoreDCacheReqTagBits extends Field[Int] case object NCustomMRWCSRs extends Field[Int] -trait HasCoreParameters { +trait HasCoreParameters extends HasAddrMapParameters { implicit val p: Parameters val xLen = p(XLen) - val paddrBits = p(PAddrBits) - val vaddrBits = p(VAddrBits) - val pgIdxBits = p(PgIdxBits) - val ppnBits = p(PPNBits) - val vpnBits = p(VPNBits) - val pgLevels = p(PgLevels) - val pgLevelBits = p(PgLevelBits) - val asIdBits = p(ASIdBits) val retireWidth = p(RetireWidth) val fetchWidth = p(FetchWidth) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 799c869b..eb6b72ce 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -9,8 +9,7 @@ import scala.math._ case object NTLBEntries extends Field[Int] -trait HasTLBParameters extends HasCoreParameters { - val addrMap = new AddrHashMap(p(NastiAddrMap)) +trait HasTLBParameters extends HasAddrMapParameters { val entries = p(NTLBEntries) val camAddrBits = 
ceil(log(entries)/log(2)).toInt val camTagBits = asIdBits + vpnBits From 4508666d964d8948af01bfeb3000d0827ef4760f Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 6 Oct 2015 18:22:23 -0700 Subject: [PATCH 0909/1087] log2ceil --- rocket/src/main/scala/tlb.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index eb6b72ce..ee624896 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -11,7 +11,7 @@ case object NTLBEntries extends Field[Int] trait HasTLBParameters extends HasAddrMapParameters { val entries = p(NTLBEntries) - val camAddrBits = ceil(log(entries)/log(2)).toInt + val camAddrBits = log2Ceil(entries) val camTagBits = asIdBits + vpnBits } From 68cb54bc6818482cb4982f0e0426accac31da1f3 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 13 Oct 2015 23:42:53 -0700 Subject: [PATCH 0910/1087] refactor tilelink params --- rocket/src/main/scala/icache.scala | 5 +++-- rocket/src/main/scala/nbdcache.scala | 11 ++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index b7f2c565..de3c1fd4 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -5,8 +5,9 @@ import uncore._ import Util._ trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters { - val outerDataBeats = p(TLDataBeats) - val outerDataBits = p(TLDataBits) + val outerDataBeats = p(TLKey(p(TLId))).dataBeats + val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat + val outerAddrBits = p(TLKey(p(TLId))).addrBits val refillCyclesPerBeat = outerDataBits/rowBits val refillCycles = refillCyclesPerBeat*outerDataBeats } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index d71cbf8f..111accef 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -18,7 +18,7 @@ trait 
HasL1HellaCacheParameters extends HasL1CacheParameters { val wordBits = p(WordBits) val wordBytes = wordBits/8 val wordOffBits = log2Up(wordBytes) - val beatBytes = p(CacheBlockBytes) / p(TLDataBeats) + val beatBytes = p(CacheBlockBytes) / outerDataBeats val beatWords = beatBytes / wordBytes val beatOffBits = log2Up(beatBytes) val idxMSB = untagBits-1 @@ -32,6 +32,7 @@ trait HasL1HellaCacheParameters extends HasL1CacheParameters { val sdqDepth = p(StoreDataQueueDepth) val nMSHRs = p(NMSHRs) val nIOMSHRs = p(NIOMSHRs) + val lrscCycles = p(LRSCCycles) } abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module @@ -713,11 +714,11 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val mem = new ClientTileLinkIO } - require(p(LRSCCycles) >= 32) // ISA requires 16-insn LRSC sequences to succeed + require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed require(isPow2(nSets)) require(isPow2(nWays)) // TODO: relax this - require(p(RowBits) <= p(TLDataBits)) - require(paddrBits-blockOffBits == p(TLBlockAddrBits) ) + require(rowBits <= outerDataBits) + require(paddrBits-blockOffBits == outerAddrBits) require(untagBits <= pgIdxBits) val wb = Module(new WritebackUnit) @@ -861,7 +862,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (lrsc_valid) { lrsc_count := lrsc_count - 1 } when (s2_valid_masked && s2_hit || s2_replay) { when (s2_lr) { - when (!lrsc_valid) { lrsc_count := p(LRSCCycles)-1 } + when (!lrsc_valid) { lrsc_count := lrscCycles-1 } lrsc_addr := s2_req.addr >> blockOffBits } when (s2_sc) { From 969ecaecf8222748845d68e060857635060a15ca Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 14 Oct 2015 14:16:47 -0700 Subject: [PATCH 0911/1087] pass parameters to BuildRoCC --- rocket/src/main/scala/tile.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 623c64fc..b3419866 
100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -9,7 +9,7 @@ import Util._ case object CoreName extends Field[String] case object NDCachePorts extends Field[Int] case object NPTWPorts extends Field[Int] -case object BuildRoCC extends Field[Option[() => RoCC]] +case object BuildRoCC extends Field[Option[Parameters => RoCC]] abstract class Tile(resetSignal: Bool = null) (implicit p: Parameters) extends Module(_reset = resetSignal) { @@ -51,7 +51,7 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( // If so specified, build an RoCC module and wire it in // otherwise, just hookup the icache io.uncached <> p(BuildRoCC).map { buildItHere => - val rocc = buildItHere() + val rocc = buildItHere(p) val memArb = Module(new ClientTileLinkIOArbiter(3)) val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) core.io.rocc <> rocc.io From 1441590c3bb91ffca5ccee2218fa7c69a543c790 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 16 Oct 2015 19:11:57 -0700 Subject: [PATCH 0912/1087] add enabled field to BTBParameters --- rocket/src/main/scala/btb.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 8aa64112..84295f27 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -7,7 +7,12 @@ import junctions._ import Util._ case object BtbKey extends Field[BtbParameters] -case class BtbParameters(nEntries: Int = 62, nRAS: Int = 2, updatesOutOfOrder: Boolean = false) + +case class BtbParameters( + enabled: Boolean = true, + nEntries: Int = 62, + nRAS: Int = 2, + updatesOutOfOrder: Boolean = false) abstract trait HasBtbParameters extends HasCoreParameters { val matchBits = pgIdxBits From 6f8997bee9b7e975416e06bb76272a7f8cfd690b Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Fri, 16 Oct 2015 19:12:21 -0700 Subject: [PATCH 0913/1087] Minor refactor of StoreGen/AMOALU. 
--- rocket/src/main/scala/nbdcache.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 111accef..19e898ec 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -161,8 +161,8 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val req_cmd_sc = req.cmd === M_XSC val grant_word = Reg(UInt(width = wordBits)) - val storegen = new StoreGen(req.typ, req.addr, req.data) - val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc) + val storegen = new StoreGen64(req.typ, req.addr, req.data) + val loadgen = new LoadGen64(req.typ, req.addr, grant_word, req_cmd_sc) val beat_offset = req.addr(beatOffBits - 1, wordOffBits) val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) @@ -992,7 +992,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // load data subword mux/sign extension val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) - val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc) + val loadgen = new LoadGen64(s2_req.typ, s2_req.addr, s2_data_word, s2_sc) amoalu.io.addr := s2_req.addr amoalu.io.cmd := s2_req.cmd From 1a1185be3f57726a5207e01b86c1c7a261c6605e Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Tue, 20 Oct 2015 15:02:24 -0700 Subject: [PATCH 0914/1087] Vectorize ROCC and Tile memory interfaces --- rocket/src/main/scala/csr.scala | 2 +- rocket/src/main/scala/icache.scala | 1 - rocket/src/main/scala/nbdcache.scala | 1 - rocket/src/main/scala/rocc.scala | 9 +++--- rocket/src/main/scala/rocket.scala | 22 ++++++------- rocket/src/main/scala/tile.scala | 47 ++++++++++++++-------------- 6 files changed, 41 insertions(+), 41 deletions(-) diff --git a/rocket/src/main/scala/csr.scala 
b/rocket/src/main/scala/csr.scala index cb9bb685..a6862b45 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -131,7 +131,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) - val irq_rocc = Bool(!p(BuildRoCC).isEmpty) && io.rocc.interrupt + val irq_rocc = Bool(usingRoCC) && io.rocc.interrupt io.interrupt_cause := 0 io.interrupt := io.interrupt_cause(xLen-1) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index de3c1fd4..cea6b122 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -7,7 +7,6 @@ import Util._ trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters { val outerDataBeats = p(TLKey(p(TLId))).dataBeats val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat - val outerAddrBits = p(TLKey(p(TLId))).addrBits val refillCyclesPerBeat = outerDataBits/rowBits val refillCycles = refillCyclesPerBeat*outerDataBeats } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 19e898ec..f99f4979 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -718,7 +718,6 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { require(isPow2(nSets)) require(isPow2(nWays)) // TODO: relax this require(rowBits <= outerDataBits) - require(paddrBits-blockOffBits == outerAddrBits) require(untagBits <= pgIdxBits) val wb = Module(new WritebackUnit) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 0cfa5b53..ebec55be 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -6,7 +6,8 @@ import Chisel._ import uncore._ import Util._ -case object RoCCMaxTaggedMemXacts extends Field[Int] +case object RoccMaxTaggedMemXacts extends Field[Int] +case object RoccNMemChannels extends Field[Int] class RoCCInstruction 
extends Bundle { @@ -41,7 +42,7 @@ class RoCCInterface(implicit p: Parameters) extends Bundle { // These should be handled differently, eventually val imem = new ClientUncachedTileLinkIO - val dmem = new ClientUncachedTileLinkIO + val dmem = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO) val iptw = new TLBPTWIO val dptw = new TLBPTWIO val pptw = new TLBPTWIO @@ -120,8 +121,8 @@ class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) { io.imem.acquire.valid := false io.imem.grant.ready := false - io.dmem.acquire.valid := false - io.dmem.grant.ready := false + io.dmem.head.acquire.valid := false + io.dmem.head.grant.ready := false io.iptw.req.valid := false io.dptw.req.valid := false io.pptw.req.valid := false diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 6f084aac..041e4de4 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -7,7 +7,7 @@ import junctions._ import uncore._ import Util._ -case object BuildFPU extends Field[Option[Parameters => FPU]] +case object UseFPU extends Field[Boolean] case object FDivSqrt extends Field[Boolean] case object XLen extends Field[Int] case object FetchWidth extends Field[Int] @@ -26,26 +26,26 @@ trait HasCoreParameters extends HasAddrMapParameters { implicit val p: Parameters val xLen = p(XLen) + val usingVM = p(UseVM) + val usingFPU = p(UseFPU) + val usingFDivSqrt = p(FDivSqrt) + val usingRoCC = !p(BuildRoCC).isEmpty + val usingFastMulDiv = p(FastMulDiv) + val fastLoadWord = p(FastLoadWord) + val fastLoadByte = p(FastLoadByte) + val retireWidth = p(RetireWidth) val fetchWidth = p(FetchWidth) val coreInstBits = p(CoreInstBits) val coreInstBytes = coreInstBits/8 val coreDataBits = xLen val coreDataBytes = coreDataBits/8 - val coreDCacheReqTagBits = p(CoreDCacheReqTagBits) + val coreDCacheReqTagBits = 7 + (2 + (if(!usingRoCC) 0 else 1)) val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits val vaddrBitsExtended = 
vaddrBits + (vaddrBits < xLen).toInt val mmioBase = p(MMIOBase) val nCustomMrwCsrs = p(NCustomMRWCSRs) - val usingVM = p(UseVM) - val usingFPU = !p(BuildFPU).isEmpty - val usingFDivSqrt = p(FDivSqrt) - val usingRoCC = !p(BuildRoCC).isEmpty - val usingFastMulDiv = p(FastMulDiv) - val fastLoadWord = p(FastLoadWord) - val fastLoadByte = p(FastLoadByte) - // Print out log of committed instructions and their writeback values. // Requires post-processing due to out-of-order writebacks. val enableCommitLog = false @@ -486,7 +486,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp) io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) - require(p(CoreDCacheReqTagBits) >= 6) + require(coreDCacheReqTagBits >= 6) io.dmem.invalidate_lr := wb_xcpt io.rocc.cmd.valid := wb_rocc_val diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index b3419866..ce29b18e 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -7,31 +7,33 @@ import uncore._ import Util._ case object CoreName extends Field[String] -case object NDCachePorts extends Field[Int] -case object NPTWPorts extends Field[Int] case object BuildRoCC extends Field[Option[Parameters => RoCC]] abstract class Tile(resetSignal: Bool = null) (implicit p: Parameters) extends Module(_reset = resetSignal) { + val usingRocc = !p(BuildRoCC).isEmpty + val nDCachePorts = 2 + (if(!usingRocc) 0 else 1) + val nPTWPorts = 2 + (if(!usingRocc) 0 else 3) + val nCachedTileLinkPorts = 1 + val nUncachedTileLinkPorts = 1 + (if(!usingRocc) 0 else p(RoccNMemChannels)) + val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) val io = new Bundle { - val cached = new ClientTileLinkIO - val uncached = new ClientUncachedTileLinkIO + val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO) + val 
uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO) val host = new HtifIO } } class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile(resetSignal)(p) { - //TODO - val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) - val icache = Module(new Frontend()(p.alterPartial({ - case CacheName => "L1I" - case CoreName => "Rocket" }))) - val dcache = Module(new HellaCache()(dcacheParams)) - val ptw = Module(new PTW(p(NPTWPorts))(dcacheParams)) val core = Module(new Rocket()(p.alterPartial({ case CoreName => "Rocket" }))) + val icache = Module(new Frontend()(p.alterPartial({ + case CacheName => "L1I" + case CoreName => "Rocket" }))) + val dcache = Module(new HellaCache()(dcacheParams)) + val ptw = Module(new PTW(nPTWPorts)(dcacheParams)) dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache - val dcArb = Module(new HellaCacheArbiter(p(NDCachePorts))(dcacheParams)) + val dcArb = Module(new HellaCacheArbiter(nDCachePorts)(dcacheParams)) dcArb.io.requestor(0) <> ptw.io.mem dcArb.io.requestor(1) <> core.io.dmem dcache.io.cpu <> dcArb.io.mem @@ -44,25 +46,24 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( core.io.ptw <> ptw.io.dpath //If so specified, build an FPU module and wire it in - p(BuildFPU) foreach { fpu => core.io.fpu <> fpu(p).io } + if (p(UseFPU)) core.io.fpu <> Module(new FPU()(p)).io - // Connect the caches and ROCC to the outer memory system - io.cached <> dcache.io.mem - // If so specified, build an RoCC module and wire it in - // otherwise, just hookup the icache + // Connect the caches and ROCC to the outer memory system + io.cached.head <> dcache.io.mem + // If so specified, build an RoCC module and wire it to core + TileLink ports, + // otherwise just hookup the icache io.uncached <> p(BuildRoCC).map { buildItHere => val rocc = buildItHere(p) - val memArb = Module(new ClientTileLinkIOArbiter(3)) + val iMemArb = Module(new 
ClientTileLinkIOArbiter(2)) val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) core.io.rocc <> rocc.io dcIF.io.requestor <> rocc.io.mem dcArb.io.requestor(2) <> dcIF.io.cache - memArb.io.in(0) <> icache.io.mem - memArb.io.in(1) <> rocc.io.imem - memArb.io.in(2) <> rocc.io.dmem + iMemArb.io.in(0) <> icache.io.mem + iMemArb.io.in(1) <> rocc.io.imem ptw.io.requestor(2) <> rocc.io.iptw ptw.io.requestor(3) <> rocc.io.dptw ptw.io.requestor(4) <> rocc.io.pptw - memArb.io.out - }.getOrElse(icache.io.mem) + rocc.io.dmem :+ iMemArb.io.out + }.getOrElse(List(icache.io.mem)) } From c68d9f8137f7ccb4249d3e7ea46434dddb1cc0b2 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 20 Oct 2015 23:25:23 -0700 Subject: [PATCH 0915/1087] make ProbeUnit state machine easier to understand --- rocket/src/main/scala/nbdcache.scala | 96 +++++++++++++++++----------- 1 file changed, 57 insertions(+), 39 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index f99f4979..70f64167 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -4,7 +4,7 @@ package rocket import Chisel._ import uncore._ -import junctions.MMIOBase +import junctions._ import Util._ case object WordBits extends Field[Int] @@ -405,7 +405,7 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { val refillMux = Wire(Vec(new L1RefillReq, nMSHRs)) val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs)) val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) - val mem_req_arb = Module(new LockingArbiter( + val mem_req_arb = Module(new JunctionsCountingArbiter( new Acquire, nMSHRs + nIOMSHRs, outerDataBeats, @@ -597,54 +597,25 @@ class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { val block_state = new ClientMetadata().asInput } - val s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = 
Enum(UInt(), 8) + val (s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: + s_mshr_resp :: s_release :: s_writeback_req :: s_writeback_resp :: + s_meta_write :: Nil) = Enum(UInt(), 9) val state = Reg(init=s_invalid) val old_coh = Reg(new ClientMetadata) val way_en = Reg(Bits()) val req = Reg(new ProbeInternal) val tag_matches = way_en.orR - when (state === s_meta_write && io.meta_write.ready) { - state := s_invalid - } - when (state === s_writeback_resp && io.wb_req.ready) { - state := s_meta_write - } - when (state === s_writeback_req && io.wb_req.ready) { - state := s_writeback_resp - } - when (state === s_release && io.rep.ready) { - state := s_invalid - when (tag_matches) { - state := Mux(old_coh.requiresVoluntaryWriteback(), - s_writeback_req, s_meta_write) - } - } - when (state === s_mshr_req) { - state := s_release - old_coh := io.block_state - way_en := io.way_en - when (!io.mshr_rdy) { state := s_meta_read } - } - when (state === s_meta_resp) { - state := s_mshr_req - } - when (state === s_meta_read && io.meta_read.ready) { - state := s_meta_resp - } - when (state === s_invalid && io.req.valid) { - state := s_meta_read - req := io.req.bits - } - val miss_coh = ClientMetadata.onReset val reply_coh = Mux(tag_matches, old_coh, miss_coh) val reply = reply_coh.makeRelease(req) io.req.ready := state === s_invalid - io.rep.valid := state === s_release && - !(tag_matches && old_coh.requiresVoluntaryWriteback()) // Otherwise WBU will issue release + io.rep.valid := state === s_release io.rep.bits := reply + assert(!io.rep.valid || !io.rep.bits.hasData(), + "ProbeUnit should not send releases with data") + io.meta_read.valid := state === s_meta_read io.meta_read.bits.idx := req.addr_block io.meta_read.bits.tag := req.addr_block >> idxBits @@ -658,6 +629,53 @@ class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.wb_req.valid := state === s_writeback_req io.wb_req.bits := reply io.wb_req.bits.way_en := way_en + + // state === s_invalid + 
when (io.req.fire()) { + state := s_meta_read + req := io.req.bits + } + + // state === s_meta_read + when (io.meta_read.fire()) { + state := s_meta_resp + } + + // we need to wait one cycle for the metadata to be read from the array + when (state === s_meta_resp) { + state := s_mshr_req + } + + when (state === s_mshr_req) { + state := s_mshr_resp + old_coh := io.block_state + way_en := io.way_en + // if the read didn't go through, we need to retry + when (!io.mshr_rdy) { state := s_meta_read } + } + + when (state === s_mshr_resp) { + val needs_writeback = tag_matches && old_coh.requiresVoluntaryWriteback() + state := Mux(needs_writeback, s_writeback_req, s_release) + } + + when (state === s_release && io.rep.ready) { + state := Mux(tag_matches, s_meta_write, s_invalid) + } + + // state === s_writeback_req + when (io.wb_req.fire()) { + state := s_writeback_resp + } + + // wait for the writeback request to finish before updating the metadata + when (state === s_writeback_resp && io.wb_req.ready) { + state := s_meta_write + } + + when (io.meta_write.fire()) { + state := s_invalid + } } class DataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { @@ -930,7 +948,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { metaWriteArb.io.in(0) <> mshrs.io.meta_write // probes and releases - val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, (r: Release) => r.hasMultibeatData())) + val releaseArb = Module(new JunctionsCountingArbiter(new Release, 2, outerDataBeats, (r: Release) => r.hasMultibeatData())) io.mem.release <> releaseArb.io.out prober.io.req.valid := io.mem.probe.valid && !lrsc_valid From 0b7c828b5d5ec5c8395ae7e81836cb194e72ed9b Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 21 Oct 2015 09:15:51 -0700 Subject: [PATCH 0916/1087] go back to using standard LockingArbiter --- rocket/src/main/scala/nbdcache.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 70f64167..7b6d4a11 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -405,7 +405,7 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { val refillMux = Wire(Vec(new L1RefillReq, nMSHRs)) val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs)) val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) - val mem_req_arb = Module(new JunctionsCountingArbiter( + val mem_req_arb = Module(new LockingArbiter( new Acquire, nMSHRs + nIOMSHRs, outerDataBeats, @@ -948,7 +948,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { metaWriteArb.io.in(0) <> mshrs.io.meta_write // probes and releases - val releaseArb = Module(new JunctionsCountingArbiter(new Release, 2, outerDataBeats, (r: Release) => r.hasMultibeatData())) + val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, (r: Release) => r.hasMultibeatData())) io.mem.release <> releaseArb.io.out prober.io.req.valid := io.mem.probe.valid && !lrsc_valid From 4f8468b60f69a83cdc99330fab21e93507cdb307 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Wed, 21 Oct 2015 18:18:32 -0700 Subject: [PATCH 0917/1087] depend on external cde library --- rocket/src/main/scala/arbiter.scala | 1 + rocket/src/main/scala/btb.scala | 1 + rocket/src/main/scala/csr.scala | 1 + rocket/src/main/scala/dpath_alu.scala | 1 + rocket/src/main/scala/fpu.scala | 1 + rocket/src/main/scala/frontend.scala | 1 + rocket/src/main/scala/icache.scala | 1 + rocket/src/main/scala/nbdcache.scala | 1 + rocket/src/main/scala/ptw.scala | 1 + rocket/src/main/scala/rocc.scala | 1 + rocket/src/main/scala/rocket.scala | 1 + rocket/src/main/scala/tile.scala | 1 + rocket/src/main/scala/tlb.scala | 1 + rocket/src/main/scala/util.scala | 1 + 14 files changed, 14 insertions(+) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala 
index 14f20735..27bfcc86 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -4,6 +4,7 @@ package rocket import Chisel._ import uncore._ +import cde.{Parameters, Field} class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module { diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 84295f27..3aa1b6a6 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -4,6 +4,7 @@ package rocket import Chisel._ import junctions._ +import cde.{Parameters, Field} import Util._ case object BtbKey extends Field[BtbParameters] diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index a6862b45..5898941c 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -5,6 +5,7 @@ package rocket import Chisel._ import Util._ import Instructions._ +import cde.{Parameters, Field} import uncore._ import scala.math._ diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 5142c611..58942266 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -3,6 +3,7 @@ package rocket import Chisel._ +import cde.{Parameters, Field} import Instructions._ object ALU diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index c456413c..e5582ebc 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -7,6 +7,7 @@ import Instructions._ import Util._ import FPConstants._ import uncore.constants.MemoryOpConstants._ +import cde.{Parameters, Field} case object SFMALatency case object DFMALatency diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 15fb6266..0886625e 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -3,6 +3,7 @@ package rocket import Chisel._ import uncore._ import Util._ +import cde.{Parameters, Field} class 
FrontendReq(implicit p: Parameters) extends CoreBundle()(p) { val pc = UInt(width = vaddrBitsExtended) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index cea6b122..a56354ae 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -3,6 +3,7 @@ package rocket import Chisel._ import uncore._ import Util._ +import cde.{Parameters, Field} trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters { val outerDataBeats = p(TLKey(p(TLId))).dataBeats diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 7b6d4a11..2223ade0 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -5,6 +5,7 @@ package rocket import Chisel._ import uncore._ import junctions._ +import cde.{Parameters, Field} import Util._ case object WordBits extends Field[Int] diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 6f1d8534..1a551bda 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -5,6 +5,7 @@ package rocket import Chisel._ import uncore._ import Util._ +import cde.{Parameters, Field} class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { val addr = UInt(width = vpnBits) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index ebec55be..ff059811 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -5,6 +5,7 @@ package rocket import Chisel._ import uncore._ import Util._ +import cde.{Parameters, Field} case object RoccMaxTaggedMemXacts extends Field[Int] case object RoccNMemChannels extends Field[Int] diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 041e4de4..6c245cf5 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -6,6 +6,7 @@ import Chisel._ import junctions._ import uncore._ import Util._ +import 
cde.{Parameters, Field} case object UseFPU extends Field[Boolean] case object FDivSqrt extends Field[Boolean] diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index ce29b18e..c5e3e851 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -5,6 +5,7 @@ package rocket import Chisel._ import uncore._ import Util._ +import cde.{Parameters, Field} case object CoreName extends Field[String] case object BuildRoCC extends Field[Option[Parameters => RoCC]] diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index ee624896..8c6f9528 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -6,6 +6,7 @@ import Chisel._ import Util._ import junctions._ import scala.math._ +import cde.{Parameters, Field} case object NTLBEntries extends Field[Int] diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 2ac3a0b8..4050be5b 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -5,6 +5,7 @@ package rocket import Chisel._ import uncore._ import scala.math._ +import cde.{Parameters, Field} object Util { implicit def intToUInt(x: Int): UInt = UInt(x) From 0c587704a7fe7c97a6705c7dd78b69b07ad6c145 Mon Sep 17 00:00:00 2001 From: Jim Lawson Date: Thu, 22 Oct 2015 11:37:20 -0700 Subject: [PATCH 0918/1087] Add ability to generate libraryDependency on cde. 
--- rocket/build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/build.sbt b/rocket/build.sbt index bfc36cc5..97c51700 100644 --- a/rocket/build.sbt +++ b/rocket/build.sbt @@ -6,5 +6,5 @@ name := "rocket" scalaVersion := "2.11.6" -libraryDependencies ++= (Seq("chisel", "hardfloat", "uncore", "junctions").map { +libraryDependencies ++= (Seq("chisel", "hardfloat", "uncore", "junctions", "cde").map { dep: String => sys.props.get(dep + "Version") map { "edu.berkeley.cs" %% dep % _ }}).flatten From c7235fecb59d34a0c500a218cb2adbbc36a41ac0 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sun, 25 Oct 2015 10:23:46 -0700 Subject: [PATCH 0919/1087] further state optimization in CSRfile when not UseVM --- rocket/src/main/scala/csr.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 5898941c..627831a3 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -293,7 +293,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) io.fatc := insn_sfence_vm io.evec := Mux(io.exception || csr_xcpt, (reg_mstatus.prv << 6) + MTVEC, Mux(maybe_insn_redirect_trap, reg_stvec.sextTo(vaddrBitsExtended), - Mux(reg_mstatus.prv(1), reg_mepc, reg_sepc))) + Mux(reg_mstatus.prv(1) || Bool(!p(UseVM)), reg_mepc, reg_sepc))) io.ptbr := reg_sptbr io.csr_xcpt := csr_xcpt io.eret := insn_ret || insn_redirect_trap From 2f515b2af652f1ed8a6cc99474d1c342a6c61716 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 6 Nov 2015 23:25:33 -0800 Subject: [PATCH 0920/1087] Reduce critical path for fdiv valid signal --- rocket/src/main/scala/fpu.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index e5582ebc..721c9898 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -445,6 +445,7 @@ class FPU(implicit p: Parameters) extends 
CoreModule()(p) { val divSqrt_wdata = Wire(Bits()) val divSqrt_flags = Wire(Bits()) val divSqrt_in_flight = Reg(init=Bool(false)) + val divSqrt_killed = Reg(Bool()) // writeback arbitration case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult) @@ -527,8 +528,7 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { val divSqrt = Module(new hardfloat.divSqrtRecodedFloat64) divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div) val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt - val divSqrt_wb_hazard = wen.orR - divSqrt.io.inValid := mem_reg_valid && !divSqrt_wb_hazard && !divSqrt_in_flight && !io.killm && (mem_ctrl.div || mem_ctrl.sqrt) + divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) divSqrt.io.sqrtOp := mem_ctrl.sqrt divSqrt.io.a := fpiu.io.as_double.in1 divSqrt.io.b := fpiu.io.as_double.in2 @@ -536,13 +536,14 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { when (divSqrt.io.inValid && divSqrt_inReady) { divSqrt_in_flight := true + divSqrt_killed := killm divSqrt_single := mem_ctrl.single divSqrt_waddr := mem_reg_inst(11,7) divSqrt_rm := divSqrt.io.roundingMode } when (divSqrt_outValid) { - divSqrt_wen := true + divSqrt_wen := !divSqrt_killed divSqrt_wdata_double := divSqrt.io.out divSqrt_in_flight := false divSqrt_flags_double := divSqrt.io.exceptionFlags From 1e259a55daf59c661b4c7551dfd6ce7d32c2acbe Mon Sep 17 00:00:00 2001 From: jackkoenig Date: Sun, 8 Nov 2015 21:16:31 -0800 Subject: [PATCH 0921/1087] Fix SimpleHellaCacheIF assumption about receiving rejected request back 2 cycles later --- rocket/src/main/scala/nbdcache.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2223ade0..2a4645c2 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -751,6 +751,7 @@ class HellaCache(implicit 
p: Parameters) extends L1HellaCacheModule()(p) { val s1_clk_en = Reg(Bool()) val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) + val s2_killed = Reg(next=s1_valid && io.cpu.req.bits.kill) val s2_req = Reg(io.cpu.req.bits) val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd != M_NOP val s2_recycle = Wire(Bool()) @@ -1053,7 +1054,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { uncache_resp.bits := mshrs.io.resp.bits uncache_resp.valid := mshrs.io.resp.valid - val cache_pass = s2_valid || s2_replay + val cache_pass = s2_valid || s2_killed || s2_replay mshrs.io.resp.ready := !cache_pass io.cpu.resp := Mux(cache_pass, cache_resp, uncache_resp) From 19daee10f0c49a60d9cf373538fce69b9d5b74a3 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 12 Nov 2015 11:40:40 -0800 Subject: [PATCH 0922/1087] use default constructors for IOMSHR acquire construction --- rocket/src/main/scala/nbdcache.scala | 33 +++++++++++++++++----------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2a4645c2..52808e6e 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -169,26 +169,33 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) val beat_data = Fill(beatWords, storegen.data) - val addr_byte = req.addr(beatOffBits - 1, 0) - val a_type = Mux(isRead(req.cmd), Acquire.getType, Acquire.putType) - val union = Mux(isRead(req.cmd), - Cat(addr_byte, req.typ, M_XRD), beat_mask) - val s_idle :: s_acquire :: s_grant :: s_resp :: Nil = Enum(Bits(), 4) val state = Reg(init = s_idle) io.req.ready := (state === s_idle) - io.acquire.valid := (state === s_acquire) - io.acquire.bits := Acquire( - is_builtin_type = Bool(true), - a_type = a_type, + val addr_block = req.addr(paddrBits - 1, blockOffBits) + val 
addr_beat = req.addr(blockOffBits - 1, beatOffBits) + val addr_byte = req.addr(beatOffBits - 1, 0) + + val get_acquire = Get( client_xact_id = UInt(id), - addr_block = req.addr(paddrBits - 1, blockOffBits), - addr_beat = req.addr(blockOffBits - 1, beatOffBits), + addr_block = addr_block, + addr_beat = addr_beat, + addr_byte = addr_byte, + operand_size = req.typ, + alloc = Bool(false)) + + val put_acquire = Put( + client_xact_id = UInt(id), + addr_block = addr_block, + addr_beat = addr_beat, data = beat_data, - // alloc bit should always be false - union = Cat(union, Bool(false))) + wmask = beat_mask, + alloc = Bool(false)) + + io.acquire.valid := (state === s_acquire) + io.acquire.bits := Mux(isRead(req.cmd), get_acquire, put_acquire) io.resp.valid := (state === s_resp) io.resp.bits := req From 3c3c946755adf5ab2cf54253c259b32359c4c9ed Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 14 Nov 2015 14:49:17 -0800 Subject: [PATCH 0923/1087] move to new version of hardfloat --- rocket/src/main/scala/fpu.scala | 165 ++++++++++++++++++++++++-------- 1 file changed, 124 insertions(+), 41 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 721c9898..2bf12d96 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -182,6 +182,31 @@ class FPInput extends FPUCtrlSigs { val in3 = Bits(width = 65) } +object ClassifyRecFN { + def apply(expWidth: Int, sigWidth: Int, in: UInt) = { + val sign = in(sigWidth + expWidth) + val exp = in(sigWidth + expWidth - 1, sigWidth - 1) + val sig = in(sigWidth - 2, 0) + + val code = exp(expWidth,expWidth-2) + val codeHi = code(2, 1) + val isSpecial = codeHi === UInt(3) + + val isHighSubnormalIn = exp(expWidth-2, 0) < UInt(2) + val isSubnormal = code === UInt(1) || codeHi === UInt(1) && isHighSubnormalIn + val isNormal = codeHi === UInt(1) && !isHighSubnormalIn || codeHi === UInt(2) + val isZero = code === UInt(0) + val isInf = isSpecial && !exp(expWidth-2) + val isNaN = 
code.andR + val isSNaN = isNaN && !sig(sigWidth-2) + val isQNaN = isNaN && sig(sigWidth-2) + + Cat(isQNaN, isSNaN, isInf && !sign, isNormal && !sign, + isSubnormal && !sign, isZero && !sign, isZero && sign, + isSubnormal && sign, isNormal && sign, isInf && sign) + } +} + class FPToInt extends Module { val io = new Bundle { @@ -197,30 +222,59 @@ class FPToInt extends Module val in = Reg(new FPInput) val valid = Reg(next=io.in.valid) + + def upconvert(x: UInt) = { + val s2d = Module(new hardfloat.RecFNToRecFN(8, 24, 11, 53)) + s2d.io.in := x + s2d.io.roundingMode := UInt(0) + s2d.io.out + } + + val in1_upconvert = upconvert(io.in.bits.in1) + val in2_upconvert = upconvert(io.in.bits.in2) + when (io.in.valid) { - def upconvert(x: UInt) = hardfloat.recodedFloatNToRecodedFloatM(x, Bits(0), 23, 9, 52, 12)._1 in := io.in.bits - when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd != FCMD_MV_XF) { - in.in1 := upconvert(io.in.bits.in1) - in.in2 := upconvert(io.in.bits.in2) + when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd != FCMD_MV_XF && + // need to also check toint because CVT_IF and SQRT overlap + !(io.in.bits.cmd === FCMD_CVT_IF && io.in.bits.toint)) { + in.in1 := in1_upconvert + in.in2 := in2_upconvert } } - val unrec_s = hardfloat.recodedFloatNToFloatN(in.in1, 23, 9) - val unrec_d = hardfloat.recodedFloatNToFloatN(in.in1, 52, 12) + val unrec_s = hardfloat.fNFromRecFN(8, 24, in.in1) + val unrec_d = hardfloat.fNFromRecFN(11, 53, in.in1) val unrec_out = Mux(in.single, Cat(Fill(32, unrec_s(31)), unrec_s), unrec_d) - val classify_s = hardfloat.recodedFloatNClassify(in.in1, 23, 9) - val classify_d = hardfloat.recodedFloatNClassify(in.in1, 52, 12) + val classify_s = ClassifyRecFN(8, 24, in.in1) + val classify_d = ClassifyRecFN(11, 53, in.in1) val classify_out = Mux(in.single, classify_s, classify_d) - val dcmp = Module(new hardfloat.recodedFloatNCompare(52, 12)) + val dcmp = Module(new hardfloat.CompareRecFN(11, 53)) dcmp.io.a := in.in1 dcmp.io.b := 
in.in2 - val dcmp_out = (~in.rm & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR - val dcmp_exc = (~in.rm & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << 4 + dcmp.io.signaling := Bool(true) + val dcmp_out = (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR + val dcmp_exc = dcmp.io.exceptionFlags - val d2i = hardfloat.recodedFloatNToAny(in.in1, in.rm, in.typ ^ 1, 52, 12, 64) + val s2l = Module(new hardfloat.RecFNToIN(8, 24, 64)) + val s2w = Module(new hardfloat.RecFNToIN(8, 24, 32)) + s2l.io.in := in.in1 + s2l.io.roundingMode := in.rm + s2l.io.signedOut := in.typ(0) ^ 1 + s2w.io.in := in.in1 + s2w.io.roundingMode := in.rm + s2w.io.signedOut := in.typ(0) ^ 1 + + val d2l = Module(new hardfloat.RecFNToIN(11, 53, 64)) + val d2w = Module(new hardfloat.RecFNToIN(11, 53, 32)) + d2l.io.in := in.in1 + d2l.io.roundingMode := in.rm + d2l.io.signedOut := in.typ(0) ^ 1 + d2w.io.in := in.in1 + d2w.io.roundingMode := in.rm + d2w.io.signedOut := in.typ(0) ^ 1 io.out.bits.toint := Mux(in.rm(0), classify_out, unrec_out) io.out.bits.store := unrec_out @@ -231,12 +285,19 @@ class FPToInt extends Module io.out.bits.exc := dcmp_exc } when (in.cmd === FCMD_CVT_IF) { - io.out.bits.toint := Mux(in.typ(1), d2i._1, d2i._1(31,0).toSInt).toUInt - io.out.bits.exc := d2i._2 + when (in.single) { + io.out.bits.toint := Mux(in.typ(1), s2l.io.out, s2w.io.out.toSInt).toUInt + val sflags = Mux(in.typ(1), s2l.io.intExceptionFlags, s2w.io.intExceptionFlags) + io.out.bits.exc := Cat(sflags(2, 1).orR, UInt(0, 3), sflags(0)) + } .otherwise { + io.out.bits.toint := Mux(in.typ(1), d2l.io.out, d2w.io.out.toSInt).toUInt + val dflags = Mux(in.typ(1), d2l.io.intExceptionFlags, d2w.io.intExceptionFlags) + io.out.bits.exc := Cat(dflags(2, 1).orR, UInt(0, 3), dflags(0)) + } } io.out.valid := valid - io.out.bits.lt := dcmp.io.a_lt_b + io.out.bits.lt := dcmp.io.lt io.as_double := in } @@ -251,20 +312,36 @@ class IntToFP(val latency: Int) extends Module val mux = Wire(new FPResult) mux.exc := Bits(0) - mux.data := 
hardfloat.floatNToRecodedFloatN(in.bits.in1, 52, 12) + mux.data := hardfloat.recFNFromFN(11, 53, in.bits.in1) when (in.bits.single) { - mux.data := Cat(SInt(-1, 32), hardfloat.floatNToRecodedFloatN(in.bits.in1, 23, 9)) + mux.data := Cat(SInt(-1, 32), hardfloat.recFNFromFN(8, 24, in.bits.in1)) } + val l2s = Module(new hardfloat.INToRecFN(64, 8, 24)) + val w2s = Module(new hardfloat.INToRecFN(32, 8, 24)) + l2s.io.signedIn := in.bits.typ(0) ^ 1 + l2s.io.in := in.bits.in1 + l2s.io.roundingMode := in.bits.rm + w2s.io.signedIn := in.bits.typ(0) ^ 1 + w2s.io.in := in.bits.in1 + w2s.io.roundingMode := in.bits.rm + + val l2d = Module(new hardfloat.INToRecFN(64, 11, 53)) + val w2d = Module(new hardfloat.INToRecFN(32, 11, 53)) + l2d.io.signedIn := in.bits.typ(0) ^ 1 + l2d.io.in := in.bits.in1 + l2d.io.roundingMode := in.bits.rm + w2d.io.signedIn := in.bits.typ(0) ^ 1 + w2d.io.in := in.bits.in1 + w2d.io.roundingMode := in.bits.rm + when (in.bits.cmd === FCMD_CVT_FI) { when (in.bits.single) { - val u = hardfloat.anyToRecodedFloatN(in.bits.in1(63,0), in.bits.rm, in.bits.typ ^ 1, 23, 9, 64) - mux.data := Cat(SInt(-1, 32), u._1) - mux.exc := u._2 + mux.data := Cat(SInt(-1, 32), Mux(in.bits.typ(1), l2s.io.out, w2s.io.out)) + mux.exc := Mux(in.bits.typ(1), l2s.io.exceptionFlags, w2s.io.exceptionFlags) }.otherwise { - val u = hardfloat.anyToRecodedFloatN(in.bits.in1(63,0), in.bits.rm, in.bits.typ ^ 1, 52, 12, 64) - mux.data := u._1 - mux.exc := u._2 + mux.data := Mux(in.bits.typ(1), l2d.io.out, w2d.io.out) + mux.exc := Mux(in.bits.typ(1), l2d.io.exceptionFlags, w2d.io.exceptionFlags) } } @@ -289,8 +366,12 @@ class FPToFP(val latency: Int) extends Module val sign_d = fsgnjSign(in.bits.in1, in.bits.in2, 64, !in.bits.single && isSgnj, in.bits.rm) val fsgnj = Cat(sign_d, in.bits.in1(63,33), sign_s, in.bits.in1(31,0)) - val s2d = hardfloat.recodedFloatNToRecodedFloatM(in.bits.in1, in.bits.rm, 23, 9, 52, 12) - val d2s = hardfloat.recodedFloatNToRecodedFloatM(in.bits.in1, in.bits.rm, 52, 
12, 23, 9) + val s2d = Module(new hardfloat.RecFNToRecFN(8, 24, 11, 53)) + val d2s = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24)) + s2d.io.in := in.bits.in1 + s2d.io.roundingMode := in.bits.rm + d2s.io.in := in.bits.in1 + d2s.io.roundingMode := in.bits.rm val isnan1 = Mux(in.bits.single, in.bits.in1(31,29).andR, in.bits.in1(63,61).andR) val isnan2 = Mux(in.bits.single, in.bits.in2(31,29).andR, in.bits.in2(63,61).andR) @@ -308,18 +389,18 @@ class FPToFP(val latency: Int) extends Module when (isSgnj || isLHS) { mux.data := fsgnj } when (in.bits.cmd === FCMD_CVT_FF) { when (in.bits.single) { - mux.data := Cat(SInt(-1, 32), d2s._1) - mux.exc := d2s._2 + mux.data := Cat(SInt(-1, 32), d2s.io.out) + mux.exc := d2s.io.exceptionFlags }.otherwise { - mux.data := s2d._1 - mux.exc := s2d._2 + mux.data := s2d.io.out + mux.exc := s2d.io.exceptionFlags } } io.out <> Pipe(in.valid, mux, latency-1) } -class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module +class FPUFMAPipe(val latency: Int, expWidth: Int, sigWidth: Int) extends Module { val io = new Bundle { val in = Valid(new FPInput).flip @@ -341,7 +422,7 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module unless (cmd_fma || cmd_addsub) { in.in3 := zero } } - val fma = Module(new hardfloat.mulAddSubRecodedFloatN(sigWidth, expWidth)) + val fma = Module(new hardfloat.MulAddRecFN(expWidth, sigWidth)) fma.io.op := in.cmd fma.io.roundingMode := in.rm fma.io.a := in.in1 @@ -377,8 +458,8 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { val load_wb_single = RegEnable(io.dmem_resp_type === MT_W || io.dmem_resp_type === MT_WU, io.dmem_resp_val) val load_wb_data = RegEnable(io.dmem_resp_data, io.dmem_resp_val) val load_wb_tag = RegEnable(io.dmem_resp_tag, io.dmem_resp_val) - val rec_s = hardfloat.floatNToRecodedFloatN(load_wb_data, 23, 9) - val rec_d = hardfloat.floatNToRecodedFloatN(load_wb_data, 52, 12) + val rec_s = hardfloat.recFNFromFN(8, 24, load_wb_data) + val 
rec_d = hardfloat.recFNFromFN(11, 53, load_wb_data) val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d) // regfile @@ -415,11 +496,11 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { req.in3 := ex_rs3 req.typ := ex_reg_inst(21,20) - val sfma = Module(new FPUFMAPipe(p(SFMALatency), 23, 9)) + val sfma = Module(new FPUFMAPipe(p(SFMALatency), 8, 24)) sfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && ex_ctrl.single sfma.io.in.bits := req - val dfma = Module(new FPUFMAPipe(p(DFMALatency), 52, 12)) + val dfma = Module(new FPUFMAPipe(p(DFMALatency), 11, 53)) dfma.io.in.valid := ex_reg_valid && ex_ctrl.fma && !ex_ctrl.single dfma.io.in.bits := req @@ -490,8 +571,8 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { when (wen(0) || divSqrt_wen) { regfile(waddr) := wdata if (enableCommitLog) { - val wdata_unrec_s = hardfloat.recodedFloatNToFloatN(wdata(64,0), 23, 9) - val wdata_unrec_d = hardfloat.recodedFloatNToFloatN(wdata(64,0), 52, 12) + val wdata_unrec_s = hardfloat.fNFromRecFN(8, 24, wdata(64,0)) + val wdata_unrec_d = hardfloat.fNFromRecFN(11, 53, wdata(64,0)) val wb_single = (winfo(0) >> 5)(0) printf ("f%d p%d 0x%x\n", waddr, waddr+ UInt(32), Mux(wb_single, Cat(UInt(0,32), wdata_unrec_s), wdata_unrec_d)) @@ -525,7 +606,7 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { val divSqrt_flags_double = Reg(Bits()) val divSqrt_wdata_double = Reg(Bits()) - val divSqrt = Module(new hardfloat.divSqrtRecodedFloat64) + val divSqrt = Module(new hardfloat.DivSqrtRecF64) divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div) val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) @@ -549,8 +630,10 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { divSqrt_flags_double := divSqrt.io.exceptionFlags } - val divSqrt_toSingle = hardfloat.recodedFloatNToRecodedFloatM(divSqrt_wdata_double, 
ex_rm, 52, 12, 23, 9) - divSqrt_wdata := Mux(divSqrt_single, divSqrt_toSingle._1, divSqrt_wdata_double) - divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle._2, Bits(0)) + val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24)) + divSqrt_toSingle.io.in := divSqrt_wdata_double + divSqrt_toSingle.io.roundingMode := ex_rm + divSqrt_wdata := Mux(divSqrt_single, divSqrt_toSingle.io.out, divSqrt_wdata_double) + divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0)) } } From 213c1a4c81a37e230010a25a5a5ce5432a07ff31 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Sat, 14 Nov 2015 16:43:15 -0800 Subject: [PATCH 0924/1087] fix fdiv/fsqrt control bug in fpu --- rocket/src/main/scala/fpu.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 2bf12d96..74f11bbb 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -609,7 +609,7 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { val divSqrt = Module(new hardfloat.DivSqrtRecF64) divSqrt_inReady := Mux(divSqrt.io.sqrtOp, divSqrt.io.inReady_sqrt, divSqrt.io.inReady_div) val divSqrt_outValid = divSqrt.io.outValid_div || divSqrt.io.outValid_sqrt - divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) + divSqrt.io.inValid := mem_reg_valid && (mem_ctrl.div || mem_ctrl.sqrt) && !divSqrt_in_flight divSqrt.io.sqrtOp := mem_ctrl.sqrt divSqrt.io.a := fpiu.io.as_double.in1 divSqrt.io.b := fpiu.io.as_double.in2 From 0f092b9b59402384a9b00a6633060115388b7e61 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 16 Nov 2015 21:51:43 -0800 Subject: [PATCH 0925/1087] Remove IPI network This is now provided via MMIO. 
--- rocket/src/main/scala/csr.scala | 13 ++++++------- rocket/src/main/scala/instructions.scala | 6 ++++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 627831a3..efaf25b8 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -203,6 +203,8 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.mtdeleg -> UInt(0), CSRs.mreset -> UInt(0), CSRs.mtvec -> UInt(MTVEC), + CSRs.miobase -> UInt(p(junctions.MMIOBase)), + CSRs.mipi -> UInt(0), CSRs.mip -> reg_mip.toBits, CSRs.mie -> reg_mie.toBits, CSRs.mscratch -> reg_mscratch, @@ -211,7 +213,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.mcause -> reg_mcause, CSRs.mtimecmp -> reg_mtimecmp, CSRs.mhartid -> io.host.id, - CSRs.send_ipi -> io.host.id, /* don't care */ CSRs.stats -> reg_stats, CSRs.mtohost -> reg_tohost, CSRs.mfromhost -> reg_fromhost) @@ -353,9 +354,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } io.time := reg_cycle - io.host.ipi_req.valid := cpu_wen && decoded_addr(CSRs.send_ipi) - io.host.ipi_req.bits := io.rw.wdata - io.csr_replay := io.host.ipi_req.valid && !io.host.ipi_req.ready + io.csr_replay := false io.csr_stall := reg_wfi when (host_csr_req_fire && !host_csr_bits.rw && decoded_addr(CSRs.mtohost)) { reg_tohost := UInt(0) } @@ -400,6 +399,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } reg_mip.msip := new_mip.msip } + when (decoded_addr(CSRs.mipi)) { + reg_mip.msip := true + } when (decoded_addr(CSRs.mie)) { val new_mie = new MIP().fromBits(wdata) if (usingVM) { @@ -449,9 +451,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } } - io.host.ipi_rep.ready := true - when (io.host.ipi_rep.valid) { reg_mip.msip := true } - when(this.reset) { reg_mstatus.zero1 := 0 reg_mstatus.zero2 := 0 diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 
b5ab974f..b383e385 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -299,7 +299,8 @@ object CSRs { val mtohost = 0x780 val mfromhost = 0x781 val mreset = 0x782 - val send_ipi = 0x783 + val mipi = 0x783 + val miobase = 0x784 val cycleh = 0xc80 val timeh = 0xc81 val instreth = 0xc82 @@ -367,7 +368,8 @@ object CSRs { res += mtohost res += mfromhost res += mreset - res += send_ipi + res += mipi + res += miobase res.toArray } val all32 = { From b0a06a77dbc2d02e65bccf7cf9f360a94e2bbc54 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Fri, 20 Nov 2015 13:33:15 -0800 Subject: [PATCH 0926/1087] fix a few Chisel3 compat issues --- rocket/src/main/scala/nbdcache.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 52808e6e..296b7420 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1079,7 +1079,7 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module val cache = new HellaCacheIO } - val replaying_cmb = Bool() + val replaying_cmb = Wire(Bool()) val replaying = Reg(next = replaying_cmb, init = Bool(false)) replaying_cmb := replaying @@ -1099,10 +1099,10 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module val s1_req_fire = Reg(next=s0_req_fire) val s2_req_fire = Reg(next=s1_req_fire) + io.cache.req <> req_arb.io.out io.cache.req.bits.kill := s2_nack io.cache.req.bits.phys := Bool(true) io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) - io.cache.req <> req_arb.io.out /* replay queues: replayq1 holds the older request. 
From 6d1bf5c014cf736f74ac8f7e261928c1b9628f46 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Nov 2015 18:13:33 -0800 Subject: [PATCH 0927/1087] Use generic LoadGen/StoreGen --- rocket/src/main/scala/nbdcache.scala | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 296b7420..ea07eecf 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -162,8 +162,8 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val req_cmd_sc = req.cmd === M_XSC val grant_word = Reg(UInt(width = wordBits)) - val storegen = new StoreGen64(req.typ, req.addr, req.data) - val loadgen = new LoadGen64(req.typ, req.addr, grant_word, req_cmd_sc) + val storegen = new StoreGen(req.typ, req.addr, req.data, 8) + val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc, 8) val beat_offset = req.addr(beatOffBits - 1, wordOffBits) val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) @@ -200,7 +200,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.resp.valid := (state === s_resp) io.resp.bits := req io.resp.bits.has_data := isRead(req.cmd) - io.resp.bits.data := loadgen.byte | req_cmd_sc + io.resp.bits.data := loadgen.data | req_cmd_sc io.resp.bits.store_data := req.data io.resp.bits.nack := Bool(false) io.resp.bits.replay := io.resp.valid @@ -815,11 +815,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { s2_req.cmd := s1_req.cmd } - val misaligned = - (((s1_req.typ === MT_H) || (s1_req.typ === MT_HU)) && (s1_req.addr(0) != Bits(0))) || - (((s1_req.typ === MT_W) || (s1_req.typ === MT_WU)) && (s1_req.addr(1,0) != Bits(0))) || - ((s1_req.typ === MT_D) && (s1_req.addr(2,0) != Bits(0))) - + val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), 8).misaligned io.cpu.xcpt.ma.ld := s1_read && misaligned 
io.cpu.xcpt.ma.st := s1_write && misaligned io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld @@ -1018,7 +1014,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // load data subword mux/sign extension val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) - val loadgen = new LoadGen64(s2_req.typ, s2_req.addr, s2_data_word, s2_sc) + val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, 8) amoalu.io.addr := s2_req.addr amoalu.io.cmd := s2_req.cmd @@ -1052,7 +1048,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable cache_resp.bits := s2_req cache_resp.bits.has_data := isRead(s2_req.cmd) - cache_resp.bits.data := loadgen.byte | s2_sc_fail + cache_resp.bits.data := loadgen.data | s2_sc_fail cache_resp.bits.store_data := s2_req.data cache_resp.bits.nack := s2_valid && s2_nack cache_resp.bits.replay := s2_replay @@ -1065,7 +1061,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { mshrs.io.resp.ready := !cache_pass io.cpu.resp := Mux(cache_pass, cache_resp, uncache_resp) - io.cpu.resp.bits.data_word_bypass := loadgen.word + io.cpu.resp.bits.data_word_bypass := loadgen.wordData io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid io.cpu.replay_next.valid := s1_replay && s1_read io.cpu.replay_next.bits := s1_req.tag From 4616db469530b16c21a0724963f8ea7416f5471e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Nov 2015 18:27:07 -0800 Subject: [PATCH 0928/1087] Make RegFile/ImmGen usable by zscale --- rocket/src/main/scala/rocket.scala | 87 +++++++++++++++--------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 6c245cf5..2cabc403 100644 --- 
a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -60,6 +60,47 @@ abstract class CoreModule(implicit val p: Parameters) extends Module abstract class CoreBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) with HasCoreParameters +class RegFile(n: Int, w: Int, zero: Boolean = false) { + private val rf = Mem(n, UInt(width = w)) + private def access(addr: UInt) = rf(~addr(log2Up(n)-1,0)) + private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() + private var canRead = true + def read(addr: UInt) = { + require(canRead) + reads += addr -> Wire(UInt()) + reads.last._2 := Mux(Bool(zero) && addr === UInt(0), UInt(0), access(addr)) + reads.last._2 + } + def write(addr: UInt, data: UInt) = { + canRead = false + when (addr != UInt(0)) { + access(addr) := data + for ((raddr, rdata) <- reads) + when (addr === raddr) { rdata := data } + } + } +} + +object ImmGen { + def apply(sel: UInt, inst: UInt) = { + val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt) + val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) + val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt) + val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0), + Mux(sel === IMM_UJ, inst(20).toSInt, + Mux(sel === IMM_SB, inst(7).toSInt, sign))) + val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), inst(30,25)) + val b4_1 = Mux(sel === IMM_U, Bits(0), + Mux(sel === IMM_S || sel === IMM_SB, inst(11,8), + Mux(sel === IMM_Z, inst(19,16), inst(24,21)))) + val b0 = Mux(sel === IMM_S, inst(7), + Mux(sel === IMM_I, inst(20), + Mux(sel === IMM_Z, inst(15), Bits(0)))) + + Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt + } +} + class Rocket(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { val host = new HtifIO @@ -131,7 +172,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val id_reg_fence = Reg(init=Bool(false)) val id_ren = IndexedSeq(id_ctrl.rxs1, id_ctrl.rxs2) val id_raddr = 
IndexedSeq(id_raddr1, id_raddr2) - val rf = new RegFile + val rf = new RegFile(31, xLen) val id_rs = id_raddr.map(rf.read _) val ctrl_killd = Wire(Bool()) @@ -189,7 +230,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val ex_reg_rs_msb = Reg(Vec(UInt(), id_raddr.size)) val ex_rs = for (i <- 0 until id_raddr.size) yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) - val ex_imm = imm(ex_ctrl.sel_imm, ex_reg_inst) + val ex_imm = ImmGen(ex_ctrl.sel_imm, ex_reg_inst) val ex_op1 = MuxLookup(ex_ctrl.sel_alu1, SInt(0), Seq( A1_RS1 -> ex_rs(0).toSInt, A1_PC -> ex_reg_pc.toSInt)) @@ -261,8 +302,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { // memory stage val mem_br_taken = mem_reg_wdata(0) val mem_br_target = mem_reg_pc.toSInt + - Mux(mem_ctrl.branch && mem_br_taken, imm(IMM_SB, mem_reg_inst), - Mux(mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4))) + Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst), + Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), SInt(4))) val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt val mem_npc = (Mux(mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)).toSInt, mem_br_target) & SInt(-2)).toUInt val mem_wrong_npc = mem_npc != ex_reg_pc || !ex_reg_valid @@ -541,24 +582,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) = targets.map(h => h._1 && cond(h._2)).reduce(_||_) - def imm(sel: UInt, inst: UInt) = { - val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt) - val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) - val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt) - val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0), - Mux(sel === IMM_UJ, inst(20).toSInt, - Mux(sel === IMM_SB, inst(7).toSInt, sign))) - val b10_5 = Mux(sel === IMM_U || sel === IMM_Z, Bits(0), 
inst(30,25)) - val b4_1 = Mux(sel === IMM_U, Bits(0), - Mux(sel === IMM_S || sel === IMM_SB, inst(11,8), - Mux(sel === IMM_Z, inst(19,16), inst(24,21)))) - val b0 = Mux(sel === IMM_S, inst(7), - Mux(sel === IMM_I, inst(20), - Mux(sel === IMM_Z, inst(15), Bits(0)))) - - Cat(sign, b30_20, b19_12, b11, b10_5, b4_1, b0).toSInt - } - def vaSign(a0: UInt, ea: UInt) = { // efficient means to compress 64-bit VA into vaddrBits+1 bits // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) @@ -569,26 +592,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { e(0))) } - class RegFile { - private val rf = Mem(31, UInt(width = 64)) - private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() - private var canRead = true - def read(addr: UInt) = { - require(canRead) - reads += addr -> Wire(UInt()) - reads.last._2 := rf(~addr) - reads.last._2 - } - def write(addr: UInt, data: UInt) = { - canRead = false - when (addr != UInt(0)) { - rf(~addr) := data - for ((raddr, rdata) <- reads) - when (addr === raddr) { rdata := data } - } - } - } - class Scoreboard(n: Int) { def set(en: Bool, addr: UInt): Unit = update(en, _next | mask(en, addr)) From 5294e94794e351e7e58e992676d969c78478169b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Nov 2015 18:28:14 -0800 Subject: [PATCH 0929/1087] Remove CSR back pressure ability We were using it for IPIs, but no longer need it. 
--- rocket/src/main/scala/csr.scala | 4 +--- rocket/src/main/scala/rocket.scala | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index efaf25b8..c9675527 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -78,7 +78,6 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle { val wdata = Bits(INPUT, xLen) } - val csr_replay = Bool(OUTPUT) val csr_stall = Bool(OUTPUT) val csr_xcpt = Bool(OUTPUT) val eret = Bool(OUTPUT) @@ -347,14 +346,13 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_sepc := reg_mepc } - assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: io.csr_replay :: Nil) <= 1, "these conditions must be mutually exclusive") + assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive") when (read_time >= reg_mtimecmp) { reg_mip.mtip := true } io.time := reg_cycle - io.csr_replay := false io.csr_stall := reg_wfi when (host_csr_req_fire && !host_csr_bits.rw && decoded_addr(CSRs.mtohost)) { reg_tohost := UInt(0) } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 2cabc403..53824d51 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -364,8 +364,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc - val replay_wb_common = - io.dmem.resp.bits.nack || wb_reg_replay || csr.io.csr_replay + val replay_wb_common = io.dmem.resp.bits.nack || wb_reg_replay val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt From e203b8b3787780471ed68c3315998e973bbe0d83 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 Nov 2015 
19:17:07 -0800 Subject: [PATCH 0930/1087] Make ALU generic for zscale --- rocket/src/main/scala/dpath_alu.scala | 50 ++++++++++++++++----------- rocket/src/main/scala/rocket.scala | 2 +- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 58942266..8aab0cfd 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -43,17 +43,15 @@ object ALU } import ALU._ -class ALUIO(implicit p: Parameters) extends CoreBundle()(p) { - val dw = Bits(INPUT, SZ_DW) - val fn = Bits(INPUT, SZ_ALU_FN) - val in2 = UInt(INPUT, xLen) - val in1 = UInt(INPUT, xLen) - val out = UInt(OUTPUT, xLen) - val adder_out = UInt(OUTPUT, xLen) -} - -class ALU(implicit p: Parameters) extends Module { - val io = new ALUIO +class ALU(xLen: Int) extends Module { + val io = new Bundle { + val dw = Bits(INPUT, SZ_DW) + val fn = Bits(INPUT, SZ_ALU_FN) + val in2 = UInt(INPUT, xLen) + val in1 = UInt(INPUT, xLen) + val out = UInt(OUTPUT, xLen) + val adder_out = UInt(OUTPUT, xLen) + } // ADD, SUB val sum = io.in1 + Mux(isSub(io.fn), -io.in2, io.in2) @@ -61,19 +59,26 @@ class ALU(implicit p: Parameters) extends Module { // SLT, SLTU val cmp = cmpInverted(io.fn) ^ Mux(cmpEq(io.fn), sum === UInt(0), - Mux(io.in1(63) === io.in2(63), sum(63), - Mux(cmpUnsigned(io.fn), io.in2(63), io.in1(63)))) + Mux(io.in1(xLen-1) === io.in2(xLen-1), sum(xLen-1), + Mux(cmpUnsigned(io.fn), io.in2(xLen-1), io.in1(xLen-1)))) // SLL, SRL, SRA - val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0)).toUInt - val shin_hi_32 = Mux(isSub(io.fn), Fill(32, io.in1(31)), UInt(0,32)) - val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) - val shin_r = Cat(shin_hi, io.in1(31,0)) + val full_shamt = io.in2(log2Up(xLen)-1,0) + + val (shamt, shin_r) = + if (xLen == 32) (full_shamt, io.in1) + else { + require(xLen == 64) + val shin_hi_32 = Fill(32, isSub(io.fn) && io.in1(31)) + val shin_hi = Mux(io.dw === 
DW_64, io.in1(63,32), shin_hi_32) + val shamt = Cat(full_shamt(5) & (io.dw === DW_64), full_shamt(4,0)) + (shamt, Cat(shin_hi, io.in1(31,0))) + } val shin = Mux(io.fn === FN_SR || io.fn === FN_SRA, shin_r, Reverse(shin_r)) - val shout_r = (Cat(isSub(io.fn) & shin(63), shin).toSInt >> shamt)(63,0) + val shout_r = (Cat(isSub(io.fn) & shin(xLen-1), shin).toSInt >> shamt)(xLen-1,0) val shout_l = Reverse(shout_r) - val out64 = + val out = Mux(io.fn === FN_ADD || io.fn === FN_SUB, sum, Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, Mux(io.fn === FN_SL, shout_l, @@ -82,7 +87,10 @@ class ALU(implicit p: Parameters) extends Module { Mux(io.fn === FN_XOR, io.in1 ^ io.in2, /* all comparisons */ cmp)))))) - val out_hi = Mux(io.dw === DW_64, out64(63,32), Fill(32, out64(31))) - io.out := Cat(out_hi, out64(31,0)).toUInt io.adder_out := sum + io.out := out + if (xLen > 32) { + require(xLen == 64) + when (io.dw === DW_32) { io.out := Cat(Fill(32, out(31)), out(31,0)) } + } } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 53824d51..570f120b 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -239,7 +239,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { A2_IMM -> ex_imm, A2_FOUR -> SInt(4))) - val alu = Module(new ALU) + val alu = Module(new ALU(xLen)) alu.io.dw := ex_ctrl.alu_dw alu.io.fn := ex_ctrl.alu_fn alu.io.in2 := ex_op2.toUInt From 58b0a868341d67fe5a5f62fa96c57b194d2424e3 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 25 Nov 2015 14:04:28 -0800 Subject: [PATCH 0931/1087] some modifications to AccumulatorExample --- rocket/src/main/scala/rocc.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index ff059811..fc46f9ac 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -56,12 +56,12 @@ abstract class RoCC(implicit p: Parameters) extends 
CoreModule()(p) { } class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) { - val regfile = Mem(UInt(width = xLen), n) - val busy = Reg(init=Vec(Bool(false), n)) + val regfile = Mem(n, UInt(width = xLen)) + val busy = Reg(init = Vec.fill(n){Bool(false)}) val cmd = Queue(io.cmd) val funct = cmd.bits.inst.funct - val addr = cmd.bits.inst.rs2(log2Up(n)-1,0) + val addr = cmd.bits.rs2(log2Up(n)-1,0) val doWrite = funct === UInt(0) val doRead = funct === UInt(1) val doLoad = funct === UInt(2) From 9256239206ed24e1841bbfd2bb60456b84492700 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 25 Nov 2015 16:02:27 -0800 Subject: [PATCH 0932/1087] implement support for multiple RoCC accelerators --- rocket/src/main/scala/rocc.scala | 170 +++++++++++++++++++++++++++++++ rocket/src/main/scala/tile.scala | 61 +++++++---- 2 files changed, 213 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index fc46f9ac..4952b406 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -128,3 +128,173 @@ class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) { io.dptw.req.valid := false io.pptw.req.valid := false } + +class TranslatorExample(implicit p: Parameters) extends RoCC()(p) { + val req_addr = Reg(UInt(width = coreMaxAddrBits)) + val req_rd = Reg(io.resp.bits.rd) + val req_offset = req_addr(pgIdxBits - 1, 0) + val req_vpn = req_addr(coreMaxAddrBits - 1, pgIdxBits) + val ppn = Reg(UInt(width = ppnBits)) + val error = Reg(Bool()) + + val s_idle :: s_ptw_req :: s_ptw_resp :: s_resp :: Nil = Enum(Bits(), 4) + val state = Reg(init = s_idle) + + io.cmd.ready := (state === s_idle) + + when (io.cmd.fire()) { + req_rd := io.cmd.bits.inst.rd + req_addr := io.cmd.bits.rs1 + state := s_ptw_req + } + + when (io.dptw.req.fire()) { state := s_ptw_resp } + + when (state === s_ptw_resp && io.dptw.resp.valid) { + error := io.dptw.resp.bits.error + ppn := 
io.dptw.resp.bits.pte.ppn + state := s_resp + } + + when (io.resp.fire()) { state := s_idle } + + io.dptw.req.valid := (state === s_ptw_req) + io.dptw.req.bits.addr := req_vpn + io.dptw.req.bits.store := Bool(false) + io.dptw.req.bits.fetch := Bool(false) + + io.resp.valid := (state === s_resp) + io.resp.bits.rd := req_rd + io.resp.bits.data := Mux(error, SInt(-1).toUInt, Cat(ppn, req_offset)) + + io.busy := (state =/= s_idle) + io.interrupt := Bool(false) + io.mem.req.valid := Bool(false) + io.dmem.head.acquire.valid := Bool(false) + io.dmem.head.grant.ready := Bool(false) + io.imem.acquire.valid := Bool(false) + io.imem.grant.ready := Bool(false) + io.iptw.req.valid := Bool(false) + io.pptw.req.valid := Bool(false) +} + +class CharacterCountExample(implicit p: Parameters) extends RoCC()(p) + with HasTileLinkParameters { + + private val blockOffset = tlBeatAddrBits + tlByteAddrBits + + val needle = Reg(UInt(width = 8)) + val addr = Reg(UInt(width = coreMaxAddrBits)) + val count = Reg(UInt(width = xLen)) + val resp_rd = Reg(io.resp.bits.rd) + + val addr_block = addr(coreMaxAddrBits - 1, blockOffset) + val offset = addr(blockOffset - 1, 0) + val next_addr = (addr_block + UInt(1)) << UInt(blockOffset) + + val s_idle :: s_acq :: s_gnt :: s_check :: s_resp :: Nil = Enum(Bits(), 5) + val state = Reg(init = s_idle) + + val gnt = io.dmem.head.grant.bits + val recv_data = Reg(UInt(width = tlDataBits)) + val recv_beat = Reg(UInt(width = tlBeatAddrBits)) + + val data_bytes = Vec.tabulate(tlDataBytes) { i => recv_data(8 * (i + 1) - 1, 8 * i) } + val zero_match = data_bytes.map(_ === UInt(0)) + val needle_match = data_bytes.map(_ === needle) + val first_zero = PriorityEncoder(zero_match) + + val chars_found = PopCount(needle_match.zipWithIndex.map { + case (matches, i) => + val idx = Cat(recv_beat, UInt(i, tlByteAddrBits)) + matches && idx >= offset && UInt(i) <= first_zero + }) + val zero_found = zero_match.reduce(_ || _) + val finished = Reg(Bool()) + + io.cmd.ready := 
(state === s_idle) + io.resp.valid := (state === s_resp) + io.resp.bits.rd := resp_rd + io.resp.bits.data := count + io.dmem.head.acquire.valid := (state === s_acq) + io.dmem.head.acquire.bits := GetBlock(addr_block = addr_block) + io.dmem.head.grant.ready := (state === s_gnt) + + when (io.cmd.fire()) { + addr := io.cmd.bits.rs1 + needle := io.cmd.bits.rs2 + resp_rd := io.cmd.bits.inst.rd + count := UInt(0) + finished := Bool(false) + state := s_acq + } + + when (io.dmem.head.acquire.fire()) { state := s_gnt } + + when (io.dmem.head.grant.fire()) { + recv_beat := gnt.addr_beat + recv_data := gnt.data + state := s_check + } + + when (state === s_check) { + when (!finished) { + count := count + chars_found + } + when (zero_found) { finished := Bool(true) } + when (recv_beat === UInt(tlDataBeats - 1)) { + addr := next_addr + state := Mux(zero_found || finished, s_resp, s_acq) + } .otherwise { + state := s_gnt + } + } + + when (io.resp.fire()) { state := s_idle } + + io.busy := (state =/= s_idle) + io.interrupt := Bool(false) + io.mem.req.valid := Bool(false) + io.imem.acquire.valid := Bool(false) + io.imem.grant.ready := Bool(false) + io.dptw.req.valid := Bool(false) + io.iptw.req.valid := Bool(false) + io.pptw.req.valid := Bool(false) +} + +class OpcodeSet(val opcodes: Seq[UInt]) { + def |(set: OpcodeSet) = + new OpcodeSet(this.opcodes ++ set.opcodes) + + def matches(oc: UInt) = opcodes.map(_ === oc).reduce(_ || _) +} + +object OpcodeSet { + val custom0 = new OpcodeSet(Seq(Bits("b0001011"))) + val custom1 = new OpcodeSet(Seq(Bits("b0101011"))) + val custom2 = new OpcodeSet(Seq(Bits("b1011011"))) + val custom3 = new OpcodeSet(Seq(Bits("b1111011"))) + val all = custom0 | custom1 | custom2 | custom3 +} + +class RoccCommandRouter(opcodes: Seq[OpcodeSet])(implicit p: Parameters) + extends CoreModule()(p) { + val io = new Bundle { + val in = Decoupled(new RoCCCommand).flip + val out = Vec(opcodes.size, Decoupled(new RoCCCommand)) + val busy = Bool(OUTPUT) + } + + val cmd = 
Queue(io.in) + val cmdReadys = io.out.zip(opcodes).map { case (out, opcode) => + val me = opcode.matches(cmd.bits.inst.opcode) + out.valid := cmd.valid && me + out.bits := cmd.bits + out.ready && me + } + cmd.ready := cmdReadys.reduce(_ || _) + io.busy := cmd.valid + + assert(PopCount(cmdReadys) <= UInt(1), + "Custom opcode matched for more than one accelerator") +} diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index c5e3e851..7c838475 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -8,15 +8,21 @@ import Util._ import cde.{Parameters, Field} case object CoreName extends Field[String] -case object BuildRoCC extends Field[Option[Parameters => RoCC]] +case object BuildRoCC extends Field[Seq[Parameters => RoCC]] +case object RoccOpcodes extends Field[Seq[OpcodeSet]] +case object RoccAcceleratorMemChannels extends Field[Seq[Int]] abstract class Tile(resetSignal: Bool = null) (implicit p: Parameters) extends Module(_reset = resetSignal) { - val usingRocc = !p(BuildRoCC).isEmpty - val nDCachePorts = 2 + (if(!usingRocc) 0 else 1) - val nPTWPorts = 2 + (if(!usingRocc) 0 else 3) + val buildRocc = p(BuildRoCC) + val roccOpcodes = p(RoccOpcodes) + val roccMemChannels = p(RoccAcceleratorMemChannels) + val usingRocc = !buildRocc.isEmpty + val nRocc = buildRocc.size + val nDCachePorts = 2 + nRocc + val nPTWPorts = 2 + 3 * nRocc val nCachedTileLinkPorts = 1 - val nUncachedTileLinkPorts = 1 + (if(!usingRocc) 0 else p(RoccNMemChannels)) + val nUncachedTileLinkPorts = 1 + p(RoccNMemChannels) val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) val io = new Bundle { val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO) @@ -53,18 +59,37 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( io.cached.head <> dcache.io.mem // If so specified, build an RoCC module and wire it to core + TileLink ports, // otherwise just hookup the icache - io.uncached <> 
p(BuildRoCC).map { buildItHere => - val rocc = buildItHere(p) - val iMemArb = Module(new ClientTileLinkIOArbiter(2)) - val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) - core.io.rocc <> rocc.io - dcIF.io.requestor <> rocc.io.mem - dcArb.io.requestor(2) <> dcIF.io.cache + io.uncached <> (if (usingRocc) { + val iMemArb = Module(new ClientTileLinkIOArbiter(1 + nRocc)) iMemArb.io.in(0) <> icache.io.mem - iMemArb.io.in(1) <> rocc.io.imem - ptw.io.requestor(2) <> rocc.io.iptw - ptw.io.requestor(3) <> rocc.io.dptw - ptw.io.requestor(4) <> rocc.io.pptw - rocc.io.dmem :+ iMemArb.io.out - }.getOrElse(List(icache.io.mem)) + + val respArb = Module(new RRArbiter(new RoCCResponse, nRocc)) + core.io.rocc.resp <> respArb.io.out + + val cmdRouter = Module(new RoccCommandRouter(roccOpcodes)) + cmdRouter.io.in <> core.io.rocc.cmd + + val roccs = buildRocc.zip(roccMemChannels).zipWithIndex.map { + case ((buildItHere, nchannels), i) => + val accelParams = p.alterPartial({ case RoccNMemChannels => nchannels}) + val rocc = buildItHere(accelParams) + val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) + rocc.io.cmd <> cmdRouter.io.out(i) + rocc.io.s := core.io.rocc.s + rocc.io.exception := core.io.rocc.exception + dcIF.io.requestor <> rocc.io.mem + dcArb.io.requestor(2 + i) <> dcIF.io.cache + iMemArb.io.in(1 + i) <> rocc.io.imem + ptw.io.requestor(2 + 3 * i) <> rocc.io.iptw + ptw.io.requestor(3 + 3 * i) <> rocc.io.dptw + ptw.io.requestor(4 + 3 * i) <> rocc.io.pptw + rocc + } + + core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _) + core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _) + respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) + + roccs.flatMap(_.io.dmem) :+ iMemArb.io.out + } else { Seq(icache.io.mem) }) } From e80340198a621c40bd5007ad71229b9df8d967c4 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Mon, 30 Nov 2015 17:35:33 -0800 Subject: [PATCH 0933/1087] use implicit parameters for ALU --- 
rocket/src/main/scala/dpath_alu.scala | 2 +- rocket/src/main/scala/rocket.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index 8aab0cfd..e26bff8a 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -43,7 +43,7 @@ object ALU } import ALU._ -class ALU(xLen: Int) extends Module { +class ALU(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { val dw = Bits(INPUT, SZ_DW) val fn = Bits(INPUT, SZ_ALU_FN) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 570f120b..53824d51 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -239,7 +239,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { A2_IMM -> ex_imm, A2_FOUR -> SInt(4))) - val alu = Module(new ALU(xLen)) + val alu = Module(new ALU) alu.io.dw := ex_ctrl.alu_dw alu.io.fn := ex_ctrl.alu_fn alu.io.in2 := ex_op2.toUInt From 0b15b19381f026a05ae791c9d6a0c006e147407c Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 1 Dec 2015 10:22:31 -0800 Subject: [PATCH 0934/1087] add arbiter for FPU --- rocket/src/main/scala/arbiter.scala | 49 +++++++++++++++++++++++++++++ rocket/src/main/scala/rocket.scala | 7 ----- rocket/src/main/scala/tile.scala | 14 +++++++-- 3 files changed, 61 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 27bfcc86..16c04858 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -5,6 +5,7 @@ package rocket import Chisel._ import uncore._ import cde.{Parameters, Field} +import junctions.ParameterizedBundle class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module { @@ -53,3 +54,51 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> 
log2Up(n) } } + +class InOrderArbiter[T <: Data, U <: Data](reqTyp: T, respTyp: U, n: Int) + (implicit p: Parameters) extends Module { + val io = new Bundle { + val in_req = Vec(n, Decoupled(reqTyp)).flip + val in_resp = Vec(n, Decoupled(respTyp)) + val out_req = Decoupled(reqTyp) + val out_resp = Decoupled(respTyp).flip + } + + if (n > 1) { + val route_q = Module(new Queue(UInt(width = log2Up(n)), 2)) + val req_arb = Module(new RRArbiter(reqTyp, n)) + req_arb.io.in <> io.in_req + + val req_helper = DecoupledHelper( + req_arb.io.out.valid, + route_q.io.enq.ready, + io.out_req.ready) + + io.out_req.bits := req_arb.io.out.bits + io.out_req.valid := req_helper.fire(io.out_req.ready) + + route_q.io.enq.bits := req_arb.io.chosen + route_q.io.enq.valid := req_helper.fire(route_q.io.enq.ready) + + req_arb.io.out.ready := req_helper.fire(req_arb.io.out.valid) + + val resp_sel = route_q.io.deq.bits + val resp_ready = io.in_resp(resp_sel).ready + val resp_helper = DecoupledHelper( + resp_ready, + route_q.io.deq.valid, + io.out_resp.valid) + + val resp_valid = resp_helper.fire(resp_ready) + for (i <- 0 until n) { + io.in_resp(i).bits := io.out_resp.bits + io.in_resp(i).valid := resp_valid && resp_sel === UInt(i) + } + + route_q.io.deq.ready := resp_helper.fire(route_q.io.deq.valid) + io.out_resp.ready := resp_helper.fire(io.out_resp.valid) + } else { + io.out_req <> io.in_req.head + io.in_resp.head <> io.out_resp + } +} diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 251ac330..53824d51 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -537,13 +537,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { io.rocc.cmd.bits.rs1 := wb_reg_wdata io.rocc.cmd.bits.rs2 := wb_reg_rs2 - if (usingFPU && usingRoCC) { - io.fpu.cp_req <> io.rocc.fpu_req - io.fpu.cp_resp <> io.rocc.fpu_resp - } else { - io.fpu.cp_req.valid := Bool(false) - } - if (enableCommitLog) { val pc = Wire(SInt(width=64)) 
pc := wb_reg_pc diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 7c838475..0420c6a5 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -52,8 +52,8 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( icache.io.cpu <> core.io.imem core.io.ptw <> ptw.io.dpath - //If so specified, build an FPU module and wire it in - if (p(UseFPU)) core.io.fpu <> Module(new FPU()(p)).io + val fpuOpt = if (p(UseFPU)) Some(Module(new FPU)) else None + fpuOpt.foreach(fpu => core.io.fpu <> fpu.io) // Connect the caches and ROCC to the outer memory system io.cached.head <> dcache.io.mem @@ -86,6 +86,16 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( rocc } + fpuOpt.foreach { fpu => + val fpArb = Module(new InOrderArbiter(new FPInput, new FPResult, nRocc)) + fpArb.io.in_req <> roccs.map(_.io.fpu_req) + roccs.zip(fpArb.io.in_resp).foreach { + case (rocc, fpu_resp) => rocc.io.fpu_resp <> fpu_resp + } + fpu.io.cp_req <> fpArb.io.out_req + fpArb.io.out_resp <> fpu.io.cp_resp + } + core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _) core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _) respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) From 4833d41dbc62a4e779f49dfef067d482182caa37 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 1 Dec 2015 16:48:05 -0800 Subject: [PATCH 0935/1087] make the connection of FPU ports optional per accelerator --- rocket/src/main/scala/tile.scala | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 0420c6a5..8ded6be8 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -11,6 +11,7 @@ case object CoreName extends Field[String] case object BuildRoCC extends Field[Seq[Parameters => RoCC]] case object RoccOpcodes extends 
Field[Seq[OpcodeSet]] case object RoccAcceleratorMemChannels extends Field[Seq[Int]] +case object RoccUseFPU extends Field[Seq[Boolean]] abstract class Tile(resetSignal: Bool = null) (implicit p: Parameters) extends Module(_reset = resetSignal) { @@ -19,6 +20,8 @@ abstract class Tile(resetSignal: Bool = null) val roccMemChannels = p(RoccAcceleratorMemChannels) val usingRocc = !buildRocc.isEmpty val nRocc = buildRocc.size + val roccUseFPU = p(RoccUseFPU) + val nFPUPorts = roccUseFPU.filter(useFPU => useFPU).size val nDCachePorts = 2 + nRocc val nPTWPorts = 2 + 3 * nRocc val nCachedTileLinkPorts = 1 @@ -86,15 +89,20 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( rocc } - fpuOpt.foreach { fpu => - val fpArb = Module(new InOrderArbiter(new FPInput, new FPResult, nRocc)) - fpArb.io.in_req <> roccs.map(_.io.fpu_req) - roccs.zip(fpArb.io.in_resp).foreach { - case (rocc, fpu_resp) => rocc.io.fpu_resp <> fpu_resp + if (nFPUPorts > 0) { + fpuOpt.foreach { fpu => + val fpArb = Module(new InOrderArbiter(new FPInput, new FPResult, nFPUPorts)) + val fp_roccs = roccs.zip(roccUseFPU) + .filter { case (_, useFPU) => useFPU } + .map { case (rocc, _) => rocc } + fpArb.io.in_req <> fp_roccs.map(_.io.fpu_req) + fp_roccs.zip(fpArb.io.in_resp).foreach { + case (rocc, fpu_resp) => rocc.io.fpu_resp <> fpu_resp + } + fpu.io.cp_req <> fpArb.io.out_req + fpArb.io.out_resp <> fpu.io.cp_resp } - fpu.io.cp_req <> fpArb.io.out_req - fpArb.io.out_resp <> fpu.io.cp_resp - } + } core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _) core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _) From e76dfa55f76610811469a82d369aa46873bff25c Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 1 Dec 2015 17:54:56 -0800 Subject: [PATCH 0936/1087] change the way rocc is parameterized --- rocket/src/main/scala/tile.scala | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/tile.scala 
b/rocket/src/main/scala/tile.scala index 7c838475..c92de644 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -8,15 +8,16 @@ import Util._ import cde.{Parameters, Field} case object CoreName extends Field[String] -case object BuildRoCC extends Field[Seq[Parameters => RoCC]] -case object RoccOpcodes extends Field[Seq[OpcodeSet]] -case object RoccAcceleratorMemChannels extends Field[Seq[Int]] +case object BuildRoCC extends Field[Seq[RoccParameters]] + +case class RoccParameters( + opcodes: OpcodeSet, + generator: Parameters => RoCC, + nMemChannels: Int = 1) abstract class Tile(resetSignal: Bool = null) (implicit p: Parameters) extends Module(_reset = resetSignal) { val buildRocc = p(BuildRoCC) - val roccOpcodes = p(RoccOpcodes) - val roccMemChannels = p(RoccAcceleratorMemChannels) val usingRocc = !buildRocc.isEmpty val nRocc = buildRocc.size val nDCachePorts = 2 + nRocc @@ -66,13 +67,14 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( val respArb = Module(new RRArbiter(new RoCCResponse, nRocc)) core.io.rocc.resp <> respArb.io.out + val roccOpcodes = buildRocc.map(_.opcodes) val cmdRouter = Module(new RoccCommandRouter(roccOpcodes)) cmdRouter.io.in <> core.io.rocc.cmd - val roccs = buildRocc.zip(roccMemChannels).zipWithIndex.map { - case ((buildItHere, nchannels), i) => - val accelParams = p.alterPartial({ case RoccNMemChannels => nchannels}) - val rocc = buildItHere(accelParams) + val roccs = buildRocc.zipWithIndex.map { + case (RoccParameters(_, generator, nchannels), i) => + val accelParams = p.alterPartial({ case RoccNMemChannels => nchannels }) + val rocc = generator(accelParams) val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) rocc.io.cmd <> cmdRouter.io.out(i) rocc.io.s := core.io.rocc.s From dcca0b1d86eb58ce8883f86b81a29446d1ac284c Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 1 Dec 2015 18:14:58 -0800 Subject: [PATCH 0937/1087] fix up FPU connection --- 
rocket/src/main/scala/tile.scala | 45 ++++++++++++++++---------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 8599f543..8878e722 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -13,15 +13,15 @@ case object BuildRoCC extends Field[Seq[RoccParameters]] case class RoccParameters( opcodes: OpcodeSet, generator: Parameters => RoCC, - nMemChannels: Int = 1) + nMemChannels: Int = 1, + useFPU: Boolean = false) abstract class Tile(resetSignal: Bool = null) (implicit p: Parameters) extends Module(_reset = resetSignal) { val buildRocc = p(BuildRoCC) val usingRocc = !buildRocc.isEmpty val nRocc = buildRocc.size - val roccUseFPU = p(RoccUseFPU) - val nFPUPorts = roccUseFPU.filter(useFPU => useFPU).size + val nFPUPorts = buildRocc.filter(_.useFPU).size val nDCachePorts = 2 + nRocc val nPTWPorts = 2 + 3 * nRocc val nCachedTileLinkPorts = 1 @@ -73,32 +73,31 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( val cmdRouter = Module(new RoccCommandRouter(roccOpcodes)) cmdRouter.io.in <> core.io.rocc.cmd - val roccs = buildRocc.zipWithIndex.map { - case (RoccParameters(_, generator, nchannels), i) => - val accelParams = p.alterPartial({ case RoccNMemChannels => nchannels }) - val rocc = generator(accelParams) - val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) - rocc.io.cmd <> cmdRouter.io.out(i) - rocc.io.s := core.io.rocc.s - rocc.io.exception := core.io.rocc.exception - dcIF.io.requestor <> rocc.io.mem - dcArb.io.requestor(2 + i) <> dcIF.io.cache - iMemArb.io.in(1 + i) <> rocc.io.imem - ptw.io.requestor(2 + 3 * i) <> rocc.io.iptw - ptw.io.requestor(3 + 3 * i) <> rocc.io.dptw - ptw.io.requestor(4 + 3 * i) <> rocc.io.pptw - rocc + val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) => + val rocc = accelParams.generator( + p.alterPartial({ case RoccNMemChannels => accelParams.nMemChannels })) + val 
dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) + rocc.io.cmd <> cmdRouter.io.out(i) + rocc.io.s := core.io.rocc.s + rocc.io.exception := core.io.rocc.exception + dcIF.io.requestor <> rocc.io.mem + dcArb.io.requestor(2 + i) <> dcIF.io.cache + iMemArb.io.in(1 + i) <> rocc.io.imem + ptw.io.requestor(2 + 3 * i) <> rocc.io.iptw + ptw.io.requestor(3 + 3 * i) <> rocc.io.dptw + ptw.io.requestor(4 + 3 * i) <> rocc.io.pptw + rocc } if (nFPUPorts > 0) { fpuOpt.foreach { fpu => val fpArb = Module(new InOrderArbiter(new FPInput, new FPResult, nFPUPorts)) - val fp_roccs = roccs.zip(roccUseFPU) - .filter { case (_, useFPU) => useFPU } - .map { case (rocc, _) => rocc } - fpArb.io.in_req <> fp_roccs.map(_.io.fpu_req) + val fp_roccs = roccs.zip(buildRocc) + .filter { case (_, params) => params.useFPU } + .map { case (rocc, _) => rocc.io } + fpArb.io.in_req <> fp_roccs.map(_.fpu_req) fp_roccs.zip(fpArb.io.in_resp).foreach { - case (rocc, fpu_resp) => rocc.io.fpu_resp <> fpu_resp + case (rocc, fpu_resp) => rocc.fpu_resp <> fpu_resp } fpu.io.cp_req <> fpArb.io.out_req fpArb.io.out_resp <> fpu.io.cp_resp From 3f8f726296fc32b114d8fd3c5566f2d8e148162f Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 1 Dec 2015 18:47:52 -0800 Subject: [PATCH 0938/1087] make rocc build independent from parameter structure --- rocket/src/main/scala/tile.scala | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index c92de644..c5e4c64b 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -71,21 +71,20 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( val cmdRouter = Module(new RoccCommandRouter(roccOpcodes)) cmdRouter.io.in <> core.io.rocc.cmd - val roccs = buildRocc.zipWithIndex.map { - case (RoccParameters(_, generator, nchannels), i) => - val accelParams = p.alterPartial({ case RoccNMemChannels => nchannels }) - val 
rocc = generator(accelParams) - val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) - rocc.io.cmd <> cmdRouter.io.out(i) - rocc.io.s := core.io.rocc.s - rocc.io.exception := core.io.rocc.exception - dcIF.io.requestor <> rocc.io.mem - dcArb.io.requestor(2 + i) <> dcIF.io.cache - iMemArb.io.in(1 + i) <> rocc.io.imem - ptw.io.requestor(2 + 3 * i) <> rocc.io.iptw - ptw.io.requestor(3 + 3 * i) <> rocc.io.dptw - ptw.io.requestor(4 + 3 * i) <> rocc.io.pptw - rocc + val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) => + val rocc = accelParams.generator( + p.alterPartial({ case RoccNMemChannels => accelParams.nMemChannels })) + val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) + rocc.io.cmd <> cmdRouter.io.out(i) + rocc.io.s := core.io.rocc.s + rocc.io.exception := core.io.rocc.exception + dcIF.io.requestor <> rocc.io.mem + dcArb.io.requestor(2 + i) <> dcIF.io.cache + iMemArb.io.in(1 + i) <> rocc.io.imem + ptw.io.requestor(2 + 3 * i) <> rocc.io.iptw + ptw.io.requestor(3 + 3 * i) <> rocc.io.dptw + ptw.io.requestor(4 + 3 * i) <> rocc.io.pptw + rocc } core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _) From 73b026366301c29b74be13b05b3a8bb16bfd2260 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 1 Dec 2015 20:41:58 -0800 Subject: [PATCH 0939/1087] disconnect fpu port if no fpu-using RoCC accelerators --- rocket/src/main/scala/tile.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 8878e722..09c4f006 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -110,4 +110,11 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( roccs.flatMap(_.io.dmem) :+ iMemArb.io.out } else { Seq(icache.io.mem) }) + + if (!usingRocc || nFPUPorts == 0) { + fpuOpt.foreach { fpu => + fpu.io.cp_req.valid := Bool(false) + fpu.io.cp_resp.ready := Bool(false) + } + } } From 
369ee74a2c821ca6941b41715e0973d947fbcff3 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 2 Dec 2015 16:28:23 -0800 Subject: [PATCH 0940/1087] change names of RoCC tilelink interfaces to be more sensible --- rocket/src/main/scala/rocc.scala | 30 ++++++++++++------------------ rocket/src/main/scala/tile.scala | 10 +++++----- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 76673a6f..f2b2decd 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -42,8 +42,8 @@ class RoCCInterface(implicit p: Parameters) extends Bundle { val interrupt = Bool(OUTPUT) // These should be handled differently, eventually - val imem = new ClientUncachedTileLinkIO - val dmem = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO) + val autl = new ClientUncachedTileLinkIO + val utl = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO) val iptw = new TLBPTWIO val dptw = new TLBPTWIO val pptw = new TLBPTWIO @@ -122,10 +122,8 @@ class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) { io.mem.req.bits.data := Bits(0) // we're not performing any stores... 
io.mem.invalidate_lr := false - io.imem.acquire.valid := false - io.imem.grant.ready := false - io.dmem.head.acquire.valid := false - io.dmem.head.grant.ready := false + io.autl.acquire.valid := false + io.autl.grant.ready := false io.iptw.req.valid := false io.dptw.req.valid := false io.pptw.req.valid := false @@ -172,10 +170,8 @@ class TranslatorExample(implicit p: Parameters) extends RoCC()(p) { io.busy := (state =/= s_idle) io.interrupt := Bool(false) io.mem.req.valid := Bool(false) - io.dmem.head.acquire.valid := Bool(false) - io.dmem.head.grant.ready := Bool(false) - io.imem.acquire.valid := Bool(false) - io.imem.grant.ready := Bool(false) + io.autl.acquire.valid := Bool(false) + io.autl.grant.ready := Bool(false) io.iptw.req.valid := Bool(false) io.pptw.req.valid := Bool(false) } @@ -197,7 +193,7 @@ class CharacterCountExample(implicit p: Parameters) extends RoCC()(p) val s_idle :: s_acq :: s_gnt :: s_check :: s_resp :: Nil = Enum(Bits(), 5) val state = Reg(init = s_idle) - val gnt = io.dmem.head.grant.bits + val gnt = io.autl.grant.bits val recv_data = Reg(UInt(width = tlDataBits)) val recv_beat = Reg(UInt(width = tlBeatAddrBits)) @@ -218,9 +214,9 @@ class CharacterCountExample(implicit p: Parameters) extends RoCC()(p) io.resp.valid := (state === s_resp) io.resp.bits.rd := resp_rd io.resp.bits.data := count - io.dmem.head.acquire.valid := (state === s_acq) - io.dmem.head.acquire.bits := GetBlock(addr_block = addr_block) - io.dmem.head.grant.ready := (state === s_gnt) + io.autl.acquire.valid := (state === s_acq) + io.autl.acquire.bits := GetBlock(addr_block = addr_block) + io.autl.grant.ready := (state === s_gnt) when (io.cmd.fire()) { addr := io.cmd.bits.rs1 @@ -231,9 +227,9 @@ class CharacterCountExample(implicit p: Parameters) extends RoCC()(p) state := s_acq } - when (io.dmem.head.acquire.fire()) { state := s_gnt } + when (io.autl.acquire.fire()) { state := s_gnt } - when (io.dmem.head.grant.fire()) { + when (io.autl.grant.fire()) { recv_beat := 
gnt.addr_beat recv_data := gnt.data state := s_check @@ -257,8 +253,6 @@ class CharacterCountExample(implicit p: Parameters) extends RoCC()(p) io.busy := (state =/= s_idle) io.interrupt := Bool(false) io.mem.req.valid := Bool(false) - io.imem.acquire.valid := Bool(false) - io.imem.grant.ready := Bool(false) io.dptw.req.valid := Bool(false) io.iptw.req.valid := Bool(false) io.pptw.req.valid := Bool(false) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 09c4f006..86582cff 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -13,7 +13,7 @@ case object BuildRoCC extends Field[Seq[RoccParameters]] case class RoccParameters( opcodes: OpcodeSet, generator: Parameters => RoCC, - nMemChannels: Int = 1, + nMemChannels: Int = 0, useFPU: Boolean = false) abstract class Tile(resetSignal: Bool = null) @@ -63,8 +63,8 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( // If so specified, build an RoCC module and wire it to core + TileLink ports, // otherwise just hookup the icache io.uncached <> (if (usingRocc) { - val iMemArb = Module(new ClientTileLinkIOArbiter(1 + nRocc)) - iMemArb.io.in(0) <> icache.io.mem + val uncachedArb = Module(new ClientTileLinkIOArbiter(1 + nRocc)) + uncachedArb.io.in(0) <> icache.io.mem val respArb = Module(new RRArbiter(new RoCCResponse, nRocc)) core.io.rocc.resp <> respArb.io.out @@ -82,7 +82,7 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( rocc.io.exception := core.io.rocc.exception dcIF.io.requestor <> rocc.io.mem dcArb.io.requestor(2 + i) <> dcIF.io.cache - iMemArb.io.in(1 + i) <> rocc.io.imem + uncachedArb.io.in(1 + i) <> rocc.io.autl ptw.io.requestor(2 + 3 * i) <> rocc.io.iptw ptw.io.requestor(3 + 3 * i) <> rocc.io.dptw ptw.io.requestor(4 + 3 * i) <> rocc.io.pptw @@ -108,7 +108,7 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( core.io.rocc.interrupt := 
roccs.map(_.io.interrupt).reduce(_ || _) respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) - roccs.flatMap(_.io.dmem) :+ iMemArb.io.out + roccs.flatMap(_.io.utl) :+ uncachedArb.io.out } else { Seq(icache.io.mem) }) if (!usingRocc || nFPUPorts == 0) { From 7690de07e1af7d954241267a223041fa1fb7a103 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 2 Dec 2015 17:17:49 -0800 Subject: [PATCH 0941/1087] allow icache to configure which side of the way mux gets buffered --- rocket/src/main/scala/frontend.scala | 27 ++++++++++++++++++++------- rocket/src/main/scala/icache.scala | 14 ++++++++++++-- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 36a5a2d0..fd45d299 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -46,14 +46,15 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val s2_btb_resp_valid = Reg(init=Bool(false)) val s2_btb_resp_bits = Reg(btb.io.resp.bits) val s2_xcpt_if = Reg(init=Bool(false)) - val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true)) + val s2_resp_valid = Wire(init=Bool(false)) + val s2_resp_data = Wire(UInt(width = rowBits)) val msb = vaddrBits-1 val lsb = log2Up(fetchWidth*coreInstBytes) val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) val ntpc_0 = s1_pc + UInt(coreInstBytes*fetchWidth) val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure - val icmiss = s2_valid && !icbuf.io.deq.valid + val icmiss = s2_valid && !s2_resp_valid val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) @@ -101,17 +102,29 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa icmiss || io.ptw.invalidate icache.io.resp.ready 
:= !stall && !s1_same_block - io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icbuf.io.deq.valid) + io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid) io.cpu.resp.bits.pc := s2_pc io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) - icbuf.io.enq <> icache.io.resp - icbuf.io.deq.ready := !stall && !s1_same_block + // if the ways are buffered, we don't need to buffer again + if (p(ICacheBufferWays)) { + icache.io.resp.ready := !stall && !s1_same_block + + s2_resp_valid := icache.io.resp.valid + s2_resp_data := icache.io.resp.bits.datablock + } else { + val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true)) + icbuf.io.enq <> icache.io.resp + icbuf.io.deq.ready := !stall && !s1_same_block + + s2_resp_valid := icbuf.io.deq.valid + s2_resp_data := icbuf.io.deq.bits.datablock + } require(fetchWidth * coreInstBytes <= rowBytes) val fetch_data = - if (fetchWidth * coreInstBytes == rowBytes) icbuf.io.deq.bits.datablock - else icbuf.io.deq.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) + if (fetchWidth * coreInstBytes == rowBytes) s2_resp_data + else s2_resp_data >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) for (i <- 0 until fetchWidth) { io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 1b3cf067..e2ef913d 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -5,6 +5,8 @@ import uncore._ import Util._ import cde.{Parameters, Field} +case object ICacheBufferWays extends Field[Boolean] + trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters { val outerDataBeats = p(TLKey(p(TLId))).dataBeats val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat @@ -124,10 +126,18 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara 
s1_dout(i) := 0 when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s1_dout(i) := s1_rdata } } - io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout) // output signals - io.resp.valid := s1_hit + if (p(ICacheBufferWays)) { + val s2_hit = RegEnable(s1_hit, !stall) + val s2_tag_hit = RegEnable(s1_tag_hit, !stall) + val s2_dout = RegEnable(s1_dout, !stall) + io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) + io.resp.valid := s2_hit + } else { + io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout) + io.resp.valid := s1_hit + } io.mem.acquire.valid := (state === s_request) io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits) From 01a34479894eedfda24444725d5e68ee84c7b58c Mon Sep 17 00:00:00 2001 From: Albert Magyar Date: Wed, 16 Dec 2015 16:12:47 -0800 Subject: [PATCH 0942/1087] Remove duplicate PseudoLRU class from rocket TLB --- rocket/src/main/scala/tlb.scala | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 8c6f9528..9244fa23 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -7,6 +7,7 @@ import Util._ import junctions._ import scala.math._ import cde.{Parameters, Field} +import uncore.PseudoLRU case object NTLBEntries extends Field[Int] @@ -54,29 +55,6 @@ class RocketCAM(implicit p: Parameters) extends TLBModule()(p) { io.hit := io.hits.orR } -class PseudoLRU(n: Int) -{ - val state = Reg(Bits(width = n)) - def access(way: UInt) = { - var next_state = state - var idx = UInt(1,1) - for (i <- log2Up(n)-1 to 0 by -1) { - val bit = way(i) - val mask = (UInt(1,n) << idx)(n-1,0) - next_state = next_state & ~mask | Mux(bit, UInt(0), mask) - //next_state.bitSet(idx, !bit) - idx = Cat(idx, bit) - } - state := next_state - } - def replace = { - var idx = UInt(1,1) - for (i <- 0 until log2Up(n)) - idx = Cat(idx, state(idx)) - idx(log2Up(n)-1,0) - } -} - class TLBReq(implicit p: Parameters) 
extends CoreBundle()(p) { val asid = UInt(width = asIdBits) val vpn = UInt(width = vpnBits+1) From 304d8b814abf37a2e5ae241fd70c02a8c5bd57e8 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 17 Nov 2015 18:14:30 -0800 Subject: [PATCH 0943/1087] Implement client-side DMA controller --- rocket/src/main/scala/dma.scala | 337 +++++++++++++++++++++++++++++++ rocket/src/main/scala/rocc.scala | 2 + rocket/src/main/scala/tile.scala | 18 +- rocket/src/main/scala/util.scala | 6 + 4 files changed, 362 insertions(+), 1 deletion(-) create mode 100644 rocket/src/main/scala/dma.scala diff --git a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala new file mode 100644 index 00000000..333ff95d --- /dev/null +++ b/rocket/src/main/scala/dma.scala @@ -0,0 +1,337 @@ +package rocket + +import Chisel._ +import uncore._ +import uncore.DmaRequest._ +import junctions.ParameterizedBundle +import cde.Parameters + +trait HasClientDmaParameters extends HasCoreParameters with HasDmaParameters { + val dmaAddrBits = coreMaxAddrBits + val dmaSizeBits = coreMaxAddrBits + val dmaSegmentBits = 24 +} + +abstract class ClientDmaBundle(implicit val p: Parameters) + extends ParameterizedBundle()(p) with HasClientDmaParameters +abstract class ClientDmaModule(implicit val p: Parameters) + extends Module with HasClientDmaParameters + +class ClientDmaRequest(implicit p: Parameters) extends ClientDmaBundle()(p) { + val cmd = UInt(width = DMA_CMD_SZ) + val src_start = UInt(width = dmaAddrBits) + val dst_start = UInt(width = dmaAddrBits) + val src_stride = UInt(width = dmaSizeBits) + val dst_stride = UInt(width = dmaSizeBits) + val segment_size = UInt(width = dmaSizeBits) + val nsegments = UInt(width = dmaSegmentBits) +} + +object ClientDmaRequest { + def apply(cmd: UInt, + src_start: UInt, + dst_start: UInt, + segment_size: UInt, + nsegments: UInt = UInt(1), + src_stride: UInt = UInt(0), + dst_stride: UInt = UInt(0)) + (implicit p: Parameters) = { + val req = Wire(new ClientDmaRequest) + 
req.cmd := cmd + req.src_start := src_start + req.dst_start := dst_start + req.src_stride := src_stride + req.dst_stride := dst_stride + req.segment_size := segment_size + req.nsegments := nsegments + req + } +} + +object ClientDmaResponse { + val pagefault = UInt("b01") + val outer_err = UInt("b10") + + def apply(status: UInt = UInt(0))(implicit p: Parameters) = { + val resp = Wire(new ClientDmaResponse) + resp.status := status + resp + } +} + +class ClientDmaResponse(implicit p: Parameters) extends ClientDmaBundle { + val status = UInt(width = dmaStatusBits) +} + +class ClientDmaIO(implicit p: Parameters) extends ParameterizedBundle()(p) { + val req = Decoupled(new ClientDmaRequest) + val resp = Valid(new ClientDmaResponse).flip +} + +class DmaFrontend(implicit val p: Parameters) + extends Module with HasClientDmaParameters { + val io = new Bundle { + val cpu = (new ClientDmaIO).flip + val dma = new DmaIO + val ptw = new TLBPTWIO + val busy = Bool(OUTPUT) + } + + private val pgSize = 1 << pgIdxBits + + val priv = Mux(io.ptw.status.mprv, io.ptw.status.prv1, io.ptw.status.prv) + val vm_enabled = io.ptw.status.vm(3) && priv <= UInt(PRV_S) + + val cmd = Reg(UInt(width = DMA_CMD_SZ)) + + val segment_size = Reg(UInt(width = dmaSizeBits)) + val bytes_left = Reg(UInt(width = dmaSizeBits)) + val segments_left = Reg(UInt(width = dmaSegmentBits)) + + val src_vaddr = Reg(UInt(width = dmaAddrBits)) + val dst_vaddr = Reg(UInt(width = dmaAddrBits)) + val src_vpn = src_vaddr(dmaAddrBits - 1, pgIdxBits) + val dst_vpn = dst_vaddr(dmaAddrBits - 1, pgIdxBits) + val src_idx = src_vaddr(pgIdxBits - 1, 0) + val dst_idx = dst_vaddr(pgIdxBits - 1, 0) + val src_pglen = UInt(pgSize) - src_idx + val dst_pglen = UInt(pgSize) - dst_idx + + val src_stride = Reg(UInt(width = dmaSizeBits)) + val dst_stride = Reg(UInt(width = dmaSizeBits)) + + val src_ppn = Reg(UInt(width = ppnBits)) + val dst_ppn = Reg(UInt(width = ppnBits)) + + val src_paddr = Mux(vm_enabled, Cat(src_ppn, src_idx), src_vaddr) + 
val dst_paddr = Mux(vm_enabled, Cat(dst_ppn, dst_idx), dst_vaddr) + + val last_src_vpn = Reg(UInt(width = vpnBits)) + val last_dst_vpn = Reg(UInt(width = vpnBits)) + + val tx_len = Mux(!vm_enabled, bytes_left, + Util.minUInt(src_pglen, dst_pglen, bytes_left)) + + val (dma_xact_id, _) = Counter(io.dma.req.fire(), nDmaXactsPerClient) + val dma_busy = Reg(init = UInt(0, nDmaXactsPerClient)) + + val (s_idle :: s_translate :: s_dma_req :: s_dma_update :: + s_prepare :: s_finish :: Nil) = Enum(Bits(), 6) + val state = Reg(init = s_idle) + + // lower bit is for src, higher bit is for dst + val to_translate = Reg(init = UInt(0, 2)) + val ptw_sent = Reg(init = UInt(0, 2)) + val ptw_to_send = to_translate & ~ptw_sent + val ptw_resp_id = Reg(init = UInt(0, 1)) + val resp_status = Reg(UInt(width = dmaStatusBits)) + + io.ptw.req.valid := ptw_to_send.orR && vm_enabled + io.ptw.req.bits.addr := Mux(ptw_to_send(0), src_vpn, dst_vpn) + io.ptw.req.bits.prv := io.ptw.status.prv + io.ptw.req.bits.store := !ptw_to_send(0) // storing to destination + io.ptw.req.bits.fetch := Bool(true) + + when (io.ptw.req.fire()) { + ptw_sent := ptw_sent | PriorityEncoderOH(ptw_to_send) + } + + when (io.ptw.resp.valid) { + when (io.ptw.resp.bits.error) { + resp_status := ClientDmaResponse.pagefault + state := s_finish + } + val recv_choice = PriorityEncoderOH(to_translate) + to_translate := to_translate & ~recv_choice + + // getting the src translation + // if this is a prefetch, dst_ppn and src_ppn should be equal + when (recv_choice(0) || cmd(1)) { + src_ppn := io.ptw.resp.bits.pte.ppn + } .otherwise { + dst_ppn := io.ptw.resp.bits.pte.ppn + } + } + + io.cpu.req.ready := state === s_idle + io.cpu.resp.valid := state === s_finish + io.cpu.resp.bits := ClientDmaResponse(resp_status) + io.dma.req.valid := state === s_dma_req && !dma_busy(dma_xact_id) + io.dma.req.bits := DmaRequest( + client_xact_id = dma_xact_id, + cmd = cmd, + source = src_paddr, + dest = dst_paddr, + length = tx_len) + 
io.dma.resp.ready := Bool(true) + + when (io.cpu.req.fire()) { + val req = io.cpu.req.bits + cmd := req.cmd + src_vaddr := req.src_start + dst_vaddr := req.dst_start + src_stride := req.src_stride + dst_stride := req.dst_stride + segment_size := req.segment_size + segments_left := req.nsegments - UInt(1) + bytes_left := req.segment_size + to_translate := Mux(req.cmd(1), UInt("b10"), UInt("b11")) + ptw_sent := UInt(0) + state := Mux(vm_enabled, s_translate, s_dma_req) + } + + when (state === s_translate && !to_translate.orR) { + state := s_dma_req + } + + when (io.dma.req.fire()) { + src_vaddr := src_vaddr + tx_len + dst_vaddr := dst_vaddr + tx_len + bytes_left := bytes_left - tx_len + dma_busy := dma_busy | UIntToOH(dma_xact_id) + state := s_dma_update + } + + when (io.dma.resp.fire()) { + dma_busy := dma_busy & ~UIntToOH(io.dma.resp.bits.client_xact_id) + } + + when (state === s_dma_update) { + when (bytes_left === UInt(0)) { + when (segments_left === UInt(0)) { + resp_status := UInt(0) + state := s_finish + } .otherwise { + last_src_vpn := src_vpn + last_dst_vpn := dst_vpn + src_vaddr := src_vaddr + src_stride + dst_vaddr := dst_vaddr + dst_stride + bytes_left := segment_size + segments_left := segments_left - UInt(1) + state := Mux(vm_enabled, s_prepare, s_dma_req) + } + } .otherwise { + to_translate := Cat(dst_idx === UInt(0), !cmd(1) && src_idx === UInt(0)) + ptw_sent := UInt(0) + state := s_translate + } + } + + when (state === s_prepare) { + to_translate := Cat( + dst_vpn =/= last_dst_vpn, + src_vpn =/= last_src_vpn && !cmd(1)) + ptw_sent := UInt(0) + state := s_translate + } + + when (state === s_finish) { state := s_idle } + + io.busy := (state =/= s_idle) || dma_busy.orR +} + +object DmaCtrlRegNumbers { + val SRC_STRIDE = 0 + val DST_STRIDE = 1 + val SEGMENT_SIZE = 2 + val NSEGMENTS = 3 + val RESP_STATUS = 4 +} +import DmaCtrlRegNumbers._ + +class DmaCtrlRegFile(implicit p: Parameters) extends ClientDmaModule()(p) { + private val nWriteRegs = 4 + private 
val nReadRegs = 1 + private val nRegs = nWriteRegs + nReadRegs + + val io = new Bundle { + val wen = Bool(INPUT) + val addr = UInt(INPUT, log2Up(nRegs)) + val wdata = UInt(INPUT, dmaSizeBits) + val rdata = UInt(OUTPUT, dmaSizeBits) + + val src_stride = UInt(OUTPUT, dmaSizeBits) + val dst_stride = UInt(OUTPUT, dmaSizeBits) + val segment_size = UInt(OUTPUT, dmaSizeBits) + val nsegments = UInt(OUTPUT, dmaSegmentBits) + + val status = UInt(INPUT, dmaStatusBits) + } + + val regs = Reg(Vec(nWriteRegs, UInt(width = dmaSizeBits))) + + io.src_stride := regs(SRC_STRIDE) + io.dst_stride := regs(DST_STRIDE) + io.segment_size := regs(SEGMENT_SIZE) + io.nsegments := regs(NSEGMENTS) + + when (io.wen && io.addr < UInt(nWriteRegs)) { + regs.write(io.addr, io.wdata) + } + + io.rdata := MuxLookup(io.addr, regs(io.addr), Seq( + UInt(RESP_STATUS) -> io.status)) +} + +class DmaController(implicit p: Parameters) extends RoCC()(p) + with HasClientDmaParameters { + io.mem.req.valid := Bool(false) + io.autl.acquire.valid := Bool(false) + io.autl.grant.ready := Bool(false) + io.iptw.req.valid := Bool(false) + io.pptw.req.valid := Bool(false) + + val cmd = Queue(io.cmd) + val inst = cmd.bits.inst + val is_transfer = inst.funct < UInt(4) + val is_cr_write = inst.funct === UInt(4) + val is_cr_read = inst.funct === UInt(5) + val is_cr_access = is_cr_write || is_cr_read + + val resp_rd = Reg(io.resp.bits.rd) + val resp_data = Reg(io.resp.bits.data) + + val s_idle :: s_resp :: Nil = Enum(Bits(), 2) + val state = Reg(init = s_idle) + + val reg_status = Reg(UInt(width = dmaStatusBits)) + val crfile = Module(new DmaCtrlRegFile) + crfile.io.addr := cmd.bits.rs1 + crfile.io.wdata := cmd.bits.rs2 + crfile.io.wen := cmd.fire() && is_cr_write + + val frontend = Module(new DmaFrontend) + io.dma <> frontend.io.dma + io.dptw <> frontend.io.ptw + frontend.io.cpu.req.valid := cmd.valid && is_transfer + frontend.io.cpu.req.bits := ClientDmaRequest( + cmd = cmd.bits.inst.funct, + src_start = cmd.bits.rs2, + 
dst_start = cmd.bits.rs1, + src_stride = crfile.io.src_stride, + dst_stride = crfile.io.dst_stride, + segment_size = crfile.io.segment_size, + nsegments = crfile.io.nsegments) + + cmd.ready := state === s_idle && (!is_transfer || frontend.io.cpu.req.ready) + io.resp.valid := state === s_resp + io.resp.bits.rd := resp_rd + io.resp.bits.data := resp_data + + when (cmd.fire()) { + when (is_cr_read) { + resp_rd := inst.rd + resp_data := crfile.io.rdata + state := s_resp + } + } + + when (io.resp.fire()) { state := s_idle } + + when (frontend.io.cpu.resp.valid) { + reg_status := frontend.io.cpu.resp.bits.status + } + + io.busy := (state =/= s_idle) || cmd.valid || frontend.io.busy + io.interrupt := Bool(false) +} diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index f2b2decd..a0d68abd 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -50,6 +50,8 @@ class RoCCInterface(implicit p: Parameters) extends Bundle { val fpu_req = Decoupled(new FPInput) val fpu_resp = Decoupled(new FPResult).flip val exception = Bool(INPUT) + + val dma = new DmaIO } abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) { diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 86582cff..233b5b8b 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -14,7 +14,8 @@ case class RoccParameters( opcodes: OpcodeSet, generator: Parameters => RoCC, nMemChannels: Int = 0, - useFPU: Boolean = false) + useFPU: Boolean = false, + useDma: Boolean = false) abstract class Tile(resetSignal: Bool = null) (implicit p: Parameters) extends Module(_reset = resetSignal) { @@ -22,6 +23,7 @@ abstract class Tile(resetSignal: Bool = null) val usingRocc = !buildRocc.isEmpty val nRocc = buildRocc.size val nFPUPorts = buildRocc.filter(_.useFPU).size + val nDmaPorts = buildRocc.filter(_.useDma).size val nDCachePorts = 2 + nRocc val nPTWPorts = 2 + 3 * nRocc val 
nCachedTileLinkPorts = 1 @@ -31,6 +33,7 @@ abstract class Tile(resetSignal: Bool = null) val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO) val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO) val host = new HtifIO + val dma = new DmaIO } } @@ -104,6 +107,14 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( } } + if (nDmaPorts > 0) { + val dmaArb = Module(new DmaArbiter(nDmaPorts)) + dmaArb.io.in <> roccs.zip(buildRocc) + .filter { case (_, params) => params.useDma } + .map { case (rocc, _) => rocc.io.dma } + io.dma <> dmaArb.io.out + } + core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _) core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _) respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) @@ -117,4 +128,9 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( fpu.io.cp_resp.ready := Bool(false) } } + + if (!usingRocc || nDmaPorts == 0) { + io.dma.req.valid := Bool(false) + io.dma.resp.ready := Bool(false) + } } diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 4050be5b..a6ac1ad5 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -21,6 +21,12 @@ object Util { implicit def booleanToIntConv(x: Boolean) = new AnyRef { def toInt: Int = if (x) 1 else 0 } + + def minUInt(values: Seq[UInt]): UInt = + values.reduce((a, b) => Mux(a < b, a, b)) + + def minUInt(first: UInt, rest: UInt*): UInt = + minUInt(first +: rest.toSeq) } import Util._ From 05b359d35781ed47188bea2859eea1936a41d2f8 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 5 Jan 2016 20:05:10 -0800 Subject: [PATCH 0944/1087] support streaming DMA in DMA frontend --- rocket/src/main/scala/dma.scala | 93 ++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala index 333ff95d..c2a02bec 100644 --- 
a/rocket/src/main/scala/dma.scala +++ b/rocket/src/main/scala/dma.scala @@ -8,7 +8,7 @@ import cde.Parameters trait HasClientDmaParameters extends HasCoreParameters with HasDmaParameters { val dmaAddrBits = coreMaxAddrBits - val dmaSizeBits = coreMaxAddrBits + val dmaSegmentSizeBits = coreMaxAddrBits val dmaSegmentBits = 24 } @@ -21,10 +21,11 @@ class ClientDmaRequest(implicit p: Parameters) extends ClientDmaBundle()(p) { val cmd = UInt(width = DMA_CMD_SZ) val src_start = UInt(width = dmaAddrBits) val dst_start = UInt(width = dmaAddrBits) - val src_stride = UInt(width = dmaSizeBits) - val dst_stride = UInt(width = dmaSizeBits) - val segment_size = UInt(width = dmaSizeBits) + val src_stride = UInt(width = dmaSegmentSizeBits) + val dst_stride = UInt(width = dmaSegmentSizeBits) + val segment_size = UInt(width = dmaSegmentSizeBits) val nsegments = UInt(width = dmaSegmentBits) + val word_size = UInt(width = dmaWordSizeBits) } object ClientDmaRequest { @@ -34,7 +35,8 @@ object ClientDmaRequest { segment_size: UInt, nsegments: UInt = UInt(1), src_stride: UInt = UInt(0), - dst_stride: UInt = UInt(0)) + dst_stride: UInt = UInt(0), + word_size: UInt = UInt(0)) (implicit p: Parameters) = { val req = Wire(new ClientDmaRequest) req.cmd := cmd @@ -44,6 +46,7 @@ object ClientDmaRequest { req.dst_stride := dst_stride req.segment_size := segment_size req.nsegments := nsegments + req.word_size := word_size req } } @@ -83,10 +86,16 @@ class DmaFrontend(implicit val p: Parameters) val vm_enabled = io.ptw.status.vm(3) && priv <= UInt(PRV_S) val cmd = Reg(UInt(width = DMA_CMD_SZ)) + val adv_ptr = MuxLookup(cmd, UInt("b11"), Seq( + DMA_CMD_PFR -> UInt("b10"), + DMA_CMD_PFW -> UInt("b10"), + DMA_CMD_SIN -> UInt("b10"), + DMA_CMD_SOUT -> UInt("b01"))) - val segment_size = Reg(UInt(width = dmaSizeBits)) - val bytes_left = Reg(UInt(width = dmaSizeBits)) + val segment_size = Reg(UInt(width = dmaSegmentSizeBits)) + val bytes_left = Reg(UInt(width = dmaSegmentSizeBits)) val segments_left = 
Reg(UInt(width = dmaSegmentBits)) + val word_size = Reg(UInt(width = dmaWordSizeBits)) val src_vaddr = Reg(UInt(width = dmaAddrBits)) val dst_vaddr = Reg(UInt(width = dmaAddrBits)) @@ -97,8 +106,8 @@ class DmaFrontend(implicit val p: Parameters) val src_pglen = UInt(pgSize) - src_idx val dst_pglen = UInt(pgSize) - dst_idx - val src_stride = Reg(UInt(width = dmaSizeBits)) - val dst_stride = Reg(UInt(width = dmaSizeBits)) + val src_stride = Reg(UInt(width = dmaSegmentSizeBits)) + val dst_stride = Reg(UInt(width = dmaSegmentSizeBits)) val src_ppn = Reg(UInt(width = ppnBits)) val dst_ppn = Reg(UInt(width = ppnBits)) @@ -145,8 +154,7 @@ class DmaFrontend(implicit val p: Parameters) to_translate := to_translate & ~recv_choice // getting the src translation - // if this is a prefetch, dst_ppn and src_ppn should be equal - when (recv_choice(0) || cmd(1)) { + when (recv_choice(0)) { src_ppn := io.ptw.resp.bits.pte.ppn } .otherwise { dst_ppn := io.ptw.resp.bits.pte.ppn @@ -162,11 +170,13 @@ class DmaFrontend(implicit val p: Parameters) cmd = cmd, source = src_paddr, dest = dst_paddr, - length = tx_len) + length = tx_len, + size = word_size) io.dma.resp.ready := Bool(true) when (io.cpu.req.fire()) { val req = io.cpu.req.bits + val is_prefetch = req.cmd(2, 1) === UInt("b01") cmd := req.cmd src_vaddr := req.src_start dst_vaddr := req.dst_start @@ -175,7 +185,8 @@ class DmaFrontend(implicit val p: Parameters) segment_size := req.segment_size segments_left := req.nsegments - UInt(1) bytes_left := req.segment_size - to_translate := Mux(req.cmd(1), UInt("b10"), UInt("b11")) + word_size := req.word_size + to_translate := Mux(is_prefetch, UInt("b10"), UInt("b11")) ptw_sent := UInt(0) state := Mux(vm_enabled, s_translate, s_dma_req) } @@ -184,16 +195,20 @@ class DmaFrontend(implicit val p: Parameters) state := s_dma_req } - when (io.dma.req.fire()) { - src_vaddr := src_vaddr + tx_len - dst_vaddr := dst_vaddr + tx_len - bytes_left := bytes_left - tx_len - dma_busy := dma_busy | 
UIntToOH(dma_xact_id) - state := s_dma_update - } + def setBusyOnSend(req: DecoupledIO[DmaRequest]): UInt = + Mux(req.fire(), UIntToOH(req.bits.client_xact_id), UInt(0)) - when (io.dma.resp.fire()) { - dma_busy := dma_busy & ~UIntToOH(io.dma.resp.bits.client_xact_id) + def clearBusyOnRecv(resp: DecoupledIO[DmaResponse]): UInt = + ~Mux(resp.fire(), UIntToOH(resp.bits.client_xact_id), UInt(0)) + + dma_busy := (dma_busy | setBusyOnSend(io.dma.req)) & + clearBusyOnRecv(io.dma.resp) + + when (io.dma.req.fire()) { + src_vaddr := src_vaddr + Mux(adv_ptr(0), tx_len, UInt(0)) + dst_vaddr := dst_vaddr + Mux(adv_ptr(1), tx_len, UInt(0)) + bytes_left := bytes_left - tx_len + state := s_dma_update } when (state === s_dma_update) { @@ -211,16 +226,16 @@ class DmaFrontend(implicit val p: Parameters) state := Mux(vm_enabled, s_prepare, s_dma_req) } } .otherwise { - to_translate := Cat(dst_idx === UInt(0), !cmd(1) && src_idx === UInt(0)) + to_translate := adv_ptr & Cat(dst_idx === UInt(0), src_idx === UInt(0)) ptw_sent := UInt(0) state := s_translate } } when (state === s_prepare) { - to_translate := Cat( + to_translate := adv_ptr & Cat( dst_vpn =/= last_dst_vpn, - src_vpn =/= last_src_vpn && !cmd(1)) + src_vpn =/= last_src_vpn) ptw_sent := UInt(0) state := s_translate } @@ -235,35 +250,38 @@ object DmaCtrlRegNumbers { val DST_STRIDE = 1 val SEGMENT_SIZE = 2 val NSEGMENTS = 3 - val RESP_STATUS = 4 + val WORD_SIZE = 4 + val RESP_STATUS = 5 } import DmaCtrlRegNumbers._ class DmaCtrlRegFile(implicit p: Parameters) extends ClientDmaModule()(p) { - private val nWriteRegs = 4 + private val nWriteRegs = 5 private val nReadRegs = 1 private val nRegs = nWriteRegs + nReadRegs val io = new Bundle { val wen = Bool(INPUT) val addr = UInt(INPUT, log2Up(nRegs)) - val wdata = UInt(INPUT, dmaSizeBits) - val rdata = UInt(OUTPUT, dmaSizeBits) + val wdata = UInt(INPUT, dmaSegmentSizeBits) + val rdata = UInt(OUTPUT, dmaSegmentSizeBits) - val src_stride = UInt(OUTPUT, dmaSizeBits) - val dst_stride = 
UInt(OUTPUT, dmaSizeBits) - val segment_size = UInt(OUTPUT, dmaSizeBits) + val src_stride = UInt(OUTPUT, dmaSegmentSizeBits) + val dst_stride = UInt(OUTPUT, dmaSegmentSizeBits) + val segment_size = UInt(OUTPUT, dmaSegmentSizeBits) val nsegments = UInt(OUTPUT, dmaSegmentBits) + val word_size = UInt(OUTPUT, dmaWordSizeBits) val status = UInt(INPUT, dmaStatusBits) } - val regs = Reg(Vec(nWriteRegs, UInt(width = dmaSizeBits))) + val regs = Reg(Vec(nWriteRegs, UInt(width = dmaSegmentSizeBits))) io.src_stride := regs(SRC_STRIDE) io.dst_stride := regs(DST_STRIDE) io.segment_size := regs(SEGMENT_SIZE) io.nsegments := regs(NSEGMENTS) + io.word_size := regs(WORD_SIZE) when (io.wen && io.addr < UInt(nWriteRegs)) { regs.write(io.addr, io.wdata) @@ -283,9 +301,9 @@ class DmaController(implicit p: Parameters) extends RoCC()(p) val cmd = Queue(io.cmd) val inst = cmd.bits.inst - val is_transfer = inst.funct < UInt(4) - val is_cr_write = inst.funct === UInt(4) - val is_cr_read = inst.funct === UInt(5) + val is_transfer = inst.funct < UInt(8) + val is_cr_write = inst.funct === UInt(8) + val is_cr_read = inst.funct === UInt(9) val is_cr_access = is_cr_write || is_cr_read val resp_rd = Reg(io.resp.bits.rd) @@ -311,7 +329,8 @@ class DmaController(implicit p: Parameters) extends RoCC()(p) src_stride = crfile.io.src_stride, dst_stride = crfile.io.dst_stride, segment_size = crfile.io.segment_size, - nsegments = crfile.io.nsegments) + nsegments = crfile.io.nsegments, + word_size = crfile.io.word_size) cmd.ready := state === s_idle && (!is_transfer || frontend.io.cpu.req.ready) io.resp.valid := state === s_resp From 13ce91e45346380bfb84d3daafc8f7db2cb37cd3 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 12 Jan 2016 12:42:57 -0800 Subject: [PATCH 0945/1087] fix Chisel3 compat warnings in ICache and FPU --- rocket/src/main/scala/fpu.scala | 2 +- rocket/src/main/scala/frontend.scala | 1 - rocket/src/main/scala/icache.scala | 14 +++++++------- 3 files changed, 8 insertions(+), 9 
deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 4a9a5e1a..9be609a6 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -452,7 +452,7 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { val fp_decoder = Module(new FPUDecoder) fp_decoder.io.inst := io.inst - val cp_ctrl = new FPUCtrlSigs + val cp_ctrl = Wire(new FPUCtrlSigs) cp_ctrl <> io.cp_req.bits io.cp_resp.valid := Bool(false) io.cp_resp.bits.data := UInt(0) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index fd45d299..717cac7a 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -100,7 +100,6 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.ptw.invalidate - icache.io.resp.ready := !stall && !s1_same_block io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid) io.cpu.resp.bits.pc := s2_pc diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index e2ef913d..ca68a2b7 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -43,7 +43,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara val rdy = Wire(Bool()) val refill_addr = Reg(UInt(width = paddrBits)) - val s1_any_tag_hit = Bool() + val s1_any_tag_hit = Wire(Bool()) val s1_valid = Reg(init=Bool(false)) val s1_pgoff = Reg(UInt(width = pgIdxBits)) @@ -77,7 +77,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0) val entagbits = code.width(tagBits) - val tag_array = SeqMem(Vec(Bits(width = entagbits), nWays), nSets) + val tag_array = SeqMem(nSets, Vec(nWays, Bits(width = entagbits))) val tag_rdata = 
tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid) when (refill_done) { val tag = code.encode(refill_tag).toUInt @@ -92,13 +92,13 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara vb_array := Bits(0) invalidated := Bool(true) } - val s1_disparity = Vec.fill(nWays){Bool()} + val s1_disparity = Wire(Vec(nWays, Bool())) for (i <- 0 until nWays) when (s1_valid && s1_disparity(i)) { vb_array := vb_array.bitSet(Cat(UInt(i), s1_idx), Bool(false)) } - val s1_tag_match = Vec.fill(nWays){Bool()} - val s1_tag_hit = Vec.fill(nWays){Bool()} - val s1_dout = Vec.fill(nWays){(Bits())} + val s1_tag_match = Wire(Vec(nWays, Bool())) + val s1_tag_hit = Wire(Vec(nWays, Bool())) + val s1_dout = Wire(Vec(nWays, Bits(width = rowBits))) for (i <- 0 until nWays) { val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool @@ -113,7 +113,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara s1_any_tag_hit := s1_tag_hit.reduceLeft(_||_) && !s1_disparity.reduceLeft(_||_) for (i <- 0 until nWays) { - val data_array = SeqMem(Bits(width = code.width(rowBits)), nSets*refillCycles) + val data_array = SeqMem(nSets * refillCycles, Bits(width = code.width(rowBits))) val wen = narrow_grant.valid && repl_way === UInt(i) when (wen) { val e_d = code.encode(narrow_grant.bits.data).toUInt From 31d537c405b31b6b26ed74337176a6de39f7dcdf Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 12 Jan 2016 15:36:16 -0800 Subject: [PATCH 0946/1087] Add missing cloneType --- rocket/src/main/scala/rocc.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index a0d68abd..84baa909 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -52,6 +52,8 @@ class RoCCInterface(implicit p: Parameters) extends Bundle { val exception = Bool(INPUT) val dma = new DmaIO + + override def cloneType = 
new RoCCInterface().asInstanceOf[this.type] } abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) { From 7bf503a2751c391b294ac35cb3854ec84869705a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 12 Jan 2016 16:06:23 -0800 Subject: [PATCH 0947/1087] Remove four integer/FP converters --- rocket/src/main/scala/fpu.scala | 56 ++++++++++----------------------- 1 file changed, 17 insertions(+), 39 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 9be609a6..7da59549 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -235,9 +235,7 @@ class FPToInt extends Module when (io.in.valid) { in := io.in.bits - when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd != FCMD_MV_XF && - // need to also check toint because CVT_IF and SQRT overlap - !(io.in.bits.cmd === FCMD_CVT_IF && io.in.bits.toint)) { + when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd != FCMD_MV_XF) { in.in1 := in1_upconvert in.in2 := in2_upconvert } @@ -258,23 +256,14 @@ class FPToInt extends Module val dcmp_out = (~in.rm & Cat(dcmp.io.lt, dcmp.io.eq)).orR val dcmp_exc = dcmp.io.exceptionFlags - val s2l = Module(new hardfloat.RecFNToIN(8, 24, 64)) - val s2w = Module(new hardfloat.RecFNToIN(8, 24, 32)) - s2l.io.in := in.in1 - s2l.io.roundingMode := in.rm - s2l.io.signedOut := in.typ(0) ^ 1 - s2w.io.in := in.in1 - s2w.io.roundingMode := in.rm - s2w.io.signedOut := in.typ(0) ^ 1 - val d2l = Module(new hardfloat.RecFNToIN(11, 53, 64)) val d2w = Module(new hardfloat.RecFNToIN(11, 53, 32)) d2l.io.in := in.in1 d2l.io.roundingMode := in.rm - d2l.io.signedOut := in.typ(0) ^ 1 + d2l.io.signedOut := ~in.typ(0) d2w.io.in := in.in1 d2w.io.roundingMode := in.rm - d2w.io.signedOut := in.typ(0) ^ 1 + d2w.io.signedOut := ~in.typ(0) io.out.bits.toint := Mux(in.rm(0), classify_out, unrec_out) io.out.bits.store := unrec_out @@ -285,15 +274,9 @@ class FPToInt extends Module io.out.bits.exc := dcmp_exc } when 
(in.cmd === FCMD_CVT_IF) { - when (in.single) { - io.out.bits.toint := Mux(in.typ(1), s2l.io.out, s2w.io.out.toSInt).toUInt - val sflags = Mux(in.typ(1), s2l.io.intExceptionFlags, s2w.io.intExceptionFlags) - io.out.bits.exc := Cat(sflags(2, 1).orR, UInt(0, 3), sflags(0)) - } .otherwise { - io.out.bits.toint := Mux(in.typ(1), d2l.io.out, d2w.io.out.toSInt).toUInt - val dflags = Mux(in.typ(1), d2l.io.intExceptionFlags, d2w.io.intExceptionFlags) - io.out.bits.exc := Cat(dflags(2, 1).orR, UInt(0, 3), dflags(0)) - } + io.out.bits.toint := Mux(in.typ(1), d2l.io.out.toSInt, d2w.io.out.toSInt).toUInt + val dflags = Mux(in.typ(1), d2l.io.intExceptionFlags, d2w.io.intExceptionFlags) + io.out.bits.exc := Cat(dflags(2, 1).orR, UInt(0, 3), dflags(0)) } io.out.valid := valid @@ -317,31 +300,26 @@ class IntToFP(val latency: Int) extends Module mux.data := Cat(SInt(-1, 32), hardfloat.recFNFromFN(8, 24, in.bits.in1)) } + val longValue = + Mux(in.bits.typ(1), in.bits.in1.toSInt, + Mux(in.bits.typ(0), in.bits.in1(31,0).zext, in.bits.in1(31,0).toSInt)) val l2s = Module(new hardfloat.INToRecFN(64, 8, 24)) - val w2s = Module(new hardfloat.INToRecFN(32, 8, 24)) - l2s.io.signedIn := in.bits.typ(0) ^ 1 - l2s.io.in := in.bits.in1 + l2s.io.signedIn := ~in.bits.typ(0) + l2s.io.in := longValue l2s.io.roundingMode := in.bits.rm - w2s.io.signedIn := in.bits.typ(0) ^ 1 - w2s.io.in := in.bits.in1 - w2s.io.roundingMode := in.bits.rm val l2d = Module(new hardfloat.INToRecFN(64, 11, 53)) - val w2d = Module(new hardfloat.INToRecFN(32, 11, 53)) - l2d.io.signedIn := in.bits.typ(0) ^ 1 - l2d.io.in := in.bits.in1 + l2d.io.signedIn := ~in.bits.typ(0) + l2d.io.in := longValue l2d.io.roundingMode := in.bits.rm - w2d.io.signedIn := in.bits.typ(0) ^ 1 - w2d.io.in := in.bits.in1 - w2d.io.roundingMode := in.bits.rm when (in.bits.cmd === FCMD_CVT_FI) { when (in.bits.single) { - mux.data := Cat(SInt(-1, 32), Mux(in.bits.typ(1), l2s.io.out, w2s.io.out)) - mux.exc := Mux(in.bits.typ(1), l2s.io.exceptionFlags, 
w2s.io.exceptionFlags) + mux.data := Cat(SInt(-1, 32), l2s.io.out) + mux.exc := l2s.io.exceptionFlags }.otherwise { - mux.data := Mux(in.bits.typ(1), l2d.io.out, w2d.io.out) - mux.exc := Mux(in.bits.typ(1), l2d.io.exceptionFlags, w2d.io.exceptionFlags) + mux.data := l2d.io.out + mux.exc := l2d.io.exceptionFlags } } From 00d17abd781cfc04e5c886fb934325fbaa220a31 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 12 Jan 2016 16:23:06 -0800 Subject: [PATCH 0948/1087] Don't ignore data value when writing MIPI --- rocket/src/main/scala/csr.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index c9675527..1c29a2ef 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -398,7 +398,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_mip.msip := new_mip.msip } when (decoded_addr(CSRs.mipi)) { - reg_mip.msip := true + reg_mip.msip := wdata(0) } when (decoded_addr(CSRs.mie)) { val new_mie = new MIP().fromBits(wdata) From ae98af707731e771e2a73a0de5583dcb87dbbc7c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 12 Jan 2016 16:27:14 -0800 Subject: [PATCH 0949/1087] don't mix SInt/UInt --- rocket/src/main/scala/fpu.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 7da59549..35b36a5e 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -305,12 +305,12 @@ class IntToFP(val latency: Int) extends Module Mux(in.bits.typ(0), in.bits.in1(31,0).zext, in.bits.in1(31,0).toSInt)) val l2s = Module(new hardfloat.INToRecFN(64, 8, 24)) l2s.io.signedIn := ~in.bits.typ(0) - l2s.io.in := longValue + l2s.io.in := longValue.toUInt l2s.io.roundingMode := in.bits.rm val l2d = Module(new hardfloat.INToRecFN(64, 11, 53)) l2d.io.signedIn := ~in.bits.typ(0) - l2d.io.in := longValue + l2d.io.in := longValue.toUInt 
l2d.io.roundingMode := in.bits.rm when (in.bits.cmd === FCMD_CVT_FI) { From d51c127646187be681d0c32b1065a8cedfc4255d Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 13 Jan 2016 21:21:41 -0800 Subject: [PATCH 0950/1087] fix deprecation warnings in rocket.scala --- rocket/src/main/scala/rocket.scala | 34 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 53824d51..f67302d9 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -73,7 +73,7 @@ class RegFile(n: Int, w: Int, zero: Boolean = false) { } def write(addr: UInt, data: UInt) = { canRead = false - when (addr != UInt(0)) { + when (addr =/= UInt(0)) { access(addr) := data for ((raddr, rdata) <- reads) when (addr === raddr) { rdata := data } @@ -85,7 +85,7 @@ object ImmGen { def apply(sel: UInt, inst: UInt) = { val sign = Mux(sel === IMM_Z, SInt(0), inst(31).toSInt) val b30_20 = Mux(sel === IMM_U, inst(30,20).toSInt, sign) - val b19_12 = Mux(sel != IMM_U && sel != IMM_UJ, sign, inst(19,12).toSInt) + val b19_12 = Mux(sel =/= IMM_U && sel =/= IMM_UJ, sign, inst(19,12).toSInt) val b11 = Mux(sel === IMM_U || sel === IMM_Z, SInt(0), Mux(sel === IMM_UJ, inst(20).toSInt, Mux(sel === IMM_SB, inst(7).toSInt, sign))) @@ -177,7 +177,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val ctrl_killd = Wire(Bool()) val csr = Module(new CSRFile) - val id_csr_en = id_ctrl.csr != CSR.N + val id_csr_en = id_ctrl.csr =/= CSR.N val id_system_insn = id_ctrl.csr === CSR.I val id_csr_ren = (id_ctrl.csr === CSR.S || id_ctrl.csr === CSR.C) && id_raddr1 === UInt(0) val id_csr = Mux(id_csr_ren, CSR.R, id_ctrl.csr) @@ -225,9 +225,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { // execute stage val bypass_mux = Vec(bypass_sources.map(_._3)) - val ex_reg_rs_bypass = Reg(Vec(Bool(), id_raddr.size)) - val ex_reg_rs_lsb = Reg(Vec(UInt(), 
id_raddr.size)) - val ex_reg_rs_msb = Reg(Vec(UInt(), id_raddr.size)) + val ex_reg_rs_bypass = Reg(Vec(id_raddr.size, Bool())) + val ex_reg_rs_lsb = Reg(Vec(id_raddr.size, UInt())) + val ex_reg_rs_msb = Reg(Vec(id_raddr.size, UInt())) val ex_rs = for (i <- 0 until id_raddr.size) yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) val ex_imm = ImmGen(ex_ctrl.sel_imm, ex_reg_inst) @@ -306,7 +306,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), SInt(4))) val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt val mem_npc = (Mux(mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(vaddrBits-1,0)).toSInt, mem_br_target) & SInt(-2)).toUInt - val mem_wrong_npc = mem_npc != ex_reg_pc || !ex_reg_valid + val mem_wrong_npc = mem_npc =/= ex_reg_pc || !ex_reg_valid val mem_npc_misaligned = mem_npc(1) val mem_misprediction = mem_wrong_npc && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) @@ -407,7 +407,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr) val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data, Mux(ll_wen, ll_wdata, - Mux(wb_ctrl.csr != CSR.N, csr.io.rw.rdata, + Mux(wb_ctrl.csr =/= CSR.N, csr.io.rw.rdata, wb_reg_wdata))) when (rf_wen) { rf.write(rf_waddr, rf_wdata) } @@ -428,9 +428,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { csr.io.rw.cmd := Mux(wb_reg_valid, wb_ctrl.csr, CSR.N) csr.io.rw.wdata := wb_reg_wdata - val hazard_targets = Seq((id_ctrl.rxs1 && id_raddr1 != UInt(0), id_raddr1), - (id_ctrl.rxs2 && id_raddr2 != UInt(0), id_raddr2), - (id_ctrl.wxd && id_waddr != UInt(0), id_waddr)) + val hazard_targets = Seq((id_ctrl.rxs1 && id_raddr1 =/= UInt(0), id_raddr1), + (id_ctrl.rxs2 && id_raddr2 =/= 
UInt(0), id_raddr2), + (id_ctrl.wxd && id_waddr =/= UInt(0), id_waddr)) val fp_hazard_targets = Seq((io.fpu.dec.ren1, id_raddr1), (io.fpu.dec.ren2, id_raddr2), (io.fpu.dec.ren3, id_raddr3), @@ -442,7 +442,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { sboard.set(wb_set_sboard && wb_wen, wb_waddr) // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage. - val ex_cannot_bypass = ex_ctrl.csr != CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc + val ex_cannot_bypass = ex_ctrl.csr =/= CSR.N || ex_ctrl.jalr || ex_ctrl.mem || ex_ctrl.div || ex_ctrl.fp || ex_ctrl.rocc val data_hazard_ex = ex_ctrl.wxd && checkHazards(hazard_targets, _ === ex_waddr) val fp_data_hazard_ex = ex_ctrl.wfd && checkHazards(fp_hazard_targets, _ === ex_waddr) val id_ex_hazard = ex_reg_valid && (data_hazard_ex && ex_cannot_bypass || fp_data_hazard_ex) @@ -451,7 +451,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val mem_mem_cmd_bh = if (fastLoadWord) Bool(!fastLoadByte) && mem_reg_slow_bypass else Bool(true) - val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc + val mem_cannot_bypass = mem_ctrl.csr =/= CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr) val fp_data_hazard_mem = mem_ctrl.wfd && checkHazards(fp_hazard_targets, _ === mem_waddr) val id_mem_hazard = mem_reg_valid && (data_hazard_mem && mem_cannot_bypass || fp_data_hazard_mem) @@ -551,10 +551,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { when (wfd) { printf ("%d 0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd+UInt(32)) } - .elsewhen (wxd && rd != UInt(0) && has_data) { + .elsewhen (wxd && rd =/= UInt(0) && has_data) { printf ("%d 0x%x (0x%x) x%d 0x%x\n", priv, pc, inst, rd, rf_wdata) } - .elsewhen (wxd && rd 
!= UInt(0) && !has_data) { + .elsewhen (wxd && rd =/= UInt(0) && !has_data) { printf ("%d 0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd) } .otherwise { @@ -562,7 +562,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } } - when (ll_wen && rf_waddr != UInt(0)) { + when (ll_wen && rf_waddr =/= UInt(0)) { printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata) } } @@ -586,7 +586,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) val a = a0 >> vaddrBits-1 val e = ea(vaddrBits,vaddrBits-1) - Mux(a === UInt(0) || a === UInt(1), e != UInt(0), + Mux(a === UInt(0) || a === UInt(1), e =/= UInt(0), Mux(a.toSInt === SInt(-1) || a.toSInt === SInt(-2), e.toSInt === SInt(-1), e(0))) } From 120361226da35d6d648047f2067fec386da53f33 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 14 Jan 2016 13:57:45 -0800 Subject: [PATCH 0951/1087] fix more Chisel3 deprecations --- rocket/src/main/scala/arbiter.scala | 2 +- rocket/src/main/scala/btb.scala | 2 +- rocket/src/main/scala/csr.scala | 10 +++++----- rocket/src/main/scala/fpu.scala | 6 +++--- rocket/src/main/scala/frontend.scala | 2 +- rocket/src/main/scala/multiplier.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 26 +++++++++++++------------- rocket/src/main/scala/ptw.scala | 4 ++-- rocket/src/main/scala/tlb.scala | 18 +++++++++--------- 9 files changed, 37 insertions(+), 37 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 16c04858..126dc429 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -10,7 +10,7 @@ import junctions.ParameterizedBundle class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module { val io = new Bundle { - val requestor = Vec(new HellaCacheIO, n).flip + val requestor = Vec(n, new HellaCacheIO).flip val mem = new HellaCacheIO } diff --git a/rocket/src/main/scala/btb.scala 
b/rocket/src/main/scala/btb.scala index 3aa1b6a6..6d835aa4 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -46,7 +46,7 @@ class RAS(nras: Int) { private val count = Reg(init=UInt(0,log2Up(nras+1))) private val pos = Reg(init=UInt(0,log2Up(nras))) - private val stack = Reg(Vec(UInt(), nras)) + private val stack = Reg(Vec(nras, UInt())) } class BHTResp(implicit p: Parameters) extends BtbBundle()(p) { diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 1c29a2ef..a3873c6b 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -87,8 +87,8 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle { val evec = UInt(OUTPUT, vaddrBitsExtended) val exception = Bool(INPUT) val retire = UInt(INPUT, log2Up(1+retireWidth)) - val uarch_counters = Vec(UInt(INPUT, log2Up(1+retireWidth)), 16) - val custom_mrw_csrs = Vec(UInt(INPUT, xLen), nCustomMrwCsrs) + val uarch_counters = Vec(16, UInt(INPUT, log2Up(1+retireWidth))) + val custom_mrw_csrs = Vec(nCustomMrwCsrs, UInt(INPUT, xLen)) val cause = UInt(INPUT, xLen) val pc = UInt(INPUT, vaddrBitsExtended) val fatc = Bool(OUTPUT) @@ -149,11 +149,11 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) checkInterrupt(PRV_M, reg_mie.msip && reg_mip.msip, 0) checkInterrupt(PRV_S, reg_mie.stip && reg_mip.stip, 1) checkInterrupt(PRV_M, reg_mie.mtip && reg_mip.mtip, 1) - checkInterrupt(PRV_M, reg_fromhost != 0, 2) + checkInterrupt(PRV_M, reg_fromhost =/= 0, 2) checkInterrupt(PRV_M, irq_rocc, 3) val system_insn = io.rw.cmd === CSR.I - val cpu_ren = io.rw.cmd != CSR.N && !system_insn + val cpu_ren = io.rw.cmd =/= CSR.N && !system_insn val host_csr_req_valid = Reg(Bool()) // don't reset val host_csr_req_fire = host_csr_req_valid && !cpu_ren @@ -266,7 +266,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val csr_addr_priv = io.rw.addr(9,8) val priv_sufficient = reg_mstatus.prv >= csr_addr_priv val read_only = 
io.rw.addr(11,10).andR - val cpu_wen = cpu_ren && io.rw.cmd != CSR.R && priv_sufficient + val cpu_wen = cpu_ren && io.rw.cmd =/= CSR.R && priv_sufficient val wen = cpu_wen && !read_only || host_csr_req_fire && host_csr_bits.rw val wdata = Mux(io.rw.cmd === CSR.W, io.rw.wdata, Mux(io.rw.cmd === CSR.C, io.rw.rdata & ~io.rw.wdata, diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 35b36a5e..77e37e8f 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -235,7 +235,7 @@ class FPToInt extends Module when (io.in.valid) { in := io.in.bits - when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd != FCMD_MV_XF) { + when (io.in.bits.single && !io.in.bits.ldst && io.in.bits.cmd =/= FCMD_MV_XF) { in.in1 := in1_upconvert in.in2 := in2_upconvert } @@ -357,7 +357,7 @@ class FPToFP(val latency: Int) extends Module val issnan2 = isnan2 && ~Mux(in.bits.single, in.bits.in2(22), in.bits.in2(51)) val minmax_exc = Cat(issnan1 || issnan2, Bits(0,4)) val isMax = in.bits.rm(0) - val isLHS = isnan2 || isMax != io.lt && !isnan1 + val isLHS = isnan2 || isMax =/= io.lt && !isnan1 val mux = Wire(new FPResult) mux.exc := minmax_exc @@ -539,7 +539,7 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { val memLatencyMask = latencyMask(mem_ctrl, 2) val wen = Reg(init=Bits(0, maxLatency-1)) - val winfo = Reg(Vec(Bits(), maxLatency-1)) + val winfo = Reg(Vec(maxLatency-1, Bits())) val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid) val mem_winfo = Cat(mem_cp_valid, pipeid(mem_ctrl), mem_ctrl.single, mem_reg_inst(11,7)) //single only used for debugging diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 717cac7a..a5b9e9fa 100644 --- a/rocket/src/main/scala/frontend.scala +++ 
b/rocket/src/main/scala/frontend.scala @@ -11,7 +11,7 @@ class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) { class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { val pc = UInt(width = vaddrBitsExtended) // ID stage PC - val data = Vec(Bits(width = coreInstBits), fetchWidth) + val data = Vec(fetchWidth, Bits(width = coreInstBits)) val mask = Bits(width = fetchWidth) val xcpt_if = Bool() } diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index f6f4c9be..ce063399 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -97,7 +97,7 @@ class MulDiv( val nextMulReg = Cat(prod, mplier(mulw-1,unroll)).toUInt val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * unroll)(log2Up(mulw)-1,0))(mulw-1,0) - val eOut = Bool(earlyOut) && count != mulw/unroll-1 && count != 0 && + val eOut = Bool(earlyOut) && count =/= mulw/unroll-1 && count =/= 0 && !isHi && (mplier & ~eOutMask) === UInt(0) val eOutRes = (mulReg >> (mulw - count * unroll)(log2Up(mulw)-1,0)) val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0)) @@ -136,7 +136,7 @@ class MulDiv( isMul := cmdMul isHi := cmdHi count := 0 - neg_out := !cmdMul && Mux(cmdHi, lhs_sign, lhs_sign != rhs_sign) + neg_out := !cmdMul && Mux(cmdHi, lhs_sign, lhs_sign =/= rhs_sign) divisor := Cat(rhs_sign, rhs_in) remainder := lhs_in req := io.req.bits diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index ea07eecf..7c962fd9 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -330,7 +330,7 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { } } - io.idx_match := (state != s_invalid) && idx_match + io.idx_match := (state =/= s_invalid) && idx_match io.refill.way_en := req.way_en io.refill.addr := (if(refillCycles > 1) Cat(req_idx, refill_cnt) else req_idx) << rowOffBits io.tag := req.addr 
>> untagBits @@ -338,7 +338,7 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.req_sec_rdy := sec_rdy && rpq.io.enq.ready val meta_hazard = Reg(init=UInt(0,2)) - when (meta_hazard != UInt(0)) { meta_hazard := meta_hazard + 1 } + when (meta_hazard =/= UInt(0)) { meta_hazard := meta_hazard + 1 } when (io.meta_write.fire()) { meta_hazard := 1 } io.probe_rdy := !idx_match || (!states_before_refill.contains(state) && meta_hazard === 0) @@ -405,12 +405,12 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { val sdq = Mem(sdqDepth, io.req.bits.data) when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } - val idxMatch = Wire(Vec(Bool(), nMSHRs)) - val tagList = Wire(Vec(Bits(width = tagBits), nMSHRs)) + val idxMatch = Wire(Vec(nMSHRs, Bool())) + val tagList = Wire(Vec(nMSHRs, Bits(width = tagBits))) val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits - val wbTagList = Wire(Vec(Bits(), nMSHRs)) - val refillMux = Wire(Vec(new L1RefillReq, nMSHRs)) + val wbTagList = Wire(Vec(nMSHRs, Bits())) + val refillMux = Wire(Vec(nMSHRs, new L1RefillReq)) val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs)) val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) val mem_req_arb = Module(new LockingArbiter( @@ -690,7 +690,7 @@ class DataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val read = Decoupled(new L1DataReadReq).flip val write = Decoupled(new L1DataWriteReq).flip - val resp = Vec(Bits(OUTPUT, encRowBits), nWays) + val resp = Vec(nWays, Bits(OUTPUT, encRowBits)) } val waddr = io.write.bits.addr >> rowOffBits @@ -700,10 +700,10 @@ class DataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { for (w <- 0 until nWays by rowWords) { val wway_en = io.write.bits.way_en(w+rowWords-1,w) val rway_en = io.read.bits.way_en(w+rowWords-1,w) - val resp = Wire(Vec(Bits(width = encRowBits), rowWords)) + val resp = Wire(Vec(rowWords, 
Bits(width = encRowBits))) val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) for (p <- 0 until resp.size) { - val array = SeqMem(Vec(Bits(width=encDataBits), rowWords), nSets*refillCycles) + val array = SeqMem(nSets*refillCycles, Vec(rowWords, Bits(width=encDataBits))) when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { val data = Vec.fill(rowWords)(io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) array.write(waddr, data, wway_en.toBools) @@ -720,7 +720,7 @@ class DataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { } } else { for (w <- 0 until nWays) { - val array = SeqMem(Vec(Bits(width=encDataBits), rowWords), nSets*refillCycles) + val array = SeqMem(nSets*refillCycles, Vec(rowWords, Bits(width=encDataBits))) when (io.write.bits.way_en(w) && io.write.valid) { val data = Vec.tabulate(rowWords)(i => io.write.bits.data(encDataBits*(i+1)-1,encDataBits*i)) array.write(waddr, data, io.write.bits.wmask.toBools) @@ -760,7 +760,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) val s2_killed = Reg(next=s1_valid && io.cpu.req.bits.kill) val s2_req = Reg(io.cpu.req.bits) - val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd != M_NOP + val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_NOP val s2_recycle = Wire(Bool()) val s2_valid_masked = Wire(Bool()) @@ -893,9 +893,9 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { } when (io.cpu.invalidate_lr) { lrsc_count := 0 } - val s2_data = Wire(Vec(Bits(width=encRowBits), nWays)) + val s2_data = Wire(Vec(nWays, Bits(width=encRowBits))) for (w <- 0 until nWays) { - val regs = Reg(Vec(Bits(width = encDataBits), rowWords)) + val regs = Reg(Vec(rowWords, Bits(width = encDataBits))) val en1 = s1_clk_en && s1_tag_eq_way(w) for (i <- 0 until regs.size) { val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback) diff --git 
a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 1a551bda..a8bfa03c 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -54,7 +54,7 @@ class PTE(implicit p: Parameters) extends CoreBundle()(p) { class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { - val requestor = Vec(new TLBPTWIO, n).flip + val requestor = Vec(n, new TLBPTWIO).flip val mem = new HellaCacheIO val dpath = new DatapathPTWIO } @@ -85,7 +85,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { val (pte_cache_hit, pte_cache_data) = { val size = log2Up(pgLevels * 2) val plru = new PseudoLRU(size) - val valid = Reg(Vec(Bool(), size)) + val valid = Reg(Vec(size, Bool())) val validBits = valid.toBits val tags = Mem(size, UInt(width = paddrBits)) val data = Mem(size, UInt(width = ppnBits)) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 9244fa23..c76ac068 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -100,14 +100,14 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { val tag_hit_addr = OHToUInt(tag_cam.io.hits) // permission bit arrays - val valid_array = Reg(Vec(Bool(), entries)) // PTE is valid (not equivalent to CAM tag valid bit!) - val ur_array = Reg(Vec(Bool(), entries)) // user read permission - val uw_array = Reg(Vec(Bool(), entries)) // user write permission - val ux_array = Reg(Vec(Bool(), entries)) // user execute permission - val sr_array = Reg(Vec(Bool(), entries)) // supervisor read permission - val sw_array = Reg(Vec(Bool(), entries)) // supervisor write permission - val sx_array = Reg(Vec(Bool(), entries)) // supervisor execute permission - val dirty_array = Reg(Vec(Bool(), entries)) // PTE dirty bit + val valid_array = Reg(Vec(entries, Bool())) // PTE is valid (not equivalent to CAM tag valid bit!) 
+ val ur_array = Reg(Vec(entries, Bool())) // user read permission + val uw_array = Reg(Vec(entries, Bool())) // user write permission + val ux_array = Reg(Vec(entries, Bool())) // user execute permission + val sr_array = Reg(Vec(entries, Bool())) // supervisor read permission + val sw_array = Reg(Vec(entries, Bool())) // supervisor write permission + val sx_array = Reg(Vec(entries, Bool())) // supervisor execute permission + val dirty_array = Reg(Vec(entries, Bool())) // PTE dirty bit when (io.ptw.resp.valid) { val pte = io.ptw.resp.bits.pte tag_ram(r_refill_waddr) := pte.ppn @@ -137,7 +137,7 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits) val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough - val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1) + val bad_va = io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1) // it's only a store hit if the dirty bit is set val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0))) val tag_hit = tag_hits.orR From 77e068c153fde22e753aa6132a7dc03f9063105e Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 14 Jan 2016 22:42:44 -0800 Subject: [PATCH 0952/1087] fix Chisel3 compat issue in SimpleHellaCacheIF --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 7c962fd9..4af94fa7 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1120,8 +1120,8 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module // stash d$ request in stage 1 if nacked (newer request) replayq2.io.enq.valid := s2_req_fire && s3_nack - replayq2.io.enq.bits.data := io.cache.resp.bits.store_data replayq2.io.enq.bits <> io.cache.resp.bits + replayq2.io.enq.bits.data := io.cache.resp.bits.store_data replayq2.io.deq.ready 
:= Bool(false) when (s2_nack) { From 52d6b0b1a55249bd12ba2fb747c4b78e4cdfa121 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 20 Jan 2016 17:42:31 -0800 Subject: [PATCH 0953/1087] Improve ALU QoR Rejigger muxes; share XOR gates between ADD/SUB, XOR, and BEQ. --- rocket/src/main/scala/dpath_alu.scala | 50 +++++++++++++-------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/rocket/src/main/scala/dpath_alu.scala b/rocket/src/main/scala/dpath_alu.scala index e26bff8a..841f0ec0 100644 --- a/rocket/src/main/scala/dpath_alu.scala +++ b/rocket/src/main/scala/dpath_alu.scala @@ -12,12 +12,12 @@ object ALU val FN_X = BitPat("b????") val FN_ADD = UInt(0) val FN_SL = UInt(1) + val FN_SEQ = UInt(2) + val FN_SNE = UInt(3) val FN_XOR = UInt(4) + val FN_SR = UInt(5) val FN_OR = UInt(6) val FN_AND = UInt(7) - val FN_SR = UInt(5) - val FN_SEQ = UInt(8) - val FN_SNE = UInt(9) val FN_SUB = UInt(10) val FN_SRA = UInt(11) val FN_SLT = UInt(12) @@ -35,11 +35,12 @@ object ALU val FN_MULHSU = FN_SLT val FN_MULHU = FN_SLTU - def isMulFN(fn: Bits, cmp: Bits) = fn(1,0) === cmp(1,0) - def isSub(cmd: Bits) = cmd(3) - def cmpUnsigned(cmd: Bits) = cmd(1) - def cmpInverted(cmd: Bits) = cmd(0) - def cmpEq(cmd: Bits) = !cmd(2) + def isMulFN(fn: UInt, cmp: UInt) = fn(1,0) === cmp(1,0) + def isSub(cmd: UInt) = cmd(3) + def isCmp(cmd: UInt) = cmd === FN_SEQ || cmd === FN_SNE || cmd >= FN_SLT + def cmpUnsigned(cmd: UInt) = cmd(1) + def cmpInverted(cmd: UInt) = cmd(0) + def cmpEq(cmd: UInt) = !cmd(3) } import ALU._ @@ -51,43 +52,42 @@ class ALU(implicit p: Parameters) extends CoreModule()(p) { val in1 = UInt(INPUT, xLen) val out = UInt(OUTPUT, xLen) val adder_out = UInt(OUTPUT, xLen) + val cmp_out = Bool(OUTPUT) } // ADD, SUB - val sum = io.in1 + Mux(isSub(io.fn), -io.in2, io.in2) + val in2_inv = Mux(isSub(io.fn), ~io.in2, io.in2) + val in1_xor_in2 = io.in1 ^ in2_inv + io.adder_out := io.in1 + in2_inv + isSub(io.fn) // SLT, SLTU - val cmp = cmpInverted(io.fn) ^ - 
Mux(cmpEq(io.fn), sum === UInt(0), - Mux(io.in1(xLen-1) === io.in2(xLen-1), sum(xLen-1), + io.cmp_out := cmpInverted(io.fn) ^ + Mux(cmpEq(io.fn), in1_xor_in2 === UInt(0), + Mux(io.in1(xLen-1) === io.in2(xLen-1), io.adder_out(xLen-1), Mux(cmpUnsigned(io.fn), io.in2(xLen-1), io.in1(xLen-1)))) // SLL, SRL, SRA - val full_shamt = io.in2(log2Up(xLen)-1,0) - val (shamt, shin_r) = - if (xLen == 32) (full_shamt, io.in1) + if (xLen == 32) (io.in2(4,0), io.in1) else { require(xLen == 64) val shin_hi_32 = Fill(32, isSub(io.fn) && io.in1(31)) val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32) - val shamt = Cat(full_shamt(5) & (io.dw === DW_64), full_shamt(4,0)) + val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0)) (shamt, Cat(shin_hi, io.in1(31,0))) } val shin = Mux(io.fn === FN_SR || io.fn === FN_SRA, shin_r, Reverse(shin_r)) val shout_r = (Cat(isSub(io.fn) & shin(xLen-1), shin).toSInt >> shamt)(xLen-1,0) val shout_l = Reverse(shout_r) + val shout = Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, UInt(0)) | + Mux(io.fn === FN_SL, shout_l, UInt(0)) - val out = - Mux(io.fn === FN_ADD || io.fn === FN_SUB, sum, - Mux(io.fn === FN_SR || io.fn === FN_SRA, shout_r, - Mux(io.fn === FN_SL, shout_l, - Mux(io.fn === FN_AND, io.in1 & io.in2, - Mux(io.fn === FN_OR, io.in1 | io.in2, - Mux(io.fn === FN_XOR, io.in1 ^ io.in2, - /* all comparisons */ cmp)))))) + // AND, OR, XOR + val logic = Mux(io.fn === FN_XOR || io.fn === FN_OR, in1_xor_in2, UInt(0)) | + Mux(io.fn === FN_OR || io.fn === FN_AND, io.in1 & io.in2, UInt(0)) + val shift_logic = (isCmp(io.fn) && io.cmp_out) | logic | shout + val out = Mux(io.fn === FN_ADD || io.fn === FN_SUB, io.adder_out, shift_logic) - io.adder_out := sum io.out := out if (xLen > 32) { require(xLen == 64) From d170fcd9137aece8d43fae831ec5710551d08ea3 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 21 Jan 2016 15:37:29 -0800 Subject: [PATCH 0954/1087] DecoupledHelper is now imported from junctions --- rocket/src/main/scala/arbiter.scala 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 126dc429..d0b64d9f 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -5,7 +5,7 @@ package rocket import Chisel._ import uncore._ import cde.{Parameters, Field} -import junctions.ParameterizedBundle +import junctions.{ParameterizedBundle, DecoupledHelper} class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module { From 58fcc6b7c6dcd7332d5be9c839b1ecdf97ce517a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 28 Jan 2016 11:44:59 -0800 Subject: [PATCH 0955/1087] Get rid of useless mux --- rocket/src/main/scala/icache.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index ca68a2b7..2e6a5b17 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -121,10 +121,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara else data_array.write(s1_idx, e_d) } val s0_raddr = s0_pgoff(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0)) - val s1_rdata = data_array.read(s0_raddr, !wen && s0_valid) - // if s1_tag_match is critical, replace with partial tag check - s1_dout(i) := 0 - when (s1_valid && rdy && !stall && (Bool(isDM) || s1_tag_match(i))) { s1_dout(i) := s1_rdata } + s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid) } // output signals From 305185c034336f08ca467dfcaf4113ee416e5783 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 14 Jan 2016 11:37:58 -0800 Subject: [PATCH 0956/1087] send DMA requests through MMIO and get responses through CSRs --- rocket/src/main/scala/csr.scala | 12 +- rocket/src/main/scala/dma.scala | 210 +++++++++++++---------- rocket/src/main/scala/instructions.scala | 1 + rocket/src/main/scala/rocc.scala | 14 +- rocket/src/main/scala/rocket.scala | 
4 + rocket/src/main/scala/tile.scala | 37 ++-- rocket/src/main/scala/tlb.scala | 24 +++ 7 files changed, 189 insertions(+), 113 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index a3873c6b..a1c277f9 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -252,12 +252,16 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } for (i <- 0 until nCustomMrwCsrs) { - val addr = 0x790 + i // turn 0x790 into parameter CustomMRWCSRBase? - require(addr >= 0x780 && addr <= 0x7ff, "custom MRW CSR address " + i + " is out of range") + val addr = CSRs.mrwbase + i require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use") read_mapping += addr -> io.custom_mrw_csrs(i) } + for ((addr, i) <- roccCsrs.zipWithIndex) { + require(!read_mapping.contains(addr), "RoCC: CSR address " + addr + " is already in use") + read_mapping += addr -> io.rocc.csr.rdata(i) + } + val addr = Mux(cpu_ren, io.rw.addr, host_csr_bits.addr) val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) } @@ -449,6 +453,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } } + io.rocc.csr.waddr := addr + io.rocc.csr.wdata := wdata + io.rocc.csr.wen := wen + when(this.reset) { reg_mstatus.zero1 := 0 reg_mstatus.zero2 := 0 diff --git a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala index c2a02bec..24240c18 100644 --- a/rocket/src/main/scala/dma.scala +++ b/rocket/src/main/scala/dma.scala @@ -71,19 +71,23 @@ class ClientDmaIO(implicit p: Parameters) extends ParameterizedBundle()(p) { val resp = Valid(new ClientDmaResponse).flip } -class DmaFrontend(implicit val p: Parameters) - extends Module with HasClientDmaParameters { +class DmaFrontend(implicit p: Parameters) extends CoreModule()(p) + with HasClientDmaParameters with HasTileLinkParameters { val io = new Bundle { val cpu = (new ClientDmaIO).flip - val dma = new DmaIO + val mem = new 
ClientUncachedTileLinkIO val ptw = new TLBPTWIO val busy = Bool(OUTPUT) + val incr_outstanding = Bool(OUTPUT) + val host_id = UInt(INPUT, log2Up(nCores)) } - private val pgSize = 1 << pgIdxBits + val tlb = Module(new DecoupledTLB()(p.alterPartial({ + case CacheName => "L1D" + }))) + io.ptw <> tlb.io.ptw - val priv = Mux(io.ptw.status.mprv, io.ptw.status.prv1, io.ptw.status.prv) - val vm_enabled = io.ptw.status.vm(3) && priv <= UInt(PRV_S) + private val pgSize = 1 << pgIdxBits val cmd = Reg(UInt(width = DMA_CMD_SZ)) val adv_ptr = MuxLookup(cmd, UInt("b11"), Seq( @@ -112,17 +116,17 @@ class DmaFrontend(implicit val p: Parameters) val src_ppn = Reg(UInt(width = ppnBits)) val dst_ppn = Reg(UInt(width = ppnBits)) - val src_paddr = Mux(vm_enabled, Cat(src_ppn, src_idx), src_vaddr) - val dst_paddr = Mux(vm_enabled, Cat(dst_ppn, dst_idx), dst_vaddr) + val src_paddr = Cat(src_ppn, src_idx) + val dst_paddr = Cat(dst_ppn, dst_idx) val last_src_vpn = Reg(UInt(width = vpnBits)) val last_dst_vpn = Reg(UInt(width = vpnBits)) - val tx_len = Mux(!vm_enabled, bytes_left, - Util.minUInt(src_pglen, dst_pglen, bytes_left)) + val tx_len = Util.minUInt(src_pglen, dst_pglen, bytes_left) - val (dma_xact_id, _) = Counter(io.dma.req.fire(), nDmaXactsPerClient) - val dma_busy = Reg(init = UInt(0, nDmaXactsPerClient)) + val dma_busy = Reg(init = UInt(0, tlMaxClientXacts)) + val dma_xact_id = PriorityEncoder(~dma_busy) + val (dma_req_beat, dma_req_done) = Counter(io.mem.acquire.fire(), tlDataBeats) val (s_idle :: s_translate :: s_dma_req :: s_dma_update :: s_prepare :: s_finish :: Nil) = Enum(Bits(), 6) @@ -130,49 +134,80 @@ class DmaFrontend(implicit val p: Parameters) // lower bit is for src, higher bit is for dst val to_translate = Reg(init = UInt(0, 2)) - val ptw_sent = Reg(init = UInt(0, 2)) - val ptw_to_send = to_translate & ~ptw_sent - val ptw_resp_id = Reg(init = UInt(0, 1)) + val tlb_sent = Reg(init = UInt(0, 2)) + val tlb_to_send = to_translate & ~tlb_sent val resp_status = 
Reg(UInt(width = dmaStatusBits)) - io.ptw.req.valid := ptw_to_send.orR && vm_enabled - io.ptw.req.bits.addr := Mux(ptw_to_send(0), src_vpn, dst_vpn) - io.ptw.req.bits.prv := io.ptw.status.prv - io.ptw.req.bits.store := !ptw_to_send(0) // storing to destination - io.ptw.req.bits.fetch := Bool(true) + def make_acquire( + addr_beat: UInt, client_xact_id: UInt, client_id: UInt, + cmd: UInt, source: UInt, dest: UInt, + length: UInt, size: UInt): Acquire = { - when (io.ptw.req.fire()) { - ptw_sent := ptw_sent | PriorityEncoderOH(ptw_to_send) + val data_blob = Wire(UInt(width = tlDataBeats * tlDataBits)) + data_blob := DmaRequest( + xact_id = UInt(0), + client_id = client_id, + cmd = cmd, + source = source, + dest = dest, + length = length, + size = size).toBits + val data_beats = Vec(tlDataBeats, UInt(width = tlDataBits)).fromBits(data_blob) + val base_addr = addrMap("devices:dma").start + val addr_block = UInt(base_addr >> (tlBeatAddrBits + tlByteAddrBits)) + + PutBlock( + client_xact_id = client_xact_id, + addr_block = addr_block, + addr_beat = addr_beat, + data = data_beats(addr_beat), + alloc = Bool(false)) } - when (io.ptw.resp.valid) { - when (io.ptw.resp.bits.error) { + tlb.io.req.valid := tlb_to_send.orR + tlb.io.req.bits.vpn := Mux(tlb_to_send(0), src_vpn, dst_vpn) + tlb.io.req.bits.passthrough := Bool(false) + tlb.io.req.bits.instruction := Bool(false) + tlb.io.req.bits.store := !tlb_to_send(0) + tlb.io.resp.ready := tlb_sent.orR + + when (tlb.io.req.fire()) { + tlb_sent := tlb_sent | PriorityEncoderOH(tlb_to_send) + } + + when (tlb.io.resp.fire()) { + val recv_choice = PriorityEncoderOH(to_translate) + val error = Mux(recv_choice(0), + tlb.io.resp.bits.xcpt_ld, tlb.io.resp.bits.xcpt_st) + + when (error) { resp_status := ClientDmaResponse.pagefault state := s_finish } - val recv_choice = PriorityEncoderOH(to_translate) - to_translate := to_translate & ~recv_choice // getting the src translation when (recv_choice(0)) { - src_ppn := io.ptw.resp.bits.pte.ppn + 
src_ppn := tlb.io.resp.bits.ppn } .otherwise { - dst_ppn := io.ptw.resp.bits.pte.ppn + dst_ppn := tlb.io.resp.bits.ppn } + + to_translate := to_translate & ~recv_choice } io.cpu.req.ready := state === s_idle io.cpu.resp.valid := state === s_finish io.cpu.resp.bits := ClientDmaResponse(resp_status) - io.dma.req.valid := state === s_dma_req && !dma_busy(dma_xact_id) - io.dma.req.bits := DmaRequest( + + io.mem.acquire.valid := (state === s_dma_req) && !dma_busy.andR + io.mem.acquire.bits := make_acquire( + addr_beat = dma_req_beat, + client_id = io.host_id, client_xact_id = dma_xact_id, - cmd = cmd, - source = src_paddr, - dest = dst_paddr, - length = tx_len, - size = word_size) - io.dma.resp.ready := Bool(true) + cmd = cmd, source = src_paddr, dest = dst_paddr, + length = tx_len, size = word_size) + + io.mem.grant.ready := (state =/= s_dma_req) when (io.cpu.req.fire()) { val req = io.cpu.req.bits @@ -187,24 +222,23 @@ class DmaFrontend(implicit val p: Parameters) bytes_left := req.segment_size word_size := req.word_size to_translate := Mux(is_prefetch, UInt("b10"), UInt("b11")) - ptw_sent := UInt(0) - state := Mux(vm_enabled, s_translate, s_dma_req) + tlb_sent := UInt(0) + state := s_translate } when (state === s_translate && !to_translate.orR) { state := s_dma_req } - def setBusyOnSend(req: DecoupledIO[DmaRequest]): UInt = - Mux(req.fire(), UIntToOH(req.bits.client_xact_id), UInt(0)) + def setBusy(set: Bool, xact_id: UInt): UInt = + Mux(set, UIntToOH(xact_id), UInt(0)) - def clearBusyOnRecv(resp: DecoupledIO[DmaResponse]): UInt = - ~Mux(resp.fire(), UIntToOH(resp.bits.client_xact_id), UInt(0)) + dma_busy := (dma_busy | + setBusy(dma_req_done, dma_xact_id)) & + ~setBusy(io.mem.grant.fire(), io.mem.grant.bits.client_xact_id) - dma_busy := (dma_busy | setBusyOnSend(io.dma.req)) & - clearBusyOnRecv(io.dma.resp) - when (io.dma.req.fire()) { + when (dma_req_done) { src_vaddr := src_vaddr + Mux(adv_ptr(0), tx_len, UInt(0)) dst_vaddr := dst_vaddr + Mux(adv_ptr(1), tx_len, 
UInt(0)) bytes_left := bytes_left - tx_len @@ -223,11 +257,11 @@ class DmaFrontend(implicit val p: Parameters) dst_vaddr := dst_vaddr + dst_stride bytes_left := segment_size segments_left := segments_left - UInt(1) - state := Mux(vm_enabled, s_prepare, s_dma_req) + state := s_prepare } } .otherwise { to_translate := adv_ptr & Cat(dst_idx === UInt(0), src_idx === UInt(0)) - ptw_sent := UInt(0) + tlb_sent := UInt(0) state := s_translate } } @@ -236,13 +270,14 @@ class DmaFrontend(implicit val p: Parameters) to_translate := adv_ptr & Cat( dst_vpn =/= last_dst_vpn, src_vpn =/= last_src_vpn) - ptw_sent := UInt(0) + tlb_sent := UInt(0) state := s_translate } when (state === s_finish) { state := s_idle } io.busy := (state =/= s_idle) || dma_busy.orR + io.incr_outstanding := dma_req_done } object DmaCtrlRegNumbers { @@ -252,19 +287,23 @@ object DmaCtrlRegNumbers { val NSEGMENTS = 3 val WORD_SIZE = 4 val RESP_STATUS = 5 + val OUTSTANDING = 6 + val NCSRS = 7 + val CSR_BASE = 0x800 + val CSR_END = CSR_BASE + NCSRS } import DmaCtrlRegNumbers._ -class DmaCtrlRegFile(implicit p: Parameters) extends ClientDmaModule()(p) { +class DmaCtrlRegFile(implicit val p: Parameters) extends Module + with HasClientDmaParameters with HasTileLinkParameters { + private val nWriteRegs = 5 - private val nReadRegs = 1 - private val nRegs = nWriteRegs + nReadRegs + private val nRegs = nWriteRegs + 2 val io = new Bundle { val wen = Bool(INPUT) - val addr = UInt(INPUT, log2Up(nRegs)) + val waddr = UInt(INPUT, log2Up(nRegs)) val wdata = UInt(INPUT, dmaSegmentSizeBits) - val rdata = UInt(OUTPUT, dmaSegmentSizeBits) val src_stride = UInt(OUTPUT, dmaSegmentSizeBits) val dst_stride = UInt(OUTPUT, dmaSegmentSizeBits) @@ -272,10 +311,12 @@ class DmaCtrlRegFile(implicit p: Parameters) extends ClientDmaModule()(p) { val nsegments = UInt(OUTPUT, dmaSegmentBits) val word_size = UInt(OUTPUT, dmaWordSizeBits) - val status = UInt(INPUT, dmaStatusBits) + val incr_outstanding = Bool(INPUT) + val xact_outstanding = 
Bool(OUTPUT) } val regs = Reg(Vec(nWriteRegs, UInt(width = dmaSegmentSizeBits))) + val waddr = io.waddr(log2Up(NCSRS) - 1, 0) io.src_stride := regs(SRC_STRIDE) io.dst_stride := regs(DST_STRIDE) @@ -283,44 +324,48 @@ class DmaCtrlRegFile(implicit p: Parameters) extends ClientDmaModule()(p) { io.nsegments := regs(NSEGMENTS) io.word_size := regs(WORD_SIZE) - when (io.wen && io.addr < UInt(nWriteRegs)) { - regs.write(io.addr, io.wdata) + when (io.wen && waddr < UInt(nWriteRegs)) { + regs.write(waddr, io.wdata) } - io.rdata := MuxLookup(io.addr, regs(io.addr), Seq( - UInt(RESP_STATUS) -> io.status)) + val outstanding_cnt = TwoWayCounter( + io.incr_outstanding, + io.wen && io.waddr === UInt(OUTSTANDING), + tlMaxClientXacts) + + io.xact_outstanding := outstanding_cnt > UInt(0) } class DmaController(implicit p: Parameters) extends RoCC()(p) with HasClientDmaParameters { io.mem.req.valid := Bool(false) - io.autl.acquire.valid := Bool(false) - io.autl.grant.ready := Bool(false) io.iptw.req.valid := Bool(false) io.pptw.req.valid := Bool(false) + io.resp.valid := Bool(false) + io.interrupt := Bool(false) val cmd = Queue(io.cmd) val inst = cmd.bits.inst val is_transfer = inst.funct < UInt(8) - val is_cr_write = inst.funct === UInt(8) - val is_cr_read = inst.funct === UInt(9) - val is_cr_access = is_cr_write || is_cr_read - - val resp_rd = Reg(io.resp.bits.rd) - val resp_data = Reg(io.resp.bits.data) - - val s_idle :: s_resp :: Nil = Enum(Bits(), 2) - val state = Reg(init = s_idle) val reg_status = Reg(UInt(width = dmaStatusBits)) val crfile = Module(new DmaCtrlRegFile) - crfile.io.addr := cmd.bits.rs1 - crfile.io.wdata := cmd.bits.rs2 - crfile.io.wen := cmd.fire() && is_cr_write + crfile.io.waddr := io.csr.waddr + crfile.io.wdata := io.csr.wdata + crfile.io.wen := io.csr.wen + + io.csr.rdata(SRC_STRIDE) := crfile.io.src_stride + io.csr.rdata(DST_STRIDE) := crfile.io.dst_stride + io.csr.rdata(SEGMENT_SIZE) := crfile.io.segment_size + io.csr.rdata(NSEGMENTS) := 
crfile.io.nsegments + io.csr.rdata(WORD_SIZE) := crfile.io.word_size + io.csr.rdata(RESP_STATUS) := reg_status val frontend = Module(new DmaFrontend) - io.dma <> frontend.io.dma io.dptw <> frontend.io.ptw + io.autl <> frontend.io.mem + crfile.io.incr_outstanding := frontend.io.incr_outstanding + frontend.io.host_id := io.host_id frontend.io.cpu.req.valid := cmd.valid && is_transfer frontend.io.cpu.req.bits := ClientDmaRequest( cmd = cmd.bits.inst.funct, @@ -331,26 +376,11 @@ class DmaController(implicit p: Parameters) extends RoCC()(p) segment_size = crfile.io.segment_size, nsegments = crfile.io.nsegments, word_size = crfile.io.word_size) - - cmd.ready := state === s_idle && (!is_transfer || frontend.io.cpu.req.ready) - io.resp.valid := state === s_resp - io.resp.bits.rd := resp_rd - io.resp.bits.data := resp_data - - when (cmd.fire()) { - when (is_cr_read) { - resp_rd := inst.rd - resp_data := crfile.io.rdata - state := s_resp - } - } - - when (io.resp.fire()) { state := s_idle } + cmd.ready := is_transfer && frontend.io.cpu.req.ready when (frontend.io.cpu.resp.valid) { reg_status := frontend.io.cpu.resp.bits.status } - io.busy := (state =/= s_idle) || cmd.valid || frontend.io.busy - io.interrupt := Bool(false) + io.busy := cmd.valid || frontend.io.busy || crfile.io.xact_outstanding } diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index b383e385..564b0f8d 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -311,6 +311,7 @@ object CSRs { val stimehw = 0xa81 val mtimecmph = 0x361 val mtimeh = 0x741 + val mrwbase = 0x790 val all = { val res = collection.mutable.ArrayBuffer[Int]() res += fflags diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 84baa909..4d0a41b3 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -9,6 +9,14 @@ import cde.{Parameters, Field} case object RoccMaxTaggedMemXacts 
extends Field[Int] case object RoccNMemChannels extends Field[Int] +case object RoccNCSRs extends Field[Int] + +class RoCCCSRs(implicit p: Parameters) extends CoreBundle()(p) { + val rdata = Vec(nRoccCsrs, UInt(INPUT, xLen)) + val waddr = UInt(OUTPUT, CSR.ADDRSZ) + val wdata = UInt(OUTPUT, xLen) + val wen = Bool(OUTPUT) +} class RoCCInstruction extends Bundle { @@ -33,7 +41,7 @@ class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) { val data = Bits(width = xLen) } -class RoCCInterface(implicit p: Parameters) extends Bundle { +class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) { val cmd = Decoupled(new RoCCCommand).flip val resp = Decoupled(new RoCCResponse) val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) @@ -50,8 +58,8 @@ class RoCCInterface(implicit p: Parameters) extends Bundle { val fpu_req = Decoupled(new FPInput) val fpu_resp = Decoupled(new FPResult).flip val exception = Bool(INPUT) - - val dma = new DmaIO + val csr = (new RoCCCSRs).flip + val host_id = UInt(INPUT, log2Up(nCores)) override def cloneType = new RoCCInterface().asInstanceOf[this.type] } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index f67302d9..0e43d162 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -46,6 +46,10 @@ trait HasCoreParameters extends HasAddrMapParameters { val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt val mmioBase = p(MMIOBase) val nCustomMrwCsrs = p(NCustomMRWCSRs) + val roccCsrs = if (p(BuildRoCC).isEmpty) Nil + else p(BuildRoCC).flatMap(_.csrs) + val nRoccCsrs = p(RoccNCSRs) + val nCores = p(HtifKey).nCores // Print out log of committed instructions and their writeback values. // Requires post-processing due to out-of-order writebacks. 
diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 233b5b8b..1c9f16a7 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -14,8 +14,8 @@ case class RoccParameters( opcodes: OpcodeSet, generator: Parameters => RoCC, nMemChannels: Int = 0, - useFPU: Boolean = false, - useDma: Boolean = false) + csrs: Seq[Int] = Nil, + useFPU: Boolean = false) abstract class Tile(resetSignal: Bool = null) (implicit p: Parameters) extends Module(_reset = resetSignal) { @@ -23,7 +23,6 @@ abstract class Tile(resetSignal: Bool = null) val usingRocc = !buildRocc.isEmpty val nRocc = buildRocc.size val nFPUPorts = buildRocc.filter(_.useFPU).size - val nDmaPorts = buildRocc.filter(_.useDma).size val nDCachePorts = 2 + nRocc val nPTWPorts = 2 + 3 * nRocc val nCachedTileLinkPorts = 1 @@ -77,12 +76,15 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( cmdRouter.io.in <> core.io.rocc.cmd val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) => - val rocc = accelParams.generator( - p.alterPartial({ case RoccNMemChannels => accelParams.nMemChannels })) + val rocc = accelParams.generator(p.alterPartial({ + case RoccNMemChannels => accelParams.nMemChannels + case RoccNCSRs => accelParams.csrs.size + })) val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) rocc.io.cmd <> cmdRouter.io.out(i) rocc.io.s := core.io.rocc.s rocc.io.exception := core.io.rocc.exception + rocc.io.host_id := io.host.id dcIF.io.requestor <> rocc.io.mem dcArb.io.requestor(2 + i) <> dcIF.io.cache uncachedArb.io.in(1 + i) <> rocc.io.autl @@ -107,18 +109,22 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( } } - if (nDmaPorts > 0) { - val dmaArb = Module(new DmaArbiter(nDmaPorts)) - dmaArb.io.in <> roccs.zip(buildRocc) - .filter { case (_, params) => params.useDma } - .map { case (rocc, _) => rocc.io.dma } - io.dma <> dmaArb.io.out - } - core.io.rocc.busy := cmdRouter.io.busy || 
roccs.map(_.io.busy).reduce(_ || _) core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _) respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) + if (p(RoccNCSRs) > 0) { + core.io.rocc.csr.rdata <> roccs.map(_.io.csr.rdata).reduce(_ ++ _) + for ((rocc, accelParams) <- roccs.zip(buildRocc)) { + rocc.io.csr.waddr := core.io.rocc.csr.waddr + rocc.io.csr.wdata := core.io.rocc.csr.wdata + rocc.io.csr.wen := core.io.rocc.csr.wen && + accelParams.csrs + .map(core.io.rocc.csr.waddr === UInt(_)) + .reduce((a, b) => a || b) + } + } + roccs.flatMap(_.io.utl) :+ uncachedArb.io.out } else { Seq(icache.io.mem) }) @@ -128,9 +134,4 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( fpu.io.cp_resp.ready := Bool(false) } } - - if (!usingRocc || nDmaPorts == 0) { - io.dma.req.valid := Bool(false) - io.dma.resp.ready := Bool(false) - } } diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index c76ac068..55e7359d 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -193,3 +193,27 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { state := s_ready } } + +class DecoupledTLB(implicit p: Parameters) extends Module { + val io = new Bundle { + val req = Decoupled(new TLBReq).flip + val resp = Decoupled(new TLBResp) + val ptw = new TLBPTWIO + } + + val reqq = Queue(io.req) + val tlb = Module(new TLB) + + val resp_helper = DecoupledHelper( + reqq.valid, tlb.io.req.ready, io.resp.ready) + val tlb_miss = tlb.io.resp.miss + + tlb.io.req.valid := resp_helper.fire(tlb.io.req.ready) + tlb.io.req.bits := reqq.bits + reqq.ready := resp_helper.fire(reqq.valid, !tlb_miss) + + io.resp.valid := resp_helper.fire(io.resp.ready, !tlb_miss) + io.resp.bits := tlb.io.resp + + io.ptw <> tlb.io.ptw +} From 7937fbf074f784ec0ed68ca5ebfef5fdf27cf071 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Mon, 25 Jan 2016 13:23:11 -0800 Subject: [PATCH 0957/1087] fix number of IOMSHRs at 1 --- 
rocket/src/main/scala/nbdcache.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 4af94fa7..2d0eee29 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -12,7 +12,6 @@ case object WordBits extends Field[Int] case object StoreDataQueueDepth extends Field[Int] case object ReplayQueueDepth extends Field[Int] case object NMSHRs extends Field[Int] -case object NIOMSHRs extends Field[Int] case object LRSCCycles extends Field[Int] trait HasL1HellaCacheParameters extends HasL1CacheParameters { @@ -32,7 +31,7 @@ trait HasL1HellaCacheParameters extends HasL1CacheParameters { val encRowBits = encDataBits*rowWords val sdqDepth = p(StoreDataQueueDepth) val nMSHRs = p(NMSHRs) - val nIOMSHRs = p(NIOMSHRs) + val nIOMSHRs = 1 val lrscCycles = p(LRSCCycles) } From 78579672d3b8108c9962d5577f0c781164dc2647 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Fri, 29 Jan 2016 11:32:59 -0800 Subject: [PATCH 0958/1087] make mtvec configurable and writeable --- rocket/src/main/scala/csr.scala | 6 ++++-- rocket/src/main/scala/frontend.scala | 2 +- rocket/src/main/scala/package.scala | 4 ---- rocket/src/main/scala/rocket.scala | 3 +++ 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index a1c277f9..62f81ff7 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -184,6 +184,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val cpuid = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) | isa_string.map(x => 1 << (x - 'A')).reduce(_|_) val impid = 1 + val reg_mtvec = Reg(init = UInt(mtvecInit, xLen)) val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( CSRs.fflags -> (if (usingFPU) reg_fflags else UInt(0)), @@ -201,7 +202,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.mstatus -> 
read_mstatus, CSRs.mtdeleg -> UInt(0), CSRs.mreset -> UInt(0), - CSRs.mtvec -> UInt(MTVEC), + CSRs.mtvec -> reg_mtvec, CSRs.miobase -> UInt(p(junctions.MMIOBase)), CSRs.mipi -> UInt(0), CSRs.mip -> reg_mip.toBits, @@ -295,7 +296,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (some_interrupt_pending) { reg_wfi := false } io.fatc := insn_sfence_vm - io.evec := Mux(io.exception || csr_xcpt, (reg_mstatus.prv << 6) + MTVEC, + io.evec := Mux(io.exception || csr_xcpt, (reg_mstatus.prv << 6) + reg_mtvec, Mux(maybe_insn_redirect_trap, reg_stvec.sextTo(vaddrBitsExtended), Mux(reg_mstatus.prv(1) || Bool(!p(UseVM)), reg_mepc, reg_sepc))) io.ptbr := reg_sptbr @@ -427,6 +428,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_csr_req_fire) { reg_fromhost := wdata } } when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_csr_req_fire) { reg_tohost := wdata } } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } + when (decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata & ~UInt("b11") } if (usingVM) { when (decoded_addr(CSRs.sstatus)) { val new_sstatus = new SStatus().fromBits(wdata) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index a5b9e9fa..96e7c2ec 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -42,7 +42,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline) val s1_same_block = Reg(Bool()) val s2_valid = Reg(init=Bool(true)) - val s2_pc = Reg(init=UInt(START_ADDR)) + val s2_pc = Reg(init=UInt(startAddr)) val s2_btb_resp_valid = Reg(init=Bool(false)) val s2_btb_resp_bits = Reg(btb.io.resp.bits) val s2_xcpt_if = Reg(init=Bool(false)) diff --git a/rocket/src/main/scala/package.scala b/rocket/src/main/scala/package.scala index 
e0b879ac..30368040 100644 --- a/rocket/src/main/scala/package.scala +++ b/rocket/src/main/scala/package.scala @@ -2,7 +2,3 @@ package object rocket extends rocket.constants.ScalarOpConstants -{ - val MTVEC = 0x100 - val START_ADDR = MTVEC + 0x100 -} diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 0e43d162..d9657095 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -22,6 +22,7 @@ case object CoreInstBits extends Field[Int] case object CoreDataBits extends Field[Int] case object CoreDCacheReqTagBits extends Field[Int] case object NCustomMRWCSRs extends Field[Int] +case object MtvecInit extends Field[BigInt] trait HasCoreParameters extends HasAddrMapParameters { implicit val p: Parameters @@ -50,6 +51,8 @@ trait HasCoreParameters extends HasAddrMapParameters { else p(BuildRoCC).flatMap(_.csrs) val nRoccCsrs = p(RoccNCSRs) val nCores = p(HtifKey).nCores + val mtvecInit = p(MtvecInit) + val startAddr = mtvecInit + 0x100 // Print out log of committed instructions and their writeback values. // Requires post-processing due to out-of-order writebacks. 
From 5abfd1a4ab50ac7cb833ced543bc4ec25c8d1c9e Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 3 Feb 2016 15:40:44 -0800 Subject: [PATCH 0959/1087] make sure to check for region violations in DMA frontend --- rocket/src/main/scala/dma.scala | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala index 24240c18..aa45c5be 100644 --- a/rocket/src/main/scala/dma.scala +++ b/rocket/src/main/scala/dma.scala @@ -53,7 +53,7 @@ object ClientDmaRequest { object ClientDmaResponse { val pagefault = UInt("b01") - val outer_err = UInt("b10") + val invalid_region = UInt("b10") def apply(status: UInt = UInt(0))(implicit p: Parameters) = { val resp = Wire(new ClientDmaResponse) @@ -164,6 +164,12 @@ class DmaFrontend(implicit p: Parameters) extends CoreModule()(p) alloc = Bool(false)) } + def check_region(cmd: UInt, src: UInt, dst: UInt): Bool = { + val dst_ok = Mux(cmd === DMA_CMD_SOUT, dst >= UInt(mmioBase), dst < UInt(mmioBase)) + val src_ok = Mux(cmd === DMA_CMD_SIN, src >= UInt(mmioBase), Bool(true)) + dst_ok && src_ok + } + tlb.io.req.valid := tlb_to_send.orR tlb.io.req.bits.vpn := Mux(tlb_to_send(0), src_vpn, dst_vpn) tlb.io.req.bits.passthrough := Bool(false) @@ -227,7 +233,12 @@ class DmaFrontend(implicit p: Parameters) extends CoreModule()(p) } when (state === s_translate && !to_translate.orR) { - state := s_dma_req + when (check_region(cmd, src_paddr, dst_paddr)) { + state := s_dma_req + } .otherwise { + resp_status := ClientDmaResponse.invalid_region + state := s_finish + } } def setBusy(set: Bool, xact_id: UInt): UInt = From 31dd311affd4be2e00e83e6f22783081c331e2c6 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Mon, 8 Feb 2016 17:38:31 -0800 Subject: [PATCH 0960/1087] [fpu] fix rounding mode bug in fdivfsqrt --- rocket/src/main/scala/fpu.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/fpu.scala 
b/rocket/src/main/scala/fpu.scala index 77e37e8f..68dea0a4 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -633,7 +633,7 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { val divSqrt_toSingle = Module(new hardfloat.RecFNToRecFN(11, 53, 8, 24)) divSqrt_toSingle.io.in := divSqrt_wdata_double - divSqrt_toSingle.io.roundingMode := ex_rm + divSqrt_toSingle.io.roundingMode := divSqrt_rm divSqrt_wdata := Mux(divSqrt_single, divSqrt_toSingle.io.out, divSqrt_wdata_double) divSqrt_flags := divSqrt_flags_double | Mux(divSqrt_single, divSqrt_toSingle.io.exceptionFlags, Bits(0)) } From b96343a4e534d24e02835a28a19952332390a32e Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Mon, 8 Feb 2016 17:41:38 -0800 Subject: [PATCH 0961/1087] [btb] fix mix type error for fetch-width > 1 closes #24 --- rocket/src/main/scala/btb.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 6d835aa4..a86357e6 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -258,8 +258,8 @@ class BTB(implicit p: Parameters) extends BtbModule { io.resp.bits.mask := UInt(1) } else { // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case - io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)), - SInt(-1)) + io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)).toSInt, + SInt(-1)).toUInt } if (nBHT > 0) { From 15ac4d317f3363a0e76231fad63e12064c64a907 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Wed, 24 Feb 2016 22:39:00 -0800 Subject: [PATCH 0962/1087] RoCC PTW refactoring --- rocket/src/main/scala/dma.scala | 4 +--- rocket/src/main/scala/rocc.scala | 31 ++++++++++++------------------- rocket/src/main/scala/tile.scala | 9 +++++---- 3 files changed, 18 insertions(+), 26 deletions(-) diff --git 
a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala index aa45c5be..a31e311e 100644 --- a/rocket/src/main/scala/dma.scala +++ b/rocket/src/main/scala/dma.scala @@ -350,8 +350,6 @@ class DmaCtrlRegFile(implicit val p: Parameters) extends Module class DmaController(implicit p: Parameters) extends RoCC()(p) with HasClientDmaParameters { io.mem.req.valid := Bool(false) - io.iptw.req.valid := Bool(false) - io.pptw.req.valid := Bool(false) io.resp.valid := Bool(false) io.interrupt := Bool(false) @@ -373,7 +371,7 @@ class DmaController(implicit p: Parameters) extends RoCC()(p) io.csr.rdata(RESP_STATUS) := reg_status val frontend = Module(new DmaFrontend) - io.dptw <> frontend.io.ptw + io.ptw(0) <> frontend.io.ptw io.autl <> frontend.io.mem crfile.io.incr_outstanding := frontend.io.incr_outstanding frontend.io.host_id := io.host_id diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index 4d0a41b3..d17dde1b 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -9,6 +9,7 @@ import cde.{Parameters, Field} case object RoccMaxTaggedMemXacts extends Field[Int] case object RoccNMemChannels extends Field[Int] +case object RoccNPTWPorts extends Field[Int] case object RoccNCSRs extends Field[Int] class RoCCCSRs(implicit p: Parameters) extends CoreBundle()(p) { @@ -52,9 +53,7 @@ class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) { // These should be handled differently, eventually val autl = new ClientUncachedTileLinkIO val utl = Vec(p(RoccNMemChannels), new ClientUncachedTileLinkIO) - val iptw = new TLBPTWIO - val dptw = new TLBPTWIO - val pptw = new TLBPTWIO + val ptw = Vec(p(RoccNPTWPorts), new TLBPTWIO) val fpu_req = Decoupled(new FPInput) val fpu_resp = Decoupled(new FPResult).flip val exception = Bool(INPUT) @@ -136,9 +135,6 @@ class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) { io.autl.acquire.valid := false io.autl.grant.ready := false - 
io.iptw.req.valid := false - io.dptw.req.valid := false - io.pptw.req.valid := false } class TranslatorExample(implicit p: Parameters) extends RoCC()(p) { @@ -160,20 +156,22 @@ class TranslatorExample(implicit p: Parameters) extends RoCC()(p) { state := s_ptw_req } - when (io.dptw.req.fire()) { state := s_ptw_resp } + private val ptw = io.ptw(0) - when (state === s_ptw_resp && io.dptw.resp.valid) { - error := io.dptw.resp.bits.error - ppn := io.dptw.resp.bits.pte.ppn + when (ptw.req.fire()) { state := s_ptw_resp } + + when (state === s_ptw_resp && ptw.resp.valid) { + error := ptw.resp.bits.error + ppn := ptw.resp.bits.pte.ppn state := s_resp } when (io.resp.fire()) { state := s_idle } - io.dptw.req.valid := (state === s_ptw_req) - io.dptw.req.bits.addr := req_vpn - io.dptw.req.bits.store := Bool(false) - io.dptw.req.bits.fetch := Bool(false) + ptw.req.valid := (state === s_ptw_req) + ptw.req.bits.addr := req_vpn + ptw.req.bits.store := Bool(false) + ptw.req.bits.fetch := Bool(false) io.resp.valid := (state === s_resp) io.resp.bits.rd := req_rd @@ -184,8 +182,6 @@ class TranslatorExample(implicit p: Parameters) extends RoCC()(p) { io.mem.req.valid := Bool(false) io.autl.acquire.valid := Bool(false) io.autl.grant.ready := Bool(false) - io.iptw.req.valid := Bool(false) - io.pptw.req.valid := Bool(false) } class CharacterCountExample(implicit p: Parameters) extends RoCC()(p) @@ -265,9 +261,6 @@ class CharacterCountExample(implicit p: Parameters) extends RoCC()(p) io.busy := (state =/= s_idle) io.interrupt := Bool(false) io.mem.req.valid := Bool(false) - io.dptw.req.valid := Bool(false) - io.iptw.req.valid := Bool(false) - io.pptw.req.valid := Bool(false) } class OpcodeSet(val opcodes: Seq[UInt]) { diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 1c9f16a7..a7bfeec3 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -14,6 +14,7 @@ case class RoccParameters( opcodes: OpcodeSet, generator: 
Parameters => RoCC, nMemChannels: Int = 0, + nPTWPorts : Int = 0, csrs: Seq[Int] = Nil, useFPU: Boolean = false) @@ -24,7 +25,7 @@ abstract class Tile(resetSignal: Bool = null) val nRocc = buildRocc.size val nFPUPorts = buildRocc.filter(_.useFPU).size val nDCachePorts = 2 + nRocc - val nPTWPorts = 2 + 3 * nRocc + val nPTWPorts = 2 + p(RoccNPTWPorts) val nCachedTileLinkPorts = 1 val nUncachedTileLinkPorts = 1 + p(RoccNMemChannels) val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) @@ -78,6 +79,7 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) => val rocc = accelParams.generator(p.alterPartial({ case RoccNMemChannels => accelParams.nMemChannels + case RoccNPTWPorts => accelParams.nPTWPorts case RoccNCSRs => accelParams.csrs.size })) val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) @@ -88,9 +90,6 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( dcIF.io.requestor <> rocc.io.mem dcArb.io.requestor(2 + i) <> dcIF.io.cache uncachedArb.io.in(1 + i) <> rocc.io.autl - ptw.io.requestor(2 + 3 * i) <> rocc.io.iptw - ptw.io.requestor(3 + 3 * i) <> rocc.io.dptw - ptw.io.requestor(4 + 3 * i) <> rocc.io.pptw rocc } @@ -109,6 +108,8 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( } } + ptw.io.requestor.drop(2) <> roccs.flatMap(_.io.ptw) + core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _) core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _) respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) From bc15e8649e6454aa6e8f0608a273f10d8d3986bf Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 2 Mar 2016 23:29:58 -0800 Subject: [PATCH 0963/1087] WIP on priv spec v1.9 --- rocket/src/main/scala/consts.scala | 6 - rocket/src/main/scala/csr.scala | 406 ++++++++++++----------- rocket/src/main/scala/frontend.scala | 2 +- rocket/src/main/scala/idecode.scala 
| 1 - rocket/src/main/scala/instructions.scala | 120 ++++--- rocket/src/main/scala/ptw.scala | 26 +- rocket/src/main/scala/rocc.scala | 10 +- rocket/src/main/scala/rocket.scala | 6 +- rocket/src/main/scala/tile.scala | 2 +- rocket/src/main/scala/tlb.scala | 37 ++- 10 files changed, 316 insertions(+), 300 deletions(-) diff --git a/rocket/src/main/scala/consts.scala b/rocket/src/main/scala/consts.scala index fda38fc2..74386c12 100644 --- a/rocket/src/main/scala/consts.scala +++ b/rocket/src/main/scala/consts.scala @@ -46,10 +46,4 @@ trait ScalarOpConstants { val DW_32 = N val DW_64 = Y val DW_XPR = Y - - val SZ_PRV = 2 - val PRV_U = 0 - val PRV_S = 1 - val PRV_H = 2 - val PRV_M = 3 } diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 62f81ff7..eb99d7be 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -10,40 +10,37 @@ import uncore._ import scala.math._ class MStatus extends Bundle { + val prv = UInt(width = PRV.SZ) // not truly part of mstatus, but convenient val sd = Bool() - val zero2 = UInt(width = 31) - val sd_rv32 = UInt(width = 1) - val zero1 = UInt(width = 9) + val zero3 = UInt(width = 31) + val sd_rv32 = Bool() + val zero2 = UInt(width = 2) val vm = UInt(width = 5) + val zero1 = UInt(width = 5) + val pum = Bool() val mprv = Bool() val xs = UInt(width = 2) val fs = UInt(width = 2) - val prv3 = UInt(width = 2) - val ie3 = Bool() - val prv2 = UInt(width = 2) - val ie2 = Bool() - val prv1 = UInt(width = 2) - val ie1 = Bool() - val prv = UInt(width = 2) - val ie = Bool() -} - -class SStatus extends Bundle { - val sd = Bool() - val zero4 = UInt(width = 31) - val sd_rv32 = UInt(width = 1) - val zero3 = UInt(width = 14) - val mprv = Bool() - val xs = UInt(width = 2) - val fs = UInt(width = 2) - val zero2 = UInt(width = 7) - val ps = Bool() - val pie = Bool() - val zero1 = UInt(width = 2) - val ie = Bool() + val mpp = UInt(width = 2) + val hpp = UInt(width = 2) + val spp = UInt(width = 1) + val 
mpie = Bool() + val hpie = Bool() + val spie = Bool() + val upie = Bool() + val mie = Bool() + val hie = Bool() + val sie = Bool() + val uie = Bool() } class MIP extends Bundle { + val host = Bool() + val rocc = Bool() + val mdip = Bool() + val hdip = Bool() + val sdip = Bool() + val udip = Bool() val mtip = Bool() val htip = Bool() val stip = Bool() @@ -54,6 +51,15 @@ class MIP extends Bundle { val usip = Bool() } +object PRV +{ + val SZ = 2 + val U = 0 + val S = 1 + val H = 2 + val M = 3 +} + object CSR { // commands @@ -82,6 +88,7 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle { val csr_xcpt = Bool(OUTPUT) val eret = Bool(OUTPUT) + val prv = UInt(OUTPUT, PRV.SZ) val status = new MStatus().asOutput val ptbr = UInt(OUTPUT, paddrBits) val evec = UInt(OUTPUT, vaddrBitsExtended) @@ -104,13 +111,44 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) { val io = new CSRFileIO - val reg_mstatus = Reg(new MStatus) - val reg_mie = Reg(init=new MIP().fromBits(0)) + val reset_mstatus = Wire(init=new MStatus().fromBits(0)) + reset_mstatus.mpp := PRV.M + reset_mstatus.prv := PRV.M + val reg_mstatus = Reg(init=reset_mstatus) + + val (supported_interrupts, delegable_interrupts) = { + val sup = Wire(init=new MIP().fromBits(0)) + sup.ssip := Bool(p(UseVM)) + sup.msip := true + sup.stip := Bool(p(UseVM)) + sup.mtip := true + sup.rocc := usingRoCC + sup.host := true + + val del = Wire(init=sup) + del.msip := false + del.mtip := false + del.mdip := false + + (sup.toBits, del.toBits) + } + val delegable_exceptions = UInt(Seq( + Causes.misaligned_fetch, + Causes.fault_fetch, + Causes.breakpoint, + Causes.fault_load, + Causes.fault_store, + Causes.user_ecall).map(1 << _).sum) + + val reg_mie = Reg(init=UInt(0, xLen)) + val reg_mideleg = Reg(init=UInt(0, xLen)) + val reg_medeleg = Reg(init=UInt(0, xLen)) val reg_mip = Reg(init=new MIP().fromBits(0)) val reg_mepc = Reg(UInt(width = vaddrBitsExtended)) val reg_mcause = Reg(Bits(width = xLen)) val reg_mbadaddr = 
Reg(UInt(width = vaddrBitsExtended)) val reg_mscratch = Reg(Bits(width = xLen)) + val reg_mtvec = Reg(init=UInt(p(MtvecInit), paddrBits min xLen)) val reg_sepc = Reg(UInt(width = vaddrBitsExtended)) val reg_scause = Reg(Bits(width = xLen)) @@ -118,7 +156,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_sscratch = Reg(Bits(width = xLen)) val reg_stvec = Reg(UInt(width = vaddrBits)) val reg_mtimecmp = Reg(Bits(width = xLen)) - val reg_sptbr = Reg(UInt(width = paddrBits)) + val reg_sptbr = Reg(UInt(width = ppnBits)) val reg_wfi = Reg(init=Bool(false)) val reg_tohost = Reg(init=Bits(0, xLen)) @@ -126,31 +164,22 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_stats = Reg(init=Bool(false)) val reg_time = Reg(UInt(width = xLen)) val reg_instret = WideCounter(xLen, io.retire) - val reg_cycle = if (enableCommitLog) { reg_instret } else { WideCounter(xLen) } + val reg_cycle: UInt = if (enableCommitLog) { reg_instret } else { WideCounter(xLen) } val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) - val irq_rocc = Bool(usingRoCC) && io.rocc.interrupt + val mip = Wire(init=reg_mip) + mip.host := (reg_fromhost =/= 0) + mip.rocc := io.rocc.interrupt + val read_mip = mip.toBits & supported_interrupts - io.interrupt_cause := 0 - io.interrupt := io.interrupt_cause(xLen-1) - val some_interrupt_pending = Wire(init=Bool(false)) - def checkInterrupt(max_priv: UInt, cond: Bool, num: Int) = { - when (cond && (reg_mstatus.prv < max_priv || reg_mstatus.prv === max_priv && reg_mstatus.ie)) { - io.interrupt_cause := UInt((BigInt(1) << (xLen-1)) + num) - } - when (cond && reg_mstatus.prv <= max_priv) { - some_interrupt_pending := true - } - } - - checkInterrupt(PRV_S, reg_mie.ssip && reg_mip.ssip, 0) - checkInterrupt(PRV_M, reg_mie.msip && reg_mip.msip, 0) - checkInterrupt(PRV_S, reg_mie.stip && reg_mip.stip, 1) - checkInterrupt(PRV_M, reg_mie.mtip && 
reg_mip.mtip, 1) - checkInterrupt(PRV_M, reg_fromhost =/= 0, 2) - checkInterrupt(PRV_M, irq_rocc, 3) + val pending_interrupts = read_mip & reg_mie + val m_interrupts = Mux(reg_mstatus.prv < PRV.M || (reg_mstatus.prv === PRV.M && reg_mstatus.mie), pending_interrupts & ~reg_mideleg, UInt(0)) + val s_interrupts = Mux(reg_mstatus.prv < PRV.S || (reg_mstatus.prv === PRV.S && reg_mstatus.sie), pending_interrupts & reg_mideleg, UInt(0)) + val all_interrupts = m_interrupts | s_interrupts + io.interrupt := all_interrupts.orR + io.interrupt_cause := (io.interrupt << (xLen-1)) + PriorityEncoder(all_interrupts) val system_insn = io.rw.cmd === CSR.I val cpu_ren = io.rw.cmd =/= CSR.N && !system_insn @@ -175,38 +204,34 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) io.host.debug_stats_csr := reg_stats // direct export up the hierarchy - val read_time = if (usingPerfCounters) reg_time else (reg_cycle: UInt) - val read_mstatus = io.status.toBits val isa_string = "IMA" + (if (usingVM) "S" else "") + - (if (usingFPU) "FD" else "") + + (if (usingFPU) "FDG" else "") + (if (usingRoCC) "X" else "") - val cpuid = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) | + val isa = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) | isa_string.map(x => 1 << (x - 'A')).reduce(_|_) - val impid = 1 - val reg_mtvec = Reg(init = UInt(mtvecInit, xLen)) + val read_mstatus = io.status.toBits()(xLen-1,0) val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( - CSRs.fflags -> (if (usingFPU) reg_fflags else UInt(0)), - CSRs.frm -> (if (usingFPU) reg_frm else UInt(0)), - CSRs.fcsr -> (if (usingFPU) Cat(reg_frm, reg_fflags) else UInt(0)), - CSRs.cycle -> reg_cycle, - CSRs.cyclew -> reg_cycle, - CSRs.time -> read_time, - CSRs.timew -> read_time, - CSRs.stime -> read_time, - CSRs.stimew -> read_time, - CSRs.mtime -> read_time, - CSRs.mcpuid -> UInt(cpuid), - CSRs.mimpid -> UInt(impid), + CSRs.mimpid -> UInt(0), + CSRs.marchid -> UInt(0), + CSRs.mvendorid -> UInt(0), + 
CSRs.mtime -> reg_time, + CSRs.mcycle -> reg_cycle, + CSRs.minstret -> reg_instret, + CSRs.mucounteren -> UInt(0), + CSRs.mutime_delta -> UInt(0), + CSRs.mucycle_delta -> UInt(0), + CSRs.muinstret_delta -> UInt(0), + CSRs.misa -> UInt(isa), CSRs.mstatus -> read_mstatus, - CSRs.mtdeleg -> UInt(0), - CSRs.mreset -> UInt(0), CSRs.mtvec -> reg_mtvec, - CSRs.miobase -> UInt(p(junctions.MMIOBase)), - CSRs.mipi -> UInt(0), - CSRs.mip -> reg_mip.toBits, - CSRs.mie -> reg_mie.toBits, + CSRs.mcfgaddr -> UInt(p(junctions.MMIOBase)), + CSRs.mipi -> reg_mip.msip, + CSRs.mip -> read_mip, + CSRs.mie -> reg_mie, + CSRs.mideleg -> reg_mideleg, + CSRs.medeleg -> reg_medeleg, CSRs.mscratch -> reg_mscratch, CSRs.mepc -> reg_mepc.sextTo(xLen), CSRs.mbadaddr -> reg_mbadaddr.sextTo(xLen), @@ -217,30 +242,26 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.mtohost -> reg_tohost, CSRs.mfromhost -> reg_fromhost) - if (usingPerfCounters) { - read_mapping += CSRs.instret -> reg_instret - read_mapping += CSRs.instretw -> reg_instret - - for (i <- 0 until reg_uarch_counters.size) - read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i) + if (usingFPU) { + read_mapping += CSRs.fflags -> reg_fflags + read_mapping += CSRs.frm -> reg_frm + read_mapping += CSRs.fcsr -> Cat(reg_frm, reg_fflags) } if (usingVM) { - val read_sstatus = Wire(init=new SStatus().fromBits(read_mstatus)) - read_sstatus.zero1 := 0 - read_sstatus.zero2 := 0 - read_sstatus.zero3 := 0 - read_sstatus.zero4 := 0 + val read_sie = reg_mie & reg_mideleg + val read_sip = read_mip & reg_mideleg + val read_sstatus = Wire(init=io.status) + read_sstatus.vm := 0 + read_sstatus.mprv := 0 + read_sstatus.mpp := 0 + read_sstatus.hpp := 0 + read_sstatus.mpie := 0 + read_sstatus.hpie := 0 + read_sstatus.mie := 0 + read_sstatus.hie := 0 - val read_sip = Wire(init=new MIP().fromBits(0)) - read_sip.ssip := reg_mip.ssip - read_sip.stip := reg_mip.stip - - val read_sie = Wire(init=new MIP().fromBits(0)) - read_sie.ssip := 
reg_mie.ssip - read_sie.stip := reg_mie.stip - - read_mapping += CSRs.sstatus -> read_sstatus.toBits + read_mapping += CSRs.sstatus -> (read_sstatus.toBits())(xLen-1,0) read_mapping += CSRs.sip -> read_sip.toBits read_mapping += CSRs.sie -> read_sie.toBits read_mapping += CSRs.sscratch -> reg_sscratch @@ -250,10 +271,15 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) read_mapping += CSRs.sasid -> UInt(0) read_mapping += CSRs.sepc -> reg_sepc.sextTo(xLen) read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen) + read_mapping += CSRs.mscounteren -> UInt(0) + read_mapping += CSRs.mstime_delta -> UInt(0) + read_mapping += CSRs.mscycle_delta -> UInt(0) + read_mapping += CSRs.msinstret_delta -> UInt(0) } for (i <- 0 until nCustomMrwCsrs) { - val addr = CSRs.mrwbase + i + val addr = 0xff0 + i + require(addr < (1 << CSR.ADDRSZ)) require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use") read_mapping += addr -> io.custom_mrw_csrs(i) } @@ -278,14 +304,13 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) Mux(io.rw.cmd === CSR.S, io.rw.rdata | io.rw.wdata, host_csr_bits.data))) - val opcode = io.rw.addr - val insn_call = !opcode(8) && !opcode(0) && system_insn - val insn_break = !opcode(8) && opcode(0) && system_insn - val insn_ret = opcode(8) && !opcode(1) && !opcode(0) && system_insn && priv_sufficient - val insn_sfence_vm = opcode(8) && !opcode(1) && opcode(0) && system_insn && priv_sufficient - val maybe_insn_redirect_trap = opcode(2) && system_insn - val insn_redirect_trap = maybe_insn_redirect_trap && priv_sufficient - val insn_wfi = opcode(8) && opcode(1) && !opcode(0) && system_insn && priv_sufficient + val do_system_insn = priv_sufficient && system_insn + val opcode = UInt(1) << io.rw.addr(2,0) + val insn_call = do_system_insn && opcode(0) + val insn_break = do_system_insn && opcode(1) + val insn_ret = do_system_insn && opcode(2) + val insn_sfence_vm = do_system_insn && opcode(4) + val insn_wfi = 
do_system_insn && opcode(5) val csr_xcpt = (cpu_wen && read_only) || (cpu_ren && (!priv_sufficient || !addr_valid || fp_csr && !io.status.fs.orR)) || @@ -293,67 +318,77 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) insn_call || insn_break when (insn_wfi) { reg_wfi := true } - when (some_interrupt_pending) { reg_wfi := false } + when (read_mip.orR) { reg_wfi := false } + val cause = + Mux(!csr_xcpt, io.cause, + Mux(insn_call, reg_mstatus.prv + Causes.user_ecall, + Mux[UInt](insn_break, Causes.breakpoint, Causes.illegal_instruction))) + val cause_lsbs = cause(log2Up(xLen)-1,0) + val can_delegate = Bool(p(UseVM)) && reg_mstatus.prv < PRV.M + val delegate = can_delegate && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) + val tvec = Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec) + val epc = Mux(can_delegate, reg_sepc, reg_mepc) io.fatc := insn_sfence_vm - io.evec := Mux(io.exception || csr_xcpt, (reg_mstatus.prv << 6) + reg_mtvec, - Mux(maybe_insn_redirect_trap, reg_stvec.sextTo(vaddrBitsExtended), - Mux(reg_mstatus.prv(1) || Bool(!p(UseVM)), reg_mepc, reg_sepc))) + io.evec := Mux(io.exception || csr_xcpt, tvec, epc) io.ptbr := reg_sptbr io.csr_xcpt := csr_xcpt - io.eret := insn_ret || insn_redirect_trap + io.eret := insn_ret io.status := reg_mstatus - io.status.fs := Fill(2, reg_mstatus.fs.orR) // either off or dirty (no clean/initial support yet) - io.status.xs := Fill(2, reg_mstatus.xs.orR) // either off or dirty (no clean/initial support yet) io.status.sd := io.status.fs.andR || io.status.xs.andR if (xLen == 32) io.status.sd_rv32 := io.status.sd when (io.exception || csr_xcpt) { - reg_mstatus.ie := false - reg_mstatus.prv := PRV_M - reg_mstatus.mprv := false - reg_mstatus.prv1 := reg_mstatus.prv - reg_mstatus.ie1 := reg_mstatus.ie - reg_mstatus.prv2 := reg_mstatus.prv1 - reg_mstatus.ie2 := reg_mstatus.ie1 - - reg_mepc := ~(~io.pc | (coreInstBytes-1)) - reg_mcause := io.cause - when (csr_xcpt) { - reg_mcause := 
Causes.illegal_instruction - when (insn_break) { reg_mcause := Causes.breakpoint } - when (insn_call) { reg_mcause := reg_mstatus.prv + Causes.user_ecall } - } - - reg_mbadaddr := io.pc - when (io.cause === Causes.fault_load || io.cause === Causes.misaligned_load || - io.cause === Causes.fault_store || io.cause === Causes.misaligned_store) { + val ldst_badaddr = { val (upper, lower) = Split(io.rw.wdata, vaddrBits) val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) - reg_mbadaddr := Cat(sign, lower) + Cat(sign, lower) + } + val ldst = + cause === Causes.fault_load || cause === Causes.misaligned_load || + cause === Causes.fault_store || cause === Causes.misaligned_store + val badaddr = Mux(ldst, ldst_badaddr, io.pc) + val epc = ~(~io.pc | (coreInstBytes-1)) + val pie = read_mstatus(reg_mstatus.prv) + + when (delegate) { + reg_sepc := epc + reg_scause := cause + reg_sbadaddr := badaddr + reg_mstatus.spie := pie + reg_mstatus.spp := reg_mstatus.prv + reg_mstatus.sie := false + reg_mstatus.prv := PRV.S + }.otherwise { + reg_mepc := epc + reg_mcause := cause + reg_mbadaddr := badaddr + reg_mstatus.mpie := pie + reg_mstatus.mpp := reg_mstatus.prv + reg_mstatus.mie := false + reg_mstatus.prv := PRV.M } } when (insn_ret) { - reg_mstatus.ie := reg_mstatus.ie1 - reg_mstatus.prv := reg_mstatus.prv1 - reg_mstatus.prv1 := reg_mstatus.prv2 - reg_mstatus.ie1 := reg_mstatus.ie2 - reg_mstatus.prv2 := PRV_U - reg_mstatus.ie2 := true - } - - when (insn_redirect_trap) { - reg_mstatus.prv := PRV_S - reg_sbadaddr := reg_mbadaddr - reg_scause := reg_mcause - reg_sepc := reg_mepc + when (can_delegate) { + when (reg_mstatus.spp.toBool) { reg_mstatus.sie := reg_mstatus.spie } + reg_mstatus.spie := false + reg_mstatus.spp := PRV.U + reg_mstatus.prv := reg_mstatus.spp + }.otherwise { + when (reg_mstatus.mpp(1)) { reg_mstatus.mie := reg_mstatus.mpie } + when (reg_mstatus.mpp(0)) { reg_mstatus.sie := reg_mstatus.mpie } + reg_mstatus.mpie := false + reg_mstatus.mpp := PRV.U + 
reg_mstatus.prv := reg_mstatus.mpp + } } - assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive") + assert(PopCount(insn_ret :: io.exception :: csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive") - when (read_time >= reg_mtimecmp) { + when (reg_time >= reg_mtimecmp) { reg_mip.mtip := true } @@ -372,17 +407,18 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (wen) { when (decoded_addr(CSRs.mstatus)) { val new_mstatus = new MStatus().fromBits(wdata) - reg_mstatus.ie := new_mstatus.ie - reg_mstatus.ie1 := new_mstatus.ie1 + reg_mstatus.mie := new_mstatus.mie + reg_mstatus.mpie := new_mstatus.mpie - val supportedModes = Vec((PRV_M :: PRV_U :: (if (usingVM) List(PRV_S) else Nil)).map(UInt(_))) + val supportedModes = Vec((PRV.M :: PRV.U :: (if (usingVM) List(PRV.S) else Nil)).map(UInt(_))) if (supportedModes.size > 1) { reg_mstatus.mprv := new_mstatus.mprv - when (supportedModes contains new_mstatus.prv) { reg_mstatus.prv := new_mstatus.prv } - when (supportedModes contains new_mstatus.prv1) { reg_mstatus.prv1 := new_mstatus.prv1 } + when (supportedModes contains new_mstatus.mpp) { reg_mstatus.mpp := new_mstatus.mpp } if (supportedModes.size > 2) { - when (supportedModes contains new_mstatus.prv2) { reg_mstatus.prv2 := new_mstatus.prv2 } - reg_mstatus.ie2 := new_mstatus.ie2 + reg_mstatus.pum := new_mstatus.pum + reg_mstatus.spp := new_mstatus.spp + reg_mstatus.spie := new_mstatus.spie + reg_mstatus.sie := new_mstatus.sie } } @@ -391,8 +427,8 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 } when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on } } - if (usingVM || usingFPU) reg_mstatus.fs := new_mstatus.fs - if (usingRoCC) reg_mstatus.xs := new_mstatus.xs + if (usingVM || usingFPU) reg_mstatus.fs := Fill(2, new_mstatus.fs.orR) + if (usingRoCC) reg_mstatus.xs := Fill(2, 
new_mstatus.xs.orR) } when (decoded_addr(CSRs.mip)) { val new_mip = new MIP().fromBits(wdata) @@ -405,76 +441,48 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.mipi)) { reg_mip.msip := wdata(0) } - when (decoded_addr(CSRs.mie)) { - val new_mie = new MIP().fromBits(wdata) - if (usingVM) { - reg_mie.ssip := new_mie.ssip - reg_mie.stip := new_mie.stip - } - reg_mie.msip := new_mie.msip - reg_mie.mtip := new_mie.mtip - } + when (decoded_addr(CSRs.mie)) { reg_mie := wdata & supported_interrupts } when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } when (decoded_addr(CSRs.mepc)) { reg_mepc := ~(~wdata | (coreInstBytes-1)) } when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } + if (p(MtvecWritable)) + when (decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata >> 2 << 2 } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } - if (usingPerfCounters) - when (decoded_addr(CSRs.instretw)) { reg_instret := wdata } when (decoded_addr(CSRs.mtimecmp)) { reg_mtimecmp := wdata; reg_mip.mtip := false } when (decoded_addr(CSRs.mtime)) { reg_time := wdata } when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_csr_req_fire) { reg_fromhost := wdata } } when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_csr_req_fire) { reg_tohost := wdata } } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } - when (decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata & ~UInt("b11") } if (usingVM) { when (decoded_addr(CSRs.sstatus)) { - val new_sstatus = new SStatus().fromBits(wdata) - reg_mstatus.ie := new_sstatus.ie - reg_mstatus.ie1 := new_sstatus.pie - reg_mstatus.prv1 := Mux[UInt](new_sstatus.ps, 
PRV_S, PRV_U) - reg_mstatus.mprv := new_sstatus.mprv - reg_mstatus.fs := new_sstatus.fs // even without an FPU - if (usingRoCC) reg_mstatus.xs := new_sstatus.xs + val new_sstatus = new MStatus().fromBits(wdata) + reg_mstatus.sie := new_sstatus.sie + reg_mstatus.spie := new_sstatus.spie + reg_mstatus.spp := new_sstatus.spp + reg_mstatus.pum := new_sstatus.pum + reg_mstatus.fs := Fill(2, new_sstatus.fs.orR) // even without an FPU + if (usingRoCC) reg_mstatus.xs := Fill(2, new_sstatus.xs.orR) } when (decoded_addr(CSRs.sip)) { val new_sip = new MIP().fromBits(wdata) reg_mip.ssip := new_sip.ssip } - when (decoded_addr(CSRs.sie)) { - val new_sie = new MIP().fromBits(wdata) - reg_mie.ssip := new_sie.ssip - reg_mie.stip := new_sie.stip - } + when (decoded_addr(CSRs.sie)) { reg_mie := (reg_mie & ~reg_mideleg) | (wdata & reg_mideleg) } when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } - when (decoded_addr(CSRs.sptbr)) { reg_sptbr := Cat(wdata(paddrBits-1, pgIdxBits), Bits(0, pgIdxBits)) } - when (decoded_addr(CSRs.sepc)) { reg_sepc := ~(~wdata | (coreInstBytes-1)) } - when (decoded_addr(CSRs.stvec)) { reg_stvec := ~(~wdata | (coreInstBytes-1)) } + when (decoded_addr(CSRs.sptbr)) { reg_sptbr := wdata } + when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata >> log2Up(coreInstBytes) << log2Up(coreInstBytes) } + when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata >> 2 << 2 } + when (decoded_addr(CSRs.scause)) { reg_scause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } + when (decoded_addr(CSRs.sbadaddr)) { reg_sbadaddr := wdata(vaddrBitsExtended-1,0) } + when (decoded_addr(CSRs.mideleg)) { reg_mideleg := wdata & delegable_interrupts } + when (decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata & delegable_exceptions } } } io.rocc.csr.waddr := addr io.rocc.csr.wdata := wdata io.rocc.csr.wen := wen - - when(this.reset) { - reg_mstatus.zero1 := 0 - reg_mstatus.zero2 := 0 - reg_mstatus.ie := false - reg_mstatus.prv := PRV_M - 
reg_mstatus.ie1 := false - reg_mstatus.prv1 := PRV_M /* hard-wired to M when missing user mode */ - reg_mstatus.ie2 := false /* hard-wired to 0 when missing supervisor mode */ - reg_mstatus.prv2 := PRV_U /* hard-wired to 0 when missing supervisor mode */ - reg_mstatus.ie3 := false /* hard-wired to 0 when missing hypervisor mode */ - reg_mstatus.prv3 := PRV_U /* hard-wired to 0 when missing hypervisor mode */ - reg_mstatus.mprv := false - reg_mstatus.vm := 0 - reg_mstatus.fs := 0 - reg_mstatus.xs := 0 - reg_mstatus.sd_rv32 := false - reg_mstatus.sd := false - } } diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 96e7c2ec..28cfbfa0 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -42,7 +42,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline) val s1_same_block = Reg(Bool()) val s2_valid = Reg(init=Bool(true)) - val s2_pc = Reg(init=UInt(startAddr)) + val s2_pc = Reg(init=UInt(p(ResetVector))) val s2_btb_resp_valid = Reg(init=Bool(false)) val s2_btb_resp_bits = Reg(btb.io.resp.bits) val s2_xcpt_if = Reg(init=Bool(false)) diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index 2464b817..d71bf32c 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -174,7 +174,6 @@ object XDecode extends DecodeConstants SCALL-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), SBREAK-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), SRET-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - MRTS-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), WFI-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
N,N,N,N,N,N,CSR.I,N,N,N), CSRRW-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 564b0f8d..d854fac8 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -94,12 +94,9 @@ object Instructions { def SC_D = BitPat("b00011????????????011?????0101111") def SCALL = BitPat("b00000000000000000000000001110011") def SBREAK = BitPat("b00000000000100000000000001110011") - def SRET = BitPat("b00010000000000000000000001110011") - def SFENCE_VM = BitPat("b000100000001?????000000001110011") - def WFI = BitPat("b00010000001000000000000001110011") - def MRTH = BitPat("b00110000011000000000000001110011") - def MRTS = BitPat("b00110000010100000000000001110011") - def HRTS = BitPat("b00100000010100000000000001110011") + def SRET = BitPat("b00010000001000000000000001110011") + def SFENCE_VM = BitPat("b000100000100?????000000001110011") + def WFI = BitPat("b00010000010100000000000001110011") def CSRRW = BitPat("b?????????????????001?????1110011") def CSRRS = BitPat("b?????????????????010?????1110011") def CSRRC = BitPat("b?????????????????011?????1110011") @@ -268,50 +265,63 @@ object CSRs { val uarch14 = 0xcce val uarch15 = 0xccf val sstatus = 0x100 - val stvec = 0x101 val sie = 0x104 + val stvec = 0x105 val sscratch = 0x140 val sepc = 0x141 + val scause = 0x142 + val sbadaddr = 0x143 val sip = 0x144 val sptbr = 0x180 val sasid = 0x181 - val cyclew = 0x900 - val timew = 0x901 - val instretw = 0x902 + val scycle = 0xd00 val stime = 0xd01 - val scause = 0xd42 - val sbadaddr = 0xd43 - val stimew = 0xa01 + val sinstret = 0xd02 val mstatus = 0x300 - val mtvec = 0x301 - val mtdeleg = 0x302 + val medeleg = 0x302 + val mideleg = 0x303 val mie = 0x304 + val mtvec = 0x305 val mtimecmp = 
0x321 val mscratch = 0x340 val mepc = 0x341 val mcause = 0x342 val mbadaddr = 0x343 val mip = 0x344 - val mtime = 0x701 - val mcpuid = 0xf00 - val mimpid = 0xf01 - val mhartid = 0xf10 - val mtohost = 0x780 - val mfromhost = 0x781 - val mreset = 0x782 - val mipi = 0x783 - val miobase = 0x784 + val mipi = 0x345 + val mucounteren = 0x310 + val mscounteren = 0x311 + val mucycle_delta = 0x700 + val mutime_delta = 0x701 + val muinstret_delta = 0x702 + val mscycle_delta = 0x704 + val mstime_delta = 0x705 + val msinstret_delta = 0x706 + val mcycle = 0xf00 + val mtime = 0xf01 + val minstret = 0xf02 + val misa = 0xf10 + val mvendorid = 0xf11 + val marchid = 0xf12 + val mimpid = 0xf13 + val mcfgaddr = 0xf14 + val mhartid = 0xf15 + val mtohost = 0x7c0 + val mfromhost = 0x7c1 + val mreset = 0x7c2 val cycleh = 0xc80 val timeh = 0xc81 val instreth = 0xc82 - val cyclehw = 0x980 - val timehw = 0x981 - val instrethw = 0x982 - val stimeh = 0xd81 - val stimehw = 0xa81 val mtimecmph = 0x361 - val mtimeh = 0x741 - val mrwbase = 0x790 + val mucycle_deltah = 0x780 + val mutime_deltah = 0x781 + val muinstret_deltah = 0x782 + val mscycle_deltah = 0x784 + val mstime_deltah = 0x785 + val msinstret_deltah = 0x786 + val mcycleh = 0xf80 + val mtimeh = 0xf81 + val minstreth = 0xf82 val all = { val res = collection.mutable.ArrayBuffer[Int]() res += fflags @@ -338,39 +348,50 @@ object CSRs { res += uarch14 res += uarch15 res += sstatus - res += stvec res += sie + res += stvec res += sscratch res += sepc + res += scause + res += sbadaddr res += sip res += sptbr res += sasid - res += cyclew - res += timew - res += instretw + res += scycle res += stime - res += scause - res += sbadaddr - res += stimew + res += sinstret res += mstatus - res += mtvec - res += mtdeleg + res += medeleg + res += mideleg res += mie + res += mtvec res += mtimecmp res += mscratch res += mepc res += mcause res += mbadaddr res += mip + res += mipi + res += mucounteren + res += mscounteren + res += mucycle_delta + res += 
mutime_delta + res += muinstret_delta + res += mscycle_delta + res += mstime_delta + res += msinstret_delta + res += mcycle res += mtime - res += mcpuid + res += minstret + res += misa + res += mvendorid + res += marchid res += mimpid + res += mcfgaddr res += mhartid res += mtohost res += mfromhost res += mreset - res += mipi - res += miobase res.toArray } val all32 = { @@ -378,13 +399,16 @@ object CSRs { res += cycleh res += timeh res += instreth - res += cyclehw - res += timehw - res += instrethw - res += stimeh - res += stimehw res += mtimecmph + res += mucycle_deltah + res += mutime_deltah + res += muinstret_deltah + res += mscycle_deltah + res += mstime_deltah + res += msinstret_deltah + res += mcycleh res += mtimeh + res += minstreth res.toArray } } diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index a8bfa03c..6ec57d5c 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -15,7 +15,6 @@ class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { } class PTWResp(implicit p: Parameters) extends CoreBundle()(p) { - val error = Bool() val pte = new PTE } @@ -27,7 +26,7 @@ class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) { } class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) { - val ptbr = UInt(INPUT, paddrBits) + val ptbr = UInt(INPUT, ppnBits) val invalidate = Bool(INPUT) val status = new MStatus().asInput } @@ -59,7 +58,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { val dpath = new DatapathPTWIO } - val s_ready :: s_req :: s_wait :: s_set_dirty :: s_wait_dirty :: s_done :: s_error :: Nil = Enum(UInt(), 7) + val s_ready :: s_req :: s_wait :: s_set_dirty :: s_wait_dirty :: s_done :: Nil = Enum(UInt(), 6) val state = Reg(init=s_ready) val count = Reg(UInt(width = log2Up(pgLevels))) @@ -79,7 +78,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { when (arb.io.out.fire()) { r_req := arb.io.out.bits r_req_dest := 
arb.io.chosen - r_pte.ppn := io.dpath.ptbr(paddrBits-1,pgIdxBits) + r_pte.ppn := io.dpath.ptbr } val (pte_cache_hit, pte_cache_data) = { @@ -122,16 +121,12 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { io.mem.req.bits.kill := Bool(false) io.mem.req.bits.data := pte_wdata.toBits - val resp_err = state === s_error - val resp_val = state === s_done || resp_err - val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count) + val resp_val = state === s_done for (i <- 0 until io.requestor.size) { - val me = r_req_dest === UInt(i) - io.requestor(i).resp.valid := resp_val && me - io.requestor(i).resp.bits.error := resp_err + io.requestor(i).resp.valid := resp_val && (r_req_dest === i) io.requestor(i).resp.bits.pte := r_pte io.requestor(i).resp.bits.pte.ppn := resp_ppn io.requestor(i).invalidate := io.dpath.invalidate @@ -161,14 +156,14 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { state := s_req } when (io.mem.resp.valid) { - state := s_error + state := s_done + when (pte.leaf() && set_dirty_bit) { + state := s_set_dirty + } when (pte.table() && count < pgLevels-1) { state := s_req count := count + 1 } - when (pte.leaf()) { - state := Mux(set_dirty_bit, s_set_dirty, s_done) - } } } is (s_set_dirty) { @@ -187,8 +182,5 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { is (s_done) { state := s_ready } - is (s_error) { - state := s_ready - } } } diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index d17dde1b..eee67228 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -47,7 +47,7 @@ class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) { val resp = Decoupled(new RoCCResponse) val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) val busy = Bool(OUTPUT) - 
val s = Bool(INPUT) + val status = new MStatus().asInput val interrupt = Bool(OUTPUT) // These should be handled differently, eventually @@ -142,8 +142,7 @@ class TranslatorExample(implicit p: Parameters) extends RoCC()(p) { val req_rd = Reg(io.resp.bits.rd) val req_offset = req_addr(pgIdxBits - 1, 0) val req_vpn = req_addr(coreMaxAddrBits - 1, pgIdxBits) - val ppn = Reg(UInt(width = ppnBits)) - val error = Reg(Bool()) + val pte = Reg(new PTE) val s_idle :: s_ptw_req :: s_ptw_resp :: s_resp :: Nil = Enum(Bits(), 4) val state = Reg(init = s_idle) @@ -161,8 +160,7 @@ class TranslatorExample(implicit p: Parameters) extends RoCC()(p) { when (ptw.req.fire()) { state := s_ptw_resp } when (state === s_ptw_resp && ptw.resp.valid) { - error := ptw.resp.bits.error - ppn := ptw.resp.bits.pte.ppn + pte := ptw.resp.bits.pte state := s_resp } @@ -175,7 +173,7 @@ class TranslatorExample(implicit p: Parameters) extends RoCC()(p) { io.resp.valid := (state === s_resp) io.resp.bits.rd := req_rd - io.resp.bits.data := Mux(error, SInt(-1).toUInt, Cat(ppn, req_offset)) + io.resp.bits.data := Mux(pte.leaf(), Cat(pte.ppn, req_offset), ~UInt(0, xLen)) io.busy := (state =/= s_idle) io.interrupt := Bool(false) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index d9657095..0eb175e5 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -22,7 +22,9 @@ case object CoreInstBits extends Field[Int] case object CoreDataBits extends Field[Int] case object CoreDCacheReqTagBits extends Field[Int] case object NCustomMRWCSRs extends Field[Int] +case object MtvecWritable extends Field[Boolean] case object MtvecInit extends Field[BigInt] +case object ResetVector extends Field[BigInt] trait HasCoreParameters extends HasAddrMapParameters { implicit val p: Parameters @@ -51,8 +53,6 @@ trait HasCoreParameters extends HasAddrMapParameters { else p(BuildRoCC).flatMap(_.csrs) val nRoccCsrs = p(RoccNCSRs) val nCores = p(HtifKey).nCores - 
val mtvecInit = p(MtvecInit) - val startAddr = mtvecInit + 0x100 // Print out log of committed instructions and their writeback values. // Requires post-processing due to out-of-order writebacks. @@ -539,7 +539,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { io.rocc.cmd.valid := wb_rocc_val io.rocc.exception := wb_xcpt && csr.io.status.xs.orR - io.rocc.s := csr.io.status.prv.orR // should we just pass all of mstatus? + io.rocc.status := csr.io.status io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst) io.rocc.cmd.bits.rs1 := wb_reg_wdata io.rocc.cmd.bits.rs2 := wb_reg_rs2 diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index a7bfeec3..4dbda975 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -84,7 +84,7 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( })) val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) rocc.io.cmd <> cmdRouter.io.out(i) - rocc.io.s := core.io.rocc.s + rocc.io.status := core.io.rocc.status rocc.io.exception := core.io.rocc.exception rocc.io.host_id := io.host.id dcIF.io.requestor <> rocc.io.mem diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 55e7359d..323bc5fc 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -100,7 +100,6 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { val tag_hit_addr = OHToUInt(tag_cam.io.hits) // permission bit arrays - val valid_array = Reg(Vec(entries, Bool())) // PTE is valid (not equivalent to CAM tag valid bit!) 
val ur_array = Reg(Vec(entries, Bool())) // user read permission val uw_array = Reg(Vec(entries, Bool())) // user write permission val ux_array = Reg(Vec(entries, Bool())) // user execute permission @@ -111,13 +110,12 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { when (io.ptw.resp.valid) { val pte = io.ptw.resp.bits.pte tag_ram(r_refill_waddr) := pte.ppn - valid_array(r_refill_waddr) := !io.ptw.resp.bits.error - ur_array(r_refill_waddr) := pte.ur() && !io.ptw.resp.bits.error - uw_array(r_refill_waddr) := pte.uw() && !io.ptw.resp.bits.error - ux_array(r_refill_waddr) := pte.ux() && !io.ptw.resp.bits.error - sr_array(r_refill_waddr) := pte.sr() && !io.ptw.resp.bits.error - sw_array(r_refill_waddr) := pte.sw() && !io.ptw.resp.bits.error - sx_array(r_refill_waddr) := pte.sx() && !io.ptw.resp.bits.error + ur_array(r_refill_waddr) := pte.ur() + uw_array(r_refill_waddr) := pte.uw() + ux_array(r_refill_waddr) := pte.ux() + sr_array(r_refill_waddr) := pte.sr() + sw_array(r_refill_waddr) := pte.sw() + sx_array(r_refill_waddr) := pte.sx() dirty_array(r_refill_waddr) := pte.d } @@ -126,14 +124,17 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) val plru = new PseudoLRU(entries) val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) - - val priv = Mux(io.ptw.status.mprv && !io.req.bits.instruction, io.ptw.status.prv1, io.ptw.status.prv) - val priv_s = priv === PRV_S - val priv_uses_vm = priv <= PRV_S + + val do_mprv = io.ptw.status.prv === PRV.M && io.ptw.status.mprv && !io.req.bits.instruction + val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv) + val priv_s = priv === PRV.S + val priv_uses_vm = priv <= PRV.S val req_xwr = Cat(!r_req.store, r_req.store, !(r_req.instruction || r_req.store)) - val r_array = Mux(priv_s, sr_array.toBits, ur_array.toBits) - val w_array = Mux(priv_s, sw_array.toBits, uw_array.toBits) + val ur_bits = ur_array.toBits + val pum_ok = 
~Mux(io.ptw.status.pum, ur_bits, UInt(0)) + val r_array = Mux(priv_s, sr_array.toBits & pum_ok, ur_bits) + val w_array = Mux(priv_s, sw_array.toBits & pum_ok, uw_array.toBits) val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits) val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough @@ -160,10 +161,10 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { io.resp.ppn := Mux(vm_enabled, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(ppnBits-1,0)) io.resp.hit_idx := tag_cam.io.hits - // clear invalid entries on access, or all entries on a TLB flush - tag_cam.io.clear := io.ptw.invalidate || io.req.fire() - tag_cam.io.clear_mask := ~valid_array.toBits | (tag_cam.io.hits & ~tag_hits) - when (io.ptw.invalidate) { tag_cam.io.clear_mask := ~UInt(0, entries) } + // clear entries on a TLB flush. + // TODO: selective flushing. careful with superpage mappings (flush it all) + tag_cam.io.clear := io.ptw.invalidate + tag_cam.io.clear_mask := ~UInt(0, entries) io.ptw.req.valid := state === s_request io.ptw.req.bits.addr := r_refill_tag From 82c595d11aa63f98528caef32d42b5fd4a026a9b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 10 Mar 2016 17:30:56 -0800 Subject: [PATCH 0964/1087] Fix no-FPU elaboration of CSR file --- rocket/src/main/scala/csr.scala | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index eb99d7be..9507f6d5 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -293,7 +293,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) } val addr_valid = decoded_addr.values.reduce(_||_) - val fp_csr = decoded_addr(CSRs.fflags) || decoded_addr(CSRs.frm) || decoded_addr(CSRs.fcsr) + val fp_csr = + if (usingFPU) decoded_addr(CSRs.fflags) || decoded_addr(CSRs.frm) || decoded_addr(CSRs.fcsr) + else Bool(false) val 
csr_addr_priv = io.rw.addr(9,8) val priv_sufficient = reg_mstatus.prv >= csr_addr_priv val read_only = io.rw.addr(11,10).andR @@ -442,9 +444,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_mip.msip := wdata(0) } when (decoded_addr(CSRs.mie)) { reg_mie := wdata & supported_interrupts } - when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } - when (decoded_addr(CSRs.frm)) { reg_frm := wdata } - when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } when (decoded_addr(CSRs.mepc)) { reg_mepc := ~(~wdata | (coreInstBytes-1)) } when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } if (p(MtvecWritable)) @@ -456,6 +455,11 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_csr_req_fire) { reg_fromhost := wdata } } when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_csr_req_fire) { reg_tohost := wdata } } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } + if (usingFPU) { + when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } + when (decoded_addr(CSRs.frm)) { reg_frm := wdata } + when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } + } if (usingVM) { when (decoded_addr(CSRs.sstatus)) { val new_sstatus = new MStatus().fromBits(wdata) From 7ae44d4905745de24f57a107cf4731c2d65ab7eb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 10 Mar 2016 17:32:00 -0800 Subject: [PATCH 0965/1087] Add RV32 support --- rocket/src/main/scala/csr.scala | 21 ++++++++-- rocket/src/main/scala/fpu.scala | 7 ++-- rocket/src/main/scala/idecode.scala | 57 ++++++++++++++-------------- rocket/src/main/scala/nbdcache.scala | 8 ++-- rocket/src/main/scala/rocket.scala | 32 ++++++++-------- rocket/src/main/scala/tlb.scala | 8 ++-- 6 files changed, 77 insertions(+), 56 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala 
index 9507f6d5..d7e406ab 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -162,13 +162,14 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_tohost = Reg(init=Bits(0, xLen)) val reg_fromhost = Reg(init=Bits(0, xLen)) val reg_stats = Reg(init=Bool(false)) - val reg_time = Reg(UInt(width = xLen)) - val reg_instret = WideCounter(xLen, io.retire) - val reg_cycle: UInt = if (enableCommitLog) { reg_instret } else { WideCounter(xLen) } val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) + val reg_time = Reg(UInt(width = 64)) // regardless of XLEN + val reg_instret = WideCounter(64, io.retire) + val reg_cycle: UInt = if (enableCommitLog) { reg_instret } else { WideCounter(64) } + val mip = Wire(init=reg_mip) mip.host := (reg_fromhost =/= 0) mip.rocc := io.rocc.interrupt @@ -277,6 +278,20 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) read_mapping += CSRs.msinstret_delta -> UInt(0) } + if (xLen == 32) { + read_mapping += CSRs.mtimeh -> (reg_time >> 32) + read_mapping += CSRs.mcycleh -> (reg_cycle >> 32) + read_mapping += CSRs.minstreth -> (reg_instret >> 32) + read_mapping += CSRs.mutime_deltah -> UInt(0) + read_mapping += CSRs.mucycle_deltah -> UInt(0) + read_mapping += CSRs.muinstret_deltah -> UInt(0) + if (usingVM) { + read_mapping += CSRs.mstime_deltah -> UInt(0) + read_mapping += CSRs.mscycle_deltah -> UInt(0) + read_mapping += CSRs.msinstret_deltah -> UInt(0) + } + } + for (i <- 0 until nCustomMrwCsrs) { val addr = 0xff0 + i require(addr < (1 << CSR.ADDRSZ)) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 68dea0a4..0b8190c1 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -138,15 +138,15 @@ class FPUDecoder extends Module sigs zip decoder map {case(s,d) => s := d} } -class FPUIO extends Bundle { +class FPUIO(implicit p: Parameters) 
extends CoreBundle { val inst = Bits(INPUT, 32) - val fromint_data = Bits(INPUT, 64) + val fromint_data = Bits(INPUT, xLen) val fcsr_rm = Bits(INPUT, FPConstants.RM_SZ) val fcsr_flags = Valid(Bits(width = FPConstants.FLAGS_SZ)) val store_data = Bits(OUTPUT, 64) - val toint_data = Bits(OUTPUT, 64) + val toint_data = Bits(OUTPUT, xLen) val dmem_resp_val = Bool(INPUT) val dmem_resp_type = Bits(INPUT, 3) @@ -414,6 +414,7 @@ class FPUFMAPipe(val latency: Int, expWidth: Int, sigWidth: Int) extends Module } class FPU(implicit p: Parameters) extends CoreModule()(p) { + require(xLen == 64, "RV32 Rocket FP support missing") val io = new FPUIO val ex_reg_valid = Reg(next=io.valid, init=Bool(false)) diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index d71bf32c..107de8d6 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -6,21 +6,11 @@ import Chisel._ import Instructions._ import uncore.constants.MemoryOpConstants._ import ALU._ +import cde.Parameters -abstract trait DecodeConstants +abstract trait DecodeConstants extends HasCoreParameters { - val xpr64 = Y - - val decode_default: List[BitPat] = - // jal renf1 fence.i - // | jalr | renf2 | - // fp_val| | renx2 | | renf3 | - // | rocc| | | renx1 s_alu1 mem_val | | | wfd | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | - // | | | | | | | | | | | | | | | | | | | | | wxd | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X) + val xpr64 = Bool(xLen == 64) val table: Array[(BitPat, List[BitPat])] } @@ -53,8 +43,19 @@ class IntCtrlSigs extends Bundle { val fence = Bool() val amo = Bool() + def default: List[BitPat] = + // jal renf1 fence.i + // | jalr | renf2 | + // fp_val| | renx2 | | renf3 | + // | rocc| | | renx1 s_alu1 mem_val | | | wfd | + // val | | br| | | 
| s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | + // | | | | | | | | | | | | | | | | | | | | | wxd | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X) + def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = { - val decoder = DecodeLogic(inst, XDecode.decode_default, table) + val decoder = DecodeLogic(inst, default, table) val sigs = Seq(legal, fp, rocc, branch, jal, jalr, rxs2, rxs1, sel_alu2, sel_alu1, sel_imm, alu_dw, alu_fn, mem, mem_cmd, mem_type, rfs1, rfs2, rfs3, wfd, div, wxd, csr, fence_i, fence, amo) @@ -63,7 +64,7 @@ class IntCtrlSigs extends Bundle { } } -object XDecode extends DecodeConstants +class XDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( // jal renf1 fence.i @@ -183,7 +184,7 @@ object XDecode extends DecodeConstants CSRRCI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N)) } -object FDecode extends DecodeConstants +class FDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( // jal renf1 fence.i @@ -223,15 +224,15 @@ object FDecode extends DecodeConstants FCLASS_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), FCLASS_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), FMV_X_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FMV_X_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FMV_X_D-> List(xpr64,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), FCVT_W_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), FCVT_W_D-> List(Y, 
Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), FCVT_WU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), FCVT_WU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_L_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_L_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_LU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_LU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_L_S-> List(xpr64,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_L_D-> List(xpr64,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_LU_S-> List(xpr64,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_LU_D-> List(xpr64,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), FEQ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), FEQ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), FLT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), @@ -239,22 +240,22 @@ object FDecode extends DecodeConstants FLE_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), FLE_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), FMV_S_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FMV_D_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FMV_D_X-> List(xpr64,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, 
MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), FCVT_S_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), FCVT_D_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), FCVT_S_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), FCVT_D_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_S_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_L-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_S_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_LU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_L-> List(xpr64,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_L-> List(xpr64,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_LU-> List(xpr64,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_LU-> List(xpr64,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), FLW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N), FLD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N), FSW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N), FSD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N)) } -object FDivSqrtDecode extends DecodeConstants +class FDivSqrtDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( FDIV_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, 
MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), @@ -263,7 +264,7 @@ object FDivSqrtDecode extends DecodeConstants FSQRT_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N)) } -object RoCCDecode extends DecodeConstants +class RoCCDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( // jal renf1 fence.i diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2d0eee29..8b72a146 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -161,8 +161,8 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val req_cmd_sc = req.cmd === M_XSC val grant_word = Reg(UInt(width = wordBits)) - val storegen = new StoreGen(req.typ, req.addr, req.data, 8) - val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc, 8) + val storegen = new StoreGen(req.typ, req.addr, req.data, wordBits/8) + val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc, wordBits/8) val beat_offset = req.addr(beatOffBits - 1, wordOffBits) val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) @@ -814,7 +814,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { s2_req.cmd := s1_req.cmd } - val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), 8).misaligned + val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBits/8).misaligned io.cpu.xcpt.ma.ld := s1_read && misaligned io.cpu.xcpt.ma.st := s1_write && misaligned io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld @@ -1013,7 +1013,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // load data subword mux/sign extension val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) - val loadgen = new LoadGen(s2_req.typ, 
s2_req.addr, s2_data_word, s2_sc, 8) + val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBits/8) amoalu.io.addr := s2_req.addr amoalu.io.cmd := s2_req.cmd diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 0eb175e5..9f56a4b1 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -45,8 +45,9 @@ trait HasCoreParameters extends HasAddrMapParameters { val coreDataBits = xLen val coreDataBytes = coreDataBits/8 val coreDCacheReqTagBits = 7 + (2 + (if(!usingRoCC) 0 else 1)) - val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits - val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt + val vpnBitsExtended = vpnBits + (vaddrBits < xLen).toInt + val vaddrBitsExtended = vpnBitsExtended + pgIdxBits + val coreMaxAddrBits = paddrBits max vaddrBitsExtended val mmioBase = p(MMIOBase) val nCustomMrwCsrs = p(NCustomMRWCSRs) val roccCsrs = if (p(BuildRoCC).isEmpty) Nil @@ -118,10 +119,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val rocc = new RoCCInterface().flip } - var decode_table = XDecode.table - if (usingFPU) decode_table ++= FDecode.table - if (usingFPU && usingFDivSqrt) decode_table ++= FDivSqrtDecode.table - if (usingRoCC) decode_table ++= RoCCDecode.table + var decode_table = new XDecode().table + if (usingFPU) decode_table ++= new FDecode().table + if (usingFPU && usingFDivSqrt) decode_table ++= new FDivSqrtDecode().table + if (usingRoCC) decode_table ++= new RoCCDecode().table val ex_ctrl = Reg(new IntCtrlSigs) val mem_ctrl = Reg(new IntCtrlSigs) @@ -312,7 +313,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { Mux(mem_ctrl.branch && mem_br_taken, ImmGen(IMM_SB, mem_reg_inst), Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), SInt(4))) val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt - val mem_npc = (Mux(mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), 
mem_reg_wdata(vaddrBits-1,0)).toSInt, mem_br_target) & SInt(-2)).toUInt + val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).toSInt, mem_br_target) & SInt(-2)).toUInt val mem_wrong_npc = mem_npc =/= ex_reg_pc || !ex_reg_valid val mem_npc_misaligned = mem_npc(1) val mem_misprediction = mem_wrong_npc && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) @@ -531,7 +532,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { io.dmem.req.bits.cmd := ex_ctrl.mem_cmd io.dmem.req.bits.typ := ex_ctrl.mem_type io.dmem.req.bits.phys := Bool(false) - io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt + io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out) io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp) io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) require(coreDCacheReqTagBits >= 6) @@ -545,7 +546,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { io.rocc.cmd.bits.rs2 := wb_reg_rs2 if (enableCommitLog) { - val pc = Wire(SInt(width=64)) + val pc = Wire(SInt(width=xLen)) pc := wb_reg_pc val inst = wb_reg_inst val rd = RegNext(RegNext(RegNext(id_waddr))) @@ -575,7 +576,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } else { printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", - io.host.id, csr.io.time(32,0), wb_valid, wb_reg_pc, + io.host.id, csr.io.time(31,0), wb_valid, wb_reg_pc, Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen, wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), @@ -588,14 +589,15 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) = targets.map(h => h._1 && cond(h._2)).reduce(_||_) - def vaSign(a0: UInt, ea: UInt) = { + def encodeVirtualAddress(a0: UInt, ea: UInt) = if (xLen == 32) ea else { // 
efficient means to compress 64-bit VA into vaddrBits+1 bits // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) val a = a0 >> vaddrBits-1 - val e = ea(vaddrBits,vaddrBits-1) - Mux(a === UInt(0) || a === UInt(1), e =/= UInt(0), - Mux(a.toSInt === SInt(-1) || a.toSInt === SInt(-2), e.toSInt === SInt(-1), - e(0))) + val e = ea(vaddrBits,vaddrBits-1).toSInt + val msb = + Mux(a === UInt(0) || a === UInt(1), e =/= SInt(0), + Mux(a.toSInt === SInt(-1) || a.toSInt === SInt(-2), e === SInt(-1), e(0))) + Cat(msb, ea(vaddrBits-1,0)) } class Scoreboard(n: Int) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 323bc5fc..aff29c03 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -11,7 +11,7 @@ import uncore.PseudoLRU case object NTLBEntries extends Field[Int] -trait HasTLBParameters extends HasAddrMapParameters { +trait HasTLBParameters extends HasCoreParameters { val entries = p(NTLBEntries) val camAddrBits = log2Ceil(entries) val camTagBits = asIdBits + vpnBits @@ -57,7 +57,7 @@ class RocketCAM(implicit p: Parameters) extends TLBModule()(p) { class TLBReq(implicit p: Parameters) extends CoreBundle()(p) { val asid = UInt(width = asIdBits) - val vpn = UInt(width = vpnBits+1) + val vpn = UInt(width = vpnBitsExtended) val passthrough = Bool() val instruction = Bool() val store = Bool() @@ -138,7 +138,9 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits) val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough - val bad_va = io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1) + val bad_va = + if (vpnBits == vpnBitsExtended) Bool(false) + else io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1) // it's only a store hit if the dirty bit is set val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0))) val tag_hit = tag_hits.orR From 8d1ba4d1ecc3073e5090a2fc102b5d5d332dcdf0 Mon 
Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 24 Mar 2016 14:52:12 -0700 Subject: [PATCH 0966/1087] Remove hard-coded XLEN values from D$ --- rocket/src/main/scala/nbdcache.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 8b72a146..fef27ef4 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -153,7 +153,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { def wordFromBeat(addr: UInt, dat: UInt) = { val offset = addr(beatOffBits - 1, wordOffBits) - val shift = Cat(offset, UInt(0, wordOffBits + 3)) + val shift = Cat(offset, UInt(0, wordOffBits + log2Up(wordBytes))) (dat >> shift)(wordBits - 1, 0) } @@ -161,8 +161,8 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val req_cmd_sc = req.cmd === M_XSC val grant_word = Reg(UInt(width = wordBits)) - val storegen = new StoreGen(req.typ, req.addr, req.data, wordBits/8) - val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc, wordBits/8) + val storegen = new StoreGen(req.typ, req.addr, req.data, wordBytes) + val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc, wordBytes) val beat_offset = req.addr(beatOffBits - 1, wordOffBits) val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) @@ -814,7 +814,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { s2_req.cmd := s1_req.cmd } - val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBits/8).misaligned + val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned io.cpu.xcpt.ma.ld := s1_read && misaligned io.cpu.xcpt.ma.st := s1_write && misaligned io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld @@ -906,7 +906,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s2_data_decoded = (0 until 
rowWords).map(i => code.decode(s2_data_muxed(encDataBits*(i+1)-1,encDataBits*i))) val s2_data_corrected = Vec(s2_data_decoded.map(_.corrected)).toBits val s2_data_uncorrected = Vec(s2_data_decoded.map(_.uncorrected)).toBits - val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,3) + val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,log2Up(wordBytes)) val s2_data_correctable = Vec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) // store/amo hits @@ -1013,7 +1013,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // load data subword mux/sign extension val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) - val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBits/8) + val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBytes) amoalu.io.addr := s2_req.addr amoalu.io.cmd := s2_req.cmd From 27b3cca0461bebb8fe68d644aeb973ab5c808ab9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 25 Mar 2016 14:16:56 -0700 Subject: [PATCH 0967/1087] Discover D$, PTW port counts dynamically This is a generator, after all... 
--- rocket/src/main/scala/tile.scala | 57 +++++++++++++++++--------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 4dbda975..1c5926ac 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -24,8 +24,6 @@ abstract class Tile(resetSignal: Bool = null) val usingRocc = !buildRocc.isEmpty val nRocc = buildRocc.size val nFPUPorts = buildRocc.filter(_.useFPU).size - val nDCachePorts = 2 + nRocc - val nPTWPorts = 2 + p(RoccNPTWPorts) val nCachedTileLinkPorts = 1 val nUncachedTileLinkPorts = 1 + p(RoccNMemChannels) val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) @@ -43,32 +41,20 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( case CacheName => "L1I" case CoreName => "Rocket" }))) val dcache = Module(new HellaCache()(dcacheParams)) - val ptw = Module(new PTW(nPTWPorts)(dcacheParams)) + val ptwPorts = collection.mutable.ArrayBuffer(icache.io.ptw, dcache.io.ptw) + val dcPorts = collection.mutable.ArrayBuffer(core.io.dmem) + val uncachedArbPorts = collection.mutable.ArrayBuffer(icache.io.mem) + val uncachedPorts = collection.mutable.ArrayBuffer[ClientUncachedTileLinkIO]() + val cachedPorts = collection.mutable.ArrayBuffer(dcache.io.mem) dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache - val dcArb = Module(new HellaCacheArbiter(nDCachePorts)(dcacheParams)) - dcArb.io.requestor(0) <> ptw.io.mem - dcArb.io.requestor(1) <> core.io.dmem - dcache.io.cpu <> dcArb.io.mem - - ptw.io.requestor(0) <> icache.io.ptw - ptw.io.requestor(1) <> dcache.io.ptw - io.host <> core.io.host icache.io.cpu <> core.io.imem - core.io.ptw <> ptw.io.dpath val fpuOpt = if (p(UseFPU)) Some(Module(new FPU)) else None fpuOpt.foreach(fpu => core.io.fpu <> fpu.io) - // Connect the caches and ROCC to the outer memory system - io.cached.head <> dcache.io.mem - // If so specified, build an RoCC module 
and wire it to core + TileLink ports, - // otherwise just hookup the icache - io.uncached <> (if (usingRocc) { - val uncachedArb = Module(new ClientTileLinkIOArbiter(1 + nRocc)) - uncachedArb.io.in(0) <> icache.io.mem - + if (usingRocc) { val respArb = Module(new RRArbiter(new RoCCResponse, nRocc)) core.io.rocc.resp <> respArb.io.out @@ -88,8 +74,8 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( rocc.io.exception := core.io.rocc.exception rocc.io.host_id := io.host.id dcIF.io.requestor <> rocc.io.mem - dcArb.io.requestor(2 + i) <> dcIF.io.cache - uncachedArb.io.in(1 + i) <> rocc.io.autl + dcPorts += dcIF.io.cache + uncachedArbPorts += rocc.io.autl rocc } @@ -108,8 +94,6 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( } } - ptw.io.requestor.drop(2) <> roccs.flatMap(_.io.ptw) - core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _) core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _) respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) @@ -126,8 +110,29 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( } } - roccs.flatMap(_.io.utl) :+ uncachedArb.io.out - } else { Seq(icache.io.mem) }) + ptwPorts ++= roccs.flatMap(_.io.ptw) + uncachedPorts ++= roccs.flatMap(_.io.utl) + } + + val uncachedArb = Module(new ClientTileLinkIOArbiter(uncachedArbPorts.size)) + uncachedArb.io.in <> uncachedArbPorts + uncachedArb.io.out +=: uncachedPorts + + // Connect the caches and RoCC to the outer memory system + io.uncached <> uncachedPorts + io.cached <> cachedPorts + // TODO remove nCached/nUncachedTileLinkPorts parameters and these assertions + require(uncachedPorts.size == nUncachedTileLinkPorts) + require(cachedPorts.size == nCachedTileLinkPorts) + + val ptw = Module(new PTW(ptwPorts.size)(dcacheParams)) + ptw.io.requestor <> ptwPorts + ptw.io.mem +=: dcPorts + core.io.ptw <> ptw.io.dpath + + val dcArb = Module(new 
HellaCacheArbiter(dcPorts.size)(dcacheParams)) + dcArb.io.requestor <> dcPorts + dcache.io.cpu <> dcArb.io.mem if (!usingRocc || nFPUPorts == 0) { fpuOpt.foreach { fpu => From a4685a073fe799fda5367f7fe151111ecb0248cb Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 25 Mar 2016 14:17:25 -0700 Subject: [PATCH 0968/1087] Don't instantiate PTW when UseVM=false --- rocket/src/main/scala/tile.scala | 10 ++++---- rocket/src/main/scala/tlb.scala | 40 +++++++++++++++++--------------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 1c5926ac..f417c853 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -125,10 +125,12 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( require(uncachedPorts.size == nUncachedTileLinkPorts) require(cachedPorts.size == nCachedTileLinkPorts) - val ptw = Module(new PTW(ptwPorts.size)(dcacheParams)) - ptw.io.requestor <> ptwPorts - ptw.io.mem +=: dcPorts - core.io.ptw <> ptw.io.dpath + if (p(UseVM)) { + val ptw = Module(new PTW(ptwPorts.size)(dcacheParams)) + ptw.io.requestor <> ptwPorts + ptw.io.mem +=: dcPorts + core.io.ptw <> ptw.io.dpath + } val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams)) dcArb.io.requestor <> dcPorts diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index aff29c03..2c3fe123 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -137,7 +137,7 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { val w_array = Mux(priv_s, sw_array.toBits & pum_ok, uw_array.toBits) val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits) - val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough + val vm_enabled = Bool(usingVM) && io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough val bad_va = if (vpnBits == vpnBitsExtended) Bool(false) else 
io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1) @@ -174,26 +174,28 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { io.ptw.req.bits.store := r_req.store io.ptw.req.bits.fetch := r_req.instruction - when (io.req.fire() && tlb_miss) { - state := s_request - r_refill_tag := lookup_tag - r_refill_waddr := repl_waddr - r_req := io.req.bits - } - when (state === s_request) { - when (io.ptw.invalidate) { + if (usingVM) { + when (io.req.fire() && tlb_miss) { + state := s_request + r_refill_tag := lookup_tag + r_refill_waddr := repl_waddr + r_req := io.req.bits + } + when (state === s_request) { + when (io.ptw.invalidate) { + state := s_ready + } + when (io.ptw.req.ready) { + state := s_wait + when (io.ptw.invalidate) { state := s_wait_invalidate } + } + } + when (state === s_wait && io.ptw.invalidate) { + state := s_wait_invalidate + } + when (io.ptw.resp.valid) { state := s_ready } - when (io.ptw.req.ready) { - state := s_wait - when (io.ptw.invalidate) { state := s_wait_invalidate } - } - } - when (state === s_wait && io.ptw.invalidate) { - state := s_wait_invalidate - } - when (io.ptw.resp.valid) { - state := s_ready } } From 1ae6d09751991d041e0cf932de220524f8c1ac3f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 25 Mar 2016 15:29:32 -0700 Subject: [PATCH 0969/1087] Slightly ameliorate D$->I$ critical path via scoreboard --- rocket/src/main/scala/nbdcache.scala | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index fef27ef4..6b684f64 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -757,7 +757,6 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s1_clk_en = Reg(Bool()) val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) - val s2_killed = Reg(next=s1_valid && io.cpu.req.bits.kill) val s2_req = Reg(io.cpu.req.bits) val s2_replay = Reg(next=s1_replay, 
init=Bool(false)) && s2_req.cmd =/= M_NOP val s2_recycle = Wire(Bool()) @@ -1055,11 +1054,9 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val uncache_resp = Wire(Valid(new HellaCacheResp)) uncache_resp.bits := mshrs.io.resp.bits uncache_resp.valid := mshrs.io.resp.valid + mshrs.io.resp.ready := Reg(next= !(s1_valid || s1_replay)) - val cache_pass = s2_valid || s2_killed || s2_replay - mshrs.io.resp.ready := !cache_pass - - io.cpu.resp := Mux(cache_pass, cache_resp, uncache_resp) + io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp) io.cpu.resp.bits.data_word_bypass := loadgen.wordData io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid io.cpu.replay_next.valid := s1_replay && s1_read From ed280fb3de7407131bc91397923572656988c5aa Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 25 Mar 2016 15:52:18 -0700 Subject: [PATCH 0970/1087] Remove empty when statement (???) --- rocket/src/main/scala/icache.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 2e6a5b17..df6c6c36 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -104,8 +104,6 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool val tag_out = tag_rdata(i) val s1_tag_disparity = code.decode(tag_out).error - when (s1_valid && rdy && !stall) { - } s1_tag_match(i) := tag_out(tagBits-1,0) === s1_tag s1_tag_hit(i) := s1_vb && s1_tag_match(i) s1_disparity(i) := s1_vb && (s1_tag_disparity || code.decode(s1_dout(i)).error) From f526d380fdac684469f5fe078ed41eaccc87607d Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Sat, 26 Mar 2016 05:37:26 -0700 Subject: [PATCH 0971/1087] separate btb response mask from the frontend mask It is now the job of the pipeline to monitor the frontend's valid mask (of 
instructions) and the BTB's suggested valid mask (based on the prediction it makes). Some processors may want to ignore or override the BTB's prediction and thus can supply their own instruction mask. --- rocket/src/main/scala/btb.scala | 4 ++-- rocket/src/main/scala/frontend.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index a86357e6..0cb33e7e 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -115,8 +115,8 @@ class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) { // - "bridx" is the low-order PC bits of the predicted branch (after // shifting off the lowest log(inst_bytes) bits off). -// - "resp.mask" provides a mask of valid instructions (instructions are -// masked off by the predicted taken branch). +// - "mask" provides a mask of valid instructions (instructions are +// masked off by the predicted taken branch from the BTB). class BTBResp(implicit p: Parameters) extends BtbBundle()(p) { val taken = Bool() val mask = Bits(width = fetchWidth) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 96e7c2ec..e98df9d3 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -131,7 +131,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val all_ones = UInt((1 << (fetchWidth+1))-1) val msk_pc = if (fetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(fetchWidth) -1+2,2) - io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc) + io.cpu.resp.bits.mask := msk_pc io.cpu.resp.bits.xcpt_if := s2_xcpt_if io.cpu.btb_resp.valid := s2_btb_resp_valid From e652821962d1876678618363e7899b0684c42ae2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 28 Mar 2016 22:53:47 -0700 Subject: [PATCH 0972/1087] Use correct kind of TileLink arbiter It was "correct" before, but broke Chisel3 build. 
--- rocket/src/main/scala/tile.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index f417c853..68c0714d 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -114,7 +114,7 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( uncachedPorts ++= roccs.flatMap(_.io.utl) } - val uncachedArb = Module(new ClientTileLinkIOArbiter(uncachedArbPorts.size)) + val uncachedArb = Module(new ClientUncachedTileLinkIOArbiter(uncachedArbPorts.size)) uncachedArb.io.in <> uncachedArbPorts uncachedArb.io.out +=: uncachedPorts From 8ad8e8a69130389225aa9e430621d8bd11dd594c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 30 Mar 2016 11:01:53 -0700 Subject: [PATCH 0973/1087] Add partial Sv48/Sv57 support Right now, we don't support Sv39 and Sv48 at the same time, which needs to change. --- rocket/src/main/scala/csr.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index d7e406ab..71c139de 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -440,7 +440,8 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } if (usingVM) { - val vm_on = if (xLen == 32) 8 else 9 + require(if (xLen == 32) pgLevels == 2 else pgLevels > 2 && pgLevels < 6) + val vm_on = 6 + pgLevels // TODO Sv48 support should imply Sv39 support when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 } when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on } } From 70664bbca0118995b1f7d77c8ae8e20b0bfca597 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 30 Mar 2016 22:48:31 -0700 Subject: [PATCH 0974/1087] Fix Chisel3 build for UseVM=false --- rocket/src/main/scala/tlb.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 
2c3fe123..1559802a 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -83,14 +83,14 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { val ptw = new TLBPTWIO } - val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4) - val state = Reg(init=s_ready) - val r_refill_tag = Reg(UInt()) - val r_refill_waddr = Reg(UInt()) - val r_req = Reg(new TLBReq) - val tag_cam = Module(new RocketCAM) val tag_ram = Mem(entries, io.ptw.resp.bits.pte.ppn) + + val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4) + val state = Reg(init=s_ready) + val r_refill_tag = Reg(tag_cam.io.write_tag) + val r_refill_waddr = Reg(tag_cam.io.write_addr) + val r_req = Reg(new TLBReq) val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt tag_cam.io.tag := lookup_tag From adb7eacf6e2c2ef9aa5962effb8a35213b08adb1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 30 Mar 2016 22:48:51 -0700 Subject: [PATCH 0975/1087] Fix Chisel3 build for XLen=32 --- rocket/src/main/scala/csr.scala | 14 ++++++++------ rocket/src/main/scala/util.scala | 4 +++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 71c139de..4186cf5e 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -357,15 +357,17 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) io.status.sd_rv32 := io.status.sd when (io.exception || csr_xcpt) { - val ldst_badaddr = { - val (upper, lower) = Split(io.rw.wdata, vaddrBits) - val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) - Cat(sign, lower) - } + def compressVAddr(addr: UInt) = + if (vaddrBitsExtended == vaddrBits) addr + else { + val (upper, lower) = Split(addr, vaddrBits) + val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) + Cat(sign, lower) + } val ldst = cause === Causes.fault_load || cause === Causes.misaligned_load || cause === 
Causes.fault_store || cause === Causes.misaligned_store - val badaddr = Mux(ldst, ldst_badaddr, io.pc) + val badaddr = Mux(ldst, compressVAddr(io.rw.wdata), io.pc) val epc = ~(~io.pc | (coreInstBytes-1)) val pie = read_mstatus(reg_mstatus.prv) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index a6ac1ad5..18dec13e 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -14,7 +14,9 @@ object Util { implicit def seqToVec[T <: Data](x: Seq[T]): Vec[T] = Vec(x) implicit def wcToUInt(c: WideCounter): UInt = c.value implicit def sextToConv(x: UInt) = new AnyRef { - def sextTo(n: Int): UInt = Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x) + def sextTo(n: Int): UInt = + if (x.getWidth == n) x + else Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x) } implicit def intToUnsigned(x: Int): Unsigned = new Unsigned(x) From 1792d01ce1234097ecd16a91fe5c0e4efe30cc34 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Thu, 31 Mar 2016 15:55:22 -0700 Subject: [PATCH 0976/1087] fix leaky assert in nbdcache Squash of #33. 
--- rocket/src/main/scala/nbdcache.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 6b684f64..40cac1c8 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -821,7 +821,8 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { assert (!(Reg(next= (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) && - io.cpu.resp.valid), "DCache exception occurred - cache response not killed.") + s2_valid_masked), + "DCache exception occurred - cache response not killed.") // tags def onReset = L1Metadata(UInt(0), ClientMetadata.onReset) From 54dd82ff7696b01acff864a5f32b2d7ec2e31ba4 Mon Sep 17 00:00:00 2001 From: Henry Cook Date: Thu, 31 Mar 2016 15:26:39 -0700 Subject: [PATCH 0977/1087] bugfix for WB data buffer --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 40cac1c8..96e57367 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -583,7 +583,7 @@ class WritebackUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) { // then buffer enough data_resps to make a whole beat val data_buf = Reg(Bits()) when(active && r2_data_req_fired && !beat_done) { - data_buf := Cat(io.data_resp, data_buf((refillCyclesPerBeat-1)*encRowBits-1, encRowBits)) + data_buf := Cat(io.data_resp, data_buf((refillCyclesPerBeat)*encRowBits-1, encRowBits)) buf_v := (if(refillCyclesPerBeat > 2) Cat(UInt(1), buf_v(refillCyclesPerBeat-2,1)) else UInt(1)) From d406dc12312986de141214aa7eff9899b7c5389a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 1 Apr 2016 15:14:34 -0700 Subject: [PATCH 0978/1087] Remove vestigial BTB enable option --- rocket/src/main/scala/btb.scala | 1 - 1 file changed, 1 deletion(-) diff --git 
a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 0cb33e7e..54a31dd4 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -10,7 +10,6 @@ import Util._ case object BtbKey extends Field[BtbParameters] case class BtbParameters( - enabled: Boolean = true, nEntries: Int = 62, nRAS: Int = 2, updatesOutOfOrder: Boolean = false) From 4480d1e81720775d26890ba5439bdfb9fb19989d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 1 Apr 2016 15:14:45 -0700 Subject: [PATCH 0979/1087] Don't compile BTB when nEntries=0 --- rocket/src/main/scala/frontend.scala | 42 +++++++++++++++++----------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index d1661ad0..0d3763c7 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -34,7 +34,6 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val mem = new ClientUncachedTileLinkIO } - val btb = Module(new BTB) val icache = Module(new ICache) val tlb = Module(new TLB) @@ -44,20 +43,19 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val s2_valid = Reg(init=Bool(true)) val s2_pc = Reg(init=UInt(p(ResetVector))) val s2_btb_resp_valid = Reg(init=Bool(false)) - val s2_btb_resp_bits = Reg(btb.io.resp.bits) + val s2_btb_resp_bits = Reg(new BTBResp) val s2_xcpt_if = Reg(init=Bool(false)) val s2_resp_valid = Wire(init=Bool(false)) val s2_resp_data = Wire(UInt(width = rowBits)) - val msb = vaddrBits-1 - val lsb = log2Up(fetchWidth*coreInstBytes) - val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target) - val ntpc_0 = s1_pc + UInt(coreInstBytes*fetchWidth) - val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure + val ntpc_0 = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth) + val ntpc = // don't increment PC into virtual 
address space hole + if (vaddrBitsExtended == vaddrBits) ntpc_0 + else Cat(s1_pc(vaddrBits-1) & ntpc_0(vaddrBits-1), ntpc_0) + val predicted_npc = Wire(init = ntpc) val icmiss = s2_valid && !s2_resp_valid - val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc) val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt - val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes)) + val s0_same_block = Wire(init = !icmiss && !io.cpu.req.valid && ((ntpc & rowBytes) === (s1_pc & rowBytes))) val stall = io.cpu.resp.valid && !io.cpu.resp.ready when (!stall) { @@ -66,8 +64,6 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa s2_valid := !icmiss when (!icmiss) { s2_pc := s1_pc - s2_btb_resp_valid := btb.io.resp.valid - when (btb.io.resp.valid) { s2_btb_resp_bits := btb.io.resp.bits } s2_xcpt_if := tlb.io.resp.xcpt_if } } @@ -77,12 +73,24 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa s2_valid := Bool(false) } - btb.io.req.valid := !stall && !icmiss - btb.io.req.bits.addr := s1_pc - btb.io.btb_update := io.cpu.btb_update - btb.io.bht_update := io.cpu.bht_update - btb.io.ras_update := io.cpu.ras_update - btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate + if (p(BtbKey).nEntries > 0) { + val btb = Module(new BTB) + btb.io.req.valid := false + btb.io.req.bits.addr := s1_pc + btb.io.btb_update := io.cpu.btb_update + btb.io.bht_update := io.cpu.bht_update + btb.io.ras_update := io.cpu.ras_update + btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate + when (!stall && !icmiss) { + btb.io.req.valid := true + s2_btb_resp_valid := btb.io.resp.valid + s2_btb_resp_bits := btb.io.resp.bits + } + when (btb.io.resp.bits.taken) { + predicted_npc := btb.io.resp.bits.target.sextTo(vaddrBitsExtended) + s0_same_block := Bool(false) + } + } io.ptw <> tlb.io.ptw tlb.io.req.valid := !stall && !icmiss From 37b9051762106e23bef266d6d2a2a8ca6bb889ac 
Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 1 Apr 2016 15:46:36 -0700 Subject: [PATCH 0980/1087] No need to validate npc if BTB is disabled --- rocket/src/main/scala/rocket.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 9f56a4b1..351c1244 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -316,7 +316,11 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).toSInt, mem_br_target) & SInt(-2)).toUInt val mem_wrong_npc = mem_npc =/= ex_reg_pc || !ex_reg_valid val mem_npc_misaligned = mem_npc(1) - val mem_misprediction = mem_wrong_npc && mem_reg_valid && (mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal) + val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal + val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal + val mem_misprediction = + if (p(BtbKey).nEntries == 0) mem_cfi_taken + else mem_cfi && mem_wrong_npc val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) take_pc_mem := want_take_pc_mem && !mem_npc_misaligned @@ -496,7 +500,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { io.imem.invalidate := wb_reg_valid && wb_ctrl.fence_i io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt - io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && ((mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal) && !take_pc_wb + io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && mem_cfi_taken && !take_pc_wb io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr io.imem.btb_update.bits.isReturn := mem_ctrl.jalr && mem_reg_inst(19,15) === BitPat("b00??1") io.imem.btb_update.bits.pc := mem_reg_pc From 78bc18736e65d28da00fa032f455dabbe60a8c37 Mon Sep 17 
00:00:00 2001 From: Henry Cook Date: Fri, 1 Apr 2016 15:34:40 -0700 Subject: [PATCH 0981/1087] LRSC starvation fix: HellaCache generates its own Finish messages again. --- rocket/src/main/scala/nbdcache.scala | 42 ++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 96e57367..15fff9d4 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -147,7 +147,8 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val io = new Bundle { val req = Decoupled(new HellaCacheReq).flip val acquire = Decoupled(new Acquire) - val grant = Valid(new Grant).flip + val grant = Valid(new GrantFromSrc).flip + val finish = Decoupled(new FinishToDst) val resp = Decoupled(new HellaCacheResp) } @@ -160,6 +161,11 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val req = Reg(new HellaCacheReq) val req_cmd_sc = req.cmd === M_XSC val grant_word = Reg(UInt(width = wordBits)) + val fq = Module(new FinishQueue(1)) + + fq.io.enq.valid := io.grant.valid && io.grant.bits.requiresAck() + fq.io.enq.bits := io.grant.bits.makeFinish() + io.finish <> fq.io.deq val storegen = new StoreGen(req.typ, req.addr, req.data, wordBytes) val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc, wordBytes) @@ -168,7 +174,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) val beat_data = Fill(beatWords, storegen.data) - val s_idle :: s_acquire :: s_grant :: s_resp :: Nil = Enum(Bits(), 4) + val s_idle :: s_acquire :: s_grant :: s_resp :: s_finish :: Nil = Enum(Bits(), 5) val state = Reg(init = s_idle) io.req.ready := (state === s_idle) @@ -223,6 +229,10 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { } when (io.resp.fire()) { + state := s_finish +
} + + when (io.finish.fire()) { state := s_idle } } @@ -243,7 +253,8 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new ReplayInternal) - val mem_grant = Valid(new Grant).flip + val mem_grant = Valid(new GrantFromSrc).flip + val mem_finish = Decoupled(new FinishToDst) val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) } @@ -290,9 +301,9 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (state === s_meta_write_req && io.meta_write.ready) { state := s_meta_write_resp } - when (state === s_refill_resp) { - when (io.mem_grant.valid) { new_coh_state := coh_on_grant } - when (refill_done) { state := s_meta_write_req } + when (state === s_refill_resp && refill_done) { + state := s_meta_write_req + new_coh_state := coh_on_grant } when (io.mem_req.fire()) { // s_refill_req state := s_refill_resp @@ -329,6 +340,15 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { } } + val fq = Module(new FinishQueue(1)) + val g = io.mem_grant.bits + val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp + fq.io.enq.valid := io.mem_grant.valid && g.requiresAck() && refill_done + fq.io.enq.bits := g.makeFinish() + io.mem_finish.valid := fq.io.deq.valid && can_finish + fq.io.deq.ready := io.mem_finish.ready && can_finish + io.mem_finish.bits := fq.io.deq.bits + io.idx_match := (state =/= s_invalid) && idx_match io.refill.way_en := req.way_en io.refill.addr := (if(refillCycles > 1) Cat(req_idx, refill_cnt) else req_idx) << rowOffBits @@ -355,7 +375,7 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { addr_block = Cat(req.old_meta.tag, req_idx)) io.wb_req.bits.way_en := req.way_en - io.mem_req.valid := state === s_refill_req + io.mem_req.valid := state === s_refill_req && fq.io.enq.ready 
io.mem_req.bits := req.old_meta.coh.makeAcquire( addr_block = Cat(io.tag, req_idx).toUInt, client_xact_id = Bits(id), @@ -387,7 +407,8 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { val meta_read = Decoupled(new L1MetaReadReq) val meta_write = Decoupled(new L1MetaWriteReq) val replay = Decoupled(new Replay) - val mem_grant = Valid(new Grant).flip + val mem_grant = Valid(new GrantFromSrc).flip + val mem_finish = Decoupled(new FinishToDst) val wb_req = Decoupled(new WritebackReq) val probe_rdy = Bool(OUTPUT) @@ -417,6 +438,7 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { nMSHRs + nIOMSHRs, outerDataBeats, (a: Acquire) => a.hasMultibeatData())) + val mem_finish_arb = Module(new Arbiter(new FinishToDst, nMSHRs + nIOMSHRs)) val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs)) val replay_arb = Module(new Arbiter(new ReplayInternal, nMSHRs)) val alloc_arb = Module(new Arbiter(Bool(), nMSHRs)) @@ -445,6 +467,7 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { meta_read_arb.io.in(i) <> mshr.io.meta_read meta_write_arb.io.in(i) <> mshr.io.meta_write mem_req_arb.io.in(i) <> mshr.io.mem_req + mem_finish_arb.io.in(i) <> mshr.io.mem_finish wb_req_arb.io.in(i) <> mshr.io.wb_req replay_arb.io.in(i) <> mshr.io.replay @@ -466,6 +489,7 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.meta_read <> meta_read_arb.io.out io.meta_write <> meta_write_arb.io.out io.mem_req <> mem_req_arb.io.out + io.mem_finish <> mem_finish_arb.io.out io.wb_req <> wb_req_arb.io.out val mmio_alloc_arb = Module(new Arbiter(Bool(), nIOMSHRs)) @@ -484,6 +508,7 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { mmio_rdy = mmio_rdy || mshr.io.req.ready mem_req_arb.io.in(id) <> mshr.io.acquire + mem_finish_arb.io.in(id) <> mshr.io.finish mshr.io.grant.bits := io.mem_grant.bits mshr.io.grant.valid := io.mem_grant.valid && @@ -981,6 +1006,7 @@ class HellaCache(implicit 
p: Parameters) extends L1HellaCacheModule()(p) { writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0) data.io.read <> readArb.io.out readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked + io.mem.finish <> mshrs.io.mem_finish // writebacks val wbArb = Module(new Arbiter(new WritebackReq, 2)) From 72f7f71eb526cacc5fc58bd37b7af6edb1150048 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 1 Apr 2016 16:19:57 -0700 Subject: [PATCH 0982/1087] No need to allow finishes to be sent in s_refill_resp state This is a hold-over from when writebacks needed finish messages. --- rocket/src/main/scala/nbdcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 15fff9d4..9f691008 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -342,7 +342,7 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val fq = Module(new FinishQueue(1)) val g = io.mem_grant.bits - val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp + val can_finish = state === s_invalid || state === s_refill_req fq.io.enq.valid := io.mem_grant.valid && g.requiresAck() && refill_done fq.io.enq.bits := g.makeFinish() io.mem_finish.valid := fq.io.deq.valid && can_finish From dc662f28a00368c4e16705fc57162222b1f39d3c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 1 Apr 2016 17:28:42 -0700 Subject: [PATCH 0983/1087] Specify width on s1_pc to avoid width inference problem --- rocket/src/main/scala/frontend.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 0d3763c7..e0d7a1fb 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -37,7 +37,7 @@ class Frontend(implicit p: Parameters) extends 
CoreModule()(p) with HasL1CachePa val icache = Module(new ICache) val tlb = Module(new TLB) - val s1_pc_ = Reg(UInt()) + val s1_pc_ = Reg(UInt(width=vaddrBitsExtended)) val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline) val s1_same_block = Reg(Bool()) val s2_valid = Reg(init=Bool(true)) From 51e0870e237a116f4e380f4854a1813b598a2414 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 1 Apr 2016 19:30:39 -0700 Subject: [PATCH 0984/1087] Separate I$ and D$ interface signals that span clock cycles For example, Decoupled[HellaCacheReq].bits.kill doesn't make sense, since it doesn't come in the same cycle as ready/valid. --- rocket/src/main/scala/arbiter.scala | 23 ++++++++++------- rocket/src/main/scala/frontend.scala | 6 ++--- rocket/src/main/scala/icache.scala | 11 +++++---- rocket/src/main/scala/nbdcache.scala | 37 ++++++++++++++-------------- rocket/src/main/scala/ptw.scala | 8 +++--- rocket/src/main/scala/rocket.scala | 15 +++++------ 6 files changed, 52 insertions(+), 48 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index d0b64d9f..579ea6f8 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -21,20 +21,26 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module for (i <- 1 until n) io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid - io.mem.req.bits := io.requestor(n-1).req.bits - io.mem.req.bits.tag := Cat(io.requestor(n-1).req.bits.tag, UInt(n-1, log2Up(n))) - for (i <- n-2 to 0 by -1) { + for (i <- n-1 to 0 by -1) { val req = io.requestor(i).req - when (req.valid) { + def connect_s0() = { io.mem.req.bits.cmd := req.bits.cmd io.mem.req.bits.typ := req.bits.typ io.mem.req.bits.addr := req.bits.addr io.mem.req.bits.phys := req.bits.phys io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n))) } - when (r_valid(i)) { - io.mem.req.bits.kill := req.bits.kill -
io.mem.req.bits.data := req.bits.data + def connect_s1() = { + io.mem.s1_kill := io.requestor(i).s1_kill + io.mem.s1_data := io.requestor(i).s1_data + } + + if (i == n-1) { + connect_s0() + connect_s1() + } else { + when (req.valid) { connect_s0() } + when (r_valid(i)) { connect_s1() } } } @@ -44,10 +50,9 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module resp.valid := io.mem.resp.valid && tag_hit io.requestor(i).xcpt := io.mem.xcpt io.requestor(i).ordered := io.mem.ordered + io.requestor(i).s2_nack := io.mem.s2_nack && tag_hit resp.bits := io.mem.resp.bits resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) - resp.bits.nack := io.mem.resp.bits.nack && tag_hit - resp.bits.replay := io.mem.resp.bits.replay && tag_hit io.requestor(i).replay_next.valid := io.mem.replay_next.valid && io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(i) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index e0d7a1fb..602b13e0 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -104,10 +104,8 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa icache.io.req.valid := !stall && !s0_same_block icache.io.req.bits.idx := io.cpu.npc icache.io.invalidate := io.cpu.invalidate - icache.io.req.bits.ppn := tlb.io.resp.ppn - icache.io.req.bits.kill := io.cpu.req.valid || - tlb.io.resp.miss || tlb.io.resp.xcpt_if || - icmiss || io.ptw.invalidate + icache.io.s1_ppn := tlb.io.resp.ppn + icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.ptw.invalidate io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid) io.cpu.resp.bits.pc := s2_pc diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index df6c6c36..94c7822e 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -16,8 +16,6 @@ trait HasL1CacheParameters extends HasCacheParameters with 
HasCoreParameters { class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) { val idx = UInt(width = pgIdxBits) - val ppn = UInt(width = ppnBits) // delayed one cycle - val kill = Bool() // delayed one cycle } class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters { @@ -28,6 +26,9 @@ class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1Cache class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters { val io = new Bundle { val req = Valid(new ICacheReq).flip + val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req + val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req + val resp = Decoupled(new ICacheResp) val invalidate = Bool(INPUT) val mem = new ClientUncachedTileLinkIO @@ -47,18 +48,18 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara val s1_valid = Reg(init=Bool(false)) val s1_pgoff = Reg(UInt(width = pgIdxBits)) - val s1_addr = Cat(io.req.bits.ppn, s1_pgoff).toUInt + val s1_addr = Cat(io.s1_ppn, s1_pgoff).toUInt val s1_tag = s1_addr(tagBits+untagBits-1,untagBits) val s0_valid = io.req.valid || s1_valid && stall val s0_pgoff = Mux(s1_valid && stall, s1_pgoff, io.req.bits.idx) - s1_valid := io.req.valid && rdy || s1_valid && stall && !io.req.bits.kill + s1_valid := io.req.valid && rdy || s1_valid && stall && !io.s1_kill when (io.req.valid && rdy) { s1_pgoff := io.req.bits.idx } - val out_valid = s1_valid && !io.req.bits.kill && state === s_ready + val out_valid = s1_valid && !io.s1_kill && state === s_ready val s1_idx = s1_addr(untagBits-1,blockOffBits) val s1_offset = s1_addr(blockOffBits-1,0) val s1_hit = out_valid && s1_any_tag_hit diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 9f691008..83d7b9da 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -33,6 +33,11 @@ trait HasL1HellaCacheParameters extends HasL1CacheParameters { 
val nMSHRs = p(NMSHRs) val nIOMSHRs = 1 val lrscCycles = p(LRSCCycles) + + require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed + require(isPow2(nSets)) + require(rowBits <= outerDataBits) + require(untagBits <= pgIdxBits) } abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module @@ -63,7 +68,6 @@ trait HasMissInfo extends HasL1HellaCacheParameters { class HellaCacheReqInternal(implicit p: Parameters) extends L1HellaCacheBundle()(p) with HasCoreMemOp { - val kill = Bool() val phys = Bool() } @@ -72,7 +76,6 @@ class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) w class HellaCacheResp(implicit p: Parameters) extends L1HellaCacheBundle()(p) with HasCoreMemOp with HasCoreData { - val nack = Bool() // comes 2 cycles after req.fire val replay = Bool() val has_data = Bool() val data_word_bypass = Bits(width = coreDataBits) @@ -92,6 +95,10 @@ class HellaCacheExceptions extends Bundle { // interface between D$ and processor/DTLB class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { val req = Decoupled(new HellaCacheReq) + val s1_kill = Bool(OUTPUT) // kill previous cycle's req + val s1_data = Bits(OUTPUT, coreDataBits) // data for previous cycle's req + val s2_nack = Bool(INPUT) // req from two cycles ago is rejected + val resp = Valid(new HellaCacheResp).flip val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip val xcpt = (new HellaCacheExceptions).asInput @@ -207,8 +214,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.resp.bits.has_data := isRead(req.cmd) io.resp.bits.data := loadgen.data | req_cmd_sc io.resp.bits.store_data := req.data - io.resp.bits.nack := Bool(false) - io.resp.bits.replay := io.resp.valid + io.resp.bits.replay := Bool(true) when (io.req.fire()) { req := io.req.bits @@ -764,11 +770,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val mem = new ClientTileLinkIO } - require(lrscCycles >= 
32) // ISA requires 16-insn LRSC sequences to succeed - require(isPow2(nSets)) require(isPow2(nWays)) // TODO: relax this - require(rowBits <= outerDataBits) - require(untagBits <= pgIdxBits) val wb = Module(new WritebackUnit) val prober = Module(new ProbeUnit) @@ -777,7 +779,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.cpu.req.ready := Bool(true) val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) val s1_req = Reg(io.cpu.req.bits) - val s1_valid_masked = s1_valid && !io.cpu.req.bits.kill + val s1_valid_masked = s1_valid && !io.cpu.s1_kill val s1_replay = Reg(init=Bool(false)) val s1_clk_en = Reg(Bool()) @@ -826,12 +828,11 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s1_addr = Cat(dtlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) when (s1_clk_en) { - s2_req.kill := s1_req.kill s2_req.typ := s1_req.typ s2_req.phys := s1_req.phys s2_req.addr := s1_addr when (s1_write) { - s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.req.bits.data) + s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data) } when (s1_recycled) { s2_req.data := s1_req.data } s2_req.tag := s1_req.tag @@ -1075,7 +1076,6 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { cache_resp.bits.has_data := isRead(s2_req.cmd) cache_resp.bits.data := loadgen.data | s2_sc_fail cache_resp.bits.store_data := s2_req.data - cache_resp.bits.nack := s2_valid && s2_nack cache_resp.bits.replay := s2_replay val uncache_resp = Wire(Valid(new HellaCacheResp)) @@ -1083,6 +1083,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { uncache_resp.valid := mshrs.io.resp.valid mshrs.io.resp.ready := Reg(next= !(s1_valid || s1_replay)) + io.cpu.s2_nack := s2_valid && s2_nack io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp) io.cpu.resp.bits.data_word_bypass := loadgen.wordData io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid @@ -1111,17 
+1112,15 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module req_arb.io.in(1).bits := io.requestor.req.bits io.requestor.req.ready := !replaying_cmb && req_arb.io.in(1).ready - val s2_nack = io.cache.resp.bits.nack - val s3_nack = Reg(next=s2_nack) - val s0_req_fire = io.cache.req.fire() val s1_req_fire = Reg(next=s0_req_fire) val s2_req_fire = Reg(next=s1_req_fire) + val s3_nack = Reg(next=io.cache.s2_nack) io.cache.req <> req_arb.io.out - io.cache.req.bits.kill := s2_nack io.cache.req.bits.phys := Bool(true) - io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) + io.cache.s1_kill := io.cache.s2_nack + io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) /* replay queues: replayq1 holds the older request. @@ -1147,13 +1146,13 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module replayq2.io.enq.bits.data := io.cache.resp.bits.store_data replayq2.io.deq.ready := Bool(false) - when (s2_nack) { + when (io.cache.s2_nack) { replayq1.io.enq.valid := Bool(true) replaying_cmb := Bool(true) } // when replaying request got sunk into the d$ - when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !s2_nack) { + when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !io.cache.s2_nack) { // see if there's a stashed request in replayq2 when (replayq2.io.deq.valid) { replayq1.io.enq.valid := Bool(true) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 6ec57d5c..09b2328d 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -118,8 +118,8 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { io.mem.req.bits.cmd := Mux(state === s_set_dirty, M_XA_OR, M_XRD) io.mem.req.bits.typ := MT_D io.mem.req.bits.addr := pte_addr - io.mem.req.bits.kill := Bool(false) - io.mem.req.bits.data := pte_wdata.toBits + io.mem.s1_data := pte_wdata.toBits + io.mem.s1_kill := Bool(false) val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits val resp_ppn = 
Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count) @@ -152,7 +152,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { } } is (s_wait) { - when (io.mem.resp.bits.nack) { + when (io.mem.s2_nack) { state := s_req } when (io.mem.resp.valid) { @@ -172,7 +172,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { } } is (s_wait_dirty) { - when (io.mem.resp.bits.nack) { + when (io.mem.s2_nack) { state := s_set_dirty } when (io.mem.resp.valid) { diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 351c1244..81778134 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -376,7 +376,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc - val replay_wb_common = io.dmem.resp.bits.nack || wb_reg_replay + val replay_wb_common = io.dmem.s2_nack || wb_reg_replay val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt @@ -388,9 +388,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool - val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt()(5,1) + val dmem_resp_waddr = io.dmem.resp.bits.tag >> 1 val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data - val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data + val dmem_resp_replay = dmem_resp_valid && io.dmem.resp.bits.replay div.io.resp.ready := !(wb_reg_valid && wb_ctrl.wxd) val ll_wdata = Wire(init = div.io.resp.bits.data) @@ -532,14 +532,15 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { 
io.fpu.dmem_resp_tag := dmem_resp_waddr io.dmem.req.valid := ex_reg_valid && ex_ctrl.mem - io.dmem.req.bits.kill := killm_common || mem_xcpt + val ex_dcache_tag = Cat(ex_waddr, ex_ctrl.fp) + require(coreDCacheReqTagBits >= ex_dcache_tag.getWidth) + io.dmem.req.bits.tag := ex_dcache_tag io.dmem.req.bits.cmd := ex_ctrl.mem_cmd io.dmem.req.bits.typ := ex_ctrl.mem_type io.dmem.req.bits.phys := Bool(false) io.dmem.req.bits.addr := encodeVirtualAddress(ex_rs(0), alu.io.adder_out) - io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp) - io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) - require(coreDCacheReqTagBits >= 6) + io.dmem.s1_kill := killm_common || mem_xcpt + io.dmem.s1_data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) io.dmem.invalidate_lr := wb_xcpt io.rocc.cmd.valid := wb_rocc_val From 2d6f35525ea8d73b589dacd6dd1e597dd962eff6 Mon Sep 17 00:00:00 2001 From: Christopher Celio Date: Wed, 6 Apr 2016 14:47:03 -0700 Subject: [PATCH 0985/1087] Added Field[Int] to SFMALatency/DFMALatency params --- rocket/src/main/scala/fpu.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 0b8190c1..b7c72945 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -9,8 +9,8 @@ import FPConstants._ import uncore.constants.MemoryOpConstants._ import cde.{Parameters, Field} -case object SFMALatency -case object DFMALatency +case object SFMALatency extends Field[Int] +case object DFMALatency extends Field[Int] object FPConstants { From b7527268bb23ef0852eb02b6b0c3a92416ac9e80 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 21 Apr 2016 15:34:28 -0700 Subject: [PATCH 0986/1087] use address map instead of MMIOBase to find size of memory --- rocket/src/main/scala/csr.scala | 3 ++- rocket/src/main/scala/dma.scala | 8 +++++--- rocket/src/main/scala/nbdcache.scala | 4 ++-- rocket/src/main/scala/rocket.scala | 1 - 4 files changed, 9 
insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 4186cf5e..606fb10a 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -8,6 +8,7 @@ import Instructions._ import cde.{Parameters, Field} import uncore._ import scala.math._ +import junctions.{AddrHashMap, GlobalAddrMap} class MStatus extends Bundle { val prv = UInt(width = PRV.SZ) // not truly part of mstatus, but convenient @@ -227,7 +228,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.misa -> UInt(isa), CSRs.mstatus -> read_mstatus, CSRs.mtvec -> reg_mtvec, - CSRs.mcfgaddr -> UInt(p(junctions.MMIOBase)), + CSRs.mcfgaddr -> UInt(addrMap("mem").size), CSRs.mipi -> reg_mip.msip, CSRs.mip -> read_mip, CSRs.mie -> reg_mie, diff --git a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala index a31e311e..a18d8644 100644 --- a/rocket/src/main/scala/dma.scala +++ b/rocket/src/main/scala/dma.scala @@ -3,7 +3,7 @@ package rocket import Chisel._ import uncore._ import uncore.DmaRequest._ -import junctions.ParameterizedBundle +import junctions.{ParameterizedBundle, AddrHashMap, GlobalAddrMap} import cde.Parameters trait HasClientDmaParameters extends HasCoreParameters with HasDmaParameters { @@ -165,8 +165,10 @@ class DmaFrontend(implicit p: Parameters) extends CoreModule()(p) } def check_region(cmd: UInt, src: UInt, dst: UInt): Bool = { - val dst_ok = Mux(cmd === DMA_CMD_SOUT, dst >= UInt(mmioBase), dst < UInt(mmioBase)) - val src_ok = Mux(cmd === DMA_CMD_SIN, src >= UInt(mmioBase), Bool(true)) + val src_cacheable = addrMap.isCacheable(src) + val dst_cacheable = addrMap.isCacheable(dst) + val dst_ok = Mux(cmd === DMA_CMD_SOUT, !dst_cacheable, dst_cacheable) + val src_ok = Mux(cmd === DMA_CMD_SIN, !src_cacheable, Bool(true)) dst_ok && src_ok } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 83d7b9da..ade7ed85 100644 --- 
a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -421,8 +421,8 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { val fence_rdy = Bool(OUTPUT) } - // determine if the request is in the memory region or mmio region - val cacheable = io.req.bits.addr < UInt(mmioBase) + // determine if the request is cacheable or not + val cacheable = addrMap.isCacheable(io.req.bits.addr) val sdq_val = Reg(init=Bits(0, sdqDepth)) val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0)) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 81778134..b4b9601a 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -48,7 +48,6 @@ trait HasCoreParameters extends HasAddrMapParameters { val vpnBitsExtended = vpnBits + (vaddrBits < xLen).toInt val vaddrBitsExtended = vpnBitsExtended + pgIdxBits val coreMaxAddrBits = paddrBits max vaddrBitsExtended - val mmioBase = p(MMIOBase) val nCustomMrwCsrs = p(NCustomMRWCSRs) val roccCsrs = if (p(BuildRoCC).isEmpty) Nil else p(BuildRoCC).flatMap(_.csrs) From 84fd45fd77efc40cb68102db8b30c7cea3779c93 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 22 Apr 2016 15:20:17 -0700 Subject: [PATCH 0987/1087] Pass TLB flush signal to I$ explicitly --- rocket/src/main/scala/frontend.scala | 7 ++++--- rocket/src/main/scala/rocket.scala | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 602b13e0..490ae71c 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -23,7 +23,8 @@ class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) { val btb_update = Valid(new BTBUpdate) val bht_update = Valid(new BHTUpdate) val ras_update = Valid(new RASUpdate) - val invalidate = Bool(OUTPUT) + val flush_icache = Bool(OUTPUT) + val flush_tlb = Bool(OUTPUT) val npc = UInt(INPUT, width = 
vaddrBitsExtended) } @@ -80,7 +81,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa btb.io.btb_update := io.cpu.btb_update btb.io.bht_update := io.cpu.bht_update btb.io.ras_update := io.cpu.ras_update - btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate + btb.io.invalidate := io.cpu.flush_icache || io.cpu.flush_tlb // virtual tags when (!stall && !icmiss) { btb.io.req.valid := true s2_btb_resp_valid := btb.io.resp.valid @@ -103,7 +104,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa io.mem <> icache.io.mem icache.io.req.valid := !stall && !s0_same_block icache.io.req.bits.idx := io.cpu.npc - icache.io.invalidate := io.cpu.invalidate + icache.io.invalidate := io.cpu.flush_icache icache.io.s1_ppn := tlb.io.resp.ppn icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.ptw.invalidate diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index b4b9601a..45bf48a8 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -496,7 +496,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret Mux(replay_wb, wb_reg_pc, // replay mem_npc)).toUInt // mispredicted branch - io.imem.invalidate := wb_reg_valid && wb_ctrl.fence_i + io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i + io.imem.flush_tlb := csr.io.fatc io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && mem_cfi_taken && !take_pc_wb From 5dbf9640e22629b78cd0bf076ad850e825fada44 Mon Sep 17 00:00:00 2001 From: Yunsup Lee Date: Fri, 22 Apr 2016 15:41:31 -0700 Subject: [PATCH 0988/1087] Use TLB flush signal to I$ explicitly --- rocket/src/main/scala/frontend.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/frontend.scala 
b/rocket/src/main/scala/frontend.scala index 490ae71c..5d67e158 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -106,7 +106,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa icache.io.req.bits.idx := io.cpu.npc icache.io.invalidate := io.cpu.flush_icache icache.io.s1_ppn := tlb.io.resp.ppn - icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.ptw.invalidate + icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid) io.cpu.resp.bits.pc := s2_pc From d93677a343215b5391fb45678eda73b1adbd78e8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 25 Apr 2016 17:55:22 -0700 Subject: [PATCH 0989/1087] Support larger cache sets when not using VM --- rocket/src/main/scala/nbdcache.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index ade7ed85..bc006ed8 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -25,7 +25,7 @@ trait HasL1HellaCacheParameters extends HasL1CacheParameters { val idxLSB = blockOffBits val offsetmsb = idxLSB-1 val offsetlsb = wordOffBits - val rowWords = rowBits/wordBits + val rowWords = rowBits/wordBits val doNarrowRead = coreDataBits * nWays % rowBits == 0 val encDataBits = code.width(coreDataBits) val encRowBits = encDataBits*rowWords @@ -37,7 +37,7 @@ trait HasL1HellaCacheParameters extends HasL1CacheParameters { require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed require(isPow2(nSets)) require(rowBits <= outerDataBits) - require(untagBits <= pgIdxBits) + require(!usingVM || untagBits <= pgIdxBits) } abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module @@ -800,7 +800,7 @@ class HellaCache(implicit p: 
Parameters) extends L1HellaCacheModule()(p) { val dtlb = Module(new TLB) io.ptw <> dtlb.io.ptw - dtlb.io.req.valid := s1_valid_masked && s1_readwrite && !s1_req.phys + dtlb.io.req.valid := s1_valid_masked && s1_readwrite dtlb.io.req.bits.passthrough := s1_req.phys dtlb.io.req.bits.asid := UInt(0) dtlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits @@ -866,7 +866,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { writeArb.io.out.ready := data.io.write.ready data.io.write.bits := writeArb.io.out.bits val wdata_encoded = (0 until rowWords).map(i => code.encode(writeArb.io.out.bits.data(coreDataBits*(i+1)-1,coreDataBits*i))) - data.io.write.bits.data := Vec(wdata_encoded).toBits + data.io.write.bits.data := Cat(wdata_encoded.reverse) // tag read for new requests metaReadArb.io.in(4).valid := io.cpu.req.valid From 5fd5b587436f394cb2ae9123066993d5cdf55a57 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 25 Apr 2016 17:57:48 -0700 Subject: [PATCH 0990/1087] Remove stats CSR --- rocket/src/main/scala/csr.scala | 5 ----- 1 file changed, 5 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 606fb10a..72270bf4 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -162,7 +162,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_tohost = Reg(init=Bits(0, xLen)) val reg_fromhost = Reg(init=Bits(0, xLen)) - val reg_stats = Reg(init=Bool(false)) val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) @@ -204,8 +203,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } when (io.host.csr.resp.fire()) { host_csr_rep_valid := false } - io.host.debug_stats_csr := reg_stats // direct export up the hierarchy - val isa_string = "IMA" + (if (usingVM) "S" else "") + (if (usingFPU) "FDG" else "") + @@ -240,7 +237,6 @@ class CSRFile(implicit p: Parameters) 
extends CoreModule()(p) CSRs.mcause -> reg_mcause, CSRs.mtimecmp -> reg_mtimecmp, CSRs.mhartid -> io.host.id, - CSRs.stats -> reg_stats, CSRs.mtohost -> reg_tohost, CSRs.mfromhost -> reg_fromhost) @@ -473,7 +469,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.mtime)) { reg_time := wdata } when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_csr_req_fire) { reg_fromhost := wdata } } when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_csr_req_fire) { reg_tohost := wdata } } - when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } if (usingFPU) { when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } From fe8c91f62011aaedc2f559cefc7451fa534f85b3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 26 Apr 2016 15:30:24 -0700 Subject: [PATCH 0991/1087] Fix IOMSHR state machine bug Sending the finish too early causes the CPU response to get dropped. 
attn @zhemao --- rocket/src/main/scala/nbdcache.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index bc006ed8..84e326c0 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -170,9 +170,15 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val grant_word = Reg(UInt(width = wordBits)) val fq = Module(new FinishQueue(1)) + val s_idle :: s_acquire :: s_grant :: s_resp :: s_finish :: Nil = Enum(Bits(), 5) + val state = Reg(init = s_idle) + io.req.ready := (state === s_idle) + fq.io.enq.valid := io.grant.valid && io.grant.bits.requiresAck() fq.io.enq.bits := io.grant.bits.makeFinish() - io.finish <> fq.io.deq + io.finish.valid := fq.io.deq.valid && (state === s_finish) + io.finish.bits := fq.io.deq.bits + fq.io.deq.ready := io.finish.ready && (state === s_finish) val storegen = new StoreGen(req.typ, req.addr, req.data, wordBytes) val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc, wordBytes) @@ -181,11 +187,6 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) val beat_data = Fill(beatWords, storegen.data) - val s_idle :: s_acquire :: s_grant :: s_resp :: s_finish :: Nil = Enum(Bits(), 5) - val state = Reg(init = s_idle) - - io.req.ready := (state === s_idle) - val addr_block = req.addr(paddrBits - 1, blockOffBits) val addr_beat = req.addr(blockOffBits - 1, beatOffBits) val addr_byte = req.addr(beatOffBits - 1, 0) From 8acec8eb367b379e6bb47eaf31c6f52b9da5f2e8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 27 Apr 2016 00:28:12 -0700 Subject: [PATCH 0992/1087] Remove dead code from BTB --- rocket/src/main/scala/btb.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 
54a31dd4..c5fb0e78 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -177,12 +177,6 @@ class BTB(implicit p: Parameters) extends BtbModule { val updatePageHit = pageMatch(r_btb_update.bits.pc) val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit) - private var lfsr = LFSR16(r_btb_update.valid) - def rand(width: Int) = { - lfsr = lfsr(lfsr.getWidth-1,1) - Random.oneHot(width, lfsr) - } - val updateHit = r_btb_update.bits.prediction.valid val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1 From b99db83e678dc85053d542968c36a7bc1e3aa8ae Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 27 Apr 2016 00:28:39 -0700 Subject: [PATCH 0993/1087] Avoid needless Vec generation --- rocket/src/main/scala/util.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 18dec13e..c0b89bd1 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -160,5 +160,5 @@ object Random private def round(x: Double): Int = if (x.toInt.toDouble == x) x.toInt else (x.toInt + 1) & -2 private def partition(value: UInt, slices: Int) = - Vec.tabulate(slices)(i => value < round((i << value.getWidth).toDouble / slices)) + Seq.tabulate(slices)(i => value < round((i << value.getWidth).toDouble / slices)) } From fb5c38c186f5dfd03dc4c731d045f01107511a0f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 27 Apr 2016 11:22:04 -0700 Subject: [PATCH 0994/1087] Handle invalidate_lr in cache arbiter, not tile --- rocket/src/main/scala/arbiter.scala | 1 + rocket/src/main/scala/ptw.scala | 1 + rocket/src/main/scala/tile.scala | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 579ea6f8..8a207f2e 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -16,6 +16,7 @@ class HellaCacheArbiter(n: 
Int)(implicit p: Parameters) extends Module val r_valid = io.requestor.map(r => Reg(next=r.req.valid)) + io.mem.invalidate_lr := io.requestor.map(_.invalidate_lr).reduce(_||_) io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_) io.requestor(0).req.ready := io.mem.req.ready for (i <- 1 until n) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 09b2328d..b83e0fc8 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -120,6 +120,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { io.mem.req.bits.addr := pte_addr io.mem.s1_data := pte_wdata.toBits io.mem.s1_kill := Bool(false) + io.mem.invalidate_lr := Bool(false) val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 68c0714d..d9b8fca0 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -47,7 +47,6 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( val uncachedArbPorts = collection.mutable.ArrayBuffer(icache.io.mem) val uncachedPorts = collection.mutable.ArrayBuffer[ClientUncachedTileLinkIO]() val cachedPorts = collection.mutable.ArrayBuffer(dcache.io.mem) - dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache io.host <> core.io.host icache.io.cpu <> core.io.imem From 739cf0763782ddc8fd7973082f9746ae977942e2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 27 Apr 2016 14:54:51 -0700 Subject: [PATCH 0995/1087] Remove mtime/mtimecmp The RTC is now a device that lives on the MMIO bus. 
--- rocket/src/main/scala/csr.scala | 12 +-------- rocket/src/main/scala/instructions.scala | 34 ------------------------ 2 files changed, 1 insertion(+), 45 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 72270bf4..db17f67e 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -156,7 +156,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_sbadaddr = Reg(UInt(width = vaddrBitsExtended)) val reg_sscratch = Reg(Bits(width = xLen)) val reg_stvec = Reg(UInt(width = vaddrBits)) - val reg_mtimecmp = Reg(Bits(width = xLen)) val reg_sptbr = Reg(UInt(width = ppnBits)) val reg_wfi = Reg(init=Bool(false)) @@ -166,7 +165,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) - val reg_time = Reg(UInt(width = 64)) // regardless of XLEN val reg_instret = WideCounter(64, io.retire) val reg_cycle: UInt = if (enableCommitLog) { reg_instret } else { WideCounter(64) } @@ -215,7 +213,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.mimpid -> UInt(0), CSRs.marchid -> UInt(0), CSRs.mvendorid -> UInt(0), - CSRs.mtime -> reg_time, CSRs.mcycle -> reg_cycle, CSRs.minstret -> reg_instret, CSRs.mucounteren -> UInt(0), @@ -235,7 +232,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.mepc -> reg_mepc.sextTo(xLen), CSRs.mbadaddr -> reg_mbadaddr.sextTo(xLen), CSRs.mcause -> reg_mcause, - CSRs.mtimecmp -> reg_mtimecmp, CSRs.mhartid -> io.host.id, CSRs.mtohost -> reg_tohost, CSRs.mfromhost -> reg_fromhost) @@ -276,7 +272,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } if (xLen == 32) { - read_mapping += CSRs.mtimeh -> (reg_time >> 32) read_mapping += CSRs.mcycleh -> (reg_cycle >> 32) read_mapping += CSRs.minstreth -> (reg_instret >> 32) read_mapping += CSRs.mutime_deltah -> UInt(0) @@ -404,10 +399,7 @@ class CSRFile(implicit p: Parameters) 
extends CoreModule()(p) assert(PopCount(insn_ret :: io.exception :: csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive") - when (reg_time >= reg_mtimecmp) { - reg_mip.mtip := true - } - + reg_mip.mtip := io.host.timerIRQ io.time := reg_cycle io.csr_stall := reg_wfi @@ -465,8 +457,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata >> 2 << 2 } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } - when (decoded_addr(CSRs.mtimecmp)) { reg_mtimecmp := wdata; reg_mip.mtip := false } - when (decoded_addr(CSRs.mtime)) { reg_time := wdata } when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_csr_req_fire) { reg_fromhost := wdata } } when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_csr_req_fire) { reg_tohost := wdata } } if (usingFPU) { diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index d854fac8..84b0ed70 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -247,23 +247,6 @@ object CSRs { val cycle = 0xc00 val time = 0xc01 val instret = 0xc02 - val stats = 0xc0 - val uarch0 = 0xcc0 - val uarch1 = 0xcc1 - val uarch2 = 0xcc2 - val uarch3 = 0xcc3 - val uarch4 = 0xcc4 - val uarch5 = 0xcc5 - val uarch6 = 0xcc6 - val uarch7 = 0xcc7 - val uarch8 = 0xcc8 - val uarch9 = 0xcc9 - val uarch10 = 0xcca - val uarch11 = 0xccb - val uarch12 = 0xccc - val uarch13 = 0xccd - val uarch14 = 0xcce - val uarch15 = 0xccf val sstatus = 0x100 val sie = 0x104 val stvec = 0x105 @@ -330,23 +313,6 @@ object CSRs { res += cycle res += time res += instret - res += stats - res += uarch0 - res += uarch1 - res += uarch2 - res += uarch3 - res += uarch4 - res += uarch5 - res += uarch6 - res += uarch7 - res += uarch8 - 
res += uarch9 - res += uarch10 - res += uarch11 - res += uarch12 - res += uarch13 - res += uarch14 - res += uarch15 res += sstatus res += sie res += stvec From cae4265f3b95da7db6af30ec423c9a16aa43390c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 28 Apr 2016 16:14:05 -0700 Subject: [PATCH 0996/1087] Change mcfgaddr pointer --- rocket/src/main/scala/csr.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index db17f67e..edb8dc4f 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -222,7 +222,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.misa -> UInt(isa), CSRs.mstatus -> read_mstatus, CSRs.mtvec -> reg_mtvec, - CSRs.mcfgaddr -> UInt(addrMap("mem").size), + CSRs.mcfgaddr -> UInt(addrMap("io:int:configstring").start), CSRs.mipi -> reg_mip.msip, CSRs.mip -> read_mip, CSRs.mie -> reg_mie, From 5af98145b9d67f2ca90681edcb81ae71ff0b838a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 30 Apr 2016 17:31:46 -0700 Subject: [PATCH 0997/1087] don't signal bad physical address on TLB miss --- rocket/src/main/scala/tlb.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 1559802a..700736c3 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -152,13 +152,12 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { } val paddr = Cat(io.resp.ppn, UInt(0, pgIdxBits)) - val addr_ok = addrMap.isValid(paddr) val addr_prot = addrMap.getProt(paddr) io.req.ready := state === s_ready - io.resp.xcpt_ld := !addr_ok || !addr_prot.r || bad_va || tlb_hit && !(r_array & tag_cam.io.hits).orR - io.resp.xcpt_st := !addr_ok || !addr_prot.w || bad_va || tlb_hit && !(w_array & tag_cam.io.hits).orR - io.resp.xcpt_if := !addr_ok || !addr_prot.x || bad_va || tlb_hit && !(x_array & tag_cam.io.hits).orR + 
io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & tag_cam.io.hits).orR) + io.resp.xcpt_st := bad_va || (!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & tag_cam.io.hits).orR) + io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & tag_cam.io.hits).orR) io.resp.miss := tlb_miss io.resp.ppn := Mux(vm_enabled, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(ppnBits-1,0)) io.resp.hit_idx := tag_cam.io.hits From 491184a8f809c7b6dc92a5cd7ff9d0ee2099f4a3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 30 Apr 2016 17:32:51 -0700 Subject: [PATCH 0998/1087] ERET -> xRET; remove mcfgaddr --- rocket/src/main/scala/csr.scala | 8 +++----- rocket/src/main/scala/idecode.scala | 1 + rocket/src/main/scala/instructions.scala | 22 ++++++++-------------- 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index edb8dc4f..86901dcd 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -222,7 +222,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.misa -> UInt(isa), CSRs.mstatus -> read_mstatus, CSRs.mtvec -> reg_mtvec, - CSRs.mcfgaddr -> UInt(addrMap("io:int:configstring").start), CSRs.mipi -> reg_mip.msip, CSRs.mip -> read_mip, CSRs.mie -> reg_mie, @@ -334,10 +333,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) Mux(insn_call, reg_mstatus.prv + Causes.user_ecall, Mux[UInt](insn_break, Causes.breakpoint, Causes.illegal_instruction))) val cause_lsbs = cause(log2Up(xLen)-1,0) - val can_delegate = Bool(p(UseVM)) && reg_mstatus.prv < PRV.M - val delegate = can_delegate && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) + val delegate = Bool(p(UseVM)) && reg_mstatus.prv < PRV.M && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) val tvec = Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec) - val epc = 
Mux(can_delegate, reg_sepc, reg_mepc) + val epc = Mux(Bool(p(UseVM)) && !csr_addr_priv(1), reg_sepc, reg_mepc) io.fatc := insn_sfence_vm io.evec := Mux(io.exception || csr_xcpt, tvec, epc) io.ptbr := reg_sptbr @@ -383,7 +381,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } when (insn_ret) { - when (can_delegate) { + when (Bool(p(UseVM)) && !csr_addr_priv(1)) { when (reg_mstatus.spp.toBool) { reg_mstatus.sie := reg_mstatus.spie } reg_mstatus.spie := false reg_mstatus.spp := PRV.U diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index 107de8d6..c301424c 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -175,6 +175,7 @@ class XDecode(implicit val p: Parameters) extends DecodeConstants SCALL-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), SBREAK-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), SRET-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + MRET-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), WFI-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), CSRRW-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 84b0ed70..808505d5 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -92,9 +92,12 @@ object Instructions { def AMOSWAP_D = BitPat("b00001????????????011?????0101111") def LR_D = BitPat("b00010??00000?????011?????0101111") def SC_D = BitPat("b00011????????????011?????0101111") - def SCALL = BitPat("b00000000000000000000000001110011") - 
def SBREAK = BitPat("b00000000000100000000000001110011") + def ECALL = BitPat("b00000000000000000000000001110011") + def EBREAK = BitPat("b00000000000100000000000001110011") + def URET = BitPat("b00000000001000000000000001110011") def SRET = BitPat("b00010000001000000000000001110011") + def HRET = BitPat("b00100000001000000000000001110011") + def MRET = BitPat("b00110000001000000000000001110011") def SFENCE_VM = BitPat("b000100000100?????000000001110011") def WFI = BitPat("b00010000010100000000000001110011") def CSRRW = BitPat("b?????????????????001?????1110011") @@ -206,9 +209,8 @@ object Instructions { def RDCYCLEH = BitPat("b11001000000000000010?????1110011") def RDTIMEH = BitPat("b11001000000100000010?????1110011") def RDINSTRETH = BitPat("b11001000001000000010?????1110011") - def ECALL = BitPat("b00000000000000000000000001110011") - def EBREAK = BitPat("b00000000000100000000000001110011") - def ERET = BitPat("b00010000000000000000000001110011") + def SCALL = BitPat("b00000000000000000000000001110011") + def SBREAK = BitPat("b00000000000100000000000001110011") } object Causes { val misaligned_fetch = 0x0 @@ -265,7 +267,6 @@ object CSRs { val mideleg = 0x303 val mie = 0x304 val mtvec = 0x305 - val mtimecmp = 0x321 val mscratch = 0x340 val mepc = 0x341 val mcause = 0x342 @@ -287,15 +288,13 @@ object CSRs { val mvendorid = 0xf11 val marchid = 0xf12 val mimpid = 0xf13 - val mcfgaddr = 0xf14 - val mhartid = 0xf15 + val mhartid = 0xf14 val mtohost = 0x7c0 val mfromhost = 0x7c1 val mreset = 0x7c2 val cycleh = 0xc80 val timeh = 0xc81 val instreth = 0xc82 - val mtimecmph = 0x361 val mucycle_deltah = 0x780 val mutime_deltah = 0x781 val muinstret_deltah = 0x782 @@ -303,7 +302,6 @@ object CSRs { val mstime_deltah = 0x785 val msinstret_deltah = 0x786 val mcycleh = 0xf80 - val mtimeh = 0xf81 val minstreth = 0xf82 val all = { val res = collection.mutable.ArrayBuffer[Int]() @@ -331,7 +329,6 @@ object CSRs { res += mideleg res += mie res += mtvec - res += mtimecmp res += 
mscratch res += mepc res += mcause @@ -353,7 +350,6 @@ object CSRs { res += mvendorid res += marchid res += mimpid - res += mcfgaddr res += mhartid res += mtohost res += mfromhost @@ -365,7 +361,6 @@ object CSRs { res += cycleh res += timeh res += instreth - res += mtimecmph res += mucycle_deltah res += mutime_deltah res += muinstret_deltah @@ -373,7 +368,6 @@ object CSRs { res += mstime_deltah res += msinstret_deltah res += mcycleh - res += mtimeh res += minstreth res.toArray } From 0ff4fd0ccdfa4fc345dfc32a266117a66b57697b Mon Sep 17 00:00:00 2001 From: Albert Ou Date: Sat, 30 Apr 2016 22:20:29 -0700 Subject: [PATCH 0999/1087] Fix IOMSHR to send finishes for stores --- rocket/src/main/scala/nbdcache.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 84e326c0..c9c108f1 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -227,11 +227,9 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { } when (state === s_grant && io.grant.valid) { + state := s_resp when (isRead(req.cmd)) { grant_word := wordFromBeat(req.addr, io.grant.bits.data) - state := s_resp - } .otherwise { - state := s_idle } } From 83fa489cef935ad38871858cb4103e97da788196 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 2 May 2016 14:40:52 -0700 Subject: [PATCH 1000/1087] Stop using HTIF CSR port The port itself is still present to keep other stuff compiling. 
--- rocket/src/main/scala/csr.scala | 62 ++++++++---------------------- rocket/src/main/scala/rocket.scala | 6 +-- rocket/src/main/scala/tile.scala | 12 ++++-- 3 files changed, 29 insertions(+), 51 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 86901dcd..41efa30f 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -36,12 +36,11 @@ class MStatus extends Bundle { } class MIP extends Bundle { - val host = Bool() val rocc = Bool() - val mdip = Bool() - val hdip = Bool() - val sdip = Bool() - val udip = Bool() + val meip = Bool() + val heip = Bool() + val seip = Bool() + val ueip = Bool() val mtip = Bool() val htip = Bool() val stip = Bool() @@ -77,7 +76,7 @@ object CSR } class CSRFileIO(implicit p: Parameters) extends CoreBundle { - val host = new HtifIO + val prci = new PRCICoreIO().flip val rw = new Bundle { val addr = UInt(INPUT, CSR.ADDRSZ) val cmd = Bits(INPUT, CSR.SZ) @@ -123,13 +122,14 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) sup.msip := true sup.stip := Bool(p(UseVM)) sup.mtip := true + sup.meip := true + sup.seip := Bool(p(UseVM)) sup.rocc := usingRoCC - sup.host := true val del = Wire(init=sup) del.msip := false del.mtip := false - del.mdip := false + del.meip := false (sup.toBits, del.toBits) } @@ -159,8 +159,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_sptbr = Reg(UInt(width = ppnBits)) val reg_wfi = Reg(init=Bool(false)) - val reg_tohost = Reg(init=Bits(0, xLen)) - val reg_fromhost = Reg(init=Bits(0, xLen)) val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) @@ -169,7 +167,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_cycle: UInt = if (enableCommitLog) { reg_instret } else { WideCounter(64) } val mip = Wire(init=reg_mip) - mip.host := (reg_fromhost =/= 0) mip.rocc := io.rocc.interrupt val read_mip = 
mip.toBits & supported_interrupts @@ -183,27 +180,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val system_insn = io.rw.cmd === CSR.I val cpu_ren = io.rw.cmd =/= CSR.N && !system_insn - val host_csr_req_valid = Reg(Bool()) // don't reset - val host_csr_req_fire = host_csr_req_valid && !cpu_ren - val host_csr_rep_valid = Reg(Bool()) // don't reset - val host_csr_bits = Reg(io.host.csr.req.bits) - io.host.csr.req.ready := !host_csr_req_valid && !host_csr_rep_valid - io.host.csr.resp.valid := host_csr_rep_valid - io.host.csr.resp.bits := host_csr_bits.data - when (io.host.csr.req.fire()) { - host_csr_req_valid := true - host_csr_bits := io.host.csr.req.bits - } - when (host_csr_req_fire) { - host_csr_req_valid := false - host_csr_rep_valid := true - host_csr_bits.data := io.rw.rdata - } - when (io.host.csr.resp.fire()) { host_csr_rep_valid := false } - val isa_string = "IMA" + (if (usingVM) "S" else "") + - (if (usingFPU) "FDG" else "") + + (if (usingFPU) "FD" else "") + (if (usingRoCC) "X" else "") val isa = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) | isa_string.map(x => 1 << (x - 'A')).reduce(_|_) @@ -231,9 +210,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.mepc -> reg_mepc.sextTo(xLen), CSRs.mbadaddr -> reg_mbadaddr.sextTo(xLen), CSRs.mcause -> reg_mcause, - CSRs.mhartid -> io.host.id, - CSRs.mtohost -> reg_tohost, - CSRs.mfromhost -> reg_fromhost) + CSRs.mhartid -> io.prci.id) if (usingFPU) { read_mapping += CSRs.fflags -> reg_fflags @@ -295,8 +272,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) read_mapping += addr -> io.rocc.csr.rdata(i) } - val addr = Mux(cpu_ren, io.rw.addr, host_csr_bits.addr) - val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) } + val decoded_addr = read_mapping map { case (k, v) => k -> (io.rw.addr === k) } val addr_valid = decoded_addr.values.reduce(_||_) val fp_csr = @@ -306,11 +282,10 @@ class CSRFile(implicit p: Parameters) extends 
CoreModule()(p) val priv_sufficient = reg_mstatus.prv >= csr_addr_priv val read_only = io.rw.addr(11,10).andR val cpu_wen = cpu_ren && io.rw.cmd =/= CSR.R && priv_sufficient - val wen = cpu_wen && !read_only || host_csr_req_fire && host_csr_bits.rw - val wdata = Mux(io.rw.cmd === CSR.W, io.rw.wdata, + val wen = cpu_wen && !read_only + val wdata = Mux(io.rw.cmd === CSR.S, io.rw.rdata | io.rw.wdata, Mux(io.rw.cmd === CSR.C, io.rw.rdata & ~io.rw.wdata, - Mux(io.rw.cmd === CSR.S, io.rw.rdata | io.rw.wdata, - host_csr_bits.data))) + io.rw.wdata)) val do_system_insn = priv_sufficient && system_insn val opcode = UInt(1) << io.rw.addr(2,0) @@ -397,12 +372,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) assert(PopCount(insn_ret :: io.exception :: csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive") - reg_mip.mtip := io.host.timerIRQ io.time := reg_cycle io.csr_stall := reg_wfi - when (host_csr_req_fire && !host_csr_bits.rw && decoded_addr(CSRs.mtohost)) { reg_tohost := UInt(0) } - io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) io.fcsr_rm := reg_frm @@ -455,8 +427,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata >> 2 << 2 } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } - when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_csr_req_fire) { reg_fromhost := wdata } } - when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_csr_req_fire) { reg_tohost := wdata } } if (usingFPU) { when (decoded_addr(CSRs.fflags)) { reg_fflags := wdata } when (decoded_addr(CSRs.frm)) { reg_frm := wdata } @@ -488,7 +458,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } } - io.rocc.csr.waddr := addr + reg_mip := io.prci.interrupts + + 
io.rocc.csr.waddr := io.rw.addr io.rocc.csr.wdata := wdata io.rocc.csr.wen := wen } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 45bf48a8..05fcfbcb 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -110,7 +110,7 @@ object ImmGen { class Rocket(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { - val host = new HtifIO + val prci = new PRCICoreIO().flip val imem = new FrontendIO()(p.alterPartial({case CacheName => "L1I" })) val dmem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) val ptw = new DatapathPTWIO().flip @@ -426,7 +426,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { csr.io.exception := wb_reg_xcpt csr.io.cause := wb_reg_cause csr.io.retire := wb_valid - io.host <> csr.io.host + csr.io.prci <> io.prci io.fpu.fcsr_rm := csr.io.fcsr_rm csr.io.fcsr_flags := io.fpu.fcsr_flags csr.io.rocc <> io.rocc @@ -581,7 +581,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } else { printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", - io.host.id, csr.io.time(31,0), wb_valid, wb_reg_pc, + io.prci.id, csr.io.time(31,0), wb_valid, wb_reg_pc, Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen, wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index d9b8fca0..e21d81ae 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -30,7 +30,8 @@ abstract class Tile(resetSignal: Bool = null) val io = new Bundle { val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO) val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO) - val host = new HtifIO + val host = new HtifIO // Unused, but temporarily extant for zscale/groundtest + val prci = new PRCICoreIO().flip val dma = new DmaIO } } @@ -47,7 +48,7 @@ class 
RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( val uncachedArbPorts = collection.mutable.ArrayBuffer(icache.io.mem) val uncachedPorts = collection.mutable.ArrayBuffer[ClientUncachedTileLinkIO]() val cachedPorts = collection.mutable.ArrayBuffer(dcache.io.mem) - io.host <> core.io.host + core.io.prci <> io.prci icache.io.cpu <> core.io.imem val fpuOpt = if (p(UseFPU)) Some(Module(new FPU)) else None @@ -71,7 +72,7 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( rocc.io.cmd <> cmdRouter.io.out(i) rocc.io.status := core.io.rocc.status rocc.io.exception := core.io.rocc.exception - rocc.io.host_id := io.host.id + rocc.io.host_id := io.prci.id dcIF.io.requestor <> rocc.io.mem dcPorts += dcIF.io.cache uncachedArbPorts += rocc.io.autl @@ -141,4 +142,9 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( fpu.io.cp_resp.ready := Bool(false) } } + + // TODO remove + io.host.csr.resp.valid := io.host.csr.req.valid + io.host.csr.req.ready := io.host.csr.resp.ready + io.host.csr.resp.bits := UInt(0) } From 000e20f93786a86691a13e9dd66907665b14dc0b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 2 May 2016 15:18:41 -0700 Subject: [PATCH 1001/1087] Remove MIPI; make mip.MSIP read-only The PRCI block outside the core will provide IPIs eventually --- rocket/src/main/scala/csr.scala | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 41efa30f..b9ac2b65 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -144,7 +144,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_mie = Reg(init=UInt(0, xLen)) val reg_mideleg = Reg(init=UInt(0, xLen)) val reg_medeleg = Reg(init=UInt(0, xLen)) - val reg_mip = Reg(init=new MIP().fromBits(0)) + val reg_mip = Reg(new MIP) val reg_mepc = Reg(UInt(width = vaddrBitsExtended)) val reg_mcause = Reg(Bits(width 
= xLen)) val reg_mbadaddr = Reg(UInt(width = vaddrBitsExtended)) @@ -201,7 +201,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.misa -> UInt(isa), CSRs.mstatus -> read_mstatus, CSRs.mtvec -> reg_mtvec, - CSRs.mipi -> reg_mip.msip, CSRs.mip -> read_mip, CSRs.mie -> reg_mie, CSRs.mideleg -> reg_mideleg, @@ -415,10 +414,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_mip.ssip := new_mip.ssip reg_mip.stip := new_mip.stip } - reg_mip.msip := new_mip.msip - } - when (decoded_addr(CSRs.mipi)) { - reg_mip.msip := wdata(0) } when (decoded_addr(CSRs.mie)) { reg_mie := wdata & supported_interrupts } when (decoded_addr(CSRs.mepc)) { reg_mepc := ~(~wdata | (coreInstBytes-1)) } From f784f4da933fbdf272c4c8673eaafea5afc6c6b7 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 2 May 2016 18:08:01 -0700 Subject: [PATCH 1002/1087] Rename PRCICoreIO to PRCITileIO --- rocket/src/main/scala/csr.scala | 2 +- rocket/src/main/scala/rocket.scala | 2 +- rocket/src/main/scala/tile.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index b9ac2b65..d91332e7 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -76,7 +76,7 @@ object CSR } class CSRFileIO(implicit p: Parameters) extends CoreBundle { - val prci = new PRCICoreIO().flip + val prci = new PRCITileIO().flip val rw = new Bundle { val addr = UInt(INPUT, CSR.ADDRSZ) val cmd = Bits(INPUT, CSR.SZ) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 05fcfbcb..97938ee0 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -110,7 +110,7 @@ object ImmGen { class Rocket(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { - val prci = new PRCICoreIO().flip + val prci = new PRCITileIO().flip val imem = new FrontendIO()(p.alterPartial({case CacheName => "L1I" })) val dmem = new 
HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) val ptw = new DatapathPTWIO().flip diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index e21d81ae..4312fc47 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -31,7 +31,7 @@ abstract class Tile(resetSignal: Bool = null) val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO) val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO) val host = new HtifIO // Unused, but temporarily extant for zscale/groundtest - val prci = new PRCICoreIO().flip + val prci = new PRCITileIO().flip val dma = new DmaIO } } From 5cbcc415155694d8d7c079c377deddde15167bfc Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Mon, 2 May 2016 18:23:40 -0700 Subject: [PATCH 1003/1087] get rid of unused imports --- rocket/src/main/scala/csr.scala | 2 +- rocket/src/main/scala/dma.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index d91332e7..e163967b 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -8,7 +8,7 @@ import Instructions._ import cde.{Parameters, Field} import uncore._ import scala.math._ -import junctions.{AddrHashMap, GlobalAddrMap} +import junctions.AddrHashMap class MStatus extends Bundle { val prv = UInt(width = PRV.SZ) // not truly part of mstatus, but convenient diff --git a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala index a18d8644..076ac29f 100644 --- a/rocket/src/main/scala/dma.scala +++ b/rocket/src/main/scala/dma.scala @@ -3,7 +3,7 @@ package rocket import Chisel._ import uncore._ import uncore.DmaRequest._ -import junctions.{ParameterizedBundle, AddrHashMap, GlobalAddrMap} +import junctions.{ParameterizedBundle, AddrHashMap} import cde.Parameters trait HasClientDmaParameters extends HasCoreParameters with HasDmaParameters { From 5352497edbb12eb45b71cce2ed3631f6044894fc Mon Sep 17 
00:00:00 2001 From: Andrew Waterman Date: Mon, 2 May 2016 19:48:39 -0700 Subject: [PATCH 1004/1087] MPRV takes effect regardless of privilege mode --- rocket/src/main/scala/tlb.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 700736c3..909f49e1 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -125,7 +125,7 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { val plru = new PseudoLRU(entries) val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) - val do_mprv = io.ptw.status.prv === PRV.M && io.ptw.status.mprv && !io.req.bits.instruction + val do_mprv = io.ptw.status.mprv && !io.req.bits.instruction val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv) val priv_s = priv === PRV.S val priv_uses_vm = priv <= PRV.S From 9dd23a603ae79b0f9d1efad7571acb24a0b4bcef Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 3 May 2016 13:41:58 -0700 Subject: [PATCH 1005/1087] Remove HTIF port --- rocket/src/main/scala/tile.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 4312fc47..dd88f993 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -30,7 +30,6 @@ abstract class Tile(resetSignal: Bool = null) val io = new Bundle { val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO) val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO) - val host = new HtifIO // Unused, but temporarily extant for zscale/groundtest val prci = new PRCITileIO().flip val dma = new DmaIO } @@ -142,9 +141,4 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( fpu.io.cp_resp.ready := Bool(false) } } - - // TODO remove - io.host.csr.resp.valid := io.host.csr.req.valid - io.host.csr.req.ready := io.host.csr.resp.ready - io.host.csr.resp.bits := UInt(0) } From 
8fa2de081636464e534ffe68f8949b27d2d365cc Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Thu, 5 May 2016 18:09:48 -0700 Subject: [PATCH 1006/1087] chisel3 fix to RoCC connections honor last connect --- rocket/src/main/scala/rocket.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 97938ee0..03fc0f20 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -429,7 +429,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { csr.io.prci <> io.prci io.fpu.fcsr_rm := csr.io.fcsr_rm csr.io.fcsr_flags := io.fpu.fcsr_flags - csr.io.rocc <> io.rocc + io.rocc.csr <> csr.io.rocc.csr + csr.io.rocc.interrupt <> io.rocc.interrupt csr.io.pc := wb_reg_pc csr.io.uarch_counters.foreach(_ := Bool(false)) io.ptw.ptbr := csr.io.ptbr From 742c05d6a7890af4968ffc2e1bb49cd4f20a50a8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 13 May 2016 17:07:28 -0700 Subject: [PATCH 1007/1087] Pipeline D$->I$ control paths These stretch the miss latency by a cycle in exchange for slack. The current implementation also adds a cycle to mul/div latency, which can be worked around for more hardware (possibly gated by the FastMulDiv option). --- rocket/src/main/scala/rocket.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 03fc0f20..757e870d 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -450,7 +450,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val sboard = new Scoreboard(32) sboard.clear(ll_wen, ll_waddr) - val id_sboard_hazard = checkHazards(hazard_targets, sboard.readBypassed _) + val id_sboard_hazard = checkHazards(hazard_targets, sboard.read _) sboard.set(wb_set_sboard && wb_wen, wb_waddr) // stall for RAW/WAW hazards on CSRs, loads, AMOs, and mul/div in execute stage. 
@@ -486,7 +486,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val ctrl_stalld = id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || id_ctrl.fp && id_stall_fpu || - id_ctrl.mem && !io.dmem.req.ready || + id_ctrl.mem && Reg(next = !io.dmem.req.ready) || Bool(usingRoCC) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || id_do_fence || csr.io.csr_stall From 4aef567a80b04afee5b81bb8383f0ccd4866869a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 13 May 2016 17:54:23 -0700 Subject: [PATCH 1008/1087] Fix MMIO bug: replay_next wasn't set --- rocket/src/main/scala/arbiter.scala | 4 +--- rocket/src/main/scala/nbdcache.scala | 10 +++++++--- rocket/src/main/scala/rocket.scala | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 8a207f2e..79cf6a36 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -55,9 +55,7 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module resp.bits := io.mem.resp.bits resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) - io.requestor(i).replay_next.valid := io.mem.replay_next.valid && - io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(i) - io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> log2Up(n) + io.requestor(i).replay_next := io.mem.replay_next } } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c9c108f1..2181b96e 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -100,7 +100,7 @@ class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { val s2_nack = Bool(INPUT) // req from two cycles ago is rejected val resp = Valid(new HellaCacheResp).flip - val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip + val replay_next = Bool(INPUT) val xcpt = (new HellaCacheExceptions).asInput val invalidate_lr = Bool(OUTPUT) val 
ordered = Bool(INPUT) @@ -157,6 +157,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val grant = Valid(new GrantFromSrc).flip val finish = Decoupled(new FinishToDst) val resp = Decoupled(new HellaCacheResp) + val replay_next = Bool(OUTPUT) } def wordFromBeat(addr: UInt, dat: UInt) = { @@ -210,6 +211,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.acquire.valid := (state === s_acquire) io.acquire.bits := Mux(isRead(req.cmd), get_acquire, put_acquire) + io.replay_next := (state === s_grant) || io.resp.valid && !io.resp.ready io.resp.valid := (state === s_resp) io.resp.bits := req io.resp.bits.has_data := isRead(req.cmd) @@ -418,6 +420,7 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { val probe_rdy = Bool(OUTPUT) val fence_rdy = Bool(OUTPUT) + val replay_next = Bool(OUTPUT) } // determine if the request is cacheable or not @@ -501,6 +504,7 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { val resp_arb = Module(new Arbiter(new HellaCacheResp, nIOMSHRs)) var mmio_rdy = Bool(false) + io.replay_next := Bool(false) for (i <- 0 until nIOMSHRs) { val id = nMSHRs + i @@ -522,6 +526,7 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { resp_arb.io.in(i) <> mshr.io.resp when (!mshr.io.req.ready) { io.fence_rdy := Bool(false) } + when (mshr.io.replay_next) { io.replay_next := Bool(true) } } mmio_alloc_arb.io.out.ready := io.req.valid && !cacheable @@ -1086,8 +1091,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp) io.cpu.resp.bits.data_word_bypass := loadgen.wordData io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid - io.cpu.replay_next.valid := s1_replay && s1_read - io.cpu.replay_next.bits := s1_req.tag + io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next } // exposes a sane decoupled request 
interface diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 757e870d..879f0f08 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -352,7 +352,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)), (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load)))) - val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next.valid // structural hazard on writeback port + val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem val replay_mem = dcache_kill_mem || mem_reg_replay || fpu_kill_mem val killm_common = dcache_kill_mem || take_pc_wb || mem_reg_xcpt || !mem_reg_valid From f228309bd106b6f84c13c9d8e3ccd8f1717f9579 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Fri, 20 May 2016 16:30:27 -0700 Subject: [PATCH 1009/1087] add assertion to make sure SimpleHellaCacheIF doesn't get exception --- rocket/src/main/scala/nbdcache.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 2181b96e..14a91f5d 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1171,4 +1171,9 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module } io.requestor.resp := io.cache.resp + + assert(!Reg(next = io.cache.req.fire()) || + !(io.cache.xcpt.ma.ld || io.cache.xcpt.ma.st || + io.cache.xcpt.pf.ld || io.cache.xcpt.pf.st), + "SimpleHellaCacheIF exception") } From 7bc38383de67064becec28fbeeb3ddd6de0a3df6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 20 May 2016 18:59:05 -0700 Subject: [PATCH 1010/1087] add (non-working) blocking data cache --- rocket/src/main/scala/dcache.scala | 285 +++++++++++++++++++++++++++++ 1 
file changed, 285 insertions(+) create mode 100644 rocket/src/main/scala/dcache.scala diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala new file mode 100644 index 00000000..bc58aab7 --- /dev/null +++ b/rocket/src/main/scala/dcache.scala @@ -0,0 +1,285 @@ +// See LICENSE for license details. + +package rocket + +import Chisel._ +import uncore._ +import junctions._ +import cde.{Parameters, Field} +import Util._ + +class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) { + val addr = Bits(width = untagBits) + val write = Bool() + val wdata = Bits(width = rowBits) + val wmask = Bits(width = rowBytes) + val way_en = Bits(width = nWays) +} + +class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { + val io = new Bundle { + val req = Valid(new DCacheDataReq).flip + val resp = Vec(nWays, Bits(OUTPUT, rowBits)) + } + + val addr = io.req.bits.addr >> rowOffBits + for (w <- 0 until nWays) { + val array = SeqMem(nSets*refillCycles, Vec(rowBytes, Bits(width=8))) + val valid = io.req.valid && io.req.bits.way_en(w) + when (valid && io.req.bits.write) { + val data = Vec.tabulate(rowBytes)(i => io.req.bits.wdata(8*(i+1)-1, 8*i)) + array.write(addr, data, io.req.bits.wmask.toBools) + } + io.resp(w) := array.read(addr, valid && !io.req.bits.write).toBits + } +} + +class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { + val io = new Bundle { + val cpu = (new HellaCacheIO).flip + val ptw = new TLBPTWIO() + val mem = new ClientTileLinkIO + } + + val fq = Module(new FinishQueue(1)) + + require(nWays == 1) // TODO associativity + require(rowBits == encRowBits) // no ECC + require(refillCyclesPerBeat == 1) + require(rowBits >= coreDataBits) + + // tags + val replacer = p(Replacer)() + def onReset = L1Metadata(UInt(0), ClientMetadata.onReset) + val meta = Module(new MetadataArray(onReset _)) + val metaReadArb = Module(new Arbiter(new MetaReadReq, 2)) + val metaWriteArb = Module(new Arbiter(new 
L1MetaWriteReq, 3)) + meta.io.read <> metaReadArb.io.out + meta.io.write <> metaWriteArb.io.out + + // data + val data = Module(new DCacheDataArray) + val dataArb = Module(new Arbiter(new DCacheDataReq, 4)) + data.io.req <> dataArb.io.out + dataArb.io.out.ready := true + + val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) + val s1_probe = Reg(next=io.mem.probe.fire(), init=Bool(false)) + val probe_bits = RegEnable(io.mem.probe.bits, io.mem.probe.fire()) + val s1_nack = Wire(init=Bool(false)) + val s1_valid_masked = s1_valid && !io.cpu.s1_kill + val s1_valid_not_nacked = s1_valid_masked && !s1_nack + val s1_req = RegEnable(io.cpu.req.bits, io.cpu.req.valid) + val s1_read = isRead(s1_req.cmd) + val s1_write = isWrite(s1_req.cmd) + + val s_ready :: s_grant_wait :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 8) + val grant_wait = Reg(init=Bool(false)) + val release_state = Reg(init=s_ready) + val pstore_valid = Reg(init=Bool(false)) + val inWriteback = release_state === s_voluntary_writeback || release_state === s_probe_rep_dirty + io.cpu.req.ready := (release_state === s_ready) && !grant_wait && !s1_nack + + // hit initiation path + dataArb.io.in(3).valid := io.cpu.req.valid && isRead(io.cpu.req.bits.cmd) + dataArb.io.in(3).bits.write := false + dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr + dataArb.io.in(3).bits.way_en := ~UInt(0, nWays) + when (!dataArb.io.in(3).ready && isRead(io.cpu.req.bits.cmd)) { io.cpu.req.ready := false } + metaReadArb.io.in(1).valid := io.cpu.req.valid + metaReadArb.io.in(1).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB) + metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays) + when (!metaReadArb.io.in(1).ready) { io.cpu.req.ready := false } + + // address translation + val tlb = Module(new TLB) + io.ptw <> tlb.io.ptw + tlb.io.req.valid := s1_valid_masked + tlb.io.req.bits.passthrough := s1_req.phys + 
tlb.io.req.bits.asid := 0 + tlb.io.req.bits.vpn := s1_req.addr(vaddrBits-1, pgIdxBits) + tlb.io.req.bits.instruction := false + tlb.io.req.bits.store := s1_write + when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false } + when (!tlb.io.req.ready && tlb.io.req.valid) { s1_nack := true } + + val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) + val s1_tag = Mux(s1_probe || inWriteback, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits)) + val s1_hit_way = meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag) + val s1_hit_state = Mux1H(s1_hit_way, meta.io.resp.map(_.coh)) + val s1_data = Mux1H(s1_hit_way, data.io.resp) // retime into s2 if critical + + val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) + val s2_probe = Reg(next=s1_probe, init=Bool(false)) + val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready + val s2_valid_masked = s2_valid && Reg(next = !s1_nack) + val s2_req = Reg(io.cpu.req.bits) + when (s1_valid_not_nacked) { + s2_req := s1_req + s2_req.addr := s1_paddr + } + val s2_data = RegEnable(s1_data, s1_valid || inWriteback) + val s2_hit_way = RegEnable(Cat(s1_hit_way.reverse), s1_valid_not_nacked || s1_probe) + val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked || s1_probe) + val s2_hit = s2_hit_state.isHit(s2_req.cmd) + val s2_hit_dirty = s2_hit && s2_hit_state.requiresVoluntaryWriteback() + val s2_valid_hit = s2_valid_masked && s2_hit + val s2_valid_miss = s2_valid_masked && !s2_hit && !pstore_valid + val s2_repl = RegEnable(meta.io.resp(replacer.way), s1_valid_not_nacked) + val s2_repl_dirty = s2_repl.coh.requiresVoluntaryWriteback() + io.cpu.s2_nack := s2_valid && !s2_valid_hit + when (io.cpu.s2_nack) { s1_nack := true } + + // exceptions + val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned + io.cpu.xcpt.ma.ld := s1_read && misaligned + io.cpu.xcpt.ma.st := s1_write && misaligned + io.cpu.xcpt.pf.ld := s1_read && 
tlb.io.resp.xcpt_ld + io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st + assert(!(Reg(next= + (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) && + io.cpu.resp.valid), + "DCache exception occurred - cache response not killed.") + + // committed stores + val s2_store_valid = s2_valid_hit && isWrite(s2_req.cmd) + val s2_cpu_data = RegEnable(io.cpu.s1_data, s1_valid && s1_write) + val s2_storegen = new StoreGen(s2_req.typ, s2_req.addr, s2_cpu_data, wordBytes) + val pstore_drain = s2_store_valid || releaseInFlight || io.cpu.s2_nack || !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)) + pstore_valid := s2_store_valid || (pstore_valid && !pstore_drain) + val pstore_addr = RegEnable(s2_req.addr, s2_store_valid) + val pstore_way = RegEnable(s2_hit_way, s2_store_valid) + val pstore_data = RegEnable(s2_storegen.data, s2_store_valid) + val pstore_mask = RegEnable(s2_storegen.mask, s2_store_valid) + dataArb.io.in(0).valid := pstore_valid && pstore_drain + dataArb.io.in(0).bits.write := true + dataArb.io.in(0).bits.addr := pstore_addr + dataArb.io.in(0).bits.way_en := pstore_way + dataArb.io.in(0).bits.wdata := Fill(rowWords, pstore_data) + dataArb.io.in(0).bits.wmask := pstore_mask << (if (rowOffBits > offsetlsb) (pstore_addr(rowOffBits-1,offsetlsb) << wordOffBits) else UInt(0)) + + // store->load RAW hazard detection + val s1_idx = s1_req.addr(idxMSB, wordOffBits) + val s1_raw_hazard = s1_read && + ((s2_store_valid && s2_req.addr(idxMSB, wordOffBits) === s1_idx) || + (pstore_valid && pstore_addr(idxMSB, wordOffBits) === s1_idx)) + when (s1_raw_hazard) { s1_nack := true } + + val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd) + metaWriteArb.io.in(0).valid := s2_valid_hit && s2_hit_state =/= s2_new_hit_state + metaWriteArb.io.in(0).bits.way_en := s2_hit_way + metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB) + metaWriteArb.io.in(0).bits.data.coh := s2_new_hit_state + metaWriteArb.io.in(0).bits.data.tag := 
s2_req.addr(paddrBits-1, untagBits) + + // acquire + io.mem.acquire.valid := s2_valid_miss && !s2_repl_dirty && fq.io.enq.ready + io.mem.acquire.bits := s2_hit_state.makeAcquire(addr_block = s2_req.addr(paddrBits-1, blockOffBits), client_xact_id = UInt(0), op_code = s2_req.cmd) + when (io.mem.acquire.fire()) { grant_wait := true } + + // grant + val grantIsRefill = io.mem.grant.bits.hasMultibeatData() + assert(!io.mem.grant.valid || grantIsRefill, "TODO uncached") + val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles) + val grantDone = !grantIsRefill || refillDone + when (io.mem.grant.fire() && grantDone) { grant_wait := false } + + // data refill + dataArb.io.in(1).valid := grantIsRefill && io.mem.grant.valid + io.mem.grant.ready := true + assert(io.mem.grant.ready || !io.mem.grant.valid, "") + dataArb.io.in(1).bits.write := true + dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits + dataArb.io.in(1).bits.way_en := UIntToOH(replacer.way) + dataArb.io.in(1).bits.wdata := io.mem.grant.bits.data + dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes) + // tag updates on refill + metaWriteArb.io.in(1).valid := refillDone + assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready, "") + metaWriteArb.io.in(1).bits.way_en := UIntToOH(replacer.way) + metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB) + metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd) + metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) + + // probe + metaReadArb.io.in(0).valid := io.mem.probe.valid + io.mem.probe.ready := metaReadArb.io.in(0).ready && !releaseInFlight && !s1_valid && (!s2_valid || s2_valid_hit) + metaReadArb.io.in(0).bits.idx := io.mem.probe.bits.addr_block + metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays) + + // finish + fq.io.enq.valid := refillDone + fq.io.enq.bits := 
io.mem.grant.bits.makeFinish() + io.mem.finish <> fq.io.deq + when (fq.io.enq.valid) { + assert(fq.io.enq.ready, "") + replacer.miss + } + + // release + val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles) + val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback) + val new_coh = Wire(init = s2_hit_state.onProbe(probe_bits)) + val releaseRejected = io.mem.release.valid && !io.mem.release.ready + val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire()) + val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected) + val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid)) + io.mem.release.valid := s2_release_data_valid + io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits) + when (s2_valid_miss && s2_repl_dirty) { + release_state := s_voluntary_writeback + probe_bits.addr_block := Cat(s2_repl.tag, s2_req.addr(idxMSB, idxLSB)) + } + when (s2_probe) { + when (s2_hit_dirty) { release_state := s_probe_rep_dirty } + .elsewhen (s2_hit) { release_state := s_probe_rep_clean } + .otherwise { + io.mem.release.valid := true + release_state := s_probe_rep_miss + } + } + when (releaseDone) { release_state := s_ready } + when (release_state === s_probe_rep_miss || release_state === s_probe_rep_clean) { + io.mem.release.valid := true + } + when (release_state === s_probe_rep_clean || release_state === s_probe_rep_dirty) { + io.mem.release.bits := s2_hit_state.makeRelease(probe_bits) + when (releaseDone) { release_state := s_probe_write_meta } + } + when (release_state === s_voluntary_writeback || release_state === s_voluntary_write_meta) { + io.mem.release.bits := s2_hit_state.makeVoluntaryWriteback(UInt(0), UInt(0)) + new_coh := s2_hit_state.onCacheControl(M_FLUSH) + when (releaseDone) { release_state := s_voluntary_write_meta } + } + when (s2_probe && !io.mem.release.fire()) { s1_nack 
:= true } + io.mem.release.bits.addr_block := probe_bits.addr_block + io.mem.release.bits.addr_beat := writebackCount + io.mem.release.bits.data := s2_data + + dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles + dataArb.io.in(2).bits.write := false + dataArb.io.in(2).bits.addr := Cat(io.mem.release.bits.addr_block, releaseDataBeat(log2Up(refillCycles)-1,0)) << rowOffBits + dataArb.io.in(2).bits.way_en := ~UInt(0, nWays) + + metaWriteArb.io.in(2).valid := (release_state === s_voluntary_write_meta || release_state === s_probe_write_meta) + metaWriteArb.io.in(2).bits.way_en := s2_hit_way + metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB) + metaWriteArb.io.in(2).bits.data.coh := new_coh + metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits) + when (metaWriteArb.io.in(2).fire()) { release_state := s_ready } + + // response + io.cpu.replay_next := io.mem.grant.valid && !grantIsRefill + io.cpu.resp.valid := s2_valid_hit || io.cpu.resp.bits.replay + io.cpu.resp.bits := s2_req + io.cpu.resp.bits.replay := Reg(next = io.cpu.replay_next) + io.cpu.ordered := !(s1_valid || s2_valid || grant_wait) + + // load data subword mux/sign extension + val s2_sc = Bool(false) + val s2_word_idx = s2_req.addr(log2Up(rowWords*coreDataBytes)-1, log2Up(wordBytes)) + val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits))) + val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBytes) + io.cpu.resp.bits.data := loadgen.data + io.cpu.resp.bits.data_word_bypass := loadgen.wordData +} From e19c5e5d2cf7c8652704c004ecebc11bd30628ba Mon Sep 17 00:00:00 2001 From: "Wesley W. 
Terpstra" Date: Mon, 16 May 2016 18:28:45 -0700 Subject: [PATCH 1011/1087] IOMSHR: support atomic operations --- rocket/src/main/scala/nbdcache.scala | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 14a91f5d..b71a3f23 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -208,8 +208,17 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { wmask = beat_mask, alloc = Bool(false)) + val putAtomic_acquire = PutAtomic( + client_xact_id = UInt(id), + addr_block = addr_block, + addr_beat = addr_beat, + addr_byte = addr_byte, + atomic_opcode = req.cmd, + operand_size = req.typ, + data = beat_data) + io.acquire.valid := (state === s_acquire) - io.acquire.bits := Mux(isRead(req.cmd), get_acquire, put_acquire) + io.acquire.bits := Mux(isAMO(req.cmd), putAtomic_acquire, Mux(isRead(req.cmd), get_acquire, put_acquire)) io.replay_next := (state === s_grant) || io.resp.valid && !io.resp.ready io.resp.valid := (state === s_resp) From 80482890fdb24f63923e279d38f4af1d6f33339c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 21 May 2016 16:55:42 -0700 Subject: [PATCH 1012/1087] Don't rely on tag value for nacks --- rocket/src/main/scala/arbiter.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 79cf6a36..507fb45e 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -14,7 +14,8 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module val mem = new HellaCacheIO } - val r_valid = io.requestor.map(r => Reg(next=r.req.valid)) + val s1_id = Reg(UInt()) + val s2_id = Reg(next=s1_id) io.mem.invalidate_lr := io.requestor.map(_.invalidate_lr).reduce(_||_) io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_) @@ -30,6 +31,7 @@ class 
HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module io.mem.req.bits.addr := req.bits.addr io.mem.req.bits.phys := req.bits.phys io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n))) + s1_id := UInt(i) } def connect_s1() = { io.mem.s1_kill := io.requestor(i).s1_kill @@ -41,7 +43,7 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module connect_s1() } else { when (req.valid) { connect_s0() } - when (r_valid(i)) { connect_s1() } + when (s1_id === UInt(i)) { connect_s1() } } } @@ -51,7 +53,7 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module resp.valid := io.mem.resp.valid && tag_hit io.requestor(i).xcpt := io.mem.xcpt io.requestor(i).ordered := io.mem.ordered - io.requestor(i).s2_nack := io.mem.s2_nack && tag_hit + io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i) resp.bits := io.mem.resp.bits resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) From a3061047e3fc20e799901bce57dc5ac453e188ff Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 21 May 2016 16:56:17 -0700 Subject: [PATCH 1013/1087] Instantiate blocking D$ when NMSHRS=0 --- rocket/src/main/scala/tile.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index dd88f993..14c51966 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -40,13 +40,15 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( val icache = Module(new Frontend()(p.alterPartial({ case CacheName => "L1I" case CoreName => "Rocket" }))) - val dcache = Module(new HellaCache()(dcacheParams)) + val dcache = + if (p(NMSHRs) == 0) Module(new DCache()(dcacheParams)).io + else Module(new HellaCache()(dcacheParams)).io - val ptwPorts = collection.mutable.ArrayBuffer(icache.io.ptw, dcache.io.ptw) + val ptwPorts = collection.mutable.ArrayBuffer(icache.io.ptw, dcache.ptw) val dcPorts = 
collection.mutable.ArrayBuffer(core.io.dmem) val uncachedArbPorts = collection.mutable.ArrayBuffer(icache.io.mem) val uncachedPorts = collection.mutable.ArrayBuffer[ClientUncachedTileLinkIO]() - val cachedPorts = collection.mutable.ArrayBuffer(dcache.io.mem) + val cachedPorts = collection.mutable.ArrayBuffer(dcache.mem) core.io.prci <> io.prci icache.io.cpu <> core.io.imem @@ -133,7 +135,7 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( val dcArb = Module(new HellaCacheArbiter(dcPorts.size)(dcacheParams)) dcArb.io.requestor <> dcPorts - dcache.io.cpu <> dcArb.io.mem + dcache.cpu <> dcArb.io.mem if (!usingRocc || nFPUPorts == 0) { fpuOpt.foreach { fpu => From 765b90f6a48e6e217c7d317baa956350f1dbf69d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 21 May 2016 16:56:49 -0700 Subject: [PATCH 1014/1087] Stall on D$ lockups less conservatively --- rocket/src/main/scala/rocket.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 879f0f08..2fad32bd 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -483,10 +483,13 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { id_csr_en && !io.fpu.fcsr_rdy || checkHazards(fp_hazard_targets, fp_sboard.read _) } else Bool(false) + val dcache_blocked = Reg(Bool()) + dcache_blocked := !io.dmem.req.ready && (io.dmem.req.valid || dcache_blocked) + val ctrl_stalld = id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || id_ctrl.fp && id_stall_fpu || - id_ctrl.mem && Reg(next = !io.dmem.req.ready) || + id_ctrl.mem && dcache_blocked || // reduce activity during D$ misses Bool(usingRoCC) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || id_do_fence || csr.io.csr_stall From 335e2c8a1ebef0182d81ec7d46f08d4ec94b475c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 21 May 2016 16:58:01 -0700 Subject: [PATCH 
1015/1087] Support disabling atomics extension --- rocket/src/main/scala/csr.scala | 3 +- rocket/src/main/scala/idecode.scala | 60 +++++++++++++++++------------ rocket/src/main/scala/rocket.scala | 3 ++ 3 files changed, 41 insertions(+), 25 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index e163967b..0ccce9aa 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -180,8 +180,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val system_insn = io.rw.cmd === CSR.I val cpu_ren = io.rw.cmd =/= CSR.N && !system_insn - val isa_string = "IMA" + + val isa_string = "IM" + (if (usingVM) "S" else "") + + (if (usingAtomics) "A" else "") + (if (usingFPU) "FD" else "") + (if (usingRoCC) "X" else "") val isa = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) | diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index c301424c..002c7052 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -98,30 +98,6 @@ class XDecode(implicit val p: Parameters) extends DecodeConstants SW-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N), SD-> List(xpr64,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N), - AMOADD_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOXOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOSWAP_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOAND_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMIN_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, 
IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMINU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAX_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAXU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOADD_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOSWAP_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOXOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOAND_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMIN_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMINU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAX_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAXU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - - LR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - LR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - SC_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - SC_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - LUI-> List(Y, N,N,N,N,N,N,N,A2_IMM, 
A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), ADDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), SLTI -> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), @@ -185,6 +161,42 @@ class XDecode(implicit val p: Parameters) extends DecodeConstants CSRRCI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N)) } +class ADecode(implicit val p: Parameters) extends DecodeConstants +{ + val table: Array[(BitPat, List[BitPat])] = Array( + // jal renf1 fence.i + // | jalr | renf2 | + // fp_val| | renx2 | | renf3 | + // | rocc| | | renx1 s_alu1 mem_val | | | wfd | + // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | + // | | | | | | | | | | | | | | | | | | | | | wxd | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + AMOADD_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOXOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOSWAP_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOAND_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMIN_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMINU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAX_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAXU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, 
Y,M_XA_MAXU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOADD_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOSWAP_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOXOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOAND_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMIN_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMINU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAX_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAXU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + + LR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + LR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + SC_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + SC_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y)) +} + class FDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 2fad32bd..6e7c85c8 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -14,6 +14,7 @@ case object XLen extends Field[Int] case object FetchWidth extends Field[Int] case 
object RetireWidth extends Field[Int] case object UseVM extends Field[Boolean] +case object UseAtomics extends Field[Boolean] case object UsePerfCounters extends Field[Boolean] case object FastLoadWord extends Field[Boolean] case object FastLoadByte extends Field[Boolean] @@ -32,6 +33,7 @@ trait HasCoreParameters extends HasAddrMapParameters { val usingVM = p(UseVM) val usingFPU = p(UseFPU) + val usingAtomics = p(UseAtomics) val usingFDivSqrt = p(FDivSqrt) val usingRoCC = !p(BuildRoCC).isEmpty val usingFastMulDiv = p(FastMulDiv) @@ -119,6 +121,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } var decode_table = new XDecode().table + if (usingAtomics) decode_table ++= new ADecode().table if (usingFPU) decode_table ++= new FDecode().table if (usingFPU && usingFDivSqrt) decode_table ++= new FDivSqrtDecode().table if (usingRoCC) decode_table ++= new RoCCDecode().table From d7790ac6a44f7eae0f7afbfd034e5f72ed79c67f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 21 May 2016 16:58:36 -0700 Subject: [PATCH 1016/1087] WIP on blocking D$ --- rocket/src/main/scala/dcache.scala | 53 +++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index bc58aab7..4a900ce4 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -97,11 +97,11 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { tlb.io.req.valid := s1_valid_masked tlb.io.req.bits.passthrough := s1_req.phys tlb.io.req.bits.asid := 0 - tlb.io.req.bits.vpn := s1_req.addr(vaddrBits-1, pgIdxBits) + tlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits tlb.io.req.bits.instruction := false tlb.io.req.bits.store := s1_write when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false } - when (!tlb.io.req.ready && tlb.io.req.valid) { s1_nack := true } + when (s1_valid && tlb.io.resp.miss) { s1_nack := true } val s1_paddr = 
Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) val s1_tag = Mux(s1_probe || inWriteback, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits)) @@ -121,7 +121,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s2_data = RegEnable(s1_data, s1_valid || inWriteback) val s2_hit_way = RegEnable(Cat(s1_hit_way.reverse), s1_valid_not_nacked || s1_probe) val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked || s1_probe) - val s2_hit = s2_hit_state.isHit(s2_req.cmd) + val s2_hit = s2_hit_way.orR && s2_hit_state.isHit(s2_req.cmd) val s2_hit_dirty = s2_hit && s2_hit_state.requiresVoluntaryWriteback() val s2_valid_hit = s2_valid_masked && s2_hit val s2_valid_miss = s2_valid_masked && !s2_hit && !pstore_valid @@ -143,13 +143,14 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // committed stores val s2_store_valid = s2_valid_hit && isWrite(s2_req.cmd) - val s2_cpu_data = RegEnable(io.cpu.s1_data, s1_valid && s1_write) - val s2_storegen = new StoreGen(s2_req.typ, s2_req.addr, s2_cpu_data, wordBytes) + val s2_store_data = RegEnable(io.cpu.s1_data, s1_valid && s1_write) + val s2_storegen = new StoreGen(s2_req.typ, s2_req.addr, s2_store_data, wordBytes) + val s2_storegen_data = Wire(init = s2_storegen.data) val pstore_drain = s2_store_valid || releaseInFlight || io.cpu.s2_nack || !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)) pstore_valid := s2_store_valid || (pstore_valid && !pstore_drain) val pstore_addr = RegEnable(s2_req.addr, s2_store_valid) val pstore_way = RegEnable(s2_hit_way, s2_store_valid) - val pstore_data = RegEnable(s2_storegen.data, s2_store_valid) + val pstore_data = RegEnable(s2_storegen_data, s2_store_valid) val pstore_mask = RegEnable(s2_storegen.mask, s2_store_valid) dataArb.io.in(0).valid := pstore_valid && pstore_drain dataArb.io.in(0).bits.write := true @@ -163,13 +164,13 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s1_raw_hazard = s1_read && 
((s2_store_valid && s2_req.addr(idxMSB, wordOffBits) === s1_idx) || (pstore_valid && pstore_addr(idxMSB, wordOffBits) === s1_idx)) - when (s1_raw_hazard) { s1_nack := true } + when (s1_valid && s1_raw_hazard) { s1_nack := true } val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd) - metaWriteArb.io.in(0).valid := s2_valid_hit && s2_hit_state =/= s2_new_hit_state - metaWriteArb.io.in(0).bits.way_en := s2_hit_way + metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_hit_state =/= s2_new_hit_state) || (s2_valid_miss && !s2_repl_dirty) + metaWriteArb.io.in(0).bits.way_en := Mux(s2_hit, s2_hit_way, UIntToOH(replacer.way)) metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB) - metaWriteArb.io.in(0).bits.data.coh := s2_new_hit_state + metaWriteArb.io.in(0).bits.data.coh := Mux(s2_hit, s2_new_hit_state, ClientMetadata.onReset) metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) // acquire @@ -179,15 +180,20 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // grant val grantIsRefill = io.mem.grant.bits.hasMultibeatData() - assert(!io.mem.grant.valid || grantIsRefill, "TODO uncached") + val grantHasData = io.mem.grant.bits.hasData() + val grantIsUncached = grantHasData && !grantIsRefill + when (io.mem.grant.valid) { + assert(grantIsRefill === io.mem.grant.bits.requiresAck(), "") + assert(!grantIsUncached, "TODO uncached") + } val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles) - val grantDone = !grantIsRefill || refillDone + val grantDone = refillDone || grantIsUncached when (io.mem.grant.fire() && grantDone) { grant_wait := false } // data refill dataArb.io.in(1).valid := grantIsRefill && io.mem.grant.valid io.mem.grant.ready := true - assert(io.mem.grant.ready || !io.mem.grant.valid, "") + assert(dataArb.io.in(1).ready || !dataArb.io.in(1).valid, "") dataArb.io.in(1).bits.write := true dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), 
io.mem.grant.bits.addr_beat) << beatOffBits dataArb.io.in(1).bits.way_en := UIntToOH(replacer.way) @@ -220,6 +226,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles) val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback) val new_coh = Wire(init = s2_hit_state.onProbe(probe_bits)) + val release_way = Wire(init = s2_hit_way) val releaseRejected = io.mem.release.valid && !io.mem.release.ready val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire()) val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected) @@ -249,6 +256,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (release_state === s_voluntary_writeback || release_state === s_voluntary_write_meta) { io.mem.release.bits := s2_hit_state.makeVoluntaryWriteback(UInt(0), UInt(0)) new_coh := s2_hit_state.onCacheControl(M_FLUSH) + release_way := UIntToOH(replacer.way) when (releaseDone) { release_state := s_voluntary_write_meta } } when (s2_probe && !io.mem.release.fire()) { s1_nack := true } @@ -262,16 +270,17 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { dataArb.io.in(2).bits.way_en := ~UInt(0, nWays) metaWriteArb.io.in(2).valid := (release_state === s_voluntary_write_meta || release_state === s_probe_write_meta) - metaWriteArb.io.in(2).bits.way_en := s2_hit_way + metaWriteArb.io.in(2).bits.way_en := release_way metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB) metaWriteArb.io.in(2).bits.data.coh := new_coh metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits) when (metaWriteArb.io.in(2).fire()) { release_state := s_ready } // response - io.cpu.replay_next := io.mem.grant.valid && !grantIsRefill + io.cpu.replay_next := io.mem.grant.valid && grantIsUncached io.cpu.resp.valid := s2_valid_hit || io.cpu.resp.bits.replay - 
io.cpu.resp.bits := s2_req + io.cpu.resp.bits := s2_req // TODO uncached + io.cpu.resp.bits.has_data := isRead(s2_req.cmd) // TODO uncached io.cpu.resp.bits.replay := Reg(next = io.cpu.replay_next) io.cpu.ordered := !(s1_valid || s2_valid || grant_wait) @@ -282,4 +291,16 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBytes) io.cpu.resp.bits.data := loadgen.data io.cpu.resp.bits.data_word_bypass := loadgen.wordData + io.cpu.resp.bits.store_data := s2_store_data + + // AMOs + if (usingAtomics) { + val amoalu = Module(new AMOALU) + amoalu.io.addr := s2_req.addr + amoalu.io.cmd := s2_req.cmd + amoalu.io.typ := s2_req.typ + amoalu.io.lhs := s2_data_word + amoalu.io.rhs := s2_store_data + s2_storegen_data := amoalu.io.out + } } From 354cb2d5ecd37fd879c706194e084bbb6cccbd3b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 22 May 2016 14:08:53 -0700 Subject: [PATCH 1017/1087] Don't stall I$ response when resolving a branch misprediction This avoids a fetch bubble. Not clear if this is the best way to do it. Perhaps this change should instead be made to Frontend (i.e., ignore resp.ready when req.valid is high), but that might exacerbate a critical path. 
--- rocket/src/main/scala/rocket.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 6e7c85c8..af09c7ac 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -505,7 +505,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { mem_npc)).toUInt // mispredicted branch io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i io.imem.flush_tlb := csr.io.fatc - io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt + io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt || take_pc_mem io.imem.btb_update.valid := mem_reg_valid && !mem_npc_misaligned && mem_wrong_npc && mem_cfi_taken && !take_pc_wb io.imem.btb_update.bits.isJump := mem_ctrl.jal || mem_ctrl.jalr From 0b8de578d4ef4e7210b1d9271c7351970bc7c455 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 22 May 2016 16:16:21 -0700 Subject: [PATCH 1018/1087] Add additional D$ store buffering to prevent structural hazards --- rocket/src/main/scala/dcache.scala | 71 ++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 4a900ce4..a40c7ddb 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -76,7 +76,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s_ready :: s_grant_wait :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 8) val grant_wait = Reg(init=Bool(false)) val release_state = Reg(init=s_ready) - val pstore_valid = Reg(init=Bool(false)) + val pstore1_valid = Wire(Bool()) + val pstore2_valid = Reg(Bool()) val inWriteback = release_state === s_voluntary_writeback || release_state === s_probe_rep_dirty io.cpu.req.ready := (release_state === s_ready) && !grant_wait && !s1_nack @@ 
-124,7 +125,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s2_hit = s2_hit_way.orR && s2_hit_state.isHit(s2_req.cmd) val s2_hit_dirty = s2_hit && s2_hit_state.requiresVoluntaryWriteback() val s2_valid_hit = s2_valid_masked && s2_hit - val s2_valid_miss = s2_valid_masked && !s2_hit && !pstore_valid + val s2_valid_miss = s2_valid_masked && !s2_hit && !(pstore1_valid || pstore2_valid) val s2_repl = RegEnable(meta.io.resp(replacer.way), s1_valid_not_nacked) val s2_repl_dirty = s2_repl.coh.requiresVoluntaryWriteback() io.cpu.s2_nack := s2_valid && !s2_valid_hit @@ -141,29 +142,50 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.cpu.resp.valid), "DCache exception occurred - cache response not killed.") - // committed stores - val s2_store_valid = s2_valid_hit && isWrite(s2_req.cmd) - val s2_store_data = RegEnable(io.cpu.s1_data, s1_valid && s1_write) - val s2_storegen = new StoreGen(s2_req.typ, s2_req.addr, s2_store_data, wordBytes) - val s2_storegen_data = Wire(init = s2_storegen.data) - val pstore_drain = s2_store_valid || releaseInFlight || io.cpu.s2_nack || !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)) - pstore_valid := s2_store_valid || (pstore_valid && !pstore_drain) - val pstore_addr = RegEnable(s2_req.addr, s2_store_valid) - val pstore_way = RegEnable(s2_hit_way, s2_store_valid) - val pstore_data = RegEnable(s2_storegen_data, s2_store_valid) - val pstore_mask = RegEnable(s2_storegen.mask, s2_store_valid) - dataArb.io.in(0).valid := pstore_valid && pstore_drain + // pending store buffer + val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write) + val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write) + val pstore1_addr = RegEnable(s1_paddr, s1_valid_not_nacked && s1_write) + val pstore1_data = RegEnable(io.cpu.s1_data, s1_valid_not_nacked && s1_write) + val pstore1_storegen = new StoreGen(pstore1_typ, pstore1_addr, pstore1_data, wordBytes) + val 
pstore1_storegen_data = Wire(init = pstore1_storegen.data) + val pstore1_amo = Bool(usingAtomics) && isRead(pstore1_cmd) + val pstore_drain_structural = pstore1_valid && pstore2_valid && ((s1_valid && s1_write) || pstore1_amo) + val pstore_drain_opportunistic = !(io.cpu.req.valid && isRead(io.cpu.req.bits.cmd)) + val pstore_drain_on_miss = releaseInFlight || io.cpu.s2_nack + val pstore_drain = + Bool(usingAtomics) && pstore_drain_structural || + (((pstore1_valid && !pstore1_amo) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss)) + val pstore1_way = Wire(init=s2_hit_way) + pstore1_valid := { + val s2_store_valid = s2_valid_hit && isWrite(s2_req.cmd) + val pstore1_held = Reg(Bool()) + val pstore1_held_way = RegEnable(s2_hit_way, s2_store_valid) + when (pstore1_held) { pstore1_way := pstore1_held_way } + pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain + s2_store_valid || pstore1_held + } + val advance_pstore1 = pstore1_valid && (!pstore2_valid || pstore_drain) + pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1 + val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1) + val pstore2_way = RegEnable(pstore1_way, advance_pstore1) + val pstore2_storegen_data = RegEnable(pstore1_storegen_data, advance_pstore1) + val pstore2_storegen_mask = RegEnable(pstore1_storegen.mask, advance_pstore1) + dataArb.io.in(0).valid := pstore_drain dataArb.io.in(0).bits.write := true - dataArb.io.in(0).bits.addr := pstore_addr - dataArb.io.in(0).bits.way_en := pstore_way - dataArb.io.in(0).bits.wdata := Fill(rowWords, pstore_data) - dataArb.io.in(0).bits.wmask := pstore_mask << (if (rowOffBits > offsetlsb) (pstore_addr(rowOffBits-1,offsetlsb) << wordOffBits) else UInt(0)) + dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr) + dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way) + dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, 
pstore2_storegen_data, pstore1_storegen_data)) + val pstore_mask_shift = + if (rowOffBits > offsetlsb) Mux(pstore2_valid, pstore2_addr, pstore1_addr)(rowOffBits-1,offsetlsb) << wordOffBits + else UInt(0) + dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_storegen.mask) << pstore_mask_shift // store->load RAW hazard detection val s1_idx = s1_req.addr(idxMSB, wordOffBits) val s1_raw_hazard = s1_read && - ((s2_store_valid && s2_req.addr(idxMSB, wordOffBits) === s1_idx) || - (pstore_valid && pstore_addr(idxMSB, wordOffBits) === s1_idx)) + ((pstore1_valid && pstore1_addr(idxMSB, wordOffBits) === s1_idx) || + (pstore2_valid && pstore2_addr(idxMSB, wordOffBits) === s1_idx)) when (s1_valid && s1_raw_hazard) { s1_nack := true } val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd) @@ -291,7 +313,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBytes) io.cpu.resp.bits.data := loadgen.data io.cpu.resp.bits.data_word_bypass := loadgen.wordData - io.cpu.resp.bits.store_data := s2_store_data + io.cpu.resp.bits.store_data := pstore1_data // AMOs if (usingAtomics) { @@ -300,7 +322,10 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { amoalu.io.cmd := s2_req.cmd amoalu.io.typ := s2_req.typ amoalu.io.lhs := s2_data_word - amoalu.io.rhs := s2_store_data - s2_storegen_data := amoalu.io.out + amoalu.io.rhs := pstore1_data + pstore1_storegen_data := amoalu.io.out + } else { + assert(!(s1_valid_masked && isRead(s1_req.cmd) && isWrite(s1_req.cmd)), "unsupported D$ operation") + assert(!pstore_drain_structural, "???") } } From 0d93d1a1a04c9aed18a5acddc752a6e95e30e2d6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 22 May 2016 17:17:57 -0700 Subject: [PATCH 1019/1087] Clean up pending store logic a bit --- rocket/src/main/scala/dcache.scala | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git 
a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index a40c7ddb..7a04d1de 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -106,7 +106,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) val s1_tag = Mux(s1_probe || inWriteback, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits)) - val s1_hit_way = meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag) + val s1_hit_way = Cat(meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag).reverse) val s1_hit_state = Mux1H(s1_hit_way, meta.io.resp.map(_.coh)) val s1_data = Mux1H(s1_hit_way, data.io.resp) // retime into s2 if critical @@ -120,7 +120,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { s2_req.addr := s1_paddr } val s2_data = RegEnable(s1_data, s1_valid || inWriteback) - val s2_hit_way = RegEnable(Cat(s1_hit_way.reverse), s1_valid_not_nacked || s1_probe) + val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked || s1_probe) val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked || s1_probe) val s2_hit = s2_hit_way.orR && s2_hit_state.isHit(s2_req.cmd) val s2_hit_dirty = s2_hit && s2_hit_state.requiresVoluntaryWriteback() @@ -147,6 +147,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write) val pstore1_addr = RegEnable(s1_paddr, s1_valid_not_nacked && s1_write) val pstore1_data = RegEnable(io.cpu.s1_data, s1_valid_not_nacked && s1_write) + val pstore1_way = RegEnable(s1_hit_way, s1_valid_not_nacked && s1_write) val pstore1_storegen = new StoreGen(pstore1_typ, pstore1_addr, pstore1_data, wordBytes) val pstore1_storegen_data = Wire(init = pstore1_storegen.data) val pstore1_amo = Bool(usingAtomics) && isRead(pstore1_cmd) @@ -156,12 +157,9 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { 
val pstore_drain = Bool(usingAtomics) && pstore_drain_structural || (((pstore1_valid && !pstore1_amo) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss)) - val pstore1_way = Wire(init=s2_hit_way) pstore1_valid := { val s2_store_valid = s2_valid_hit && isWrite(s2_req.cmd) val pstore1_held = Reg(Bool()) - val pstore1_held_way = RegEnable(s2_hit_way, s2_store_valid) - when (pstore1_held) { pstore1_way := pstore1_held_way } pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain s2_store_valid || pstore1_held } From b92c73e361c14d305478f9daf673a98be38d374e Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sun, 22 May 2016 17:18:26 -0700 Subject: [PATCH 1020/1087] Add LR/SC to blocking D$ --- rocket/src/main/scala/dcache.scala | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 7a04d1de..57f4af91 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -142,6 +142,20 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.cpu.resp.valid), "DCache exception occurred - cache response not killed.") + // load reservations + val s2_lr = Bool(usingAtomics) && s2_req.cmd === M_XLR + val s2_sc = Bool(usingAtomics) && s2_req.cmd === M_XSC + val lrscCount = Reg(init=UInt(0)) + val lrscValid = lrscCount > 0 + val lrscAddr = Reg(UInt()) + val s2_sc_fail = s2_sc && !(lrscValid && lrscAddr === (s2_req.addr >> blockOffBits)) + when (s2_valid_hit && s2_lr) { + lrscCount := lrscCycles - 1 + lrscAddr := s2_req.addr >> blockOffBits + } + when (lrscValid) { lrscCount := lrscCount - 1 } + when ((s2_valid_hit && s2_sc) || io.cpu.invalidate_lr) { lrscCount := 0 } + // pending store buffer val pstore1_cmd = RegEnable(s1_req.cmd, s1_valid_not_nacked && s1_write) val pstore1_typ = RegEnable(s1_req.typ, s1_valid_not_nacked && s1_write) @@ -158,7 +172,7 @@ class 
DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { Bool(usingAtomics) && pstore_drain_structural || (((pstore1_valid && !pstore1_amo) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss)) pstore1_valid := { - val s2_store_valid = s2_valid_hit && isWrite(s2_req.cmd) + val s2_store_valid = s2_valid_hit && isWrite(s2_req.cmd) && !s2_sc_fail val pstore1_held = Reg(Bool()) pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain s2_store_valid || pstore1_held @@ -228,8 +242,9 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) // probe - metaReadArb.io.in(0).valid := io.mem.probe.valid - io.mem.probe.ready := metaReadArb.io.in(0).ready && !releaseInFlight && !s1_valid && (!s2_valid || s2_valid_hit) + val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr) + metaReadArb.io.in(0).valid := io.mem.probe.valid && !block_probe + io.mem.probe.ready := metaReadArb.io.in(0).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit) metaReadArb.io.in(0).bits.idx := io.mem.probe.bits.addr_block metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays) @@ -305,11 +320,10 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.cpu.ordered := !(s1_valid || s2_valid || grant_wait) // load data subword mux/sign extension - val s2_sc = Bool(false) val s2_word_idx = s2_req.addr(log2Up(rowWords*coreDataBytes)-1, log2Up(wordBytes)) val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits))) val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBytes) - io.cpu.resp.bits.data := loadgen.data + io.cpu.resp.bits.data := loadgen.data | s2_sc_fail io.cpu.resp.bits.data_word_bypass := loadgen.wordData io.cpu.resp.bits.store_data := pstore1_data From 42f079ce578c59a4550a1120d35f69316663859d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 23 May 2016 
14:11:24 -0700 Subject: [PATCH 1021/1087] JAL requires DW_XPR This has been benign so far because of how the logic minimization worked. --- rocket/src/main/scala/idecode.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index 002c7052..d719d626 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -82,7 +82,7 @@ class XDecode(implicit val p: Parameters) extends DecodeConstants BGE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), BGEU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - JAL-> List(Y, N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_X, FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + JAL-> List(Y, N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), JALR-> List(Y, N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), AUIPC-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), From 3b35c7470e9bb098de18e2d0f7e75720c0ae9d77 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 23 May 2016 15:42:56 -0700 Subject: [PATCH 1022/1087] Add uncached support to blocking D$ --- rocket/src/main/scala/dcache.scala | 121 +++++++++++++++++++---------- 1 file changed, 81 insertions(+), 40 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 57f4af91..90ca3dd8 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -126,10 +126,13 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s2_hit_dirty = s2_hit && s2_hit_state.requiresVoluntaryWriteback() val s2_valid_hit = s2_valid_masked && s2_hit val s2_valid_miss = s2_valid_masked && !s2_hit && !(pstore1_valid || pstore2_valid) - val s2_repl = 
RegEnable(meta.io.resp(replacer.way), s1_valid_not_nacked) - val s2_repl_dirty = s2_repl.coh.requiresVoluntaryWriteback() - io.cpu.s2_nack := s2_valid && !s2_valid_hit - when (io.cpu.s2_nack) { s1_nack := true } + val s2_uncached = !addrMap.isCacheable(s2_req.addr) + val s2_valid_cached_miss = s2_valid_miss && !s2_uncached + val s2_valid_uncached = s2_valid_miss && s2_uncached + val s2_victim_state = RegEnable(meta.io.resp(replacer.way), s1_valid_not_nacked) + val s2_victim_dirty = s2_victim_state.coh.requiresVoluntaryWriteback() + io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && io.mem.acquire.ready) + when (s2_valid && !s2_valid_hit) { s1_nack := true } // exceptions val misaligned = new StoreGen(s1_req.typ, s1_req.addr, UInt(0), wordBytes).misaligned @@ -174,10 +177,11 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { pstore1_valid := { val s2_store_valid = s2_valid_hit && isWrite(s2_req.cmd) && !s2_sc_fail val pstore1_held = Reg(Bool()) + assert(!s2_store_valid || !pstore1_held) pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain s2_store_valid || pstore1_held } - val advance_pstore1 = pstore1_valid && (!pstore2_valid || pstore_drain) + val advance_pstore1 = pstore1_valid && !(pstore2_valid && !pstore_drain) pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1 val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1) val pstore2_way = RegEnable(pstore1_way, advance_pstore1) @@ -201,24 +205,50 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (s1_valid && s1_raw_hazard) { s1_nack := true } val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd) - metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_hit_state =/= s2_new_hit_state) || (s2_valid_miss && !s2_repl_dirty) + metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_hit_state =/= s2_new_hit_state) || (s2_valid_cached_miss && !s2_victim_dirty) metaWriteArb.io.in(0).bits.way_en := Mux(s2_hit, 
s2_hit_way, UIntToOH(replacer.way)) metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB) metaWriteArb.io.in(0).bits.data.coh := Mux(s2_hit, s2_new_hit_state, ClientMetadata.onReset) metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) // acquire - io.mem.acquire.valid := s2_valid_miss && !s2_repl_dirty && fq.io.enq.ready - io.mem.acquire.bits := s2_hit_state.makeAcquire(addr_block = s2_req.addr(paddrBits-1, blockOffBits), client_xact_id = UInt(0), op_code = s2_req.cmd) + val cachedGetMessage = s2_hit_state.makeAcquire( + client_xact_id = UInt(0), + addr_block = s2_req.addr(paddrBits-1, blockOffBits), + op_code = s2_req.cmd) + val uncachedGetMessage = Get( + client_xact_id = UInt(0), + addr_block = s2_req.addr(paddrBits-1, blockOffBits), + addr_beat = s2_req.addr(blockOffBits-1, beatOffBits), + addr_byte = s2_req.addr(beatOffBits-1, 0), + operand_size = s2_req.typ, + alloc = Bool(false)) + val uncachedPutMessage = Put( + client_xact_id = UInt(0), + addr_block = s2_req.addr(paddrBits-1, blockOffBits), + addr_beat = s2_req.addr(blockOffBits-1, beatOffBits), + data = Fill(beatWords, pstore1_storegen.data), + wmask = pstore1_storegen.mask << (s2_req.addr(beatOffBits-1, wordOffBits) << wordOffBits), + alloc = Bool(false)) + io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || s2_valid_uncached) && fq.io.enq.ready + io.mem.acquire.bits := cachedGetMessage + when (s2_uncached) { + assert(!s2_valid_masked || !s2_hit, "cache hit on uncached access") + io.mem.acquire.bits := uncachedGetMessage + when (isWrite(s2_req.cmd)) { + assert(!s2_valid || !isRead(s2_req.cmd), "uncached AMOs are unsupported") + io.mem.acquire.bits := uncachedPutMessage + } + } when (io.mem.acquire.fire()) { grant_wait := true } // grant val grantIsRefill = io.mem.grant.bits.hasMultibeatData() - val grantHasData = io.mem.grant.bits.hasData() - val grantIsUncached = grantHasData && !grantIsRefill + val grantIsVoluntary = io.mem.grant.bits.isVoluntary() + 
val grantIsUncached = !grantIsRefill && !grantIsVoluntary when (io.mem.grant.valid) { - assert(grantIsRefill === io.mem.grant.bits.requiresAck(), "") - assert(!grantIsUncached, "TODO uncached") + assert(grant_wait || grantIsVoluntary, "unexpected grant") + when (grantIsUncached) { s2_data := io.mem.grant.bits.data } } val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles) val grantDone = refillDone || grantIsUncached @@ -227,7 +257,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // data refill dataArb.io.in(1).valid := grantIsRefill && io.mem.grant.valid io.mem.grant.ready := true - assert(dataArb.io.in(1).ready || !dataArb.io.in(1).valid, "") + assert(dataArb.io.in(1).ready || !dataArb.io.in(1).valid) dataArb.io.in(1).bits.write := true dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits dataArb.io.in(1).bits.way_en := UIntToOH(replacer.way) @@ -235,12 +265,21 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes) // tag updates on refill metaWriteArb.io.in(1).valid := refillDone - assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready, "") + assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready) metaWriteArb.io.in(1).bits.way_en := UIntToOH(replacer.way) metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB) metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd) metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) + // finish + fq.io.enq.valid := io.mem.grant.fire() && io.mem.grant.bits.requiresAck() && (!grantIsRefill || refillDone) + fq.io.enq.bits := io.mem.grant.bits.makeFinish() + io.mem.finish <> fq.io.deq + when (fq.io.enq.valid) { + assert(fq.io.enq.ready) + replacer.miss + } + // probe val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr) 
metaReadArb.io.in(0).valid := io.mem.probe.valid && !block_probe @@ -248,29 +287,24 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { metaReadArb.io.in(0).bits.idx := io.mem.probe.bits.addr_block metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays) - // finish - fq.io.enq.valid := refillDone - fq.io.enq.bits := io.mem.grant.bits.makeFinish() - io.mem.finish <> fq.io.deq - when (fq.io.enq.valid) { - assert(fq.io.enq.ready, "") - replacer.miss - } - // release val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles) val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback) - val new_coh = Wire(init = s2_hit_state.onProbe(probe_bits)) - val release_way = Wire(init = s2_hit_way) + val releaseWay = Wire(init = s2_hit_way) val releaseRejected = io.mem.release.valid && !io.mem.release.ready val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire()) val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected) val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid)) io.mem.release.valid := s2_release_data_valid io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits) - when (s2_valid_miss && s2_repl_dirty) { + val voluntaryReleaseMessage = s2_hit_state.makeVoluntaryWriteback(UInt(0), UInt(0)) + val voluntaryNewCoh = s2_hit_state.onCacheControl(M_FLUSH) + val probeResponseMessage = s2_hit_state.makeRelease(probe_bits) + val probeNewCoh = s2_hit_state.onProbe(probe_bits) + val newCoh = Wire(init = probeNewCoh) + when (s2_valid_cached_miss && s2_victim_dirty) { release_state := s_voluntary_writeback - probe_bits.addr_block := Cat(s2_repl.tag, s2_req.addr(idxMSB, idxLSB)) + probe_bits.addr_block := Cat(s2_victim_state.tag, s2_req.addr(idxMSB, idxLSB)) } when (s2_probe) { when (s2_hit_dirty) { release_state := s_probe_rep_dirty } @@ -285,13 +319,13 @@ class DCache(implicit p: 
Parameters) extends L1HellaCacheModule()(p) { io.mem.release.valid := true } when (release_state === s_probe_rep_clean || release_state === s_probe_rep_dirty) { - io.mem.release.bits := s2_hit_state.makeRelease(probe_bits) + io.mem.release.bits := probeResponseMessage when (releaseDone) { release_state := s_probe_write_meta } } when (release_state === s_voluntary_writeback || release_state === s_voluntary_write_meta) { - io.mem.release.bits := s2_hit_state.makeVoluntaryWriteback(UInt(0), UInt(0)) - new_coh := s2_hit_state.onCacheControl(M_FLUSH) - release_way := UIntToOH(replacer.way) + io.mem.release.bits := voluntaryReleaseMessage + newCoh := voluntaryNewCoh + releaseWay := UIntToOH(replacer.way) when (releaseDone) { release_state := s_voluntary_write_meta } } when (s2_probe && !io.mem.release.fire()) { s1_nack := true } @@ -305,20 +339,28 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { dataArb.io.in(2).bits.way_en := ~UInt(0, nWays) metaWriteArb.io.in(2).valid := (release_state === s_voluntary_write_meta || release_state === s_probe_write_meta) - metaWriteArb.io.in(2).bits.way_en := release_way + metaWriteArb.io.in(2).bits.way_en := releaseWay metaWriteArb.io.in(2).bits.idx := io.mem.release.bits.full_addr()(idxMSB, idxLSB) - metaWriteArb.io.in(2).bits.data.coh := new_coh + metaWriteArb.io.in(2).bits.data.coh := newCoh metaWriteArb.io.in(2).bits.data.tag := io.mem.release.bits.full_addr()(paddrBits-1, untagBits) when (metaWriteArb.io.in(2).fire()) { release_state := s_ready } - // response - io.cpu.replay_next := io.mem.grant.valid && grantIsUncached - io.cpu.resp.valid := s2_valid_hit || io.cpu.resp.bits.replay - io.cpu.resp.bits := s2_req // TODO uncached - io.cpu.resp.bits.has_data := isRead(s2_req.cmd) // TODO uncached - io.cpu.resp.bits.replay := Reg(next = io.cpu.replay_next) + // cached response + io.cpu.resp.valid := s2_valid_hit + io.cpu.resp.bits := s2_req + io.cpu.resp.bits.has_data := isRead(s2_req.cmd) + 
io.cpu.resp.bits.replay := false io.cpu.ordered := !(s1_valid || s2_valid || grant_wait) + // uncached response + io.cpu.replay_next := io.mem.grant.valid && grantIsUncached + val doUncachedResp = Reg(next = io.cpu.replay_next) + when (doUncachedResp) { + assert(!s2_valid_hit) + io.cpu.resp.valid := true + io.cpu.resp.bits.replay := true + } + // load data subword mux/sign extension val s2_word_idx = s2_req.addr(log2Up(rowWords*coreDataBytes)-1, log2Up(wordBytes)) val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits))) @@ -338,6 +380,5 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { pstore1_storegen_data := amoalu.io.out } else { assert(!(s1_valid_masked && isRead(s1_req.cmd) && isWrite(s1_req.cmd)), "unsupported D$ operation") - assert(!pstore_drain_structural, "???") } } From f14d87e327d9f7a8892c0dcea062b9d2e346d5c4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 23 May 2016 17:51:08 -0700 Subject: [PATCH 1023/1087] Support larger I$ sets when VM is disabled --- rocket/src/main/scala/frontend.scala | 2 +- rocket/src/main/scala/icache.scala | 27 +++++++++++++-------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 5d67e158..e1e24316 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -103,7 +103,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa io.mem <> icache.io.mem icache.io.req.valid := !stall && !s0_same_block - icache.io.req.bits.idx := io.cpu.npc + icache.io.req.bits.addr := io.cpu.npc icache.io.invalidate := io.cpu.flush_icache icache.io.s1_ppn := tlb.io.resp.ppn icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 94c7822e..df01dc34 100644 --- 
a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -14,8 +14,8 @@ trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters { val refillCycles = refillCyclesPerBeat*outerDataBeats } -class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) { - val idx = UInt(width = pgIdxBits) +class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters { + val addr = UInt(width = vaddrBits) } class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters { @@ -35,7 +35,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara } require(isPow2(nSets) && isPow2(nWays)) require(isPow2(coreInstBytes)) - require(pgIdxBits >= untagBits) + require(!usingVM || pgIdxBits >= untagBits) val s_ready :: s_request :: s_refill_wait :: s_refill :: Nil = Enum(UInt(), 4) val state = Reg(init=s_ready) @@ -47,27 +47,26 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara val s1_any_tag_hit = Wire(Bool()) val s1_valid = Reg(init=Bool(false)) - val s1_pgoff = Reg(UInt(width = pgIdxBits)) - val s1_addr = Cat(io.s1_ppn, s1_pgoff).toUInt - val s1_tag = s1_addr(tagBits+untagBits-1,untagBits) + val s1_vaddr = Reg(UInt()) + val s1_paddr = Cat(io.s1_ppn, s1_vaddr(pgIdxBits-1,0)).toUInt + val s1_tag = s1_paddr(tagBits+untagBits-1,untagBits) val s0_valid = io.req.valid || s1_valid && stall - val s0_pgoff = Mux(s1_valid && stall, s1_pgoff, io.req.bits.idx) + val s0_vaddr = Mux(s1_valid && stall, s1_vaddr, io.req.bits.addr) s1_valid := io.req.valid && rdy || s1_valid && stall && !io.s1_kill when (io.req.valid && rdy) { - s1_pgoff := io.req.bits.idx + s1_vaddr := io.req.bits.addr } val out_valid = s1_valid && !io.s1_kill && state === s_ready - val s1_idx = s1_addr(untagBits-1,blockOffBits) - val s1_offset = s1_addr(blockOffBits-1,0) + val s1_idx = s1_vaddr(untagBits-1,blockOffBits) val s1_hit = out_valid && s1_any_tag_hit val s1_miss = 
out_valid && !s1_any_tag_hit rdy := state === s_ready && !s1_miss when (s1_valid && state === s_ready && s1_miss) { - refill_addr := s1_addr + refill_addr := s1_paddr } val refill_tag = refill_addr(tagBits+untagBits-1,untagBits) @@ -79,7 +78,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0) val entagbits = code.width(tagBits) val tag_array = SeqMem(nSets, Vec(nWays, Bits(width = entagbits))) - val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid) + val tag_rdata = tag_array.read(s0_vaddr(untagBits-1,blockOffBits), !refill_done && s0_valid) when (refill_done) { val tag = code.encode(refill_tag).toUInt tag_array.write(s1_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _)) @@ -102,7 +101,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara val s1_dout = Wire(Vec(nWays, Bits(width = rowBits))) for (i <- 0 until nWays) { - val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool + val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_vaddr(untagBits-1,blockOffBits))).toBool val tag_out = tag_rdata(i) val s1_tag_disparity = code.decode(tag_out).error s1_tag_match(i) := tag_out(tagBits-1,0) === s1_tag @@ -119,7 +118,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara if(refillCycles > 1) data_array.write(Cat(s1_idx, refill_cnt), e_d) else data_array.write(s1_idx, e_d) } - val s0_raddr = s0_pgoff(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0)) + val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0)) s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid) } From e0addb5723b3cb8c82c8bf86426c3c87c2fbbfa1 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 May 2016 15:45:35 -0700 Subject: [PATCH 1024/1087] Support uncached AMOs in 
blocking D$ --- rocket/src/main/scala/dcache.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 90ca3dd8..e66ca6ab 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -230,14 +230,24 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { data = Fill(beatWords, pstore1_storegen.data), wmask = pstore1_storegen.mask << (s2_req.addr(beatOffBits-1, wordOffBits) << wordOffBits), alloc = Bool(false)) + val uncachedPutAtomicMessage = PutAtomic( + client_xact_id = UInt(0), + addr_block = s2_req.addr(paddrBits-1, blockOffBits), + addr_beat = s2_req.addr(blockOffBits-1, beatOffBits), + addr_byte = s2_req.addr(beatOffBits-1, 0), + atomic_opcode = s2_req.cmd, + operand_size = s2_req.typ, + data = Fill(beatWords, pstore1_storegen.data)) io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || s2_valid_uncached) && fq.io.enq.ready io.mem.acquire.bits := cachedGetMessage when (s2_uncached) { assert(!s2_valid_masked || !s2_hit, "cache hit on uncached access") io.mem.acquire.bits := uncachedGetMessage when (isWrite(s2_req.cmd)) { - assert(!s2_valid || !isRead(s2_req.cmd), "uncached AMOs are unsupported") io.mem.acquire.bits := uncachedPutMessage + when (pstore1_amo) { + io.mem.acquire.bits := uncachedPutAtomicMessage + } } } when (io.mem.acquire.fire()) { grant_wait := true } From 5dac7b818d0954915c249fed2a3c6df4e80a7753 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 May 2016 15:45:52 -0700 Subject: [PATCH 1025/1087] Support set associativity in blocking D$ --- rocket/src/main/scala/dcache.scala | 63 ++++++++++++++++-------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index e66ca6ab..df089d4f 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -25,7 
+25,7 @@ class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { val addr = io.req.bits.addr >> rowOffBits for (w <- 0 until nWays) { val array = SeqMem(nSets*refillCycles, Vec(rowBytes, Bits(width=8))) - val valid = io.req.valid && io.req.bits.way_en(w) + val valid = io.req.valid && (Bool(nWays == 1) || io.req.bits.way_en(w)) when (valid && io.req.bits.write) { val data = Vec.tabulate(rowBytes)(i => io.req.bits.wdata(8*(i+1)-1, 8*i)) array.write(addr, data, io.req.bits.wmask.toBools) @@ -43,7 +43,6 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val fq = Module(new FinishQueue(1)) - require(nWays == 1) // TODO associativity require(rowBits == encRowBits) // no ECC require(refillCyclesPerBeat == 1) require(rowBits >= coreDataBits) @@ -79,6 +78,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val pstore1_valid = Wire(Bool()) val pstore2_valid = Reg(Bool()) val inWriteback = release_state === s_voluntary_writeback || release_state === s_probe_rep_dirty + val releaseWay = Wire(UInt()) io.cpu.req.ready := (release_state === s_ready) && !grant_wait && !s1_nack // hit initiation path @@ -105,10 +105,13 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (s1_valid && tlb.io.resp.miss) { s1_nack := true } val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) - val s1_tag = Mux(s1_probe || inWriteback, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits)) + val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits)) val s1_hit_way = Cat(meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag).reverse) - val s1_hit_state = Mux1H(s1_hit_way, meta.io.resp.map(_.coh)) - val s1_data = Mux1H(s1_hit_way, data.io.resp) // retime into s2 if critical + val s1_hit_state = ClientMetadata.onReset.fromBits( + meta.io.resp.map(r => Mux(r.tag === s1_tag, r.coh.toBits, UInt(0))) + .reduce (_|_)) + val s1_data_way = 
Mux(inWriteback, releaseWay, s1_hit_way) + val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) val s2_probe = Reg(next=s1_probe, init=Bool(false)) @@ -120,17 +123,20 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { s2_req.addr := s1_paddr } val s2_data = RegEnable(s1_data, s1_valid || inWriteback) - val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked || s1_probe) - val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked || s1_probe) - val s2_hit = s2_hit_way.orR && s2_hit_state.isHit(s2_req.cmd) - val s2_hit_dirty = s2_hit && s2_hit_state.requiresVoluntaryWriteback() + val s2_probe_way = RegEnable(s1_hit_way, s1_probe) + val s2_probe_state = RegEnable(s1_hit_state, s1_probe) + val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked) + val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked) + val s2_hit = s2_hit_state.isHit(s2_req.cmd) val s2_valid_hit = s2_valid_masked && s2_hit val s2_valid_miss = s2_valid_masked && !s2_hit && !(pstore1_valid || pstore2_valid) val s2_uncached = !addrMap.isCacheable(s2_req.addr) val s2_valid_cached_miss = s2_valid_miss && !s2_uncached val s2_valid_uncached = s2_valid_miss && s2_uncached - val s2_victim_state = RegEnable(meta.io.resp(replacer.way), s1_valid_not_nacked) - val s2_victim_dirty = s2_victim_state.coh.requiresVoluntaryWriteback() + val s2_victim_way = Mux(s2_hit_state.isValid(), s2_hit_way, UIntToOH(replacer.way)) + val s2_victim_tag = RegEnable(meta.io.resp(replacer.way).tag, s1_valid_not_nacked) + val s2_victim_state = Mux(s2_hit_state.isValid(), s2_hit_state, RegEnable(meta.io.resp(replacer.way).coh, s1_valid_not_nacked)) + val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback() io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && io.mem.acquire.ready) when (s2_valid && !s2_valid_hit) { s1_nack := true } @@ -181,7 +187,7 @@ class DCache(implicit p: Parameters) 
extends L1HellaCacheModule()(p) { pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain s2_store_valid || pstore1_held } - val advance_pstore1 = pstore1_valid && !(pstore2_valid && !pstore_drain) + val advance_pstore1 = pstore1_valid && (pstore2_valid === pstore_drain) pstore2_valid := pstore2_valid && !pstore_drain || advance_pstore1 val pstore2_addr = RegEnable(pstore1_addr, advance_pstore1) val pstore2_way = RegEnable(pstore1_way, advance_pstore1) @@ -206,7 +212,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd) metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_hit_state =/= s2_new_hit_state) || (s2_valid_cached_miss && !s2_victim_dirty) - metaWriteArb.io.in(0).bits.way_en := Mux(s2_hit, s2_hit_way, UIntToOH(replacer.way)) + metaWriteArb.io.in(0).bits.way_en := s2_victim_way metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB) metaWriteArb.io.in(0).bits.data.coh := Mux(s2_hit, s2_new_hit_state, ClientMetadata.onReset) metaWriteArb.io.in(0).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) @@ -241,7 +247,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.mem.acquire.valid := ((s2_valid_cached_miss && !s2_victim_dirty) || s2_valid_uncached) && fq.io.enq.ready io.mem.acquire.bits := cachedGetMessage when (s2_uncached) { - assert(!s2_valid_masked || !s2_hit, "cache hit on uncached access") + assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access") io.mem.acquire.bits := uncachedGetMessage when (isWrite(s2_req.cmd)) { io.mem.acquire.bits := uncachedPutMessage @@ -270,13 +276,13 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { assert(dataArb.io.in(1).ready || !dataArb.io.in(1).valid) dataArb.io.in(1).bits.write := true dataArb.io.in(1).bits.addr := Cat(s2_req.addr(paddrBits-1, blockOffBits), io.mem.grant.bits.addr_beat) << beatOffBits - dataArb.io.in(1).bits.way_en := 
UIntToOH(replacer.way) + dataArb.io.in(1).bits.way_en := s2_victim_way dataArb.io.in(1).bits.wdata := io.mem.grant.bits.data dataArb.io.in(1).bits.wmask := ~UInt(0, rowBytes) // tag updates on refill metaWriteArb.io.in(1).valid := refillDone assert(!metaWriteArb.io.in(1).valid || metaWriteArb.io.in(1).ready) - metaWriteArb.io.in(1).bits.way_en := UIntToOH(replacer.way) + metaWriteArb.io.in(1).bits.way_en := s2_victim_way metaWriteArb.io.in(1).bits.idx := s2_req.addr(idxMSB, idxLSB) metaWriteArb.io.in(1).bits.data.coh := s2_hit_state.onGrant(io.mem.grant.bits, s2_req.cmd) metaWriteArb.io.in(1).bits.data.tag := s2_req.addr(paddrBits-1, untagBits) @@ -285,10 +291,8 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { fq.io.enq.valid := io.mem.grant.fire() && io.mem.grant.bits.requiresAck() && (!grantIsRefill || refillDone) fq.io.enq.bits := io.mem.grant.bits.makeFinish() io.mem.finish <> fq.io.deq - when (fq.io.enq.valid) { - assert(fq.io.enq.ready) - replacer.miss - } + when (fq.io.enq.valid) { assert(fq.io.enq.ready) } + when (refillDone) { replacer.miss } // probe val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr) @@ -300,25 +304,26 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // release val (writebackCount, writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles) val releaseDone = writebackDone || (io.mem.release.fire() && !inWriteback) - val releaseWay = Wire(init = s2_hit_way) val releaseRejected = io.mem.release.valid && !io.mem.release.ready val s1_release_data_valid = Reg(next = dataArb.io.in(2).fire()) val s2_release_data_valid = Reg(next = s1_release_data_valid && !releaseRejected) val releaseDataBeat = Cat(UInt(0), writebackCount) + Mux(releaseRejected, UInt(0), s1_release_data_valid + Cat(UInt(0), s2_release_data_valid)) io.mem.release.valid := s2_release_data_valid io.mem.release.bits := ClientMetadata.onReset.makeRelease(probe_bits) - val voluntaryReleaseMessage 
= s2_hit_state.makeVoluntaryWriteback(UInt(0), UInt(0)) - val voluntaryNewCoh = s2_hit_state.onCacheControl(M_FLUSH) - val probeResponseMessage = s2_hit_state.makeRelease(probe_bits) - val probeNewCoh = s2_hit_state.onProbe(probe_bits) + val voluntaryReleaseMessage = s2_victim_state.makeVoluntaryWriteback(UInt(0), UInt(0)) + val voluntaryNewCoh = s2_victim_state.onCacheControl(M_FLUSH) + val probeResponseMessage = s2_probe_state.makeRelease(probe_bits) + val probeNewCoh = s2_probe_state.onProbe(probe_bits) val newCoh = Wire(init = probeNewCoh) + releaseWay := s2_probe_way when (s2_valid_cached_miss && s2_victim_dirty) { + assert(!s2_hit_state.isValid()) release_state := s_voluntary_writeback - probe_bits.addr_block := Cat(s2_victim_state.tag, s2_req.addr(idxMSB, idxLSB)) + probe_bits.addr_block := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB)) } when (s2_probe) { - when (s2_hit_dirty) { release_state := s_probe_rep_dirty } - .elsewhen (s2_hit) { release_state := s_probe_rep_clean } + when (s2_probe_state.requiresVoluntaryWriteback()) { release_state := s_probe_rep_dirty } + .elsewhen (s2_probe_state.isValid()) { release_state := s_probe_rep_clean } .otherwise { io.mem.release.valid := true release_state := s_probe_rep_miss @@ -335,7 +340,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (release_state === s_voluntary_writeback || release_state === s_voluntary_write_meta) { io.mem.release.bits := voluntaryReleaseMessage newCoh := voluntaryNewCoh - releaseWay := UIntToOH(replacer.way) + releaseWay := s2_victim_way when (releaseDone) { release_state := s_voluntary_write_meta } } when (s2_probe && !io.mem.release.fire()) { s1_nack := true } From 4605b616c1f09d56e3452c2af1ef918e340a74a0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 24 May 2016 16:26:07 -0700 Subject: [PATCH 1026/1087] Fix bug in D$ AMO/storegen logic --- rocket/src/main/scala/dcache.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index df089d4f..fa32995c 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -387,9 +387,9 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // AMOs if (usingAtomics) { val amoalu = Module(new AMOALU) - amoalu.io.addr := s2_req.addr - amoalu.io.cmd := s2_req.cmd - amoalu.io.typ := s2_req.typ + amoalu.io.addr := pstore1_addr + amoalu.io.cmd := pstore1_cmd + amoalu.io.typ := pstore1_typ amoalu.io.lhs := s2_data_word amoalu.io.rhs := pstore1_data pstore1_storegen_data := amoalu.io.out From 9aa724706e1163c277326371a706a65719dc9c35 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 May 2016 14:26:45 -0700 Subject: [PATCH 1027/1087] Don't include RV64 instructions in RV32 decode table --- rocket/src/main/scala/idecode.scala | 448 ++++++++++++++-------------- rocket/src/main/scala/rocket.scala | 14 +- 2 files changed, 229 insertions(+), 233 deletions(-) diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index d719d626..ec16f10b 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -10,8 +10,6 @@ import cde.Parameters abstract trait DecodeConstants extends HasCoreParameters { - val xpr64 = Bool(xLen == 64) - val table: Array[(BitPat, List[BitPat])] } @@ -44,15 +42,15 @@ class IntCtrlSigs extends Bundle { val amo = Bool() def default: List[BitPat] = - // jal renf1 fence.i - // | jalr | renf2 | - // fp_val| | renx2 | | renf3 | - // | rocc| | | renx1 s_alu1 mem_val | | | wfd | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | - // | | | | | | | | | | | | | | | | | | | | | wxd | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - List(N, X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X) + // jal renf1 fence.i + // val | jalr | 
renf2 | + // | fp_val| | renx2 | | renf3 | + // | | rocc| | | renx1 s_alu1 mem_val | | | wfd | + // | | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | + // | | | | | | | | | | | | | | | | | | | | | wxd | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + List(N,X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X) def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = { val decoder = DecodeLogic(inst, default, table) @@ -64,252 +62,246 @@ class IntCtrlSigs extends Bundle { } } -class XDecode(implicit val p: Parameters) extends DecodeConstants +class IDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - // jal renf1 fence.i - // | jalr | renf2 | - // fp_val| | renx2 | | renf3 | - // | rocc| | | renx1 s_alu1 mem_val | | | wfd | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | - // | | | | | | | | | | | | | | | | | | | | | wxd | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - BNE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - BEQ-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - BLT-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - BLTU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - BGE-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - BGEU-> List(Y, N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BNE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BEQ-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, 
MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BLT-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BLTU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BGE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BGEU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - JAL-> List(Y, N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - JALR-> List(Y, N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - AUIPC-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + JAL-> List(Y,N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + JALR-> List(Y,N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + AUIPC-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - LB-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N), - LH-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N), - LW-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N), - LD-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N), - LBU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N), - LHU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N), - LWU-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N), - SB-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N), - SH-> List(Y, 
N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N), - SW-> List(Y, N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N), - SD-> List(xpr64,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N), + LB-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N), + LH-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N), + LW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N), + LBU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N), + LHU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N), + SB-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N), + SH-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N), + SW-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N), - LUI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ADDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLTI -> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLTIU-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ANDI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - XORI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, 
N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRLI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRAI-> List(Y, N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ADD-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SUB-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLT-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLTU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - AND-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - OR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - XOR-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRA-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + LUI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ADDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLTI -> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLTIU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ANDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + XORI-> 
List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRAI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ADD-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SUB-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLT-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLTU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + AND-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + OR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + XOR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRA-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ADDIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRLIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRAIW-> List(xpr64,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ADDW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, 
MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SUBW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRLW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRAW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + FENCE-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N), + FENCE_I-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,Y,N,N), - MUL-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - MULH-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - MULHU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - MULHSU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - MULW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + SFENCE_VM-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SCALL-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SBREAK-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + MRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + WFI-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + CSRRW-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), + CSRRS-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, 
DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), + CSRRC-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N), + CSRRWI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), + CSRRSI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), + CSRRCI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N)) +} - DIV-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - DIVU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - REM-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - REMU-> List(Y, N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - DIVW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - DIVUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - REMW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - REMUW-> List(xpr64,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), +class I64Decode(implicit val p: Parameters) extends DecodeConstants +{ + val table: Array[(BitPat, List[BitPat])] = Array( + LD-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N), + LWU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N), + SD-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N), - FENCE-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N), - FENCE_I-> List(Y, N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
N,N,N,N,N,N,CSR.N,Y,N,N), + ADDIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRAIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ADDW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SUBW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRAW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N)) +} - SFENCE_VM-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - SCALL-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - SBREAK-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - SRET-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - MRET-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - WFI-> List(Y, N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - CSRRW-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), - CSRRS-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), - CSRRC-> List(Y, N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N), - CSRRWI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, 
N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), - CSRRSI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), - CSRRCI-> List(Y, N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N)) +class MDecode(implicit val p: Parameters) extends DecodeConstants +{ + val table: Array[(BitPat, List[BitPat])] = Array( + MUL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + MULH-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + MULHU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + MULHSU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + + DIV-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + DIVU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REM-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REMU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N)) +} + +class M64Decode(implicit val p: Parameters) extends DecodeConstants +{ + val table: Array[(BitPat, List[BitPat])] = Array( + MULW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + + DIVW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + DIVUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REMW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REMUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N)) } class ADecode(implicit val p: Parameters) extends DecodeConstants { val table: 
Array[(BitPat, List[BitPat])] = Array( - // jal renf1 fence.i - // | jalr | renf2 | - // fp_val| | renx2 | | renf3 | - // | rocc| | | renx1 s_alu1 mem_val | | | wfd | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | - // | | | | | | | | | | | | | | | | | | | | | wxd | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - AMOADD_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOXOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOSWAP_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOAND_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOOR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMIN_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMINU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAX_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAXU_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOADD_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOSWAP_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOXOR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOAND_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOOR_D-> 
List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMIN_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMINU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAX_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAXU_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOADD_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOXOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOSWAP_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOAND_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMIN_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMINU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAX_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAXU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - LR_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - LR_D-> List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - SC_W-> List(Y, N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - SC_D-> 
List(xpr64,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y)) + LR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + SC_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y)) +} + +class A64Decode(implicit val p: Parameters) extends DecodeConstants +{ + val table: Array[(BitPat, List[BitPat])] = Array( + AMOADD_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOSWAP_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOXOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOAND_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMIN_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMINU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAX_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAXU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + + LR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + SC_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y)) } class FDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - // jal renf1 fence.i - // | jalr | renf2 | - // fp_val| | renx2 | | renf3 | - // | rocc| | | renx1 s_alu1 mem_val | | | wfd | - // val | | br| | | | s_alu2 | imm 
dw alu | mem_cmd mem_type| | | | div | - // | | | | | | | | | | | | | | | | | | | | | wxd | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - FCVT_S_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N), - FSGNJ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSGNJ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSGNJX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSGNJX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSGNJN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSGNJN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMIN_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMIN_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMAX_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMAX_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMUL_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMUL_D-> List(Y, 
Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FNMADD_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FNMADD_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FNMSUB_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FNMSUB_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FCLASS_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCLASS_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FMV_X_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FMV_X_D-> List(xpr64,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_W_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_W_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_WU_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_WU_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_L_S-> List(xpr64,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_L_D-> List(xpr64,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_LU_S-> 
List(xpr64,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_LU_D-> List(xpr64,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FEQ_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FEQ_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FLT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FLT_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FLE_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FLE_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FMV_S_X-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FMV_D_X-> List(xpr64,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_S_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_W-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_S_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_WU-> List(Y, Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_S_L-> List(xpr64,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_L-> List(xpr64,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_S_LU-> List(xpr64,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_LU-> List(xpr64,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FLW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, 
N,N,N,Y,N,N,CSR.N,N,N,N), - FLD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N), - FSW-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N), - FSD-> List(Y, Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N)) + FCVT_S_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N), + FSGNJ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMIN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMIN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMAX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMAX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), 
+ FMUL_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMUL_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FCLASS_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCLASS_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FMV_X_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_W_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_W_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_WU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_WU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FEQ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FEQ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, 
A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLE_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLE_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FMV_S_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FLW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N), + FLD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N), + FSW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N), + FSD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N)) +} + +class F64Decode(implicit val p: Parameters) extends DecodeConstants +{ + val table: Array[(BitPat, List[BitPat])] = Array( + FMV_X_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_L_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_L_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_LU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_LU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FMV_D_X-> 
List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N)) } class FDivSqrtDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - FDIV_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FDIV_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSQRT_S-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSQRT_D-> List(Y, Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N)) + FDIV_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FDIV_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSQRT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSQRT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N)) } class RoCCDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - // jal renf1 fence.i - // | jalr | renf2 | - // fp_val| | renx2 | | renf3 | - // | rocc| | | renx1 s_alu1 mem_val | | | wfd | - // val | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | - // | | | | | | | | | | | | | | | | | | | | | wxd | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - CUSTOM0-> List(Y, 
N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM0_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM0_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM0_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM0_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM0_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM1-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM1_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM1_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM1_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM1_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM1_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM2-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM2_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM2_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM2_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM2_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM2_RD_RS1_RS2->List(Y, 
N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM3-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM3_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM3_RS1_RS2-> List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM3_RD-> List(Y, N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM3_RD_RS1-> List(Y, N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM3_RD_RS1_RS2->List(Y, N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N)) + CUSTOM0-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM0_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM0_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM0_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM0_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM0_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM1-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM1_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM1_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM1_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM1_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, 
DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM1_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM2-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM2_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM2_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM2_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM2_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM2_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM3-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM3_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM3_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM3_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM3_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM3_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N)) } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index af09c7ac..24f62814 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -120,11 +120,15 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val rocc = new RoCCInterface().flip } - var decode_table = new XDecode().table - if (usingAtomics) decode_table ++= new ADecode().table - if (usingFPU) 
decode_table ++= new FDecode().table - if (usingFPU && usingFDivSqrt) decode_table ++= new FDivSqrtDecode().table - if (usingRoCC) decode_table ++= new RoCCDecode().table + val decode_table = { + (if (true) new MDecode +: (if (xLen > 32) Seq(new M64Decode) else Nil) else Nil) ++: + (if (usingAtomics) new ADecode +: (if (xLen > 32) Seq(new A64Decode) else Nil) else Nil) ++: + (if (usingFPU) new FDecode +: (if (xLen > 32) Seq(new F64Decode) else Nil) else Nil) ++: + (if (usingFPU && usingFDivSqrt) Some(new FDivSqrtDecode) else None) ++: + (if (usingRoCC) Some(new RoCCDecode) else None) ++: + (if (xLen > 32) Some(new I64Decode) else None) ++: + Seq(new IDecode) + } flatMap(_.table) val ex_ctrl = Reg(new IntCtrlSigs) val mem_ctrl = Reg(new IntCtrlSigs) From 5442b89664a56cff1361ea922c7a1ca5bd7258d9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 May 2016 14:27:02 -0700 Subject: [PATCH 1028/1087] Remove unnecessary muxes in RV32 MulDiv --- rocket/src/main/scala/multiplier.scala | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/rocket/src/main/scala/multiplier.scala b/rocket/src/main/scala/multiplier.scala index ce063399..9770d632 100644 --- a/rocket/src/main/scala/multiplier.scala +++ b/rocket/src/main/scala/multiplier.scala @@ -58,13 +58,16 @@ class MulDiv( FN_MULHU -> List(Y, Y, N, N), FN_MULHSU -> List(Y, Y, Y, N))).map(_ toBool) - def sext(x: Bits, signed: Bool) = { - val sign = signed && Mux(io.req.bits.dw === DW_64, x(w-1), x(w/2-1)) - val hi = Mux(io.req.bits.dw === DW_64, x(w-1,w/2), Fill(w/2, sign)) + require(w == 32 || w == 64) + def halfWidth(req: MultiplierReq) = Bool(w > 32) && req.dw === DW_32 + + def sext(x: Bits, halfW: Bool, signed: Bool) = { + val sign = signed && Mux(halfW, x(w/2-1), x(w-1)) + val hi = Mux(halfW, Fill(w/2, sign), x(w-1,w/2)) (Cat(hi, x(w/2-1,0)), sign) } - val (lhs_in, lhs_sign) = sext(io.req.bits.in1, lhsSigned) - val (rhs_in, rhs_sign) = sext(io.req.bits.in2, rhsSigned) + val (lhs_in, 
lhs_sign) = sext(io.req.bits.in1, halfWidth(io.req.bits), lhsSigned) + val (rhs_in, rhs_sign) = sext(io.req.bits.in2, halfWidth(io.req.bits), rhsSigned) val subtractor = remainder(2*w,w) - divisor(w,0) val less = subtractor(w) @@ -143,7 +146,7 @@ class MulDiv( } io.resp.bits := req - io.resp.bits.data := Mux(req.dw === DW_32, Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0)) + io.resp.bits.data := Mux(halfWidth(req), Cat(Fill(w/2, remainder(w/2-1)), remainder(w/2-1,0)), remainder(w-1,0)) io.resp.valid := state === s_done io.req.ready := state === s_ready } From 00ea9a7d829119368842f33394f4d3c686308862 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 May 2016 15:37:32 -0700 Subject: [PATCH 1029/1087] Remove most of mstatus when user mode isn't supported --- rocket/src/main/scala/csr.scala | 10 ++++++++-- rocket/src/main/scala/rocket.scala | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 0ccce9aa..aa1ea014 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -363,7 +363,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_mstatus.prv := reg_mstatus.spp }.otherwise { when (reg_mstatus.mpp(1)) { reg_mstatus.mie := reg_mstatus.mpie } - when (reg_mstatus.mpp(0)) { reg_mstatus.sie := reg_mstatus.mpie } + .elsewhen (Bool(usingVM) && reg_mstatus.mpp(0)) { reg_mstatus.sie := reg_mstatus.mpie } reg_mstatus.mpie := false reg_mstatus.mpp := PRV.U reg_mstatus.prv := reg_mstatus.mpp @@ -388,7 +388,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_mstatus.mie := new_mstatus.mie reg_mstatus.mpie := new_mstatus.mpie - val supportedModes = Vec((PRV.M :: PRV.U :: (if (usingVM) List(PRV.S) else Nil)).map(UInt(_))) + val supportedModes = Vec((PRV.M +: (if (usingUser) Some(PRV.U) else None) ++: (if (usingVM) Seq(PRV.S) else Nil)).map(UInt(_))) if (supportedModes.size > 1) { reg_mstatus.mprv 
:= new_mstatus.mprv when (supportedModes contains new_mstatus.mpp) { reg_mstatus.mpp := new_mstatus.mpp } @@ -459,4 +459,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) io.rocc.csr.waddr := io.rw.addr io.rocc.csr.wdata := wdata io.rocc.csr.wen := wen + + if (!usingUser) { + reg_mstatus.mpp := PRV.M + reg_mstatus.prv := PRV.M + reg_mstatus.mprv := false + } } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 24f62814..823d0c09 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -14,6 +14,7 @@ case object XLen extends Field[Int] case object FetchWidth extends Field[Int] case object RetireWidth extends Field[Int] case object UseVM extends Field[Boolean] +case object UseUser extends Field[Boolean] case object UseAtomics extends Field[Boolean] case object UsePerfCounters extends Field[Boolean] case object FastLoadWord extends Field[Boolean] @@ -32,6 +33,7 @@ trait HasCoreParameters extends HasAddrMapParameters { val xLen = p(XLen) val usingVM = p(UseVM) + val usingUser = p(UseUser) val usingFPU = p(UseFPU) val usingAtomics = p(UseAtomics) val usingFDivSqrt = p(FDivSqrt) From 40f38dde63f3a93293e0bbea42633fbe28fe90e6 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 May 2016 19:44:31 -0700 Subject: [PATCH 1030/1087] Work around lack of zero-width wires in D$ --- rocket/src/main/scala/dcache.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index fa32995c..bb11c55f 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -229,12 +229,15 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { addr_byte = s2_req.addr(beatOffBits-1, 0), operand_size = s2_req.typ, alloc = Bool(false)) + val uncachedPutOffset = // TODO zero-width + if (beatBytes > wordBytes) s2_req.addr(beatOffBits-1, wordOffBits) + else UInt(0) 
val uncachedPutMessage = Put( client_xact_id = UInt(0), addr_block = s2_req.addr(paddrBits-1, blockOffBits), addr_beat = s2_req.addr(blockOffBits-1, beatOffBits), data = Fill(beatWords, pstore1_storegen.data), - wmask = pstore1_storegen.mask << (s2_req.addr(beatOffBits-1, wordOffBits) << wordOffBits), + wmask = pstore1_storegen.mask << (uncachedPutOffset << wordOffBits), alloc = Bool(false)) val uncachedPutAtomicMessage = PutAtomic( client_xact_id = UInt(0), @@ -377,7 +380,9 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { } // load data subword mux/sign extension - val s2_word_idx = s2_req.addr(log2Up(rowWords*coreDataBytes)-1, log2Up(wordBytes)) + val s2_word_idx = // TODO zero-width + if (rowBits > wordBits) s2_req.addr(log2Up(rowBits/8)-1, log2Up(wordBytes)) + else UInt(0) val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits))) val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBytes) io.cpu.resp.bits.data := loadgen.data | s2_sc_fail From 0c50bfcfb384b98ac837a5803503efaba90ce969 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 25 May 2016 21:47:48 -0700 Subject: [PATCH 1031/1087] Work around more zero-width wire cases --- rocket/src/main/scala/nbdcache.scala | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index b71a3f23..c776596c 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -160,9 +160,12 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val replay_next = Bool(OUTPUT) } + def beatOffset(addr: UInt) = // TODO zero-width + if (beatOffBits > wordOffBits) addr(beatOffBits - 1, wordOffBits) + else UInt(0) + def wordFromBeat(addr: UInt, dat: UInt) = { - val offset = addr(beatOffBits - 1, wordOffBits) - val shift = Cat(offset, UInt(0, wordOffBits + log2Up(wordBytes))) + val shift = 
Cat(beatOffset(addr), UInt(0, wordOffBits + log2Up(wordBytes))) (dat >> shift)(wordBits - 1, 0) } @@ -184,8 +187,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val storegen = new StoreGen(req.typ, req.addr, req.data, wordBytes) val loadgen = new LoadGen(req.typ, req.addr, grant_word, req_cmd_sc, wordBytes) - val beat_offset = req.addr(beatOffBits - 1, wordOffBits) - val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits))) + val beat_mask = (storegen.mask << Cat(beatOffset(req.addr), UInt(0, wordOffBits))) val beat_data = Fill(beatWords, storegen.data) val addr_block = req.addr(paddrBits - 1, blockOffBits) @@ -958,7 +960,9 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { } writeArb.io.in(0).bits.addr := s3_req.addr - val rowIdx = s3_req.addr(rowOffBits-1,offsetlsb).toUInt + val rowIdx = + if (rowOffBits > offsetlsb) s3_req.addr(rowOffBits-1,offsetlsb).toUInt + else UInt(0) val rowWMask = UInt(1) << (if(rowOffBits > offsetlsb) rowIdx else UInt(0)) writeArb.io.in(0).bits.wmask := rowWMask writeArb.io.in(0).bits.data := Fill(rowWords, s3_req.data) From 96fa1eb6add6d75633a2614327e4252376f3989d Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 26 May 2016 18:52:08 -0700 Subject: [PATCH 1032/1087] Add UInt->BitPat implicit conversion This will be removed from Chisel3, so we're putting it here to maintain compatibility. 
--- rocket/src/main/scala/util.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index c0b89bd1..1dd6eac1 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -8,6 +8,7 @@ import scala.math._ import cde.{Parameters, Field} object Util { + implicit def uintToBitPat(x: UInt): BitPat = BitPat(x) implicit def intToUInt(x: Int): UInt = UInt(x) implicit def booleanToBool(x: Boolean): Bits = Bool(x) implicit def intSeqToUIntSeq(x: Seq[Int]): Seq[UInt] = x.map(UInt(_)) From c104b57c5272fae0ca33f3c4ed49534ec3c072fa Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 26 May 2016 22:23:21 -0700 Subject: [PATCH 1033/1087] Use BitPat implicit conversion in instruction decoder --- rocket/src/main/scala/idecode.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index ec16f10b..6b02baa1 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -7,6 +7,7 @@ import Instructions._ import uncore.constants.MemoryOpConstants._ import ALU._ import cde.Parameters +import Util._ abstract trait DecodeConstants extends HasCoreParameters { From 3ee514492309866f485585904a6ab30251299ccf Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 27 May 2016 12:24:17 -0700 Subject: [PATCH 1034/1087] Fix TLB tag check logic when ASIDs are present --- rocket/src/main/scala/tlb.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 909f49e1..99742a56 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -92,7 +92,7 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { val r_refill_waddr = Reg(tag_cam.io.write_addr) val r_req = Reg(new TLBReq) - val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt + val lookup_tag = Cat(io.req.bits.asid, 
io.req.bits.vpn(vpnBits-1,0)).toUInt tag_cam.io.tag := lookup_tag tag_cam.io.write := state === s_wait && io.ptw.resp.valid tag_cam.io.write_tag := r_refill_tag From 51379621d6598420e411ce5225625e3d9c89eef4 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 31 May 2016 19:27:28 -0700 Subject: [PATCH 1035/1087] Flush blocking D$ on FENCE.I --- rocket/src/main/scala/dcache.scala | 99 +++++-- rocket/src/main/scala/idecode.scala | 386 +++++++++++++-------------- rocket/src/main/scala/nbdcache.scala | 4 +- rocket/src/main/scala/rocket.scala | 2 +- 4 files changed, 268 insertions(+), 223 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index bb11c55f..484a39a9 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -51,7 +51,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val replacer = p(Replacer)() def onReset = L1Metadata(UInt(0), ClientMetadata.onReset) val meta = Module(new MetadataArray(onReset _)) - val metaReadArb = Module(new Arbiter(new MetaReadReq, 2)) + val metaReadArb = Module(new Arbiter(new MetaReadReq, 3)) val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 3)) meta.io.read <> metaReadArb.io.out meta.io.write <> metaWriteArb.io.out @@ -68,12 +68,19 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s1_nack = Wire(init=Bool(false)) val s1_valid_masked = s1_valid && !io.cpu.s1_kill val s1_valid_not_nacked = s1_valid_masked && !s1_nack - val s1_req = RegEnable(io.cpu.req.bits, io.cpu.req.valid) + val s1_req = Reg(io.cpu.req.bits) + when (metaReadArb.io.out.valid) { + s1_req := io.cpu.req.bits + s1_req.addr := Cat(io.cpu.req.bits.addr >> pgIdxBits, metaReadArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0)) + } val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) + val s1_readwrite = s1_read || s1_write + val s1_flush_valid = Reg(Bool()) val s_ready :: s_grant_wait :: 
s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: Nil = Enum(UInt(), 8) val grant_wait = Reg(init=Bool(false)) + val release_ack_wait = Reg(init=Bool(false)) val release_state = Reg(init=s_ready) val pstore1_valid = Wire(Bool()) val pstore2_valid = Reg(Bool()) @@ -87,22 +94,22 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { dataArb.io.in(3).bits.addr := io.cpu.req.bits.addr dataArb.io.in(3).bits.way_en := ~UInt(0, nWays) when (!dataArb.io.in(3).ready && isRead(io.cpu.req.bits.cmd)) { io.cpu.req.ready := false } - metaReadArb.io.in(1).valid := io.cpu.req.valid - metaReadArb.io.in(1).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB) - metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays) - when (!metaReadArb.io.in(1).ready) { io.cpu.req.ready := false } + metaReadArb.io.in(2).valid := io.cpu.req.valid + metaReadArb.io.in(2).bits.idx := io.cpu.req.bits.addr(idxMSB, idxLSB) + metaReadArb.io.in(2).bits.way_en := ~UInt(0, nWays) + when (!metaReadArb.io.in(2).ready) { io.cpu.req.ready := false } // address translation val tlb = Module(new TLB) io.ptw <> tlb.io.ptw - tlb.io.req.valid := s1_valid_masked + tlb.io.req.valid := s1_valid_masked && s1_readwrite tlb.io.req.bits.passthrough := s1_req.phys tlb.io.req.bits.asid := 0 tlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits tlb.io.req.bits.instruction := false tlb.io.req.bits.store := s1_write when (!tlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := false } - when (s1_valid && tlb.io.resp.miss) { s1_nack := true } + when (s1_valid && s1_readwrite && tlb.io.resp.miss) { s1_nack := true } val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits)) @@ -112,30 +119,37 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { .reduce (_|_)) val s1_data_way = Mux(inWriteback, releaseWay, s1_hit_way) 
val s1_data = Mux1H(s1_data_way, data.io.resp) // retime into s2 if critical + val s1_victim_way = Wire(init = replacer.way) val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) val s2_probe = Reg(next=s1_probe, init=Bool(false)) val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready val s2_valid_masked = s2_valid && Reg(next = !s1_nack) val s2_req = Reg(io.cpu.req.bits) - when (s1_valid_not_nacked) { + when (s1_valid_not_nacked || s1_flush_valid) { s2_req := s1_req s2_req.addr := s1_paddr } + val s2_read = isRead(s2_req.cmd) + val s2_write = isWrite(s2_req.cmd) + val s2_readwrite = s2_read || s2_write + val s2_flush_valid = RegNext(s1_flush_valid) val s2_data = RegEnable(s1_data, s1_valid || inWriteback) val s2_probe_way = RegEnable(s1_hit_way, s1_probe) val s2_probe_state = RegEnable(s1_hit_state, s1_probe) val s2_hit_way = RegEnable(s1_hit_way, s1_valid_not_nacked) val s2_hit_state = RegEnable(s1_hit_state, s1_valid_not_nacked) val s2_hit = s2_hit_state.isHit(s2_req.cmd) - val s2_valid_hit = s2_valid_masked && s2_hit - val s2_valid_miss = s2_valid_masked && !s2_hit && !(pstore1_valid || pstore2_valid) + val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit + val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait val s2_uncached = !addrMap.isCacheable(s2_req.addr) val s2_valid_cached_miss = s2_valid_miss && !s2_uncached + val s2_victimize = s2_valid_cached_miss || s2_flush_valid val s2_valid_uncached = s2_valid_miss && s2_uncached - val s2_victim_way = Mux(s2_hit_state.isValid(), s2_hit_way, UIntToOH(replacer.way)) - val s2_victim_tag = RegEnable(meta.io.resp(replacer.way).tag, s1_valid_not_nacked) - val s2_victim_state = Mux(s2_hit_state.isValid(), s2_hit_state, RegEnable(meta.io.resp(replacer.way).coh, s1_valid_not_nacked)) + val s2_victim_way = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_way, UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || 
s1_flush_valid))) + val s2_victim_tag = RegEnable(meta.io.resp(s1_victim_way).tag, s1_valid_not_nacked || s1_flush_valid) + val s2_victim_state = Mux(s2_hit_state.isValid() && !s2_flush_valid, s2_hit_state, RegEnable(meta.io.resp(s1_victim_way).coh, s1_valid_not_nacked || s1_flush_valid)) + val s2_victim_valid = s2_victim_state.isValid() val s2_victim_dirty = s2_victim_state.requiresVoluntaryWriteback() io.cpu.s2_nack := s2_valid && !s2_valid_hit && !(s2_valid_uncached && io.mem.acquire.ready) when (s2_valid && !s2_valid_hit) { s1_nack := true } @@ -148,7 +162,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.cpu.xcpt.pf.st := s1_write && tlb.io.resp.xcpt_st assert(!(Reg(next= (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) && - io.cpu.resp.valid), + s2_valid_masked), "DCache exception occurred - cache response not killed.") // load reservations @@ -181,7 +195,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { Bool(usingAtomics) && pstore_drain_structural || (((pstore1_valid && !pstore1_amo) || pstore2_valid) && (pstore_drain_opportunistic || pstore_drain_on_miss)) pstore1_valid := { - val s2_store_valid = s2_valid_hit && isWrite(s2_req.cmd) && !s2_sc_fail + val s2_store_valid = s2_valid_hit && s2_write && !s2_sc_fail val pstore1_held = Reg(Bool()) assert(!s2_store_valid || !pstore1_held) pstore1_held := (s2_store_valid || pstore1_held) && pstore2_valid && !pstore_drain @@ -211,7 +225,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (s1_valid && s1_raw_hazard) { s1_nack := true } val s2_new_hit_state = s2_hit_state.onHit(s2_req.cmd) - metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_hit_state =/= s2_new_hit_state) || (s2_valid_cached_miss && !s2_victim_dirty) + metaWriteArb.io.in(0).valid := (s2_valid_hit && s2_hit_state =/= s2_new_hit_state) || (s2_victimize && !s2_victim_dirty) metaWriteArb.io.in(0).bits.way_en := s2_victim_way 
metaWriteArb.io.in(0).bits.idx := s2_req.addr(idxMSB, idxLSB) metaWriteArb.io.in(0).bits.data.coh := Mux(s2_hit, s2_new_hit_state, ClientMetadata.onReset) @@ -252,7 +266,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (s2_uncached) { assert(!s2_valid_masked || !s2_hit_state.isValid(), "cache hit on uncached access") io.mem.acquire.bits := uncachedGetMessage - when (isWrite(s2_req.cmd)) { + when (s2_write) { io.mem.acquire.bits := uncachedPutMessage when (pstore1_amo) { io.mem.acquire.bits := uncachedPutAtomicMessage @@ -266,8 +280,9 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val grantIsVoluntary = io.mem.grant.bits.isVoluntary() val grantIsUncached = !grantIsRefill && !grantIsVoluntary when (io.mem.grant.valid) { - assert(grant_wait || grantIsVoluntary, "unexpected grant") + assert(grant_wait || grantIsVoluntary && release_ack_wait, "unexpected grant") when (grantIsUncached) { s2_data := io.mem.grant.bits.data } + when (grantIsVoluntary) { release_ack_wait := false } } val (refillCount, refillDone) = Counter(io.mem.grant.fire() && grantIsRefill, refillCycles) val grantDone = refillDone || grantIsUncached @@ -299,10 +314,10 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // probe val block_probe = releaseInFlight || lrscValid || (s2_valid_hit && s2_lr) - metaReadArb.io.in(0).valid := io.mem.probe.valid && !block_probe - io.mem.probe.ready := metaReadArb.io.in(0).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit) - metaReadArb.io.in(0).bits.idx := io.mem.probe.bits.addr_block - metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays) + metaReadArb.io.in(1).valid := io.mem.probe.valid && !block_probe + io.mem.probe.ready := metaReadArb.io.in(1).ready && !block_probe && !s1_valid && (!s2_valid || s2_valid_hit) + metaReadArb.io.in(1).bits.idx := io.mem.probe.bits.addr_block + metaReadArb.io.in(1).bits.way_en := ~UInt(0, nWays) // release val (writebackCount, 
writebackDone) = Counter(io.mem.release.fire() && inWriteback, refillCycles) @@ -319,7 +334,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val probeNewCoh = s2_probe_state.onProbe(probe_bits) val newCoh = Wire(init = probeNewCoh) releaseWay := s2_probe_way - when (s2_valid_cached_miss && s2_victim_dirty) { + when (s2_victimize && s2_victim_dirty) { assert(!s2_hit_state.isValid()) release_state := s_voluntary_writeback probe_bits.addr_block := Cat(s2_victim_tag, s2_req.addr(idxMSB, idxLSB)) @@ -344,7 +359,10 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.mem.release.bits := voluntaryReleaseMessage newCoh := voluntaryNewCoh releaseWay := s2_victim_way - when (releaseDone) { release_state := s_voluntary_write_meta } + when (releaseDone) { + release_state := s_voluntary_write_meta + release_ack_wait := true + } } when (s2_probe && !io.mem.release.fire()) { s1_nack := true } io.mem.release.bits.addr_block := probe_bits.addr_block @@ -366,7 +384,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { // cached response io.cpu.resp.valid := s2_valid_hit io.cpu.resp.bits := s2_req - io.cpu.resp.bits.has_data := isRead(s2_req.cmd) + io.cpu.resp.bits.has_data := s2_read io.cpu.resp.bits.replay := false io.cpu.ordered := !(s1_valid || s2_valid || grant_wait) @@ -399,6 +417,33 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { amoalu.io.rhs := pstore1_data pstore1_storegen_data := amoalu.io.out } else { - assert(!(s1_valid_masked && isRead(s1_req.cmd) && isWrite(s1_req.cmd)), "unsupported D$ operation") + assert(!(s1_valid_masked && s1_read && s1_write), "unsupported D$ operation") + } + + // flushes + val flushed = Reg(init=Bool(true)) + val flushing = Reg(init=Bool(false)) + val flushCounter = Counter(nSets * nWays) + when (io.mem.acquire.fire()) { flushed := false } + when (s2_valid_masked && s2_req.cmd === M_FLUSH_ALL) { + io.cpu.s2_nack := !flushed + when (!flushed) { + 
flushing := !release_ack_wait + } + } + s1_flush_valid := metaReadArb.io.in(0).fire() && !s1_flush_valid && !s2_flush_valid && release_state === s_ready && !release_ack_wait + metaReadArb.io.in(0).valid := flushing + metaReadArb.io.in(0).bits.idx := flushCounter.value + metaReadArb.io.in(0).bits.way_en := ~UInt(0, nWays) + when (flushing) { + s1_victim_way := flushCounter.value >> log2Up(nSets) + when (s2_flush_valid) { + when (flushCounter.inc()) { + flushed := true + } + } + when (flushed && release_state === s_ready && !release_ack_wait) { + flushing := false + } } } diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index 6b02baa1..33319e93 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -43,15 +43,15 @@ class IntCtrlSigs extends Bundle { val amo = Bool() def default: List[BitPat] = - // jal renf1 fence.i - // val | jalr | renf2 | - // | fp_val| | renx2 | | renf3 | - // | | rocc| | | renx1 s_alu1 mem_val | | | wfd | - // | | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | - // | | | | | | | | | | | | | | | | | | | | | wxd | fence - // | | | | | | | | | | | | | | | | | | | | | | csr | | amo - // | | | | | | | | | | | | | | | | | | | | | | | | | | - List(N,X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X) + // jal renf1 fence.i + // val | jalr | renf2 | + // | fp_val| | renx2 | | renf3 | + // | | rocc| | | renx1 s_alu1 mem_val | | | wfd | + // | | | br| | | | s_alu2 | imm dw alu | mem_cmd mem_type| | | | div | + // | | | | | | | | | | | | | | | | | | | | | wxd | fence + // | | | | | | | | | | | | | | | | | | | | | | csr | | amo + // | | | | | | | | | | | | | | | | | | | | | | | | | | + List(N,X,X,X,X,X,X,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, X,X,X,X,X,X,CSR.X,X,X,X) def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = { val decoder = DecodeLogic(inst, default, table) @@ -66,243 +66,243 @@ class IntCtrlSigs extends 
Bundle { class IDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - BNE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - BEQ-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - BLT-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - BLTU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - BGE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - BGEU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BNE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SNE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BEQ-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SEQ, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BLT-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLT, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BLTU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SLTU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BGE-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGE, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + BGEU-> List(Y,N,N,Y,N,N,Y,Y,A2_RS2, A1_RS1, IMM_SB,DW_X, FN_SGEU, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - JAL-> List(Y,N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - JALR-> List(Y,N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - AUIPC-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + JAL-> List(Y,N,N,N,Y,N,N,N,A2_FOUR,A1_PC, IMM_UJ,DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + JALR-> List(Y,N,N,N,N,Y,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + AUIPC-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_PC, IMM_U, DW_XPR,FN_ADD, 
N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - LB-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N), - LH-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N), - LW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N), - LBU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N), - LHU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N), - SB-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N), - SH-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N), - SW-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N), + LB-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_B, N,N,N,N,N,Y,CSR.N,N,N,N), + LH-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_H, N,N,N,N,N,Y,CSR.N,N,N,N), + LW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,N), + LBU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_BU,N,N,N,N,N,Y,CSR.N,N,N,N), + LHU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_HU,N,N,N,N,N,Y,CSR.N,N,N,N), + SB-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_B, N,N,N,N,N,N,CSR.N,N,N,N), + SH-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_H, N,N,N,N,N,N,CSR.N,N,N,N), + SW-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,N,N,N,N,N,CSR.N,N,N,N), - LUI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ADDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLTI -> 
List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLTIU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ANDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - XORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRAI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ADD-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SUB-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLT-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLTU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - AND-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - OR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - XOR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRA-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + LUI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_U, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,N,N,Y,CSR.N,N,N,N), + ADDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLTI -> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLTIU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ANDI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + XORI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRLI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRAI-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ADD-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SUB-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLT-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLT, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLTU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SLTU, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + AND-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_AND, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + OR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_OR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + XOR-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_XOR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRA-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, 
DW_XPR,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - FENCE-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N), - FENCE_I-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,Y,N,N), + FENCE-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N), + FENCE_I-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FLUSH_ALL,MT_X, N,N,N,N,N,N,CSR.N,Y,N,N), - SFENCE_VM-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - SCALL-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - SBREAK-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - MRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - WFI-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - CSRRW-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), - CSRRS-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), - CSRRC-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N), - CSRRWI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), - CSRRSI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), - CSRRCI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N)) + SFENCE_VM-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SCALL-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SBREAK-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, 
N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + MRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + WFI-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + CSRRW-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), + CSRRS-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), + CSRRC-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N), + CSRRWI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), + CSRRSI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.S,N,N,N), + CSRRCI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N)) } class I64Decode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - LD-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N), - LWU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N), - SD-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N), + LD-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,N), + LWU-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_WU,N,N,N,N,N,Y,CSR.N,N,N,N), + SD-> List(Y,N,N,N,N,N,Y,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,N,N,N,N,N,CSR.N,N,N,N), - ADDIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRLIW-> 
List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRAIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - ADDW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SUBW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SLLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - SRAW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N)) + ADDIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRLIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRAIW-> List(Y,N,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + ADDW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SUBW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SUB, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SLLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SL, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRLW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SR, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + SRAW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32,FN_SRA, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N)) } class MDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - MUL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - MULH-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, 
N,N,N,N,Y,Y,CSR.N,N,N,N), - MULHU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - MULHSU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + MUL-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + MULH-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULH, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + MULHU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + MULHSU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_MULHSU,N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - DIV-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - DIVU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - REM-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - REMU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N)) + DIV-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + DIVU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REM-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REMU-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_XPR,FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N)) } class M64Decode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - MULW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + MULW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_MUL, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - DIVW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - DIVUW-> 
List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - REMW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), - REMUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N)) + DIVW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIV, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + DIVUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_DIVU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REMW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REM, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N), + REMUW-> List(Y,N,N,N,N,N,Y,Y,A2_RS2, A1_RS1, IMM_X, DW_32, FN_REMU, N,M_X, MT_X, N,N,N,N,Y,Y,CSR.N,N,N,N)) } class ADecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - AMOADD_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOXOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOSWAP_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOAND_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMIN_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMINU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAX_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAXU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOADD_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, 
MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOXOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOSWAP_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOAND_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOOR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMIN_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMINU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAX_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAXU_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - LR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), - SC_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y)) + LR_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y), + SC_W-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_W, N,N,N,N,N,Y,CSR.N,N,N,Y)) } class A64Decode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - AMOADD_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOSWAP_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOXOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOAND_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOOR_D-> 
List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMIN_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMINU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAX_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - AMOMAXU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU,MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOADD_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_ADD, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOSWAP_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_SWAP, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOXOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_XOR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOAND_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_AND, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOOR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_OR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMIN_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MIN, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMINU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MINU, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAX_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAX, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + AMOMAXU_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XA_MAXU, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - LR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), - SC_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y)) + LR_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, Y,M_XLR, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y), + SC_D-> List(Y,N,N,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, 
DW_XPR,FN_ADD, Y,M_XSC, MT_D, N,N,N,N,N,Y,CSR.N,N,N,Y)) } class FDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - FCVT_S_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N), - FSGNJ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSGNJ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSGNJX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSGNJX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSGNJN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSGNJN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMIN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMIN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMAX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMAX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMUL_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMUL_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
Y,Y,N,Y,N,N,CSR.N,N,N,N), - FMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FNMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FNMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FNMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FNMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), - FCLASS_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCLASS_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FMV_X_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_W_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_W_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_WU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_WU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FEQ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FEQ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FLT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FLT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FLE_S-> 
List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FLE_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), - FMV_S_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_S_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_S_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FLW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N), - FLD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N), - FSW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_W, N,Y,N,N,N,N,CSR.N,N,N,N), - FSD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N)) + FCVT_S_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,Y,N,N,CSR.N,N,N,N), + FSGNJ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJN_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSGNJN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMIN_S-> 
List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMIN_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMAX_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMAX_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMUL_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMUL_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMADD_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMADD_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMSUB_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FNMSUB_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,Y,Y,N,N,CSR.N,N,N,N), + FCLASS_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCLASS_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, 
FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FMV_X_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_W_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_W_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_WU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_WU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FEQ_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FEQ_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLE_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FLE_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,N,N,Y,CSR.N,N,N,N), + FMV_S_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_W-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_WU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FLW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_W, N,N,N,Y,N,N,CSR.N,N,N,N), + FLD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_I, DW_XPR,FN_ADD, Y,M_XRD, MT_D, N,N,N,Y,N,N,CSR.N,N,N,N), + FSW-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, 
MT_W, N,Y,N,N,N,N,CSR.N,N,N,N), + FSD-> List(Y,Y,N,N,N,N,N,Y,A2_IMM, A1_RS1, IMM_S, DW_XPR,FN_ADD, Y,M_XWR, MT_D, N,Y,N,N,N,N,CSR.N,N,N,N)) } class F64Decode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - FMV_X_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_L_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_L_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_LU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FCVT_LU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), - FMV_D_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_S_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_S_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), - FCVT_D_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N)) + FMV_X_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_L_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_L_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_LU_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FCVT_LU_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,N,N,N,N,Y,CSR.N,N,N,N), + FMV_D_X-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, 
DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_L-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_S_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N), + FCVT_D_LU-> List(Y,Y,N,N,N,N,N,Y,A2_X, A1_RS1, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,Y,N,N,CSR.N,N,N,N)) } class FDivSqrtDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - FDIV_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FDIV_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSQRT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), - FSQRT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N)) + FDIV_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FDIV_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSQRT_S-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N), + FSQRT_D-> List(Y,Y,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, Y,Y,N,Y,N,N,CSR.N,N,N,N)) } class RoCCDecode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( - CUSTOM0-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM0_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM0_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM0_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM0_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, 
N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM0_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM1-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM1_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM1_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM1_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM1_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM1_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM2-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM2_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM2_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM2_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM2_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM2_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM3-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM3_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM3_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), - CUSTOM3_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - 
CUSTOM3_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), - CUSTOM3_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N)) + CUSTOM0-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM0_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM0_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM0_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM0_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM0_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM1-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM1_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM1_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM1_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM1_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM1_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM2-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM2_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM2_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM2_RD-> 
List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM2_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM2_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM3-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM3_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM3_RS1_RS2-> List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,N,N), + CUSTOM3_RD-> List(Y,N,Y,N,N,N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM3_RD_RS1-> List(Y,N,Y,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N), + CUSTOM3_RD_RS1_RS2->List(Y,N,Y,N,N,N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.N,N,N,N)) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index c776596c..87a8179a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -410,7 +410,7 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { when (!io.meta_read.ready) { rpq.io.deq.ready := Bool(false) - io.replay.bits.cmd := M_NOP + io.replay.bits.cmd := M_FLUSH_ALL /* nop */ } } @@ -800,7 +800,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s2_valid = Reg(next=s1_valid_masked, init=Bool(false)) val s2_req = Reg(io.cpu.req.bits) - val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_NOP + val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL val s2_recycle = Wire(Bool()) val s2_valid_masked = Wire(Bool()) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 
823d0c09..e365b217 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -509,7 +509,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret Mux(replay_wb, wb_reg_pc, // replay mem_npc)).toUInt // mispredicted branch - io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i + io.imem.flush_icache := wb_reg_valid && wb_ctrl.fence_i && !io.dmem.s2_nack io.imem.flush_tlb := csr.io.fatc io.imem.resp.ready := !ctrl_stalld || csr.io.interrupt || take_pc_mem From 9949347569beae5ee1a60071b03fb49acf2bd52c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 1 Jun 2016 16:57:10 -0700 Subject: [PATCH 1036/1087] First stab at debug interrupts --- rocket/src/main/scala/csr.scala | 89 +++++++++++++++++++++--- rocket/src/main/scala/idecode.scala | 15 +++- rocket/src/main/scala/instructions.scala | 15 ++-- rocket/src/main/scala/rocket.scala | 4 ++ rocket/src/main/scala/tlb.scala | 2 +- rocket/src/main/scala/util.scala | 1 + 6 files changed, 108 insertions(+), 18 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index aa1ea014..7713d536 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -11,6 +11,7 @@ import scala.math._ import junctions.AddrHashMap class MStatus extends Bundle { + val debug = Bool() // not truly part of mstatus, but convenient val prv = UInt(width = PRV.SZ) // not truly part of mstatus, but convenient val sd = Bool() val zero3 = UInt(width = 31) @@ -35,6 +36,26 @@ class MStatus extends Bundle { val uie = Bool() } +class DCSR extends Bundle { + val xdebugver = UInt(width = 2) + val ndreset = Bool() + val fullreset = Bool() + val hwbpcount = UInt(width = 12) + val ebreakm = Bool() + val ebreakh = Bool() + val ebreaks = Bool() + val ebreaku = Bool() + val zero2 = Bool() + val stopcycle = Bool() + val stoptime = Bool() + val cause = UInt(width = 3) + val debugint = Bool() + 
val zero1 = Bool() + val halt = Bool() + val step = Bool() + val prv = UInt(width = PRV.SZ) +} + class MIP extends Bundle { val rocc = Bool() val meip = Bool() @@ -116,6 +137,11 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reset_mstatus.prv := PRV.M val reg_mstatus = Reg(init=reset_mstatus) + val reset_dcsr = Wire(init=new DCSR().fromBits(0)) + reset_dcsr.xdebugver := 1 + reset_dcsr.prv := PRV.M + val reg_dcsr = Reg(init=reset_dcsr) + val (supported_interrupts, delegable_interrupts) = { val sup = Wire(init=new MIP().fromBits(0)) sup.ssip := Bool(p(UseVM)) @@ -141,6 +167,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) Causes.fault_store, Causes.user_ecall).map(1 << _).sum) + val reg_debug = Reg(init=Bool(false)) + val reg_dpc = Reg(UInt(width = vaddrBitsExtended)) + val reg_dscratch = Reg(UInt(width = xLen)) + val reg_mie = Reg(init=UInt(0, xLen)) val reg_mideleg = Reg(init=UInt(0, xLen)) val reg_medeleg = Reg(init=UInt(0, xLen)) @@ -171,11 +201,20 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val read_mip = mip.toBits & supported_interrupts val pending_interrupts = read_mip & reg_mie - val m_interrupts = Mux(reg_mstatus.prv < PRV.M || (reg_mstatus.prv === PRV.M && reg_mstatus.mie), pending_interrupts & ~reg_mideleg, UInt(0)) - val s_interrupts = Mux(reg_mstatus.prv < PRV.S || (reg_mstatus.prv === PRV.S && reg_mstatus.sie), pending_interrupts & reg_mideleg, UInt(0)) + val m_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.M || (reg_mstatus.prv === PRV.M && reg_mstatus.mie)), pending_interrupts & ~reg_mideleg, UInt(0)) + val s_interrupts = Mux(!reg_debug && (reg_mstatus.prv < PRV.S || (reg_mstatus.prv === PRV.S && reg_mstatus.sie)), pending_interrupts & reg_mideleg, UInt(0)) val all_interrupts = m_interrupts | s_interrupts + val interruptMSB = BigInt(1) << (xLen-1) + val interruptCause = interruptMSB + PriorityEncoder(all_interrupts) io.interrupt := all_interrupts.orR - io.interrupt_cause := 
(io.interrupt << (xLen-1)) + PriorityEncoder(all_interrupts) + io.interrupt_cause := interruptCause + + val debugIntCause = reg_mip.getWidth + // debug interrupts are only masked by being in debug mode + when (Bool(usingDebug) && reg_dcsr.debugint && !reg_debug) { + io.interrupt := true + io.interrupt_cause := interruptMSB + debugIntCause + } val system_insn = io.rw.cmd === CSR.I val cpu_ren = io.rw.cmd =/= CSR.N && !system_insn @@ -212,6 +251,12 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) CSRs.mcause -> reg_mcause, CSRs.mhartid -> io.prci.id) + if (usingDebug) { + read_mapping += CSRs.dcsr -> reg_dcsr.toBits + read_mapping += CSRs.dpc -> reg_dpc.toBits + read_mapping += CSRs.dscratch -> reg_dscratch.toBits + } + if (usingFPU) { read_mapping += CSRs.fflags -> reg_fflags read_mapping += CSRs.frm -> reg_frm @@ -278,8 +323,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val fp_csr = if (usingFPU) decoded_addr(CSRs.fflags) || decoded_addr(CSRs.frm) || decoded_addr(CSRs.fcsr) else Bool(false) - val csr_addr_priv = io.rw.addr(9,8) - val priv_sufficient = reg_mstatus.prv >= csr_addr_priv + val csr_debug = Bool(usingDebug) && io.rw.addr(5) + val csr_addr_priv = Cat(io.rw.addr(5), io.rw.addr(9,8)) + val priv_sufficient = Cat(reg_debug, reg_mstatus.prv) >= csr_addr_priv val read_only = io.rw.addr(11,10).andR val cpu_wen = cpu_ren && io.rw.cmd =/= CSR.R && priv_sufficient val wen = cpu_wen && !read_only @@ -308,9 +354,12 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) Mux(insn_call, reg_mstatus.prv + Causes.user_ecall, Mux[UInt](insn_break, Causes.breakpoint, Causes.illegal_instruction))) val cause_lsbs = cause(log2Up(xLen)-1,0) + val causeIsDebugInt = cause(xLen-1) && cause_lsbs === debugIntCause + val causeIsDebugBreak = insn_break && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv) + val trapToDebug = Bool(usingDebug) && (causeIsDebugInt || causeIsDebugBreak || reg_debug) 
val delegate = Bool(p(UseVM)) && reg_mstatus.prv < PRV.M && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) - val tvec = Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec) - val epc = Mux(Bool(p(UseVM)) && !csr_addr_priv(1), reg_sepc, reg_mepc) + val tvec = Mux(trapToDebug, UInt(0x800), Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec)) + val epc = Mux(csr_debug, reg_dpc, Mux(Bool(p(UseVM)) && !csr_addr_priv(1), reg_sepc, reg_mepc)) io.fatc := insn_sfence_vm io.evec := Mux(io.exception || csr_xcpt, tvec, epc) io.ptbr := reg_sptbr @@ -318,6 +367,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) io.eret := insn_ret io.status := reg_mstatus io.status.sd := io.status.fs.andR || io.status.xs.andR + io.status.debug := reg_debug if (xLen == 32) io.status.sd_rv32 := io.status.sd @@ -336,7 +386,12 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val epc = ~(~io.pc | (coreInstBytes-1)) val pie = read_mstatus(reg_mstatus.prv) - when (delegate) { + when (trapToDebug) { + reg_debug := true + reg_dpc := epc + reg_dcsr.cause := Mux(causeIsDebugInt, UInt(3), UInt(1)) + reg_dcsr.prv := reg_mstatus.prv + }.elsewhen (delegate) { reg_sepc := epc reg_scause := cause reg_sbadaddr := badaddr @@ -361,6 +416,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_mstatus.spie := false reg_mstatus.spp := PRV.U reg_mstatus.prv := reg_mstatus.spp + }.elsewhen (csr_debug) { + reg_mstatus.prv := reg_dcsr.prv + reg_debug := false }.otherwise { when (reg_mstatus.mpp(1)) { reg_mstatus.mie := reg_mstatus.mpie } .elsewhen (Bool(usingVM) && reg_mstatus.mpp(0)) { reg_mstatus.sie := reg_mstatus.mpie } @@ -382,13 +440,14 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_fflags := reg_fflags | io.fcsr_flags.bits } + val supportedModes = Vec((PRV.M +: (if (usingUser) Some(PRV.U) else None) ++: (if (usingVM) Seq(PRV.S) else Nil)).map(UInt(_))) + when (wen) { when (decoded_addr(CSRs.mstatus)) { val 
new_mstatus = new MStatus().fromBits(wdata) reg_mstatus.mie := new_mstatus.mie reg_mstatus.mpie := new_mstatus.mpie - val supportedModes = Vec((PRV.M +: (if (usingUser) Some(PRV.U) else None) ++: (if (usingVM) Seq(PRV.S) else Nil)).map(UInt(_))) if (supportedModes.size > 1) { reg_mstatus.mprv := new_mstatus.mprv when (supportedModes contains new_mstatus.mpp) { reg_mstatus.mpp := new_mstatus.mpp } @@ -428,6 +487,17 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.frm)) { reg_frm := wdata } when (decoded_addr(CSRs.fcsr)) { reg_fflags := wdata; reg_frm := wdata >> reg_fflags.getWidth } } + if (usingDebug) { + when (decoded_addr(CSRs.dcsr)) { + val new_dcsr = new DCSR().fromBits(wdata) + reg_dcsr.ebreakm := new_dcsr.ebreakm + if (usingVM) reg_dcsr.ebreaks := new_dcsr.ebreaks + if (usingUser) reg_dcsr.ebreaku := new_dcsr.ebreaku + if (supportedModes.size > 1) reg_dcsr.prv := new_dcsr.prv + } + when (decoded_addr(CSRs.dpc)) { reg_dpc := ~(~wdata | (coreInstBytes-1)) } + when (decoded_addr(CSRs.dscratch)) { reg_dscratch := wdata } + } if (usingVM) { when (decoded_addr(CSRs.sstatus)) { val new_sstatus = new MStatus().fromBits(wdata) @@ -455,6 +525,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } reg_mip := io.prci.interrupts + reg_dcsr.debugint := io.prci.interrupts.debug io.rocc.csr.waddr := io.rw.addr io.rocc.csr.wdata := wdata diff --git a/rocket/src/main/scala/idecode.scala b/rocket/src/main/scala/idecode.scala index 33319e93..2168922c 100644 --- a/rocket/src/main/scala/idecode.scala +++ b/rocket/src/main/scala/idecode.scala @@ -110,10 +110,8 @@ class IDecode(implicit val p: Parameters) extends DecodeConstants FENCE-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.N,N,Y,N), FENCE_I-> List(Y,N,N,N,N,N,N,N,A2_X, A1_X, IMM_X, DW_X, FN_X, Y,M_FLUSH_ALL,MT_X, N,N,N,N,N,N,CSR.N,Y,N,N), - SFENCE_VM-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, 
N,N,N,N,N,N,CSR.I,N,N,N), SCALL-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), SBREAK-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), - SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), MRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), WFI-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), CSRRW-> List(Y,N,N,N,N,N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.W,N,N,N), @@ -124,6 +122,19 @@ class IDecode(implicit val p: Parameters) extends DecodeConstants CSRRCI-> List(Y,N,N,N,N,N,N,N,A2_IMM, A1_ZERO,IMM_Z, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,N,N,Y,CSR.C,N,N,N)) } +class SDecode(implicit val p: Parameters) extends DecodeConstants +{ + val table: Array[(BitPat, List[BitPat])] = Array( + SFENCE_VM-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N), + SRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N)) +} + +class DebugDecode(implicit val p: Parameters) extends DecodeConstants +{ + val table: Array[(BitPat, List[BitPat])] = Array( + DRET-> List(Y,N,N,N,N,N,N,X,A2_X, A1_X, IMM_X, DW_X, FN_X, N,M_X, MT_X, N,N,N,N,N,N,CSR.I,N,N,N)) +} + class I64Decode(implicit val p: Parameters) extends DecodeConstants { val table: Array[(BitPat, List[BitPat])] = Array( diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 808505d5..275e5194 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -98,6 +98,7 @@ object Instructions { def SRET = BitPat("b00010000001000000000000001110011") def HRET = BitPat("b00100000001000000000000001110011") def MRET = BitPat("b00110000001000000000000001110011") + def DRET = BitPat("b01111011001000000000000001110011") def 
SFENCE_VM = BitPat("b000100000100?????000000001110011") def WFI = BitPat("b00010000010100000000000001110011") def CSRRW = BitPat("b?????????????????001?????1110011") @@ -272,7 +273,6 @@ object CSRs { val mcause = 0x342 val mbadaddr = 0x343 val mip = 0x344 - val mipi = 0x345 val mucounteren = 0x310 val mscounteren = 0x311 val mucycle_delta = 0x700 @@ -281,6 +281,9 @@ object CSRs { val mscycle_delta = 0x704 val mstime_delta = 0x705 val msinstret_delta = 0x706 + val dcsr = 0x7b0 + val dpc = 0x7b1 + val dscratch = 0x7b2 val mcycle = 0xf00 val mtime = 0xf01 val minstret = 0xf02 @@ -289,8 +292,6 @@ object CSRs { val marchid = 0xf12 val mimpid = 0xf13 val mhartid = 0xf14 - val mtohost = 0x7c0 - val mfromhost = 0x7c1 val mreset = 0x7c2 val cycleh = 0xc80 val timeh = 0xc81 @@ -302,6 +303,7 @@ object CSRs { val mstime_deltah = 0x785 val msinstret_deltah = 0x786 val mcycleh = 0xf80 + val mtimeh = 0xf81 val minstreth = 0xf82 val all = { val res = collection.mutable.ArrayBuffer[Int]() @@ -334,7 +336,6 @@ object CSRs { res += mcause res += mbadaddr res += mip - res += mipi res += mucounteren res += mscounteren res += mucycle_delta @@ -343,6 +344,9 @@ object CSRs { res += mscycle_delta res += mstime_delta res += msinstret_delta + res += dcsr + res += dpc + res += dscratch res += mcycle res += mtime res += minstret @@ -351,8 +355,6 @@ object CSRs { res += marchid res += mimpid res += mhartid - res += mtohost - res += mfromhost res += mreset res.toArray } @@ -368,6 +370,7 @@ object CSRs { res += mstime_deltah res += msinstret_deltah res += mcycleh + res += mtimeh res += minstreth res.toArray } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index e365b217..160fa632 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -15,6 +15,7 @@ case object FetchWidth extends Field[Int] case object RetireWidth extends Field[Int] case object UseVM extends Field[Boolean] case object UseUser extends Field[Boolean] +case 
object UseDebug extends Field[Boolean] case object UseAtomics extends Field[Boolean] case object UsePerfCounters extends Field[Boolean] case object FastLoadWord extends Field[Boolean] @@ -34,6 +35,7 @@ trait HasCoreParameters extends HasAddrMapParameters { val usingVM = p(UseVM) val usingUser = p(UseUser) + val usingDebug = p(UseDebug) val usingFPU = p(UseFPU) val usingAtomics = p(UseAtomics) val usingFDivSqrt = p(FDivSqrt) @@ -129,6 +131,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { (if (usingFPU && usingFDivSqrt) Some(new FDivSqrtDecode) else None) ++: (if (usingRoCC) Some(new RoCCDecode) else None) ++: (if (xLen > 32) Some(new I64Decode) else None) ++: + (if (usingVM) Some(new SDecode) else None) ++: + (if (usingDebug) Some(new DebugDecode) else None) ++: Seq(new IDecode) } flatMap(_.table) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 99742a56..cb607ccd 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -128,7 +128,7 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { val do_mprv = io.ptw.status.mprv && !io.req.bits.instruction val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv) val priv_s = priv === PRV.S - val priv_uses_vm = priv <= PRV.S + val priv_uses_vm = priv <= PRV.S && !io.ptw.status.debug val req_xwr = Cat(!r_req.store, r_req.store, !(r_req.instruction || r_req.store)) val ur_bits = ur_array.toBits diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 1dd6eac1..ea6aed48 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -10,6 +10,7 @@ import cde.{Parameters, Field} object Util { implicit def uintToBitPat(x: UInt): BitPat = BitPat(x) implicit def intToUInt(x: Int): UInt = UInt(x) + implicit def bigIntToUInt(x: BigInt): UInt = UInt(x) implicit def booleanToBool(x: Boolean): Bits = Bool(x) implicit def intSeqToUIntSeq(x: Seq[Int]): Seq[UInt] = x.map(UInt(_)) implicit def 
seqToVec[T <: Data](x: Seq[T]): Vec[T] = Vec(x) From 13386af1d1a1195526dbb1b3f32455233700cfd8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 1 Jun 2016 19:30:41 -0700 Subject: [PATCH 1037/1087] Get rid of unused implicit conversion --- rocket/src/main/scala/util.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index ea6aed48..867b25d1 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -21,7 +21,6 @@ object Util { else Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x) } - implicit def intToUnsigned(x: Int): Unsigned = new Unsigned(x) implicit def booleanToIntConv(x: Boolean) = new AnyRef { def toInt: Int = if (x) 1 else 0 } From 3b0c1ed0c3e85d9e91055fac07c2bc501aaa0e3c Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 3 Jun 2016 13:50:29 -0700 Subject: [PATCH 1038/1087] Cope with changes to AddrMap --- rocket/src/main/scala/csr.scala | 2 +- rocket/src/main/scala/dma.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 7713d536..4f8df9be 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -8,7 +8,7 @@ import Instructions._ import cde.{Parameters, Field} import uncore._ import scala.math._ -import junctions.AddrHashMap +import junctions.AddrMap class MStatus extends Bundle { val debug = Bool() // not truly part of mstatus, but convenient diff --git a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala index 076ac29f..a7fabc1e 100644 --- a/rocket/src/main/scala/dma.scala +++ b/rocket/src/main/scala/dma.scala @@ -3,7 +3,7 @@ package rocket import Chisel._ import uncore._ import uncore.DmaRequest._ -import junctions.{ParameterizedBundle, AddrHashMap} +import junctions.{ParameterizedBundle, AddrMap} import cde.Parameters trait HasClientDmaParameters extends HasCoreParameters with HasDmaParameters { From 
4f2e2480a8c3a65da342f88ee3955d699d14b53f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 6 Jun 2016 20:57:22 -0700 Subject: [PATCH 1039/1087] When exceptions occur in D-mode, set pc=0x808, not 0x800 Closes #43 --- rocket/src/main/scala/csr.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 4f8df9be..47c4a7e2 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -358,7 +358,8 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val causeIsDebugBreak = insn_break && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv) val trapToDebug = Bool(usingDebug) && (causeIsDebugInt || causeIsDebugBreak || reg_debug) val delegate = Bool(p(UseVM)) && reg_mstatus.prv < PRV.M && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) - val tvec = Mux(trapToDebug, UInt(0x800), Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec)) + val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800)) + val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec)) val epc = Mux(csr_debug, reg_dpc, Mux(Bool(p(UseVM)) && !csr_addr_priv(1), reg_sepc, reg_mepc)) io.fatc := insn_sfence_vm io.evec := Mux(io.exception || csr_xcpt, tvec, epc) From e3c17b5f74f8dad94c3f559fb9d4f08be963053b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 8 Jun 2016 20:19:52 -0700 Subject: [PATCH 1040/1087] Add provisional breakpoint support --- rocket/src/main/scala/breakpoint.scala | 45 ++++++++++++++++++++++++ rocket/src/main/scala/csr.scala | 36 +++++++++++++++++-- rocket/src/main/scala/instructions.scala | 8 +++++ rocket/src/main/scala/rocket.scala | 14 ++++++++ 4 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 rocket/src/main/scala/breakpoint.scala diff --git a/rocket/src/main/scala/breakpoint.scala b/rocket/src/main/scala/breakpoint.scala 
new file mode 100644 index 00000000..92c2430d --- /dev/null +++ b/rocket/src/main/scala/breakpoint.scala @@ -0,0 +1,45 @@ +// See LICENSE for license details. + +package rocket + +import Chisel._ +import Util._ +import cde.Parameters + +class BPControl extends Bundle { + val matchcond = UInt(width = 2) + val m = Bool() + val h = Bool() + val s = Bool() + val u = Bool() + val r = Bool() + val w = Bool() + val x = Bool() +} + +class BreakpointUnit(implicit p: Parameters) extends CoreModule()(p) { + val io = new Bundle { + val bpcontrol = Vec(p(NBreakpoints), new BPControl).asInput + val bpaddress = Vec(p(NBreakpoints), UInt(width = vaddrBits)).asInput + val pc = UInt(INPUT, vaddrBits) + val ea = UInt(INPUT, vaddrBits) + val xcpt_if = Bool(OUTPUT) + val xcpt_ld = Bool(OUTPUT) + val xcpt_st = Bool(OUTPUT) + } + + io.xcpt_if := false + io.xcpt_ld := false + io.xcpt_st := false + + for (((bpc, bpa), i) <- io.bpcontrol zip io.bpaddress zipWithIndex) { + var mask: UInt = bpc.matchcond(1) + for (i <- 1 until log2Ceil(16)) + mask = Cat(mask(i-1) && bpa(i-1), mask) + + def matches(x: UInt) = (~x | mask) === (~bpa | mask) + when (matches(io.pc) && bpc.x) { io.xcpt_if := true } + when (matches(io.ea) && bpc.r) { io.xcpt_ld := true } + when (matches(io.ea) && bpc.w) { io.xcpt_st := true } + } +} diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 47c4a7e2..b6fc328a 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -7,7 +7,6 @@ import Util._ import Instructions._ import cde.{Parameters, Field} import uncore._ -import scala.math._ import junctions.AddrMap class MStatus extends Bundle { @@ -126,6 +125,8 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle { val rocc = new RoCCInterface().flip val interrupt = Bool(OUTPUT) val interrupt_cause = UInt(OUTPUT, xLen) + val bpcontrol = Vec(p(NBreakpoints), new BPControl).asOutput + val bpaddress = Vec(p(NBreakpoints), UInt(width = vaddrBits)).asOutput } class 
CSRFile(implicit p: Parameters) extends CoreModule()(p) @@ -171,6 +172,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_dpc = Reg(UInt(width = vaddrBitsExtended)) val reg_dscratch = Reg(UInt(width = xLen)) + val reg_tdrselect = Reg(init=UInt(0, log2Up(p(NBreakpoints)))) + val reg_bpcontrol = Reg(Vec(p(NBreakpoints), new BPControl)) + val reg_bpaddress = Reg(Vec(p(NBreakpoints), UInt(width = vaddrBits))) + val reg_mie = Reg(init=UInt(0, xLen)) val reg_mideleg = Reg(init=UInt(0, xLen)) val reg_medeleg = Reg(init=UInt(0, xLen)) @@ -208,6 +213,8 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val interruptCause = interruptMSB + PriorityEncoder(all_interrupts) io.interrupt := all_interrupts.orR io.interrupt_cause := interruptCause + io.bpcontrol := reg_bpcontrol + io.bpaddress := reg_bpaddress val debugIntCause = reg_mip.getWidth // debug interrupts are only masked by being in debug mode @@ -229,6 +236,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val read_mstatus = io.status.toBits()(xLen-1,0) val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( + CSRs.tdrselect -> reg_tdrselect, + CSRs.tdrdata1 -> (if (p(NBreakpoints) > 0) reg_bpcontrol(reg_tdrselect).toBits else UInt(0)), + CSRs.tdrdata2 -> (if (p(NBreakpoints) > 0) reg_bpaddress(reg_tdrselect) else UInt(0)), CSRs.mimpid -> UInt(0), CSRs.marchid -> UInt(0), CSRs.mvendorid -> UInt(0), @@ -324,7 +334,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) if (usingFPU) decoded_addr(CSRs.fflags) || decoded_addr(CSRs.frm) || decoded_addr(CSRs.fcsr) else Bool(false) val csr_debug = Bool(usingDebug) && io.rw.addr(5) - val csr_addr_priv = Cat(io.rw.addr(5), io.rw.addr(9,8)) + val csr_addr_priv = Cat(io.rw.addr(6,5).andR, io.rw.addr(9,8)) val priv_sufficient = Cat(reg_debug, reg_mstatus.prv) >= csr_addr_priv val read_only = io.rw.addr(11,10).andR val cpu_wen = cpu_ren && io.rw.cmd =/= CSR.R && priv_sufficient @@ -523,10 +533,20 @@ class 
CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.mideleg)) { reg_mideleg := wdata & delegable_interrupts } when (decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata & delegable_exceptions } } + if (p(NBreakpoints) > 0) { + when (decoded_addr(CSRs.tdrselect)) { reg_tdrselect := Mux(wdata(log2Up(p(NBreakpoints))-1,0) >= UInt(p(NBreakpoints)), UInt(0), wdata(log2Up(p(NBreakpoints))-1,0)) } + when (decoded_addr(CSRs.tdrdata1)) { + val newBPC = new BPControl().fromBits(wdata) + reg_bpcontrol(reg_tdrselect) := newBPC + reg_bpcontrol(reg_tdrselect).matchcond := newBPC.matchcond | 1 /* exact/range only */ + } + when (decoded_addr(CSRs.tdrdata2)) { reg_bpaddress(reg_tdrselect) := wdata } + } } reg_mip := io.prci.interrupts reg_dcsr.debugint := io.prci.interrupts.debug + reg_dcsr.hwbpcount := UInt(p(NBreakpoints)) io.rocc.csr.waddr := io.rw.addr io.rocc.csr.wdata := wdata @@ -537,4 +557,16 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_mstatus.prv := PRV.M reg_mstatus.mprv := false } + + for (bpc <- reg_bpcontrol) { + bpc.h := false + if (!usingVM) bpc.s := false + if (!usingUser) bpc.u := false + if (!usingVM && !usingUser) bpc.m := true + when (reset) { + bpc.r := false + bpc.w := false + bpc.x := false + } + } } diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index 275e5194..dd0d515f 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -281,6 +281,10 @@ object CSRs { val mscycle_delta = 0x704 val mstime_delta = 0x705 val msinstret_delta = 0x706 + val tdrselect = 0x7a0 + val tdrdata1 = 0x7a1 + val tdrdata2 = 0x7a2 + val tdrdata3 = 0x7a3 val dcsr = 0x7b0 val dpc = 0x7b1 val dscratch = 0x7b2 @@ -344,6 +348,10 @@ object CSRs { res += mscycle_delta res += mstime_delta res += msinstret_delta + res += tdrselect + res += tdrdata1 + res += tdrdata2 + res += tdrdata3 res += dcsr res += dpc res += dscratch diff --git 
a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 160fa632..396c4b82 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -28,6 +28,7 @@ case object NCustomMRWCSRs extends Field[Int] case object MtvecWritable extends Field[Boolean] case object MtvecInit extends Field[BigInt] case object ResetVector extends Field[BigInt] +case object NBreakpoints extends Field[Int] trait HasCoreParameters extends HasAddrMapParameters { implicit val p: Parameters @@ -160,6 +161,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val mem_reg_flush_pipe = Reg(Bool()) val mem_reg_cause = Reg(UInt()) val mem_reg_slow_bypass = Reg(Bool()) + val mem_reg_load = Reg(Bool()) + val mem_reg_store = Reg(Bool()) val mem_reg_pc = Reg(UInt()) val mem_reg_inst = Reg(Bits()) val mem_reg_wdata = Reg(Bits()) @@ -222,8 +225,15 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val id_do_fence = id_rocc_busy && id_ctrl.fence || id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en) + val bpu = Module(new BreakpointUnit) + bpu.io.bpcontrol := csr.io.bpcontrol + bpu.io.bpaddress := csr.io.bpaddress + bpu.io.pc := id_pc + bpu.io.ea := mem_reg_wdata + val (id_xcpt, id_cause) = checkExceptions(List( (csr.io.interrupt, csr.io.interrupt_cause), + (bpu.io.xcpt_if, UInt(Causes.breakpoint)), (io.imem.resp.bits.xcpt_if, UInt(Causes.fault_fetch)), (id_illegal_insn, UInt(Causes.illegal_instruction)))) @@ -344,6 +354,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { when (ex_reg_valid || ex_reg_xcpt_interrupt) { mem_ctrl := ex_ctrl + mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd) + mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd) mem_reg_btb_hit := ex_reg_btb_hit when (ex_reg_btb_hit) { mem_reg_btb_resp := ex_reg_btb_resp } mem_reg_flush_pipe := ex_reg_flush_pipe @@ -359,6 +371,8 @@ class Rocket(implicit p: Parameters) 
extends CoreModule()(p) { val (mem_xcpt, mem_cause) = checkExceptions(List( (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), + (mem_reg_valid && mem_reg_load && bpu.io.xcpt_ld, UInt(Causes.breakpoint)), + (mem_reg_valid && mem_reg_store && bpu.io.xcpt_st, UInt(Causes.breakpoint)), (want_take_pc_mem && mem_npc_misaligned, UInt(Causes.misaligned_fetch)), (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)), (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)), From 8516e38eb25b314990c3005f1dd7c2db74b4fdc8 Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Thu, 9 Jun 2016 11:33:33 -0700 Subject: [PATCH 1041/1087] remove implicit modulo addressing in FPU (#44) --- rocket/src/main/scala/fpu.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index b7c72945..107f93bd 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -561,7 +561,7 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { } val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt) - val wsrc = (winfo(0) >> 6) + val wsrc = (winfo(0) >> 6)(log2Up(pipes.size) - 1,0) val wcp = winfo(0)(6+log2Up(pipes.size)) val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.res.data))(wsrc)) val wexc = Vec(pipes.map(_.res.exc))(wsrc) From 4cd77cef10998eef71561473d7774020ce98d488 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Jun 2016 12:29:26 -0700 Subject: [PATCH 1042/1087] Make dcsr.halt writable --- rocket/src/main/scala/csr.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index b6fc328a..07210d88 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -501,6 +501,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) if (usingDebug) { when (decoded_addr(CSRs.dcsr)) { val new_dcsr = new 
DCSR().fromBits(wdata) + reg_dcsr.halt := new_dcsr.halt reg_dcsr.ebreakm := new_dcsr.ebreakm if (usingVM) reg_dcsr.ebreaks := new_dcsr.ebreaks if (usingUser) reg_dcsr.ebreaku := new_dcsr.ebreaku From c85ea7b98769a5a97faf9ec0d11bba3c0bc4cd3b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Jun 2016 12:33:43 -0700 Subject: [PATCH 1043/1087] Set badaddr on breakpoints --- rocket/src/main/scala/csr.scala | 18 ++++-------------- rocket/src/main/scala/rocket.scala | 24 +++++++++++++++--------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 07210d88..8df9c129 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -118,6 +118,7 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle { val custom_mrw_csrs = Vec(nCustomMrwCsrs, UInt(INPUT, xLen)) val cause = UInt(INPUT, xLen) val pc = UInt(INPUT, vaddrBitsExtended) + val badaddr = UInt(INPUT, vaddrBitsExtended) val fatc = Bool(OUTPUT) val time = UInt(OUTPUT, xLen) val fcsr_rm = Bits(OUTPUT, FPConstants.RM_SZ) @@ -365,7 +366,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) Mux[UInt](insn_break, Causes.breakpoint, Causes.illegal_instruction))) val cause_lsbs = cause(log2Up(xLen)-1,0) val causeIsDebugInt = cause(xLen-1) && cause_lsbs === debugIntCause - val causeIsDebugBreak = insn_break && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv) + val causeIsDebugBreak = cause === Causes.breakpoint && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv) val trapToDebug = Bool(usingDebug) && (causeIsDebugInt || causeIsDebugBreak || reg_debug) val delegate = Bool(p(UseVM)) && reg_mstatus.prv < PRV.M && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800)) @@ -383,17 +384,6 @@ class CSRFile(implicit p: Parameters) extends 
CoreModule()(p) io.status.sd_rv32 := io.status.sd when (io.exception || csr_xcpt) { - def compressVAddr(addr: UInt) = - if (vaddrBitsExtended == vaddrBits) addr - else { - val (upper, lower) = Split(addr, vaddrBits) - val sign = Mux(lower.toSInt < SInt(0), upper.andR, upper.orR) - Cat(sign, lower) - } - val ldst = - cause === Causes.fault_load || cause === Causes.misaligned_load || - cause === Causes.fault_store || cause === Causes.misaligned_store - val badaddr = Mux(ldst, compressVAddr(io.rw.wdata), io.pc) val epc = ~(~io.pc | (coreInstBytes-1)) val pie = read_mstatus(reg_mstatus.prv) @@ -405,7 +395,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) }.elsewhen (delegate) { reg_sepc := epc reg_scause := cause - reg_sbadaddr := badaddr + reg_sbadaddr := io.badaddr reg_mstatus.spie := pie reg_mstatus.spp := reg_mstatus.prv reg_mstatus.sie := false @@ -413,7 +403,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) }.otherwise { reg_mepc := epc reg_mcause := cause - reg_mbadaddr := badaddr + reg_mbadaddr := io.badaddr reg_mstatus.mpie := pie reg_mstatus.mpp := reg_mstatus.prv reg_mstatus.mie := false diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 396c4b82..50f45f77 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -171,6 +171,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val wb_reg_valid = Reg(Bool()) val wb_reg_xcpt = Reg(Bool()) + val wb_reg_mem_xcpt = Reg(Bool()) val wb_reg_replay = Reg(Bool()) val wb_reg_cause = Reg(UInt()) val wb_reg_rocc_pending = Reg(init=Bool(false)) @@ -369,15 +370,18 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } } + val (mem_new_xcpt, mem_new_cause) = checkExceptions(List( + (mem_reg_load && bpu.io.xcpt_ld, UInt(Causes.breakpoint)), + (mem_reg_store && bpu.io.xcpt_st, UInt(Causes.breakpoint)), + (want_take_pc_mem && mem_npc_misaligned, UInt(Causes.misaligned_fetch)), + (mem_ctrl.mem 
&& io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)), + (mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)), + (mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)), + (mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load)))) + val (mem_xcpt, mem_cause) = checkExceptions(List( - (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), - (mem_reg_valid && mem_reg_load && bpu.io.xcpt_ld, UInt(Causes.breakpoint)), - (mem_reg_valid && mem_reg_store && bpu.io.xcpt_st, UInt(Causes.breakpoint)), - (want_take_pc_mem && mem_npc_misaligned, UInt(Causes.misaligned_fetch)), - (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.st, UInt(Causes.misaligned_store)), - (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.ma.ld, UInt(Causes.misaligned_load)), - (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.st, UInt(Causes.fault_store)), - (mem_reg_valid && mem_ctrl.mem && io.dmem.xcpt.pf.ld, UInt(Causes.fault_load)))) + (mem_reg_xcpt_interrupt || mem_reg_xcpt, mem_reg_cause), + (mem_reg_valid && mem_new_xcpt, mem_new_cause))) val dcache_kill_mem = mem_reg_valid && mem_ctrl.wxd && io.dmem.replay_next // structural hazard on writeback port val fpu_kill_mem = mem_reg_valid && mem_ctrl.fp && io.fpu.nack_mem @@ -390,6 +394,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { wb_reg_valid := !ctrl_killm wb_reg_replay := replay_mem && !take_pc_wb wb_reg_xcpt := mem_xcpt && !take_pc_wb + wb_reg_mem_xcpt := mem_reg_valid && mem_new_xcpt && !(mem_reg_xcpt_interrupt || mem_reg_xcpt) when (mem_xcpt) { wb_reg_cause := mem_cause } when (mem_reg_valid || mem_reg_replay || mem_reg_xcpt_interrupt) { wb_ctrl := mem_ctrl @@ -459,6 +464,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { io.rocc.csr <> csr.io.rocc.csr csr.io.rocc.interrupt <> io.rocc.interrupt csr.io.pc := wb_reg_pc + csr.io.badaddr := Mux(wb_reg_mem_xcpt, encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata), wb_reg_pc) csr.io.uarch_counters.foreach(_ := Bool(false)) io.ptw.ptbr := 
csr.io.ptbr io.ptw.invalidate := csr.io.fatc @@ -625,7 +631,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { def checkHazards(targets: Seq[(Bool, UInt)], cond: UInt => Bool) = targets.map(h => h._1 && cond(h._2)).reduce(_||_) - def encodeVirtualAddress(a0: UInt, ea: UInt) = if (xLen == 32) ea else { + def encodeVirtualAddress(a0: UInt, ea: UInt) = if (vaddrBitsExtended == vaddrBits) ea else { // efficient means to compress 64-bit VA into vaddrBits+1 bits // (VA is bad if VA(vaddrBits) != VA(vaddrBits-1)) val a = a0 >> vaddrBits-1 From dca55a2b35fcbd5913d30faee878c6b999cb00f0 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Jun 2016 12:41:52 -0700 Subject: [PATCH 1044/1087] Respect breakpoint privilege settings --- rocket/src/main/scala/breakpoint.scala | 9 ++++++--- rocket/src/main/scala/rocket.scala | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/breakpoint.scala b/rocket/src/main/scala/breakpoint.scala index 92c2430d..7254b756 100644 --- a/rocket/src/main/scala/breakpoint.scala +++ b/rocket/src/main/scala/breakpoint.scala @@ -19,6 +19,7 @@ class BPControl extends Bundle { class BreakpointUnit(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { + val status = new MStatus().asInput val bpcontrol = Vec(p(NBreakpoints), new BPControl).asInput val bpaddress = Vec(p(NBreakpoints), UInt(width = vaddrBits)).asInput val pc = UInt(INPUT, vaddrBits) @@ -38,8 +39,10 @@ class BreakpointUnit(implicit p: Parameters) extends CoreModule()(p) { mask = Cat(mask(i-1) && bpa(i-1), mask) def matches(x: UInt) = (~x | mask) === (~bpa | mask) - when (matches(io.pc) && bpc.x) { io.xcpt_if := true } - when (matches(io.ea) && bpc.r) { io.xcpt_ld := true } - when (matches(io.ea) && bpc.w) { io.xcpt_st := true } + when (Cat(bpc.m, bpc.h, bpc.s, bpc.u)(io.status.prv)) { + when (matches(io.pc) && bpc.x) { io.xcpt_if := true } + when (matches(io.ea) && bpc.r) { io.xcpt_ld := true } + when (matches(io.ea) 
&& bpc.w) { io.xcpt_st := true } + } } } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 50f45f77..60ed1634 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -227,6 +227,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { id_mem_busy && (id_ctrl.amo && id_amo_aq || id_ctrl.fence_i || id_reg_fence && (id_ctrl.mem || id_ctrl.rocc) || id_csr_en) val bpu = Module(new BreakpointUnit) + bpu.io.status := csr.io.status bpu.io.bpcontrol := csr.io.bpcontrol bpu.io.bpaddress := csr.io.bpaddress bpu.io.pc := id_pc From 586c1079d0919921e0955dd7e5e59f3e106008e3 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Jun 2016 13:02:28 -0700 Subject: [PATCH 1045/1087] Fix D$ for set size > page size --- rocket/src/main/scala/dcache.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 484a39a9..97611bc2 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -71,7 +71,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s1_req = Reg(io.cpu.req.bits) when (metaReadArb.io.out.valid) { s1_req := io.cpu.req.bits - s1_req.addr := Cat(io.cpu.req.bits.addr >> pgIdxBits, metaReadArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0)) + s1_req.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaReadArb.io.out.bits.idx, io.cpu.req.bits.addr(blockOffBits-1,0)) } val s1_read = isRead(s1_req.cmd) val s1_write = isWrite(s1_req.cmd) From 2c325151bfb7f2269afe4155670218fe586396be Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Thu, 9 Jun 2016 17:22:36 -0700 Subject: [PATCH 1046/1087] pass invalidate_lr through simple cache interface (#45) --- rocket/src/main/scala/nbdcache.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 87a8179a..bbce5980 
100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1133,6 +1133,7 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module val s2_req_fire = Reg(next=s1_req_fire) val s3_nack = Reg(next=io.cache.s2_nack) + io.cache.invalidate_lr := io.requestor.invalidate_lr io.cache.req <> req_arb.io.out io.cache.req.bits.phys := Bool(true) io.cache.s1_kill := io.cache.s2_nack From c8c7246ccec5db9d9702e3efba152c9f4cd74bd8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 9 Jun 2016 19:07:10 -0700 Subject: [PATCH 1047/1087] Update breakpoint spec --- rocket/src/main/scala/breakpoint.scala | 23 +++++++++++++++++++---- rocket/src/main/scala/csr.scala | 26 +++++++++++++++++--------- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/rocket/src/main/scala/breakpoint.scala b/rocket/src/main/scala/breakpoint.scala index 7254b756..65114daf 100644 --- a/rocket/src/main/scala/breakpoint.scala +++ b/rocket/src/main/scala/breakpoint.scala @@ -6,8 +6,20 @@ import Chisel._ import Util._ import cde.Parameters -class BPControl extends Bundle { - val matchcond = UInt(width = 2) +class TDRSelect(implicit p: Parameters) extends CoreBundle()(p) { + val tdrmode = Bool() + val reserved = UInt(width = xLen - 1 - log2Up(nTDR)) + val tdrindex = UInt(width = log2Up(nTDR)) + + def nTDR = p(NBreakpoints) +} + +class BPControl(implicit p: Parameters) extends CoreBundle()(p) { + val tdrtype = UInt(width = 4) + val bpamaskmax = UInt(width = 5) + val reserved = UInt(width = xLen-28) + val bpaction = UInt(width = 8) + val bpmatch = UInt(width = 4) val m = Bool() val h = Bool() val s = Bool() @@ -15,6 +27,9 @@ class BPControl extends Bundle { val r = Bool() val w = Bool() val x = Bool() + + def tdrType = 1 + def bpaMaskMax = 4 } class BreakpointUnit(implicit p: Parameters) extends CoreModule()(p) { @@ -34,8 +49,8 @@ class BreakpointUnit(implicit p: Parameters) extends CoreModule()(p) { io.xcpt_st := false for (((bpc, bpa), i) <- 
io.bpcontrol zip io.bpaddress zipWithIndex) { - var mask: UInt = bpc.matchcond(1) - for (i <- 1 until log2Ceil(16)) + var mask: UInt = bpc.bpmatch(1) + for (i <- 1 until bpc.bpaMaskMax) mask = Cat(mask(i-1) && bpa(i-1), mask) def matches(x: UInt) = (~x | mask) === (~bpa | mask) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 8df9c129..2016e77e 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -173,7 +173,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_dpc = Reg(UInt(width = vaddrBitsExtended)) val reg_dscratch = Reg(UInt(width = xLen)) - val reg_tdrselect = Reg(init=UInt(0, log2Up(p(NBreakpoints)))) + val reg_tdrselect = Reg(new TDRSelect) val reg_bpcontrol = Reg(Vec(p(NBreakpoints), new BPControl)) val reg_bpaddress = Reg(Vec(p(NBreakpoints), UInt(width = vaddrBits))) @@ -237,9 +237,9 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val read_mstatus = io.status.toBits()(xLen-1,0) val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( - CSRs.tdrselect -> reg_tdrselect, - CSRs.tdrdata1 -> (if (p(NBreakpoints) > 0) reg_bpcontrol(reg_tdrselect).toBits else UInt(0)), - CSRs.tdrdata2 -> (if (p(NBreakpoints) > 0) reg_bpaddress(reg_tdrselect) else UInt(0)), + CSRs.tdrselect -> reg_tdrselect.toBits, + CSRs.tdrdata1 -> (if (p(NBreakpoints) > 0) reg_bpcontrol(reg_tdrselect.tdrindex).toBits else UInt(0)), + CSRs.tdrdata2 -> (if (p(NBreakpoints) > 0) reg_bpaddress(reg_tdrselect.tdrindex) else UInt(0)), CSRs.mimpid -> UInt(0), CSRs.marchid -> UInt(0), CSRs.mvendorid -> UInt(0), @@ -525,13 +525,15 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata & delegable_exceptions } } if (p(NBreakpoints) > 0) { - when (decoded_addr(CSRs.tdrselect)) { reg_tdrselect := Mux(wdata(log2Up(p(NBreakpoints))-1,0) >= UInt(p(NBreakpoints)), UInt(0), wdata(log2Up(p(NBreakpoints))-1,0)) } - when 
(decoded_addr(CSRs.tdrdata1)) { + val canWrite = reg_tdrselect.tdrmode || reg_debug + val newTDR = new TDRSelect().fromBits(wdata) + when (decoded_addr(CSRs.tdrselect) && newTDR.tdrindex < newTDR.nTDR) { reg_tdrselect.tdrindex := newTDR.tdrindex } + when (decoded_addr(CSRs.tdrdata1) && canWrite) { val newBPC = new BPControl().fromBits(wdata) - reg_bpcontrol(reg_tdrselect) := newBPC - reg_bpcontrol(reg_tdrselect).matchcond := newBPC.matchcond | 1 /* exact/range only */ + reg_bpcontrol(reg_tdrselect.tdrindex) := newBPC + reg_bpcontrol(reg_tdrselect.tdrindex).bpmatch := newBPC.bpmatch & 2 /* exact/NAPOT only */ } - when (decoded_addr(CSRs.tdrdata2)) { reg_bpaddress(reg_tdrselect) := wdata } + when (decoded_addr(CSRs.tdrdata2) && canWrite) { reg_bpaddress(reg_tdrselect.tdrindex) := wdata } } } @@ -549,7 +551,13 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_mstatus.mprv := false } + reg_tdrselect.reserved := 0 + reg_tdrselect.tdrmode := true // TODO support D-mode breakpoint theft for (bpc <- reg_bpcontrol) { + bpc.tdrtype := bpc.tdrType + bpc.bpamaskmax := bpc.bpaMaskMax + bpc.reserved := 0 + bpc.bpaction := 0 bpc.h := false if (!usingVM) bpc.s := false if (!usingUser) bpc.u := false From e3b4b5583642391b47d205c7f1b0549d08715d3f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 10 Jun 2016 19:55:58 -0700 Subject: [PATCH 1048/1087] Refactor breakpoints and support range comparison (currently disabled) --- rocket/src/main/scala/breakpoint.scala | 41 +++++++++++++++++++------- rocket/src/main/scala/csr.scala | 41 ++++++++++++++------------ rocket/src/main/scala/rocket.scala | 3 +- 3 files changed, 53 insertions(+), 32 deletions(-) diff --git a/rocket/src/main/scala/breakpoint.scala b/rocket/src/main/scala/breakpoint.scala index 65114daf..ee484c28 100644 --- a/rocket/src/main/scala/breakpoint.scala +++ b/rocket/src/main/scala/breakpoint.scala @@ -30,13 +30,28 @@ class BPControl(implicit p: Parameters) extends CoreBundle()(p) { def tdrType = 1 
def bpaMaskMax = 4 + def enabled(mstatus: MStatus) = Cat(m, h, s, u)(mstatus.prv) +} + +class BP(implicit p: Parameters) extends CoreBundle()(p) { + val control = new BPControl + val address = UInt(width = vaddrBits) + + def mask(dummy: Int = 0) = { + var mask: UInt = control.bpmatch(1) + for (i <- 1 until control.bpaMaskMax) + mask = Cat(mask(i-1) && address(i-1), mask) + mask + } + + def pow2AddressMatch(x: UInt) = + (~x | mask()) === (~address | mask()) } class BreakpointUnit(implicit p: Parameters) extends CoreModule()(p) { val io = new Bundle { val status = new MStatus().asInput - val bpcontrol = Vec(p(NBreakpoints), new BPControl).asInput - val bpaddress = Vec(p(NBreakpoints), UInt(width = vaddrBits)).asInput + val bp = Vec(p(NBreakpoints), new BP).asInput val pc = UInt(INPUT, vaddrBits) val ea = UInt(INPUT, vaddrBits) val xcpt_if = Bool(OUTPUT) @@ -48,16 +63,20 @@ class BreakpointUnit(implicit p: Parameters) extends CoreModule()(p) { io.xcpt_ld := false io.xcpt_st := false - for (((bpc, bpa), i) <- io.bpcontrol zip io.bpaddress zipWithIndex) { - var mask: UInt = bpc.bpmatch(1) - for (i <- 1 until bpc.bpaMaskMax) - mask = Cat(mask(i-1) && bpa(i-1), mask) + for (bp <- io.bp) { + when (bp.control.enabled(io.status)) { + when (bp.pow2AddressMatch(io.pc) && bp.control.x) { io.xcpt_if := true } + when (bp.pow2AddressMatch(io.ea) && bp.control.r) { io.xcpt_ld := true } + when (bp.pow2AddressMatch(io.ea) && bp.control.w) { io.xcpt_st := true } + } + } - def matches(x: UInt) = (~x | mask) === (~bpa | mask) - when (Cat(bpc.m, bpc.h, bpc.s, bpc.u)(io.status.prv)) { - when (matches(io.pc) && bpc.x) { io.xcpt_if := true } - when (matches(io.ea) && bpc.r) { io.xcpt_ld := true } - when (matches(io.ea) && bpc.w) { io.xcpt_st := true } + if (!io.bp.isEmpty) for ((bpl, bph) <- io.bp zip io.bp.tail) { + def matches(x: UInt) = !(x < bpl.address) && x < bph.address + when (bph.control.enabled(io.status) && bph.control.bpmatch === 1) { + when (matches(io.pc) && bph.control.x) { 
io.xcpt_if := true } + when (matches(io.ea) && bph.control.r) { io.xcpt_ld := true } + when (matches(io.ea) && bph.control.w) { io.xcpt_st := true } } } } diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 2016e77e..d3daba09 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -126,8 +126,7 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle { val rocc = new RoCCInterface().flip val interrupt = Bool(OUTPUT) val interrupt_cause = UInt(OUTPUT, xLen) - val bpcontrol = Vec(p(NBreakpoints), new BPControl).asOutput - val bpaddress = Vec(p(NBreakpoints), UInt(width = vaddrBits)).asOutput + val bp = Vec(p(NBreakpoints), new BP).asOutput } class CSRFile(implicit p: Parameters) extends CoreModule()(p) @@ -174,8 +173,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_dscratch = Reg(UInt(width = xLen)) val reg_tdrselect = Reg(new TDRSelect) - val reg_bpcontrol = Reg(Vec(p(NBreakpoints), new BPControl)) - val reg_bpaddress = Reg(Vec(p(NBreakpoints), UInt(width = vaddrBits))) + val reg_bp = Reg(Vec(1 << log2Up(p(NBreakpoints)), new BP)) val reg_mie = Reg(init=UInt(0, xLen)) val reg_mideleg = Reg(init=UInt(0, xLen)) @@ -214,8 +212,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val interruptCause = interruptMSB + PriorityEncoder(all_interrupts) io.interrupt := all_interrupts.orR io.interrupt_cause := interruptCause - io.bpcontrol := reg_bpcontrol - io.bpaddress := reg_bpaddress + io.bp := reg_bp take p(NBreakpoints) val debugIntCause = reg_mip.getWidth // debug interrupts are only masked by being in debug mode @@ -238,8 +235,8 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( CSRs.tdrselect -> reg_tdrselect.toBits, - CSRs.tdrdata1 -> (if (p(NBreakpoints) > 0) reg_bpcontrol(reg_tdrselect.tdrindex).toBits else UInt(0)), - CSRs.tdrdata2 -> (if (p(NBreakpoints) > 0) 
reg_bpaddress(reg_tdrselect.tdrindex) else UInt(0)), + CSRs.tdrdata1 -> reg_bp(reg_tdrselect.tdrindex).control.toBits, + CSRs.tdrdata2 -> reg_bp(reg_tdrselect.tdrindex).address, CSRs.mimpid -> UInt(0), CSRs.marchid -> UInt(0), CSRs.mvendorid -> UInt(0), @@ -340,9 +337,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val read_only = io.rw.addr(11,10).andR val cpu_wen = cpu_ren && io.rw.cmd =/= CSR.R && priv_sufficient val wen = cpu_wen && !read_only - val wdata = Mux(io.rw.cmd === CSR.S, io.rw.rdata | io.rw.wdata, - Mux(io.rw.cmd === CSR.C, io.rw.rdata & ~io.rw.wdata, - io.rw.wdata)) + + val wdata = (Mux((io.rw.cmd === CSR.S || io.rw.cmd === CSR.C), io.rw.rdata, UInt(0)) | + Mux(io.rw.cmd =/= CSR.C, io.rw.wdata, UInt(0))) & + ~Mux(io.rw.cmd === CSR.C, io.rw.wdata, UInt(0)) val do_system_insn = priv_sufficient && system_insn val opcode = UInt(1) << io.rw.addr(2,0) @@ -525,15 +523,17 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata & delegable_exceptions } } if (p(NBreakpoints) > 0) { - val canWrite = reg_tdrselect.tdrmode || reg_debug val newTDR = new TDRSelect().fromBits(wdata) - when (decoded_addr(CSRs.tdrselect) && newTDR.tdrindex < newTDR.nTDR) { reg_tdrselect.tdrindex := newTDR.tdrindex } - when (decoded_addr(CSRs.tdrdata1) && canWrite) { - val newBPC = new BPControl().fromBits(wdata) - reg_bpcontrol(reg_tdrselect.tdrindex) := newBPC - reg_bpcontrol(reg_tdrselect.tdrindex).bpmatch := newBPC.bpmatch & 2 /* exact/NAPOT only */ + when (decoded_addr(CSRs.tdrselect)) { reg_tdrselect.tdrindex := newTDR.tdrindex } + + when (reg_tdrselect.tdrmode || reg_debug) { + when (decoded_addr(CSRs.tdrdata1)) { + val newBPC = new BPControl().fromBits(wdata) + reg_bp(reg_tdrselect.tdrindex).control := newBPC + reg_bp(reg_tdrselect.tdrindex).control.bpmatch := newBPC.bpmatch & 2 /* exact/NAPOT only */ + } + when (decoded_addr(CSRs.tdrdata2)) { reg_bp(reg_tdrselect.tdrindex).address := wdata } 
} - when (decoded_addr(CSRs.tdrdata2) && canWrite) { reg_bpaddress(reg_tdrselect.tdrindex) := wdata } } } @@ -553,7 +553,8 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_tdrselect.reserved := 0 reg_tdrselect.tdrmode := true // TODO support D-mode breakpoint theft - for (bpc <- reg_bpcontrol) { + if (reg_bp.isEmpty) reg_tdrselect.tdrindex := 0 + for (bpc <- reg_bp map {_.control}) { bpc.tdrtype := bpc.tdrType bpc.bpamaskmax := bpc.bpaMaskMax bpc.reserved := 0 @@ -568,4 +569,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) bpc.x := false } } + for (bp <- reg_bp drop p(NBreakpoints)) + bp := new BP().fromBits(0) } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 60ed1634..a8d5a4cf 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -228,8 +228,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val bpu = Module(new BreakpointUnit) bpu.io.status := csr.io.status - bpu.io.bpcontrol := csr.io.bpcontrol - bpu.io.bpaddress := csr.io.bpaddress + bpu.io.bp := csr.io.bp bpu.io.pc := id_pc bpu.io.ea := mem_reg_wdata From e3816d5fc7cb230ecc2177854a33298770a49528 Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Tue, 14 Jun 2016 16:59:37 -0700 Subject: [PATCH 1049/1087] set invalidate_lr in other rocc examples (#47) This should fix https://travis-ci.org/ucb-bar/rocket-chip/jobs/137607305 --- rocket/src/main/scala/rocc.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index eee67228..b5802d48 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -178,6 +178,7 @@ class TranslatorExample(implicit p: Parameters) extends RoCC()(p) { io.busy := (state =/= s_idle) io.interrupt := Bool(false) io.mem.req.valid := Bool(false) + io.mem.invalidate_lr := Bool(false) io.autl.acquire.valid := Bool(false) io.autl.grant.ready := Bool(false) } @@ 
-259,6 +260,7 @@ class CharacterCountExample(implicit p: Parameters) extends RoCC()(p) io.busy := (state =/= s_idle) io.interrupt := Bool(false) io.mem.req.valid := Bool(false) + io.mem.invalidate_lr := Bool(false) } class OpcodeSet(val opcodes: Seq[UInt]) { From 0b4c8e9af7fc07c164214769d55a0ebf8d469dd9 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 15 Jun 2016 16:21:24 -0700 Subject: [PATCH 1050/1087] Add D-mode single-step support --- rocket/src/main/scala/csr.scala | 28 ++++++++++++++++++---------- rocket/src/main/scala/rocket.scala | 2 +- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index d3daba09..0622ccf5 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -107,8 +107,8 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle { val csr_stall = Bool(OUTPUT) val csr_xcpt = Bool(OUTPUT) val eret = Bool(OUTPUT) + val singleStep = Bool(OUTPUT) - val prv = UInt(OUTPUT, PRV.SZ) val status = new MStatus().asOutput val ptbr = UInt(OUTPUT, paddrBits) val evec = UInt(OUTPUT, vaddrBitsExtended) @@ -168,10 +168,17 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) Causes.fault_store, Causes.user_ecall).map(1 << _).sum) + val exception = io.exception || io.csr_xcpt val reg_debug = Reg(init=Bool(false)) val reg_dpc = Reg(UInt(width = vaddrBitsExtended)) val reg_dscratch = Reg(UInt(width = xLen)) + val reg_singleStepped = Reg(Bool()) + when (io.retire(0) || exception) { reg_singleStepped := true } + when (!io.singleStep) { reg_singleStepped := false } + assert(!io.singleStep || io.retire <= UInt(1)) + assert(!reg_singleStepped || io.retire === UInt(0)) + val reg_tdrselect = Reg(new TDRSelect) val reg_bp = Reg(Vec(1 << log2Up(p(NBreakpoints)), new BP)) @@ -210,7 +217,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val all_interrupts = m_interrupts | s_interrupts val interruptMSB = BigInt(1) << (xLen-1) val 
interruptCause = interruptMSB + PriorityEncoder(all_interrupts) - io.interrupt := all_interrupts.orR + io.interrupt := all_interrupts.orR || reg_singleStepped io.interrupt_cause := interruptCause io.bp := reg_bp take p(NBreakpoints) @@ -350,7 +357,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val insn_sfence_vm = do_system_insn && opcode(4) val insn_wfi = do_system_insn && opcode(5) - val csr_xcpt = (cpu_wen && read_only) || + io.csr_xcpt := (cpu_wen && read_only) || (cpu_ren && (!priv_sufficient || !addr_valid || fp_csr && !io.status.fs.orR)) || (system_insn && !priv_sufficient) || insn_call || insn_break @@ -359,36 +366,36 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (read_mip.orR) { reg_wfi := false } val cause = - Mux(!csr_xcpt, io.cause, + Mux(!io.csr_xcpt, io.cause, Mux(insn_call, reg_mstatus.prv + Causes.user_ecall, Mux[UInt](insn_break, Causes.breakpoint, Causes.illegal_instruction))) val cause_lsbs = cause(log2Up(xLen)-1,0) val causeIsDebugInt = cause(xLen-1) && cause_lsbs === debugIntCause val causeIsDebugBreak = cause === Causes.breakpoint && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv) - val trapToDebug = Bool(usingDebug) && (causeIsDebugInt || causeIsDebugBreak || reg_debug) + val trapToDebug = Bool(usingDebug) && (reg_singleStepped || causeIsDebugInt || causeIsDebugBreak || reg_debug) val delegate = Bool(p(UseVM)) && reg_mstatus.prv < PRV.M && Mux(cause(xLen-1), reg_mideleg(cause_lsbs), reg_medeleg(cause_lsbs)) val debugTVec = Mux(reg_debug, UInt(0x808), UInt(0x800)) val tvec = Mux(trapToDebug, debugTVec, Mux(delegate, reg_stvec.sextTo(vaddrBitsExtended), reg_mtvec)) val epc = Mux(csr_debug, reg_dpc, Mux(Bool(p(UseVM)) && !csr_addr_priv(1), reg_sepc, reg_mepc)) io.fatc := insn_sfence_vm - io.evec := Mux(io.exception || csr_xcpt, tvec, epc) + io.evec := Mux(exception, tvec, epc) io.ptbr := reg_sptbr - io.csr_xcpt := csr_xcpt io.eret := insn_ret + io.singleStep 
:= reg_dcsr.step && !reg_debug io.status := reg_mstatus io.status.sd := io.status.fs.andR || io.status.xs.andR io.status.debug := reg_debug if (xLen == 32) io.status.sd_rv32 := io.status.sd - when (io.exception || csr_xcpt) { + when (exception) { val epc = ~(~io.pc | (coreInstBytes-1)) val pie = read_mstatus(reg_mstatus.prv) when (trapToDebug) { reg_debug := true reg_dpc := epc - reg_dcsr.cause := Mux(causeIsDebugInt, UInt(3), UInt(1)) + reg_dcsr.cause := Mux(reg_singleStepped, UInt(4), Mux(causeIsDebugInt, UInt(3), UInt(1))) reg_dcsr.prv := reg_mstatus.prv }.elsewhen (delegate) { reg_sepc := epc @@ -427,7 +434,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } } - assert(PopCount(insn_ret :: io.exception :: csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive") + assert(PopCount(insn_ret :: io.exception :: io.csr_xcpt :: Nil) <= 1, "these conditions must be mutually exclusive") io.time := reg_cycle io.csr_stall := reg_wfi @@ -490,6 +497,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.dcsr)) { val new_dcsr = new DCSR().fromBits(wdata) reg_dcsr.halt := new_dcsr.halt + reg_dcsr.step := new_dcsr.step reg_dcsr.ebreakm := new_dcsr.ebreakm if (usingVM) reg_dcsr.ebreaks := new_dcsr.ebreaks if (usingUser) reg_dcsr.ebreaku := new_dcsr.ebreaku diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index a8d5a4cf..79938d63 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -297,7 +297,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { ex_ctrl.csr := id_csr ex_reg_btb_hit := io.imem.btb_resp.valid when (io.imem.btb_resp.valid) { ex_reg_btb_resp := io.imem.btb_resp.bits } - ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush + ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep ex_reg_load_use := id_load_use for (i <- 0 until id_raddr.size) { From 60bddddfe63584cc75892fd09279880543313f95 Mon 
Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 17 Jun 2016 18:29:05 -0700 Subject: [PATCH 1051/1087] Merge sptbr and sasid --- rocket/src/main/scala/csr.scala | 16 +++++++++++----- rocket/src/main/scala/dcache.scala | 1 - rocket/src/main/scala/frontend.scala | 1 - rocket/src/main/scala/instructions.scala | 2 -- rocket/src/main/scala/nbdcache.scala | 1 - rocket/src/main/scala/ptw.scala | 8 +++++--- rocket/src/main/scala/rocket.scala | 8 +++++++- rocket/src/main/scala/tlb.scala | 3 +-- 8 files changed, 24 insertions(+), 16 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 0622ccf5..d1cc678a 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -71,6 +71,12 @@ class MIP extends Bundle { val usip = Bool() } +class PTBR(implicit p: Parameters) extends CoreBundle()(p) { + require(maxPAddrBits - pgIdxBits + asIdBits <= xLen) + val asid = UInt(width = asIdBits) + val ppn = UInt(width = maxPAddrBits - pgIdxBits) +} + object PRV { val SZ = 2 @@ -110,7 +116,7 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle { val singleStep = Bool(OUTPUT) val status = new MStatus().asOutput - val ptbr = UInt(OUTPUT, paddrBits) + val ptbr = new PTBR().asOutput val evec = UInt(OUTPUT, vaddrBitsExtended) val exception = Bool(INPUT) val retire = UInt(INPUT, log2Up(1+retireWidth)) @@ -197,7 +203,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_sbadaddr = Reg(UInt(width = vaddrBitsExtended)) val reg_sscratch = Reg(Bits(width = xLen)) val reg_stvec = Reg(UInt(width = vaddrBits)) - val reg_sptbr = Reg(UInt(width = ppnBits)) + val reg_sptbr = Reg(new PTBR) val reg_wfi = Reg(init=Bool(false)) val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) @@ -297,8 +303,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) read_mapping += CSRs.sscratch -> reg_sscratch read_mapping += CSRs.scause -> reg_scause read_mapping += CSRs.sbadaddr -> 
reg_sbadaddr.sextTo(xLen) - read_mapping += CSRs.sptbr -> reg_sptbr - read_mapping += CSRs.sasid -> UInt(0) + read_mapping += CSRs.sptbr -> reg_sptbr.toBits read_mapping += CSRs.sepc -> reg_sepc.sextTo(xLen) read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen) read_mapping += CSRs.mscounteren -> UInt(0) @@ -522,7 +527,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) } when (decoded_addr(CSRs.sie)) { reg_mie := (reg_mie & ~reg_mideleg) | (wdata & reg_mideleg) } when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } - when (decoded_addr(CSRs.sptbr)) { reg_sptbr := wdata } + when (decoded_addr(CSRs.sptbr)) { reg_sptbr.ppn := wdata(ppnBits-1,0) } when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata >> log2Up(coreInstBytes) << log2Up(coreInstBytes) } when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata >> 2 << 2 } when (decoded_addr(CSRs.scause)) { reg_scause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } @@ -559,6 +564,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_mstatus.mprv := false } + reg_sptbr.asid := 0 reg_tdrselect.reserved := 0 reg_tdrselect.tdrmode := true // TODO support D-mode breakpoint theft if (reg_bp.isEmpty) reg_tdrselect.tdrindex := 0 diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 97611bc2..965d4946 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -104,7 +104,6 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.ptw <> tlb.io.ptw tlb.io.req.valid := s1_valid_masked && s1_readwrite tlb.io.req.bits.passthrough := s1_req.phys - tlb.io.req.bits.asid := 0 tlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits tlb.io.req.bits.instruction := false tlb.io.req.bits.store := s1_write diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index e1e24316..e9193c25 100644 --- a/rocket/src/main/scala/frontend.scala +++ 
b/rocket/src/main/scala/frontend.scala @@ -96,7 +96,6 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa io.ptw <> tlb.io.ptw tlb.io.req.valid := !stall && !icmiss tlb.io.req.bits.vpn := s1_pc >> pgIdxBits - tlb.io.req.bits.asid := UInt(0) tlb.io.req.bits.passthrough := Bool(false) tlb.io.req.bits.instruction := Bool(true) tlb.io.req.bits.store := Bool(false) diff --git a/rocket/src/main/scala/instructions.scala b/rocket/src/main/scala/instructions.scala index dd0d515f..339ac42f 100644 --- a/rocket/src/main/scala/instructions.scala +++ b/rocket/src/main/scala/instructions.scala @@ -259,7 +259,6 @@ object CSRs { val sbadaddr = 0x143 val sip = 0x144 val sptbr = 0x180 - val sasid = 0x181 val scycle = 0xd00 val stime = 0xd01 val sinstret = 0xd02 @@ -326,7 +325,6 @@ object CSRs { res += sbadaddr res += sip res += sptbr - res += sasid res += scycle res += stime res += sinstret diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index bbce5980..7b336d37 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -817,7 +817,6 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.ptw <> dtlb.io.ptw dtlb.io.req.valid := s1_valid_masked && s1_readwrite dtlb.io.req.bits.passthrough := s1_req.phys - dtlb.io.req.bits.asid := UInt(0) dtlb.io.req.bits.vpn := s1_req.addr >> pgIdxBits dtlb.io.req.bits.instruction := Bool(false) dtlb.io.req.bits.store := s1_write diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index b83e0fc8..fc9ab76d 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -21,12 +21,13 @@ class PTWResp(implicit p: Parameters) extends CoreBundle()(p) { class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) { val req = Decoupled(new PTWReq) val resp = Valid(new PTWResp).flip - val status = new MStatus().asInput + val ptbr = new PTBR().asInput val invalidate = 
Bool(INPUT) + val status = new MStatus().asInput } class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) { - val ptbr = UInt(INPUT, ppnBits) + val ptbr = new PTBR().asInput val invalidate = Bool(INPUT) val status = new MStatus().asInput } @@ -78,7 +79,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { when (arb.io.out.fire()) { r_req := arb.io.out.bits r_req_dest := arb.io.chosen - r_pte.ppn := io.dpath.ptbr + r_pte.ppn := io.dpath.ptbr.ppn } val (pte_cache_hit, pte_cache_data) = { @@ -130,6 +131,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { io.requestor(i).resp.valid := resp_val && (r_req_dest === i) io.requestor(i).resp.bits.pte := r_pte io.requestor(i).resp.bits.pte.ppn := resp_ppn + io.requestor(i).ptbr := io.dpath.ptbr io.requestor(i).invalidate := io.dpath.invalidate io.requestor(i).status := io.dpath.status } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 79938d63..5243d653 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -66,7 +66,13 @@ trait HasCoreParameters extends HasAddrMapParameters { val enableCommitLog = false val usingPerfCounters = p(UsePerfCounters) - if (fastLoadByte) require(fastLoadWord) + val maxPAddrBits = xLen match { + case 32 => 34 + case 64 => 50 + } + + require(paddrBits < maxPAddrBits) + require(!fastLoadByte || fastLoadWord) } abstract class CoreModule(implicit val p: Parameters) extends Module diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index cb607ccd..5977ba53 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -56,7 +56,6 @@ class RocketCAM(implicit p: Parameters) extends TLBModule()(p) { } class TLBReq(implicit p: Parameters) extends CoreBundle()(p) { - val asid = UInt(width = asIdBits) val vpn = UInt(width = vpnBitsExtended) val passthrough = Bool() val instruction = Bool() @@ -92,7 +91,7 @@ class TLB(implicit p: 
Parameters) extends TLBModule()(p) { val r_refill_waddr = Reg(tag_cam.io.write_addr) val r_req = Reg(new TLBReq) - val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn(vpnBits-1,0)).toUInt + val lookup_tag = Cat(io.ptw.ptbr.asid, io.req.bits.vpn(vpnBits-1,0)).toUInt tag_cam.io.tag := lookup_tag tag_cam.io.write := state === s_wait && io.ptw.resp.valid tag_cam.io.write_tag := r_refill_tag From d1c83ccda0c9a59add21598b9638ba63ac521344 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Mon, 13 Jun 2016 16:18:38 -0700 Subject: [PATCH 1052/1087] change Tile interface to allow arbitrary number of cached and uncached channels --- rocket/src/main/scala/tile.scala | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 14c51966..fca46187 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -9,6 +9,8 @@ import cde.{Parameters, Field} case object CoreName extends Field[String] case object BuildRoCC extends Field[Seq[RoccParameters]] +case object NCachedTileLinkPorts extends Field[Int] +case object NUncachedTileLinkPorts extends Field[Int] case class RoccParameters( opcodes: OpcodeSet, @@ -20,22 +22,23 @@ case class RoccParameters( abstract class Tile(resetSignal: Bool = null) (implicit p: Parameters) extends Module(_reset = resetSignal) { - val buildRocc = p(BuildRoCC) - val usingRocc = !buildRocc.isEmpty - val nRocc = buildRocc.size - val nFPUPorts = buildRocc.filter(_.useFPU).size - val nCachedTileLinkPorts = 1 - val nUncachedTileLinkPorts = 1 + p(RoccNMemChannels) + val nCachedTileLinkPorts = p(NCachedTileLinkPorts) + val nUncachedTileLinkPorts = p(NUncachedTileLinkPorts) val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) + val io = new Bundle { val cached = Vec(nCachedTileLinkPorts, new ClientTileLinkIO) val uncached = Vec(nUncachedTileLinkPorts, new ClientUncachedTileLinkIO) val prci = new PRCITileIO().flip - val dma = new 
DmaIO } } class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile(resetSignal)(p) { + val buildRocc = p(BuildRoCC) + val usingRocc = !buildRocc.isEmpty + val nRocc = buildRocc.size + val nFPUPorts = buildRocc.filter(_.useFPU).size + val core = Module(new Rocket()(p.alterPartial({ case CoreName => "Rocket" }))) val icache = Module(new Frontend()(p.alterPartial({ case CacheName => "L1I" From 4c31248917e9ebd6037c74143223eeb639cf46d5 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 22 Jun 2016 16:09:45 -0700 Subject: [PATCH 1053/1087] make sure UseAtomics is on when PTW is being used --- rocket/src/main/scala/ptw.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index fc9ab76d..a8d4ea5c 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -58,6 +58,8 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { val mem = new HellaCacheIO val dpath = new DatapathPTWIO } + + require(usingAtomics, "PTW requires atomic memory operations") val s_ready :: s_req :: s_wait :: s_set_dirty :: s_wait_dirty :: s_done :: Nil = Enum(UInt(), 6) val state = Reg(init=s_ready) From 7f88a00a38ac1346577158c35d6a30ac246dd213 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 22 Jun 2016 13:47:15 -0700 Subject: [PATCH 1054/1087] Always verify BTB result; don't bother flushing it This improves CPI for things like lbu t0, (t0) j foo addi t0, t0, 1 where the addi would stall, causing j's misprediction check to fail, flushing the pipeline. 
--- rocket/src/main/scala/frontend.scala | 2 +- rocket/src/main/scala/rocket.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index e9193c25..d696cada 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -81,7 +81,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa btb.io.btb_update := io.cpu.btb_update btb.io.bht_update := io.cpu.bht_update btb.io.ras_update := io.cpu.ras_update - btb.io.invalidate := io.cpu.flush_icache || io.cpu.flush_tlb // virtual tags + btb.io.invalidate := false when (!stall && !icmiss) { btb.io.req.valid := true s2_btb_resp_valid := btb.io.resp.valid diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 5243d653..8a60609c 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -343,13 +343,13 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), SInt(4))) val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).toSInt, mem_br_target) & SInt(-2)).toUInt - val mem_wrong_npc = mem_npc =/= ex_reg_pc || !ex_reg_valid + val mem_wrong_npc = Mux(ex_reg_valid, mem_npc =/= ex_reg_pc, Mux(io.imem.resp.valid, mem_npc =/= id_pc, Bool(true))) val mem_npc_misaligned = mem_npc(1) val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal val mem_misprediction = if (p(BtbKey).nEntries == 0) mem_cfi_taken - else mem_cfi && mem_wrong_npc + else mem_wrong_npc val want_take_pc_mem = mem_reg_valid && (mem_misprediction || mem_reg_flush_pipe) take_pc_mem := want_take_pc_mem && !mem_npc_misaligned From 5644a2703a08a2f6e7d312322d766d8525c8eed6 Mon Sep 17 
00:00:00 2001 From: Andrew Waterman Date: Wed, 22 Jun 2016 13:49:33 -0700 Subject: [PATCH 1055/1087] Avoid need for FENCE.I in debug programs This is a hack to work around caching the (uncacheable) debug RAM. The RAM is always entered with a JALR, so flush the I$ on any debug-mode JALR. --- rocket/src/main/scala/rocket.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 8a60609c..12a99608 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -306,6 +306,11 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { ex_reg_flush_pipe := id_ctrl.fence_i || id_csr_flush || csr.io.singleStep ex_reg_load_use := id_load_use + when (id_ctrl.jalr && csr.io.status.debug) { + ex_reg_flush_pipe := true + ex_ctrl.fence_i := true + } + for (i <- 0 until id_raddr.size) { val do_bypass = id_bypass_src(i).reduce(_||_) val bypass_src = PriorityEncoder(id_bypass_src(i)) From 6d43c0a945e284f2b885639e7cdb5e680eb0cb29 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 22 Jun 2016 17:17:52 -0700 Subject: [PATCH 1056/1087] Mask interrupts during single-step --- rocket/src/main/scala/csr.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index d1cc678a..d223b6b5 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -223,7 +223,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val all_interrupts = m_interrupts | s_interrupts val interruptMSB = BigInt(1) << (xLen-1) val interruptCause = interruptMSB + PriorityEncoder(all_interrupts) - io.interrupt := all_interrupts.orR || reg_singleStepped + io.interrupt := all_interrupts.orR && !io.singleStep || reg_singleStepped io.interrupt_cause := interruptCause io.bp := reg_bp take p(NBreakpoints) From 6f850564946d2ec1bf4c2288273253f165be9a72 Mon Sep 17 00:00:00 2001 From: Andrew 
Waterman Date: Thu, 23 Jun 2016 13:18:42 -0700 Subject: [PATCH 1057/1087] Remove reliance on HtifKey --- rocket/src/main/scala/rocket.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 12a99608..7b318e38 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -59,7 +59,7 @@ trait HasCoreParameters extends HasAddrMapParameters { val roccCsrs = if (p(BuildRoCC).isEmpty) Nil else p(BuildRoCC).flatMap(_.csrs) val nRoccCsrs = p(RoccNCSRs) - val nCores = p(HtifKey).nCores + val nCores = p(NTiles) // Print out log of committed instructions and their writeback values. // Requires post-processing due to out-of-order writebacks. From a70dee17ead06e36fb10326937d77e5433f4bb55 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 28 Jun 2016 12:10:33 -0700 Subject: [PATCH 1058/1087] Make RoCC energy-saving logic mirror same for D$ --- rocket/src/main/scala/rocket.scala | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 7b318e38..466b4408 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -180,7 +180,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val wb_reg_mem_xcpt = Reg(Bool()) val wb_reg_replay = Reg(Bool()) val wb_reg_cause = Reg(UInt()) - val wb_reg_rocc_pending = Reg(init=Bool(false)) val wb_reg_pc = Reg(UInt()) val wb_reg_inst = Reg(Bits()) val wb_reg_wdata = Reg(Bits()) @@ -419,14 +418,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc val replay_wb_common = io.dmem.s2_nack || wb_reg_replay - val wb_rocc_val = wb_reg_valid && wb_ctrl.rocc && !replay_wb_common val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt 
take_pc_wb := replay_wb || wb_xcpt || csr.io.eret - when (wb_rocc_val) { wb_reg_rocc_pending := !io.rocc.cmd.ready } - when (wb_reg_xcpt) { wb_reg_rocc_pending := Bool(false) } - // writeback arbitration val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool @@ -529,12 +524,14 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val dcache_blocked = Reg(Bool()) dcache_blocked := !io.dmem.req.ready && (io.dmem.req.valid || dcache_blocked) + val rocc_blocked = Reg(Bool()) + rocc_blocked := !wb_reg_xcpt && !io.rocc.cmd.ready && (io.rocc.cmd.valid || rocc_blocked) val ctrl_stalld = id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || id_ctrl.fp && id_stall_fpu || id_ctrl.mem && dcache_blocked || // reduce activity during D$ misses - Bool(usingRoCC) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || + id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy id_do_fence || csr.io.csr_stall ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt @@ -591,7 +588,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { io.dmem.s1_data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) io.dmem.invalidate_lr := wb_xcpt - io.rocc.cmd.valid := wb_rocc_val + io.rocc.cmd.valid := wb_reg_valid && wb_ctrl.rocc && !replay_wb_common io.rocc.exception := wb_xcpt && csr.io.status.xs.orR io.rocc.status := csr.io.status io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst) From c10691b616ee713237ab561e358a8e0ee7bd2348 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 28 Jun 2016 12:47:49 -0700 Subject: [PATCH 1059/1087] Don't take interrupts on instructions in branch shadow In situations like j 1f nop 1: nop the interrupt could be taken on the first nop. 
--- rocket/src/main/scala/rocket.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 466b4408..2ca94cba 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -327,6 +327,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } // replay inst in ex stage? + val ex_pc_valid = ex_reg_valid || ex_reg_xcpt_interrupt val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready || ex_ctrl.div && !div.io.req.ready @@ -347,7 +348,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { Mux(mem_ctrl.jal, ImmGen(IMM_UJ, mem_reg_inst), SInt(4))) val mem_int_wdata = Mux(mem_ctrl.jalr, mem_br_target, mem_reg_wdata.toSInt).toUInt val mem_npc = (Mux(mem_ctrl.jalr, encodeVirtualAddress(mem_reg_wdata, mem_reg_wdata).toSInt, mem_br_target) & SInt(-2)).toUInt - val mem_wrong_npc = Mux(ex_reg_valid, mem_npc =/= ex_reg_pc, Mux(io.imem.resp.valid, mem_npc =/= id_pc, Bool(true))) + val mem_wrong_npc = Mux(ex_pc_valid, mem_npc =/= ex_reg_pc, Mux(io.imem.resp.valid, mem_npc =/= id_pc, Bool(true))) val mem_npc_misaligned = mem_npc(1) val mem_cfi = mem_ctrl.branch || mem_ctrl.jalr || mem_ctrl.jal val mem_cfi_taken = (mem_ctrl.branch && mem_br_taken) || mem_ctrl.jalr || mem_ctrl.jal @@ -363,7 +364,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { mem_reg_xcpt_interrupt := !take_pc_mem_wb && ex_reg_xcpt_interrupt when (ex_xcpt) { mem_reg_cause := ex_cause } - when (ex_reg_valid || ex_reg_xcpt_interrupt) { + when (ex_pc_valid) { mem_ctrl := ex_ctrl mem_reg_load := ex_ctrl.mem && isRead(ex_ctrl.mem_cmd) mem_reg_store := ex_ctrl.mem && isWrite(ex_ctrl.mem_cmd) From a9e0a5e2df2d4bda31d24b5b45a00261a55eb128 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 28 Jun 2016 13:15:39 -0700 Subject: [PATCH 1060/1087] changes to imports after uncore refactor --- 
rocket/src/main/scala/arbiter.scala | 1 - rocket/src/main/scala/csr.scala | 2 +- rocket/src/main/scala/dcache.scala | 8 ++++++-- rocket/src/main/scala/dma.scala | 7 +++++-- rocket/src/main/scala/frontend.scala | 2 +- rocket/src/main/scala/icache.scala | 4 +++- rocket/src/main/scala/nbdcache.scala | 20 +++++++++++++------- rocket/src/main/scala/ptw.scala | 3 ++- rocket/src/main/scala/rocc.scala | 4 +++- rocket/src/main/scala/rocket.scala | 4 +++- rocket/src/main/scala/tile.scala | 4 +++- rocket/src/main/scala/tlb.scala | 3 ++- 12 files changed, 42 insertions(+), 20 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 507fb45e..6b3cee53 100644 --- a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -3,7 +3,6 @@ package rocket import Chisel._ -import uncore._ import cde.{Parameters, Field} import junctions.{ParameterizedBundle, DecoupledHelper} diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index d223b6b5..017a115e 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -6,7 +6,7 @@ import Chisel._ import Util._ import Instructions._ import cde.{Parameters, Field} -import uncore._ +import uncore.devices._ import junctions.AddrMap class MStatus extends Bundle { diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 965d4946..f565e4b6 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -3,8 +3,12 @@ package rocket import Chisel._ -import uncore._ import junctions._ +import uncore.tilelink._ +import uncore.agents._ +import uncore.coherence._ +import uncore.util._ +import uncore.constants._ import cde.{Parameters, Field} import Util._ @@ -250,7 +254,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { addr_block = s2_req.addr(paddrBits-1, blockOffBits), addr_beat = s2_req.addr(blockOffBits-1, beatOffBits), data = Fill(beatWords, 
pstore1_storegen.data), - wmask = pstore1_storegen.mask << (uncachedPutOffset << wordOffBits), + wmask = Some(pstore1_storegen.mask << (uncachedPutOffset << wordOffBits)), alloc = Bool(false)) val uncachedPutAtomicMessage = PutAtomic( client_xact_id = UInt(0), diff --git a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala index a7fabc1e..ce1af0ed 100644 --- a/rocket/src/main/scala/dma.scala +++ b/rocket/src/main/scala/dma.scala @@ -1,8 +1,11 @@ package rocket import Chisel._ -import uncore._ -import uncore.DmaRequest._ +import uncore.tilelink._ +import uncore.devices._ +import uncore.devices.DmaRequest._ +import uncore.agents._ +import uncore.util._ import junctions.{ParameterizedBundle, AddrMap} import cde.Parameters diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index d696cada..4c9d3aaf 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -1,7 +1,7 @@ package rocket import Chisel._ -import uncore._ +import uncore.tilelink._ import Util._ import cde.{Parameters, Field} diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index df01dc34..f41995d2 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -1,7 +1,9 @@ package rocket import Chisel._ -import uncore._ +import uncore.agents._ +import uncore.tilelink._ +import uncore.util._ import Util._ import cde.{Parameters, Field} diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 7b336d37..3602700a 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -3,8 +3,12 @@ package rocket import Chisel._ -import uncore._ import junctions._ +import uncore.tilelink._ +import uncore.coherence._ +import uncore.agents._ +import uncore.util._ +import uncore.constants._ import cde.{Parameters, Field} import Util._ @@ -207,7 +211,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) 
extends L1HellaCacheModule()(p) { addr_block = addr_block, addr_beat = addr_beat, data = beat_data, - wmask = beat_mask, + wmask = Some(beat_mask), alloc = Bool(false)) val putAtomic_acquire = PutAtomic( @@ -453,10 +457,10 @@ class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) { val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs)) val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) val mem_req_arb = Module(new LockingArbiter( - new Acquire, - nMSHRs + nIOMSHRs, - outerDataBeats, - (a: Acquire) => a.hasMultibeatData())) + new Acquire, + nMSHRs + nIOMSHRs, + outerDataBeats, + Some((a: Acquire) => a.hasMultibeatData()))) val mem_finish_arb = Module(new Arbiter(new FinishToDst, nMSHRs + nIOMSHRs)) val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs)) val replay_arb = Module(new Arbiter(new ReplayInternal, nMSHRs)) @@ -994,7 +998,9 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { metaWriteArb.io.in(0) <> mshrs.io.meta_write // probes and releases - val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, (r: Release) => r.hasMultibeatData())) + val releaseArb = Module(new LockingArbiter( + new Release, 2, outerDataBeats, + Some((r: Release) => r.hasMultibeatData()))) io.mem.release <> releaseArb.io.out prober.io.req.valid := io.mem.probe.valid && !lrsc_valid diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index a8d4ea5c..3b98dc25 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -3,7 +3,8 @@ package rocket import Chisel._ -import uncore._ +import uncore.agents._ +import uncore.constants._ import Util._ import cde.{Parameters, Field} diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index b5802d48..e8e5e626 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -3,7 +3,9 @@ package rocket import Chisel._ -import uncore._ +import 
uncore.tilelink._ +import uncore.constants._ +import uncore.agents.CacheName import Util._ import cde.{Parameters, Field} diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 2ca94cba..a94bdfa4 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -4,7 +4,9 @@ package rocket import Chisel._ import junctions._ -import uncore._ +import uncore.devices._ +import uncore.agents.CacheName +import uncore.constants._ import Util._ import cde.{Parameters, Field} diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index fca46187..3ace692e 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -3,7 +3,9 @@ package rocket import Chisel._ -import uncore._ +import uncore.tilelink._ +import uncore.agents._ +import uncore.devices._ import Util._ import cde.{Parameters, Field} diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 5977ba53..3ca8e73f 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -7,7 +7,8 @@ import Util._ import junctions._ import scala.math._ import cde.{Parameters, Field} -import uncore.PseudoLRU +import uncore.agents.PseudoLRU +import uncore.coherence._ case object NTLBEntries extends Field[Int] From 663002ec0c25c575604727967e1feb8c75c0a78b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 2 Jul 2016 14:26:05 -0700 Subject: [PATCH 1061/1087] Improve TLB simulation performance --- rocket/src/main/scala/tlb.scala | 132 +++++++++++--------------------- 1 file changed, 43 insertions(+), 89 deletions(-) diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index 3ca8e73f..a6d5565b 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -18,44 +18,6 @@ trait HasTLBParameters extends HasCoreParameters { val camTagBits = asIdBits + vpnBits } -abstract class TLBModule(implicit val p: Parameters) extends 
Module - with HasTLBParameters -abstract class TLBBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) - with HasTLBParameters - -class CAMIO(implicit p: Parameters) extends TLBBundle()(p) { - val clear = Bool(INPUT) - val clear_mask = Bits(INPUT, entries) - val tag = Bits(INPUT, camTagBits) - val hit = Bool(OUTPUT) - val hits = UInt(OUTPUT, entries) - val valid_bits = Bits(OUTPUT, entries) - - val write = Bool(INPUT) - val write_tag = Bits(INPUT, camTagBits) - val write_addr = UInt(INPUT, camAddrBits) -} - -class RocketCAM(implicit p: Parameters) extends TLBModule()(p) { - val io = new CAMIO - val cam_tags = Mem(entries, Bits(width = camTagBits)) - - val vb_array = Reg(init=Bits(0, entries)) - when (io.write) { - vb_array := vb_array.bitSet(io.write_addr, Bool(true)) - cam_tags(io.write_addr) := io.write_tag - } - when (io.clear) { - vb_array := vb_array & ~io.clear_mask - } - - val hits = (0 until entries).map(i => vb_array(i) && cam_tags(i) === io.tag) - - io.valid_bits := vb_array - io.hits := Vec(hits).toBits - io.hit := io.hits.orR -} - class TLBReq(implicit p: Parameters) extends CoreBundle()(p) { val vpn = UInt(width = vpnBitsExtended) val passthrough = Bool() @@ -63,7 +25,7 @@ class TLBReq(implicit p: Parameters) extends CoreBundle()(p) { val store = Bool() } -class TLBRespNoHitIndex(implicit p: Parameters) extends CoreBundle()(p) { +class TLBResp(implicit p: Parameters) extends CoreBundle()(p) { // lookup responses val miss = Bool(OUTPUT) val ppn = UInt(OUTPUT, ppnBits) @@ -72,101 +34,89 @@ class TLBRespNoHitIndex(implicit p: Parameters) extends CoreBundle()(p) { val xcpt_if = Bool(OUTPUT) } -class TLBResp(implicit p: Parameters) extends TLBRespNoHitIndex()(p) with HasTLBParameters { - val hit_idx = UInt(OUTPUT, entries) -} - -class TLB(implicit p: Parameters) extends TLBModule()(p) { +class TLB(implicit val p: Parameters) extends Module with HasTLBParameters { val io = new Bundle { val req = Decoupled(new TLBReq).flip val resp = new 
TLBResp val ptw = new TLBPTWIO } - val tag_cam = Module(new RocketCAM) - val tag_ram = Mem(entries, io.ptw.resp.bits.pte.ppn) + val valid = Reg(init = UInt(0, entries)) + val ppns = Reg(Vec(entries, io.ptw.resp.bits.pte.ppn)) + val tags = Reg(Vec(entries, UInt(width = asIdBits + vpnBits))) val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(UInt(), 4) val state = Reg(init=s_ready) - val r_refill_tag = Reg(tag_cam.io.write_tag) - val r_refill_waddr = Reg(tag_cam.io.write_addr) + val r_refill_tag = Reg(UInt(width = asIdBits + vpnBits)) + val r_refill_waddr = Reg(UInt(width = log2Ceil(entries))) val r_req = Reg(new TLBReq) val lookup_tag = Cat(io.ptw.ptbr.asid, io.req.bits.vpn(vpnBits-1,0)).toUInt - tag_cam.io.tag := lookup_tag - tag_cam.io.write := state === s_wait && io.ptw.resp.valid - tag_cam.io.write_tag := r_refill_tag - tag_cam.io.write_addr := r_refill_waddr - val tag_hit_addr = OHToUInt(tag_cam.io.hits) + val hitsVec = (0 until entries).map(i => valid(i) && tags(i) === lookup_tag) + val hits = hitsVec.toBits // permission bit arrays - val ur_array = Reg(Vec(entries, Bool())) // user read permission - val uw_array = Reg(Vec(entries, Bool())) // user write permission - val ux_array = Reg(Vec(entries, Bool())) // user execute permission - val sr_array = Reg(Vec(entries, Bool())) // supervisor read permission - val sw_array = Reg(Vec(entries, Bool())) // supervisor write permission - val sx_array = Reg(Vec(entries, Bool())) // supervisor execute permission - val dirty_array = Reg(Vec(entries, Bool())) // PTE dirty bit + val ur_array = Reg(UInt(width = entries)) // user read permission + val uw_array = Reg(UInt(width = entries)) // user write permission + val ux_array = Reg(UInt(width = entries)) // user execute permission + val sr_array = Reg(UInt(width = entries)) // supervisor read permission + val sw_array = Reg(UInt(width = entries)) // supervisor write permission + val sx_array = Reg(UInt(width = entries)) // supervisor execute permission + 
val dirty_array = Reg(UInt(width = entries)) // PTE dirty bit when (io.ptw.resp.valid) { val pte = io.ptw.resp.bits.pte - tag_ram(r_refill_waddr) := pte.ppn - ur_array(r_refill_waddr) := pte.ur() - uw_array(r_refill_waddr) := pte.uw() - ux_array(r_refill_waddr) := pte.ux() - sr_array(r_refill_waddr) := pte.sr() - sw_array(r_refill_waddr) := pte.sw() - sx_array(r_refill_waddr) := pte.sx() - dirty_array(r_refill_waddr) := pte.d + ppns(r_refill_waddr) := pte.ppn + tags(r_refill_waddr) := r_refill_tag + + val mask = UIntToOH(r_refill_waddr) + valid := valid | mask + ur_array := Mux(pte.ur(), ur_array | mask, ur_array & ~mask) + uw_array := Mux(pte.uw(), uw_array | mask, uw_array & ~mask) + ux_array := Mux(pte.ux(), ux_array | mask, ux_array & ~mask) + sr_array := Mux(pte.sr(), sr_array | mask, sr_array & ~mask) + sw_array := Mux(pte.sw(), sw_array | mask, sw_array & ~mask) + sx_array := Mux(pte.sx(), sx_array | mask, sx_array & ~mask) + dirty_array := Mux(pte.d, dirty_array | mask, dirty_array & ~mask) } // high if there are any unused (invalid) entries in the TLB - val has_invalid_entry = !tag_cam.io.valid_bits.andR - val invalid_entry = PriorityEncoder(~tag_cam.io.valid_bits) val plru = new PseudoLRU(entries) - val repl_waddr = Mux(has_invalid_entry, invalid_entry, plru.replace) + val repl_waddr = Mux(!valid.andR, PriorityEncoder(~valid), plru.replace) val do_mprv = io.ptw.status.mprv && !io.req.bits.instruction val priv = Mux(do_mprv, io.ptw.status.mpp, io.ptw.status.prv) val priv_s = priv === PRV.S val priv_uses_vm = priv <= PRV.S && !io.ptw.status.debug - val req_xwr = Cat(!r_req.store, r_req.store, !(r_req.instruction || r_req.store)) - val ur_bits = ur_array.toBits - val pum_ok = ~Mux(io.ptw.status.pum, ur_bits, UInt(0)) - val r_array = Mux(priv_s, sr_array.toBits & pum_ok, ur_bits) - val w_array = Mux(priv_s, sw_array.toBits & pum_ok, uw_array.toBits) - val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits) + val pum_ok = ~Mux(io.ptw.status.pum, ur_array, 
UInt(0)) + val r_array = Mux(priv_s, sr_array & pum_ok, ur_array) + val w_array = Mux(priv_s, sw_array & pum_ok, uw_array) + val x_array = Mux(priv_s, sx_array, ux_array) val vm_enabled = Bool(usingVM) && io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough val bad_va = if (vpnBits == vpnBitsExtended) Bool(false) else io.req.bits.vpn(vpnBits) =/= io.req.bits.vpn(vpnBits-1) // it's only a store hit if the dirty bit is set - val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0))) + val tag_hits = hits & (dirty_array | ~Mux(io.req.bits.store, w_array, UInt(0))) val tag_hit = tag_hits.orR val tlb_hit = vm_enabled && tag_hit val tlb_miss = vm_enabled && !tag_hit && !bad_va when (io.req.valid && tlb_hit) { - plru.access(OHToUInt(tag_cam.io.hits)) + plru.access(OHToUInt(hits)) } val paddr = Cat(io.resp.ppn, UInt(0, pgIdxBits)) val addr_prot = addrMap.getProt(paddr) io.req.ready := state === s_ready - io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & tag_cam.io.hits).orR) - io.resp.xcpt_st := bad_va || (!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & tag_cam.io.hits).orR) - io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & tag_cam.io.hits).orR) + io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & hits).orR) + io.resp.xcpt_st := bad_va || (!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & hits).orR) + io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & hits).orR) io.resp.miss := tlb_miss - io.resp.ppn := Mux(vm_enabled, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(ppnBits-1,0)) - io.resp.hit_idx := tag_cam.io.hits + io.resp.ppn := Mux(vm_enabled, Mux1H(hitsVec, ppns), io.req.bits.vpn(ppnBits-1,0)) - // clear entries on a TLB flush. - // TODO: selective flushing. 
careful with superpage mappings (flush it all) - tag_cam.io.clear := io.ptw.invalidate - tag_cam.io.clear_mask := ~UInt(0, entries) - io.ptw.req.valid := state === s_request io.ptw.req.bits.addr := r_refill_tag io.ptw.req.bits.prv := io.ptw.status.prv @@ -195,6 +145,10 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) { when (io.ptw.resp.valid) { state := s_ready } + + when (io.ptw.invalidate) { + valid := 0 + } } } From 5aa8ef1855091c7d82a7cc06a758805ff31f0021 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 2 Jul 2016 14:27:29 -0700 Subject: [PATCH 1062/1087] Remove invalidation support from BTB Validating the target PC in the pipeline is cheaper than maintaining the valid bits and control logic to guarantee the BTB won't ever mispredict branch targets. --- rocket/src/main/scala/btb.scala | 107 ++++++++++++--------------- rocket/src/main/scala/frontend.scala | 1 - 2 files changed, 46 insertions(+), 62 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index c5fb0e78..ea30d668 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -36,15 +36,15 @@ class RAS(nras: Int) { pos := nextPos } def peek: UInt = stack(pos) - def pop: Unit = when (!isEmpty) { + def pop(): Unit = when (!isEmpty) { count := count - 1 pos := Mux(Bool(isPow2(nras)) || pos > 0, pos-1, UInt(nras-1)) } - def clear: Unit = count := UInt(0) + def clear(): Unit = count := UInt(0) def isEmpty: Bool = count === UInt(0) - private val count = Reg(init=UInt(0,log2Up(nras+1))) - private val pos = Reg(init=UInt(0,log2Up(nras))) + private val count = Reg(UInt(width = log2Up(nras+1))) + private val pos = Reg(UInt(width = log2Up(nras))) private val stack = Reg(Vec(nras, UInt())) } @@ -140,82 +140,80 @@ class BTB(implicit p: Parameters) extends BtbModule { val btb_update = Valid(new BTBUpdate).flip val bht_update = Valid(new BHTUpdate).flip val ras_update = Valid(new RASUpdate).flip - val invalidate = Bool(INPUT) } - val 
idxValid = Reg(init=UInt(0, entries)) - val idxs = Mem(entries, UInt(width=matchBits)) - val idxPages = Mem(entries, UInt(width=log2Up(nPages))) - val tgts = Mem(entries, UInt(width=matchBits)) - val tgtPages = Mem(entries, UInt(width=log2Up(nPages))) - val pages = Mem(nPages, UInt(width=vaddrBits-matchBits)) - val pageValid = Reg(init=UInt(0, nPages)) + val idxs = Reg(Vec(entries, UInt(width=matchBits))) + val idxPages = Reg(Vec(entries, UInt(width=log2Up(nPages)))) + val tgts = Reg(Vec(entries, UInt(width=matchBits))) + val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages)))) + val pages = Reg(Vec(nPages, UInt(width=vaddrBits-matchBits))) val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0)) val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0)) - val useRAS = Reg(Vec(entries, Bool())) - val isJump = Reg(Vec(entries, Bool())) - val brIdx = Mem(entries, UInt(width=log2Up(fetchWidth))) + val useRAS = Reg(UInt(width = entries)) + val isJump = Reg(UInt(width = entries)) + val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth)))) private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { val p = page(addr) - Vec(pages.map(_ === p)).toBits & pageValid + Vec(pages.map(_ === p)).toBits } private def tagMatch(addr: UInt, pgMatch: UInt) = { - val idx = addr(matchBits-1,0) - val idxMatch = idxs.map(_ === idx).toBits - val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits - idxValid & idxMatch & idxPageMatch + val idxMatch = idxs.map(_ === addr(matchBits-1,0)) + val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR) + (idxPageMatch zip idxMatch) map { case (p, i) => p && i } } val r_btb_update = Pipe(io.btb_update) val update_target = io.req.bits.addr val pageHit = pageMatch(io.req.bits.addr) - val hits = tagMatch(io.req.bits.addr, pageHit) + val hitsVec = tagMatch(io.req.bits.addr, pageHit) + val hits = hitsVec.toBits val updatePageHit = pageMatch(r_btb_update.bits.pc) val updateHits = tagMatch(r_btb_update.bits.pc, 
updatePageHit) val updateHit = r_btb_update.bits.prediction.valid - val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1 + val nextRepl = Reg(UInt(width = log2Ceil(entries))) + when (r_btb_update.valid && !updateHit) { nextRepl := Mux(nextRepl === entries-1 && Bool(!isPow2(entries)), 0, nextRepl + 1) } + val nextPageRepl = Reg(UInt(width = log2Ceil(nPages))) val useUpdatePageHit = updatePageHit.orR + val usePageHit = pageHit.orR val doIdxPageRepl = !useUpdatePageHit - val idxPageRepl = Wire(UInt(width = nPages)) - val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) + val idxPageRepl = UIntToOH(nextPageRepl) + val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, + Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), idxPageRepl)) val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) val samePage = page(r_btb_update.bits.pc) === page(update_target) - val usePageHit = (pageHit & ~idxPageReplEn).orR val doTgtPageRepl = !samePage && !usePageHit - val tgtPageRepl = Mux(samePage, idxPageUpdateOH, idxPageUpdateOH(nPages-2,0) << 1 | idxPageUpdateOH(nPages-1)) + val tgtPageRepl = Mux(samePage, idxPageUpdateOH, Cat(idxPageUpdateOH(nPages-2,0), idxPageUpdateOH(nPages-1))) val tgtPageUpdate = OHToUInt(Mux(usePageHit, pageHit, tgtPageRepl)) val tgtPageReplEn = Mux(doTgtPageRepl, tgtPageRepl, UInt(0)) - val doPageRepl = doIdxPageRepl || doTgtPageRepl - val pageReplEn = idxPageReplEn | tgtPageReplEn - idxPageRepl := UIntToOH(Counter(r_btb_update.valid && doPageRepl, nPages)._1) + when (r_btb_update.valid && (doIdxPageRepl || doTgtPageRepl)) { + val both = doIdxPageRepl && doTgtPageRepl + val next = nextPageRepl + Mux[UInt](both, 2, 1) + nextPageRepl := Mux(next >= nPages, next(0), next) + } when (r_btb_update.valid) { assert(io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target") val waddr = - if (updatesOutOfOrder) Mux(updateHits.orR, OHToUInt(updateHits), 
nextRepl) + if (updatesOutOfOrder) Mux(updateHits.reduce(_|_), OHToUInt(updateHits), nextRepl) else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl) - // invalidate entries if we stomp on pages they depend upon - val invalidateMask = Vec.tabulate(entries)(i => (pageReplEn & (idxPagesOH(i) | tgtPagesOH(i))).orR).toBits - val validateMask = UIntToOH(waddr) - idxValid := (idxValid & ~invalidateMask) | validateMask - idxs(waddr) := r_btb_update.bits.pc tgts(waddr) := update_target idxPages(waddr) := idxPageUpdate tgtPages(waddr) := tgtPageUpdate - useRAS(waddr) := r_btb_update.bits.isReturn - isJump(waddr) := r_btb_update.bits.isJump + val mask = UIntToOH(waddr) + useRAS := Mux(r_btb_update.bits.isReturn, useRAS | mask, useRAS & ~mask) + isJump := Mux(r_btb_update.bits.isJump, isJump | mask, isJump & ~mask) if (fetchWidth == 1) { brIdx(waddr) := UInt(0) } else { @@ -223,41 +221,29 @@ class BTB(implicit p: Parameters) extends BtbModule { } require(nPages % 2 == 0) - val idxWritesEven = (idxPageUpdateOH & Fill(nPages/2, UInt(1,2))).orR + val idxWritesEven = !idxPageUpdate(0) - def writeBank(i: Int, mod: Int, en: Bool, data: UInt) = + def writeBank(i: Int, mod: Int, en: UInt, data: UInt) = for (i <- i until nPages by mod) - when (en && pageReplEn(i)) { pages(i) := data } + when (en(i)) { pages(i) := data } - writeBank(0, 2, Mux(idxWritesEven, doIdxPageRepl, doTgtPageRepl), + writeBank(0, 2, Mux(idxWritesEven, idxPageReplEn, tgtPageReplEn), Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target))) - writeBank(1, 2, Mux(idxWritesEven, doTgtPageRepl, doIdxPageRepl), + writeBank(1, 2, Mux(idxWritesEven, tgtPageReplEn, idxPageReplEn), Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc))) - - when (doPageRepl) { pageValid := pageValid | pageReplEn } - } - - when (io.invalidate) { - idxValid := 0 - pageValid := 0 } io.resp.valid := hits.orR io.resp.bits.taken := io.resp.valid - io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), 
pages), Mux1H(hits, tgts)) + io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts)) io.resp.bits.entry := OHToUInt(hits) io.resp.bits.bridx := brIdx(io.resp.bits.entry) - if (fetchWidth == 1) { - io.resp.bits.mask := UInt(1) - } else { - // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case - io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)).toSInt, - SInt(-1)).toUInt - } + io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)).toSInt, + SInt(-1)).toUInt if (nBHT > 0) { val bht = new BHT(nBHT) - val isBranch = !Mux1H(hits, isJump) + val isBranch = !(hits & isJump).orR val res = bht.get(io.req.bits.addr, io.req.valid && io.resp.valid && isBranch) val update_btb_hit = io.bht_update.bits.prediction.valid when (io.bht_update.valid && update_btb_hit) { @@ -269,7 +255,7 @@ class BTB(implicit p: Parameters) extends BtbModule { if (nRAS > 0) { val ras = new RAS(nRAS) - val doPeek = Mux1H(hits, useRAS) + val doPeek = (hits & useRAS).orR when (!ras.isEmpty && doPeek) { io.resp.bits.target := ras.peek } @@ -280,9 +266,8 @@ class BTB(implicit p: Parameters) extends BtbModule { io.resp.bits.target := io.ras_update.bits.returnAddr } }.elsewhen (io.ras_update.bits.isReturn && io.ras_update.bits.prediction.valid) { - ras.pop + ras.pop() } } - when (io.invalidate) { ras.clear } } } diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 4c9d3aaf..83189a94 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -81,7 +81,6 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa btb.io.btb_update := io.cpu.btb_update btb.io.bht_update := io.cpu.bht_update btb.io.ras_update := io.cpu.ras_update - btb.io.invalidate := false when (!stall && !icmiss) { btb.io.req.valid := true s2_btb_resp_valid := btb.io.resp.valid 
From 2d325df60c0c2a83c6e5d722234a6e2769f8dd5f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 2 Jul 2016 14:34:18 -0700 Subject: [PATCH 1063/1087] Improve PTW simulation performance --- rocket/src/main/scala/ptw.scala | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 3b98dc25..34f4820c 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -88,21 +88,20 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { val (pte_cache_hit, pte_cache_data) = { val size = log2Up(pgLevels * 2) val plru = new PseudoLRU(size) - val valid = Reg(Vec(size, Bool())) - val validBits = valid.toBits - val tags = Mem(size, UInt(width = paddrBits)) - val data = Mem(size, UInt(width = ppnBits)) + val valid = Reg(init = UInt(0, size)) + val tags = Reg(Vec(size, UInt(width = paddrBits))) + val data = Reg(Vec(size, UInt(width = ppnBits))) - val hits = Vec(tags.map(_ === pte_addr)).toBits & validBits + val hits = Vec(tags.map(_ === pte_addr)).toBits & valid val hit = hits.orR when (io.mem.resp.valid && pte.table() && !hit) { - val r = Mux(validBits.andR, plru.replace, PriorityEncoder(~validBits)) - valid(r) := true + val r = Mux(valid.andR, plru.replace, PriorityEncoder(~valid)) + valid := valid | UIntToOH(r) tags(r) := pte_addr data(r) := pte.ppn } when (hit && state === s_req) { plru.access(OHToUInt(hits)) } - when (reset || io.dpath.invalidate) { valid.foreach(_ := false) } + when (io.dpath.invalidate) { valid := 0 } (hit, Mux1H(hits, data)) } From ebefe5703688ff7704aa3c6824aaba0f9f2ffa9f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Mon, 4 Jul 2016 23:43:25 -0700 Subject: [PATCH 1064/1087] simplify BTB fetchWidth=1 special case --- rocket/src/main/scala/btb.scala | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 
ea30d668..0a5d46d0 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -152,7 +152,7 @@ class BTB(implicit p: Parameters) extends BtbModule { val useRAS = Reg(UInt(width = entries)) val isJump = Reg(UInt(width = entries)) - val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth)))) + val brIdx = if (fetchWidth > 1) Reg(Vec(entries, UInt(width=log2Up(fetchWidth)))) else Seq(UInt(0)) private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { @@ -214,11 +214,8 @@ class BTB(implicit p: Parameters) extends BtbModule { val mask = UIntToOH(waddr) useRAS := Mux(r_btb_update.bits.isReturn, useRAS | mask, useRAS & ~mask) isJump := Mux(r_btb_update.bits.isJump, isJump | mask, isJump & ~mask) - if (fetchWidth == 1) { - brIdx(waddr) := UInt(0) - } else { + if (fetchWidth > 1) brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes) - } require(nPages % 2 == 0) val idxWritesEven = !idxPageUpdate(0) @@ -237,9 +234,8 @@ class BTB(implicit p: Parameters) extends BtbModule { io.resp.bits.taken := io.resp.valid io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts)) io.resp.bits.entry := OHToUInt(hits) - io.resp.bits.bridx := brIdx(io.resp.bits.entry) - io.resp.bits.mask := Mux(io.resp.bits.taken, Cat((UInt(1) << brIdx(io.resp.bits.entry))-1, UInt(1)).toSInt, - SInt(-1)).toUInt + io.resp.bits.bridx := Mux1H(hitsVec, brIdx) + io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1)) if (nBHT > 0) { val bht = new BHT(nBHT) From 8bd7e3932b9df9aa32a53f53c072319bb9252c30 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 5 Jul 2016 19:19:49 -0700 Subject: [PATCH 1065/1087] Implement priv-1.9 PTE scheme --- rocket/src/main/scala/csr.scala | 4 ++- rocket/src/main/scala/ptw.scala | 46 ++++++++++++++++++++------------- rocket/src/main/scala/tlb.scala | 25 ++++++++---------- 3 files changed, 42 insertions(+), 33 deletions(-) diff 
--git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 017a115e..039a07f2 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -17,7 +17,8 @@ class MStatus extends Bundle { val sd_rv32 = Bool() val zero2 = UInt(width = 2) val vm = UInt(width = 5) - val zero1 = UInt(width = 5) + val zero1 = UInt(width = 4) + val mxr = Bool() val pum = Bool() val mprv = Bool() val xs = UInt(width = 2) @@ -463,6 +464,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_mstatus.mprv := new_mstatus.mprv when (supportedModes contains new_mstatus.mpp) { reg_mstatus.mpp := new_mstatus.mpp } if (supportedModes.size > 2) { + reg_mstatus.mxr := new_mstatus.mxr reg_mstatus.pum := new_mstatus.pum reg_mstatus.spp := new_mstatus.spp reg_mstatus.spie := new_mstatus.spie diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 34f4820c..ac91083f 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -9,8 +9,10 @@ import Util._ import cde.{Parameters, Field} class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { - val addr = UInt(width = vpnBits) val prv = Bits(width = 2) + val pum = Bool() + val mxr = Bool() + val addr = UInt(width = vpnBits) val store = Bool() val fetch = Bool() } @@ -34,23 +36,32 @@ class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) { } class PTE(implicit p: Parameters) extends CoreBundle()(p) { - val ppn = Bits(width = ppnBits) - val reserved_for_software = Bits(width = 3) + val reserved_for_hardware = Bits(width = 16) + val ppn = UInt(width = 38) + val reserved_for_software = Bits(width = 2) val d = Bool() + val a = Bool() + val g = Bool() + val u = Bool() + val x = Bool() + val w = Bool() val r = Bool() - val typ = Bits(width = 4) val v = Bool() - def table(dummy: Int = 0) = v && typ < 2 - def leaf(dummy: Int = 0) = v && typ >= 2 - def ur(dummy: Int = 0) = leaf() && typ < 8 - def uw(dummy: Int = 0) = ur() && typ(0) - 
def ux(dummy: Int = 0) = ur() && typ(1) - def sr(dummy: Int = 0) = leaf() - def sw(dummy: Int = 0) = leaf() && typ(0) - def sx(dummy: Int = 0) = v && typ >= 4 && typ(1) - def access_ok(prv: Bits, store: Bool, fetch: Bool) = - Mux(prv(0), Mux(fetch, sx(), Mux(store, sw(), sr())), Mux(fetch, ux(), Mux(store, uw(), ur()))) + def table(dummy: Int = 0) = v && !r && !w && !x + def leaf(dummy: Int = 0) = v && (r || (x && !w)) + def ur(dummy: Int = 0) = sr() && u + def uw(dummy: Int = 0) = sw() && u + def ux(dummy: Int = 0) = sx() && u + def sr(dummy: Int = 0) = leaf() && r + def sw(dummy: Int = 0) = leaf() && w + def sx(dummy: Int = 0) = leaf() && x + + def access_ok(req: PTWReq) = { + val perm_ok = Mux(req.fetch, x, Mux(req.store, w, r || (x && req.mxr))) + val priv_ok = Mux(u, !req.pum, req.prv(0)) + leaf() && priv_ok && perm_ok + } } class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { @@ -106,14 +117,13 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { (hit, Mux1H(hits, data)) } - val perm_ok = pte.access_ok(r_req.prv, r_req.store, r_req.fetch) - val set_dirty_bit = perm_ok && (!pte.r || (r_req.store && !pte.d)) + val set_dirty_bit = pte.access_ok(r_req) && (!pte.a || (r_req.store && !pte.d)) when (io.mem.resp.valid && state === s_wait && !set_dirty_bit) { r_pte := pte } val pte_wdata = Wire(init=new PTE().fromBits(0)) - pte_wdata.r := true + pte_wdata.a := true pte_wdata.d := r_req.store io.mem.req.valid := state === s_req || state === s_set_dirty @@ -162,7 +172,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { } when (io.mem.resp.valid) { state := s_done - when (pte.leaf() && set_dirty_bit) { + when (set_dirty_bit) { state := s_set_dirty } when (pte.table() && count < pgLevels-1) { diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index a6d5565b..e99873b1 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -56,12 +56,11 @@ class TLB(implicit val 
p: Parameters) extends Module with HasTLBParameters { val hits = hitsVec.toBits // permission bit arrays - val ur_array = Reg(UInt(width = entries)) // user read permission - val uw_array = Reg(UInt(width = entries)) // user write permission - val ux_array = Reg(UInt(width = entries)) // user execute permission - val sr_array = Reg(UInt(width = entries)) // supervisor read permission - val sw_array = Reg(UInt(width = entries)) // supervisor write permission - val sx_array = Reg(UInt(width = entries)) // supervisor execute permission + val pte_array = Reg(new PTE) + val u_array = Reg(UInt(width = entries)) // user permission + val sw_array = Reg(UInt(width = entries)) // write permission + val sx_array = Reg(UInt(width = entries)) // execute permission + val sr_array = Reg(UInt(width = entries)) // read permission val dirty_array = Reg(UInt(width = entries)) // PTE dirty bit when (io.ptw.resp.valid) { val pte = io.ptw.resp.bits.pte @@ -70,9 +69,7 @@ class TLB(implicit val p: Parameters) extends Module with HasTLBParameters { val mask = UIntToOH(r_refill_waddr) valid := valid | mask - ur_array := Mux(pte.ur(), ur_array | mask, ur_array & ~mask) - uw_array := Mux(pte.uw(), uw_array | mask, uw_array & ~mask) - ux_array := Mux(pte.ux(), ux_array | mask, ux_array & ~mask) + u_array := Mux(pte.u, u_array | mask, u_array & ~mask) sr_array := Mux(pte.sr(), sr_array | mask, sr_array & ~mask) sw_array := Mux(pte.sw(), sw_array | mask, sw_array & ~mask) sx_array := Mux(pte.sx(), sx_array | mask, sx_array & ~mask) @@ -88,10 +85,10 @@ class TLB(implicit val p: Parameters) extends Module with HasTLBParameters { val priv_s = priv === PRV.S val priv_uses_vm = priv <= PRV.S && !io.ptw.status.debug - val pum_ok = ~Mux(io.ptw.status.pum, ur_array, UInt(0)) - val r_array = Mux(priv_s, sr_array & pum_ok, ur_array) - val w_array = Mux(priv_s, sw_array & pum_ok, uw_array) - val x_array = Mux(priv_s, sx_array, ux_array) + val priv_ok = Mux(priv_s, ~Mux(io.ptw.status.pum, u_array, UInt(0)), 
u_array) + val w_array = priv_ok & sw_array + val x_array = priv_ok & sx_array + val r_array = priv_ok & (sr_array | Mux(io.ptw.status.mxr, x_array, UInt(0))) val vm_enabled = Bool(usingVM) && io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough val bad_va = @@ -118,8 +115,8 @@ class TLB(implicit val p: Parameters) extends Module with HasTLBParameters { io.resp.ppn := Mux(vm_enabled, Mux1H(hitsVec, ppns), io.req.bits.vpn(ppnBits-1,0)) io.ptw.req.valid := state === s_request + io.ptw.req.bits := io.ptw.status io.ptw.req.bits.addr := r_refill_tag - io.ptw.req.bits.prv := io.ptw.status.prv io.ptw.req.bits.store := r_req.store io.ptw.req.bits.fetch := r_req.instruction From 25fdabdd59cbb347fa0e4ff554ce6fcf2c49d067 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 6 Jul 2016 01:31:56 -0700 Subject: [PATCH 1066/1087] Don't implicitly create Vecs, since they're heavyweight --- rocket/src/main/scala/btb.scala | 2 +- rocket/src/main/scala/dcache.scala | 2 +- rocket/src/main/scala/fpu.scala | 4 ++-- rocket/src/main/scala/nbdcache.scala | 8 ++++---- rocket/src/main/scala/ptw.scala | 8 +++++--- rocket/src/main/scala/tile.scala | 2 +- rocket/src/main/scala/util.scala | 15 ++++++++++++++- 7 files changed, 28 insertions(+), 13 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 0a5d46d0..5599205f 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -157,7 +157,7 @@ class BTB(implicit p: Parameters) extends BtbModule { private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { val p = page(addr) - Vec(pages.map(_ === p)).toBits + pages.map(_ === p).toBits } private def tagMatch(addr: UInt, pgMatch: UInt) = { val idxMatch = idxs.map(_ === addr(matchBits-1,0)) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index f565e4b6..70d45a2d 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ 
-116,7 +116,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s1_paddr = Cat(tlb.io.resp.ppn, s1_req.addr(pgIdxBits-1,0)) val s1_tag = Mux(s1_probe, probe_bits.addr_block >> idxBits, s1_paddr(paddrBits-1, untagBits)) - val s1_hit_way = Cat(meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag).reverse) + val s1_hit_way = meta.io.resp.map(r => r.coh.isValid() && r.tag === s1_tag).toBits val s1_hit_state = ClientMetadata.onReset.fromBits( meta.io.resp.map(r => Mux(r.tag === s1_tag, r.coh.toBits, UInt(0))) .reduce (_|_)) diff --git a/rocket/src/main/scala/fpu.scala b/rocket/src/main/scala/fpu.scala index 107f93bd..75153249 100644 --- a/rocket/src/main/scala/fpu.scala +++ b/rocket/src/main/scala/fpu.scala @@ -563,8 +563,8 @@ class FPU(implicit p: Parameters) extends CoreModule()(p) { val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt) val wsrc = (winfo(0) >> 6)(log2Up(pipes.size) - 1,0) val wcp = winfo(0)(6+log2Up(pipes.size)) - val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.res.data))(wsrc)) - val wexc = Vec(pipes.map(_.res.exc))(wsrc) + val wdata = Mux(divSqrt_wen, divSqrt_wdata, (pipes.map(_.res.data): Seq[UInt])(wsrc)) + val wexc = (pipes.map(_.res.exc): Seq[UInt])(wsrc) when ((!wcp && wen(0)) || divSqrt_wen) { regfile(waddr) := wdata if (enableCommitLog) { diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 3602700a..a97620ae 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -884,7 +884,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { writeArb.io.out.ready := data.io.write.ready data.io.write.bits := writeArb.io.out.bits val wdata_encoded = (0 until rowWords).map(i => code.encode(writeArb.io.out.bits.data(coreDataBits*(i+1)-1,coreDataBits*i))) - data.io.write.bits.data := Cat(wdata_encoded.reverse) + data.io.write.bits.data := wdata_encoded.toBits // tag read for new requests 
metaReadArb.io.in(4).valid := io.cpu.req.valid @@ -948,10 +948,10 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { } val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data) val s2_data_decoded = (0 until rowWords).map(i => code.decode(s2_data_muxed(encDataBits*(i+1)-1,encDataBits*i))) - val s2_data_corrected = Vec(s2_data_decoded.map(_.corrected)).toBits - val s2_data_uncorrected = Vec(s2_data_decoded.map(_.uncorrected)).toBits + val s2_data_corrected = s2_data_decoded.map(_.corrected).toBits + val s2_data_uncorrected = s2_data_decoded.map(_.uncorrected).toBits val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,log2Up(wordBytes)) - val s2_data_correctable = Vec(s2_data_decoded.map(_.correctable)).toBits()(s2_word_idx) + val s2_data_correctable = s2_data_decoded.map(_.correctable).toBits()(s2_word_idx) // store/amo hits s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && isWrite(s2_req.cmd) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index ac91083f..9a431618 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -81,7 +81,8 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { val r_req_dest = Reg(Bits()) val r_pte = Reg(new PTE) - val vpn_idx = Vec((0 until pgLevels).map(i => (r_req.addr >> (pgLevels-i-1)*pgLevelBits)(pgLevelBits-1,0)))(count) + val vpn_idxs = (0 until pgLevels).map(i => (r_req.addr >> (pgLevels-i-1)*pgLevelBits)(pgLevelBits-1,0)) + val vpn_idx = vpn_idxs(count) val arb = Module(new RRArbiter(new PTWReq, n)) arb.io.in <> io.requestor.map(_.req) @@ -103,7 +104,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { val tags = Reg(Vec(size, UInt(width = paddrBits))) val data = Reg(Vec(size, UInt(width = ppnBits))) - val hits = Vec(tags.map(_ === pte_addr)).toBits & valid + val hits = tags.map(_ === pte_addr).toBits & valid val hit = hits.orR when (io.mem.resp.valid 
&& pte.table() && !hit) { val r = Mux(valid.andR, plru.replace, PriorityEncoder(~valid)) @@ -136,7 +137,8 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { io.mem.invalidate_lr := Bool(false) val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits - val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count) + val resp_ppns = (0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn + val resp_ppn = resp_ppns(count) val resp_val = state === s_done for (i <- 0 until io.requestor.size) { diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 3ace692e..ee154ee8 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -105,7 +105,7 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) if (p(RoccNCSRs) > 0) { - core.io.rocc.csr.rdata <> roccs.map(_.io.csr.rdata).reduce(_ ++ _) + core.io.rocc.csr.rdata <> roccs.flatMap(_.io.csr.rdata) for ((rocc, accelParams) <- roccs.zip(buildRocc)) { rocc.io.csr.waddr := core.io.rocc.csr.waddr rocc.io.csr.wdata := core.io.rocc.csr.wdata diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 867b25d1..4b58c4e4 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -13,7 +13,6 @@ object Util { implicit def bigIntToUInt(x: BigInt): UInt = UInt(x) implicit def booleanToBool(x: Boolean): Bits = Bool(x) implicit def intSeqToUIntSeq(x: Seq[Int]): Seq[UInt] = x.map(UInt(_)) - implicit def seqToVec[T <: Data](x: Seq[T]): Vec[T] = Vec(x) implicit def wcToUInt(c: WideCounter): UInt = c.value implicit def sextToConv(x: UInt) = new AnyRef { def sextTo(n: Int): UInt = @@ -25,6 +24,20 @@ object Util { def toInt: Int = if (x) 1 else 0 } + implicit 
class SeqToAugmentedSeq[T <: Data](val x: Seq[T]) extends AnyVal { + def apply(idx: UInt): T = { + if (x.size == 1) { + x.head + } else { + val half = 1 << (log2Ceil(x.size) - 1) + val newIdx = idx & (half - 1) + Mux(idx >= UInt(half), x.drop(half)(newIdx), x.take(half)(newIdx)) + } + } + + def toBits(): UInt = Cat(x.map(_.toBits).reverse) + } + def minUInt(values: Seq[UInt]): UInt = values.reduce((a, b) => Mux(a < b, a, b)) From f3e22984d5892b53d2df42615ec6857aa9ed2fec Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 6 Jul 2016 01:37:39 -0700 Subject: [PATCH 1067/1087] Remove uarch counters These will be replaced with the indirect TDR scheme used by breakpoints. --- rocket/src/main/scala/csr.scala | 2 -- rocket/src/main/scala/rocket.scala | 1 - 2 files changed, 3 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 039a07f2..c36bdfed 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -121,7 +121,6 @@ class CSRFileIO(implicit p: Parameters) extends CoreBundle { val evec = UInt(OUTPUT, vaddrBitsExtended) val exception = Bool(INPUT) val retire = UInt(INPUT, log2Up(1+retireWidth)) - val uarch_counters = Vec(16, UInt(INPUT, log2Up(1+retireWidth))) val custom_mrw_csrs = Vec(nCustomMrwCsrs, UInt(INPUT, xLen)) val cause = UInt(INPUT, xLen) val pc = UInt(INPUT, vaddrBitsExtended) @@ -207,7 +206,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val reg_sptbr = Reg(new PTBR) val reg_wfi = Reg(init=Bool(false)) - val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) val reg_fflags = Reg(UInt(width = 5)) val reg_frm = Reg(UInt(width = 3)) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index a94bdfa4..7425f724 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -474,7 +474,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { csr.io.rocc.interrupt <> io.rocc.interrupt 
csr.io.pc := wb_reg_pc csr.io.badaddr := Mux(wb_reg_mem_xcpt, encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata), wb_reg_pc) - csr.io.uarch_counters.foreach(_ := Bool(false)) io.ptw.ptbr := csr.io.ptbr io.ptw.invalidate := csr.io.fatc io.ptw.status := csr.io.status From c0e6ecebfc17afb6a58f210ecb6fae050abb8c85 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 6 Jul 2016 03:16:05 -0700 Subject: [PATCH 1068/1087] Fix BTB perf bug In rare cases, it would replace into a different row than it recorded. --- rocket/src/main/scala/btb.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 5599205f..c98f2e36 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -182,9 +182,8 @@ class BTB(implicit p: Parameters) extends BtbModule { val useUpdatePageHit = updatePageHit.orR val usePageHit = pageHit.orR val doIdxPageRepl = !useUpdatePageHit - val idxPageRepl = UIntToOH(nextPageRepl) - val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, - Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), idxPageRepl)) + val idxPageRepl = Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), UIntToOH(nextPageRepl)) + val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) val idxPageUpdate = OHToUInt(idxPageUpdateOH) val idxPageReplEn = Mux(doIdxPageRepl, idxPageRepl, UInt(0)) From 35a983275efb4add5ff90a2d21d020069591a0f2 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Wed, 6 Jul 2016 15:54:33 -0700 Subject: [PATCH 1069/1087] Guarantee one-hotness of BTB entries --- rocket/src/main/scala/btb.scala | 37 ++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index c98f2e36..5110bfed 100644 --- a/rocket/src/main/scala/btb.scala +++ b/rocket/src/main/scala/btb.scala @@ -147,6 +147,7 @@ class BTB(implicit p: Parameters) 
extends BtbModule { val tgts = Reg(Vec(entries, UInt(width=matchBits))) val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages)))) val pages = Reg(Vec(nPages, UInt(width=vaddrBits-matchBits))) + val pageValid = Reg(init = UInt(0, nPages)) val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0)) val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0)) @@ -157,12 +158,12 @@ class BTB(implicit p: Parameters) extends BtbModule { private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { val p = page(addr) - pages.map(_ === p).toBits + pageValid & pages.map(_ === p).toBits } private def tagMatch(addr: UInt, pgMatch: UInt) = { - val idxMatch = idxs.map(_ === addr(matchBits-1,0)) - val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR) - (idxPageMatch zip idxMatch) map { case (p, i) => p && i } + val idxMatch = idxs.map(_ === addr(matchBits-1,0)).toBits + val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits + idxMatch & idxPageMatch } val r_btb_update = Pipe(io.btb_update) @@ -172,16 +173,20 @@ class BTB(implicit p: Parameters) extends BtbModule { val hitsVec = tagMatch(io.req.bits.addr, pageHit) val hits = hitsVec.toBits val updatePageHit = pageMatch(r_btb_update.bits.pc) - val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit) - val updateHit = r_btb_update.bits.prediction.valid - val nextRepl = Reg(UInt(width = log2Ceil(entries))) - when (r_btb_update.valid && !updateHit) { nextRepl := Mux(nextRepl === entries-1 && Bool(!isPow2(entries)), 0, nextRepl + 1) } - val nextPageRepl = Reg(UInt(width = log2Ceil(nPages))) + val updateHits = tagMatch(r_btb_update.bits.pc, updatePageHit) + val updateHit = if (updatesOutOfOrder) updateHits.orR else r_btb_update.bits.prediction.valid + val updateHitAddr = if (updatesOutOfOrder) OHToUInt(updateHits) else r_btb_update.bits.prediction.bits.entry + + // guarantee one-hotness of idx after reset + val resetting = Reg(init = Bool(true)) + val (nextRepl, wrap) = Counter(resetting || 
(r_btb_update.valid && !updateHit), entries) + when (wrap) { resetting := false } val useUpdatePageHit = updatePageHit.orR val usePageHit = pageHit.orR val doIdxPageRepl = !useUpdatePageHit + val nextPageRepl = Reg(UInt(width = log2Ceil(nPages))) val idxPageRepl = Mux(usePageHit, Cat(pageHit(nPages-2,0), pageHit(nPages-1)), UIntToOH(nextPageRepl)) val idxPageUpdateOH = Mux(useUpdatePageHit, updatePageHit, idxPageRepl) val idxPageUpdate = OHToUInt(idxPageUpdateOH) @@ -199,18 +204,15 @@ class BTB(implicit p: Parameters) extends BtbModule { nextPageRepl := Mux(next >= nPages, next(0), next) } - when (r_btb_update.valid) { - assert(io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target") + when (r_btb_update.valid || resetting) { + assert(resetting || io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target") - val waddr = - if (updatesOutOfOrder) Mux(updateHits.reduce(_|_), OHToUInt(updateHits), nextRepl) - else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl) - - idxs(waddr) := r_btb_update.bits.pc + val waddr = Mux(updateHit && !resetting, updateHitAddr, nextRepl) + val mask = UIntToOH(waddr) + idxs(waddr) := Mux(resetting, Cat(r_btb_update.bits.pc >> log2Ceil(entries), nextRepl), r_btb_update.bits.pc) tgts(waddr) := update_target idxPages(waddr) := idxPageUpdate tgtPages(waddr) := tgtPageUpdate - val mask = UIntToOH(waddr) useRAS := Mux(r_btb_update.bits.isReturn, useRAS | mask, useRAS & ~mask) isJump := Mux(r_btb_update.bits.isJump, isJump | mask, isJump & ~mask) if (fetchWidth > 1) @@ -227,6 +229,7 @@ class BTB(implicit p: Parameters) extends BtbModule { Mux(idxWritesEven, page(r_btb_update.bits.pc), page(update_target))) writeBank(1, 2, Mux(idxWritesEven, tgtPageReplEn, idxPageReplEn), Mux(idxWritesEven, page(update_target), page(r_btb_update.bits.pc))) + pageValid := pageValid | tgtPageReplEn | idxPageReplEn } io.resp.valid := hits.orR From 2455a806af2c99e3dceab2cb7cf0a1c37419127c Mon Sep 17 00:00:00 2001 From: 
Andrew Waterman Date: Thu, 7 Jul 2016 15:30:43 -0700 Subject: [PATCH 1070/1087] Make WFI instruction respect mie CSR setting --- rocket/src/main/scala/csr.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index c36bdfed..36756b2b 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -367,7 +367,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) insn_call || insn_break when (insn_wfi) { reg_wfi := true } - when (read_mip.orR) { reg_wfi := false } + when (pending_interrupts.orR) { reg_wfi := false } val cause = Mux(!io.csr_xcpt, io.cause, From 3d8939d3c37bb49669987020ee8d4c32618e013a Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 7 Jul 2016 15:30:23 -0700 Subject: [PATCH 1071/1087] Set misa.base = 1 for RV32 --- rocket/src/main/scala/csr.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 36756b2b..a14e4183 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -241,7 +241,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) (if (usingAtomics) "A" else "") + (if (usingFPU) "FD" else "") + (if (usingRoCC) "X" else "") - val isa = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) | + val isa = (BigInt(log2Ceil(xLen) - 4) << (xLen-2)) | isa_string.map(x => 1 << (x - 'A')).reduce(_|_) val read_mstatus = io.status.toBits()(xLen-1,0) From f7b392306ecb37d98d0206d19f8dfcf9bca6ee63 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 7 Jul 2016 18:51:23 -0700 Subject: [PATCH 1072/1087] make sure SimpleHellaCacheIF can work with blocking DCache --- rocket/src/main/scala/nbdcache.scala | 115 ++++++++++++++------------- 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index a97620ae..88d3ddc7 100644 --- 
a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1112,6 +1112,49 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next } +class ReplayQueue(depth: Int)(implicit p: Parameters) extends Module { + val io = new Bundle { + val req = Decoupled(new HellaCacheReq).flip + val nack = Bool(INPUT) + val resp_valid = Bool(INPUT) + val replay = Decoupled(new HellaCacheReq) + } + + val nacked = Reg(init = UInt(0, depth)) + val inflight = Reg(init = UInt(0, depth)) + val reqs = Reg(Vec(depth, new HellaCacheReq)) + val ordering = Reg(Vec(depth, UInt(width = log2Up(depth)))) + val pop_ordering = io.nack || io.resp_valid + val push_ordering = io.req.fire() || io.replay.fire() + val (ordering_head, _) = Counter(pop_ordering, depth) + val (ordering_tail, _) = Counter(push_ordering, depth) + + val order_onehot = UIntToOH(ordering(ordering_head)) + val next_inflight = PriorityEncoder(~inflight) + val next_inflight_onehot = PriorityEncoderOH(~inflight) + val next_replay = PriorityEncoder(nacked) + val next_replay_onehot = PriorityEncoderOH(nacked) + + io.replay.valid := nacked.orR + io.replay.bits := Mux1H(next_replay_onehot, reqs) + io.req.ready := !inflight.andR + + + nacked := (nacked | Mux(io.nack, order_onehot, UInt(0))) & + ~Mux(io.replay.fire(), next_replay_onehot, UInt(0)) + + inflight := (inflight | Mux(io.req.fire(), next_inflight_onehot, UInt(0))) & + ~Mux(io.resp_valid, order_onehot, UInt(0)) + + when (io.req.fire()) { + ordering(ordering_tail) := next_inflight + reqs(next_inflight) := io.req.bits + } + when (io.replay.fire()) { + ordering(ordering_tail) := next_replay + } +} + // exposes a sane decoupled request interface class SimpleHellaCacheIF(implicit p: Parameters) extends Module { @@ -1120,23 +1163,24 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module val cache = new HellaCacheIO } - val replaying_cmb = Wire(Bool()) - val 
replaying = Reg(next = replaying_cmb, init = Bool(false)) - replaying_cmb := replaying - - val replayq1 = Module(new Queue(new HellaCacheReq, 1, flow = true)) - val replayq2 = Module(new Queue(new HellaCacheReq, 1)) + val replayq = Module(new ReplayQueue(2)) val req_arb = Module(new Arbiter(new HellaCacheReq, 2)) - req_arb.io.in(0) <> replayq1.io.deq - req_arb.io.in(1).valid := !replaying_cmb && io.requestor.req.valid + val req_helper = DecoupledHelper( + req_arb.io.in(1).ready, + replayq.io.req.ready, + io.requestor.req.valid) + + req_arb.io.in(0) <> replayq.io.replay + req_arb.io.in(1).valid := req_helper.fire(req_arb.io.in(1).ready) req_arb.io.in(1).bits := io.requestor.req.bits - io.requestor.req.ready := !replaying_cmb && req_arb.io.in(1).ready + io.requestor.req.ready := req_helper.fire(io.requestor.req.valid) + replayq.io.req.valid := req_helper.fire(replayq.io.req.ready) + replayq.io.req.bits := io.requestor.req.bits val s0_req_fire = io.cache.req.fire() - val s1_req_fire = Reg(next=s0_req_fire) - val s2_req_fire = Reg(next=s1_req_fire) - val s3_nack = Reg(next=io.cache.s2_nack) + val s1_req_fire = Reg(next = s0_req_fire) + val s2_req_fire = Reg(next = s1_req_fire) io.cache.invalidate_lr := io.requestor.invalidate_lr io.cache.req <> req_arb.io.out @@ -1144,51 +1188,8 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module io.cache.s1_kill := io.cache.s2_nack io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) -/* replay queues: - replayq1 holds the older request. - replayq2 holds the newer request (for the first nack). - We need to split the queues like this for the case where the older request - goes through but gets nacked, while the newer request stalls. - If this happens, the newer request will go through before the older one. - We don't need to check replayq1.io.enq.ready and replayq2.io.enq.ready as - there will only be two requests going through at most. 
-*/ - - // stash d$ request in stage 2 if nacked (older request) - replayq1.io.enq.valid := Bool(false) - replayq1.io.enq.bits.cmd := io.cache.resp.bits.cmd - replayq1.io.enq.bits.typ := io.cache.resp.bits.typ - replayq1.io.enq.bits.addr := io.cache.resp.bits.addr - replayq1.io.enq.bits.data := io.cache.resp.bits.store_data - replayq1.io.enq.bits.tag := io.cache.resp.bits.tag - - // stash d$ request in stage 1 if nacked (newer request) - replayq2.io.enq.valid := s2_req_fire && s3_nack - replayq2.io.enq.bits <> io.cache.resp.bits - replayq2.io.enq.bits.data := io.cache.resp.bits.store_data - replayq2.io.deq.ready := Bool(false) - - when (io.cache.s2_nack) { - replayq1.io.enq.valid := Bool(true) - replaying_cmb := Bool(true) - } - - // when replaying request got sunk into the d$ - when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !io.cache.s2_nack) { - // see if there's a stashed request in replayq2 - when (replayq2.io.deq.valid) { - replayq1.io.enq.valid := Bool(true) - replayq1.io.enq.bits.cmd := replayq2.io.deq.bits.cmd - replayq1.io.enq.bits.typ := replayq2.io.deq.bits.typ - replayq1.io.enq.bits.addr := replayq2.io.deq.bits.addr - replayq1.io.enq.bits.data := replayq2.io.deq.bits.data - replayq1.io.enq.bits.tag := replayq2.io.deq.bits.tag - replayq2.io.deq.ready := Bool(true) - } .otherwise { - replaying_cmb := Bool(false) - } - } - + replayq.io.nack := io.cache.s2_nack && s2_req_fire + replayq.io.resp_valid := io.cache.resp.valid io.requestor.resp := io.cache.resp assert(!Reg(next = io.cache.req.fire()) || From 1699622730470701cf21a19c98161e2a5f820d2f Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 9 Jul 2016 01:08:52 -0700 Subject: [PATCH 1073/1087] Don't speculatively refill I$ in uncacheable regions --- rocket/src/main/scala/dcache.scala | 3 ++- rocket/src/main/scala/frontend.scala | 13 +++++++++++-- rocket/src/main/scala/icache.scala | 6 ++++-- rocket/src/main/scala/rocket.scala | 13 ++++++++----- rocket/src/main/scala/tlb.scala | 2 ++ 5 
files changed, 27 insertions(+), 10 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index 70d45a2d..ea5a42cb 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -129,9 +129,11 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val releaseInFlight = s1_probe || s2_probe || release_state =/= s_ready val s2_valid_masked = s2_valid && Reg(next = !s1_nack) val s2_req = Reg(io.cpu.req.bits) + val s2_uncached = Reg(Bool()) when (s1_valid_not_nacked || s1_flush_valid) { s2_req := s1_req s2_req.addr := s1_paddr + s2_uncached := !tlb.io.resp.cacheable } val s2_read = isRead(s2_req.cmd) val s2_write = isWrite(s2_req.cmd) @@ -145,7 +147,6 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s2_hit = s2_hit_state.isHit(s2_req.cmd) val s2_valid_hit = s2_valid_masked && s2_readwrite && s2_hit val s2_valid_miss = s2_valid_masked && s2_readwrite && !s2_hit && !(pstore1_valid || pstore2_valid) && !release_ack_wait - val s2_uncached = !addrMap.isCacheable(s2_req.addr) val s2_valid_cached_miss = s2_valid_miss && !s2_uncached val s2_victimize = s2_valid_cached_miss || s2_flush_valid val s2_valid_uncached = s2_valid_miss && s2_uncached diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 83189a94..f7b1fb1a 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -7,6 +7,7 @@ import cde.{Parameters, Field} class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) { val pc = UInt(width = vaddrBitsExtended) + val speculative = Bool() } class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { @@ -14,6 +15,7 @@ class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) { val data = Vec(fetchWidth, Bits(width = coreInstBits)) val mask = Bits(width = fetchWidth) val xcpt_if = Bool() + val replay = Bool() } class FrontendIO(implicit p: Parameters) 
extends CoreBundle()(p) { @@ -40,13 +42,15 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val s1_pc_ = Reg(UInt(width=vaddrBitsExtended)) val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline) + val s1_speculative = Reg(Bool()) val s1_same_block = Reg(Bool()) val s2_valid = Reg(init=Bool(true)) val s2_pc = Reg(init=UInt(p(ResetVector))) val s2_btb_resp_valid = Reg(init=Bool(false)) val s2_btb_resp_bits = Reg(new BTBResp) val s2_xcpt_if = Reg(init=Bool(false)) - val s2_resp_valid = Wire(init=Bool(false)) + val s2_speculative = Reg(init=Bool(false)) + val s2_resp_valid = Wire(Bool()) val s2_resp_data = Wire(UInt(width = rowBits)) val ntpc_0 = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth) @@ -62,15 +66,18 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa when (!stall) { s1_same_block := s0_same_block && !tlb.io.resp.miss s1_pc_ := npc + s1_speculative := Mux(icmiss, s2_speculative, true) s2_valid := !icmiss when (!icmiss) { s2_pc := s1_pc + s2_speculative := s1_speculative && !tlb.io.resp.cacheable s2_xcpt_if := tlb.io.resp.xcpt_if } } when (io.cpu.req.valid) { s1_same_block := Bool(false) s1_pc_ := io.cpu.req.bits.pc + s1_speculative := io.cpu.req.bits.speculative s2_valid := Bool(false) } @@ -105,8 +112,9 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa icache.io.invalidate := io.cpu.flush_icache icache.io.s1_ppn := tlb.io.resp.ppn icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb + icache.io.s2_kill := s2_speculative - io.cpu.resp.valid := s2_valid && (s2_xcpt_if || s2_resp_valid) + io.cpu.resp.valid := s2_valid && (s2_resp_valid || s2_speculative || s2_xcpt_if) io.cpu.resp.bits.pc := s2_pc io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) @@ -138,6 +146,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) 
with HasL1CachePa val msk_pc = if (fetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(fetchWidth) -1+2,2) io.cpu.resp.bits.mask := msk_pc io.cpu.resp.bits.xcpt_if := s2_xcpt_if + io.cpu.resp.bits.replay := s2_speculative && !s2_resp_valid && !s2_xcpt_if io.cpu.btb_resp.valid := s2_btb_resp_valid io.cpu.btb_resp.bits := s2_btb_resp_bits diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index f41995d2..32e1b9b7 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -30,6 +30,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara val req = Valid(new ICacheReq).flip val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req val s1_kill = Bool(INPUT) // delayed one cycle w.r.t. req + val s2_kill = Bool(INPUT) // delayed two cycles; prevents I$ miss emission val resp = Decoupled(new ICacheResp) val invalidate = Bool(INPUT) @@ -67,7 +68,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara val s1_miss = out_valid && !s1_any_tag_hit rdy := state === s_ready && !s1_miss - when (s1_valid && state === s_ready && s1_miss) { + when (s1_miss && state === s_ready) { refill_addr := s1_paddr } val refill_tag = refill_addr(tagBits+untagBits-1,untagBits) @@ -135,7 +136,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout) io.resp.valid := s1_hit } - io.mem.acquire.valid := (state === s_request) + io.mem.acquire.valid := state === s_request && !io.s2_kill io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits) // control state machine @@ -146,6 +147,7 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara } is (s_request) { when (io.mem.acquire.ready) { state := s_refill_wait } + when (io.s2_kill) { state := s_ready } } is (s_refill_wait) { when (io.mem.grant.valid) { state := s_refill } diff --git 
a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 7425f724..726053d3 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -157,6 +157,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val ex_reg_flush_pipe = Reg(Bool()) val ex_reg_load_use = Reg(Bool()) val ex_reg_cause = Reg(UInt()) + val ex_reg_replay = Reg(Bool()) val ex_reg_pc = Reg(UInt()) val ex_reg_inst = Reg(Bits()) @@ -295,8 +296,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { div.io.req.bits.tag := ex_waddr ex_reg_valid := !ctrl_killd + ex_reg_replay := !take_pc && io.imem.resp.valid && io.imem.resp.bits.replay ex_reg_xcpt := !ctrl_killd && id_xcpt - ex_reg_xcpt_interrupt := csr.io.interrupt && !take_pc && io.imem.resp.valid + ex_reg_xcpt_interrupt := !take_pc && io.imem.resp.valid && csr.io.interrupt when (id_xcpt) { ex_reg_cause := id_cause } when (!ctrl_killd) { @@ -323,18 +325,18 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { } } } - when (!ctrl_killd || csr.io.interrupt) { + when (!ctrl_killd || csr.io.interrupt || io.imem.resp.bits.replay) { ex_reg_inst := id_inst ex_reg_pc := id_pc } // replay inst in ex stage? 
- val ex_pc_valid = ex_reg_valid || ex_reg_xcpt_interrupt + val ex_pc_valid = ex_reg_valid || ex_reg_replay || ex_reg_xcpt_interrupt val wb_dcache_miss = wb_ctrl.mem && !io.dmem.resp.valid val replay_ex_structural = ex_ctrl.mem && !io.dmem.req.ready || ex_ctrl.div && !div.io.req.ready val replay_ex_load_use = wb_dcache_miss && ex_reg_load_use - val replay_ex = ex_reg_valid && (replay_ex_structural || replay_ex_load_use) + val replay_ex = ex_reg_replay || (ex_reg_valid && (replay_ex_structural || replay_ex_load_use)) val ctrl_killx = take_pc_mem_wb || replay_ex || !ex_reg_valid // detect 2-cycle load-use delay for LB/LH/SC val ex_slow_bypass = ex_ctrl.mem_cmd === M_XSC || Vec(MT_B, MT_BU, MT_H, MT_HU).contains(ex_ctrl.mem_type) @@ -536,9 +538,10 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { id_ctrl.rocc && rocc_blocked || // reduce activity while RoCC is busy id_do_fence || csr.io.csr_stall - ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt + ctrl_killd := !io.imem.resp.valid || io.imem.resp.bits.replay || take_pc || ctrl_stalld || csr.io.interrupt io.imem.req.valid := take_pc + io.imem.req.bits.speculative := !take_pc_wb io.imem.req.bits.pc := Mux(wb_xcpt || csr.io.eret, csr.io.evec, // exception or [m|s]ret Mux(replay_wb, wb_reg_pc, // replay diff --git a/rocket/src/main/scala/tlb.scala b/rocket/src/main/scala/tlb.scala index e99873b1..aca9aed2 100644 --- a/rocket/src/main/scala/tlb.scala +++ b/rocket/src/main/scala/tlb.scala @@ -32,6 +32,7 @@ class TLBResp(implicit p: Parameters) extends CoreBundle()(p) { val xcpt_ld = Bool(OUTPUT) val xcpt_st = Bool(OUTPUT) val xcpt_if = Bool(OUTPUT) + val cacheable = Bool(OUTPUT) } class TLB(implicit val p: Parameters) extends Module with HasTLBParameters { @@ -111,6 +112,7 @@ class TLB(implicit val p: Parameters) extends Module with HasTLBParameters { io.resp.xcpt_ld := bad_va || (!tlb_miss && !addr_prot.r) || (tlb_hit && !(r_array & hits).orR) io.resp.xcpt_st := bad_va || 
(!tlb_miss && !addr_prot.w) || (tlb_hit && !(w_array & hits).orR) io.resp.xcpt_if := bad_va || (!tlb_miss && !addr_prot.x) || (tlb_hit && !(x_array & hits).orR) + io.resp.cacheable := addrMap.isCacheable(paddr) io.resp.miss := tlb_miss io.resp.ppn := Mux(vm_enabled, Mux1H(hitsVec, ppns), io.req.bits.vpn(ppnBits-1,0)) From b8884e8143f313ed582741433ad265e271f29a12 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 14 Jul 2016 12:05:09 -0700 Subject: [PATCH 1074/1087] Simplify frontend virtual address extension code --- rocket/src/main/scala/frontend.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index f7b1fb1a..ad95969d 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -53,10 +53,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val s2_resp_valid = Wire(Bool()) val s2_resp_data = Wire(UInt(width = rowBits)) - val ntpc_0 = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth) - val ntpc = // don't increment PC into virtual address space hole - if (vaddrBitsExtended == vaddrBits) ntpc_0 - else Cat(s1_pc(vaddrBits-1) & ntpc_0(vaddrBits-1), ntpc_0) + val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth) val predicted_npc = Wire(init = ntpc) val icmiss = s2_valid && !s2_resp_valid val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt From 3d0b92afd7e41f2201121060db177bb6dab62d1b Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 14 Jul 2016 12:09:34 -0700 Subject: [PATCH 1075/1087] Misc code cleanup --- rocket/src/main/scala/csr.scala | 2 +- rocket/src/main/scala/rocket.scala | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index a14e4183..34f12f39 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -528,7 
+528,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) when (decoded_addr(CSRs.sie)) { reg_mie := (reg_mie & ~reg_mideleg) | (wdata & reg_mideleg) } when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } when (decoded_addr(CSRs.sptbr)) { reg_sptbr.ppn := wdata(ppnBits-1,0) } - when (decoded_addr(CSRs.sepc)) { reg_sepc := wdata >> log2Up(coreInstBytes) << log2Up(coreInstBytes) } + when (decoded_addr(CSRs.sepc)) { reg_sepc := ~(~wdata | (coreInstBytes-1)) } when (decoded_addr(CSRs.stvec)) { reg_stvec := wdata >> 2 << 2 } when (decoded_addr(CSRs.scause)) { reg_scause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.sbadaddr)) { reg_sbadaddr := wdata(vaddrBitsExtended-1,0) } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 726053d3..aee537f5 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -423,7 +423,8 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { val wb_set_sboard = wb_ctrl.div || wb_dcache_miss || wb_ctrl.rocc val replay_wb_common = io.dmem.s2_nack || wb_reg_replay - val replay_wb = replay_wb_common || wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready + val replay_wb_rocc = wb_reg_valid && wb_ctrl.rocc && !io.rocc.cmd.ready + val replay_wb = replay_wb_common || replay_wb_rocc val wb_xcpt = wb_reg_xcpt || csr.io.csr_xcpt take_pc_wb := replay_wb || wb_xcpt || csr.io.eret @@ -455,7 +456,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { ll_wen := Bool(true) } - val wb_valid = wb_reg_valid && !replay_wb && !csr.io.csr_xcpt + val wb_valid = wb_reg_valid && !replay_wb && !wb_xcpt val wb_wen = wb_valid && wb_ctrl.wxd val rf_wen = wb_wen || ll_wen val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr) From e6aab368a46e83562c93f6d1bcde8dfd0dc64165 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 14 Jul 2016 12:38:54 -0700 Subject: [PATCH 1076/1087] Replace ICacheBufferWays 
parameter with I$ constructor argument --- rocket/src/main/scala/frontend.scala | 32 +++++++--------------------- rocket/src/main/scala/icache.scala | 23 ++++++++++---------- 2 files changed, 19 insertions(+), 36 deletions(-) diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index ad95969d..502a2232 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -37,7 +37,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val mem = new ClientUncachedTileLinkIO } - val icache = Module(new ICache) + val icache = Module(new ICache(latency = 2)) val tlb = Module(new TLB) val s1_pc_ = Reg(UInt(width=vaddrBitsExtended)) @@ -50,12 +50,10 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val s2_btb_resp_bits = Reg(new BTBResp) val s2_xcpt_if = Reg(init=Bool(false)) val s2_speculative = Reg(init=Bool(false)) - val s2_resp_valid = Wire(Bool()) - val s2_resp_data = Wire(UInt(width = rowBits)) val ntpc = ~(~s1_pc | (coreInstBytes*fetchWidth-1)) + UInt(coreInstBytes*fetchWidth) val predicted_npc = Wire(init = ntpc) - val icmiss = s2_valid && !s2_resp_valid + val icmiss = s2_valid && !icache.io.resp.valid val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt val s0_same_block = Wire(init = !icmiss && !io.cpu.req.valid && ((ntpc & rowBytes) === (s1_pc & rowBytes))) @@ -110,30 +108,16 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa icache.io.s1_ppn := tlb.io.resp.ppn icache.io.s1_kill := io.cpu.req.valid || tlb.io.resp.miss || tlb.io.resp.xcpt_if || icmiss || io.cpu.flush_tlb icache.io.s2_kill := s2_speculative + icache.io.resp.ready := !stall && !s1_same_block - io.cpu.resp.valid := s2_valid && (s2_resp_valid || s2_speculative || s2_xcpt_if) + io.cpu.resp.valid := s2_valid && (icache.io.resp.valid || s2_speculative || s2_xcpt_if) io.cpu.resp.bits.pc := s2_pc io.cpu.npc := Mux(io.cpu.req.valid, 
io.cpu.req.bits.pc, npc) - // if the ways are buffered, we don't need to buffer again - if (p(ICacheBufferWays)) { - icache.io.resp.ready := !stall && !s1_same_block - - s2_resp_valid := icache.io.resp.valid - s2_resp_data := icache.io.resp.bits.datablock - } else { - val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true)) - icbuf.io.enq <> icache.io.resp - icbuf.io.deq.ready := !stall && !s1_same_block - - s2_resp_valid := icbuf.io.deq.valid - s2_resp_data := icbuf.io.deq.bits.datablock - } - require(fetchWidth * coreInstBytes <= rowBytes) - val fetch_data = - if (fetchWidth * coreInstBytes == rowBytes) s2_resp_data - else s2_resp_data >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) + val fetch_data = // TODO zero-width + if (fetchWidth * coreInstBytes == rowBytes) icache.io.resp.bits.datablock + else icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) for (i <- 0 until fetchWidth) { io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) @@ -143,7 +127,7 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa val msk_pc = if (fetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(fetchWidth) -1+2,2) io.cpu.resp.bits.mask := msk_pc io.cpu.resp.bits.xcpt_if := s2_xcpt_if - io.cpu.resp.bits.replay := s2_speculative && !s2_resp_valid && !s2_xcpt_if + io.cpu.resp.bits.replay := s2_speculative && !icache.io.resp.valid && !s2_xcpt_if io.cpu.btb_resp.valid := s2_btb_resp_valid io.cpu.btb_resp.bits := s2_btb_resp_bits diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index 32e1b9b7..ecf66091 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -7,8 +7,6 @@ import uncore.util._ import Util._ import cde.{Parameters, Field} -case object ICacheBufferWays extends Field[Boolean] - trait HasL1CacheParameters extends 
HasCacheParameters with HasCoreParameters { val outerDataBeats = p(TLKey(p(TLId))).dataBeats val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat @@ -25,7 +23,7 @@ class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1Cache val datablock = Bits(width = rowBits) } -class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters { +class ICache(latency: Int)(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters { val io = new Bundle { val req = Valid(new ICacheReq).flip val s1_ppn = UInt(INPUT, ppnBits) // delayed one cycle w.r.t. req @@ -126,15 +124,16 @@ class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePara } // output signals - if (p(ICacheBufferWays)) { - val s2_hit = RegEnable(s1_hit, !stall) - val s2_tag_hit = RegEnable(s1_tag_hit, !stall) - val s2_dout = RegEnable(s1_dout, !stall) - io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) - io.resp.valid := s2_hit - } else { - io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout) - io.resp.valid := s1_hit + latency match { + case 1 => + io.resp.bits.datablock := Mux1H(s1_tag_hit, s1_dout) + io.resp.valid := s1_hit + case 2 => + val s2_hit = RegEnable(s1_hit, !stall) + val s2_tag_hit = RegEnable(s1_tag_hit, !stall) + val s2_dout = RegEnable(s1_dout, !stall) + io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout) + io.resp.valid := s2_hit } io.mem.acquire.valid := state === s_request && !io.s2_kill io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits) From da512d4230bc6bbd48f7ec78ce4181f7219458f8 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 14 Jul 2016 17:10:27 -0700 Subject: [PATCH 1077/1087] Explicitly discard BTB index LSBs --- rocket/src/main/scala/btb.scala | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/rocket/src/main/scala/btb.scala b/rocket/src/main/scala/btb.scala index 5110bfed..d16c4725 100644 --- a/rocket/src/main/scala/btb.scala +++ 
b/rocket/src/main/scala/btb.scala @@ -142,18 +142,18 @@ class BTB(implicit p: Parameters) extends BtbModule { val ras_update = Valid(new RASUpdate).flip } - val idxs = Reg(Vec(entries, UInt(width=matchBits))) + val idxs = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes)))) val idxPages = Reg(Vec(entries, UInt(width=log2Up(nPages)))) - val tgts = Reg(Vec(entries, UInt(width=matchBits))) + val tgts = Reg(Vec(entries, UInt(width=matchBits - log2Up(coreInstBytes)))) val tgtPages = Reg(Vec(entries, UInt(width=log2Up(nPages)))) - val pages = Reg(Vec(nPages, UInt(width=vaddrBits-matchBits))) + val pages = Reg(Vec(nPages, UInt(width=vaddrBits - matchBits))) val pageValid = Reg(init = UInt(0, nPages)) val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0)) val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0)) val useRAS = Reg(UInt(width = entries)) val isJump = Reg(UInt(width = entries)) - val brIdx = if (fetchWidth > 1) Reg(Vec(entries, UInt(width=log2Up(fetchWidth)))) else Seq(UInt(0)) + val brIdx = Reg(Vec(entries, UInt(width=log2Up(fetchWidth)))) private def page(addr: UInt) = addr >> matchBits private def pageMatch(addr: UInt) = { @@ -161,7 +161,7 @@ class BTB(implicit p: Parameters) extends BtbModule { pageValid & pages.map(_ === p).toBits } private def tagMatch(addr: UInt, pgMatch: UInt) = { - val idxMatch = idxs.map(_ === addr(matchBits-1,0)).toBits + val idxMatch = idxs.map(_ === addr(matchBits-1, log2Up(coreInstBytes))).toBits val idxPageMatch = idxPagesOH.map(_ & pgMatch).map(_.orR).toBits idxMatch & idxPageMatch } @@ -209,8 +209,9 @@ class BTB(implicit p: Parameters) extends BtbModule { val waddr = Mux(updateHit && !resetting, updateHitAddr, nextRepl) val mask = UIntToOH(waddr) - idxs(waddr) := Mux(resetting, Cat(r_btb_update.bits.pc >> log2Ceil(entries), nextRepl), r_btb_update.bits.pc) - tgts(waddr) := update_target + val newIdx = r_btb_update.bits.pc(matchBits-1, log2Up(coreInstBytes)) + idxs(waddr) := Mux(resetting, Cat(newIdx >> 
log2Ceil(entries), nextRepl), newIdx) + tgts(waddr) := update_target(matchBits-1, log2Up(coreInstBytes)) idxPages(waddr) := idxPageUpdate tgtPages(waddr) := tgtPageUpdate useRAS := Mux(r_btb_update.bits.isReturn, useRAS | mask, useRAS & ~mask) @@ -234,7 +235,7 @@ class BTB(implicit p: Parameters) extends BtbModule { io.resp.valid := hits.orR io.resp.bits.taken := io.resp.valid - io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts)) + io.resp.bits.target := Cat(Mux1H(Mux1H(hitsVec, tgtPagesOH), pages), Mux1H(hitsVec, tgts) << log2Up(coreInstBytes)) io.resp.bits.entry := OHToUInt(hits) io.resp.bits.bridx := Mux1H(hitsVec, brIdx) io.resp.bits.mask := Cat((UInt(1) << ~Mux(io.resp.bits.taken, ~io.resp.bits.bridx, UInt(0)))-1, UInt(1)) From d78f1aacd0aad64e2fcd6d161a6cfb275b8f8762 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 14 Jul 2016 21:42:12 -0700 Subject: [PATCH 1078/1087] Clean up some zero-width wire cases using UInt.extract --- rocket/src/main/scala/dcache.scala | 12 +++--------- rocket/src/main/scala/frontend.scala | 8 ++------ rocket/src/main/scala/icache.scala | 7 +++---- rocket/src/main/scala/nbdcache.scala | 14 ++++---------- rocket/src/main/scala/util.scala | 8 +++++++- 5 files changed, 19 insertions(+), 30 deletions(-) diff --git a/rocket/src/main/scala/dcache.scala b/rocket/src/main/scala/dcache.scala index ea5a42cb..82de400e 100644 --- a/rocket/src/main/scala/dcache.scala +++ b/rocket/src/main/scala/dcache.scala @@ -216,9 +216,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr) dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way) dataArb.io.in(0).bits.wdata := Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_storegen_data)) - val pstore_mask_shift = - if (rowOffBits > offsetlsb) Mux(pstore2_valid, pstore2_addr, pstore1_addr)(rowOffBits-1,offsetlsb) << wordOffBits - 
else UInt(0) + val pstore_mask_shift = Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1,offsetlsb) << wordOffBits dataArb.io.in(0).bits.wmask := Mux(pstore2_valid, pstore2_storegen_mask, pstore1_storegen.mask) << pstore_mask_shift // store->load RAW hazard detection @@ -247,9 +245,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { addr_byte = s2_req.addr(beatOffBits-1, 0), operand_size = s2_req.typ, alloc = Bool(false)) - val uncachedPutOffset = // TODO zero-width - if (beatBytes > wordBytes) s2_req.addr(beatOffBits-1, wordOffBits) - else UInt(0) + val uncachedPutOffset = s2_req.addr.extract(beatOffBits-1, wordOffBits) val uncachedPutMessage = Put( client_xact_id = UInt(0), addr_block = s2_req.addr(paddrBits-1, blockOffBits), @@ -402,9 +398,7 @@ class DCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { } // load data subword mux/sign extension - val s2_word_idx = // TODO zero-width - if (rowBits > wordBits) s2_req.addr(log2Up(rowBits/8)-1, log2Up(wordBytes)) - else UInt(0) + val s2_word_idx = s2_req.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes)) val s2_data_word = s2_data >> Cat(s2_word_idx, UInt(0, log2Up(coreDataBits))) val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc, wordBytes) io.cpu.resp.bits.data := loadgen.data | s2_sc_fail diff --git a/rocket/src/main/scala/frontend.scala b/rocket/src/main/scala/frontend.scala index 502a2232..77e8f6e6 100644 --- a/rocket/src/main/scala/frontend.scala +++ b/rocket/src/main/scala/frontend.scala @@ -115,17 +115,13 @@ class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CachePa io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc) require(fetchWidth * coreInstBytes <= rowBytes) - val fetch_data = // TODO zero-width - if (fetchWidth * coreInstBytes == rowBytes) icache.io.resp.bits.datablock - else icache.io.resp.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << 
log2Up(fetchWidth*coreInstBits)) + val fetch_data = icache.io.resp.bits.datablock >> (s2_pc.extract(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits)) for (i <- 0 until fetchWidth) { io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits) } - val all_ones = UInt((1 << (fetchWidth+1))-1) - val msk_pc = if (fetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(fetchWidth) -1+2,2) - io.cpu.resp.bits.mask := msk_pc + io.cpu.resp.bits.mask := UInt((1 << fetchWidth)-1) << s2_pc.extract(log2Up(fetchWidth)+log2Up(coreInstBytes)-1, log2Up(coreInstBytes)) io.cpu.resp.bits.xcpt_if := s2_xcpt_if io.cpu.resp.bits.replay := s2_speculative && !icache.io.resp.valid && !s2_xcpt_if diff --git a/rocket/src/main/scala/icache.scala b/rocket/src/main/scala/icache.scala index ecf66091..107b332c 100644 --- a/rocket/src/main/scala/icache.scala +++ b/rocket/src/main/scala/icache.scala @@ -72,7 +72,7 @@ class ICache(latency: Int)(implicit p: Parameters) extends CoreModule()(p) with val refill_tag = refill_addr(tagBits+untagBits-1,untagBits) val narrow_grant = FlowThroughSerializer(io.mem.grant, refillCyclesPerBeat) - val (refill_cnt, refill_wrap) = Counter(narrow_grant.fire(), refillCycles) //TODO Zero width wire + val (refill_cnt, refill_wrap) = Counter(narrow_grant.fire(), refillCycles) val refill_done = state === s_refill && refill_wrap narrow_grant.ready := Bool(true) @@ -116,10 +116,9 @@ class ICache(latency: Int)(implicit p: Parameters) extends CoreModule()(p) with val wen = narrow_grant.valid && repl_way === UInt(i) when (wen) { val e_d = code.encode(narrow_grant.bits.data).toUInt - if(refillCycles > 1) data_array.write(Cat(s1_idx, refill_cnt), e_d) - else data_array.write(s1_idx, e_d) + data_array.write((s1_idx << log2Ceil(refillCycles)) | refill_cnt, e_d) } - val s0_raddr = s0_vaddr(untagBits-1,blockOffBits-(if(refillCycles > 1) refill_cnt.getWidth else 0)) + val s0_raddr = 
s0_vaddr(untagBits-1,blockOffBits-log2Ceil(refillCycles)) s1_dout(i) := data_array.read(s0_raddr, !wen && s0_valid) } diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 88d3ddc7..0208fccc 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -164,9 +164,7 @@ class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val replay_next = Bool(OUTPUT) } - def beatOffset(addr: UInt) = // TODO zero-width - if (beatOffBits > wordOffBits) addr(beatOffBits - 1, wordOffBits) - else UInt(0) + def beatOffset(addr: UInt) = addr.extract(beatOffBits - 1, wordOffBits) def wordFromBeat(addr: UInt, dat: UInt) = { val shift = Cat(beatOffset(addr), UInt(0, wordOffBits + log2Up(wordBytes))) @@ -300,7 +298,7 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { (Vec(s_refill_req, s_refill_resp).contains(state) && !cmd_requires_second_acquire)) val gnt_multi_data = io.mem_grant.bits.hasMultibeatData() - val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles) // TODO: Zero width? 
+ val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles) val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done) val rpq = Module(new Queue(new ReplayInternal, p(ReplayQueueDepth))) @@ -373,7 +371,7 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.idx_match := (state =/= s_invalid) && idx_match io.refill.way_en := req.way_en - io.refill.addr := (if(refillCycles > 1) Cat(req_idx, refill_cnt) else req_idx) << rowOffBits + io.refill.addr := ((req_idx << log2Ceil(refillCycles)) | refill_cnt) << rowOffBits io.tag := req.addr >> untagBits io.req_pri_rdy := state === s_invalid io.req_sec_rdy := sec_rdy && rpq.io.enq.ready @@ -963,11 +961,7 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { } writeArb.io.in(0).bits.addr := s3_req.addr - val rowIdx = - if (rowOffBits > offsetlsb) s3_req.addr(rowOffBits-1,offsetlsb).toUInt - else UInt(0) - val rowWMask = UInt(1) << (if(rowOffBits > offsetlsb) rowIdx else UInt(0)) - writeArb.io.in(0).bits.wmask := rowWMask + writeArb.io.in(0).bits.wmask := UIntToOH(s3_req.addr.extract(rowOffBits-1,offsetlsb)) writeArb.io.in(0).bits.data := Fill(rowWords, s3_req.data) writeArb.io.in(0).valid := s3_valid writeArb.io.in(0).bits.way_en := s3_way diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 4b58c4e4..15296cc0 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -14,10 +14,16 @@ object Util { implicit def booleanToBool(x: Boolean): Bits = Bool(x) implicit def intSeqToUIntSeq(x: Seq[Int]): Seq[UInt] = x.map(UInt(_)) implicit def wcToUInt(c: WideCounter): UInt = c.value - implicit def sextToConv(x: UInt) = new AnyRef { + + implicit class UIntToAugmentedUInt(val x: UInt) extends AnyVal { def sextTo(n: Int): UInt = if (x.getWidth == n) x else Cat(Fill(n - x.getWidth, x(x.getWidth-1)), x) + + def extract(hi: Int, lo: Int): UInt = { + if (hi == lo-1) 
UInt(0) + else x(hi, lo) + } } implicit def booleanToIntConv(x: Boolean) = new AnyRef { From 7cf44f9b25f273c52f731334a6ad021d30cb8bed Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Fri, 15 Jul 2016 00:51:01 -0700 Subject: [PATCH 1079/1087] clean up WideCounter implementation --- rocket/src/main/scala/util.scala | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/rocket/src/main/scala/util.scala b/rocket/src/main/scala/util.scala index 15296cc0..40a3c4a9 100644 --- a/rocket/src/main/scala/util.scala +++ b/rocket/src/main/scala/util.scala @@ -138,28 +138,23 @@ object Split // a counter that clock gates most of its MSBs using the LSB carry-out case class WideCounter(width: Int, inc: UInt = UInt(1)) { - require(inc.getWidth > 0) private val isWide = width > 2*inc.getWidth private val smallWidth = if (isWide) inc.getWidth max log2Up(width) else width private val small = Reg(init=UInt(0, smallWidth)) - private val doInc = inc.orR - private val nextSmall = - if (inc.getWidth == 1) small + UInt(1, smallWidth+1) - else Cat(UInt(0,1), small) + inc - when (doInc) { small := nextSmall(smallWidth-1,0) } + private val nextSmall = small +& inc + small := nextSmall private val large = if (isWide) { val r = Reg(init=UInt(0, width - smallWidth)) - when (doInc && nextSmall(smallWidth)) { r := r + UInt(1) } + when (nextSmall(smallWidth)) { r := r + UInt(1) } r } else null - val value = Cat(large, small) + val value = if (isWide) Cat(large, small) else small def := (x: UInt) = { - val w = x.getWidth - small := x(w.min(smallWidth)-1,0) - if (isWide) large := (if (w < smallWidth) UInt(0) else x(w.min(width)-1,smallWidth)) + small := x + if (isWide) large := x >> smallWidth } } From cff8de9814c00b81029320c413df6124ecc768a3 Mon Sep 17 00:00:00 2001 From: mwachs5 Date: Fri, 15 Jul 2016 15:41:20 -0700 Subject: [PATCH 1080/1087] Use new Mul/Div parameters vs UseFastMulDiv (#48) * Use new Mul/Div parameters vs UseFastMulDiv * Rename MulDivUnroll to 
MulUnroll --- rocket/src/main/scala/rocket.scala | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index aee537f5..4e1e7582 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -22,7 +22,8 @@ case object UseAtomics extends Field[Boolean] case object UsePerfCounters extends Field[Boolean] case object FastLoadWord extends Field[Boolean] case object FastLoadByte extends Field[Boolean] -case object FastMulDiv extends Field[Boolean] +case object MulUnroll extends Field[Int] +case object DivEarlyOut extends Field[Boolean] case object CoreInstBits extends Field[Int] case object CoreDataBits extends Field[Int] case object CoreDCacheReqTagBits extends Field[Int] @@ -43,7 +44,8 @@ trait HasCoreParameters extends HasAddrMapParameters { val usingAtomics = p(UseAtomics) val usingFDivSqrt = p(FDivSqrt) val usingRoCC = !p(BuildRoCC).isEmpty - val usingFastMulDiv = p(FastMulDiv) + val mulUnroll = p(MulUnroll) + val divEarlyOut = p(DivEarlyOut) val fastLoadWord = p(FastLoadWord) val fastLoadByte = p(FastLoadByte) @@ -286,8 +288,9 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { // multiplier and divider val div = Module(new MulDiv(width = xLen, - unroll = if(usingFastMulDiv) 8 else 1, - earlyOut = usingFastMulDiv)) + unroll = mulUnroll, + earlyOut = divEarlyOut)) + div.io.req.valid := ex_reg_valid && ex_ctrl.div div.io.req.bits.dw := ex_ctrl.alu_dw div.io.req.bits.fn := ex_ctrl.alu_fn From 40a146f6253a1e707205309d820ad87ab394fab8 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Mon, 18 Jul 2016 17:01:29 -0700 Subject: [PATCH 1081/1087] HellaCacheArbiter passes through if n == 1 --- rocket/src/main/scala/arbiter.scala | 82 +++++++++++++++-------------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/rocket/src/main/scala/arbiter.scala b/rocket/src/main/scala/arbiter.scala index 6b3cee53..1a686d5b 100644 --- 
a/rocket/src/main/scala/arbiter.scala +++ b/rocket/src/main/scala/arbiter.scala @@ -13,51 +13,55 @@ class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module val mem = new HellaCacheIO } - val s1_id = Reg(UInt()) - val s2_id = Reg(next=s1_id) + if (n == 1) { + io.mem <> io.requestor.head + } else { + val s1_id = Reg(UInt()) + val s2_id = Reg(next=s1_id) - io.mem.invalidate_lr := io.requestor.map(_.invalidate_lr).reduce(_||_) - io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_) - io.requestor(0).req.ready := io.mem.req.ready - for (i <- 1 until n) - io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid + io.mem.invalidate_lr := io.requestor.map(_.invalidate_lr).reduce(_||_) + io.mem.req.valid := io.requestor.map(_.req.valid).reduce(_||_) + io.requestor(0).req.ready := io.mem.req.ready + for (i <- 1 until n) + io.requestor(i).req.ready := io.requestor(i-1).req.ready && !io.requestor(i-1).req.valid - for (i <- n-1 to 0 by -1) { - val req = io.requestor(i).req - def connect_s0() = { - io.mem.req.bits.cmd := req.bits.cmd - io.mem.req.bits.typ := req.bits.typ - io.mem.req.bits.addr := req.bits.addr - io.mem.req.bits.phys := req.bits.phys - io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n))) - s1_id := UInt(i) - } - def connect_s1() = { - io.mem.s1_kill := io.requestor(i).s1_kill - io.mem.s1_data := io.requestor(i).s1_data + for (i <- n-1 to 0 by -1) { + val req = io.requestor(i).req + def connect_s0() = { + io.mem.req.bits.cmd := req.bits.cmd + io.mem.req.bits.typ := req.bits.typ + io.mem.req.bits.addr := req.bits.addr + io.mem.req.bits.phys := req.bits.phys + io.mem.req.bits.tag := Cat(req.bits.tag, UInt(i, log2Up(n))) + s1_id := UInt(i) + } + def connect_s1() = { + io.mem.s1_kill := io.requestor(i).s1_kill + io.mem.s1_data := io.requestor(i).s1_data + } + + if (i == n-1) { + connect_s0() + connect_s1() + } else { + when (req.valid) { connect_s0() } + when (s1_id === UInt(i)) { connect_s1() } + } } - 
if (i == n-1) { - connect_s0() - connect_s1() - } else { - when (req.valid) { connect_s0() } - when (s1_id === UInt(i)) { connect_s1() } + for (i <- 0 until n) { + val resp = io.requestor(i).resp + val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i) + resp.valid := io.mem.resp.valid && tag_hit + io.requestor(i).xcpt := io.mem.xcpt + io.requestor(i).ordered := io.mem.ordered + io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i) + resp.bits := io.mem.resp.bits + resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) + + io.requestor(i).replay_next := io.mem.replay_next } } - - for (i <- 0 until n) { - val resp = io.requestor(i).resp - val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i) - resp.valid := io.mem.resp.valid && tag_hit - io.requestor(i).xcpt := io.mem.xcpt - io.requestor(i).ordered := io.mem.ordered - io.requestor(i).s2_nack := io.mem.s2_nack && s2_id === UInt(i) - resp.bits := io.mem.resp.bits - resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n) - - io.requestor(i).replay_next := io.mem.replay_next - } } class InOrderArbiter[T <: Data, U <: Data](reqTyp: T, respTyp: U, n: Int) From 2723b2f515172a9b24733f76cfb50f8fa134a06f Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Mon, 18 Jul 2016 17:02:47 -0700 Subject: [PATCH 1082/1087] fix issues in SimpleHellaCacheIF and document the changes --- rocket/src/main/scala/nbdcache.scala | 95 ++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 26 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 0208fccc..011c2778 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -1106,47 +1106,86 @@ class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) { io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next } -class ReplayQueue(depth: Int)(implicit p: Parameters) extends Module { +/** + * This module buffers requests made by the SimpleHellaCacheIF in case they + 
* are nacked. Nacked requests must be replayed in order, and no other requests + * must be allowed to go through until the replayed requests are successfully + * completed. + */ +class SimpleHellaCacheIFReplayQueue(depth: Int) + (implicit val p: Parameters) extends Module + with HasL1HellaCacheParameters { val io = new Bundle { val req = Decoupled(new HellaCacheReq).flip - val nack = Bool(INPUT) - val resp_valid = Bool(INPUT) + val nack = Valid(Bits(width = coreDCacheReqTagBits)).flip + val resp = Valid(new HellaCacheResp).flip val replay = Decoupled(new HellaCacheReq) } - val nacked = Reg(init = UInt(0, depth)) + // Registers to store the sent request + // When a request is sent the first time, + // it is stored in one of the reqs registers + // and the corresponding inflight bit is set. + // The reqs register will be deallocated once the request is + // successfully completed. val inflight = Reg(init = UInt(0, depth)) val reqs = Reg(Vec(depth, new HellaCacheReq)) - val ordering = Reg(Vec(depth, UInt(width = log2Up(depth)))) - val pop_ordering = io.nack || io.resp_valid - val push_ordering = io.req.fire() || io.replay.fire() - val (ordering_head, _) = Counter(pop_ordering, depth) - val (ordering_tail, _) = Counter(push_ordering, depth) - val order_onehot = UIntToOH(ordering(ordering_head)) - val next_inflight = PriorityEncoder(~inflight) + // The nack queue stores the index of nacked requests (in the reqs vector) + // in the order that they were nacked. A request is enqueued onto nackq + // when it is newly nacked (i.e. not a nack for a previous replay). + // The head of the nack queue will be replayed until it is + // successfully completed, at which time the request is dequeued. + // No new requests will be made or other replays attempted until the head + // of the nackq is successfully completed. 
+ val nackq = Module(new Queue(UInt(width = log2Up(depth)), depth)) + val replaying = Reg(init = Bool(false)) + val next_inflight_onehot = PriorityEncoderOH(~inflight) - val next_replay = PriorityEncoder(nacked) - val next_replay_onehot = PriorityEncoderOH(nacked) + val next_inflight = OHToUInt(next_inflight_onehot) - io.replay.valid := nacked.orR - io.replay.bits := Mux1H(next_replay_onehot, reqs) - io.req.ready := !inflight.andR + val next_replay = nackq.io.deq.bits + val next_replay_onehot = UIntToOH(next_replay) + val next_replay_req = reqs(next_replay) + // Keep sending the head of the nack queue until it succeeds + io.replay.valid := nackq.io.deq.valid && !replaying + io.replay.bits := next_replay_req + // Don't allow new requests if there are replays waiting + // or something being nacked. + io.req.ready := !inflight.andR && !nackq.io.deq.valid && !io.nack.valid - nacked := (nacked | Mux(io.nack, order_onehot, UInt(0))) & - ~Mux(io.replay.fire(), next_replay_onehot, UInt(0)) + // Match on the tags to determine the index of nacks or responses + val nack_onehot = Cat(reqs.map(_.tag === io.nack.bits).reverse) & inflight + val resp_onehot = Cat(reqs.map(_.tag === io.resp.bits.tag).reverse) & inflight + val replay_complete = io.resp.valid && replaying && io.resp.bits.tag === next_replay_req.tag + val nack_head = io.nack.valid && nackq.io.deq.valid && io.nack.bits === next_replay_req.tag + + // Enqueue to the nack queue if there is a nack that is not in response to + // the previous replay + nackq.io.enq.valid := io.nack.valid && !nack_head + nackq.io.enq.bits := OHToUInt(nack_onehot) + assert(!nackq.io.enq.valid || nackq.io.enq.ready, + "SimpleHellaCacheIF: ReplayQueue nack queue overflow") + + // Dequeue from the nack queue if the last replay was successfully completed + nackq.io.deq.ready := replay_complete + assert(!nackq.io.deq.ready || nackq.io.deq.valid, + "SimpleHellaCacheIF: ReplayQueue nack queue underflow") + + // Set inflight bit when a request is
made + // Clear it when it is successfully completed inflight := (inflight | Mux(io.req.fire(), next_inflight_onehot, UInt(0))) & - ~Mux(io.resp_valid, order_onehot, UInt(0)) + ~Mux(io.resp.valid, resp_onehot, UInt(0)) when (io.req.fire()) { - ordering(ordering_tail) := next_inflight reqs(next_inflight) := io.req.bits } - when (io.replay.fire()) { - ordering(ordering_tail) := next_replay - } + + // Only one replay outstanding at a time + when (io.replay.fire()) { replaying := Bool(true) } + when (nack_head || replay_complete) { replaying := Bool(false) } } // exposes a sane decoupled request interface @@ -1157,7 +1196,7 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module val cache = new HellaCacheIO } - val replayq = Module(new ReplayQueue(2)) + val replayq = Module(new SimpleHellaCacheIFReplayQueue(2)) val req_arb = Module(new Arbiter(new HellaCacheReq, 2)) val req_helper = DecoupledHelper( @@ -1175,6 +1214,9 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module val s0_req_fire = io.cache.req.fire() val s1_req_fire = Reg(next = s0_req_fire) val s2_req_fire = Reg(next = s1_req_fire) + val s1_req_tag = Reg(next = io.cache.req.bits.tag) + val s2_req_tag = Reg(next = s1_req_tag) + val s2_kill = Reg(next = io.cache.s1_kill) io.cache.invalidate_lr := io.requestor.invalidate_lr io.cache.req <> req_arb.io.out @@ -1182,8 +1224,9 @@ class SimpleHellaCacheIF(implicit p: Parameters) extends Module io.cache.s1_kill := io.cache.s2_nack io.cache.s1_data := RegEnable(req_arb.io.out.bits.data, s0_req_fire) - replayq.io.nack := io.cache.s2_nack && s2_req_fire - replayq.io.resp_valid := io.cache.resp.valid + replayq.io.nack.valid := (io.cache.s2_nack || s2_kill) && s2_req_fire + replayq.io.nack.bits := s2_req_tag + replayq.io.resp := io.cache.resp io.requestor.resp := io.cache.resp assert(!Reg(next = io.cache.req.fire()) || From c069e660568a3ddbdb2e72384ee50ab2387601eb Mon Sep 17 00:00:00 2001 From: Ben Keller Date: Mon, 18 Jul 2016 17:40:50 -0700 
Subject: [PATCH 1083/1087] Modify the RoCC interface to include status in the command queue. (#41) This addresses a bug in which changes in mstatus could propagate to RoCCs before their time. Existing RoCCs that use the status port will need to be modified to match this change. This addresses the first half of #40. --- rocket/src/main/scala/rocc.scala | 2 +- rocket/src/main/scala/rocket.scala | 2 +- rocket/src/main/scala/tile.scala | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index e8e5e626..ce0fcfbe 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -37,6 +37,7 @@ class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) { val inst = new RoCCInstruction val rs1 = Bits(width = xLen) val rs2 = Bits(width = xLen) + val status = new MStatus } class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) { @@ -49,7 +50,6 @@ class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) { val resp = Decoupled(new RoCCResponse) val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" })) val busy = Bool(OUTPUT) - val status = new MStatus().asInput val interrupt = Bool(OUTPUT) // These should be handled differently, eventually diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 4e1e7582..7756ab18 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -599,7 +599,7 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { io.rocc.cmd.valid := wb_reg_valid && wb_ctrl.rocc && !replay_wb_common io.rocc.exception := wb_xcpt && csr.io.status.xs.orR - io.rocc.status := csr.io.status + io.rocc.cmd.bits.status := csr.io.status io.rocc.cmd.bits.inst := new RoCCInstruction().fromBits(wb_reg_inst) io.rocc.cmd.bits.rs1 := wb_reg_wdata io.rocc.cmd.bits.rs2 := wb_reg_rs2 diff --git a/rocket/src/main/scala/tile.scala 
b/rocket/src/main/scala/tile.scala index ee154ee8..9f635663 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -76,7 +76,6 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile( })) val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) rocc.io.cmd <> cmdRouter.io.out(i) - rocc.io.status := core.io.rocc.status rocc.io.exception := core.io.rocc.exception rocc.io.host_id := io.prci.id dcIF.io.requestor <> rocc.io.mem From a43ad522dcfb509c772f230d8445bc2009226c57 Mon Sep 17 00:00:00 2001 From: Colin Schmidt Date: Thu, 21 Jul 2016 20:56:52 -0400 Subject: [PATCH 1084/1087] add clock override to tile constructor (#42) useful to have upstream so that tape-outs can construct rocket-chip to have cores on different clocks without forking rocket --- rocket/src/main/scala/tile.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index 9f635663..66b16553 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -22,8 +22,8 @@ case class RoccParameters( csrs: Seq[Int] = Nil, useFPU: Boolean = false) -abstract class Tile(resetSignal: Bool = null) - (implicit p: Parameters) extends Module(_reset = resetSignal) { +abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null) + (implicit p: Parameters) extends Module(Option(clockSignal), Option(resetSignal)) { val nCachedTileLinkPorts = p(NCachedTileLinkPorts) val nUncachedTileLinkPorts = p(NUncachedTileLinkPorts) val dcacheParams = p.alterPartial({ case CacheName => "L1D" }) @@ -35,7 +35,8 @@ abstract class Tile(resetSignal: Bool = null) } } -class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile(resetSignal)(p) { +class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null) + (implicit p: Parameters) extends Tile(clockSignal, resetSignal)(p) { val buildRocc = p(BuildRoCC) val usingRocc = 
!buildRocc.isEmpty val nRocc = buildRocc.size From 51edd19e85f9bfa6c2822c1253b51253e0df5154 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Fri, 22 Jul 2016 14:22:51 -0700 Subject: [PATCH 1085/1087] add U bit to misa register --- rocket/src/main/scala/csr.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 34f12f39..86090c2f 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -238,6 +238,7 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) val isa_string = "IM" + (if (usingVM) "S" else "") + + (if (usingUser) "U" else "") + (if (usingAtomics) "A" else "") + (if (usingFPU) "FD" else "") + (if (usingRoCC) "X" else "") From dcfcac953002ad574fa7edc38992703cbcab715d Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Tue, 26 Jul 2016 17:20:20 -0700 Subject: [PATCH 1086/1087] fix LRSC issue (RocketChip issue #86) It was possible that the result of a store-conditional could get lost if it did not depend on the result of the corresponding load-reserved. This was because the MSHR does not update the client state based on the secondary requests. So the LR would acquire the line in clientExclusiveClean, but then we would fail to update the metadata array to change the state to clientExclusiveDirty. The solution is to track whether a secondary acquire would cause the line to be dirty. If so, use M_XWR instead of the primary command to generate the update coherence state.
--- rocket/src/main/scala/nbdcache.scala | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/rocket/src/main/scala/nbdcache.scala b/rocket/src/main/scala/nbdcache.scala index 011c2778..c3b783c7 100644 --- a/rocket/src/main/scala/nbdcache.scala +++ b/rocket/src/main/scala/nbdcache.scala @@ -282,6 +282,12 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val s_invalid :: s_wb_req :: s_wb_resp :: s_meta_clear :: s_refill_req :: s_refill_resp :: s_meta_write_req :: s_meta_write_resp :: s_drain_rpq :: Nil = Enum(UInt(), 9) val state = Reg(init=s_invalid) + def stateIsOneOf(check_states: Seq[UInt]): Bool = + check_states.map(state === _).reduce(_ || _) + + def stateIsOneOf(st1: UInt, st2: UInt*): Bool = + stateIsOneOf(st1 +: st2) + val new_coh_state = Reg(init=ClientMetadata.onReset) val req = Reg(new MSHRReqInternal()) val req_idx = req.addr(untagBits-1,blockOffBits) @@ -292,14 +298,17 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { // Acquires on the same block for now. val cmd_requires_second_acquire = req.old_meta.coh.requiresAcquireOnSecondaryMiss(req.cmd, io.req_bits.cmd) - val states_before_refill = Vec(s_wb_req, s_wb_resp, s_meta_clear) - val sec_rdy = idx_match && - (states_before_refill.contains(state) || - (Vec(s_refill_req, s_refill_resp).contains(state) && - !cmd_requires_second_acquire)) + // Track whether or not a secondary acquire will cause the coherence state + // to go from clean to dirty. 
+ val dirties_coh = Reg(Bool()) + val states_before_refill = Seq(s_wb_req, s_wb_resp, s_meta_clear) val gnt_multi_data = io.mem_grant.bits.hasMultibeatData() val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles) val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done) + val sec_rdy = idx_match && + (stateIsOneOf(states_before_refill) || + (stateIsOneOf(s_refill_req, s_refill_resp) && + !cmd_requires_second_acquire && !refill_done)) val rpq = Module(new Queue(new ReplayInternal, p(ReplayQueueDepth))) rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(io.req_bits.cmd) @@ -308,7 +317,7 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val coh_on_grant = req.old_meta.coh.onGrant( incoming = io.mem_grant.bits, - pending = req.cmd) + pending = Mux(dirties_coh, M_XWR, req.cmd)) val coh_on_hit = io.req_bits.old_meta.coh.onHit(io.req_bits.cmd) when (state === s_drain_rpq && !rpq.io.deq.valid) { @@ -344,10 +353,12 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { when(cmd_requires_second_acquire) { req.cmd := io.req_bits.cmd } + dirties_coh := dirties_coh || isWrite(io.req_bits.cmd) } when (io.req_pri_val && io.req_pri_rdy) { val coh = io.req_bits.old_meta.coh req := io.req_bits + dirties_coh := isWrite(io.req_bits.cmd) when (io.req_bits.tag_match) { when(coh.isHit(io.req_bits.cmd)) { // set dirty bit state := s_meta_write_req @@ -379,7 +390,7 @@ class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) { val meta_hazard = Reg(init=UInt(0,2)) when (meta_hazard =/= UInt(0)) { meta_hazard := meta_hazard + 1 } when (io.meta_write.fire()) { meta_hazard := 1 } - io.probe_rdy := !idx_match || (!states_before_refill.contains(state) && meta_hazard === 0) + io.probe_rdy := !idx_match || (!stateIsOneOf(states_before_refill) && meta_hazard === 0) io.meta_write.valid := state === s_meta_write_req 
|| state === s_meta_clear io.meta_write.bits.idx := req_idx From bf35f980a6a1266ede76cd130c51503c2f3b3dc8 Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Wed, 27 Jul 2016 18:40:38 -0700 Subject: [PATCH 1087/1087] make sure PTE cache is power of 2 in size to satisfy PseudoLRU requirement --- rocket/src/main/scala/ptw.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocket/src/main/scala/ptw.scala b/rocket/src/main/scala/ptw.scala index 9a431618..c5a64764 100644 --- a/rocket/src/main/scala/ptw.scala +++ b/rocket/src/main/scala/ptw.scala @@ -98,7 +98,7 @@ class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) { } val (pte_cache_hit, pte_cache_data) = { - val size = log2Up(pgLevels * 2) + val size = 1 << log2Up(pgLevels * 2) val plru = new PseudoLRU(size) val valid = Reg(init = UInt(0, size)) val tags = Reg(Vec(size, UInt(width = paddrBits)))