working RoCC AccumulatorExample
@@ -39,6 +39,7 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Modu
    val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i)
    resp.valid := io.mem.resp.valid && tag_hit
    io.requestor(i).xcpt := io.mem.xcpt
    io.requestor(i).ordered := io.mem.ordered
    resp.bits := io.mem.resp.bits
    resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n))
    resp.bits.nack := io.mem.resp.bits.nack && tag_hit

@@ -282,25 +282,25 @@ object RoCCDecode extends DecodeConstants
    CUSTOM0->           List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM0_RS1->       List(Y,    N,Y,BR_N,  N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM0_RS1_RS2->   List(Y,    N,Y,BR_N,  N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM0->           List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM0_RD->        List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM0_RD_RS1->    List(Y,    N,Y,BR_N,  N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM0_RD_RS1_RS2->List(Y,    N,Y,BR_N,  N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM1->           List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM1_RS1->       List(Y,    N,Y,BR_N,  N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM1_RS1_RS2->   List(Y,    N,Y,BR_N,  N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM1->           List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM1_RD->        List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM1_RD_RS1->    List(Y,    N,Y,BR_N,  N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM1_RD_RS1_RS2->List(Y,    N,Y,BR_N,  N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM2->           List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM2_RS1->       List(Y,    N,Y,BR_N,  N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM2_RS1_RS2->   List(Y,    N,Y,BR_N,  N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM2->           List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM2_RD->        List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM2_RD_RS1->    List(Y,    N,Y,BR_N,  N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM2_RD_RS1_RS2->List(Y,    N,Y,BR_N,  N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM3->           List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM3_RS1->       List(Y,    N,Y,BR_N,  N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM3_RS1_RS2->   List(Y,    N,Y,BR_N,  N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM3->           List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM3_RD->        List(Y,    N,Y,BR_N,  N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM3_RD_RS1->    List(Y,    N,Y,BR_N,  N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
    CUSTOM3_RD_RS1_RS2->List(Y,    N,Y,BR_N,  N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD,   N,M_X,      MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N))
}
@@ -639,7 +639,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
     io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr ||
     io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr ||
     io.fpu.dec.wen  && id_waddr  === io.dpath.ex_waddr)
  val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val) ||
  val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val || ex_reg_rocc_val) ||
                     fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val)

  // stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage.
@@ -655,7 +655,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
     io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr ||
     io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr ||
     io.fpu.dec.wen  && id_waddr  === io.dpath.mem_waddr)
  val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) ||
  val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val || mem_reg_rocc_val) ||
                      fp_data_hazard_mem && mem_reg_fp_val
  id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem)

@@ -669,7 +669,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
     io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr ||
     io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr ||
     io.fpu.dec.wen  && id_waddr  === io.dpath.wb_waddr)
  val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val) ||
  val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val || wb_reg_rocc_val) ||
                     fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val)

  io.dpath.mem_ll_bypass_rs1 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr1

@@ -5,12 +5,13 @@ import Node._

object DecodeLogic
{
  def term(b: Literal) = {
    if (b.isZ) {
      var (bits, mask, swidth) = Literal.parseLit(b.toString)
      new Term(BigInt(bits, 2), BigInt(2).pow(b.width)-(BigInt(mask, 2)+1))
  def term(b: Bits) = {
    val lit = b.litOf
    if (lit.isZ) {
      var (bits, mask, swidth) = Literal.parseLit(lit.toString)
      new Term(BigInt(bits, 2), BigInt(2).pow(lit.width)-(BigInt(mask, 2)+1))
    } else {
      new Term(b.value)
      new Term(lit.value)
    }
  }
  def logic(addr: Bits, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bits], terms: Seq[Term]) = {
@@ -24,18 +25,17 @@ object DecodeLogic
    var map = mapping
    var cache = scala.collection.mutable.Map[Term,Bits]()
    default map { d =>
      val dlit = d.litOf
      val dterm = term(dlit)
      val dterm = term(d)
      val (keys, values) = map.unzip
      val addrWidth = keys.map(_.getWidth).max
      val terms = keys.toList.map(k => term(k.litOf))
      val termvalues = terms zip values.toList.map(v => term(v.head.litOf))
      val terms = keys.toList.map(k => term(k))
      val termvalues = terms zip values.toList.map(v => term(v.head))

      for (t <- terms.tails; if !t.isEmpty)
      for (t <- keys.zip(terms).tails; if !t.isEmpty)
        for (u <- t.tail)
          assert(!t.head.intersects(u), "DecodeLogic: keys " + t + " and " + u + " overlap")
          assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap")

      val result = (0 until math.max(dlit.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) =>
      val result = (0 until math.max(d.litOf.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) =>
        val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1)
        val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1)
        val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1)

@@ -244,7 +244,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
  val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
  val dmem_resp_fpu =  io.dmem.resp.bits.tag(0).toBool
  val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1)
  val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu
  val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
  val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data

  val mem_ll_wdata = Bits()
  mem_ll_wdata := div.io.resp.bits.data
@@ -259,7 +260,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
      io.ctrl.mem_ll_wb := Bool(true)
    }
  }
  when (dmem_resp_replay) {
  when (dmem_resp_replay && dmem_resp_xpu) {
    div.io.resp.ready := Bool(false)
    if (!conf.rocc.isEmpty)
      io.rocc.resp.ready := Bool(false)
@@ -269,7 +270,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
  }
  when (io.ctrl.mem_ll_waddr === UInt(0)) { io.ctrl.mem_ll_wb := Bool(false) }

  io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu
  io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
  io.fpu.dmem_resp_data := io.dmem.resp.bits.data
  io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
  io.fpu.dmem_resp_tag := dmem_resp_waddr
@@ -297,7 +298,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
  io.ctrl.wb_waddr := wb_reg_waddr

  // scoreboard clear (for div/mul and D$ load miss writebacks)
  io.ctrl.fp_sboard_clr  := io.dmem.resp.bits.replay && dmem_resp_fpu
  io.ctrl.fp_sboard_clr  := dmem_resp_replay && dmem_resp_fpu
  io.ctrl.fp_sboard_clra := dmem_resp_waddr

  // processor control regfile write

@@ -698,6 +698,7 @@ class HellaCacheResp(implicit val conf: DCacheConfig) extends DCacheBundle {
  val nack = Bool() // comes 2 cycles after req.fire
  val replay = Bool()
  val typ = Bits(width = 3)
  val has_data = Bool()
  val data = Bits(width = conf.databits)
  val data_subword = Bits(width = conf.databits)
  val tag = Bits(width = conf.reqtagbits)
@@ -1027,11 +1028,11 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends
    io.cpu.req.ready := Bool(false)
  }

  val s2_do_resp = isRead(s2_req.cmd) || s2_sc
  io.cpu.resp.valid  := s2_do_resp && (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
  io.cpu.resp.valid  := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
  io.cpu.resp.bits.nack := s2_valid && s2_nack
  io.cpu.resp.bits := s2_req
  io.cpu.resp.bits.replay := s2_replay && s2_do_resp
  io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc
  io.cpu.resp.bits.replay := s2_replay
  io.cpu.resp.bits.data := loadgen.word
  io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte)
  io.cpu.resp.bits.store_data := s2_req.data
@@ -1039,3 +1040,84 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends

  io.mem.grant_ack <> mshrs.io.mem_finish
}

// exposes a sane decoupled request interface
class SimpleHellaCacheIF(implicit conf: DCacheConfig) extends Module
{
  val io = new Bundle {
    val requestor = new HellaCacheIO().flip
    val cache = new HellaCacheIO
  }

  val replaying_cmb = Bool()
  val replaying = Reg(next = replaying_cmb, init = Bool(false))
  replaying_cmb := replaying

  val replayq1 = Module(new Queue(new HellaCacheReq, 1, flow = true))
  val replayq2 = Module(new Queue(new HellaCacheReq, 1))
  val req_arb = Module(new Arbiter(new HellaCacheReq, 2))

  req_arb.io.in(0) <> replayq1.io.deq
  req_arb.io.in(1).valid := !replaying_cmb && io.requestor.req.valid
  req_arb.io.in(1).bits := io.requestor.req.bits
  io.requestor.req.ready := !replaying_cmb && req_arb.io.in(1).ready

  val s2_nack = io.cache.resp.bits.nack
  val s3_nack = Reg(next=s2_nack)

  val s0_req_fire = io.cache.req.fire()
  val s1_req_fire = Reg(next=s0_req_fire)
  val s2_req_fire = Reg(next=s1_req_fire)

  io.cache.req.bits.kill := s2_nack
  io.cache.req.bits.phys := Bool(true)
  io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
  io.cache.req <> req_arb.io.out

  // replay queues
  // replayq1 holds the older request
  // replayq2 holds the newer request (for the first nack)
  // we need to split the queues like this for the case where the older request
  // goes through but gets nacked, while the newer request stalls
  // if this happens, the newer request will go through before the older
  // request
  // we don't need to check replayq1.io.enq.ready and replayq2.io.enq.ready as
  // there will only be two requests going through at most

  // stash d$ request in stage 2 if nacked (older request)
  replayq1.io.enq.valid := Bool(false)
  replayq1.io.enq.bits.cmd := io.cache.resp.bits.cmd
  replayq1.io.enq.bits.typ := io.cache.resp.bits.typ
  replayq1.io.enq.bits.addr := io.cache.resp.bits.addr
  replayq1.io.enq.bits.data := io.cache.resp.bits.store_data
  replayq1.io.enq.bits.tag := io.cache.resp.bits.tag

  // stash d$ request in stage 1 if nacked (newer request)
  replayq2.io.enq.valid := s2_req_fire && s3_nack
  replayq2.io.enq.bits.data := io.cache.resp.bits.store_data
  replayq2.io.enq.bits <> io.cache.resp.bits
  replayq2.io.deq.ready := Bool(false)

  when (s2_nack) {
    replayq1.io.enq.valid := Bool(true)
    replaying_cmb := Bool(true)
  }

  // when replaying request got sunk into the d$
  when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !s2_nack) {
    // see if there's a stashed request in replayq2
    when (replayq2.io.deq.valid) {
      replayq1.io.enq.valid := Bool(true)
      replayq1.io.enq.bits.cmd := replayq2.io.deq.bits.cmd
      replayq1.io.enq.bits.typ := replayq2.io.deq.bits.typ
      replayq1.io.enq.bits.addr := replayq2.io.deq.bits.addr
      replayq1.io.enq.bits.data := replayq2.io.deq.bits.data
      replayq1.io.enq.bits.tag := replayq2.io.deq.bits.tag
      replayq2.io.deq.ready := Bool(true)
    } .otherwise {
      replaying_cmb := Bool(false)
    }
  }

  io.requestor.resp := io.cache.resp
}

@@ -2,6 +2,7 @@ package rocket

import Chisel._
import Node._
import uncore._

class RoCCInstruction extends Bundle
{
@@ -36,35 +37,73 @@ class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle
{
  val cmd = Decoupled(new RoCCCommand).flip
  val resp = Decoupled(new RoCCResponse)
  val mem = new HellaCacheIO()(conf.dcache)
  val busy = Bool(OUTPUT)
  val interrupt = Bool(OUTPUT)

  override def clone = new RoCCInterface().asInstanceOf[this.type]
}

abstract class RoCC(implicit conf: RocketConfiguration) extends Module
abstract class RoCC(conf: RocketConfiguration) extends Module
{
  val io = new RoCCInterface
  val io = new RoCCInterface()(conf)
}

class AccumulatorExample(implicit conf: RocketConfiguration) extends RoCC
class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf)
{
  val regfile = Mem(UInt(width = conf.xprlen), 4)
  val n = 4
  val regfile = Mem(UInt(width = conf.xprlen), n)
  val busy = Vec.fill(n){Reg(init=Bool(false))}

  val funct = io.cmd.bits.inst.funct
  val addr = io.cmd.bits.inst.rs2
  val addend = io.cmd.bits.rs1
  val cmd = Queue(io.cmd)
  val funct = cmd.bits.inst.funct
  val addr = cmd.bits.inst.rs2(log2Up(n)-1,0)
  val doWrite = funct === UInt(0)
  val doRead = funct === UInt(1)
  val doLoad = funct === UInt(2)
  val doAccum = funct === UInt(3)
  val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0)

  // datapath
  val addend = cmd.bits.rs1
  val accum = regfile(addr)
  val wdata = Mux(funct === UInt(0), addend, accum + addend)
  val wdata = Mux(doWrite, addend, accum + addend)

  when (io.cmd.fire() && (funct === UInt(1) || funct === UInt(3))) {
  when (cmd.fire() && (doWrite || doAccum)) {
    regfile(addr) := wdata
  }

  io.cmd.ready := io.resp.ready
  io.resp.valid := io.cmd.valid && io.cmd.bits.inst.xd
  io.resp.bits.rd := io.cmd.bits.inst.rd
  when (io.mem.resp.valid) {
    regfile(memRespTag) := io.mem.resp.bits.data
  }

  // control
  when (io.mem.req.fire()) {
    busy(addr) := Bool(true)
  }

  when (io.mem.resp.valid) {
    busy(memRespTag) := Bool(false)
  }

  val doResp = cmd.bits.inst.xd
  val stallReg = busy(addr)
  val stallLoad = doLoad && !io.mem.req.ready
  val stallResp = doResp && !io.resp.ready

  cmd.ready := !stallReg && !stallLoad && !stallResp

  io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad
  io.resp.bits.rd := cmd.bits.inst.rd
  io.resp.bits.data := accum

  io.busy := Bool(false)
  io.interrupt := Bool(false)

  io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp
  io.mem.req.bits.addr := addend
  io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
  io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
  io.mem.req.bits.data := Bits(0) // we're not performing any stores...
  io.mem.req.bits.phys := Bool(true) // don't perform address translation
}

@@ -6,7 +6,7 @@ import Util._

case class RocketConfiguration(tl: TileLinkConfiguration,
                               icache: ICacheConfig, dcache: DCacheConfig,
                               fpu: Boolean, rocc: Option[RoCC] = None,
                               fpu: Boolean, rocc: Option[RocketConfiguration => RoCC] = None,
                               fastLoadWord: Boolean = true,
                               fastLoadByte: Boolean = false,
                               fastMulDiv: Boolean = true)
@@ -23,7 +23,6 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
  val memPorts = 2
  val dcachePortId = 0
  val icachePortId = 1
  val vicachePortId = 2
  implicit val tlConf = confIn.tl
  implicit val lnConf = confIn.tl.ln
  implicit val icConf = confIn.icache
@@ -48,6 +47,18 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
  ptw.io.requestor(0) <> icache.io.cpu.ptw
  ptw.io.requestor(1) <> dcache.io.cpu.ptw

  if (!conf.rocc.isEmpty) {
    val dcIF = Module(new SimpleHellaCacheIF)
    val rocc = Module((conf.rocc.get)(conf))
    dcIF.io.requestor <> rocc.io.mem
    core.io.rocc <> rocc.io
    dcacheArb.io.requestor(2) <> dcIF.io.cache
  }

  core.io.host <> io.host
  core.io.imem <> icache.io.cpu
  core.io.ptw <> ptw.io.dpath

  val memArb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts))
  memArb.io.in(dcachePortId) <> dcache.io.mem
  memArb.io.in(icachePortId) <> icache.io.mem
@@ -61,8 +72,4 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
  dcache.io.mem.release.meta.ready := io.tilelink.release.meta.ready
  io.tilelink.release.meta.bits := dcache.io.mem.release.meta.bits
  io.tilelink.release.meta.bits.payload.client_xact_id :=  Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client)

  core.io.host <> io.host
  core.io.imem <> icache.io.cpu
  core.io.ptw <> ptw.io.dpath
}
 
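For context, here is a minimal usage sketch that is not part of this commit: rocc is now typed as Option[RocketConfiguration => RoCC] and Tile builds the accelerator via Module((conf.rocc.get)(conf)), so a configuration enabling the accumulator could be assembled roughly as below. The baseConf value and the withAccumulator helper are hypothetical names introduced only for illustration.

package rocket

// Hypothetical helper, for illustration only: baseConf is assumed to be a
// RocketConfiguration already built elsewhere in the design.
object AccumulatorExampleConfig {
  def withAccumulator(baseConf: RocketConfiguration): RocketConfiguration =
    // rocc carries a constructor function; Tile later elaborates it with
    // Module((conf.rocc.get)(conf)), which instantiates AccumulatorExample.
    baseConf.copy(rocc = Some((c: RocketConfiguration) => new AccumulatorExample(c)))
}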