working RoCC AccumulatorExample
This commit is contained in:
parent
18968dfbc7
commit
f12bbc1e43
@ -39,6 +39,7 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Modu
|
|||||||
val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i)
|
val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i)
|
||||||
resp.valid := io.mem.resp.valid && tag_hit
|
resp.valid := io.mem.resp.valid && tag_hit
|
||||||
io.requestor(i).xcpt := io.mem.xcpt
|
io.requestor(i).xcpt := io.mem.xcpt
|
||||||
|
io.requestor(i).ordered := io.mem.ordered
|
||||||
resp.bits := io.mem.resp.bits
|
resp.bits := io.mem.resp.bits
|
||||||
resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n))
|
resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n))
|
||||||
resp.bits.nack := io.mem.resp.bits.nack && tag_hit
|
resp.bits.nack := io.mem.resp.bits.nack && tag_hit
|
||||||
|
@ -282,25 +282,25 @@ object RoCCDecode extends DecodeConstants
|
|||||||
CUSTOM0-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM0-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM0_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM0_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM0_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM0_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM0-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM0_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM0_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM0_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM0_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM0_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM1_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM1_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM1_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM1_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM1_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM1_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM1_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM1_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM1_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM2_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM2_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM2_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM2_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM2_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM2_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM2_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM2_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM2_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM3_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM3_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM3_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM3_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM3_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM3_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
CUSTOM3_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||||
CUSTOM3_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N))
|
CUSTOM3_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N))
|
||||||
}
|
}
|
||||||
@ -639,7 +639,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
|
|||||||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr ||
|
io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr ||
|
||||||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr ||
|
io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr ||
|
||||||
io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr)
|
io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr)
|
||||||
val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val) ||
|
val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val || ex_reg_rocc_val) ||
|
||||||
fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val)
|
fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val)
|
||||||
|
|
||||||
// stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage.
|
// stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage.
|
||||||
@ -655,7 +655,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
|
|||||||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr ||
|
io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr ||
|
||||||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr ||
|
io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr ||
|
||||||
io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr)
|
io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr)
|
||||||
val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) ||
|
val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val || mem_reg_rocc_val) ||
|
||||||
fp_data_hazard_mem && mem_reg_fp_val
|
fp_data_hazard_mem && mem_reg_fp_val
|
||||||
id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem)
|
id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem)
|
||||||
|
|
||||||
@ -669,7 +669,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
|
|||||||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr ||
|
io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr ||
|
||||||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr ||
|
io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr ||
|
||||||
io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr)
|
io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr)
|
||||||
val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val) ||
|
val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val || wb_reg_rocc_val) ||
|
||||||
fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val)
|
fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val)
|
||||||
|
|
||||||
io.dpath.mem_ll_bypass_rs1 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr1
|
io.dpath.mem_ll_bypass_rs1 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr1
|
||||||
|
@ -5,12 +5,13 @@ import Node._
|
|||||||
|
|
||||||
object DecodeLogic
|
object DecodeLogic
|
||||||
{
|
{
|
||||||
def term(b: Literal) = {
|
def term(b: Bits) = {
|
||||||
if (b.isZ) {
|
val lit = b.litOf
|
||||||
var (bits, mask, swidth) = Literal.parseLit(b.toString)
|
if (lit.isZ) {
|
||||||
new Term(BigInt(bits, 2), BigInt(2).pow(b.width)-(BigInt(mask, 2)+1))
|
var (bits, mask, swidth) = Literal.parseLit(lit.toString)
|
||||||
|
new Term(BigInt(bits, 2), BigInt(2).pow(lit.width)-(BigInt(mask, 2)+1))
|
||||||
} else {
|
} else {
|
||||||
new Term(b.value)
|
new Term(lit.value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
def logic(addr: Bits, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bits], terms: Seq[Term]) = {
|
def logic(addr: Bits, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bits], terms: Seq[Term]) = {
|
||||||
@ -24,18 +25,17 @@ object DecodeLogic
|
|||||||
var map = mapping
|
var map = mapping
|
||||||
var cache = scala.collection.mutable.Map[Term,Bits]()
|
var cache = scala.collection.mutable.Map[Term,Bits]()
|
||||||
default map { d =>
|
default map { d =>
|
||||||
val dlit = d.litOf
|
val dterm = term(d)
|
||||||
val dterm = term(dlit)
|
|
||||||
val (keys, values) = map.unzip
|
val (keys, values) = map.unzip
|
||||||
val addrWidth = keys.map(_.getWidth).max
|
val addrWidth = keys.map(_.getWidth).max
|
||||||
val terms = keys.toList.map(k => term(k.litOf))
|
val terms = keys.toList.map(k => term(k))
|
||||||
val termvalues = terms zip values.toList.map(v => term(v.head.litOf))
|
val termvalues = terms zip values.toList.map(v => term(v.head))
|
||||||
|
|
||||||
for (t <- terms.tails; if !t.isEmpty)
|
for (t <- keys.zip(terms).tails; if !t.isEmpty)
|
||||||
for (u <- t.tail)
|
for (u <- t.tail)
|
||||||
assert(!t.head.intersects(u), "DecodeLogic: keys " + t + " and " + u + " overlap")
|
assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap")
|
||||||
|
|
||||||
val result = (0 until math.max(dlit.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) =>
|
val result = (0 until math.max(d.litOf.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) =>
|
||||||
val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1)
|
val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1)
|
||||||
val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1)
|
val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1)
|
||||||
val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1)
|
val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1)
|
||||||
|
@ -244,7 +244,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
|
|||||||
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
|
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
|
||||||
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
|
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
|
||||||
val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1)
|
val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1)
|
||||||
val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu
|
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
|
||||||
|
val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data
|
||||||
|
|
||||||
val mem_ll_wdata = Bits()
|
val mem_ll_wdata = Bits()
|
||||||
mem_ll_wdata := div.io.resp.bits.data
|
mem_ll_wdata := div.io.resp.bits.data
|
||||||
@ -259,7 +260,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
|
|||||||
io.ctrl.mem_ll_wb := Bool(true)
|
io.ctrl.mem_ll_wb := Bool(true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when (dmem_resp_replay) {
|
when (dmem_resp_replay && dmem_resp_xpu) {
|
||||||
div.io.resp.ready := Bool(false)
|
div.io.resp.ready := Bool(false)
|
||||||
if (!conf.rocc.isEmpty)
|
if (!conf.rocc.isEmpty)
|
||||||
io.rocc.resp.ready := Bool(false)
|
io.rocc.resp.ready := Bool(false)
|
||||||
@ -269,7 +270,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
|
|||||||
}
|
}
|
||||||
when (io.ctrl.mem_ll_waddr === UInt(0)) { io.ctrl.mem_ll_wb := Bool(false) }
|
when (io.ctrl.mem_ll_waddr === UInt(0)) { io.ctrl.mem_ll_wb := Bool(false) }
|
||||||
|
|
||||||
io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu
|
io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
|
||||||
io.fpu.dmem_resp_data := io.dmem.resp.bits.data
|
io.fpu.dmem_resp_data := io.dmem.resp.bits.data
|
||||||
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
|
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
|
||||||
io.fpu.dmem_resp_tag := dmem_resp_waddr
|
io.fpu.dmem_resp_tag := dmem_resp_waddr
|
||||||
@ -297,7 +298,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
|
|||||||
io.ctrl.wb_waddr := wb_reg_waddr
|
io.ctrl.wb_waddr := wb_reg_waddr
|
||||||
|
|
||||||
// scoreboard clear (for div/mul and D$ load miss writebacks)
|
// scoreboard clear (for div/mul and D$ load miss writebacks)
|
||||||
io.ctrl.fp_sboard_clr := io.dmem.resp.bits.replay && dmem_resp_fpu
|
io.ctrl.fp_sboard_clr := dmem_resp_replay && dmem_resp_fpu
|
||||||
io.ctrl.fp_sboard_clra := dmem_resp_waddr
|
io.ctrl.fp_sboard_clra := dmem_resp_waddr
|
||||||
|
|
||||||
// processor control regfile write
|
// processor control regfile write
|
||||||
|
@ -698,6 +698,7 @@ class HellaCacheResp(implicit val conf: DCacheConfig) extends DCacheBundle {
|
|||||||
val nack = Bool() // comes 2 cycles after req.fire
|
val nack = Bool() // comes 2 cycles after req.fire
|
||||||
val replay = Bool()
|
val replay = Bool()
|
||||||
val typ = Bits(width = 3)
|
val typ = Bits(width = 3)
|
||||||
|
val has_data = Bool()
|
||||||
val data = Bits(width = conf.databits)
|
val data = Bits(width = conf.databits)
|
||||||
val data_subword = Bits(width = conf.databits)
|
val data_subword = Bits(width = conf.databits)
|
||||||
val tag = Bits(width = conf.reqtagbits)
|
val tag = Bits(width = conf.reqtagbits)
|
||||||
@ -1027,11 +1028,11 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends
|
|||||||
io.cpu.req.ready := Bool(false)
|
io.cpu.req.ready := Bool(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
val s2_do_resp = isRead(s2_req.cmd) || s2_sc
|
io.cpu.resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
|
||||||
io.cpu.resp.valid := s2_do_resp && (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
|
|
||||||
io.cpu.resp.bits.nack := s2_valid && s2_nack
|
io.cpu.resp.bits.nack := s2_valid && s2_nack
|
||||||
io.cpu.resp.bits := s2_req
|
io.cpu.resp.bits := s2_req
|
||||||
io.cpu.resp.bits.replay := s2_replay && s2_do_resp
|
io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc
|
||||||
|
io.cpu.resp.bits.replay := s2_replay
|
||||||
io.cpu.resp.bits.data := loadgen.word
|
io.cpu.resp.bits.data := loadgen.word
|
||||||
io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte)
|
io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte)
|
||||||
io.cpu.resp.bits.store_data := s2_req.data
|
io.cpu.resp.bits.store_data := s2_req.data
|
||||||
@ -1039,3 +1040,84 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends
|
|||||||
|
|
||||||
io.mem.grant_ack <> mshrs.io.mem_finish
|
io.mem.grant_ack <> mshrs.io.mem_finish
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// exposes a sane decoupled request interface
|
||||||
|
// SimpleHellaCacheIF sits between a requestor-side HellaCacheIO (io.requestor)
// and a cache-side HellaCacheIO (io.cache). Requests whose cache response has
// `nack` set are stashed in two one-entry replay queues and re-issued through an
// arbiter; while a replay is in progress, new requests from the requestor are
// held off by gating their valid/ready.
class SimpleHellaCacheIF(implicit conf: DCacheConfig) extends Module
|
||||||
|
{
|
||||||
|
val io = new Bundle {
|
||||||
|
// client-facing port (flipped HellaCacheIO)
val requestor = new HellaCacheIO().flip
|
||||||
|
// cache-facing port
val cache = new HellaCacheIO
|
||||||
|
}
|
||||||
|
|
||||||
|
// replaying_cmb is the same-cycle replay flag; `replaying` is its registered copy
// (reset to false). By default the flag keeps its registered value; the `when`
// blocks further down override it.
val replaying_cmb = Bool()
|
||||||
|
val replaying = Reg(next = replaying_cmb, init = Bool(false))
|
||||||
|
replaying_cmb := replaying
|
||||||
|
|
||||||
|
// replayq1: one entry, built with flow = true (presumably lets an enqueued request
// appear at deq in the same cycle -- confirm against the Queue implementation)
val replayq1 = Module(new Queue(new HellaCacheReq, 1, flow = true))
|
||||||
|
// replayq2: one entry, default Queue options
val replayq2 = Module(new Queue(new HellaCacheReq, 1))
|
||||||
|
// two-input arbiter: input 0 = replayed requests, input 1 = new requestor requests
val req_arb = Module(new Arbiter(new HellaCacheReq, 2))
|
||||||
|
|
||||||
|
req_arb.io.in(0) <> replayq1.io.deq
|
||||||
|
// the requestor path is blocked (neither valid nor ready) whenever replaying_cmb is set
req_arb.io.in(1).valid := !replaying_cmb && io.requestor.req.valid
|
||||||
|
req_arb.io.in(1).bits := io.requestor.req.bits
|
||||||
|
io.requestor.req.ready := !replaying_cmb && req_arb.io.in(1).ready
|
||||||
|
|
||||||
|
// s2_nack: nack bit of the current cache response; s3_nack: the same bit one register later
val s2_nack = io.cache.resp.bits.nack
|
||||||
|
val s3_nack = Reg(next=s2_nack)
|
||||||
|
|
||||||
|
// cache request handshake (s0) and its one- and two-register delayed copies (s1, s2)
val s0_req_fire = io.cache.req.fire()
|
||||||
|
val s1_req_fire = Reg(next=s0_req_fire)
|
||||||
|
val s2_req_fire = Reg(next=s1_req_fire)
|
||||||
|
|
||||||
|
// per-field overrides of the cache request: kill follows the current nack, phys is
// always true, and data is the arbiter output data captured when a request fires.
// NOTE(review): the bulk connect `io.cache.req <> req_arb.io.out` comes AFTER these
// assignments -- confirm that the per-field values are the ones that take effect.
io.cache.req.bits.kill := s2_nack
|
||||||
|
io.cache.req.bits.phys := Bool(true)
|
||||||
|
io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
|
||||||
|
io.cache.req <> req_arb.io.out
|
||||||
|
|
||||||
|
// replay queues
|
||||||
|
// replayq1 holds the older request
|
||||||
|
// replayq2 holds the newer request (for the first nack)
|
||||||
|
// we need to split the queues like this for the case where the older request
|
||||||
|
// goes through but gets nacked, while the newer request stalls
|
||||||
|
// if this happens, the newer request will go through before the older
|
||||||
|
// request
|
||||||
|
// we don't need to check replayq1.io.enq.ready and replayq2.io.enq.ready as
|
||||||
|
// there will only be two requests going through at most
|
||||||
|
|
||||||
|
// stash d$ request in stage 2 if nacked (older request)
// default: no enqueue; the payload is rebuilt from the cache response fields
// (store_data carries the original request data)
|
||||||
|
replayq1.io.enq.valid := Bool(false)
|
||||||
|
replayq1.io.enq.bits.cmd := io.cache.resp.bits.cmd
|
||||||
|
replayq1.io.enq.bits.typ := io.cache.resp.bits.typ
|
||||||
|
replayq1.io.enq.bits.addr := io.cache.resp.bits.addr
|
||||||
|
replayq1.io.enq.bits.data := io.cache.resp.bits.store_data
|
||||||
|
replayq1.io.enq.bits.tag := io.cache.resp.bits.tag
|
||||||
|
|
||||||
|
// stash d$ request in stage 1 if nacked (newer request)
// enqueued when a request fired two cycles ago and the previous cycle's response was a nack
|
||||||
|
replayq2.io.enq.valid := s2_req_fire && s3_nack
|
||||||
|
// NOTE(review): `data` is assigned from store_data and then the whole bundle is
// bulk-connected from the response -- confirm which assignment wins for `data`.
replayq2.io.enq.bits.data := io.cache.resp.bits.store_data
|
||||||
|
replayq2.io.enq.bits <> io.cache.resp.bits
|
||||||
|
// default: do not dequeue; overridden below when the entry is moved into replayq1
replayq2.io.deq.ready := Bool(false)
|
||||||
|
|
||||||
|
// a nack on the current response: stash that request in replayq1 and enter replay mode
when (s2_nack) {
|
||||||
|
replayq1.io.enq.valid := Bool(true)
|
||||||
|
replaying_cmb := Bool(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
// when replaying request got sunk into the d$
// (replaying_cmb delayed by two registers lines up with s2_req_fire, which is also
// two registers after the request fired; the request must not have been nacked again)
|
||||||
|
when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !s2_nack) {
|
||||||
|
// see if there's a stashed request in replayq2
|
||||||
|
when (replayq2.io.deq.valid) {
|
||||||
|
// move the newer stashed request from replayq2 into replayq1 so it is replayed next
replayq1.io.enq.valid := Bool(true)
|
||||||
|
replayq1.io.enq.bits.cmd := replayq2.io.deq.bits.cmd
|
||||||
|
replayq1.io.enq.bits.typ := replayq2.io.deq.bits.typ
|
||||||
|
replayq1.io.enq.bits.addr := replayq2.io.deq.bits.addr
|
||||||
|
replayq1.io.enq.bits.data := replayq2.io.deq.bits.data
|
||||||
|
replayq1.io.enq.bits.tag := replayq2.io.deq.bits.tag
|
||||||
|
replayq2.io.deq.ready := Bool(true)
|
||||||
|
} .otherwise {
|
||||||
|
// nothing left to replay: leave replay mode, which unblocks the requestor path
replaying_cmb := Bool(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// cache responses are passed through to the requestor as-is
io.requestor.resp := io.cache.resp
|
||||||
|
}
|
||||||
|
@ -2,6 +2,7 @@ package rocket
|
|||||||
|
|
||||||
import Chisel._
|
import Chisel._
|
||||||
import Node._
|
import Node._
|
||||||
|
import uncore._
|
||||||
|
|
||||||
class RoCCInstruction extends Bundle
|
class RoCCInstruction extends Bundle
|
||||||
{
|
{
|
||||||
@ -36,35 +37,73 @@ class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle
|
|||||||
{
|
{
|
||||||
val cmd = Decoupled(new RoCCCommand).flip
|
val cmd = Decoupled(new RoCCCommand).flip
|
||||||
val resp = Decoupled(new RoCCResponse)
|
val resp = Decoupled(new RoCCResponse)
|
||||||
|
val mem = new HellaCacheIO()(conf.dcache)
|
||||||
val busy = Bool(OUTPUT)
|
val busy = Bool(OUTPUT)
|
||||||
val interrupt = Bool(OUTPUT)
|
val interrupt = Bool(OUTPUT)
|
||||||
|
|
||||||
override def clone = new RoCCInterface().asInstanceOf[this.type]
|
override def clone = new RoCCInterface().asInstanceOf[this.type]
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract class RoCC(implicit conf: RocketConfiguration) extends Module
|
abstract class RoCC(conf: RocketConfiguration) extends Module
|
||||||
{
|
{
|
||||||
val io = new RoCCInterface
|
val io = new RoCCInterface()(conf)
|
||||||
}
|
}
|
||||||
|
|
||||||
class AccumulatorExample(implicit conf: RocketConfiguration) extends RoCC
|
class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf)
|
||||||
{
|
{
|
||||||
val regfile = Mem(UInt(width = conf.xprlen), 4)
|
val n = 4
|
||||||
|
val regfile = Mem(UInt(width = conf.xprlen), n)
|
||||||
|
val busy = Vec.fill(n){Reg(init=Bool(false))}
|
||||||
|
|
||||||
val funct = io.cmd.bits.inst.funct
|
val cmd = Queue(io.cmd)
|
||||||
val addr = io.cmd.bits.inst.rs2
|
val funct = cmd.bits.inst.funct
|
||||||
val addend = io.cmd.bits.rs1
|
val addr = cmd.bits.inst.rs2(log2Up(n)-1,0)
|
||||||
|
val doWrite = funct === UInt(0)
|
||||||
|
val doRead = funct === UInt(1)
|
||||||
|
val doLoad = funct === UInt(2)
|
||||||
|
val doAccum = funct === UInt(3)
|
||||||
|
val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0)
|
||||||
|
|
||||||
|
// datapath
|
||||||
|
val addend = cmd.bits.rs1
|
||||||
val accum = regfile(addr)
|
val accum = regfile(addr)
|
||||||
val wdata = Mux(funct === UInt(0), addend, accum + addend)
|
val wdata = Mux(doWrite, addend, accum + addend)
|
||||||
|
|
||||||
when (io.cmd.fire() && (funct === UInt(1) || funct === UInt(3))) {
|
when (cmd.fire() && (doWrite || doAccum)) {
|
||||||
regfile(addr) := wdata
|
regfile(addr) := wdata
|
||||||
}
|
}
|
||||||
|
|
||||||
io.cmd.ready := io.resp.ready
|
when (io.mem.resp.valid) {
|
||||||
io.resp.valid := io.cmd.valid && io.cmd.bits.inst.xd
|
regfile(memRespTag) := io.mem.resp.bits.data
|
||||||
io.resp.bits.rd := io.cmd.bits.inst.rd
|
}
|
||||||
|
|
||||||
|
// control
|
||||||
|
when (io.mem.req.fire()) {
|
||||||
|
busy(addr) := Bool(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
when (io.mem.resp.valid) {
|
||||||
|
busy(memRespTag) := Bool(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
val doResp = cmd.bits.inst.xd
|
||||||
|
val stallReg = busy(addr)
|
||||||
|
val stallLoad = doLoad && !io.mem.req.ready
|
||||||
|
val stallResp = doResp && !io.resp.ready
|
||||||
|
|
||||||
|
cmd.ready := !stallReg && !stallLoad && !stallResp
|
||||||
|
|
||||||
|
io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad
|
||||||
|
io.resp.bits.rd := cmd.bits.inst.rd
|
||||||
io.resp.bits.data := accum
|
io.resp.bits.data := accum
|
||||||
|
|
||||||
io.busy := Bool(false)
|
io.busy := Bool(false)
|
||||||
io.interrupt := Bool(false)
|
io.interrupt := Bool(false)
|
||||||
|
|
||||||
|
io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp
|
||||||
|
io.mem.req.bits.addr := addend
|
||||||
|
io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
|
||||||
|
io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
|
||||||
|
io.mem.req.bits.data := Bits(0) // we're not performing any stores...
|
||||||
|
io.mem.req.bits.phys := Bool(true) // don't perform address translation
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@ import Util._
|
|||||||
|
|
||||||
case class RocketConfiguration(tl: TileLinkConfiguration,
|
case class RocketConfiguration(tl: TileLinkConfiguration,
|
||||||
icache: ICacheConfig, dcache: DCacheConfig,
|
icache: ICacheConfig, dcache: DCacheConfig,
|
||||||
fpu: Boolean, rocc: Option[RoCC] = None,
|
fpu: Boolean, rocc: Option[RocketConfiguration => RoCC] = None,
|
||||||
fastLoadWord: Boolean = true,
|
fastLoadWord: Boolean = true,
|
||||||
fastLoadByte: Boolean = false,
|
fastLoadByte: Boolean = false,
|
||||||
fastMulDiv: Boolean = true)
|
fastMulDiv: Boolean = true)
|
||||||
@ -23,7 +23,6 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
|
|||||||
val memPorts = 2
|
val memPorts = 2
|
||||||
val dcachePortId = 0
|
val dcachePortId = 0
|
||||||
val icachePortId = 1
|
val icachePortId = 1
|
||||||
val vicachePortId = 2
|
|
||||||
implicit val tlConf = confIn.tl
|
implicit val tlConf = confIn.tl
|
||||||
implicit val lnConf = confIn.tl.ln
|
implicit val lnConf = confIn.tl.ln
|
||||||
implicit val icConf = confIn.icache
|
implicit val icConf = confIn.icache
|
||||||
@ -48,6 +47,18 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
|
|||||||
ptw.io.requestor(0) <> icache.io.cpu.ptw
|
ptw.io.requestor(0) <> icache.io.cpu.ptw
|
||||||
ptw.io.requestor(1) <> dcache.io.cpu.ptw
|
ptw.io.requestor(1) <> dcache.io.cpu.ptw
|
||||||
|
|
||||||
|
if (!conf.rocc.isEmpty) {
|
||||||
|
val dcIF = Module(new SimpleHellaCacheIF)
|
||||||
|
val rocc = Module((conf.rocc.get)(conf))
|
||||||
|
dcIF.io.requestor <> rocc.io.mem
|
||||||
|
core.io.rocc <> rocc.io
|
||||||
|
dcacheArb.io.requestor(2) <> dcIF.io.cache
|
||||||
|
}
|
||||||
|
|
||||||
|
core.io.host <> io.host
|
||||||
|
core.io.imem <> icache.io.cpu
|
||||||
|
core.io.ptw <> ptw.io.dpath
|
||||||
|
|
||||||
val memArb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts))
|
val memArb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts))
|
||||||
memArb.io.in(dcachePortId) <> dcache.io.mem
|
memArb.io.in(dcachePortId) <> dcache.io.mem
|
||||||
memArb.io.in(icachePortId) <> icache.io.mem
|
memArb.io.in(icachePortId) <> icache.io.mem
|
||||||
@ -61,8 +72,4 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
|
|||||||
dcache.io.mem.release.meta.ready := io.tilelink.release.meta.ready
|
dcache.io.mem.release.meta.ready := io.tilelink.release.meta.ready
|
||||||
io.tilelink.release.meta.bits := dcache.io.mem.release.meta.bits
|
io.tilelink.release.meta.bits := dcache.io.mem.release.meta.bits
|
||||||
io.tilelink.release.meta.bits.payload.client_xact_id := Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client)
|
io.tilelink.release.meta.bits.payload.client_xact_id := Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client)
|
||||||
|
|
||||||
core.io.host <> io.host
|
|
||||||
core.io.imem <> icache.io.cpu
|
|
||||||
core.io.ptw <> ptw.io.dpath
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user