working RoCC AccumulatorExample
This commit is contained in:
parent
18968dfbc7
commit
f12bbc1e43
@ -39,6 +39,7 @@ class HellaCacheArbiter(n: Int)(implicit conf: RocketConfiguration) extends Modu
|
||||
val tag_hit = io.mem.resp.bits.tag(log2Up(n)-1,0) === UInt(i)
|
||||
resp.valid := io.mem.resp.valid && tag_hit
|
||||
io.requestor(i).xcpt := io.mem.xcpt
|
||||
io.requestor(i).ordered := io.mem.ordered
|
||||
resp.bits := io.mem.resp.bits
|
||||
resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n))
|
||||
resp.bits.nack := io.mem.resp.bits.nack && tag_hit
|
||||
|
@ -282,25 +282,25 @@ object RoCCDecode extends DecodeConstants
|
||||
CUSTOM0-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM0_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM0_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM0-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM0_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM0_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM0_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM1_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM1_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM1-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM1_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM1_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM1_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM2_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM2_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM2-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM2_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM2_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM2_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM3_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM3_RS1_RS2-> List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,N,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM3-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM3_RD-> List(Y, N,Y,BR_N, N,N,N,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM3_RD_RS1-> List(Y, N,Y,BR_N, N,N,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N),
|
||||
CUSTOM3_RD_RS1_RS2->List(Y, N,Y,BR_N, N,Y,Y,A2_ZERO,A1_RS1, IMM_X, DW_XPR,FN_ADD, N,M_X, MT_X, N,N,Y,WB_ALU,PCR.N,N,N,N,N,N,N,N))
|
||||
}
|
||||
@ -639,7 +639,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
|
||||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.ex_waddr ||
|
||||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.ex_waddr ||
|
||||
io.fpu.dec.wen && id_waddr === io.dpath.ex_waddr)
|
||||
val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val) ||
|
||||
val id_ex_hazard = data_hazard_ex && (ex_reg_pcr != PCR.N || ex_reg_mem_val || ex_reg_div_mul_val || ex_reg_fp_val || ex_reg_rocc_val) ||
|
||||
fp_data_hazard_ex && (ex_reg_mem_val || ex_reg_fp_val)
|
||||
|
||||
// stall for RAW/WAW hazards on PCRs, LB/LH, and mul/div in memory stage.
|
||||
@ -655,7 +655,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
|
||||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.mem_waddr ||
|
||||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.mem_waddr ||
|
||||
io.fpu.dec.wen && id_waddr === io.dpath.mem_waddr)
|
||||
val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val) ||
|
||||
val id_mem_hazard = data_hazard_mem && (mem_reg_pcr != PCR.N || mem_reg_mem_val && mem_mem_cmd_bh || mem_reg_div_mul_val || mem_reg_fp_val || mem_reg_rocc_val) ||
|
||||
fp_data_hazard_mem && mem_reg_fp_val
|
||||
id_load_use := mem_reg_mem_val && (data_hazard_mem || fp_data_hazard_mem)
|
||||
|
||||
@ -669,7 +669,7 @@ class Control(implicit conf: RocketConfiguration) extends Module
|
||||
io.fpu.dec.ren2 && id_raddr2 === io.dpath.wb_waddr ||
|
||||
io.fpu.dec.ren3 && id_raddr3 === io.dpath.wb_waddr ||
|
||||
io.fpu.dec.wen && id_waddr === io.dpath.wb_waddr)
|
||||
val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val) ||
|
||||
val id_wb_hazard = data_hazard_wb && (wb_dcache_miss || wb_reg_div_mul_val || wb_reg_rocc_val) ||
|
||||
fp_data_hazard_wb && (wb_dcache_miss || wb_reg_fp_val)
|
||||
|
||||
io.dpath.mem_ll_bypass_rs1 := io.dpath.mem_ll_wb && io.dpath.mem_ll_waddr === id_raddr1
|
||||
|
@ -5,12 +5,13 @@ import Node._
|
||||
|
||||
object DecodeLogic
|
||||
{
|
||||
def term(b: Literal) = {
|
||||
if (b.isZ) {
|
||||
var (bits, mask, swidth) = Literal.parseLit(b.toString)
|
||||
new Term(BigInt(bits, 2), BigInt(2).pow(b.width)-(BigInt(mask, 2)+1))
|
||||
def term(b: Bits) = {
|
||||
val lit = b.litOf
|
||||
if (lit.isZ) {
|
||||
var (bits, mask, swidth) = Literal.parseLit(lit.toString)
|
||||
new Term(BigInt(bits, 2), BigInt(2).pow(lit.width)-(BigInt(mask, 2)+1))
|
||||
} else {
|
||||
new Term(b.value)
|
||||
new Term(lit.value)
|
||||
}
|
||||
}
|
||||
def logic(addr: Bits, addrWidth: Int, cache: scala.collection.mutable.Map[Term,Bits], terms: Seq[Term]) = {
|
||||
@ -24,18 +25,17 @@ object DecodeLogic
|
||||
var map = mapping
|
||||
var cache = scala.collection.mutable.Map[Term,Bits]()
|
||||
default map { d =>
|
||||
val dlit = d.litOf
|
||||
val dterm = term(dlit)
|
||||
val dterm = term(d)
|
||||
val (keys, values) = map.unzip
|
||||
val addrWidth = keys.map(_.getWidth).max
|
||||
val terms = keys.toList.map(k => term(k.litOf))
|
||||
val termvalues = terms zip values.toList.map(v => term(v.head.litOf))
|
||||
val terms = keys.toList.map(k => term(k))
|
||||
val termvalues = terms zip values.toList.map(v => term(v.head))
|
||||
|
||||
for (t <- terms.tails; if !t.isEmpty)
|
||||
for (t <- keys.zip(terms).tails; if !t.isEmpty)
|
||||
for (u <- t.tail)
|
||||
assert(!t.head.intersects(u), "DecodeLogic: keys " + t + " and " + u + " overlap")
|
||||
assert(!t.head._2.intersects(u._2), "DecodeLogic: keys " + t.head + " and " + u + " overlap")
|
||||
|
||||
val result = (0 until math.max(dlit.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) =>
|
||||
val result = (0 until math.max(d.litOf.width, values.map(_.head.litOf.width).max)).map({ case (i: Int) =>
|
||||
val mint = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 1 }.map(_._1)
|
||||
val maxt = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 0 && ((t.value >> i) & 1) == 0 }.map(_._1)
|
||||
val dc = termvalues.filter { case (k,t) => ((t.mask >> i) & 1) == 1 }.map(_._1)
|
||||
|
@ -244,7 +244,8 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
|
||||
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
|
||||
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
|
||||
val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1)
|
||||
val dmem_resp_replay = io.dmem.resp.bits.replay && dmem_resp_xpu
|
||||
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
|
||||
val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data
|
||||
|
||||
val mem_ll_wdata = Bits()
|
||||
mem_ll_wdata := div.io.resp.bits.data
|
||||
@ -259,7 +260,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
|
||||
io.ctrl.mem_ll_wb := Bool(true)
|
||||
}
|
||||
}
|
||||
when (dmem_resp_replay) {
|
||||
when (dmem_resp_replay && dmem_resp_xpu) {
|
||||
div.io.resp.ready := Bool(false)
|
||||
if (!conf.rocc.isEmpty)
|
||||
io.rocc.resp.ready := Bool(false)
|
||||
@ -269,7 +270,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
|
||||
}
|
||||
when (io.ctrl.mem_ll_waddr === UInt(0)) { io.ctrl.mem_ll_wb := Bool(false) }
|
||||
|
||||
io.fpu.dmem_resp_val := io.dmem.resp.valid && dmem_resp_fpu
|
||||
io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
|
||||
io.fpu.dmem_resp_data := io.dmem.resp.bits.data
|
||||
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
|
||||
io.fpu.dmem_resp_tag := dmem_resp_waddr
|
||||
@ -297,7 +298,7 @@ class Datapath(implicit conf: RocketConfiguration) extends Module
|
||||
io.ctrl.wb_waddr := wb_reg_waddr
|
||||
|
||||
// scoreboard clear (for div/mul and D$ load miss writebacks)
|
||||
io.ctrl.fp_sboard_clr := io.dmem.resp.bits.replay && dmem_resp_fpu
|
||||
io.ctrl.fp_sboard_clr := dmem_resp_replay && dmem_resp_fpu
|
||||
io.ctrl.fp_sboard_clra := dmem_resp_waddr
|
||||
|
||||
// processor control regfile write
|
||||
|
@ -698,6 +698,7 @@ class HellaCacheResp(implicit val conf: DCacheConfig) extends DCacheBundle {
|
||||
val nack = Bool() // comes 2 cycles after req.fire
|
||||
val replay = Bool()
|
||||
val typ = Bits(width = 3)
|
||||
val has_data = Bool()
|
||||
val data = Bits(width = conf.databits)
|
||||
val data_subword = Bits(width = conf.databits)
|
||||
val tag = Bits(width = conf.reqtagbits)
|
||||
@ -1027,11 +1028,11 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends
|
||||
io.cpu.req.ready := Bool(false)
|
||||
}
|
||||
|
||||
val s2_do_resp = isRead(s2_req.cmd) || s2_sc
|
||||
io.cpu.resp.valid := s2_do_resp && (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
|
||||
io.cpu.resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
|
||||
io.cpu.resp.bits.nack := s2_valid && s2_nack
|
||||
io.cpu.resp.bits := s2_req
|
||||
io.cpu.resp.bits.replay := s2_replay && s2_do_resp
|
||||
io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc
|
||||
io.cpu.resp.bits.replay := s2_replay
|
||||
io.cpu.resp.bits.data := loadgen.word
|
||||
io.cpu.resp.bits.data_subword := Mux(s2_sc, s2_sc_fail, loadgen.byte)
|
||||
io.cpu.resp.bits.store_data := s2_req.data
|
||||
@ -1039,3 +1040,84 @@ class HellaCache(implicit conf: DCacheConfig, tl: TileLinkConfiguration) extends
|
||||
|
||||
io.mem.grant_ack <> mshrs.io.mem_finish
|
||||
}
|
||||
|
||||
// exposes a sane decoupled request interface
// Adapter placed between a client (io.requestor) and a cache port (io.cache).
// Requests that the cache nacks are stashed in small replay queues and
// re-issued through an arbiter; while a replay is in progress, new requests
// from the requestor are held off. Cache responses are passed straight through.
|
||||
class SimpleHellaCacheIF(implicit conf: DCacheConfig) extends Module
|
||||
{
|
||||
// requestor: flipped HellaCacheIO facing the client;
// cache: HellaCacheIO facing the cache side.
val io = new Bundle {
|
||||
val requestor = new HellaCacheIO().flip
|
||||
val cache = new HellaCacheIO
|
||||
}
|
||||
|
||||
// replaying_cmb is the combinational "replay in progress" flag. It defaults
// to the registered value (replaying) and is overridden by the when-blocks
// further down; replaying registers it for the next cycle (reset to false).
val replaying_cmb = Bool()
|
||||
val replaying = Reg(next = replaying_cmb, init = Bool(false))
|
||||
replaying_cmb := replaying
|
||||
|
||||
// replayq1: one-entry queue created with flow = true, feeds arbiter input 0.
// replayq2: plain one-entry queue, drained into replayq1 (see below).
val replayq1 = Module(new Queue(new HellaCacheReq, 1, flow = true))
|
||||
val replayq2 = Module(new Queue(new HellaCacheReq, 1))
|
||||
val req_arb = Module(new Arbiter(new HellaCacheReq, 2))
|
||||
|
||||
// arbiter input 0: replayed requests from replayq1
// arbiter input 1: new requests from the requestor, only offered (and only
// reported ready back to the requestor) while no replay is in progress
req_arb.io.in(0) <> replayq1.io.deq
|
||||
req_arb.io.in(1).valid := !replaying_cmb && io.requestor.req.valid
|
||||
req_arb.io.in(1).bits := io.requestor.req.bits
|
||||
io.requestor.req.ready := !replaying_cmb && req_arb.io.in(1).ready
|
||||
|
||||
// s2_nack: nack flag taken from the cache response; s3_nack: the same flag
// delayed by one cycle
val s2_nack = io.cache.resp.bits.nack
|
||||
val s3_nack = Reg(next=s2_nack)
|
||||
|
||||
// s0/s1/s2_req_fire: "a request fired on the cache port" now, one cycle ago,
// and two cycles ago
val s0_req_fire = io.cache.req.fire()
|
||||
val s1_req_fire = Reg(next=s0_req_fire)
|
||||
val s2_req_fire = Reg(next=s1_req_fire)
|
||||
|
||||
// cache request port: kill follows the nack flag, phys is tied high, and
// data is the arbiter's data captured in a register when a request fires;
// the remaining fields come from the arbiter output via the bulk connect.
// NOTE(review): the three explicit assignments precede the `<>` bulk
// connect to the same bundle; confirm that this Chisel version keeps them
// rather than letting the bulk connect override kill/phys/data.
io.cache.req.bits.kill := s2_nack
|
||||
io.cache.req.bits.phys := Bool(true)
|
||||
io.cache.req.bits.data := RegEnable(req_arb.io.out.bits.data, s0_req_fire)
|
||||
io.cache.req <> req_arb.io.out
|
||||
|
||||
// replay queues
|
||||
// replayq1 holds the older request
|
||||
// replayq2 holds the newer request (for the first nack)
|
||||
// we need to split the queues like this for the case where the older request
|
||||
// goes through but gets nacked, while the newer request stalls
|
||||
// if this happens, the newer request will go through before the older
|
||||
// request
|
||||
// we don't need to check replayq1.io.enq.ready and replayq2.io.enq.ready as
|
||||
// there will only be two requests going through at most
|
||||
|
||||
// stash d$ request in stage 2 if nacked (older request)
// defaults: enq.valid is false and the enqueued fields are the request as
// echoed back in the cache response (store_data supplies the data field);
// both are overridden in the when-blocks below
replayq1.io.enq.valid := Bool(false)
|
||||
replayq1.io.enq.bits.cmd := io.cache.resp.bits.cmd
|
||||
replayq1.io.enq.bits.typ := io.cache.resp.bits.typ
|
||||
replayq1.io.enq.bits.addr := io.cache.resp.bits.addr
|
||||
replayq1.io.enq.bits.data := io.cache.resp.bits.store_data
|
||||
replayq1.io.enq.bits.tag := io.cache.resp.bits.tag
|
||||
|
||||
// stash d$ request in stage 1 if nacked (newer request)
// enqueued when a request fired two cycles ago and a nack was seen one
// cycle ago; deq.ready defaults to false and is raised only when the entry
// is moved into replayq1 below.
// NOTE(review): enq.bits.data is assigned from store_data and then
// enq.bits is bulk-connected to the whole response bundle; confirm which
// source ends up driving the data field.
replayq2.io.enq.valid := s2_req_fire && s3_nack
|
||||
replayq2.io.enq.bits.data := io.cache.resp.bits.store_data
|
||||
replayq2.io.enq.bits <> io.cache.resp.bits
|
||||
replayq2.io.deq.ready := Bool(false)
|
||||
|
||||
// a nack stashes the nacked request in replayq1 (using the default bits
// above) and forces replay mode on
when (s2_nack) {
|
||||
replayq1.io.enq.valid := Bool(true)
|
||||
replaying_cmb := Bool(true)
|
||||
}
|
||||
|
||||
// when replaying request got sunk into the d$
// condition: a request fired two cycles ago, replaying_cmb was set at that
// time (the flag is delayed through two registers to line up with
// s2_req_fire), and no nack is reported now
when (s2_req_fire && Reg(next=Reg(next=replaying_cmb)) && !s2_nack) {
|
||||
// see if there's a stashed request in replayq2
|
||||
when (replayq2.io.deq.valid) {
|
||||
// move the stashed newer request from replayq2 into replayq1 so it is
// issued next; replay mode stays on
replayq1.io.enq.valid := Bool(true)
|
||||
replayq1.io.enq.bits.cmd := replayq2.io.deq.bits.cmd
|
||||
replayq1.io.enq.bits.typ := replayq2.io.deq.bits.typ
|
||||
replayq1.io.enq.bits.addr := replayq2.io.deq.bits.addr
|
||||
replayq1.io.enq.bits.data := replayq2.io.deq.bits.data
|
||||
replayq1.io.enq.bits.tag := replayq2.io.deq.bits.tag
|
||||
replayq2.io.deq.ready := Bool(true)
|
||||
} .otherwise {
|
||||
// nothing left to replay: leave replay mode so new requests are accepted
replaying_cmb := Bool(false)
|
||||
}
|
||||
}
|
||||
|
||||
// responses from the cache are forwarded to the requestor as-is
io.requestor.resp := io.cache.resp
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ package rocket
|
||||
|
||||
import Chisel._
|
||||
import Node._
|
||||
import uncore._
|
||||
|
||||
class RoCCInstruction extends Bundle
|
||||
{
|
||||
@ -36,35 +37,73 @@ class RoCCInterface(implicit conf: RocketConfiguration) extends Bundle
|
||||
{
|
||||
val cmd = Decoupled(new RoCCCommand).flip
|
||||
val resp = Decoupled(new RoCCResponse)
|
||||
val mem = new HellaCacheIO()(conf.dcache)
|
||||
val busy = Bool(OUTPUT)
|
||||
val interrupt = Bool(OUTPUT)
|
||||
|
||||
override def clone = new RoCCInterface().asInstanceOf[this.type]
|
||||
}
|
||||
|
||||
abstract class RoCC(implicit conf: RocketConfiguration) extends Module
|
||||
abstract class RoCC(conf: RocketConfiguration) extends Module
|
||||
{
|
||||
val io = new RoCCInterface
|
||||
val io = new RoCCInterface()(conf)
|
||||
}
|
||||
|
||||
class AccumulatorExample(implicit conf: RocketConfiguration) extends RoCC
|
||||
class AccumulatorExample(conf: RocketConfiguration) extends RoCC(conf)
|
||||
{
|
||||
val regfile = Mem(UInt(width = conf.xprlen), 4)
|
||||
val n = 4
|
||||
val regfile = Mem(UInt(width = conf.xprlen), n)
|
||||
val busy = Vec.fill(n){Reg(init=Bool(false))}
|
||||
|
||||
val funct = io.cmd.bits.inst.funct
|
||||
val addr = io.cmd.bits.inst.rs2
|
||||
val addend = io.cmd.bits.rs1
|
||||
val cmd = Queue(io.cmd)
|
||||
val funct = cmd.bits.inst.funct
|
||||
val addr = cmd.bits.inst.rs2(log2Up(n)-1,0)
|
||||
val doWrite = funct === UInt(0)
|
||||
val doRead = funct === UInt(1)
|
||||
val doLoad = funct === UInt(2)
|
||||
val doAccum = funct === UInt(3)
|
||||
val memRespTag = io.mem.resp.bits.tag(log2Up(n)-1,0)
|
||||
|
||||
// datapath
|
||||
val addend = cmd.bits.rs1
|
||||
val accum = regfile(addr)
|
||||
val wdata = Mux(funct === UInt(0), addend, accum + addend)
|
||||
val wdata = Mux(doWrite, addend, accum + addend)
|
||||
|
||||
when (io.cmd.fire() && (funct === UInt(1) || funct === UInt(3))) {
|
||||
when (cmd.fire() && (doWrite || doAccum)) {
|
||||
regfile(addr) := wdata
|
||||
}
|
||||
|
||||
io.cmd.ready := io.resp.ready
|
||||
io.resp.valid := io.cmd.valid && io.cmd.bits.inst.xd
|
||||
io.resp.bits.rd := io.cmd.bits.inst.rd
|
||||
when (io.mem.resp.valid) {
|
||||
regfile(memRespTag) := io.mem.resp.bits.data
|
||||
}
|
||||
|
||||
// control
|
||||
when (io.mem.req.fire()) {
|
||||
busy(addr) := Bool(true)
|
||||
}
|
||||
|
||||
when (io.mem.resp.valid) {
|
||||
busy(memRespTag) := Bool(false)
|
||||
}
|
||||
|
||||
val doResp = cmd.bits.inst.xd
|
||||
val stallReg = busy(addr)
|
||||
val stallLoad = doLoad && !io.mem.req.ready
|
||||
val stallResp = doResp && !io.resp.ready
|
||||
|
||||
cmd.ready := !stallReg && !stallLoad && !stallResp
|
||||
|
||||
io.resp.valid := cmd.valid && doResp && !stallReg && !stallLoad
|
||||
io.resp.bits.rd := cmd.bits.inst.rd
|
||||
io.resp.bits.data := accum
|
||||
|
||||
io.busy := Bool(false)
|
||||
io.interrupt := Bool(false)
|
||||
|
||||
io.mem.req.valid := cmd.valid && doLoad && !stallReg && !stallResp
|
||||
io.mem.req.bits.addr := addend
|
||||
io.mem.req.bits.cmd := M_XRD // perform a load (M_XWR for stores)
|
||||
io.mem.req.bits.typ := MT_D // D = 8 bytes, W = 4, H = 2, B = 1
|
||||
io.mem.req.bits.data := Bits(0) // we're not performing any stores...
|
||||
io.mem.req.bits.phys := Bool(true) // don't perform address translation
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ import Util._
|
||||
|
||||
case class RocketConfiguration(tl: TileLinkConfiguration,
|
||||
icache: ICacheConfig, dcache: DCacheConfig,
|
||||
fpu: Boolean, rocc: Option[RoCC] = None,
|
||||
fpu: Boolean, rocc: Option[RocketConfiguration => RoCC] = None,
|
||||
fastLoadWord: Boolean = true,
|
||||
fastLoadByte: Boolean = false,
|
||||
fastMulDiv: Boolean = true)
|
||||
@ -23,7 +23,6 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
|
||||
val memPorts = 2
|
||||
val dcachePortId = 0
|
||||
val icachePortId = 1
|
||||
val vicachePortId = 2
|
||||
implicit val tlConf = confIn.tl
|
||||
implicit val lnConf = confIn.tl.ln
|
||||
implicit val icConf = confIn.icache
|
||||
@ -48,6 +47,18 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
|
||||
ptw.io.requestor(0) <> icache.io.cpu.ptw
|
||||
ptw.io.requestor(1) <> dcache.io.cpu.ptw
|
||||
|
||||
if (!conf.rocc.isEmpty) {
|
||||
val dcIF = Module(new SimpleHellaCacheIF)
|
||||
val rocc = Module((conf.rocc.get)(conf))
|
||||
dcIF.io.requestor <> rocc.io.mem
|
||||
core.io.rocc <> rocc.io
|
||||
dcacheArb.io.requestor(2) <> dcIF.io.cache
|
||||
}
|
||||
|
||||
core.io.host <> io.host
|
||||
core.io.imem <> icache.io.cpu
|
||||
core.io.ptw <> ptw.io.dpath
|
||||
|
||||
val memArb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(memPorts))
|
||||
memArb.io.in(dcachePortId) <> dcache.io.mem
|
||||
memArb.io.in(icachePortId) <> icache.io.mem
|
||||
@ -61,8 +72,4 @@ class Tile(resetSignal: Bool = null)(confIn: RocketConfiguration) extends Module
|
||||
dcache.io.mem.release.meta.ready := io.tilelink.release.meta.ready
|
||||
io.tilelink.release.meta.bits := dcache.io.mem.release.meta.bits
|
||||
io.tilelink.release.meta.bits.payload.client_xact_id := Cat(dcache.io.mem.release.meta.bits.payload.client_xact_id, UInt(dcachePortId, log2Up(memPorts))) // Mimic client id extension done by UncachedTileLinkIOArbiter for Acquires from either client)
|
||||
|
||||
core.io.host <> io.host
|
||||
core.io.imem <> icache.io.cpu
|
||||
core.io.ptw <> ptw.io.dpath
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user