Merge branch 'master' into ss-frontend

This commit is contained in:
Andrew Waterman 2015-01-04 19:59:18 -08:00
commit b70f7683d3
7 changed files with 497 additions and 680 deletions

File diff suppressed because it is too large Load Diff

View File

@ -22,11 +22,6 @@ class Datapath extends Module
// execute definitions
val ex_reg_pc = Reg(UInt())
val ex_reg_inst = Reg(Bits())
val ex_reg_ctrl_fn_dw = Reg(UInt())
val ex_reg_ctrl_fn_alu = Reg(UInt())
val ex_reg_sel_alu2 = Reg(UInt())
val ex_reg_sel_alu1 = Reg(UInt())
val ex_reg_sel_imm = Reg(UInt())
val ex_reg_kill = Reg(Bool())
val ex_reg_rs_bypass = Vec.fill(2)(Reg(Bool()))
val ex_reg_rs_lsb = Vec.fill(2)(Reg(Bits()))
@ -102,11 +97,6 @@ class Datapath extends Module
when (!io.ctrl.killd) {
ex_reg_pc := id_pc
ex_reg_inst := id_inst
ex_reg_ctrl_fn_dw := io.ctrl.fn_dw.toUInt
ex_reg_ctrl_fn_alu := io.ctrl.fn_alu
ex_reg_sel_alu2 := io.ctrl.sel_alu2
ex_reg_sel_alu1 := io.ctrl.sel_alu1
ex_reg_sel_imm := io.ctrl.sel_imm
ex_reg_rs_bypass := io.ctrl.bypass
for (i <- 0 until id_rs.size) {
when (io.ctrl.ren(i)) {
@ -129,31 +119,31 @@ class Datapath extends Module
val ex_rs = for (i <- 0 until id_rs.size)
yield Mux(ex_reg_rs_bypass(i), bypass(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
val ex_imm = imm(ex_reg_sel_imm, ex_reg_inst)
val ex_op1 = MuxLookup(ex_reg_sel_alu1, SInt(0), Seq(
val ex_imm = imm(io.ctrl.ex_ctrl.sel_imm, ex_reg_inst)
val ex_op1 = MuxLookup(io.ctrl.ex_ctrl.sel_alu1, SInt(0), Seq(
A1_RS1 -> ex_rs(0).toSInt,
A1_PC -> ex_reg_pc.toSInt))
val ex_op2 = MuxLookup(ex_reg_sel_alu2, SInt(0), Seq(
val ex_op2 = MuxLookup(io.ctrl.ex_ctrl.sel_alu2, SInt(0), Seq(
A2_RS2 -> ex_rs(1).toSInt,
A2_IMM -> ex_imm,
A2_FOUR -> SInt(4)))
val alu = Module(new ALU)
alu.io.dw := ex_reg_ctrl_fn_dw
alu.io.fn := ex_reg_ctrl_fn_alu
alu.io.dw := io.ctrl.ex_ctrl.alu_dw
alu.io.fn := io.ctrl.ex_ctrl.alu_fn
alu.io.in2 := ex_op2.toUInt
alu.io.in1 := ex_op1
// multiplier and divider
val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1,
earlyOut = params(FastMulDiv)))
div.io.req.valid := io.ctrl.div_mul_val
div.io.req.bits.dw := ex_reg_ctrl_fn_dw
div.io.req.bits.fn := ex_reg_ctrl_fn_alu
div.io.req.valid := io.ctrl.ex_valid && io.ctrl.ex_ctrl.div
div.io.req.bits.dw := io.ctrl.ex_ctrl.alu_dw
div.io.req.bits.fn := io.ctrl.ex_ctrl.alu_fn
div.io.req.bits.in1 := ex_rs(0)
div.io.req.bits.in2 := ex_rs(1)
div.io.req.bits.tag := io.ctrl.ex_waddr
div.io.kill := io.ctrl.div_mul_kill
div.io.kill := io.ctrl.killm && Reg(next = div.io.req.fire())
io.ctrl.div_mul_rdy := div.io.req.ready
io.fpu.fromint_data := ex_rs(0)
@ -171,7 +161,7 @@ class Datapath extends Module
// D$ request interface (registered inside D$ module)
// other signals (req_val, req_rdy) connect to control module
io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(params(VAddrBits)-1,0)).toUInt
io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_fp_val)
io.dmem.req.bits.tag := Cat(io.ctrl.ex_waddr, io.ctrl.ex_ctrl.fp)
require(io.dmem.req.bits.tag.getWidth >= 6)
require(params(CoreDCacheReqTagBits) >= 6)
@ -196,12 +186,12 @@ class Datapath extends Module
mem_reg_pc := ex_reg_pc
mem_reg_inst := ex_reg_inst
mem_reg_wdata := alu.io.out
when (io.ctrl.ex_rs2_val) {
mem_reg_rs2 := ex_rs(1)
when (io.ctrl.ex_ctrl.rxs2 && (io.ctrl.ex_ctrl.mem || io.ctrl.ex_ctrl.rocc)) {
mem_reg_rs2 := ex_rs(1)
io.dmem.req.bits.data := Mux(io.ctrl.mem_fp_val, io.fpu.store_data, mem_reg_rs2)
io.dmem.req.bits.data := Mux(io.ctrl.mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
// writeback arbitration
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
@ -239,21 +229,21 @@ class Datapath extends Module
io.ctrl.mem_br_taken := mem_reg_wdata(0)
val mem_br_target = mem_reg_pc +
Mux(io.ctrl.mem_branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst),
Mux(!io.ctrl.mem_jalr && !io.ctrl.mem_branch, imm(IMM_UJ, mem_reg_inst), SInt(4)))
val mem_npc = Mux(io.ctrl.mem_jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(params(VAddrBits)-1,0)), mem_br_target)
Mux(io.ctrl.mem_ctrl.branch && io.ctrl.mem_br_taken, imm(IMM_SB, mem_reg_inst),
Mux(io.ctrl.mem_ctrl.jal, imm(IMM_UJ, mem_reg_inst), SInt(4)))
val mem_npc = Mux(io.ctrl.mem_ctrl.jalr, Cat(vaSign(mem_reg_wdata, mem_reg_wdata), mem_reg_wdata(params(VAddrBits)-1,0)), mem_br_target)
io.ctrl.mem_misprediction := mem_npc != ex_reg_pc || !io.ctrl.ex_valid
io.ctrl.mem_rs1_ra := mem_reg_inst(19,15) === 1
val mem_int_wdata = Mux(io.ctrl.mem_jalr, mem_br_target, mem_reg_wdata)
val mem_int_wdata = Mux(io.ctrl.mem_ctrl.jalr, mem_br_target, mem_reg_wdata)
// writeback stage
when (!mem_reg_kill) {
wb_reg_pc := mem_reg_pc
wb_reg_inst := mem_reg_inst
wb_reg_wdata := Mux(io.ctrl.mem_fp_val && io.ctrl.mem_wen, io.fpu.toint_data, mem_int_wdata)
when (io.ctrl.mem_rocc_val) {
wb_reg_rs2 := mem_reg_rs2
wb_reg_wdata := Mux(io.ctrl.mem_ctrl.fp && io.ctrl.mem_ctrl.wxd, io.fpu.toint_data, mem_int_wdata)
when (io.ctrl.mem_ctrl.rocc) {
wb_reg_rs2 := mem_reg_rs2
wb_wdata := Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword,
Mux(io.ctrl.ll_wen, ll_wdata,

View File

@ -10,6 +10,9 @@ case object ECCCode extends Field[Option[Code]]
// Parameters shared by the L1 caches; every value is read from the
// configuration through `params` or derived from such values.
abstract trait L1CacheParameters extends CacheParameters with CoreParameters {
// Object configured under the TLCoherence key.
val co = params(TLCoherence)
// ECCCode is optional; fall back to IdentityCode when none is configured.
val code = params(ECCCode).getOrElse(new IdentityCode)
// Value configured under the TLDataBeats key.
val outerDataBeats = params(TLDataBeats)
// Integer division. NOTE(review): presumably rowBits divides TLDataBits
// evenly -- no check is visible here, confirm against the configuration.
val refillCyclesPerBeat = params(TLDataBits)/rowBits
// Product of the per-beat cycle count and the number of beats.
val refillCycles = refillCyclesPerBeat*outerDataBeats
abstract trait FrontendParameters extends L1CacheParameters
@ -106,7 +109,7 @@ class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule
icache.io.req.bits.idx := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
icache.io.invalidate := io.cpu.invalidate
icache.io.req.bits.ppn := tlb.io.resp.ppn
icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss
icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || io.cpu.ptw.invalidate
icache.io.resp.ready := !stall && !s1_same_block
io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icache.io.resp.valid)
@ -187,22 +190,13 @@ class ICache extends FrontendModule
val s2_miss = s2_valid && !s2_any_tag_hit
rdy := state === s_ready && !s2_miss
var refill_cnt = UInt(0)
var refill_done = state === s_refill
var refill_valid = io.mem.grant.valid
var refill_bits = io.mem.grant.bits
def doRefill(g: Grant): Bool = Bool(true)
if(refillCycles > 1) {
val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCycles, doRefill))
ser.io.in <> io.mem.grant
refill_cnt = ser.io.cnt
refill_done = ser.io.done
refill_valid = ser.io.out.valid
refill_bits = ser.io.out.bits
ser.io.out.ready := Bool(true)
} else {
io.mem.grant.ready := Bool(true)
val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCyclesPerBeat, (g: Grant) => co.messageUpdatesDataArray(g)))
ser.io.in <> io.mem.grant
val (refill_cnt, refill_wrap) = Counter(ser.io.out.fire(), refillCycles) //TODO Zero width wire
val refill_done = state === s_refill && refill_wrap
val refill_valid = ser.io.out.valid
val refill_bits = ser.io.out.bits
ser.io.out.ready := Bool(true)
//assert(!c.tlco.isVoluntary(refill_bits.payload) || !refill_valid, "UncachedRequestors shouldn't get voluntary grants.")
val repl_way = if (isDM) UInt(0) else LFSR16(s2_miss)(log2Up(nWays)-1,0)
@ -236,7 +230,7 @@ class ICache extends FrontendModule
val s2_dout = Vec.fill(nWays){Reg(Bits())}
for (i <- 0 until nWays) {
val s1_vb = vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool
val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool
val s2_vb = Reg(Bool())
val s2_tag_disparity = Reg(Bool())
val s2_tag_match = Reg(Bool())
@ -257,8 +251,8 @@ class ICache extends FrontendModule
val s1_raddr = Reg(UInt())
when (refill_valid && repl_way === UInt(i)) {
val e_d = code.encode(refill_bits.payload.data)
if(refillCycles > 1) data_array(Cat(s2_idx,refill_cnt)) := e_d
else data_array(s2_idx) := e_d
if(refillCycles > 1) data_array(Cat(s2_idx, refill_cnt)) := e_d
else data_array(s2_idx) := e_d
// /*.else*/when (s0_valid) { // uncomment ".else" to infer 6T SRAM
.elsewhen (s0_valid) {
@ -272,14 +266,14 @@ class ICache extends FrontendModule
io.resp.bits.datablock := Mux1H(s2_tag_hit, s2_dout)
val ack_q = Module(new Queue(new LogicalNetworkIO(new Finish), 1))
ack_q.io.enq.valid := refill_done && co.requiresAckForGrant(refill_bits.payload.g_type)
ack_q.io.enq.bits.payload.master_xact_id := refill_bits.payload.master_xact_id
ack_q.io.enq.valid := refill_done && co.requiresAckForGrant(refill_bits.payload)
ack_q.io.enq.bits.payload.manager_xact_id := refill_bits.payload.manager_xact_id
ack_q.io.enq.bits.header.dst := refill_bits.header.src
// output signals
io.resp.valid := s2_hit
io.mem.acquire.valid := (state === s_request) && ack_q.io.enq.ready
io.mem.acquire.bits.payload := Acquire(co.getUncachedReadAcquireType, s2_addr >> UInt(blockOffBits), UInt(0))
io.mem.acquire.bits.payload := UncachedRead(s2_addr >> UInt(blockOffBits))
io.mem.finish <> ack_q.io.deq
// control state machine

View File

@ -13,13 +13,15 @@ case object LRSCCycles extends Field[Int]
case object NDTLBEntries extends Field[Int]
abstract trait L1HellaCacheParameters extends L1CacheParameters {
val indexmsb = untagBits-1
val indexlsb = blockOffBits
val offsetmsb = indexlsb-1
val idxMSB = untagBits-1
val idxLSB = blockOffBits
val offsetmsb = idxLSB-1
val offsetlsb = wordOffBits
val doNarrowRead = coreDataBits * nWays % rowBits == 0
val encDataBits = code.width(coreDataBits)
val encRowBits = encDataBits*rowWords
val sdqDepth = params(StoreDataQueueDepth)
val nMSHRs = params(NMSHRs)
abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters
@ -57,22 +59,28 @@ class LoadGen(typ: Bits, addr: Bits, dat: Bits, zero: Bool)
val byte = Cat(Mux(zero || t.byte, Fill(56, sign && byteShift(7)), half(63,8)), byteShift)
class HellaCacheReq extends CoreBundle {
trait HasCoreData extends CoreBundle {
val data = Bits(width = coreDataBits)
class HellaCacheReqInternal extends CoreBundle {
val kill = Bool()
val typ = Bits(width = MT_SZ)
val phys = Bool()
val addr = UInt(width = coreMaxAddrBits)
val data = Bits(width = coreDataBits)
val tag = Bits(width = coreDCacheReqTagBits)
val cmd = Bits(width = M_SZ)
class HellaCacheResp extends CoreBundle {
class HellaCacheReq extends HellaCacheReqInternal
with HasCoreData
class HellaCacheResp extends CoreBundle
with HasCoreData {
val nack = Bool() // comes 2 cycles after req.fire
val replay = Bool()
val typ = Bits(width = 3)
val has_data = Bool()
val data = Bits(width = coreDataBits)
val data_subword = Bits(width = coreDataBits)
val tag = Bits(width = coreDCacheReqTagBits)
val cmd = Bits(width = 4)
@ -100,15 +108,20 @@ class HellaCacheIO extends CoreBundle {
val ordered = Bool(INPUT)
class MSHRReq extends HellaCacheReq with L1HellaCacheParameters {
// Mixin that adds an `sdq_id` field to a bundle.
trait HasSDQId extends CoreBundle with L1HellaCacheParameters {
// Sized with log2Up(sdqDepth), i.e. wide enough to index sdqDepth entries.
val sdq_id = UInt(width = log2Up(sdqDepth))
// Mixin that adds three fields to a bundle: `tag_match`, `old_meta`, `way_en`.
trait HasMissInfo extends CoreBundle with L1HellaCacheParameters {
// Single-bit flag.
val tag_match = Bool()
// An L1Metadata bundle.
val old_meta = new L1Metadata
// nWays bits wide -- presumably one enable bit per way; TODO confirm.
val way_en = Bits(width = nWays)
class Replay extends HellaCacheReq with L1HellaCacheParameters {
val sdq_id = UInt(width = log2Up(params(StoreDataQueueDepth)))
// Request-bundle variants built from HellaCacheReqInternal plus mixins:
// Replay adds HasCoreData, ReplayInternal adds HasSDQId instead, and the two
// MSHRReq forms add HasMissInfo on top of the corresponding Replay form.
class Replay extends HellaCacheReqInternal with HasCoreData
class ReplayInternal extends HellaCacheReqInternal with HasSDQId
class MSHRReq extends Replay with HasMissInfo
class MSHRReqInternal extends ReplayInternal with HasMissInfo
class DataReadReq extends L1HellaCacheBundle {
val way_en = Bits(width = nWays)
@ -146,7 +159,6 @@ class WritebackReq extends L1HellaCacheBundle {
val idx = Bits(width = idxBits)
val way_en = Bits(width = nWays)
val client_xact_id = Bits(width = params(TLClientXactIdBits))
val master_xact_id = Bits(width = params(TLMasterXactIdBits))
val r_type = UInt(width = co.releaseTypeWidth)
@ -156,8 +168,7 @@ class MSHR(id: Int) extends L1HellaCacheModule {
val req_pri_rdy = Bool(OUTPUT)
val req_sec_val = Bool(INPUT)
val req_sec_rdy = Bool(OUTPUT)
val req_bits = new MSHRReq().asInput
val req_sdq_id = UInt(INPUT, log2Up(params(StoreDataQueueDepth)))
val req_bits = new MSHRReqInternal().asInput
val idx_match = Bool(OUTPUT)
val tag = Bits(OUTPUT, tagBits)
@ -166,7 +177,7 @@ class MSHR(id: Int) extends L1HellaCacheModule {
val mem_resp = new DataWriteReq().asOutput
val meta_read = Decoupled(new L1MetaReadReq)
val meta_write = Decoupled(new L1MetaWriteReq)
val replay = Decoupled(new Replay)
val replay = Decoupled(new ReplayInternal)
val mem_grant = Valid(new LogicalNetworkIO(new Grant)).flip
val mem_finish = Decoupled(new LogicalNetworkIO(new Finish))
val wb_req = Decoupled(new WritebackReq)
@ -178,28 +189,25 @@ class MSHR(id: Int) extends L1HellaCacheModule {
val acquire_type = Reg(UInt())
val release_type = Reg(UInt())
val line_state = Reg(new ClientMetadata()(co))
val refill_count = Reg(UInt(width = log2Up(refillCycles))) // TODO: zero-width wire
val req = Reg(new MSHRReq())
val line_state = Reg(new ClientMetadata)
val req = Reg(new MSHRReqInternal())
val req_cmd = io.req_bits.cmd
val req_idx = req.addr(untagBits-1,blockOffBits)
val idx_match = req_idx === io.req_bits.addr(untagBits-1,blockOffBits)
val sec_rdy = idx_match && (state === s_wb_req || state === s_wb_resp || state === s_meta_clear || (state === s_refill_req || state === s_refill_resp) && !co.needsTransactionOnSecondaryMiss(req_cmd, io.mem_req.bits))
val reply = io.mem_grant.valid && io.mem_grant.bits.payload.client_xact_id === UInt(id)
val refill_done = reply && (if(refillCycles > 1) refill_count.andR else Bool(true))
val (refill_cnt, refill_done) = Counter(reply && co.messageUpdatesDataArray(io.mem_grant.bits.payload), refillCycles) // TODO: Zero width?
val wb_done = reply && (state === s_wb_resp)
val meta_on_flush = co.clientMetadataOnFlush
val meta_on_grant = co.clientMetadataOnGrant(io.mem_grant.bits.payload, io.mem_req.bits)
val meta_on_hit = co.clientMetadataOnHit(req_cmd, io.req_bits.old_meta.coh)
val rpq = Module(new Queue(new Replay, params(ReplayQueueDepth)))
val rpq = Module(new Queue(new ReplayInternal, params(ReplayQueueDepth)))
rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(req_cmd)
rpq.io.enq.bits := io.req_bits
rpq.io.enq.bits.sdq_id := io.req_sdq_id
rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid
when (state === s_drain_rpq && !rpq.io.deq.valid) {
@ -213,11 +221,8 @@ class MSHR(id: Int) extends L1HellaCacheModule {
state := s_meta_write_resp
when (state === s_refill_resp) {
when (reply) { line_state := meta_on_grant }
when (refill_done) { state := s_meta_write_req }
when (reply) {
if(refillCycles > 1) refill_count := refill_count + UInt(1)
line_state := meta_on_grant
when (io.mem_req.fire()) { // s_refill_req
state := s_refill_resp
@ -231,13 +236,12 @@ class MSHR(id: Int) extends L1HellaCacheModule {
when (io.wb_req.fire()) { // s_wb_req
state := s_wb_resp
when (io.req_sec_val && io.req_sec_rdy) { // s_wb_req, s_wb_resp, s_refill_req
acquire_type := co.getAcquireTypeOnSecondaryMiss(req_cmd, meta_on_flush, io.mem_req.bits)
when (io.req_pri_val && io.req_pri_rdy) {
line_state := meta_on_flush
refill_count := UInt(0)
refill_cnt := UInt(0)
acquire_type := co.getAcquireTypeOnPrimaryMiss(req_cmd, meta_on_flush)
release_type := co.getReleaseTypeOnVoluntaryWriteback() //TODO downgrades etc
req := io.req_bits
@ -255,8 +259,8 @@ class MSHR(id: Int) extends L1HellaCacheModule {
val ackq = Module(new Queue(new LogicalNetworkIO(new Finish), 1))
ackq.io.enq.valid := (wb_done || refill_done) && co.requiresAckForGrant(io.mem_grant.bits.payload.g_type)
ackq.io.enq.bits.payload.master_xact_id := io.mem_grant.bits.payload.master_xact_id
ackq.io.enq.valid := (wb_done || refill_done) && co.requiresAckForGrant(io.mem_grant.bits.payload)
ackq.io.enq.bits.payload.manager_xact_id := io.mem_grant.bits.payload.manager_xact_id
ackq.io.enq.bits.header.dst := io.mem_grant.bits.header.src
val can_finish = state === s_invalid || state === s_refill_req || state === s_refill_resp
io.mem_finish.valid := ackq.io.deq.valid && can_finish
@ -265,7 +269,7 @@ class MSHR(id: Int) extends L1HellaCacheModule {
io.idx_match := (state != s_invalid) && idx_match
io.mem_resp := req
io.mem_resp.addr := (if(refillCycles > 1) Cat(req_idx, refill_count) else req_idx) << rowOffBits
io.mem_resp.addr := (if(refillCycles > 1) Cat(req_idx, refill_cnt) else req_idx) << rowOffBits
io.tag := req.addr >> untagBits
io.req_pri_rdy := state === s_invalid
io.req_sec_rdy := sec_rdy && rpq.io.enq.ready
@ -286,13 +290,10 @@ class MSHR(id: Int) extends L1HellaCacheModule {
io.wb_req.bits.idx := req_idx
io.wb_req.bits.way_en := req.way_en
io.wb_req.bits.client_xact_id := Bits(id)
io.wb_req.bits.master_xact_id := Bits(0) // DNC
io.wb_req.bits.r_type := co.getReleaseTypeOnVoluntaryWriteback()
io.mem_req.valid := state === s_refill_req && ackq.io.enq.ready
io.mem_req.bits.a_type := acquire_type
io.mem_req.bits.addr := Cat(io.tag, req_idx).toUInt
io.mem_req.bits.client_xact_id := Bits(id)
io.mem_req.bits := Acquire(acquire_type, Cat(io.tag, req_idx).toUInt, Bits(id))
io.mem_finish <> ackq.io.deq
io.meta_read.valid := state === s_drain_rpq
@ -328,26 +329,26 @@ class MSHRFile extends L1HellaCacheModule {
val fence_rdy = Bool(OUTPUT)
val sdq_val = Reg(init=Bits(0, params(StoreDataQueueDepth)))
val sdq_alloc_id = PriorityEncoder(~sdq_val(params(StoreDataQueueDepth)-1,0))
val sdq_val = Reg(init=Bits(0, sdqDepth))
val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0))
val sdq_rdy = !sdq_val.andR
val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd)
val sdq = Mem(io.req.bits.data, params(StoreDataQueueDepth))
val sdq = Mem(io.req.bits.data, sdqDepth)
when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data }
val idxMatch = Vec.fill(params(NMSHRs)){Bool()}
val tagList = Vec.fill(params(NMSHRs)){Bits()}
val idxMatch = Vec.fill(nMSHRs){Bool()}
val tagList = Vec.fill(nMSHRs){Bits()}
val tag_match = Mux1H(idxMatch, tagList) === io.req.bits.addr >> untagBits
val wbTagList = Vec.fill(params(NMSHRs)){Bits()}
val memRespMux = Vec.fill(params(NMSHRs)){new DataWriteReq}
val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, params(NMSHRs)))
val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, params(NMSHRs)))
val mem_req_arb = Module(new Arbiter(new Acquire, params(NMSHRs)))
val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), params(NMSHRs)))
val wb_req_arb = Module(new Arbiter(new WritebackReq, params(NMSHRs)))
val replay_arb = Module(new Arbiter(new Replay, params(NMSHRs)))
val alloc_arb = Module(new Arbiter(Bool(), params(NMSHRs)))
val wbTagList = Vec.fill(nMSHRs){Bits()}
val memRespMux = Vec.fill(nMSHRs){new DataWriteReq}
val meta_read_arb = Module(new Arbiter(new L1MetaReadReq, nMSHRs))
val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs))
val mem_req_arb = Module(new LockingArbiter(new Acquire, nMSHRs, outerDataBeats, co.messageHasData _))
val mem_finish_arb = Module(new Arbiter(new LogicalNetworkIO(new Finish), nMSHRs))
val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs))
val replay_arb = Module(new Arbiter(new ReplayInternal, nMSHRs))
val alloc_arb = Module(new Arbiter(Bool(), nMSHRs))
var idx_match = Bool(false)
var pri_rdy = Bool(false)
@ -356,7 +357,7 @@ class MSHRFile extends L1HellaCacheModule {
io.fence_rdy := true
io.probe_rdy := true
for (i <- 0 until params(NMSHRs)) {
for (i <- 0 until nMSHRs) {
val mshr = Module(new MSHR(i))
idxMatch(i) := mshr.io.idx_match
@ -368,7 +369,7 @@ class MSHRFile extends L1HellaCacheModule {
mshr.io.req_sec_val := io.req.valid && sdq_rdy && tag_match
mshr.io.req_bits := io.req.bits
mshr.io.req_sdq_id := sdq_alloc_id
mshr.io.req_bits.sdq_id := sdq_alloc_id
mshr.io.meta_read <> meta_read_arb.io.in(i)
mshr.io.meta_write <> meta_write_arb.io.in(i)
@ -405,8 +406,8 @@ class MSHRFile extends L1HellaCacheModule {
io.replay <> replay_arb.io.out
when (io.replay.valid || sdq_enq) {
sdq_val := sdq_val & ~(UIntToOH(io.replay.bits.sdq_id) & Fill(params(StoreDataQueueDepth), free_sdq)) |
PriorityEncoderOH(~sdq_val(params(StoreDataQueueDepth)-1,0)) & Fill(params(StoreDataQueueDepth), sdq_enq)
sdq_val := sdq_val & ~(UIntToOH(replay_arb.io.out.bits.sdq_id) & Fill(sdqDepth, free_sdq)) |
PriorityEncoderOH(~sdq_val(sdqDepth-1,0)) & Fill(sdqDepth, sdq_enq)
@ -422,7 +423,9 @@ class WritebackUnit extends L1HellaCacheModule {
val active = Reg(init=Bool(false))
val r1_data_req_fired = Reg(init=Bool(false))
val r2_data_req_fired = Reg(init=Bool(false))
val cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1)))
val cnt = Reg(init = UInt(0, width = log2Up(refillCycles+1))) //TODO Zero width
val buf_v = (if(refillCyclesPerBeat > 1) Reg(init=Bits(0, width = refillCyclesPerBeat-1)) else Bits(1))
val beat_done = buf_v.andR
val req = Reg(new WritebackReq)
io.release.valid := false
@ -433,27 +436,22 @@ class WritebackUnit extends L1HellaCacheModule {
r1_data_req_fired := true
cnt := cnt + 1
if(refillCycles > 1) { // Coalescing buffer inserted
when (!r1_data_req_fired && !r2_data_req_fired && cnt === refillCycles) {
io.release.valid := true
active := !io.release.ready
} else { // No buffer, data released a cycle earlier
when (r2_data_req_fired) {
io.release.valid := true
when(!io.release.ready) {
r1_data_req_fired := false
r2_data_req_fired := false
cnt := UInt(0)
} .otherwise {
active := false
when (r2_data_req_fired) {
io.release.valid := beat_done
when(!io.release.ready) {
r1_data_req_fired := false
r2_data_req_fired := false
cnt := cnt - Mux[UInt](Bool(refillCycles > 1) && r1_data_req_fired, 2, 1)
} .elsewhen(beat_done) { if(refillCyclesPerBeat > 1) buf_v := 0 }
when(!r1_data_req_fired) {
active := cnt < UInt(refillCycles)
when (io.req.fire()) {
active := true
cnt := 0
if(refillCyclesPerBeat > 1) buf_v := 0
req := io.req.bits
@ -467,26 +465,23 @@ class WritebackUnit extends L1HellaCacheModule {
io.data_req.valid := fire
io.data_req.bits.way_en := req.way_en
if(refillCycles > 1) {
io.data_req.bits.addr := Cat(req.idx, cnt(log2Up(refillCycles)-1,0)) << rowOffBits
} else {
io.data_req.bits.addr := req.idx << rowOffBits
io.data_req.bits.addr := (if(refillCycles > 1) Cat(req.idx, cnt(log2Up(refillCycles)-1,0))
else req.idx) << rowOffBits
io.release.bits.r_type := req.r_type
io.release.bits.addr := Cat(req.tag, req.idx).toUInt
io.release.bits.client_xact_id := req.client_xact_id
io.release.bits.master_xact_id := req.master_xact_id
if(refillCycles > 1) {
val data_buf = Reg(Bits())
when(active && r2_data_req_fired) {
data_buf := Cat(io.data_resp, data_buf(refillCycles*encRowBits-1, encRowBits))
io.release.bits.data := data_buf
} else {
io.release.bits.data := io.data_resp
io.release.bits.data :=
(if(refillCyclesPerBeat > 1) {
val data_buf = Reg(Bits())
when(active && r2_data_req_fired && !beat_done) {
data_buf := Cat(io.data_resp, data_buf((refillCyclesPerBeat-1)*encRowBits-1, encRowBits))
buf_v := (if(refillCyclesPerBeat > 2)
Cat(UInt(1), buf_v(refillCyclesPerBeat-2,1))
else UInt(1))
Cat(io.data_resp, data_buf)
} else { io.data_resp })
class ProbeUnit extends L1HellaCacheModule {
@ -498,7 +493,7 @@ class ProbeUnit extends L1HellaCacheModule {
val wb_req = Decoupled(new WritebackReq)
val way_en = Bits(INPUT, nWays)
val mshr_rdy = Bool(INPUT)
val line_state = new ClientMetadata()(co).asInput
val line_state = new ClientMetadata().asInput
val s_reset :: s_invalid :: s_meta_read :: s_meta_resp :: s_mshr_req :: s_release :: s_writeback_req :: s_writeback_resp :: s_meta_write :: Nil = Enum(UInt(), 9)
@ -544,8 +539,12 @@ class ProbeUnit extends L1HellaCacheModule {
io.req.ready := state === s_invalid
io.rep.valid := state === s_release && !(hit && co.needsWriteback(line_state))
io.rep.bits := Release(co.getReleaseTypeOnProbe(req, Mux(hit, line_state, co.clientMetadataOnFlush)), req.addr, req.client_xact_id, req.master_xact_id)
io.rep.valid := state === s_release &&
!(hit && co.needsWriteback(line_state)) // Otherwise WBU will issue release
io.rep.bits := Release(co.getReleaseTypeOnProbe(req,
Mux(hit, line_state, co.clientMetadataOnFlush)),
io.meta_read.valid := state === s_meta_read
io.meta_read.bits.idx := req.addr
@ -563,7 +562,6 @@ class ProbeUnit extends L1HellaCacheModule {
io.wb_req.bits.tag := req.addr >> UInt(idxBits)
io.wb_req.bits.r_type := co.getReleaseTypeOnProbe(req, Mux(hit, line_state, co.clientMetadataOnFlush))
io.wb_req.bits.client_xact_id := req.client_xact_id
io.wb_req.bits.master_xact_id := req.master_xact_id
class DataArray extends L1HellaCacheModule {
@ -747,7 +745,7 @@ class HellaCache extends L1HellaCacheModule {
io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st
// tags
def onReset = L1Metadata(UInt(0), ClientMetadata(UInt(0))(co))
def onReset = L1Metadata(UInt(0), ClientMetadata(UInt(0)))
val meta = Module(new MetadataArray(onReset _))
val metaReadArb = Module(new Arbiter(new MetaReadReq, 5))
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2))
@ -872,8 +870,8 @@ class HellaCache extends L1HellaCacheModule {
metaReadArb.io.in(1) <> mshrs.io.meta_read
metaWriteArb.io.in(0) <> mshrs.io.meta_write
// probes
val releaseArb = Module(new Arbiter(new Release, 2))
// probes and releases
val releaseArb = Module(new LockingArbiter(new Release, 2, outerDataBeats, co.messageHasData _))
DecoupledLogicalNetworkIOWrapper(releaseArb.io.out) <> io.mem.release
val probe = DecoupledLogicalNetworkIOUnwrapper(io.mem.probe)
@ -889,11 +887,9 @@ class HellaCache extends L1HellaCacheModule {
// refills
def doRefill(g: Grant): Bool = co.messageUpdatesDataArray(g)
val refill = if(refillCycles > 1) {
val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCycles, doRefill))
ser.io.in <> io.mem.grant
} else io.mem.grant
val ser = Module(new FlowThroughSerializer(io.mem.grant.bits, refillCyclesPerBeat, doRefill))
ser.io.in <> io.mem.grant
val refill = ser.io.out
mshrs.io.mem_grant.valid := refill.fire()
mshrs.io.mem_grant.bits := refill.bits
refill.ready := writeArb.io.in(1).ready || !doRefill(refill.bits.payload)
@ -943,7 +939,7 @@ class HellaCache extends L1HellaCacheModule {
// nack it like it's hot
val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss ||
s1_req.addr(indexmsb,indexlsb) === prober.io.meta_write.bits.idx && !prober.io.req.ready
s1_req.addr(idxMSB,idxLSB) === prober.io.meta_write.bits.idx && !prober.io.req.ready
val s2_nack_hit = RegEnable(s1_nack, s1_valid || s1_replay)
when (s2_nack_hit) { mshrs.io.req.valid := Bool(false) }
val s2_nack_victim = s2_hit && mshrs.io.secondary_miss

View File

@ -43,6 +43,7 @@ class RoCCInterface extends Bundle
// These should be handled differently, eventually
val imem = new UncachedTileLinkIO
val dmem = new TileLinkIO
val iptw = new TLBPTWIO
val dptw = new TLBPTWIO
val pptw = new TLBPTWIO
@ -124,6 +125,11 @@ class AccumulatorExample extends RoCC
io.imem.acquire.valid := false
io.imem.grant.ready := false
io.imem.finish.valid := false
io.dmem.acquire.valid := false
io.dmem.release.valid := false
io.dmem.finish.valid := false
io.dmem.probe.ready := false
io.dmem.grant.ready := false
io.iptw.req.valid := false
io.dptw.req.valid := false
io.pptw.req.valid := false

View File

@ -38,10 +38,10 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) {
core.io.imem <> icache.io.cpu
core.io.ptw <> ptw.io.dpath
val memArb = Module(new UncachedTileLinkIOArbiterThatAppendsArbiterId(params(NTilePorts)))
val dcPortId = 0
memArb.io.in(dcPortId) <> dcache.io.mem
memArb.io.in(1) <> icache.io.mem
val memArb = Module(new TileLinkIOArbiterThatAppendsArbiterId(params(NTilePorts)))
io.tilelink <> memArb.io.out
memArb.io.in(0) <> dcache.io.mem
memArb.io.in(1) <> TileLinkIOWrapper(icache.io.mem)
//If so specified, build an RoCC module and wire it in
@ -51,21 +51,10 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) {
core.io.rocc <> rocc.io
dcIF.io.requestor <> rocc.io.mem
dcArb.io.requestor(2) <> dcIF.io.cache
memArb.io.in(2) <> rocc.io.imem
memArb.io.in(2) <> TileLinkIOWrapper(rocc.io.imem)
memArb.io.in(3) <> rocc.io.dmem
ptw.io.requestor(2) <> rocc.io.iptw
ptw.io.requestor(3) <> rocc.io.dptw
ptw.io.requestor(4) <> rocc.io.pptw
io.tilelink.acquire <> memArb.io.out.acquire
io.tilelink.grant <> memArb.io.out.grant
io.tilelink.finish <> memArb.io.out.finish
// Probes and releases routed directly to coherent dcache
io.tilelink.probe <> dcache.io.mem.probe
// Mimic the client id extension done by UncachedTileLinkIOArbiter for Acquires from either client
io.tilelink.release.valid := dcache.io.mem.release.valid
dcache.io.mem.release.ready := io.tilelink.release.ready
io.tilelink.release.bits := dcache.io.mem.release.bits
io.tilelink.release.bits.payload.client_xact_id := Cat(dcache.io.mem.release.bits.payload.client_xact_id, UInt(dcPortId, log2Up(params(NTilePorts))))

View File

@ -158,50 +158,3 @@ object Random
private def partition(value: UInt, slices: Int) =
Vec.tabulate(slices)(i => value < round((i << value.getWidth).toDouble / slices))
/** Breaks the `data` field of an incoming message into `n` equal-width slices
  * and presents them on `out`, selected by an internal counter.
  *
  * @param gen   prototype bundle; cloned to build both the `in` and `out` ports
  * @param n     number of slices; the data width must be a multiple of `n`
  *              (enforced by the `require` below)
  * @param doSer predicate on the payload deciding whether a message is serialized
  *
  * NOTE(review): closing braces are not visible in this view, so the nesting of
  * the `when` blocks described in the comments below is inferred from statement
  * order -- confirm against the original file.
  */
class FlowThroughSerializer[T <: HasTileLinkData](gen: LogicalNetworkIO[T], n: Int, doSer: T => Bool) extends Module {
val io = new Bundle {
// Input side (flipped Decoupled) and output side, both shaped like `gen`.
val in = Decoupled(gen.clone).flip
val out = Decoupled(gen.clone)
// Current value of the slice counter.
val cnt = UInt(OUTPUT, log2Up(n))
// Completion flag; driven from several places below.
val done = Bool(OUTPUT)
// The data field must split into n slices with no remainder.
require(io.in.bits.payload.data.getWidth % n == 0)
val narrowWidth = io.in.bits.payload.data.getWidth / n
// Slice counter, reset to 0; `wrap` is true on the last slice index (n-1).
val cnt = Reg(init=UInt(0, width = log2Up(n)))
val wrap = cnt === UInt(n-1)
// Registered copy of the input message, used while `active` is set.
val rbits = Reg(init=io.in.bits)
val active = Reg(init=Bool(false))
// shifter(i) is bits [(i+1)*narrowWidth-1 : i*narrowWidth] of the latched data.
val shifter = Vec.fill(n){Bits(width = narrowWidth)}
(0 until n).foreach {
i => shifter(i) := rbits.payload.data((i+1)*narrowWidth-1,i*narrowWidth)
// Default drives: not done, accept input only when idle, and pass the input
// message straight through to the output.
io.done := Bool(false)
io.cnt := cnt
io.in.ready := !active
io.out.valid := active || io.in.valid
io.out.bits := io.in.bits
// Idle with a valid input.
when(!active && io.in.valid) {
// Serialized message: latch it and go active. The counter starts at 1 if the
// output is ready in this cycle, otherwise at 0.
when(doSer(io.in.bits.payload)) {
cnt := Mux(io.out.ready, UInt(1), UInt(0))
rbits := io.in.bits
active := Bool(true)
// Flags done when the predicate is false for this input.
// NOTE(review): presumably inside the outer `when` above -- confirm.
io.done := !doSer(io.in.bits.payload)
// While active, drive the output from the latched message, with its data field
// replaced by the slice selected by `cnt`.
when(active) {
io.out.bits := rbits
io.out.bits.payload.data := shifter(cnt)
// Advance the counter when the output is ready.
when(io.out.ready) {
cnt := cnt + UInt(1)
// Last slice: reset the counter, flag done and return to idle.
when(wrap) {
cnt := UInt(0)
io.done := Bool(true)
active := Bool(false)