1
0

Merge commit '3c3e35a56e954b35e6ceb17179ebadc52e8d9b3f' into rocc-fpu-port

This commit is contained in:
Colin Schmidt 2015-10-18 13:09:17 -07:00
commit 2cee8c8bec
15 changed files with 697 additions and 464 deletions

View File

@ -5,10 +5,10 @@ package rocket
import Chisel._ import Chisel._
import uncore._ import uncore._
class HellaCacheArbiter(n: Int) extends Module class HellaCacheArbiter(n: Int)(implicit p: Parameters) extends Module
{ {
val io = new Bundle { val io = new Bundle {
val requestor = Vec.fill(n){new HellaCacheIO}.flip val requestor = Vec(new HellaCacheIO, n).flip
val mem = new HellaCacheIO val mem = new HellaCacheIO
} }
@ -43,12 +43,12 @@ class HellaCacheArbiter(n: Int) extends Module
io.requestor(i).xcpt := io.mem.xcpt io.requestor(i).xcpt := io.mem.xcpt
io.requestor(i).ordered := io.mem.ordered io.requestor(i).ordered := io.mem.ordered
resp.bits := io.mem.resp.bits resp.bits := io.mem.resp.bits
resp.bits.tag := io.mem.resp.bits.tag >> UInt(log2Up(n)) resp.bits.tag := io.mem.resp.bits.tag >> log2Up(n)
resp.bits.nack := io.mem.resp.bits.nack && tag_hit resp.bits.nack := io.mem.resp.bits.nack && tag_hit
resp.bits.replay := io.mem.resp.bits.replay && tag_hit resp.bits.replay := io.mem.resp.bits.replay && tag_hit
io.requestor(i).replay_next.valid := io.mem.replay_next.valid && io.requestor(i).replay_next.valid := io.mem.replay_next.valid &&
io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(i) io.mem.replay_next.bits(log2Up(n)-1,0) === UInt(i)
io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> UInt(log2Up(n)) io.requestor(i).replay_next.bits := io.mem.replay_next.bits >> log2Up(n)
} }
} }

View File

@ -6,18 +6,28 @@ import Chisel._
import junctions._ import junctions._
import Util._ import Util._
case object NBTBEntries extends Field[Int] case object BtbKey extends Field[BtbParameters]
case object NRAS extends Field[Int]
abstract trait BTBParameters extends CoreParameters { case class BtbParameters(
val matchBits = params(PgIdxBits) enabled: Boolean = true,
val entries = params(NBTBEntries) nEntries: Int = 62,
val nRAS = params(NRAS) nRAS: Int = 2,
updatesOutOfOrder: Boolean = false)
abstract trait HasBtbParameters extends HasCoreParameters {
val matchBits = pgIdxBits
val entries = p(BtbKey).nEntries
val nRAS = p(BtbKey).nRAS
val updatesOutOfOrder = p(BtbKey).updatesOutOfOrder
val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages val nPages = ((1 max(log2Up(entries)))+1)/2*2 // control logic assumes 2 divides pages
val opaqueBits = log2Up(entries) val opaqueBits = log2Up(entries)
val nBHT = 1 << log2Up(entries*2) val nBHT = 1 << log2Up(entries*2)
} }
abstract class BtbModule(implicit val p: Parameters) extends Module with HasBtbParameters
abstract class BtbBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasBtbParameters
class RAS(nras: Int) { class RAS(nras: Int) {
def push(addr: UInt): Unit = { def push(addr: UInt): Unit = {
when (count < nras) { count := count + 1 } when (count < nras) { count := count + 1 }
@ -35,10 +45,10 @@ class RAS(nras: Int) {
private val count = Reg(init=UInt(0,log2Up(nras+1))) private val count = Reg(init=UInt(0,log2Up(nras+1)))
private val pos = Reg(init=UInt(0,log2Up(nras))) private val pos = Reg(init=UInt(0,log2Up(nras)))
private val stack = Reg(Vec.fill(nras){UInt()}) private val stack = Reg(Vec(UInt(), nras))
} }
class BHTResp extends Bundle with BTBParameters { class BHTResp(implicit p: Parameters) extends BtbBundle()(p) {
val history = UInt(width = log2Up(nBHT).max(1)) val history = UInt(width = log2Up(nBHT).max(1))
val value = UInt(width = 2) val value = UInt(width = 2)
} }
@ -52,7 +62,7 @@ class BHTResp extends Bundle with BTBParameters {
// - each counter corresponds with the address of the fetch packet ("fetch pc"). // - each counter corresponds with the address of the fetch packet ("fetch pc").
// - updated when a branch resolves (and BTB was a hit for that branch). // - updated when a branch resolves (and BTB was a hit for that branch).
// The updating branch must provide its "fetch pc". // The updating branch must provide its "fetch pc".
class BHT(nbht: Int) { class BHT(nbht: Int)(implicit p: Parameters) {
val nbhtbits = log2Up(nbht) val nbhtbits = log2Up(nbht)
def get(addr: UInt, update: Bool): BHTResp = { def get(addr: UInt, update: Bool): BHTResp = {
val res = Wire(new BHTResp) val res = Wire(new BHTResp)
@ -69,14 +79,14 @@ class BHT(nbht: Int) {
when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) } when (mispredict) { history := Cat(taken, d.history(nbhtbits-1,1)) }
} }
private val table = Mem(UInt(width = 2), nbht) private val table = Mem(nbht, UInt(width = 2))
val history = Reg(UInt(width = nbhtbits)) val history = Reg(UInt(width = nbhtbits))
} }
// BTB update occurs during branch resolution (and only on a mispredict). // BTB update occurs during branch resolution (and only on a mispredict).
// - "pc" is what future fetch PCs will tag match against. // - "pc" is what future fetch PCs will tag match against.
// - "br_pc" is the PC of the branch instruction. // - "br_pc" is the PC of the branch instruction.
class BTBUpdate extends Bundle with BTBParameters { class BTBUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val prediction = Valid(new BTBResp) val prediction = Valid(new BTBResp)
val pc = UInt(width = vaddrBits) val pc = UInt(width = vaddrBits)
val target = UInt(width = vaddrBits) val target = UInt(width = vaddrBits)
@ -88,14 +98,14 @@ class BTBUpdate extends Bundle with BTBParameters {
// BHT update occurs during branch resolution on all conditional branches. // BHT update occurs during branch resolution on all conditional branches.
// - "pc" is what future fetch PCs will tag match against. // - "pc" is what future fetch PCs will tag match against.
class BHTUpdate extends Bundle with BTBParameters { class BHTUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val prediction = Valid(new BTBResp) val prediction = Valid(new BTBResp)
val pc = UInt(width = vaddrBits) val pc = UInt(width = vaddrBits)
val taken = Bool() val taken = Bool()
val mispredict = Bool() val mispredict = Bool()
} }
class RASUpdate extends Bundle with BTBParameters { class RASUpdate(implicit p: Parameters) extends BtbBundle()(p) {
val isCall = Bool() val isCall = Bool()
val isReturn = Bool() val isReturn = Bool()
val returnAddr = UInt(width = vaddrBits) val returnAddr = UInt(width = vaddrBits)
@ -106,16 +116,16 @@ class RASUpdate extends Bundle with BTBParameters {
// shifting off the lowest log(inst_bytes) bits off). // shifting off the lowest log(inst_bytes) bits off).
// - "resp.mask" provides a mask of valid instructions (instructions are // - "resp.mask" provides a mask of valid instructions (instructions are
// masked off by the predicted taken branch). // masked off by the predicted taken branch).
class BTBResp extends Bundle with BTBParameters { class BTBResp(implicit p: Parameters) extends BtbBundle()(p) {
val taken = Bool() val taken = Bool()
val mask = Bits(width = params(FetchWidth)) val mask = Bits(width = fetchWidth)
val bridx = Bits(width = log2Up(params(FetchWidth))) val bridx = Bits(width = log2Up(fetchWidth))
val target = UInt(width = vaddrBits) val target = UInt(width = vaddrBits)
val entry = UInt(width = opaqueBits) val entry = UInt(width = opaqueBits)
val bht = new BHTResp val bht = new BHTResp
} }
class BTBReq extends Bundle with BTBParameters { class BTBReq(implicit p: Parameters) extends BtbBundle()(p) {
val addr = UInt(width = vaddrBits) val addr = UInt(width = vaddrBits)
} }
@ -123,7 +133,7 @@ class BTBReq extends Bundle with BTBParameters {
// Higher-performance processors may cause BTB updates to occur out-of-order, // Higher-performance processors may cause BTB updates to occur out-of-order,
// which requires an extra CAM port for updates (to ensure no duplicates get // which requires an extra CAM port for updates (to ensure no duplicates get
// placed in BTB). // placed in BTB).
class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParameters { class BTB(implicit p: Parameters) extends BtbModule {
val io = new Bundle { val io = new Bundle {
val req = Valid(new BTBReq).flip val req = Valid(new BTBReq).flip
val resp = Valid(new BTBResp) val resp = Valid(new BTBResp)
@ -134,18 +144,18 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
} }
val idxValid = Reg(init=UInt(0, entries)) val idxValid = Reg(init=UInt(0, entries))
val idxs = Mem(UInt(width=matchBits), entries) val idxs = Mem(entries, UInt(width=matchBits))
val idxPages = Mem(UInt(width=log2Up(nPages)), entries) val idxPages = Mem(entries, UInt(width=log2Up(nPages)))
val tgts = Mem(UInt(width=matchBits), entries) val tgts = Mem(entries, UInt(width=matchBits))
val tgtPages = Mem(UInt(width=log2Up(nPages)), entries) val tgtPages = Mem(entries, UInt(width=log2Up(nPages)))
val pages = Mem(UInt(width=vaddrBits-matchBits), nPages) val pages = Mem(nPages, UInt(width=vaddrBits-matchBits))
val pageValid = Reg(init=UInt(0, nPages)) val pageValid = Reg(init=UInt(0, nPages))
val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0)) val idxPagesOH = idxPages.map(UIntToOH(_)(nPages-1,0))
val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0)) val tgtPagesOH = tgtPages.map(UIntToOH(_)(nPages-1,0))
val useRAS = Reg(Vec(Bool(), entries)) val useRAS = Reg(Vec(entries, Bool()))
val isJump = Reg(Vec(Bool(), entries)) val isJump = Reg(Vec(entries, Bool()))
val brIdx = Mem(UInt(width=log2Up(params(FetchWidth))), entries) val brIdx = Mem(entries, UInt(width=log2Up(fetchWidth)))
private def page(addr: UInt) = addr >> matchBits private def page(addr: UInt) = addr >> matchBits
private def pageMatch(addr: UInt) = { private def pageMatch(addr: UInt) = {
@ -174,6 +184,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
} }
val updateHit = r_btb_update.bits.prediction.valid val updateHit = r_btb_update.bits.prediction.valid
val nextRepl = Counter(r_btb_update.valid && !updateHit, entries)._1
val useUpdatePageHit = updatePageHit.orR val useUpdatePageHit = updatePageHit.orR
val doIdxPageRepl = !useUpdatePageHit val doIdxPageRepl = !useUpdatePageHit
@ -196,9 +207,8 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
when (r_btb_update.valid) { when (r_btb_update.valid) {
assert(io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target") assert(io.req.bits.addr === r_btb_update.bits.target, "BTB request != I$ target")
val nextRepl = Counter(!updateHit, entries)._1
val waddr = val waddr =
if (updates_out_of_order) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl) if (updatesOutOfOrder) Mux(updateHits.orR, OHToUInt(updateHits), nextRepl)
else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl) else Mux(updateHit, r_btb_update.bits.prediction.bits.entry, nextRepl)
// invalidate entries if we stomp on pages they depend upon // invalidate entries if we stomp on pages they depend upon
@ -212,10 +222,10 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
tgtPages(waddr) := tgtPageUpdate tgtPages(waddr) := tgtPageUpdate
useRAS(waddr) := r_btb_update.bits.isReturn useRAS(waddr) := r_btb_update.bits.isReturn
isJump(waddr) := r_btb_update.bits.isJump isJump(waddr) := r_btb_update.bits.isJump
if (params(FetchWidth) == 1) { if (fetchWidth == 1) {
brIdx(waddr) := UInt(0) brIdx(waddr) := UInt(0)
} else { } else {
brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(params(CoreInstBits)/8) brIdx(waddr) := r_btb_update.bits.br_pc >> log2Up(coreInstBytes)
} }
require(nPages % 2 == 0) require(nPages % 2 == 0)
@ -243,7 +253,7 @@ class BTB(updates_out_of_order: Boolean = false) extends Module with BTBParamete
io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts)) io.resp.bits.target := Cat(Mux1H(Mux1H(hits, tgtPagesOH), pages), Mux1H(hits, tgts))
io.resp.bits.entry := OHToUInt(hits) io.resp.bits.entry := OHToUInt(hits)
io.resp.bits.bridx := brIdx(io.resp.bits.entry) io.resp.bits.bridx := brIdx(io.resp.bits.entry)
if (params(FetchWidth) == 1) { if (fetchWidth == 1) {
io.resp.bits.mask := UInt(1) io.resp.bits.mask := UInt(1)
} else { } else {
// note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case // note: btb_resp is clock gated, so the mask is only relevant for the io.resp.valid case

View File

@ -64,12 +64,14 @@ object CSR
val C = UInt(3,SZ) val C = UInt(3,SZ)
val I = UInt(4,SZ) val I = UInt(4,SZ)
val R = UInt(5,SZ) val R = UInt(5,SZ)
val ADDRSZ = 12
} }
class CSRFileIO extends CoreBundle { class CSRFileIO(implicit p: Parameters) extends CoreBundle {
val host = new HTIFIO val host = new HtifIO
val rw = new Bundle { val rw = new Bundle {
val addr = UInt(INPUT, 12) val addr = UInt(INPUT, CSR.ADDRSZ)
val cmd = Bits(INPUT, CSR.SZ) val cmd = Bits(INPUT, CSR.SZ)
val rdata = Bits(OUTPUT, xLen) val rdata = Bits(OUTPUT, xLen)
val wdata = Bits(INPUT, xLen) val wdata = Bits(INPUT, xLen)
@ -85,8 +87,8 @@ class CSRFileIO extends CoreBundle {
val evec = UInt(OUTPUT, vaddrBitsExtended) val evec = UInt(OUTPUT, vaddrBitsExtended)
val exception = Bool(INPUT) val exception = Bool(INPUT)
val retire = UInt(INPUT, log2Up(1+retireWidth)) val retire = UInt(INPUT, log2Up(1+retireWidth))
val uarch_counters = Vec.fill(16)(UInt(INPUT, log2Up(1+retireWidth))) val uarch_counters = Vec(UInt(INPUT, log2Up(1+retireWidth)), 16)
val custom_mrw_csrs = Vec.fill(params(NCustomMRWCSRs))(UInt(INPUT, xLen)) val custom_mrw_csrs = Vec(UInt(INPUT, xLen), nCustomMrwCsrs)
val cause = UInt(INPUT, xLen) val cause = UInt(INPUT, xLen)
val pc = UInt(INPUT, vaddrBitsExtended) val pc = UInt(INPUT, vaddrBitsExtended)
val fatc = Bool(OUTPUT) val fatc = Bool(OUTPUT)
@ -98,7 +100,7 @@ class CSRFileIO extends CoreBundle {
val interrupt_cause = UInt(OUTPUT, xLen) val interrupt_cause = UInt(OUTPUT, xLen)
} }
class CSRFile extends CoreModule class CSRFile(implicit p: Parameters) extends CoreModule()(p)
{ {
val io = new CSRFileIO val io = new CSRFileIO
@ -123,13 +125,13 @@ class CSRFile extends CoreModule
val reg_fromhost = Reg(init=Bits(0, xLen)) val reg_fromhost = Reg(init=Bits(0, xLen))
val reg_stats = Reg(init=Bool(false)) val reg_stats = Reg(init=Bool(false))
val reg_time = Reg(UInt(width = xLen)) val reg_time = Reg(UInt(width = xLen))
val reg_cycle = WideCounter(xLen)
val reg_instret = WideCounter(xLen, io.retire) val reg_instret = WideCounter(xLen, io.retire)
val reg_cycle = if (enableCommitLog) { reg_instret } else { WideCounter(xLen) }
val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _)) val reg_uarch_counters = io.uarch_counters.map(WideCounter(xLen, _))
val reg_fflags = Reg(UInt(width = 5)) val reg_fflags = Reg(UInt(width = 5))
val reg_frm = Reg(UInt(width = 3)) val reg_frm = Reg(UInt(width = 3))
val irq_rocc = Bool(!params(BuildRoCC).isEmpty) && io.rocc.interrupt val irq_rocc = Bool(!p(BuildRoCC).isEmpty) && io.rocc.interrupt
io.interrupt_cause := 0 io.interrupt_cause := 0
io.interrupt := io.interrupt_cause(xLen-1) io.interrupt := io.interrupt_cause(xLen-1)
@ -153,48 +155,47 @@ class CSRFile extends CoreModule
val system_insn = io.rw.cmd === CSR.I val system_insn = io.rw.cmd === CSR.I
val cpu_ren = io.rw.cmd != CSR.N && !system_insn val cpu_ren = io.rw.cmd != CSR.N && !system_insn
val host_pcr_req_valid = Reg(Bool()) // don't reset val host_csr_req_valid = Reg(Bool()) // don't reset
val host_pcr_req_fire = host_pcr_req_valid && !cpu_ren val host_csr_req_fire = host_csr_req_valid && !cpu_ren
val host_pcr_rep_valid = Reg(Bool()) // don't reset val host_csr_rep_valid = Reg(Bool()) // don't reset
val host_pcr_bits = Reg(io.host.pcr_req.bits) val host_csr_bits = Reg(io.host.csr.req.bits)
io.host.pcr_req.ready := !host_pcr_req_valid && !host_pcr_rep_valid io.host.csr.req.ready := !host_csr_req_valid && !host_csr_rep_valid
io.host.pcr_rep.valid := host_pcr_rep_valid io.host.csr.resp.valid := host_csr_rep_valid
io.host.pcr_rep.bits := host_pcr_bits.data io.host.csr.resp.bits := host_csr_bits.data
when (io.host.pcr_req.fire()) { when (io.host.csr.req.fire()) {
host_pcr_req_valid := true host_csr_req_valid := true
host_pcr_bits := io.host.pcr_req.bits host_csr_bits := io.host.csr.req.bits
} }
when (host_pcr_req_fire) { when (host_csr_req_fire) {
host_pcr_req_valid := false host_csr_req_valid := false
host_pcr_rep_valid := true host_csr_rep_valid := true
host_pcr_bits.data := io.rw.rdata host_csr_bits.data := io.rw.rdata
} }
when (io.host.pcr_rep.fire()) { host_pcr_rep_valid := false } when (io.host.csr.resp.fire()) { host_csr_rep_valid := false }
io.host.debug_stats_pcr := reg_stats // direct export up the hierarchy
io.host.debug_stats_csr := reg_stats // direct export up the hierarchy
val read_time = if (usingPerfCounters) reg_time else (reg_cycle: UInt)
val read_mstatus = io.status.toBits val read_mstatus = io.status.toBits
val isa_string = "IMA" + val isa_string = "IMA" +
(if (params(UseVM)) "S" else "") + (if (usingVM) "S" else "") +
(if (!params(BuildFPU).isEmpty) "FD" else "") + (if (usingFPU) "FD" else "") +
(if (!params(BuildRoCC).isEmpty) "X" else "") (if (usingRoCC) "X" else "")
val cpuid = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) | val cpuid = ((if (xLen == 32) BigInt(0) else BigInt(2)) << (xLen-2)) |
isa_string.map(x => 1 << (x - 'A')).reduce(_|_) isa_string.map(x => 1 << (x - 'A')).reduce(_|_)
val impid = 1 val impid = 1
val read_mapping = collection.mutable.LinkedHashMap[Int,Bits]( val read_mapping = collection.mutable.LinkedHashMap[Int,Bits](
CSRs.fflags -> (if (!params(BuildFPU).isEmpty) reg_fflags else UInt(0)), CSRs.fflags -> (if (usingFPU) reg_fflags else UInt(0)),
CSRs.frm -> (if (!params(BuildFPU).isEmpty) reg_frm else UInt(0)), CSRs.frm -> (if (usingFPU) reg_frm else UInt(0)),
CSRs.fcsr -> (if (!params(BuildFPU).isEmpty) Cat(reg_frm, reg_fflags) else UInt(0)), CSRs.fcsr -> (if (usingFPU) Cat(reg_frm, reg_fflags) else UInt(0)),
CSRs.cycle -> reg_cycle, CSRs.cycle -> reg_cycle,
CSRs.cyclew -> reg_cycle, CSRs.cyclew -> reg_cycle,
CSRs.instret -> reg_instret, CSRs.time -> read_time,
CSRs.instretw -> reg_instret, CSRs.timew -> read_time,
CSRs.time -> reg_time, CSRs.stime -> read_time,
CSRs.timew -> reg_time, CSRs.stimew -> read_time,
CSRs.stime -> reg_time, CSRs.mtime -> read_time,
CSRs.stimew -> reg_time,
CSRs.mtime -> reg_time,
CSRs.mcpuid -> UInt(cpuid), CSRs.mcpuid -> UInt(cpuid),
CSRs.mimpid -> UInt(impid), CSRs.mimpid -> UInt(impid),
CSRs.mstatus -> read_mstatus, CSRs.mstatus -> read_mstatus,
@ -214,7 +215,15 @@ class CSRFile extends CoreModule
CSRs.mtohost -> reg_tohost, CSRs.mtohost -> reg_tohost,
CSRs.mfromhost -> reg_fromhost) CSRs.mfromhost -> reg_fromhost)
if (params(UseVM)) { if (usingPerfCounters) {
read_mapping += CSRs.instret -> reg_instret
read_mapping += CSRs.instretw -> reg_instret
for (i <- 0 until reg_uarch_counters.size)
read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i)
}
if (usingVM) {
val read_sstatus = Wire(init=new SStatus().fromBits(read_mstatus)) val read_sstatus = Wire(init=new SStatus().fromBits(read_mstatus))
read_sstatus.zero1 := 0 read_sstatus.zero1 := 0
read_sstatus.zero2 := 0 read_sstatus.zero2 := 0
@ -241,17 +250,14 @@ class CSRFile extends CoreModule
read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen) read_mapping += CSRs.stvec -> reg_stvec.sextTo(xLen)
} }
for (i <- 0 until reg_uarch_counters.size) for (i <- 0 until nCustomMrwCsrs) {
read_mapping += (CSRs.uarch0 + i) -> reg_uarch_counters(i)
for (i <- 0 until params(NCustomMRWCSRs)) {
val addr = 0x790 + i // turn 0x790 into parameter CustomMRWCSRBase? val addr = 0x790 + i // turn 0x790 into parameter CustomMRWCSRBase?
require(addr >= 0x780 && addr <= 0x7ff, "custom MRW CSR address " + i + " is out of range") require(addr >= 0x780 && addr <= 0x7ff, "custom MRW CSR address " + i + " is out of range")
require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use") require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use")
read_mapping += addr -> io.custom_mrw_csrs(i) read_mapping += addr -> io.custom_mrw_csrs(i)
} }
val addr = Mux(cpu_ren, io.rw.addr, host_pcr_bits.addr) val addr = Mux(cpu_ren, io.rw.addr, host_csr_bits.addr)
val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) } val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) }
val addr_valid = decoded_addr.values.reduce(_||_) val addr_valid = decoded_addr.values.reduce(_||_)
@ -260,11 +266,11 @@ class CSRFile extends CoreModule
val priv_sufficient = reg_mstatus.prv >= csr_addr_priv val priv_sufficient = reg_mstatus.prv >= csr_addr_priv
val read_only = io.rw.addr(11,10).andR val read_only = io.rw.addr(11,10).andR
val cpu_wen = cpu_ren && io.rw.cmd != CSR.R && priv_sufficient val cpu_wen = cpu_ren && io.rw.cmd != CSR.R && priv_sufficient
val wen = cpu_wen && !read_only || host_pcr_req_fire && host_pcr_bits.rw val wen = cpu_wen && !read_only || host_csr_req_fire && host_csr_bits.rw
val wdata = Mux(io.rw.cmd === CSR.W, io.rw.wdata, val wdata = Mux(io.rw.cmd === CSR.W, io.rw.wdata,
Mux(io.rw.cmd === CSR.C, io.rw.rdata & ~io.rw.wdata, Mux(io.rw.cmd === CSR.C, io.rw.rdata & ~io.rw.wdata,
Mux(io.rw.cmd === CSR.S, io.rw.rdata | io.rw.wdata, Mux(io.rw.cmd === CSR.S, io.rw.rdata | io.rw.wdata,
host_pcr_bits.data))) host_csr_bits.data)))
val opcode = io.rw.addr val opcode = io.rw.addr
val insn_call = !opcode(8) && !opcode(0) && system_insn val insn_call = !opcode(8) && !opcode(0) && system_insn
@ -341,7 +347,7 @@ class CSRFile extends CoreModule
assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: io.csr_replay :: Nil) <= 1, "these conditions must be mutually exclusive") assert(PopCount(insn_ret :: insn_redirect_trap :: io.exception :: csr_xcpt :: io.csr_replay :: Nil) <= 1, "these conditions must be mutually exclusive")
when (reg_time >= reg_mtimecmp) { when (read_time >= reg_mtimecmp) {
reg_mip.mtip := true reg_mip.mtip := true
} }
@ -351,7 +357,7 @@ class CSRFile extends CoreModule
io.csr_replay := io.host.ipi_req.valid && !io.host.ipi_req.ready io.csr_replay := io.host.ipi_req.valid && !io.host.ipi_req.ready
io.csr_stall := reg_wfi io.csr_stall := reg_wfi
when (host_pcr_req_fire && !host_pcr_bits.rw && decoded_addr(CSRs.mtohost)) { reg_tohost := UInt(0) } when (host_csr_req_fire && !host_csr_bits.rw && decoded_addr(CSRs.mtohost)) { reg_tohost := UInt(0) }
io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v)
@ -366,7 +372,7 @@ class CSRFile extends CoreModule
reg_mstatus.ie := new_mstatus.ie reg_mstatus.ie := new_mstatus.ie
reg_mstatus.ie1 := new_mstatus.ie1 reg_mstatus.ie1 := new_mstatus.ie1
val supportedModes = Vec((PRV_M :: PRV_U :: (if (params(UseVM)) List(PRV_S) else Nil)).map(UInt(_))) val supportedModes = Vec((PRV_M :: PRV_U :: (if (usingVM) List(PRV_S) else Nil)).map(UInt(_)))
if (supportedModes.size > 1) { if (supportedModes.size > 1) {
reg_mstatus.mprv := new_mstatus.mprv reg_mstatus.mprv := new_mstatus.mprv
when (supportedModes contains new_mstatus.prv) { reg_mstatus.prv := new_mstatus.prv } when (supportedModes contains new_mstatus.prv) { reg_mstatus.prv := new_mstatus.prv }
@ -377,17 +383,17 @@ class CSRFile extends CoreModule
} }
} }
if (params(UseVM)) { if (usingVM) {
val vm_on = if (xLen == 32) 8 else 9 val vm_on = if (xLen == 32) 8 else 9
when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 } when (new_mstatus.vm === 0) { reg_mstatus.vm := 0 }
when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on } when (new_mstatus.vm === vm_on) { reg_mstatus.vm := vm_on }
} }
if (params(UseVM) || !params(BuildFPU).isEmpty) reg_mstatus.fs := new_mstatus.fs if (usingVM || usingFPU) reg_mstatus.fs := new_mstatus.fs
if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_mstatus.xs if (usingRoCC) reg_mstatus.xs := new_mstatus.xs
} }
when (decoded_addr(CSRs.mip)) { when (decoded_addr(CSRs.mip)) {
val new_mip = new MIP().fromBits(wdata) val new_mip = new MIP().fromBits(wdata)
if (params(UseVM)) { if (usingVM) {
reg_mip.ssip := new_mip.ssip reg_mip.ssip := new_mip.ssip
reg_mip.stip := new_mip.stip reg_mip.stip := new_mip.stip
} }
@ -395,7 +401,7 @@ class CSRFile extends CoreModule
} }
when (decoded_addr(CSRs.mie)) { when (decoded_addr(CSRs.mie)) {
val new_mie = new MIP().fromBits(wdata) val new_mie = new MIP().fromBits(wdata)
if (params(UseVM)) { if (usingVM) {
reg_mie.ssip := new_mie.ssip reg_mie.ssip := new_mie.ssip
reg_mie.stip := new_mie.stip reg_mie.stip := new_mie.stip
} }
@ -409,13 +415,14 @@ class CSRFile extends CoreModule
when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata }
when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ } when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & UInt((BigInt(1) << (xLen-1)) + 31) /* only implement 5 LSBs and MSB */ }
when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) } when (decoded_addr(CSRs.mbadaddr)) { reg_mbadaddr := wdata(vaddrBitsExtended-1,0) }
when (decoded_addr(CSRs.instretw)) { reg_instret := wdata } if (usingPerfCounters)
when (decoded_addr(CSRs.instretw)) { reg_instret := wdata }
when (decoded_addr(CSRs.mtimecmp)) { reg_mtimecmp := wdata; reg_mip.mtip := false } when (decoded_addr(CSRs.mtimecmp)) { reg_mtimecmp := wdata; reg_mip.mtip := false }
when (decoded_addr(CSRs.mreset) /* XXX used by HTIF to write mtime */) { reg_time := wdata } when (decoded_addr(CSRs.mtime)) { reg_time := wdata }
when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_pcr_req_fire) { reg_fromhost := wdata } } when (decoded_addr(CSRs.mfromhost)){ when (reg_fromhost === UInt(0) || !host_csr_req_fire) { reg_fromhost := wdata } }
when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_pcr_req_fire) { reg_tohost := wdata } } when (decoded_addr(CSRs.mtohost)) { when (reg_tohost === UInt(0) || host_csr_req_fire) { reg_tohost := wdata } }
when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) } when (decoded_addr(CSRs.stats)) { reg_stats := wdata(0) }
if (params(UseVM)) { if (usingVM) {
when (decoded_addr(CSRs.sstatus)) { when (decoded_addr(CSRs.sstatus)) {
val new_sstatus = new SStatus().fromBits(wdata) val new_sstatus = new SStatus().fromBits(wdata)
reg_mstatus.ie := new_sstatus.ie reg_mstatus.ie := new_sstatus.ie
@ -423,7 +430,7 @@ class CSRFile extends CoreModule
reg_mstatus.prv1 := Mux[UInt](new_sstatus.ps, PRV_S, PRV_U) reg_mstatus.prv1 := Mux[UInt](new_sstatus.ps, PRV_S, PRV_U)
reg_mstatus.mprv := new_sstatus.mprv reg_mstatus.mprv := new_sstatus.mprv
reg_mstatus.fs := new_sstatus.fs // even without an FPU reg_mstatus.fs := new_sstatus.fs // even without an FPU
if (!params(BuildRoCC).isEmpty) reg_mstatus.xs := new_sstatus.xs if (usingRoCC) reg_mstatus.xs := new_sstatus.xs
} }
when (decoded_addr(CSRs.sip)) { when (decoded_addr(CSRs.sip)) {
val new_sip = new MIP().fromBits(wdata) val new_sip = new MIP().fromBits(wdata)

View File

@ -42,7 +42,7 @@ object ALU
} }
import ALU._ import ALU._
class ALUIO extends CoreBundle { class ALUIO(implicit p: Parameters) extends CoreBundle()(p) {
val dw = Bits(INPUT, SZ_DW) val dw = Bits(INPUT, SZ_DW)
val fn = Bits(INPUT, SZ_ALU_FN) val fn = Bits(INPUT, SZ_ALU_FN)
val in2 = UInt(INPUT, xLen) val in2 = UInt(INPUT, xLen)
@ -51,8 +51,7 @@ class ALUIO extends CoreBundle {
val adder_out = UInt(OUTPUT, xLen) val adder_out = UInt(OUTPUT, xLen)
} }
class ALU extends Module class ALU(implicit p: Parameters) extends Module {
{
val io = new ALUIO val io = new ALUIO
// ADD, SUB // ADD, SUB

View File

@ -131,8 +131,10 @@ class FPUDecoder extends Module
FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y) FSQRT_D -> List(FCMD_SQRT, N,Y,Y,N,N,Y,X,N,N,N,N,N,N,Y,Y,Y)
)) ))
val s = io.sigs val s = io.sigs
Vec(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12, s.swap23, s.single, s.fromint, val sigs = Seq(s.cmd, s.ldst, s.wen, s.ren1, s.ren2, s.ren3, s.swap12,
s.toint, s.fastpipe, s.fma, s.div, s.sqrt, s.round, s.wflags) := decoder s.swap23, s.single, s.fromint, s.toint, s.fastpipe, s.fma,
s.div, s.sqrt, s.round, s.wflags)
sigs zip decoder map {case(s,d) => s := d}
} }
class FPUIO extends Bundle { class FPUIO extends Bundle {
@ -215,7 +217,7 @@ class FPToInt extends Module
dcmp.io.a := in.in1 dcmp.io.a := in.in1
dcmp.io.b := in.in2 dcmp.io.b := in.in2
val dcmp_out = (~in.rm & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR val dcmp_out = (~in.rm & Cat(dcmp.io.a_lt_b, dcmp.io.a_eq_b)).orR
val dcmp_exc = (~in.rm & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << UInt(4) val dcmp_exc = (~in.rm & Cat(dcmp.io.a_lt_b_invalid, dcmp.io.a_eq_b_invalid)).orR << 4
val d2i = hardfloat.recodedFloatNToAny(in.in1, in.rm, in.typ ^ 1, 52, 12, 64) val d2i = hardfloat.recodedFloatNToAny(in.in1, in.rm, in.typ ^ 1, 52, 12, 64)
@ -346,13 +348,12 @@ class FPUFMAPipe(val latency: Int, sigWidth: Int, expWidth: Int) extends Module
fma.io.c := in.in3 fma.io.c := in.in3
val res = Wire(new FPResult) val res = Wire(new FPResult)
res.data := fma.io.out res.data := Cat(SInt(-1, 32), fma.io.out)
res.exc := fma.io.exceptionFlags res.exc := fma.io.exceptionFlags
io.out := Pipe(valid, res, latency-1) io.out := Pipe(valid, res, latency-1)
} }
class FPU extends Module class FPU(implicit p: Parameters) extends CoreModule()(p) {
{
val io = new FPUIO val io = new FPUIO
val ex_reg_valid = Reg(next=io.valid, init=Bool(false)) val ex_reg_valid = Reg(next=io.valid, init=Bool(false))
@ -389,8 +390,14 @@ class FPU extends Module
val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d) val load_wb_data_recoded = Mux(load_wb_single, Cat(SInt(-1, 32), rec_s), rec_d)
// regfile // regfile
val regfile = Mem(Bits(width = 65), 32) val regfile = Mem(32, Bits(width = 65))
when (load_wb) { regfile(load_wb_tag) := load_wb_data_recoded } when (load_wb) {
regfile(load_wb_tag) := load_wb_data_recoded
if (enableCommitLog) {
printf ("f%d p%d 0x%x\n", load_wb_tag, load_wb_tag + UInt(32),
Mux(load_wb_single, load_wb_data(31,0), load_wb_data))
}
}
val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt())) val ex_ra1::ex_ra2::ex_ra3::Nil = List.fill(3)(Reg(UInt()))
when (io.valid) { when (io.valid) {
@ -420,11 +427,11 @@ class FPU extends Module
req.in3 := Mux(ex_reg_valid, ex_rs3, cp_rs3) req.in3 := Mux(ex_reg_valid, ex_rs3, cp_rs3)
req.typ := Mux(ex_reg_valid, ex_reg_inst(21,20), io.cp_req.bits.typ) req.typ := Mux(ex_reg_valid, ex_reg_inst(21,20), io.cp_req.bits.typ)
val sfma = Module(new FPUFMAPipe(params(SFMALatency), 23, 9)) val sfma = Module(new FPUFMAPipe(p(SFMALatency), 23, 9))
sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single sfma.io.in.valid := req_valid && ex_ctrl.fma && ex_ctrl.single
sfma.io.in.bits := req sfma.io.in.bits := req
val dfma = Module(new FPUFMAPipe(params(DFMALatency), 52, 12)) val dfma = Module(new FPUFMAPipe(p(DFMALatency), 52, 12))
dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single dfma.io.in.valid := req_valid && ex_ctrl.fma && !ex_ctrl.single
dfma.io.in.bits := req dfma.io.in.bits := req
@ -457,12 +464,12 @@ class FPU extends Module
val divSqrt_cp = Reg(init=Bool(false)) val divSqrt_cp = Reg(init=Bool(false))
// writeback arbitration // writeback arbitration
case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, wdata: UInt, wexc: UInt) case class Pipe(p: Module, lat: Int, cond: (FPUCtrlSigs) => Bool, res: FPResult)
val pipes = List( val pipes = List(
Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits.data, fpmu.io.out.bits.exc), Pipe(fpmu, fpmu.latency, (c: FPUCtrlSigs) => c.fastpipe, fpmu.io.out.bits),
Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits.data, ifpu.io.out.bits.exc), Pipe(ifpu, ifpu.latency, (c: FPUCtrlSigs) => c.fromint, ifpu.io.out.bits),
Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, Cat(SInt(-1, 32), sfma.io.out.bits.data), sfma.io.out.bits.exc), Pipe(sfma, sfma.latency, (c: FPUCtrlSigs) => c.fma && c.single, sfma.io.out.bits),
Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits.data, dfma.io.out.bits.exc)) Pipe(dfma, dfma.latency, (c: FPUCtrlSigs) => c.fma && !c.single, dfma.io.out.bits))
def latencyMask(c: FPUCtrlSigs, offset: Int) = { def latencyMask(c: FPUCtrlSigs, offset: Int) = {
require(pipes.forall(_.lat >= offset)) require(pipes.forall(_.lat >= offset))
pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_) pipes.map(p => Mux(p.cond(c), UInt(1 << p.lat-offset), UInt(0))).reduce(_|_)
@ -472,10 +479,10 @@ class FPU extends Module
val memLatencyMask = latencyMask(mem_ctrl, 2) val memLatencyMask = latencyMask(mem_ctrl, 2)
val wen = Reg(init=Bits(0, maxLatency-1)) val wen = Reg(init=Bits(0, maxLatency-1))
val winfo = Reg(Vec.fill(maxLatency-1){Bits()}) val winfo = Reg(Vec(Bits(), maxLatency-1))
val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint) val mem_wen = mem_reg_valid && (mem_ctrl.fma || mem_ctrl.fastpipe || mem_ctrl.fromint)
val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid) val write_port_busy = RegEnable(mem_wen && (memLatencyMask & latencyMask(ex_ctrl, 1)).orR || (wen & latencyMask(ex_ctrl, 0)).orR, req_valid)
val mem_winfo = Cat(mem_cp_valid, pipeid(mem_ctrl), mem_reg_inst(11,7)) val mem_winfo = Cat(mem_cp_valid, pipeid(mem_ctrl), mem_ctrl.single, mem_reg_inst(11,7)) //single only used for debugging
for (i <- 0 until maxLatency-2) { for (i <- 0 until maxLatency-2) {
when (wen(i+1)) { winfo(i) := winfo(i+1) } when (wen(i+1)) { winfo(i) := winfo(i+1) }
@ -493,11 +500,20 @@ class FPU extends Module
} }
val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt) val waddr = Mux(divSqrt_wen, divSqrt_waddr, winfo(0)(4,0).toUInt)
val wsrc = winfo(0) >> 5 val wsrc = (winfo(0) >> 6)
val wcp = winfo(0)(5+log2Up(pipes.size)) val wcp = winfo(0)(6+log2Up(pipes.size))
val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.wdata))(wsrc)) val wdata = Mux(divSqrt_wen, divSqrt_wdata, Vec(pipes.map(_.res.data))(wsrc))
val wexc = Vec(pipes.map(_.wexc))(wsrc) val wexc = Vec(pipes.map(_.res.exc))(wsrc)
when ((!wcp && wen(0)) || (!divSqrt_cp && divSqrt_wen)) { regfile(waddr) := wdata } when ((!wcp && wen(0)) || (!divSqrt_cp && divSqrt_wen)) {
regfile(waddr) := wdata
if (enableCommitLog) {
val wdata_unrec_s = hardfloat.recodedFloatNToFloatN(wdata(64,0), 23, 9)
val wdata_unrec_d = hardfloat.recodedFloatNToFloatN(wdata(64,0), 52, 12)
val wb_single = (winfo(0) >> 5)(0)
printf ("f%d p%d 0x%x\n", waddr, waddr+ UInt(32),
Mux(wb_single, Cat(UInt(0,32), wdata_unrec_s), wdata_unrec_d))
}
}
when ((wcp && wen(0)) || (divSqrt_cp && divSqrt_wen)) { when ((wcp && wen(0)) || (divSqrt_cp && divSqrt_wen)) {
io.cp_resp.bits.data := wdata io.cp_resp.bits.data := wdata
io.cp_resp.valid := Bool(true) io.cp_resp.valid := Bool(true)
@ -525,7 +541,7 @@ class FPU extends Module
divSqrt_wdata := 0 divSqrt_wdata := 0
divSqrt_flags := 0 divSqrt_flags := 0
if (params(FDivSqrt)) { if (p(FDivSqrt)) {
val divSqrt_single = Reg(Bool()) val divSqrt_single = Reg(Bool())
val divSqrt_rm = Reg(Bits()) val divSqrt_rm = Reg(Bits())
val divSqrt_flags_double = Reg(Bits()) val divSqrt_flags_double = Reg(Bits())

View File

@ -0,0 +1,126 @@
package rocket
import Chisel._
import uncore._
import Util._
// Payload of a fetch redirect sent from the CPU to the Frontend.
// When the enclosing Valid is asserted, the Frontend loads `pc` into its
// s1 PC register and drops the fetch currently in flight.
class FrontendReq(implicit p: Parameters) extends CoreBundle()(p) {
// Address to start fetching from; vaddrBitsExtended bits wide.
val pc = UInt(width = vaddrBitsExtended)
}
// Fetch response handed from the Frontend to the CPU: one fetch group of
// fetchWidth instruction slots plus the PC and exception flag that go with it.
class FrontendResp(implicit p: Parameters) extends CoreBundle()(p) {
val pc = UInt(width = vaddrBitsExtended) // ID stage PC
// One entry per fetch slot, each coreInstBits wide.
val data = Vec(Bits(width = coreInstBits), fetchWidth)
// One bit per fetch slot. The Frontend derives it from the low PC bits and,
// when a BTB response is valid, ANDs it with the BTB's mask.
val mask = Bits(width = fetchWidth)
// Registered copy of the TLB's xcpt_if response for this fetch.
val xcpt_if = Bool()
}
// Interface between the CPU and the Frontend, declared from the CPU's side.
// The Frontend instantiates it with .flip, so un-flipped fields here are
// driven by the CPU and .flip / INPUT fields are driven by the Frontend.
class FrontendIO(implicit p: Parameters) extends CoreBundle()(p) {
// CPU -> Frontend: redirect fetch to req.bits.pc.
val req = Valid(new FrontendReq)
// Frontend -> CPU: fetched instructions; the CPU back-pressures via ready.
val resp = Decoupled(new FrontendResp).flip
// Frontend -> CPU: BTB response registered alongside the fetch response.
val btb_resp = Valid(new BTBResp).flip
// CPU -> Frontend: update ports forwarded unchanged to the BTB module.
val btb_update = Valid(new BTBUpdate)
val bht_update = Valid(new BHTUpdate)
val ras_update = Valid(new RASUpdate)
// CPU -> Frontend: forwarded to both the ICache and the BTB invalidate inputs.
val invalidate = Bool(OUTPUT)
// Frontend -> CPU: the PC the Frontend will fetch next (req.bits.pc when a
// redirect is valid, otherwise the Frontend's own next PC).
val npc = UInt(INPUT, width = vaddrBitsExtended)
}
// Instruction fetch front end.
//
// Instantiates a BTB, an ICache and a TLB and sequences them with a short PC
// pipeline: s1_pc is presented to the BTB and TLB, and s2_pc accompanies the
// response handed to the CPU. A one-entry queue (icbuf) sits between the
// ICache response and the CPU-facing response.
class Frontend(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
val io = new Bundle {
val cpu = new FrontendIO().flip
val ptw = new TLBPTWIO()
val mem = new ClientUncachedTileLinkIO
}
val btb = Module(new BTB)
val icache = Module(new ICache)
val tlb = Module(new TLB)
// Raw s1 PC register; s1_pc below is the aligned view that everything else uses.
val s1_pc_ = Reg(UInt())
val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline)
val s1_same_block = Reg(Bool())
// s2 state: note s2_valid resets to true and s2_pc resets to START_ADDR.
val s2_valid = Reg(init=Bool(true))
val s2_pc = Reg(init=UInt(START_ADDR))
val s2_btb_resp_valid = Reg(init=Bool(false))
val s2_btb_resp_bits = Reg(btb.io.resp.bits)
val s2_xcpt_if = Reg(init=Bool(false))
// One-entry queue (pipe=true) buffering the ICache response.
val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true))
// msb: top bit index of a vaddrBits-wide address.
// lsb: number of address bits covered by one fetch group.
val msb = vaddrBits-1
val lsb = log2Up(fetchWidth*coreInstBytes)
// BTB target with a copy of its bit `msb` prepended as an extra top bit.
val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target)
// Sequential next PC: s1_pc advanced by one fetch group, low `lsb` bits
// forced to zero, and a top bit that is set only if bit `msb` is set both
// before and after the increment.
val ntpc_0 = s1_pc + UInt(coreInstBytes*fetchWidth)
val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure
// s2 expects data but the buffer has none to offer.
val icmiss = s2_valid && !icbuf.io.deq.valid
val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc)
// On a miss, go back to s2_pc; otherwise follow the prediction.
val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt
// True when there is no miss, no redirect, no taken prediction, and ntpc and
// s1_pc agree under the mask `rowBytes`.
// NOTE(review): presumably rowBytes is a power of two, so this compares the
// single bit that flips when the PC crosses a row boundary - confirm.
val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes))
// The CPU is not accepting a response we are offering.
val stall = io.cpu.resp.valid && !io.cpu.resp.ready
// Advance the PC pipeline when not stalled. The s2 registers only capture
// new s1 state when there was no miss.
when (!stall) {
s1_same_block := s0_same_block && !tlb.io.resp.miss
s1_pc_ := npc
s2_valid := !icmiss
when (!icmiss) {
s2_pc := s1_pc
s2_btb_resp_valid := btb.io.resp.valid
when (btb.io.resp.valid) { s2_btb_resp_bits := btb.io.resp.bits }
s2_xcpt_if := tlb.io.resp.xcpt_if
}
}
// CPU redirect. Written after the !stall block so that, under Chisel's
// last-connect semantics, these assignments win when both conditions hold.
when (io.cpu.req.valid) {
s1_same_block := Bool(false)
s1_pc_ := io.cpu.req.bits.pc
s2_valid := Bool(false)
}
// BTB: looked up with s1_pc; update ports are passed straight through.
btb.io.req.valid := !stall && !icmiss
btb.io.req.bits.addr := s1_pc
btb.io.btb_update := io.cpu.btb_update
btb.io.bht_update := io.cpu.bht_update
btb.io.ras_update := io.cpu.ras_update
btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate
// TLB: translates the page number of s1_pc as an instruction, non-store access.
io.ptw <> tlb.io.ptw
tlb.io.req.valid := !stall && !icmiss
tlb.io.req.bits.vpn := s1_pc >> pgIdxBits
tlb.io.req.bits.asid := UInt(0)
tlb.io.req.bits.passthrough := Bool(false)
tlb.io.req.bits.instruction := Bool(true)
tlb.io.req.bits.store := Bool(false)
// ICache: indexed with io.cpu.npc (driven further down); ppn and kill come
// from the TLB response and the conditions listed below.
io.mem <> icache.io.mem
icache.io.req.valid := !stall && !s0_same_block
icache.io.req.bits.idx := io.cpu.npc
icache.io.invalidate := io.cpu.invalidate
icache.io.req.bits.ppn := tlb.io.resp.ppn
// Kill the access on a redirect, TLB miss, fetch exception, miss replay, or
// PTW invalidate.
icache.io.req.bits.kill := io.cpu.req.valid ||
tlb.io.resp.miss || tlb.io.resp.xcpt_if ||
icmiss || io.ptw.invalidate
icache.io.resp.ready := !stall && !s1_same_block
// A response is valid when s2 is valid and either data is available or the
// fetch raised an exception (in which case no data is required).
io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icbuf.io.deq.valid)
io.cpu.resp.bits.pc := s2_pc
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
icbuf.io.enq <> icache.io.resp
// Keep the buffered row when the next fetch falls in the same row.
icbuf.io.deq.ready := !stall && !s1_same_block
require(fetchWidth * coreInstBytes <= rowBytes)
// Select the fetch group inside the buffered row: if a row is exactly one
// fetch group use it as is, otherwise shift right by the group index (taken
// from s2_pc) times the group width in bits.
val fetch_data =
if (fetchWidth * coreInstBytes == rowBytes) icbuf.io.deq.bits.datablock
else icbuf.io.deq.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(fetchWidth*coreInstBytes)) << log2Up(fetchWidth*coreInstBits))
// Slice the fetch group into coreInstBits-wide slots.
for (i <- 0 until fetchWidth) {
io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits)
}
// Slot mask: all ones shifted left by the slot index read from s2_pc starting
// at bit 2, so slots below the PC are cleared.
// NOTE(review): the literal 2 looks like log2 of a 4-byte instruction - confirm
// against coreInstBytes.
val all_ones = UInt((1 << (fetchWidth+1))-1)
val msk_pc = if (fetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(fetchWidth) -1+2,2)
io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc)
io.cpu.resp.bits.xcpt_if := s2_xcpt_if
io.cpu.btb_resp.valid := s2_btb_resp_valid
io.cpu.btb_resp.bits := s2_btb_resp_bits
}

View File

@ -4,148 +4,26 @@ import Chisel._
import uncore._ import uncore._
import Util._ import Util._
abstract trait L1CacheParameters extends CacheParameters with CoreParameters { trait HasL1CacheParameters extends HasCacheParameters with HasCoreParameters {
val outerDataBeats = params(TLDataBeats) val outerDataBeats = p(TLKey(p(TLId))).dataBeats
val outerDataBits = params(TLDataBits) val outerDataBits = p(TLKey(p(TLId))).dataBitsPerBeat
val outerAddrBits = p(TLKey(p(TLId))).addrBits
val refillCyclesPerBeat = outerDataBits/rowBits val refillCyclesPerBeat = outerDataBits/rowBits
val refillCycles = refillCyclesPerBeat*outerDataBeats val refillCycles = refillCyclesPerBeat*outerDataBeats
} }
abstract trait FrontendParameters extends L1CacheParameters class ICacheReq(implicit p: Parameters) extends CoreBundle()(p) {
// Base class for bundles that need the FrontendParameters values in scope.
abstract class FrontendBundle extends Bundle with FrontendParameters
// Base class for modules that need the FrontendParameters values in scope.
abstract class FrontendModule extends Module with FrontendParameters
// Payload of a fetch redirect sent from the CPU to the Frontend.
// When the enclosing Valid is asserted, the Frontend loads `pc` into its
// s1 PC register.
class FrontendReq extends CoreBundle {
// Address to start fetching from; vaddrBitsExtended bits wide.
val pc = UInt(width = vaddrBitsExtended)
}
// Fetch response handed from the Frontend to the CPU: one fetch group of
// coreFetchWidth instruction slots plus the matching PC and exception flag.
class FrontendResp extends CoreBundle {
val pc = UInt(width = vaddrBitsExtended) // ID stage PC
// One entry per fetch slot, each coreInstBits wide.
val data = Vec.fill(coreFetchWidth) (Bits(width = coreInstBits))
// One bit per fetch slot.
val mask = Bits(width = coreFetchWidth)
// Registered copy of the TLB's xcpt_if response for this fetch.
val xcpt_if = Bool()
}
// Interface between the CPU and the Frontend, declared from the CPU's side.
// The Frontend instantiates it with .flip, so un-flipped fields here are
// driven by the CPU and .flip / INPUT fields are driven by the Frontend.
class CPUFrontendIO extends CoreBundle {
// CPU -> Frontend: redirect fetch to req.bits.pc.
val req = Valid(new FrontendReq)
// Frontend -> CPU: fetched instructions; the CPU back-pressures via ready.
val resp = Decoupled(new FrontendResp).flip
// Frontend -> CPU: BTB response registered alongside the fetch response.
val btb_resp = Valid(new BTBResp).flip
// CPU -> Frontend: update ports forwarded unchanged to the BTB module.
val btb_update = Valid(new BTBUpdate)
val bht_update = Valid(new BHTUpdate)
val ras_update = Valid(new RASUpdate)
// CPU -> Frontend: forwarded to both the ICache and the BTB invalidate inputs.
val invalidate = Bool(OUTPUT)
// Frontend -> CPU: the PC the Frontend will fetch next.
val npc = UInt(INPUT, width = vaddrBitsExtended)
}
// Instruction fetch front end.
//
// Instantiates a BTB, an ICache and a TLB and sequences them with a short PC
// pipeline: s1_pc is presented to the BTB and TLB, and s2_pc accompanies the
// response handed to the CPU. `btb_updates_out_of_order` is passed straight
// to the BTB constructor and is not used otherwise in this class.
class Frontend(btb_updates_out_of_order: Boolean = false) extends FrontendModule
{
val io = new Bundle {
val cpu = new CPUFrontendIO().flip
val ptw = new TLBPTWIO()
val mem = new ClientUncachedTileLinkIO
}
val btb = Module(new BTB(btb_updates_out_of_order))
val icache = Module(new ICache)
val tlb = Module(new TLB)
// Raw s1 PC register; s1_pc below is the aligned view that everything else uses.
val s1_pc_ = Reg(UInt())
val s1_pc = ~(~s1_pc_ | (coreInstBytes-1)) // discard PC LSBS (this propagates down the pipeline)
val s1_same_block = Reg(Bool())
// s2 state: note s2_valid resets to true and s2_pc resets to START_ADDR.
val s2_valid = Reg(init=Bool(true))
val s2_pc = Reg(init=UInt(START_ADDR))
val s2_btb_resp_valid = Reg(init=Bool(false))
val s2_btb_resp_bits = Reg(btb.io.resp.bits)
val s2_xcpt_if = Reg(init=Bool(false))
// One-entry queue (pipe=true) buffering the ICache response.
val icbuf = Module(new Queue(new ICacheResp, 1, pipe=true))
// msb: top bit index of a vaddrBits-wide address.
// lsb: number of address bits covered by one fetch group.
val msb = vaddrBits-1
val lsb = log2Up(coreFetchWidth*coreInstBytes)
// BTB target with a copy of its bit `msb` prepended as an extra top bit.
val btbTarget = Cat(btb.io.resp.bits.target(msb), btb.io.resp.bits.target)
// Sequential next PC: s1_pc advanced by one fetch group, low `lsb` bits
// forced to zero, and a top bit that is set only if bit `msb` is set both
// before and after the increment.
val ntpc_0 = s1_pc + UInt(coreInstBytes*coreFetchWidth)
val ntpc = Cat(s1_pc(msb) & ntpc_0(msb), ntpc_0(msb,lsb), Bits(0,lsb)) // unsure
// s2 expects data but the buffer has none to offer.
val icmiss = s2_valid && !icbuf.io.deq.valid
val predicted_npc = Mux(btb.io.resp.bits.taken, btbTarget, ntpc)
// On a miss, go back to s2_pc; otherwise follow the prediction.
val npc = Mux(icmiss, s2_pc, predicted_npc).toUInt
// True when there is no miss, no redirect, no taken prediction, and ntpc and
// s1_pc agree under the mask `rowBytes`.
// NOTE(review): presumably rowBytes is a power of two, so this compares the
// single bit that flips when the PC crosses a row boundary - confirm.
val s0_same_block = !icmiss && !io.cpu.req.valid && !btb.io.resp.bits.taken && ((ntpc & rowBytes) === (s1_pc & rowBytes))
// The CPU is not accepting a response we are offering.
val stall = io.cpu.resp.valid && !io.cpu.resp.ready
// Advance the PC pipeline when not stalled. The s2 registers only capture
// new s1 state when there was no miss.
when (!stall) {
s1_same_block := s0_same_block && !tlb.io.resp.miss
s1_pc_ := npc
s2_valid := !icmiss
when (!icmiss) {
s2_pc := s1_pc
s2_btb_resp_valid := btb.io.resp.valid
when (btb.io.resp.valid) { s2_btb_resp_bits := btb.io.resp.bits }
s2_xcpt_if := tlb.io.resp.xcpt_if
}
}
// CPU redirect. Written after the !stall block so that, under Chisel's
// last-connect semantics, these assignments win when both conditions hold.
when (io.cpu.req.valid) {
s1_same_block := Bool(false)
s1_pc_ := io.cpu.req.bits.pc
s2_valid := Bool(false)
}
// BTB: looked up with s1_pc; update ports are passed straight through.
btb.io.req.valid := !stall && !icmiss
btb.io.req.bits.addr := s1_pc
btb.io.btb_update := io.cpu.btb_update
btb.io.bht_update := io.cpu.bht_update
btb.io.ras_update := io.cpu.ras_update
btb.io.invalidate := io.cpu.invalidate || io.ptw.invalidate
// TLB: translates the page number of s1_pc as an instruction, non-store access.
io.ptw <> tlb.io.ptw
tlb.io.req.valid := !stall && !icmiss
tlb.io.req.bits.vpn := s1_pc >> UInt(pgIdxBits)
tlb.io.req.bits.asid := UInt(0)
tlb.io.req.bits.passthrough := Bool(false)
tlb.io.req.bits.instruction := Bool(true)
tlb.io.req.bits.store := Bool(false)
// ICache: indexed with io.cpu.npc (driven further down); ppn comes from the
// TLB response.
io.mem <> icache.io.mem
icache.io.req.valid := !stall && !s0_same_block
icache.io.req.bits.idx := io.cpu.npc
icache.io.invalidate := io.cpu.invalidate
icache.io.req.bits.ppn := tlb.io.resp.ppn
// Kill the access on a redirect, TLB miss, miss replay, or PTW invalidate.
icache.io.req.bits.kill := io.cpu.req.valid || tlb.io.resp.miss || icmiss || io.ptw.invalidate
// A response is valid when s2 is valid and either data is available or the
// fetch raised an exception (in which case no data is required).
io.cpu.resp.valid := s2_valid && (s2_xcpt_if || icbuf.io.deq.valid)
io.cpu.resp.bits.pc := s2_pc
io.cpu.npc := Mux(io.cpu.req.valid, io.cpu.req.bits.pc, npc)
icbuf.io.enq <> icache.io.resp
// Keep the buffered row when the next fetch falls in the same row.
icbuf.io.deq.ready := !stall && !s1_same_block
require(coreFetchWidth * coreInstBytes <= rowBytes)
// Select the fetch group inside the buffered row: if a row is exactly one
// fetch group use it as is, otherwise shift right by the group index (taken
// from s2_pc) times the group width in bits.
val fetch_data =
if (coreFetchWidth * coreInstBytes == rowBytes) icbuf.io.deq.bits.datablock
else icbuf.io.deq.bits.datablock >> (s2_pc(log2Up(rowBytes)-1,log2Up(coreFetchWidth*coreInstBytes)) << log2Up(coreFetchWidth*coreInstBits))
// Slice the fetch group into coreInstBits-wide slots.
for (i <- 0 until coreFetchWidth) {
io.cpu.resp.bits.data(i) := fetch_data(i*coreInstBits+coreInstBits-1, i*coreInstBits)
}
// Slot mask: all ones shifted left by the slot index read from s2_pc starting
// at bit 2, so slots below the PC are cleared.
// NOTE(review): the literal 2 looks like log2 of a 4-byte instruction - confirm
// against coreInstBytes.
val all_ones = UInt((1 << (coreFetchWidth+1))-1)
val msk_pc = if (coreFetchWidth == 1) all_ones else all_ones << s2_pc(log2Up(coreFetchWidth) -1+2,2)
io.cpu.resp.bits.mask := Mux(s2_btb_resp_valid, msk_pc & s2_btb_resp_bits.mask, msk_pc)
io.cpu.resp.bits.xcpt_if := s2_xcpt_if
io.cpu.btb_resp.valid := s2_btb_resp_valid
io.cpu.btb_resp.bits := s2_btb_resp_bits
}
class ICacheReq extends FrontendBundle {
val idx = UInt(width = pgIdxBits) val idx = UInt(width = pgIdxBits)
val ppn = UInt(width = ppnBits) // delayed one cycle val ppn = UInt(width = ppnBits) // delayed one cycle
val kill = Bool() // delayed one cycle val kill = Bool() // delayed one cycle
} }
class ICacheResp extends FrontendBundle { class ICacheResp(implicit p: Parameters) extends CoreBundle()(p) with HasL1CacheParameters {
val data = Bits(width = coreInstBits)
val datablock = Bits(width = rowBits) val datablock = Bits(width = rowBits)
} }
class ICache extends FrontendModule class ICache(implicit p: Parameters) extends CoreModule()(p) with HasL1CacheParameters {
{
val io = new Bundle { val io = new Bundle {
val req = Valid(new ICacheReq).flip val req = Valid(new ICacheReq).flip
val resp = Decoupled(new ICacheResp) val resp = Decoupled(new ICacheResp)
@ -197,12 +75,11 @@ class ICache extends FrontendModule
val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0) val repl_way = if (isDM) UInt(0) else LFSR16(s1_miss)(log2Up(nWays)-1,0)
val entagbits = code.width(tagBits) val entagbits = code.width(tagBits)
val tag_array = SeqMem(Bits(width = entagbits*nWays), nSets) val tag_array = SeqMem(Vec(Bits(width = entagbits), nWays), nSets)
val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid) val tag_rdata = tag_array.read(s0_pgoff(untagBits-1,blockOffBits), !refill_done && s0_valid)
when (refill_done) { when (refill_done) {
val wmask = FillInterleaved(entagbits, if (isDM) Bits(1) else UIntToOH(repl_way))
val tag = code.encode(refill_tag).toUInt val tag = code.encode(refill_tag).toUInt
tag_array.write(s1_idx, Fill(nWays, tag), wmask) tag_array.write(s1_idx, Vec.fill(nWays)(tag), Vec.tabulate(nWays)(repl_way === _))
} }
val vb_array = Reg(init=Bits(0, nSets*nWays)) val vb_array = Reg(init=Bits(0, nSets*nWays))
@ -223,7 +100,7 @@ class ICache extends FrontendModule
for (i <- 0 until nWays) { for (i <- 0 until nWays) {
val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool val s1_vb = !io.invalidate && vb_array(Cat(UInt(i), s1_pgoff(untagBits-1,blockOffBits))).toBool
val tag_out = tag_rdata(entagbits*(i+1)-1, entagbits*i) val tag_out = tag_rdata(i)
val s1_tag_disparity = code.decode(tag_out).error val s1_tag_disparity = code.decode(tag_out).error
when (s1_valid && rdy && !stall) { when (s1_valid && rdy && !stall) {
} }
@ -252,7 +129,7 @@ class ICache extends FrontendModule
// output signals // output signals
io.resp.valid := s1_hit io.resp.valid := s1_hit
io.mem.acquire.valid := (state === s_request) io.mem.acquire.valid := (state === s_request)
io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> UInt(blockOffBits)) io.mem.acquire.bits := GetBlock(addr_block = refill_addr >> blockOffBits)
// control state machine // control state machine
switch (state) { switch (state) {

View File

@ -192,6 +192,26 @@ object Instructions {
def CUSTOM3_RD = BitPat("b?????????????????100?????1111011") def CUSTOM3_RD = BitPat("b?????????????????100?????1111011")
def CUSTOM3_RD_RS1 = BitPat("b?????????????????110?????1111011") def CUSTOM3_RD_RS1 = BitPat("b?????????????????110?????1111011")
def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011") def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011")
def SLLI_RV32 = BitPat("b0000000??????????001?????0010011")
def SRLI_RV32 = BitPat("b0000000??????????101?????0010011")
def SRAI_RV32 = BitPat("b0100000??????????101?????0010011")
def FRFLAGS = BitPat("b00000000000100000010?????1110011")
def FSFLAGS = BitPat("b000000000001?????001?????1110011")
def FSFLAGSI = BitPat("b000000000001?????101?????1110011")
def FRRM = BitPat("b00000000001000000010?????1110011")
def FSRM = BitPat("b000000000010?????001?????1110011")
def FSRMI = BitPat("b000000000010?????101?????1110011")
def FSCSR = BitPat("b000000000011?????001?????1110011")
def FRCSR = BitPat("b00000000001100000010?????1110011")
def RDCYCLE = BitPat("b11000000000000000010?????1110011")
def RDTIME = BitPat("b11000000000100000010?????1110011")
def RDINSTRET = BitPat("b11000000001000000010?????1110011")
def RDCYCLEH = BitPat("b11001000000000000010?????1110011")
def RDTIMEH = BitPat("b11001000000100000010?????1110011")
def RDINSTRETH = BitPat("b11001000001000000010?????1110011")
def ECALL = BitPat("b00000000000000000000000001110011")
def EBREAK = BitPat("b00000000000100000000000001110011")
def ERET = BitPat("b00010000000000000000000001110011")
} }
object Causes { object Causes {
val misaligned_fetch = 0x0 val misaligned_fetch = 0x0

View File

@ -6,29 +6,35 @@ import Chisel._
import ALU._ import ALU._
import Util._ import Util._
class MultiplierReq extends CoreBundle { class MultiplierReq(dataBits: Int, tagBits: Int) extends Bundle {
val fn = Bits(width = SZ_ALU_FN) val fn = Bits(width = SZ_ALU_FN)
val dw = Bits(width = SZ_DW) val dw = Bits(width = SZ_DW)
val in1 = Bits(width = xLen) val in1 = Bits(width = dataBits)
val in2 = Bits(width = xLen) val in2 = Bits(width = dataBits)
val tag = UInt(width = log2Up(params(NMultXpr))) val tag = UInt(width = tagBits)
override def cloneType = new MultiplierReq(dataBits, tagBits).asInstanceOf[this.type]
} }
class MultiplierResp extends CoreBundle { class MultiplierResp(dataBits: Int, tagBits: Int) extends Bundle {
val data = Bits(width = xLen) val data = Bits(width = dataBits)
val tag = UInt(width = log2Up(params(NMultXpr))) val tag = UInt(width = tagBits)
override def cloneType = new MultiplierResp(dataBits, tagBits).asInstanceOf[this.type]
} }
class MultiplierIO extends Bundle { class MultiplierIO(dataBits: Int, tagBits: Int) extends Bundle {
val req = Decoupled(new MultiplierReq).flip val req = Decoupled(new MultiplierReq(dataBits, tagBits)).flip
val kill = Bool(INPUT) val kill = Bool(INPUT)
val resp = Decoupled(new MultiplierResp) val resp = Decoupled(new MultiplierResp(dataBits, tagBits))
} }
class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false) extends Module { class MulDiv(
val io = new MultiplierIO width: Int,
nXpr: Int = 32,
unroll: Int = 1,
earlyOut: Boolean = false) extends Module {
val io = new MultiplierIO(width, log2Up(nXpr))
val w = io.req.bits.in1.getWidth val w = io.req.bits.in1.getWidth
val mulw = (w+mulUnroll-1)/mulUnroll*mulUnroll val mulw = (w+unroll-1)/unroll*unroll
val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6) val s_ready :: s_neg_inputs :: s_busy :: s_move_rem :: s_neg_output :: s_done :: Nil = Enum(UInt(), 6)
val state = Reg(init=s_ready) val state = Reg(init=s_ready)
@ -87,18 +93,18 @@ class MulDiv(mulUnroll: Int = 1, earlyOut: Boolean = false) extends Module {
val mplier = mulReg(mulw-1,0) val mplier = mulReg(mulw-1,0)
val accum = mulReg(2*mulw,mulw).toSInt val accum = mulReg(2*mulw,mulw).toSInt
val mpcand = divisor.toSInt val mpcand = divisor.toSInt
val prod = mplier(mulUnroll-1,0) * mpcand + accum val prod = mplier(unroll-1,0) * mpcand + accum
val nextMulReg = Cat(prod, mplier(mulw-1,mulUnroll)).toUInt val nextMulReg = Cat(prod, mplier(mulw-1,unroll)).toUInt
val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * mulUnroll)(log2Up(mulw)-1,0))(mulw-1,0) val eOutMask = (SInt(BigInt(-1) << mulw) >> (count * unroll)(log2Up(mulw)-1,0))(mulw-1,0)
val eOut = Bool(earlyOut) && count != mulw/mulUnroll-1 && count != 0 && val eOut = Bool(earlyOut) && count != mulw/unroll-1 && count != 0 &&
!isHi && (mplier & ~eOutMask) === UInt(0) !isHi && (mplier & ~eOutMask) === UInt(0)
val eOutRes = (mulReg >> (mulw - count * mulUnroll)(log2Up(mulw)-1,0)) val eOutRes = (mulReg >> (mulw - count * unroll)(log2Up(mulw)-1,0))
val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0)) val nextMulReg1 = Cat(nextMulReg(2*mulw,mulw), Mux(eOut, eOutRes, nextMulReg)(mulw-1,0))
remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0)) remainder := Cat(nextMulReg1 >> w, Bool(false), nextMulReg1(w-1,0))
count := count + 1 count := count + 1
when (eOut || count === mulw/mulUnroll-1) { when (eOut || count === mulw/unroll-1) {
state := Mux(isHi, s_move_rem, s_done) state := Mux(isHi, s_move_rem, s_done)
} }
} }

View File

@ -4,18 +4,23 @@ package rocket
import Chisel._ import Chisel._
import uncore._ import uncore._
import junctions.MMIOBase
import Util._ import Util._
case object WordBits extends Field[Int] case object WordBits extends Field[Int]
case object StoreDataQueueDepth extends Field[Int] case object StoreDataQueueDepth extends Field[Int]
case object ReplayQueueDepth extends Field[Int] case object ReplayQueueDepth extends Field[Int]
case object NMSHRs extends Field[Int] case object NMSHRs extends Field[Int]
case object NIOMSHRs extends Field[Int]
case object LRSCCycles extends Field[Int] case object LRSCCycles extends Field[Int]
abstract trait L1HellaCacheParameters extends L1CacheParameters { trait HasL1HellaCacheParameters extends HasL1CacheParameters {
val wordBits = params(WordBits) val wordBits = p(WordBits)
val wordBytes = wordBits/8 val wordBytes = wordBits/8
val wordOffBits = log2Up(wordBytes) val wordOffBits = log2Up(wordBytes)
val beatBytes = p(CacheBlockBytes) / outerDataBeats
val beatWords = beatBytes / wordBytes
val beatOffBits = log2Up(beatBytes)
val idxMSB = untagBits-1 val idxMSB = untagBits-1
val idxLSB = blockOffBits val idxLSB = blockOffBits
val offsetmsb = idxLSB-1 val offsetmsb = idxLSB-1
@ -24,46 +29,53 @@ abstract trait L1HellaCacheParameters extends L1CacheParameters {
val doNarrowRead = coreDataBits * nWays % rowBits == 0 val doNarrowRead = coreDataBits * nWays % rowBits == 0
val encDataBits = code.width(coreDataBits) val encDataBits = code.width(coreDataBits)
val encRowBits = encDataBits*rowWords val encRowBits = encDataBits*rowWords
val sdqDepth = params(StoreDataQueueDepth) val sdqDepth = p(StoreDataQueueDepth)
val nMSHRs = params(NMSHRs) val nMSHRs = p(NMSHRs)
val nIOMSHRs = p(NIOMSHRs)
val lrscCycles = p(LRSCCycles)
} }
abstract class L1HellaCacheBundle extends Bundle with L1HellaCacheParameters abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module
abstract class L1HellaCacheModule extends Module with L1HellaCacheParameters with HasL1HellaCacheParameters
abstract class L1HellaCacheBundle(implicit val p: Parameters) extends junctions.ParameterizedBundle()(p)
with HasL1HellaCacheParameters
trait HasCoreMemOp extends CoreBundle { trait HasCoreMemOp extends HasCoreParameters {
val addr = UInt(width = coreMaxAddrBits) val addr = UInt(width = coreMaxAddrBits)
val tag = Bits(width = coreDCacheReqTagBits) val tag = Bits(width = coreDCacheReqTagBits)
val cmd = Bits(width = M_SZ) val cmd = Bits(width = M_SZ)
val typ = Bits(width = MT_SZ) val typ = Bits(width = MT_SZ)
} }
trait HasCoreData extends CoreBundle { trait HasCoreData extends HasCoreParameters {
val data = Bits(width = coreDataBits) val data = Bits(width = coreDataBits)
} }
trait HasSDQId extends CoreBundle with L1HellaCacheParameters { trait HasSDQId extends HasL1HellaCacheParameters {
val sdq_id = UInt(width = log2Up(sdqDepth)) val sdq_id = UInt(width = log2Up(sdqDepth))
} }
trait HasMissInfo extends CoreBundle with L1HellaCacheParameters { trait HasMissInfo extends HasL1HellaCacheParameters {
val tag_match = Bool() val tag_match = Bool()
val old_meta = new L1Metadata val old_meta = new L1Metadata
val way_en = Bits(width = nWays) val way_en = Bits(width = nWays)
} }
class HellaCacheReqInternal extends HasCoreMemOp { class HellaCacheReqInternal(implicit p: Parameters) extends L1HellaCacheBundle()(p)
with HasCoreMemOp {
val kill = Bool() val kill = Bool()
val phys = Bool() val phys = Bool()
} }
class HellaCacheReq extends HellaCacheReqInternal with HasCoreData class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData
class HellaCacheResp extends HasCoreMemOp with HasCoreData { class HellaCacheResp(implicit p: Parameters) extends L1HellaCacheBundle()(p)
with HasCoreMemOp
with HasCoreData {
val nack = Bool() // comes 2 cycles after req.fire val nack = Bool() // comes 2 cycles after req.fire
val replay = Bool() val replay = Bool()
val has_data = Bool() val has_data = Bool()
val data_subword = Bits(width = coreDataBits) val data_word_bypass = Bits(width = coreDataBits)
val store_data = Bits(width = coreDataBits) val store_data = Bits(width = coreDataBits)
} }
@ -78,7 +90,7 @@ class HellaCacheExceptions extends Bundle {
} }
// interface between D$ and processor/DTLB // interface between D$ and processor/DTLB
class HellaCacheIO extends CoreBundle { class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Decoupled(new HellaCacheReq) val req = Decoupled(new HellaCacheReq)
val resp = Valid(new HellaCacheResp).flip val resp = Valid(new HellaCacheResp).flip
val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip val replay_next = Valid(Bits(width = coreDCacheReqTagBits)).flip
@ -87,50 +99,128 @@ class HellaCacheIO extends CoreBundle {
val ordered = Bool(INPUT) val ordered = Bool(INPUT)
} }
class L1DataReadReq extends L1HellaCacheBundle { class L1DataReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
val way_en = Bits(width = nWays) val way_en = Bits(width = nWays)
val addr = Bits(width = untagBits) val addr = Bits(width = untagBits)
} }
class L1DataWriteReq extends L1DataReadReq { class L1DataWriteReq(implicit p: Parameters) extends L1DataReadReq()(p) {
val wmask = Bits(width = rowWords) val wmask = Bits(width = rowWords)
val data = Bits(width = encRowBits) val data = Bits(width = encRowBits)
} }
class L1RefillReq extends L1DataReadReq class L1RefillReq(implicit p: Parameters) extends L1DataReadReq()(p)
class L1MetaReadReq extends MetaReadReq { class L1MetaReadReq(implicit p: Parameters) extends MetaReadReq {
val tag = Bits(width = tagBits) val tag = Bits(width = tagBits)
override def cloneType = new L1MetaReadReq()(p).asInstanceOf[this.type] //TODO remove
} }
class L1MetaWriteReq extends class L1MetaWriteReq(implicit p: Parameters) extends
MetaWriteReq[L1Metadata](new L1Metadata) MetaWriteReq[L1Metadata](new L1Metadata)
object L1Metadata { object L1Metadata {
def apply(tag: Bits, coh: ClientMetadata) = { def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = {
val meta = Wire(new L1Metadata) val meta = Wire(new L1Metadata)
meta.tag := tag meta.tag := tag
meta.coh := coh meta.coh := coh
meta meta
} }
} }
class L1Metadata extends Metadata with L1HellaCacheParameters { class L1Metadata(implicit p: Parameters) extends Metadata()(p) with HasL1HellaCacheParameters {
val coh = new ClientMetadata val coh = new ClientMetadata
} }
class Replay extends HellaCacheReqInternal with HasCoreData class Replay(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData
class ReplayInternal extends HellaCacheReqInternal with HasSDQId class ReplayInternal(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasSDQId
class MSHRReq extends Replay with HasMissInfo class MSHRReq(implicit p: Parameters) extends Replay()(p) with HasMissInfo
class MSHRReqInternal extends ReplayInternal with HasMissInfo class MSHRReqInternal(implicit p: Parameters) extends ReplayInternal()(p) with HasMissInfo
class ProbeInternal extends Probe with HasClientTransactionId class ProbeInternal(implicit p: Parameters) extends Probe()(p) with HasClientTransactionId
class WritebackReq extends Release with CacheParameters { class WritebackReq(implicit p: Parameters) extends Release()(p) with HasCacheParameters {
val way_en = Bits(width = nWays) val way_en = Bits(width = nWays)
} }
class MSHR(id: Int) extends L1HellaCacheModule { class IOMSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) {
// IOMSHR: services one HellaCacheReq at a time with a single built-in
// Acquire (a get for reads, a put otherwise). Reads return the selected word
// through io.resp; non-reads finish as soon as the grant arrives and produce
// no response. `id` is used as the Acquire's client_xact_id.
// NOTE(review): presumably used for requests outside the cacheable region
// (see `cacheable` in MSHRFile) - confirm against the instantiation site.
val io = new Bundle {
val req = Decoupled(new HellaCacheReq).flip
val acquire = Decoupled(new Acquire)
val grant = Valid(new Grant).flip
val resp = Decoupled(new HellaCacheResp)
}
// Extract the word addressed by `addr` from one beat of data `dat`.
// `offset` is the word index inside the beat; appending wordOffBits + 3 zero
// bits scales it to a bit offset (bytes per word, then 8 bits per byte).
def wordFromBeat(addr: UInt, dat: UInt) = {
val offset = addr(beatOffBits - 1, wordOffBits)
val shift = Cat(offset, UInt(0, wordOffBits + 3))
(dat >> shift)(wordBits - 1, 0)
}
// Latched copy of the accepted request.
val req = Reg(new HellaCacheReq)
val req_cmd_sc = req.cmd === M_XSC
// Word captured from the grant for read requests.
val grant_word = Reg(UInt(width = wordBits))
val storegen = new StoreGen64(req.typ, req.addr, req.data)
val loadgen = new LoadGen64(req.typ, req.addr, grant_word, req_cmd_sc)
// Position the store mask at the addressed word inside the beat (the shift
// is in bytes: word index followed by wordOffBits zero bits), and replicate
// the store data into every word of the beat.
val beat_offset = req.addr(beatOffBits - 1, wordOffBits)
val beat_mask = (storegen.mask << Cat(beat_offset, UInt(0, wordOffBits)))
val beat_data = Fill(beatWords, storegen.data)
val addr_byte = req.addr(beatOffBits - 1, 0)
val a_type = Mux(isRead(req.cmd), Acquire.getType, Acquire.putType)
// Union field: byte address, access type and M_XRD for reads; the write
// mask for everything else.
val union = Mux(isRead(req.cmd),
Cat(addr_byte, req.typ, M_XRD), beat_mask)
// idle -> acquire -> grant -> (resp for reads) -> idle
val s_idle :: s_acquire :: s_grant :: s_resp :: Nil = Enum(Bits(), 4)
val state = Reg(init = s_idle)
io.req.ready := (state === s_idle)
io.acquire.valid := (state === s_acquire)
io.acquire.bits := Acquire(
is_builtin_type = Bool(true),
a_type = a_type,
client_xact_id = UInt(id),
addr_block = req.addr(paddrBits - 1, blockOffBits),
addr_beat = req.addr(blockOffBits - 1, beatOffBits),
data = beat_data,
// alloc bit should always be false
union = Cat(union, Bool(false)))
// Response defaults to the latched request; the fields below are then
// overridden by the later connections.
io.resp.valid := (state === s_resp)
io.resp.bits := req
io.resp.bits.has_data := isRead(req.cmd)
io.resp.bits.data := loadgen.byte | req_cmd_sc
io.resp.bits.store_data := req.data
io.resp.bits.nack := Bool(false)
io.resp.bits.replay := io.resp.valid
// Accept a new request and start the transaction.
when (io.req.fire()) {
req := io.req.bits
state := s_acquire
}
when (io.acquire.fire()) {
state := s_grant
}
// On grant: reads capture the addressed word and move on to respond;
// anything else is complete and returns to idle.
when (state === s_grant && io.grant.valid) {
when (isRead(req.cmd)) {
grant_word := wordFromBeat(req.addr, io.grant.bits.data)
state := s_resp
} .otherwise {
state := s_idle
}
}
when (io.resp.fire()) {
state := s_idle
}
}
class MSHR(id: Int)(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val io = new Bundle { val io = new Bundle {
val req_pri_val = Bool(INPUT) val req_pri_val = Bool(INPUT)
val req_pri_rdy = Bool(OUTPUT) val req_pri_rdy = Bool(OUTPUT)
@ -173,7 +263,7 @@ class MSHR(id: Int) extends L1HellaCacheModule {
val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles) // TODO: Zero width? val (refill_cnt, refill_count_done) = Counter(io.mem_grant.valid && gnt_multi_data, refillCycles) // TODO: Zero width?
val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done) val refill_done = io.mem_grant.valid && (!gnt_multi_data || refill_count_done)
val rpq = Module(new Queue(new ReplayInternal, params(ReplayQueueDepth))) val rpq = Module(new Queue(new ReplayInternal, p(ReplayQueueDepth)))
rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(io.req_bits.cmd) rpq.io.enq.valid := (io.req_pri_val && io.req_pri_rdy || io.req_sec_val && sec_rdy) && !isPrefetch(io.req_bits.cmd)
rpq.io.enq.bits := io.req_bits rpq.io.enq.bits := io.req_bits
rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid rpq.io.deq.ready := io.replay.ready && state === s_drain_rpq || state === s_invalid
@ -279,9 +369,10 @@ class MSHR(id: Int) extends L1HellaCacheModule {
} }
} }
class MSHRFile extends L1HellaCacheModule { class MSHRFile(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val io = new Bundle { val io = new Bundle {
val req = Decoupled(new MSHRReq).flip val req = Decoupled(new MSHRReq).flip
val resp = Decoupled(new HellaCacheResp)
val secondary_miss = Bool(OUTPUT) val secondary_miss = Bool(OUTPUT)
val mem_req = Decoupled(new Acquire) val mem_req = Decoupled(new Acquire)
@ -296,11 +387,14 @@ class MSHRFile extends L1HellaCacheModule {
val fence_rdy = Bool(OUTPUT) val fence_rdy = Bool(OUTPUT)
} }
// determine if the request is in the memory region or mmio region
val cacheable = io.req.bits.addr < UInt(mmioBase)
val sdq_val = Reg(init=Bits(0, sdqDepth)) val sdq_val = Reg(init=Bits(0, sdqDepth))
val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0)) val sdq_alloc_id = PriorityEncoder(~sdq_val(sdqDepth-1,0))
val sdq_rdy = !sdq_val.andR val sdq_rdy = !sdq_val.andR
val sdq_enq = io.req.valid && io.req.ready && isWrite(io.req.bits.cmd) val sdq_enq = io.req.valid && io.req.ready && cacheable && isWrite(io.req.bits.cmd)
val sdq = Mem(io.req.bits.data, sdqDepth) val sdq = Mem(sdqDepth, io.req.bits.data)
when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data } when (sdq_enq) { sdq(sdq_alloc_id) := io.req.bits.data }
val idxMatch = Wire(Vec(Bool(), nMSHRs)) val idxMatch = Wire(Vec(Bool(), nMSHRs))
@ -313,7 +407,7 @@ class MSHRFile extends L1HellaCacheModule {
val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs)) val meta_write_arb = Module(new Arbiter(new L1MetaWriteReq, nMSHRs))
val mem_req_arb = Module(new LockingArbiter( val mem_req_arb = Module(new LockingArbiter(
new Acquire, new Acquire,
nMSHRs, nMSHRs + nIOMSHRs,
outerDataBeats, outerDataBeats,
(a: Acquire) => a.hasMultibeatData())) (a: Acquire) => a.hasMultibeatData()))
val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs)) val wb_req_arb = Module(new Arbiter(new WritebackReq, nMSHRs))
@ -332,7 +426,7 @@ class MSHRFile extends L1HellaCacheModule {
idxMatch(i) := mshr.io.idx_match idxMatch(i) := mshr.io.idx_match
tagList(i) := mshr.io.tag tagList(i) := mshr.io.tag
wbTagList(i) := mshr.io.wb_req.bits.addr_block >> UInt(idxBits) wbTagList(i) := mshr.io.wb_req.bits.addr_block >> idxBits
alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy alloc_arb.io.in(i).valid := mshr.io.req_pri_rdy
mshr.io.req_pri_val := alloc_arb.io.in(i).ready mshr.io.req_pri_val := alloc_arb.io.in(i).ready
@ -360,14 +454,44 @@ class MSHRFile extends L1HellaCacheModule {
when (!mshr.io.probe_rdy) { io.probe_rdy := false } when (!mshr.io.probe_rdy) { io.probe_rdy := false }
} }
alloc_arb.io.out.ready := io.req.valid && sdq_rdy && !idx_match alloc_arb.io.out.ready := io.req.valid && sdq_rdy && cacheable && !idx_match
io.meta_read <> meta_read_arb.io.out io.meta_read <> meta_read_arb.io.out
io.meta_write <> meta_write_arb.io.out io.meta_write <> meta_write_arb.io.out
io.mem_req <> mem_req_arb.io.out io.mem_req <> mem_req_arb.io.out
io.wb_req <> wb_req_arb.io.out io.wb_req <> wb_req_arb.io.out
io.req.ready := Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy val mmio_alloc_arb = Module(new Arbiter(Bool(), nIOMSHRs))
val resp_arb = Module(new Arbiter(new HellaCacheResp, nIOMSHRs))
var mmio_rdy = Bool(false)
for (i <- 0 until nIOMSHRs) {
val id = nMSHRs + i
val mshr = Module(new IOMSHR(id))
mmio_alloc_arb.io.in(i).valid := mshr.io.req.ready
mshr.io.req.valid := mmio_alloc_arb.io.in(i).ready
mshr.io.req.bits := io.req.bits
mmio_rdy = mmio_rdy || mshr.io.req.ready
mem_req_arb.io.in(id) <> mshr.io.acquire
mshr.io.grant.bits := io.mem_grant.bits
mshr.io.grant.valid := io.mem_grant.valid &&
io.mem_grant.bits.client_xact_id === UInt(id)
resp_arb.io.in(i) <> mshr.io.resp
when (!mshr.io.req.ready) { io.fence_rdy := Bool(false) }
}
mmio_alloc_arb.io.out.ready := io.req.valid && !cacheable
io.resp <> resp_arb.io.out
io.req.ready := Mux(!cacheable, mmio_rdy,
Mux(idx_match, tag_match && sec_rdy, pri_rdy) && sdq_rdy)
io.secondary_miss := idx_match io.secondary_miss := idx_match
io.refill := refillMux(io.mem_grant.bits.client_xact_id) io.refill := refillMux(io.mem_grant.bits.client_xact_id)
@ -381,7 +505,7 @@ class MSHRFile extends L1HellaCacheModule {
} }
} }
class WritebackUnit extends L1HellaCacheModule { class WritebackUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val io = new Bundle { val io = new Bundle {
val req = Decoupled(new WritebackReq).flip val req = Decoupled(new WritebackReq).flip
val meta_read = Decoupled(new L1MetaReadReq) val meta_read = Decoupled(new L1MetaReadReq)
@ -437,7 +561,7 @@ class WritebackUnit extends L1HellaCacheModule {
// We reissue the meta read as it sets up the mux ctrl for s2_data_muxed // We reissue the meta read as it sets up the mux ctrl for s2_data_muxed
io.meta_read.valid := fire io.meta_read.valid := fire
io.meta_read.bits.idx := req_idx io.meta_read.bits.idx := req_idx
io.meta_read.bits.tag := req.addr_block >> UInt(idxBits) io.meta_read.bits.tag := req.addr_block >> idxBits
io.data_req.valid := fire io.data_req.valid := fire
io.data_req.bits.way_en := req.way_en io.data_req.bits.way_en := req.way_en
@ -461,7 +585,7 @@ class WritebackUnit extends L1HellaCacheModule {
} else { io.data_resp }) } else { io.data_resp })
} }
class ProbeUnit extends L1HellaCacheModule { class ProbeUnit(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val io = new Bundle { val io = new Bundle {
val req = Decoupled(new ProbeInternal).flip val req = Decoupled(new ProbeInternal).flip
val rep = Decoupled(new Release) val rep = Decoupled(new Release)
@ -513,7 +637,9 @@ class ProbeUnit extends L1HellaCacheModule {
req := io.req.bits req := io.req.bits
} }
val reply = old_coh.makeRelease(req) val miss_coh = ClientMetadata.onReset
val reply_coh = Mux(tag_matches, old_coh, miss_coh)
val reply = reply_coh.makeRelease(req)
io.req.ready := state === s_invalid io.req.ready := state === s_invalid
io.rep.valid := state === s_release && io.rep.valid := state === s_release &&
!(tag_matches && old_coh.requiresVoluntaryWriteback()) // Otherwise WBU will issue release !(tag_matches && old_coh.requiresVoluntaryWriteback()) // Otherwise WBU will issue release
@ -534,11 +660,11 @@ class ProbeUnit extends L1HellaCacheModule {
io.wb_req.bits.way_en := way_en io.wb_req.bits.way_en := way_en
} }
class DataArray extends L1HellaCacheModule { class DataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val io = new Bundle { val io = new Bundle {
val read = Decoupled(new L1DataReadReq).flip val read = Decoupled(new L1DataReadReq).flip
val write = Decoupled(new L1DataWriteReq).flip val write = Decoupled(new L1DataWriteReq).flip
val resp = Vec.fill(nWays){Bits(OUTPUT, encRowBits)} val resp = Vec(Bits(OUTPUT, encRowBits), nWays)
} }
val waddr = io.write.bits.addr >> rowOffBits val waddr = io.write.bits.addr >> rowOffBits
@ -551,13 +677,12 @@ class DataArray extends L1HellaCacheModule {
val resp = Wire(Vec(Bits(width = encRowBits), rowWords)) val resp = Wire(Vec(Bits(width = encRowBits), rowWords))
val r_raddr = RegEnable(io.read.bits.addr, io.read.valid) val r_raddr = RegEnable(io.read.bits.addr, io.read.valid)
for (p <- 0 until resp.size) { for (p <- 0 until resp.size) {
val array = SeqMem(Bits(width=encRowBits), nSets*refillCycles) val array = SeqMem(Vec(Bits(width=encDataBits), rowWords), nSets*refillCycles)
when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) { when (wway_en.orR && io.write.valid && io.write.bits.wmask(p)) {
val data = Fill(rowWords, io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p)) val data = Vec.fill(rowWords)(io.write.bits.data(encDataBits*(p+1)-1,encDataBits*p))
val mask = FillInterleaved(encDataBits, wway_en) array.write(waddr, data, wway_en.toBools)
array.write(waddr, data, mask)
} }
resp(p) := array.read(raddr, rway_en.orR && io.read.valid) resp(p) := array.read(raddr, rway_en.orR && io.read.valid).toBits
} }
for (dw <- 0 until rowWords) { for (dw <- 0 until rowWords) {
val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw))) val r = Vec(resp.map(_(encDataBits*(dw+1)-1,encDataBits*dw)))
@ -568,13 +693,13 @@ class DataArray extends L1HellaCacheModule {
} }
} }
} else { } else {
val wmask = FillInterleaved(encDataBits, io.write.bits.wmask)
for (w <- 0 until nWays) { for (w <- 0 until nWays) {
val array = SeqMem(Bits(width=encRowBits), nSets*refillCycles) val array = SeqMem(Vec(Bits(width=encDataBits), rowWords), nSets*refillCycles)
when (io.write.bits.way_en(w) && io.write.valid) { when (io.write.bits.way_en(w) && io.write.valid) {
array.write(waddr, io.write.bits.data, wmask) val data = Vec.tabulate(rowWords)(i => io.write.bits.data(encDataBits*(i+1)-1,encDataBits*i))
array.write(waddr, data, io.write.bits.wmask.toBools)
} }
io.resp(w) := array.read(raddr, io.read.bits.way_en(w) && io.read.valid) io.resp(w) := array.read(raddr, io.read.bits.way_en(w) && io.read.valid).toBits
} }
} }
@ -582,18 +707,18 @@ class DataArray extends L1HellaCacheModule {
io.write.ready := Bool(true) io.write.ready := Bool(true)
} }
class HellaCache extends L1HellaCacheModule { class HellaCache(implicit p: Parameters) extends L1HellaCacheModule()(p) {
val io = new Bundle { val io = new Bundle {
val cpu = (new HellaCacheIO).flip val cpu = (new HellaCacheIO).flip
val ptw = new TLBPTWIO() val ptw = new TLBPTWIO()
val mem = new ClientTileLinkIO val mem = new ClientTileLinkIO
} }
require(params(LRSCCycles) >= 32) // ISA requires 16-insn LRSC sequences to succeed require(lrscCycles >= 32) // ISA requires 16-insn LRSC sequences to succeed
require(isPow2(nSets)) require(isPow2(nSets))
require(isPow2(nWays)) // TODO: relax this require(isPow2(nWays)) // TODO: relax this
require(params(RowBits) <= params(TLDataBits)) require(rowBits <= outerDataBits)
require(paddrBits-blockOffBits == params(TLBlockAddrBits) ) require(paddrBits-blockOffBits == outerAddrBits)
require(untagBits <= pgIdxBits) require(untagBits <= pgIdxBits)
val wb = Module(new WritebackUnit) val wb = Module(new WritebackUnit)
@ -620,7 +745,6 @@ class HellaCache extends L1HellaCacheModule {
val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en) val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en)
val s1_read = isRead(s1_req.cmd) val s1_read = isRead(s1_req.cmd)
val s1_write = isWrite(s1_req.cmd) val s1_write = isWrite(s1_req.cmd)
val s1_sc = s1_req.cmd === M_XSC
val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd)
val dtlb = Module(new TLB) val dtlb = Module(new TLB)
@ -672,8 +796,8 @@ class HellaCache extends L1HellaCacheModule {
io.cpu.xcpt.ma.ld := s1_read && misaligned io.cpu.xcpt.ma.ld := s1_read && misaligned
io.cpu.xcpt.ma.st := s1_write && misaligned io.cpu.xcpt.ma.st := s1_write && misaligned
io.cpu.xcpt.pf.ld := !s1_req.phys && s1_read && dtlb.io.resp.xcpt_ld io.cpu.xcpt.pf.ld := s1_read && dtlb.io.resp.xcpt_ld
io.cpu.xcpt.pf.st := !s1_req.phys && s1_write && dtlb.io.resp.xcpt_st io.cpu.xcpt.pf.st := s1_write && dtlb.io.resp.xcpt_st
assert (!(Reg(next= assert (!(Reg(next=
(io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) && (io.cpu.xcpt.ma.ld || io.cpu.xcpt.ma.st || io.cpu.xcpt.pf.ld || io.cpu.xcpt.pf.st)) &&
@ -738,7 +862,7 @@ class HellaCache extends L1HellaCacheModule {
when (lrsc_valid) { lrsc_count := lrsc_count - 1 } when (lrsc_valid) { lrsc_count := lrsc_count - 1 }
when (s2_valid_masked && s2_hit || s2_replay) { when (s2_valid_masked && s2_hit || s2_replay) {
when (s2_lr) { when (s2_lr) {
when (!lrsc_valid) { lrsc_count := params(LRSCCycles)-1 } when (!lrsc_valid) { lrsc_count := lrscCycles-1 }
lrsc_addr := s2_req.addr >> blockOffBits lrsc_addr := s2_req.addr >> blockOffBits
} }
when (s2_sc) { when (s2_sc) {
@ -749,7 +873,7 @@ class HellaCache extends L1HellaCacheModule {
val s2_data = Wire(Vec(Bits(width=encRowBits), nWays)) val s2_data = Wire(Vec(Bits(width=encRowBits), nWays))
for (w <- 0 until nWays) { for (w <- 0 until nWays) {
val regs = Reg(Vec.fill(rowWords){Bits(width = encDataBits)}) val regs = Reg(Vec(Bits(width = encDataBits), rowWords))
val en1 = s1_clk_en && s1_tag_eq_way(w) val en1 = s1_clk_en && s1_tag_eq_way(w)
for (i <- 0 until regs.size) { for (i <- 0 until regs.size) {
val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback) val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback)
@ -782,7 +906,7 @@ class HellaCache extends L1HellaCacheModule {
writeArb.io.in(0).bits.way_en := s3_way writeArb.io.in(0).bits.way_en := s3_way
// replacement policy // replacement policy
val replacer = params(Replacer)() val replacer = p(Replacer)()
val s1_replaced_way_en = UIntToOH(replacer.way) val s1_replaced_way_en = UIntToOH(replacer.way)
val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en)) val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en))
val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq) val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq)
@ -825,10 +949,14 @@ class HellaCache extends L1HellaCacheModule {
mshrs.io.mem_grant.valid := narrow_grant.fire() mshrs.io.mem_grant.valid := narrow_grant.fire()
mshrs.io.mem_grant.bits := narrow_grant.bits mshrs.io.mem_grant.bits := narrow_grant.bits
narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData() narrow_grant.ready := writeArb.io.in(1).ready || !narrow_grant.bits.hasData()
writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() /* The last clause here is necessary in order to prevent the responses for
* the IOMSHRs from being written into the data array. It works because the
* IOMSHR ids start right after the ones for the regular MSHRs. */
writeArb.io.in(1).valid := narrow_grant.valid && narrow_grant.bits.hasData() &&
narrow_grant.bits.client_xact_id < UInt(nMSHRs)
writeArb.io.in(1).bits.addr := mshrs.io.refill.addr writeArb.io.in(1).bits.addr := mshrs.io.refill.addr
writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en
writeArb.io.in(1).bits.wmask := ~UInt(0, nWays) writeArb.io.in(1).bits.wmask := ~UInt(0, rowWords)
writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0) writeArb.io.in(1).bits.data := narrow_grant.bits.data(encRowBits-1,0)
data.io.read <> readArb.io.out data.io.read <> readArb.io.out
readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked readArb.io.out.ready := !narrow_grant.valid || narrow_grant.ready // insert bubble if refill gets blocked
@ -864,7 +992,7 @@ class HellaCache extends L1HellaCacheModule {
// load data subword mux/sign extension // load data subword mux/sign extension
val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits)))
val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass)
val loadgen = new LoadGen(s2_req.typ, s2_req.addr, s2_data_word, s2_sc) val loadgen = new LoadGen64(s2_req.typ, s2_req.addr, s2_data_word, s2_sc)
amoalu.io.addr := s2_req.addr amoalu.io.addr := s2_req.addr
amoalu.io.cmd := s2_req.cmd amoalu.io.cmd := s2_req.cmd
@ -894,22 +1022,31 @@ class HellaCache extends L1HellaCacheModule {
io.cpu.req.ready := Bool(false) io.cpu.req.ready := Bool(false)
} }
io.cpu.resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable val cache_resp = Wire(Valid(new HellaCacheResp))
io.cpu.resp.bits.nack := s2_valid && s2_nack cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
io.cpu.resp.bits := s2_req cache_resp.bits := s2_req
io.cpu.resp.bits.has_data := isRead(s2_req.cmd) || s2_sc cache_resp.bits.has_data := isRead(s2_req.cmd)
io.cpu.resp.bits.replay := s2_replay cache_resp.bits.data := loadgen.byte | s2_sc_fail
io.cpu.resp.bits.data := loadgen.word cache_resp.bits.store_data := s2_req.data
io.cpu.resp.bits.data_subword := loadgen.byte | s2_sc_fail cache_resp.bits.nack := s2_valid && s2_nack
io.cpu.resp.bits.store_data := s2_req.data cache_resp.bits.replay := s2_replay
io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
io.cpu.replay_next.valid := s1_replay && (s1_read || s1_sc) val uncache_resp = Wire(Valid(new HellaCacheResp))
uncache_resp.bits := mshrs.io.resp.bits
uncache_resp.valid := mshrs.io.resp.valid
val cache_pass = s2_valid || s2_replay
mshrs.io.resp.ready := !cache_pass
io.cpu.resp := Mux(cache_pass, cache_resp, uncache_resp)
io.cpu.resp.bits.data_word_bypass := loadgen.word
io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
io.cpu.replay_next.valid := s1_replay && s1_read
io.cpu.replay_next.bits := s1_req.tag io.cpu.replay_next.bits := s1_req.tag
} }
// exposes a sane decoupled request interface // exposes a sane decoupled request interface
class SimpleHellaCacheIF extends Module class SimpleHellaCacheIF(implicit p: Parameters) extends Module
{ {
val io = new Bundle { val io = new Bundle {
val requestor = new HellaCacheIO().flip val requestor = new HellaCacheIO().flip

View File

@ -6,32 +6,32 @@ import Chisel._
import uncore._ import uncore._
import Util._ import Util._
class PTWReq extends CoreBundle { class PTWReq(implicit p: Parameters) extends CoreBundle()(p) {
val addr = UInt(width = vpnBits) val addr = UInt(width = vpnBits)
val prv = Bits(width = 2) val prv = Bits(width = 2)
val store = Bool() val store = Bool()
val fetch = Bool() val fetch = Bool()
} }
class PTWResp extends CoreBundle { class PTWResp(implicit p: Parameters) extends CoreBundle()(p) {
val error = Bool() val error = Bool()
val pte = new PTE val pte = new PTE
} }
class TLBPTWIO extends CoreBundle { class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
val req = Decoupled(new PTWReq) val req = Decoupled(new PTWReq)
val resp = Valid(new PTWResp).flip val resp = Valid(new PTWResp).flip
val status = new MStatus().asInput val status = new MStatus().asInput
val invalidate = Bool(INPUT) val invalidate = Bool(INPUT)
} }
class DatapathPTWIO extends CoreBundle { class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) {
val ptbr = UInt(INPUT, paddrBits) val ptbr = UInt(INPUT, paddrBits)
val invalidate = Bool(INPUT) val invalidate = Bool(INPUT)
val status = new MStatus().asInput val status = new MStatus().asInput
} }
class PTE extends CoreBundle { class PTE(implicit p: Parameters) extends CoreBundle()(p) {
val ppn = Bits(width = ppnBits) val ppn = Bits(width = ppnBits)
val reserved_for_software = Bits(width = 3) val reserved_for_software = Bits(width = 3)
val d = Bool() val d = Bool()
@ -51,10 +51,9 @@ class PTE extends CoreBundle {
Mux(prv(0), Mux(fetch, sx(), Mux(store, sw(), sr())), Mux(fetch, ux(), Mux(store, uw(), ur()))) Mux(prv(0), Mux(fetch, sx(), Mux(store, sw(), sr())), Mux(fetch, ux(), Mux(store, uw(), ur())))
} }
class PTW(n: Int) extends CoreModule class PTW(n: Int)(implicit p: Parameters) extends CoreModule()(p) {
{
val io = new Bundle { val io = new Bundle {
val requestor = Vec.fill(n){new TLBPTWIO}.flip val requestor = Vec(new TLBPTWIO, n).flip
val mem = new HellaCacheIO val mem = new HellaCacheIO
val dpath = new DatapathPTWIO val dpath = new DatapathPTWIO
} }
@ -87,8 +86,8 @@ class PTW(n: Int) extends CoreModule
val plru = new PseudoLRU(size) val plru = new PseudoLRU(size)
val valid = Reg(Vec(Bool(), size)) val valid = Reg(Vec(Bool(), size))
val validBits = valid.toBits val validBits = valid.toBits
val tags = Mem(UInt(width = paddrBits), size) val tags = Mem(size, UInt(width = paddrBits))
val data = Mem(UInt(width = ppnBits), size) val data = Mem(size, UInt(width = ppnBits))
val hits = Vec(tags.map(_ === pte_addr)).toBits & validBits val hits = Vec(tags.map(_ === pte_addr)).toBits & validBits
val hit = hits.orR val hit = hits.orR
@ -125,7 +124,7 @@ class PTW(n: Int) extends CoreModule
val resp_err = state === s_error val resp_err = state === s_error
val resp_val = state === s_done || resp_err val resp_val = state === s_done || resp_err
val r_resp_ppn = io.mem.req.bits.addr >> UInt(pgIdxBits) val r_resp_ppn = io.mem.req.bits.addr >> pgIdxBits
val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count) val resp_ppn = Vec((0 until pgLevels-1).map(i => Cat(r_resp_ppn >> pgLevelBits*(pgLevels-i-1), r_req.addr(pgLevelBits*(pgLevels-i-1)-1,0))) :+ r_resp_ppn)(count)
for (i <- 0 until io.requestor.size) { for (i <- 0 until io.requestor.size) {

View File

@ -20,24 +20,21 @@ class RoCCInstruction extends Bundle
val opcode = Bits(width = 7) val opcode = Bits(width = 7)
} }
class RoCCCommand extends CoreBundle class RoCCCommand(implicit p: Parameters) extends CoreBundle()(p) {
{
val inst = new RoCCInstruction val inst = new RoCCInstruction
val rs1 = Bits(width = xLen) val rs1 = Bits(width = xLen)
val rs2 = Bits(width = xLen) val rs2 = Bits(width = xLen)
} }
class RoCCResponse extends CoreBundle class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) {
{
val rd = Bits(width = 5) val rd = Bits(width = 5)
val data = Bits(width = xLen) val data = Bits(width = xLen)
} }
class RoCCInterface extends Bundle class RoCCInterface(implicit p: Parameters) extends Bundle {
{
val cmd = Decoupled(new RoCCCommand).flip val cmd = Decoupled(new RoCCCommand).flip
val resp = Decoupled(new RoCCResponse) val resp = Decoupled(new RoCCResponse)
val mem = new HellaCacheIO val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
val busy = Bool(OUTPUT) val busy = Bool(OUTPUT)
val s = Bool(INPUT) val s = Bool(INPUT)
val interrupt = Bool(OUTPUT) val interrupt = Bool(OUTPUT)
@ -53,15 +50,12 @@ class RoCCInterface extends Bundle
val exception = Bool(INPUT) val exception = Bool(INPUT)
} }
abstract class RoCC extends CoreModule abstract class RoCC(implicit p: Parameters) extends CoreModule()(p) {
{
val io = new RoCCInterface val io = new RoCCInterface
io.mem.req.bits.phys := Bool(true) // don't perform address translation io.mem.req.bits.phys := Bool(true) // don't perform address translation
} }
class AccumulatorExample extends RoCC class AccumulatorExample(n: Int = 4)(implicit p: Parameters) extends RoCC()(p) {
{
val n = 4
val regfile = Mem(UInt(width = xLen), n) val regfile = Mem(UInt(width = xLen), n)
val busy = Reg(init=Vec(Bool(false), n)) val busy = Reg(init=Vec(Bool(false), n))

View File

@ -7,13 +7,13 @@ import junctions._
import uncore._ import uncore._
import Util._ import Util._
case object BuildFPU extends Field[Option[() => FPU]] case object BuildFPU extends Field[Option[Parameters => FPU]]
case object FDivSqrt extends Field[Boolean] case object FDivSqrt extends Field[Boolean]
case object XLen extends Field[Int] case object XLen extends Field[Int]
case object NMultXpr extends Field[Int]
case object FetchWidth extends Field[Int] case object FetchWidth extends Field[Int]
case object RetireWidth extends Field[Int] case object RetireWidth extends Field[Int]
case object UseVM extends Field[Boolean] case object UseVM extends Field[Boolean]
case object UsePerfCounters extends Field[Boolean]
case object FastLoadWord extends Field[Boolean] case object FastLoadWord extends Field[Boolean]
case object FastLoadByte extends Field[Boolean] case object FastLoadByte extends Field[Boolean]
case object FastMulDiv extends Field[Boolean] case object FastMulDiv extends Field[Boolean]
@ -22,54 +22,57 @@ case object CoreDataBits extends Field[Int]
case object CoreDCacheReqTagBits extends Field[Int] case object CoreDCacheReqTagBits extends Field[Int]
case object NCustomMRWCSRs extends Field[Int] case object NCustomMRWCSRs extends Field[Int]
abstract trait CoreParameters extends UsesParameters { trait HasCoreParameters extends HasAddrMapParameters {
val xLen = params(XLen) implicit val p: Parameters
val paddrBits = params(PAddrBits) val xLen = p(XLen)
val vaddrBits = params(VAddrBits)
val pgIdxBits = params(PgIdxBits)
val ppnBits = params(PPNBits)
val vpnBits = params(VPNBits)
val pgLevels = params(PgLevels)
val pgLevelBits = params(PgLevelBits)
val asIdBits = params(ASIdBits)
val retireWidth = params(RetireWidth) val retireWidth = p(RetireWidth)
val coreFetchWidth = params(FetchWidth) val fetchWidth = p(FetchWidth)
val coreInstBits = params(CoreInstBits) val coreInstBits = p(CoreInstBits)
val coreInstBytes = coreInstBits/8 val coreInstBytes = coreInstBits/8
val coreDataBits = xLen val coreDataBits = xLen
val coreDataBytes = coreDataBits/8 val coreDataBytes = coreDataBits/8
val coreDCacheReqTagBits = params(CoreDCacheReqTagBits) val coreDCacheReqTagBits = p(CoreDCacheReqTagBits)
val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits val coreMaxAddrBits = math.max(ppnBits,vpnBits+1) + pgIdxBits
val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt
val mmioBase = p(MMIOBase)
val nCustomMrwCsrs = p(NCustomMRWCSRs)
if(params(FastLoadByte)) require(params(FastLoadWord)) val usingVM = p(UseVM)
val usingFPU = !p(BuildFPU).isEmpty
val usingFDivSqrt = p(FDivSqrt)
val usingRoCC = !p(BuildRoCC).isEmpty
val usingFastMulDiv = p(FastMulDiv)
val fastLoadWord = p(FastLoadWord)
val fastLoadByte = p(FastLoadByte)
// Print out log of committed instructions and their writeback values.
// Requires post-processing due to out-of-order writebacks.
val enableCommitLog = false
val usingPerfCounters = p(UsePerfCounters)
if (fastLoadByte) require(fastLoadWord)
} }
abstract trait RocketCoreParameters extends CoreParameters abstract class CoreModule(implicit val p: Parameters) extends Module
{ with HasCoreParameters
require(params(FetchWidth) == 1) // for now... abstract class CoreBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
require(params(RetireWidth) == 1) // for now... with HasCoreParameters
}
abstract class CoreBundle extends Bundle with CoreParameters class Rocket(implicit p: Parameters) extends CoreModule()(p) {
abstract class CoreModule extends Module with CoreParameters
class Rocket extends CoreModule
{
val io = new Bundle { val io = new Bundle {
val host = new HTIFIO val host = new HtifIO
val imem = new CPUFrontendIO val imem = new FrontendIO()(p.alterPartial({case CacheName => "L1I" }))
val dmem = new HellaCacheIO val dmem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
val ptw = new DatapathPTWIO().flip val ptw = new DatapathPTWIO().flip
val fpu = new FPUIO().flip val fpu = new FPUIO().flip
val rocc = new RoCCInterface().flip val rocc = new RoCCInterface().flip
} }
var decode_table = XDecode.table var decode_table = XDecode.table
if (!params(BuildFPU).isEmpty) decode_table ++= FDecode.table if (usingFPU) decode_table ++= FDecode.table
if (!params(BuildFPU).isEmpty && params(FDivSqrt)) decode_table ++= FDivSqrtDecode.table if (usingFPU && usingFDivSqrt) decode_table ++= FDivSqrtDecode.table
if (!params(BuildRoCC).isEmpty) decode_table ++= RoCCDecode.table if (usingRoCC) decode_table ++= RoCCDecode.table
val ex_ctrl = Reg(new IntCtrlSigs) val ex_ctrl = Reg(new IntCtrlSigs)
val mem_ctrl = Reg(new IntCtrlSigs) val mem_ctrl = Reg(new IntCtrlSigs)
@ -117,7 +120,7 @@ class Rocket extends CoreModule
// decode stage // decode stage
val id_pc = io.imem.resp.bits.pc val id_pc = io.imem.resp.bits.pc
val id_inst = io.imem.resp.bits.data(0).toBits; require(params(FetchWidth) == 1) val id_inst = io.imem.resp.bits.data(0).toBits; require(fetchWidth == 1)
val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table) val id_ctrl = Wire(new IntCtrlSigs()).decode(id_inst, decode_table)
val id_raddr3 = id_inst(31,27) val id_raddr3 = id_inst(31,27)
val id_raddr2 = id_inst(24,20) val id_raddr2 = id_inst(24,20)
@ -150,7 +153,7 @@ class Rocket extends CoreModule
val id_amo_rl = id_inst(25) val id_amo_rl = id_inst(25)
val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl val id_fence_next = id_ctrl.fence || id_ctrl.amo && id_amo_rl
val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid val id_mem_busy = !io.dmem.ordered || io.dmem.req.valid
val id_rocc_busy = Bool(!params(BuildRoCC).isEmpty) && val id_rocc_busy = Bool(usingRoCC) &&
(io.rocc.busy || ex_reg_valid && ex_ctrl.rocc || (io.rocc.busy || ex_reg_valid && ex_ctrl.rocc ||
mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc) mem_reg_valid && mem_ctrl.rocc || wb_reg_valid && wb_ctrl.rocc)
id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy id_reg_fence := id_fence_next || id_reg_fence && id_mem_busy
@ -163,8 +166,8 @@ class Rocket extends CoreModule
(id_illegal_insn, UInt(Causes.illegal_instruction)))) (id_illegal_insn, UInt(Causes.illegal_instruction))))
val dcache_bypass_data = val dcache_bypass_data =
if(params(FastLoadByte)) io.dmem.resp.bits.data_subword if (fastLoadByte) io.dmem.resp.bits.data
else if(params(FastLoadWord)) io.dmem.resp.bits.data else if (fastLoadWord) io.dmem.resp.bits.data_word_bypass
else wb_reg_wdata else wb_reg_wdata
// detect bypass opportunities // detect bypass opportunities
@ -180,9 +183,9 @@ class Rocket extends CoreModule
// execute stage // execute stage
val bypass_mux = Vec(bypass_sources.map(_._3)) val bypass_mux = Vec(bypass_sources.map(_._3))
val ex_reg_rs_bypass = Reg(Vec.fill(id_raddr.size)(Bool())) val ex_reg_rs_bypass = Reg(Vec(Bool(), id_raddr.size))
val ex_reg_rs_lsb = Reg(Vec.fill(id_raddr.size)(Bits())) val ex_reg_rs_lsb = Reg(Vec(UInt(), id_raddr.size))
val ex_reg_rs_msb = Reg(Vec.fill(id_raddr.size)(Bits())) val ex_reg_rs_msb = Reg(Vec(UInt(), id_raddr.size))
val ex_rs = for (i <- 0 until id_raddr.size) val ex_rs = for (i <- 0 until id_raddr.size)
yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i))) yield Mux(ex_reg_rs_bypass(i), bypass_mux(ex_reg_rs_lsb(i)), Cat(ex_reg_rs_msb(i), ex_reg_rs_lsb(i)))
val ex_imm = imm(ex_ctrl.sel_imm, ex_reg_inst) val ex_imm = imm(ex_ctrl.sel_imm, ex_reg_inst)
@ -201,8 +204,9 @@ class Rocket extends CoreModule
alu.io.in1 := ex_op1.toUInt alu.io.in1 := ex_op1.toUInt
// multiplier and divider // multiplier and divider
val div = Module(new MulDiv(mulUnroll = if(params(FastMulDiv)) 8 else 1, val div = Module(new MulDiv(width = xLen,
earlyOut = params(FastMulDiv))) unroll = if(usingFastMulDiv) 8 else 1,
earlyOut = usingFastMulDiv))
div.io.req.valid := ex_reg_valid && ex_ctrl.div div.io.req.valid := ex_reg_valid && ex_ctrl.div
div.io.req.bits.dw := ex_ctrl.alu_dw div.io.req.bits.dw := ex_ctrl.alu_dw
div.io.req.bits.fn := ex_ctrl.alu_fn div.io.req.bits.fn := ex_ctrl.alu_fn
@ -331,7 +335,7 @@ class Rocket extends CoreModule
// writeback arbitration // writeback arbitration
val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool val dmem_resp_xpu = !io.dmem.resp.bits.tag(0).toBool
val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool val dmem_resp_fpu = io.dmem.resp.bits.tag(0).toBool
val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt >> UInt(1) val dmem_resp_waddr = io.dmem.resp.bits.tag.toUInt()(5,1)
val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data val dmem_resp_valid = io.dmem.resp.valid && io.dmem.resp.bits.has_data
val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data val dmem_resp_replay = io.dmem.resp.bits.replay && io.dmem.resp.bits.has_data
@ -339,7 +343,7 @@ class Rocket extends CoreModule
val ll_wdata = Wire(init = div.io.resp.bits.data) val ll_wdata = Wire(init = div.io.resp.bits.data)
val ll_waddr = Wire(init = div.io.resp.bits.tag) val ll_waddr = Wire(init = div.io.resp.bits.tag)
val ll_wen = Wire(init = div.io.resp.fire()) val ll_wen = Wire(init = div.io.resp.fire())
if (!params(BuildRoCC).isEmpty) { if (usingRoCC) {
io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd) io.rocc.resp.ready := !(wb_reg_valid && wb_ctrl.wxd)
when (io.rocc.resp.fire()) { when (io.rocc.resp.fire()) {
div.io.resp.ready := Bool(false) div.io.resp.ready := Bool(false)
@ -350,7 +354,7 @@ class Rocket extends CoreModule
} }
when (dmem_resp_replay && dmem_resp_xpu) { when (dmem_resp_replay && dmem_resp_xpu) {
div.io.resp.ready := Bool(false) div.io.resp.ready := Bool(false)
if (!params(BuildRoCC).isEmpty) if (usingRoCC)
io.rocc.resp.ready := Bool(false) io.rocc.resp.ready := Bool(false)
ll_waddr := dmem_resp_waddr ll_waddr := dmem_resp_waddr
ll_wen := Bool(true) ll_wen := Bool(true)
@ -360,7 +364,7 @@ class Rocket extends CoreModule
val wb_wen = wb_valid && wb_ctrl.wxd val wb_wen = wb_valid && wb_ctrl.wxd
val rf_wen = wb_wen || ll_wen val rf_wen = wb_wen || ll_wen
val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr) val rf_waddr = Mux(ll_wen, ll_waddr, wb_waddr)
val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data_subword, val rf_wdata = Mux(dmem_resp_valid && dmem_resp_xpu, io.dmem.resp.bits.data,
Mux(ll_wen, ll_wdata, Mux(ll_wen, ll_wdata,
Mux(wb_ctrl.csr != CSR.N, csr.io.rw.rdata, Mux(wb_ctrl.csr != CSR.N, csr.io.rw.rdata,
wb_reg_wdata))) wb_reg_wdata)))
@ -404,7 +408,7 @@ class Rocket extends CoreModule
// stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage. // stall for RAW/WAW hazards on CSRs, LB/LH, and mul/div in memory stage.
val mem_mem_cmd_bh = val mem_mem_cmd_bh =
if (params(FastLoadWord)) Bool(!params(FastLoadByte)) && mem_reg_slow_bypass if (fastLoadWord) Bool(!fastLoadByte) && mem_reg_slow_bypass
else Bool(true) else Bool(true)
val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc val mem_cannot_bypass = mem_ctrl.csr != CSR.N || mem_ctrl.mem && mem_mem_cmd_bh || mem_ctrl.div || mem_ctrl.fp || mem_ctrl.rocc
val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr) val data_hazard_mem = mem_ctrl.wxd && checkHazards(hazard_targets, _ === mem_waddr)
@ -417,7 +421,7 @@ class Rocket extends CoreModule
val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr) val fp_data_hazard_wb = wb_ctrl.wfd && checkHazards(fp_hazard_targets, _ === wb_waddr)
val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb) val id_wb_hazard = wb_reg_valid && (data_hazard_wb && wb_set_sboard || fp_data_hazard_wb)
val id_stall_fpu = if (!params(BuildFPU).isEmpty) { val id_stall_fpu = if (usingFPU) {
val fp_sboard = new Scoreboard(32) val fp_sboard = new Scoreboard(32)
fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr) fp_sboard.set((wb_dcache_miss && wb_ctrl.wfd || io.fpu.sboard_set) && wb_valid, wb_waddr)
fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr) fp_sboard.clear(dmem_resp_replay && dmem_resp_fpu, dmem_resp_waddr)
@ -430,7 +434,7 @@ class Rocket extends CoreModule
id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard || id_ex_hazard || id_mem_hazard || id_wb_hazard || id_sboard_hazard ||
id_ctrl.fp && id_stall_fpu || id_ctrl.fp && id_stall_fpu ||
id_ctrl.mem && !io.dmem.req.ready || id_ctrl.mem && !io.dmem.req.ready ||
Bool(!params(BuildRoCC).isEmpty) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready || Bool(usingRoCC) && wb_reg_rocc_pending && id_ctrl.rocc && !io.rocc.cmd.ready ||
id_do_fence || id_do_fence ||
csr.io.csr_stall csr.io.csr_stall
ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt ctrl_killd := !io.imem.resp.valid || take_pc || ctrl_stalld || csr.io.interrupt
@ -470,7 +474,7 @@ class Rocket extends CoreModule
io.fpu.inst := id_inst io.fpu.inst := id_inst
io.fpu.fromint_data := ex_rs(0) io.fpu.fromint_data := ex_rs(0)
io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu io.fpu.dmem_resp_val := dmem_resp_valid && dmem_resp_fpu
io.fpu.dmem_resp_data := io.dmem.resp.bits.data io.fpu.dmem_resp_data := io.dmem.resp.bits.data_word_bypass
io.fpu.dmem_resp_type := io.dmem.resp.bits.typ io.fpu.dmem_resp_type := io.dmem.resp.bits.typ
io.fpu.dmem_resp_tag := dmem_resp_waddr io.fpu.dmem_resp_tag := dmem_resp_waddr
@ -482,7 +486,7 @@ class Rocket extends CoreModule
io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt io.dmem.req.bits.addr := Cat(vaSign(ex_rs(0), alu.io.adder_out), alu.io.adder_out(vaddrBits-1,0)).toUInt
io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp) io.dmem.req.bits.tag := Cat(ex_waddr, ex_ctrl.fp)
io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2) io.dmem.req.bits.data := Mux(mem_ctrl.fp, io.fpu.store_data, mem_reg_rs2)
require(params(CoreDCacheReqTagBits) >= 6) require(p(CoreDCacheReqTagBits) >= 6)
io.dmem.invalidate_lr := wb_xcpt io.dmem.invalidate_lr := wb_xcpt
io.rocc.cmd.valid := wb_rocc_val io.rocc.cmd.valid := wb_rocc_val
@ -492,19 +496,50 @@ class Rocket extends CoreModule
io.rocc.cmd.bits.rs1 := wb_reg_wdata io.rocc.cmd.bits.rs1 := wb_reg_wdata
io.rocc.cmd.bits.rs2 := wb_reg_rs2 io.rocc.cmd.bits.rs2 := wb_reg_rs2
if (!params(BuildFPU).isEmpty && !params(BuildRoCC).isEmpty) { if (!p(BuildFPU).isEmpty && !p(BuildRoCC).isEmpty) {
io.fpu.cp_req <> io.rocc.fpu_req io.fpu.cp_req <> io.rocc.fpu_req
io.fpu.cp_resp <> io.rocc.fpu_resp io.fpu.cp_resp <> io.rocc.fpu_resp
} else { } else {
io.fpu.cp_req.valid := Bool(false) io.fpu.cp_req.valid := Bool(false)
} }
printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n", if (enableCommitLog) {
val pc = Wire(SInt(width=64))
pc := wb_reg_pc
val inst = wb_reg_inst
val rd = RegNext(RegNext(RegNext(id_waddr)))
val wfd = wb_ctrl.wfd
val wxd = wb_ctrl.wxd
val has_data = wb_wen && !wb_set_sboard
val priv = csr.io.status.prv
when (wb_valid) {
when (wfd) {
printf ("%d 0x%x (0x%x) f%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd+UInt(32))
}
.elsewhen (wxd && rd != UInt(0) && has_data) {
printf ("%d 0x%x (0x%x) x%d 0x%x\n", priv, pc, inst, rd, rf_wdata)
}
.elsewhen (wxd && rd != UInt(0) && !has_data) {
printf ("%d 0x%x (0x%x) x%d p%d 0xXXXXXXXXXXXXXXXX\n", priv, pc, inst, rd, rd)
}
.otherwise {
printf ("%d 0x%x (0x%x)\n", priv, pc, inst)
}
}
when (ll_wen && rf_waddr != UInt(0)) {
printf ("x%d p%d 0x%x\n", rf_waddr, rf_waddr, rf_wdata)
}
}
else {
printf("C%d: %d [%d] pc=[%x] W[r%d=%x][%d] R[r%d=%x] R[r%d=%x] inst=[%x] DASM(%x)\n",
io.host.id, csr.io.time(32,0), wb_valid, wb_reg_pc, io.host.id, csr.io.time(32,0), wb_valid, wb_reg_pc,
Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen, Mux(rf_wen, rf_waddr, UInt(0)), rf_wdata, rf_wen,
wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))), wb_reg_inst(19,15), Reg(next=Reg(next=ex_rs(0))),
wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))), wb_reg_inst(24,20), Reg(next=Reg(next=ex_rs(1))),
wb_reg_inst, wb_reg_inst) wb_reg_inst, wb_reg_inst)
}
def checkExceptions(x: Seq[(Bool, UInt)]) = def checkExceptions(x: Seq[(Bool, UInt)]) =
(x.map(_._1).reduce(_||_), PriorityMux(x)) (x.map(_._1).reduce(_||_), PriorityMux(x))
@ -541,7 +576,7 @@ class Rocket extends CoreModule
} }
class RegFile { class RegFile {
private val rf = Mem(UInt(width = 64), 31) private val rf = Mem(31, UInt(width = 64))
private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]() private val reads = collection.mutable.ArrayBuffer[(UInt,UInt)]()
private var canRead = true private var canRead = true
def read(addr: UInt) = { def read(addr: UInt) = {

View File

@ -9,24 +9,29 @@ import Util._
case object CoreName extends Field[String] case object CoreName extends Field[String]
case object NDCachePorts extends Field[Int] case object NDCachePorts extends Field[Int]
case object NPTWPorts extends Field[Int] case object NPTWPorts extends Field[Int]
case object BuildRoCC extends Field[Option[() => RoCC]] case object BuildRoCC extends Field[Option[Parameters => RoCC]]
abstract class Tile(resetSignal: Bool = null) extends Module(_reset = resetSignal) { abstract class Tile(resetSignal: Bool = null)
(implicit p: Parameters) extends Module(_reset = resetSignal) {
val io = new Bundle { val io = new Bundle {
val cached = new ClientTileLinkIO val cached = new ClientTileLinkIO
val uncached = new ClientUncachedTileLinkIO val uncached = new ClientUncachedTileLinkIO
val host = new HTIFIO val host = new HtifIO
} }
} }
class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) { class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile(resetSignal)(p) {
val icache = Module(new Frontend, { case CacheName => "L1I"; case CoreName => "Rocket" }) //TODO
val dcache = Module(new HellaCache, { case CacheName => "L1D" }) val dcacheParams = p.alterPartial({ case CacheName => "L1D" })
val ptw = Module(new PTW(params(NPTWPorts))) val icache = Module(new Frontend()(p.alterPartial({
val core = Module(new Rocket, { case CoreName => "Rocket" }) case CacheName => "L1I"
case CoreName => "Rocket" })))
val dcache = Module(new HellaCache()(dcacheParams))
val ptw = Module(new PTW(p(NPTWPorts))(dcacheParams))
val core = Module(new Rocket()(p.alterPartial({ case CoreName => "Rocket" })))
dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache dcache.io.cpu.invalidate_lr := core.io.dmem.invalidate_lr // Bypass signal to dcache
val dcArb = Module(new HellaCacheArbiter(params(NDCachePorts))) val dcArb = Module(new HellaCacheArbiter(p(NDCachePorts))(dcacheParams))
dcArb.io.requestor(0) <> ptw.io.mem dcArb.io.requestor(0) <> ptw.io.mem
dcArb.io.requestor(1) <> core.io.dmem dcArb.io.requestor(1) <> core.io.dmem
dcache.io.cpu <> dcArb.io.mem dcache.io.cpu <> dcArb.io.mem
@ -39,20 +44,16 @@ class RocketTile(resetSignal: Bool = null) extends Tile(resetSignal) {
core.io.ptw <> ptw.io.dpath core.io.ptw <> ptw.io.dpath
//If so specified, build an FPU module and wire it in //If so specified, build an FPU module and wire it in
params(BuildFPU) p(BuildFPU) foreach { fpu => core.io.fpu <> fpu(p).io }
.map { bf => bf() }
.foreach { fpu =>
core.io.fpu <> fpu.io
}
// Connect the caches and ROCC to the outer memory system // Connect the caches and ROCC to the outer memory system
io.cached <> dcache.io.mem io.cached <> dcache.io.mem
// If so specified, build an RoCC module and wire it in // If so specified, build an RoCC module and wire it in
// otherwise, just hookup the icache // otherwise, just hookup the icache
io.uncached <> params(BuildRoCC).map { buildItHere => io.uncached <> p(BuildRoCC).map { buildItHere =>
val rocc = buildItHere() val rocc = buildItHere(p)
val memArb = Module(new ClientTileLinkIOArbiter(3)) val memArb = Module(new ClientTileLinkIOArbiter(3))
val dcIF = Module(new SimpleHellaCacheIF) val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams))
core.io.rocc <> rocc.io core.io.rocc <> rocc.io
dcIF.io.requestor <> rocc.io.mem dcIF.io.requestor <> rocc.io.mem
dcArb.io.requestor(2) <> dcIF.io.cache dcArb.io.requestor(2) <> dcIF.io.cache

View File

@ -9,16 +9,18 @@ import scala.math._
case object NTLBEntries extends Field[Int] case object NTLBEntries extends Field[Int]
abstract trait TLBParameters extends CoreParameters { trait HasTLBParameters extends HasAddrMapParameters {
val entries = params(NTLBEntries) val entries = p(NTLBEntries)
val camAddrBits = ceil(log(entries)/log(2)).toInt val camAddrBits = log2Ceil(entries)
val camTagBits = asIdBits + vpnBits val camTagBits = asIdBits + vpnBits
} }
abstract class TLBBundle extends Bundle with TLBParameters abstract class TLBModule(implicit val p: Parameters) extends Module
abstract class TLBModule extends Module with TLBParameters with HasTLBParameters
abstract class TLBBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
with HasTLBParameters
class CAMIO extends TLBBundle { class CAMIO(implicit p: Parameters) extends TLBBundle()(p) {
val clear = Bool(INPUT) val clear = Bool(INPUT)
val clear_mask = Bits(INPUT, entries) val clear_mask = Bits(INPUT, entries)
val tag = Bits(INPUT, camTagBits) val tag = Bits(INPUT, camTagBits)
@ -31,9 +33,9 @@ class CAMIO extends TLBBundle {
val write_addr = UInt(INPUT, camAddrBits) val write_addr = UInt(INPUT, camAddrBits)
} }
class RocketCAM extends TLBModule { class RocketCAM(implicit p: Parameters) extends TLBModule()(p) {
val io = new CAMIO val io = new CAMIO
val cam_tags = Mem(Bits(width = camTagBits), entries) val cam_tags = Mem(entries, Bits(width = camTagBits))
val vb_array = Reg(init=Bits(0, entries)) val vb_array = Reg(init=Bits(0, entries))
when (io.write) { when (io.write) {
@ -74,7 +76,7 @@ class PseudoLRU(n: Int)
} }
} }
class TLBReq extends CoreBundle { class TLBReq(implicit p: Parameters) extends CoreBundle()(p) {
val asid = UInt(width = asIdBits) val asid = UInt(width = asIdBits)
val vpn = UInt(width = vpnBits+1) val vpn = UInt(width = vpnBits+1)
val passthrough = Bool() val passthrough = Bool()
@ -82,7 +84,7 @@ class TLBReq extends CoreBundle {
val store = Bool() val store = Bool()
} }
class TLBRespNoHitIndex extends CoreBundle { class TLBRespNoHitIndex(implicit p: Parameters) extends CoreBundle()(p) {
// lookup responses // lookup responses
val miss = Bool(OUTPUT) val miss = Bool(OUTPUT)
val ppn = UInt(OUTPUT, ppnBits) val ppn = UInt(OUTPUT, ppnBits)
@ -91,11 +93,11 @@ class TLBRespNoHitIndex extends CoreBundle {
val xcpt_if = Bool(OUTPUT) val xcpt_if = Bool(OUTPUT)
} }
class TLBResp extends TLBRespNoHitIndex with TLBParameters { class TLBResp(implicit p: Parameters) extends TLBRespNoHitIndex()(p) with HasTLBParameters {
val hit_idx = UInt(OUTPUT, entries) val hit_idx = UInt(OUTPUT, entries)
} }
class TLB extends TLBModule { class TLB(implicit p: Parameters) extends TLBModule()(p) {
val io = new Bundle { val io = new Bundle {
val req = Decoupled(new TLBReq).flip val req = Decoupled(new TLBReq).flip
val resp = new TLBResp val resp = new TLBResp
@ -109,7 +111,7 @@ class TLB extends TLBModule {
val r_req = Reg(new TLBReq) val r_req = Reg(new TLBReq)
val tag_cam = Module(new RocketCAM) val tag_cam = Module(new RocketCAM)
val tag_ram = Mem(io.ptw.resp.bits.pte.ppn, entries) val tag_ram = Mem(entries, io.ptw.resp.bits.pte.ppn)
val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt val lookup_tag = Cat(io.req.bits.asid, io.req.bits.vpn).toUInt
tag_cam.io.tag := lookup_tag tag_cam.io.tag := lookup_tag
@ -155,24 +157,28 @@ class TLB extends TLBModule {
val w_array = Mux(priv_s, sw_array.toBits, uw_array.toBits) val w_array = Mux(priv_s, sw_array.toBits, uw_array.toBits)
val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits) val x_array = Mux(priv_s, sx_array.toBits, ux_array.toBits)
val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm val vm_enabled = io.ptw.status.vm(3) && priv_uses_vm && !io.req.bits.passthrough
val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1) val bad_va = io.req.bits.vpn(vpnBits) != io.req.bits.vpn(vpnBits-1)
// it's only a store hit if the dirty bit is set // it's only a store hit if the dirty bit is set
val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0))) val tag_hits = tag_cam.io.hits & (dirty_array.toBits | ~Mux(io.req.bits.store, w_array, UInt(0)))
val tag_hit = tag_hits.orR val tag_hit = tag_hits.orR
val tlb_hit = vm_enabled && tag_hit val tlb_hit = vm_enabled && tag_hit
val tlb_miss = vm_enabled && !tag_hit && !bad_va val tlb_miss = vm_enabled && !tag_hit && !bad_va
when (io.req.valid && tlb_hit) { when (io.req.valid && tlb_hit) {
plru.access(OHToUInt(tag_cam.io.hits)) plru.access(OHToUInt(tag_cam.io.hits))
} }
val paddr = Cat(io.resp.ppn, UInt(0, pgIdxBits))
val addr_ok = addrMap.isValid(paddr)
val addr_prot = addrMap.getProt(paddr)
io.req.ready := state === s_ready io.req.ready := state === s_ready
io.resp.xcpt_ld := bad_va || tlb_hit && !(r_array & tag_cam.io.hits).orR io.resp.xcpt_ld := !addr_ok || !addr_prot.r || bad_va || tlb_hit && !(r_array & tag_cam.io.hits).orR
io.resp.xcpt_st := bad_va || tlb_hit && !(w_array & tag_cam.io.hits).orR io.resp.xcpt_st := !addr_ok || !addr_prot.w || bad_va || tlb_hit && !(w_array & tag_cam.io.hits).orR
io.resp.xcpt_if := bad_va || tlb_hit && !(x_array & tag_cam.io.hits).orR io.resp.xcpt_if := !addr_ok || !addr_prot.x || bad_va || tlb_hit && !(x_array & tag_cam.io.hits).orR
io.resp.miss := tlb_miss io.resp.miss := tlb_miss
io.resp.ppn := Mux(vm_enabled && !io.req.bits.passthrough, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(params(PPNBits)-1,0)) io.resp.ppn := Mux(vm_enabled, Mux1H(tag_cam.io.hits, tag_ram), io.req.bits.vpn(ppnBits-1,0))
io.resp.hit_idx := tag_cam.io.hits io.resp.hit_idx := tag_cam.io.hits
// clear invalid entries on access, or all entries on a TLB flush // clear invalid entries on access, or all entries on a TLB flush