1
0

send DMA requests through MMIO and get responses through CSRs

This commit is contained in:
Howard Mao 2016-01-14 11:37:58 -08:00
parent 58fcc6b7c6
commit 305185c034
7 changed files with 189 additions and 113 deletions

View File

@ -252,12 +252,16 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
}
for (i <- 0 until nCustomMrwCsrs) {
val addr = 0x790 + i // turn 0x790 into parameter CustomMRWCSRBase?
require(addr >= 0x780 && addr <= 0x7ff, "custom MRW CSR address " + i + " is out of range")
val addr = CSRs.mrwbase + i
require(!read_mapping.contains(addr), "custom MRW CSR address " + i + " is already in use")
read_mapping += addr -> io.custom_mrw_csrs(i)
}
for ((addr, i) <- roccCsrs.zipWithIndex) {
require(!read_mapping.contains(addr), "RoCC: CSR address " + addr + " is already in use")
read_mapping += addr -> io.rocc.csr.rdata(i)
}
val addr = Mux(cpu_ren, io.rw.addr, host_csr_bits.addr)
val decoded_addr = read_mapping map { case (k, v) => k -> (addr === k) }
@ -449,6 +453,10 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
}
}
io.rocc.csr.waddr := addr
io.rocc.csr.wdata := wdata
io.rocc.csr.wen := wen
when(this.reset) {
reg_mstatus.zero1 := 0
reg_mstatus.zero2 := 0

View File

@ -71,19 +71,23 @@ class ClientDmaIO(implicit p: Parameters) extends ParameterizedBundle()(p) {
val resp = Valid(new ClientDmaResponse).flip
}
class DmaFrontend(implicit val p: Parameters)
extends Module with HasClientDmaParameters {
class DmaFrontend(implicit p: Parameters) extends CoreModule()(p)
with HasClientDmaParameters with HasTileLinkParameters {
val io = new Bundle {
val cpu = (new ClientDmaIO).flip
val dma = new DmaIO
val mem = new ClientUncachedTileLinkIO
val ptw = new TLBPTWIO
val busy = Bool(OUTPUT)
val incr_outstanding = Bool(OUTPUT)
val host_id = UInt(INPUT, log2Up(nCores))
}
private val pgSize = 1 << pgIdxBits
val tlb = Module(new DecoupledTLB()(p.alterPartial({
case CacheName => "L1D"
})))
io.ptw <> tlb.io.ptw
val priv = Mux(io.ptw.status.mprv, io.ptw.status.prv1, io.ptw.status.prv)
val vm_enabled = io.ptw.status.vm(3) && priv <= UInt(PRV_S)
private val pgSize = 1 << pgIdxBits
val cmd = Reg(UInt(width = DMA_CMD_SZ))
val adv_ptr = MuxLookup(cmd, UInt("b11"), Seq(
@ -112,17 +116,17 @@ class DmaFrontend(implicit val p: Parameters)
val src_ppn = Reg(UInt(width = ppnBits))
val dst_ppn = Reg(UInt(width = ppnBits))
val src_paddr = Mux(vm_enabled, Cat(src_ppn, src_idx), src_vaddr)
val dst_paddr = Mux(vm_enabled, Cat(dst_ppn, dst_idx), dst_vaddr)
val src_paddr = Cat(src_ppn, src_idx)
val dst_paddr = Cat(dst_ppn, dst_idx)
val last_src_vpn = Reg(UInt(width = vpnBits))
val last_dst_vpn = Reg(UInt(width = vpnBits))
val tx_len = Mux(!vm_enabled, bytes_left,
Util.minUInt(src_pglen, dst_pglen, bytes_left))
val tx_len = Util.minUInt(src_pglen, dst_pglen, bytes_left)
val (dma_xact_id, _) = Counter(io.dma.req.fire(), nDmaXactsPerClient)
val dma_busy = Reg(init = UInt(0, nDmaXactsPerClient))
val dma_busy = Reg(init = UInt(0, tlMaxClientXacts))
val dma_xact_id = PriorityEncoder(~dma_busy)
val (dma_req_beat, dma_req_done) = Counter(io.mem.acquire.fire(), tlDataBeats)
val (s_idle :: s_translate :: s_dma_req :: s_dma_update ::
s_prepare :: s_finish :: Nil) = Enum(Bits(), 6)
@ -130,49 +134,80 @@ class DmaFrontend(implicit val p: Parameters)
// lower bit is for src, higher bit is for dst
val to_translate = Reg(init = UInt(0, 2))
val ptw_sent = Reg(init = UInt(0, 2))
val ptw_to_send = to_translate & ~ptw_sent
val ptw_resp_id = Reg(init = UInt(0, 1))
val tlb_sent = Reg(init = UInt(0, 2))
val tlb_to_send = to_translate & ~tlb_sent
val resp_status = Reg(UInt(width = dmaStatusBits))
io.ptw.req.valid := ptw_to_send.orR && vm_enabled
io.ptw.req.bits.addr := Mux(ptw_to_send(0), src_vpn, dst_vpn)
io.ptw.req.bits.prv := io.ptw.status.prv
io.ptw.req.bits.store := !ptw_to_send(0) // storing to destination
io.ptw.req.bits.fetch := Bool(true)
def make_acquire(
addr_beat: UInt, client_xact_id: UInt, client_id: UInt,
cmd: UInt, source: UInt, dest: UInt,
length: UInt, size: UInt): Acquire = {
when (io.ptw.req.fire()) {
ptw_sent := ptw_sent | PriorityEncoderOH(ptw_to_send)
val data_blob = Wire(UInt(width = tlDataBeats * tlDataBits))
data_blob := DmaRequest(
xact_id = UInt(0),
client_id = client_id,
cmd = cmd,
source = source,
dest = dest,
length = length,
size = size).toBits
val data_beats = Vec(tlDataBeats, UInt(width = tlDataBits)).fromBits(data_blob)
val base_addr = addrMap("devices:dma").start
val addr_block = UInt(base_addr >> (tlBeatAddrBits + tlByteAddrBits))
PutBlock(
client_xact_id = client_xact_id,
addr_block = addr_block,
addr_beat = addr_beat,
data = data_beats(addr_beat),
alloc = Bool(false))
}
when (io.ptw.resp.valid) {
when (io.ptw.resp.bits.error) {
tlb.io.req.valid := tlb_to_send.orR
tlb.io.req.bits.vpn := Mux(tlb_to_send(0), src_vpn, dst_vpn)
tlb.io.req.bits.passthrough := Bool(false)
tlb.io.req.bits.instruction := Bool(false)
tlb.io.req.bits.store := !tlb_to_send(0)
tlb.io.resp.ready := tlb_sent.orR
when (tlb.io.req.fire()) {
tlb_sent := tlb_sent | PriorityEncoderOH(tlb_to_send)
}
when (tlb.io.resp.fire()) {
val recv_choice = PriorityEncoderOH(to_translate)
val error = Mux(recv_choice(0),
tlb.io.resp.bits.xcpt_ld, tlb.io.resp.bits.xcpt_st)
when (error) {
resp_status := ClientDmaResponse.pagefault
state := s_finish
}
val recv_choice = PriorityEncoderOH(to_translate)
to_translate := to_translate & ~recv_choice
// getting the src translation
when (recv_choice(0)) {
src_ppn := io.ptw.resp.bits.pte.ppn
src_ppn := tlb.io.resp.bits.ppn
} .otherwise {
dst_ppn := io.ptw.resp.bits.pte.ppn
dst_ppn := tlb.io.resp.bits.ppn
}
to_translate := to_translate & ~recv_choice
}
io.cpu.req.ready := state === s_idle
io.cpu.resp.valid := state === s_finish
io.cpu.resp.bits := ClientDmaResponse(resp_status)
io.dma.req.valid := state === s_dma_req && !dma_busy(dma_xact_id)
io.dma.req.bits := DmaRequest(
io.mem.acquire.valid := (state === s_dma_req) && !dma_busy.andR
io.mem.acquire.bits := make_acquire(
addr_beat = dma_req_beat,
client_id = io.host_id,
client_xact_id = dma_xact_id,
cmd = cmd,
source = src_paddr,
dest = dst_paddr,
length = tx_len,
size = word_size)
io.dma.resp.ready := Bool(true)
cmd = cmd, source = src_paddr, dest = dst_paddr,
length = tx_len, size = word_size)
io.mem.grant.ready := (state =/= s_dma_req)
when (io.cpu.req.fire()) {
val req = io.cpu.req.bits
@ -187,24 +222,23 @@ class DmaFrontend(implicit val p: Parameters)
bytes_left := req.segment_size
word_size := req.word_size
to_translate := Mux(is_prefetch, UInt("b10"), UInt("b11"))
ptw_sent := UInt(0)
state := Mux(vm_enabled, s_translate, s_dma_req)
tlb_sent := UInt(0)
state := s_translate
}
when (state === s_translate && !to_translate.orR) {
state := s_dma_req
}
def setBusyOnSend(req: DecoupledIO[DmaRequest]): UInt =
Mux(req.fire(), UIntToOH(req.bits.client_xact_id), UInt(0))
def setBusy(set: Bool, xact_id: UInt): UInt =
Mux(set, UIntToOH(xact_id), UInt(0))
def clearBusyOnRecv(resp: DecoupledIO[DmaResponse]): UInt =
~Mux(resp.fire(), UIntToOH(resp.bits.client_xact_id), UInt(0))
dma_busy := (dma_busy |
setBusy(dma_req_done, dma_xact_id)) &
~setBusy(io.mem.grant.fire(), io.mem.grant.bits.client_xact_id)
dma_busy := (dma_busy | setBusyOnSend(io.dma.req)) &
clearBusyOnRecv(io.dma.resp)
when (io.dma.req.fire()) {
when (dma_req_done) {
src_vaddr := src_vaddr + Mux(adv_ptr(0), tx_len, UInt(0))
dst_vaddr := dst_vaddr + Mux(adv_ptr(1), tx_len, UInt(0))
bytes_left := bytes_left - tx_len
@ -223,11 +257,11 @@ class DmaFrontend(implicit val p: Parameters)
dst_vaddr := dst_vaddr + dst_stride
bytes_left := segment_size
segments_left := segments_left - UInt(1)
state := Mux(vm_enabled, s_prepare, s_dma_req)
state := s_prepare
}
} .otherwise {
to_translate := adv_ptr & Cat(dst_idx === UInt(0), src_idx === UInt(0))
ptw_sent := UInt(0)
tlb_sent := UInt(0)
state := s_translate
}
}
@ -236,13 +270,14 @@ class DmaFrontend(implicit val p: Parameters)
to_translate := adv_ptr & Cat(
dst_vpn =/= last_dst_vpn,
src_vpn =/= last_src_vpn)
ptw_sent := UInt(0)
tlb_sent := UInt(0)
state := s_translate
}
when (state === s_finish) { state := s_idle }
io.busy := (state =/= s_idle) || dma_busy.orR
io.incr_outstanding := dma_req_done
}
object DmaCtrlRegNumbers {
@ -252,19 +287,23 @@ object DmaCtrlRegNumbers {
val NSEGMENTS = 3
val WORD_SIZE = 4
val RESP_STATUS = 5
val OUTSTANDING = 6
val NCSRS = 7
val CSR_BASE = 0x800
val CSR_END = CSR_BASE + NCSRS
}
import DmaCtrlRegNumbers._
class DmaCtrlRegFile(implicit p: Parameters) extends ClientDmaModule()(p) {
class DmaCtrlRegFile(implicit val p: Parameters) extends Module
with HasClientDmaParameters with HasTileLinkParameters {
private val nWriteRegs = 5
private val nReadRegs = 1
private val nRegs = nWriteRegs + nReadRegs
private val nRegs = nWriteRegs + 2
val io = new Bundle {
val wen = Bool(INPUT)
val addr = UInt(INPUT, log2Up(nRegs))
val waddr = UInt(INPUT, log2Up(nRegs))
val wdata = UInt(INPUT, dmaSegmentSizeBits)
val rdata = UInt(OUTPUT, dmaSegmentSizeBits)
val src_stride = UInt(OUTPUT, dmaSegmentSizeBits)
val dst_stride = UInt(OUTPUT, dmaSegmentSizeBits)
@ -272,10 +311,12 @@ class DmaCtrlRegFile(implicit p: Parameters) extends ClientDmaModule()(p) {
val nsegments = UInt(OUTPUT, dmaSegmentBits)
val word_size = UInt(OUTPUT, dmaWordSizeBits)
val status = UInt(INPUT, dmaStatusBits)
val incr_outstanding = Bool(INPUT)
val xact_outstanding = Bool(OUTPUT)
}
val regs = Reg(Vec(nWriteRegs, UInt(width = dmaSegmentSizeBits)))
val waddr = io.waddr(log2Up(NCSRS) - 1, 0)
io.src_stride := regs(SRC_STRIDE)
io.dst_stride := regs(DST_STRIDE)
@ -283,44 +324,48 @@ class DmaCtrlRegFile(implicit p: Parameters) extends ClientDmaModule()(p) {
io.nsegments := regs(NSEGMENTS)
io.word_size := regs(WORD_SIZE)
when (io.wen && io.addr < UInt(nWriteRegs)) {
regs.write(io.addr, io.wdata)
when (io.wen && waddr < UInt(nWriteRegs)) {
regs.write(waddr, io.wdata)
}
io.rdata := MuxLookup(io.addr, regs(io.addr), Seq(
UInt(RESP_STATUS) -> io.status))
val outstanding_cnt = TwoWayCounter(
io.incr_outstanding,
io.wen && io.waddr === UInt(OUTSTANDING),
tlMaxClientXacts)
io.xact_outstanding := outstanding_cnt > UInt(0)
}
class DmaController(implicit p: Parameters) extends RoCC()(p)
with HasClientDmaParameters {
io.mem.req.valid := Bool(false)
io.autl.acquire.valid := Bool(false)
io.autl.grant.ready := Bool(false)
io.iptw.req.valid := Bool(false)
io.pptw.req.valid := Bool(false)
io.resp.valid := Bool(false)
io.interrupt := Bool(false)
val cmd = Queue(io.cmd)
val inst = cmd.bits.inst
val is_transfer = inst.funct < UInt(8)
val is_cr_write = inst.funct === UInt(8)
val is_cr_read = inst.funct === UInt(9)
val is_cr_access = is_cr_write || is_cr_read
val resp_rd = Reg(io.resp.bits.rd)
val resp_data = Reg(io.resp.bits.data)
val s_idle :: s_resp :: Nil = Enum(Bits(), 2)
val state = Reg(init = s_idle)
val reg_status = Reg(UInt(width = dmaStatusBits))
val crfile = Module(new DmaCtrlRegFile)
crfile.io.addr := cmd.bits.rs1
crfile.io.wdata := cmd.bits.rs2
crfile.io.wen := cmd.fire() && is_cr_write
crfile.io.waddr := io.csr.waddr
crfile.io.wdata := io.csr.wdata
crfile.io.wen := io.csr.wen
io.csr.rdata(SRC_STRIDE) := crfile.io.src_stride
io.csr.rdata(DST_STRIDE) := crfile.io.dst_stride
io.csr.rdata(SEGMENT_SIZE) := crfile.io.segment_size
io.csr.rdata(NSEGMENTS) := crfile.io.nsegments
io.csr.rdata(WORD_SIZE) := crfile.io.word_size
io.csr.rdata(RESP_STATUS) := reg_status
val frontend = Module(new DmaFrontend)
io.dma <> frontend.io.dma
io.dptw <> frontend.io.ptw
io.autl <> frontend.io.mem
crfile.io.incr_outstanding := frontend.io.incr_outstanding
frontend.io.host_id := io.host_id
frontend.io.cpu.req.valid := cmd.valid && is_transfer
frontend.io.cpu.req.bits := ClientDmaRequest(
cmd = cmd.bits.inst.funct,
@ -331,26 +376,11 @@ class DmaController(implicit p: Parameters) extends RoCC()(p)
segment_size = crfile.io.segment_size,
nsegments = crfile.io.nsegments,
word_size = crfile.io.word_size)
cmd.ready := state === s_idle && (!is_transfer || frontend.io.cpu.req.ready)
io.resp.valid := state === s_resp
io.resp.bits.rd := resp_rd
io.resp.bits.data := resp_data
when (cmd.fire()) {
when (is_cr_read) {
resp_rd := inst.rd
resp_data := crfile.io.rdata
state := s_resp
}
}
when (io.resp.fire()) { state := s_idle }
cmd.ready := is_transfer && frontend.io.cpu.req.ready
when (frontend.io.cpu.resp.valid) {
reg_status := frontend.io.cpu.resp.bits.status
}
io.busy := (state =/= s_idle) || cmd.valid || frontend.io.busy
io.interrupt := Bool(false)
io.busy := cmd.valid || frontend.io.busy || crfile.io.xact_outstanding
}

View File

@ -311,6 +311,7 @@ object CSRs {
val stimehw = 0xa81
val mtimecmph = 0x361
val mtimeh = 0x741
val mrwbase = 0x790
val all = {
val res = collection.mutable.ArrayBuffer[Int]()
res += fflags

View File

@ -9,6 +9,14 @@ import cde.{Parameters, Field}
case object RoccMaxTaggedMemXacts extends Field[Int]
case object RoccNMemChannels extends Field[Int]
case object RoccNCSRs extends Field[Int]
/** CSR port bundle connecting the core's CSR file with RoCC accelerators.
  *
  * As declared (unflipped), the read data is an input and the write channel
  * (address, data, enable) is an output. `RoCCInterface` instantiates this
  * bundle flipped.
  */
class RoCCCSRs(implicit p: Parameters) extends CoreBundle()(p) {
// One xLen-wide read value per RoCC CSR (nRoccCsrs entries).
val rdata = Vec(nRoccCsrs, UInt(INPUT, xLen))
// Address of the CSR being written, CSR.ADDRSZ bits wide.
val waddr = UInt(OUTPUT, CSR.ADDRSZ)
// Data to write to the addressed CSR.
val wdata = UInt(OUTPUT, xLen)
// Write enable qualifying waddr/wdata.
val wen = Bool(OUTPUT)
}
class RoCCInstruction extends Bundle
{
@ -33,7 +41,7 @@ class RoCCResponse(implicit p: Parameters) extends CoreBundle()(p) {
val data = Bits(width = xLen)
}
class RoCCInterface(implicit p: Parameters) extends Bundle {
class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) {
val cmd = Decoupled(new RoCCCommand).flip
val resp = Decoupled(new RoCCResponse)
val mem = new HellaCacheIO()(p.alterPartial({ case CacheName => "L1D" }))
@ -50,8 +58,8 @@ class RoCCInterface(implicit p: Parameters) extends Bundle {
val fpu_req = Decoupled(new FPInput)
val fpu_resp = Decoupled(new FPResult).flip
val exception = Bool(INPUT)
val dma = new DmaIO
val csr = (new RoCCCSRs).flip
val host_id = UInt(INPUT, log2Up(nCores))
override def cloneType = new RoCCInterface().asInstanceOf[this.type]
}

View File

@ -46,6 +46,10 @@ trait HasCoreParameters extends HasAddrMapParameters {
val vaddrBitsExtended = vaddrBits + (vaddrBits < xLen).toInt
val mmioBase = p(MMIOBase)
val nCustomMrwCsrs = p(NCustomMRWCSRs)
val roccCsrs = if (p(BuildRoCC).isEmpty) Nil
else p(BuildRoCC).flatMap(_.csrs)
val nRoccCsrs = p(RoccNCSRs)
val nCores = p(HtifKey).nCores
// Print out log of committed instructions and their writeback values.
// Requires post-processing due to out-of-order writebacks.

View File

@ -14,8 +14,8 @@ case class RoccParameters(
opcodes: OpcodeSet,
generator: Parameters => RoCC,
nMemChannels: Int = 0,
useFPU: Boolean = false,
useDma: Boolean = false)
csrs: Seq[Int] = Nil,
useFPU: Boolean = false)
abstract class Tile(resetSignal: Bool = null)
(implicit p: Parameters) extends Module(_reset = resetSignal) {
@ -23,7 +23,6 @@ abstract class Tile(resetSignal: Bool = null)
val usingRocc = !buildRocc.isEmpty
val nRocc = buildRocc.size
val nFPUPorts = buildRocc.filter(_.useFPU).size
val nDmaPorts = buildRocc.filter(_.useDma).size
val nDCachePorts = 2 + nRocc
val nPTWPorts = 2 + 3 * nRocc
val nCachedTileLinkPorts = 1
@ -77,12 +76,15 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile(
cmdRouter.io.in <> core.io.rocc.cmd
val roccs = buildRocc.zipWithIndex.map { case (accelParams, i) =>
val rocc = accelParams.generator(
p.alterPartial({ case RoccNMemChannels => accelParams.nMemChannels }))
val rocc = accelParams.generator(p.alterPartial({
case RoccNMemChannels => accelParams.nMemChannels
case RoccNCSRs => accelParams.csrs.size
}))
val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams))
rocc.io.cmd <> cmdRouter.io.out(i)
rocc.io.s := core.io.rocc.s
rocc.io.exception := core.io.rocc.exception
rocc.io.host_id := io.host.id
dcIF.io.requestor <> rocc.io.mem
dcArb.io.requestor(2 + i) <> dcIF.io.cache
uncachedArb.io.in(1 + i) <> rocc.io.autl
@ -107,18 +109,22 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile(
}
}
if (nDmaPorts > 0) {
val dmaArb = Module(new DmaArbiter(nDmaPorts))
dmaArb.io.in <> roccs.zip(buildRocc)
.filter { case (_, params) => params.useDma }
.map { case (rocc, _) => rocc.io.dma }
io.dma <> dmaArb.io.out
}
core.io.rocc.busy := cmdRouter.io.busy || roccs.map(_.io.busy).reduce(_ || _)
core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _)
respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp))
if (p(RoccNCSRs) > 0) {
core.io.rocc.csr.rdata <> roccs.map(_.io.csr.rdata).reduce(_ ++ _)
for ((rocc, accelParams) <- roccs.zip(buildRocc)) {
rocc.io.csr.waddr := core.io.rocc.csr.waddr
rocc.io.csr.wdata := core.io.rocc.csr.wdata
rocc.io.csr.wen := core.io.rocc.csr.wen &&
accelParams.csrs
.map(core.io.rocc.csr.waddr === UInt(_))
.reduce((a, b) => a || b)
}
}
roccs.flatMap(_.io.utl) :+ uncachedArb.io.out
} else { Seq(icache.io.mem) })
@ -128,9 +134,4 @@ class RocketTile(resetSignal: Bool = null)(implicit p: Parameters) extends Tile(
fpu.io.cp_resp.ready := Bool(false)
}
}
if (!usingRocc || nDmaPorts == 0) {
io.dma.req.valid := Bool(false)
io.dma.resp.ready := Bool(false)
}
}

View File

@ -193,3 +193,27 @@ class TLB(implicit p: Parameters) extends TLBModule()(p) {
state := s_ready
}
}
/** Wraps a [[TLB]] behind ready/valid (Decoupled) request and response ports.
  *
  * Incoming requests are buffered in a queue and presented to the inner TLB.
  * A response is only reported, and the queued request only released, when
  * the TLB does not signal a miss; the PTW port of the inner TLB is passed
  * straight through.
  */
class DecoupledTLB(implicit p: Parameters) extends Module {
val io = new Bundle {
// Translation requests from the client (flipped: this module is the consumer).
val req = Decoupled(new TLBReq).flip
// Translation responses back to the client.
val resp = Decoupled(new TLBResp)
// Page-table-walker port, forwarded from the inner TLB.
val ptw = new TLBPTWIO
}

// Buffer requests so the head entry can be held while the TLB reports a miss.
val reqq = Queue(io.req)
val tlb = Module(new TLB)

// Joint handshake over: a queued request, TLB readiness, and response readiness.
// NOTE(review): presumably fire(x, extra...) ANDs every listed signal except x
// together with the extras — confirm against DecoupledHelper's definition.
val resp_helper = DecoupledHelper(
reqq.valid, tlb.io.req.ready, io.resp.ready)
val tlb_miss = tlb.io.resp.miss

// Drive the TLB request from the head of the queue.
tlb.io.req.valid := resp_helper.fire(tlb.io.req.ready)
tlb.io.req.bits := reqq.bits
// Dequeue the request only when the TLB did not miss.
reqq.ready := resp_helper.fire(reqq.valid, !tlb_miss)
// Likewise, only flag a valid response when the TLB did not miss.
io.resp.valid := resp_helper.fire(io.resp.ready, !tlb_miss)
io.resp.bits := tlb.io.resp

io.ptw <> tlb.io.ptw
}