strip DMA and RoCC CSRs out of rocket and uncore (#201)
This commit is contained in:
parent
47a0c880a4
commit
38e0967816
@ -101,7 +101,6 @@ class BaseCoreplexConfig extends Config (
|
|||||||
case BuildRoCC => Nil
|
case BuildRoCC => Nil
|
||||||
case RoccNMemChannels => site(BuildRoCC).map(_.nMemChannels).foldLeft(0)(_ + _)
|
case RoccNMemChannels => site(BuildRoCC).map(_.nMemChannels).foldLeft(0)(_ + _)
|
||||||
case RoccNPTWPorts => site(BuildRoCC).map(_.nPTWPorts).foldLeft(0)(_ + _)
|
case RoccNPTWPorts => site(BuildRoCC).map(_.nPTWPorts).foldLeft(0)(_ + _)
|
||||||
case RoccNCSRs => site(BuildRoCC).map(_.csrs.size).foldLeft(0)(_ + _)
|
|
||||||
//Rocket Core Constants
|
//Rocket Core Constants
|
||||||
case FetchWidth => if (site(UseCompressed)) 2 else 1
|
case FetchWidth => if (site(UseCompressed)) 2 else 1
|
||||||
case RetireWidth => 1
|
case RetireWidth => 1
|
||||||
|
@ -49,7 +49,6 @@ class WithGroundTest extends Config(
|
|||||||
case GroundTestId => i
|
case GroundTestId => i
|
||||||
case NCachedTileLinkPorts => if(tileSettings.cached > 0) 1 else 0
|
case NCachedTileLinkPorts => if(tileSettings.cached > 0) 1 else 0
|
||||||
case NUncachedTileLinkPorts => tileSettings.uncached
|
case NUncachedTileLinkPorts => tileSettings.uncached
|
||||||
case RoccNCSRs => tileSettings.csrs
|
|
||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -11,8 +11,7 @@ import cde.{Parameters, Field}
|
|||||||
case object BuildGroundTest extends Field[Parameters => GroundTest]
|
case object BuildGroundTest extends Field[Parameters => GroundTest]
|
||||||
|
|
||||||
case class GroundTestTileSettings(
|
case class GroundTestTileSettings(
|
||||||
uncached: Int = 0, cached: Int = 0, ptw: Int = 0,
|
uncached: Int = 0, cached: Int = 0, ptw: Int = 0, maxXacts: Int = 1)
|
||||||
maxXacts: Int = 1, csrs: Int = 0)
|
|
||||||
case object GroundTestKey extends Field[Seq[GroundTestTileSettings]]
|
case object GroundTestKey extends Field[Seq[GroundTestTileSettings]]
|
||||||
case object GroundTestId extends Field[Int]
|
case object GroundTestId extends Field[Int]
|
||||||
|
|
||||||
|
@ -332,11 +332,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
read_mapping += addr -> io.custom_mrw_csrs(i)
|
read_mapping += addr -> io.custom_mrw_csrs(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
for ((addr, i) <- roccCsrs.zipWithIndex) {
|
|
||||||
require(!read_mapping.contains(addr), "RoCC: CSR address " + addr + " is already in use")
|
|
||||||
read_mapping += addr -> io.rocc.csr.rdata(i)
|
|
||||||
}
|
|
||||||
|
|
||||||
val decoded_addr = read_mapping map { case (k, v) => k -> (io.rw.addr === k) }
|
val decoded_addr = read_mapping map { case (k, v) => k -> (io.rw.addr === k) }
|
||||||
|
|
||||||
val addr_valid = decoded_addr.values.reduce(_||_)
|
val addr_valid = decoded_addr.values.reduce(_||_)
|
||||||
@ -559,10 +554,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p)
|
|||||||
reg_dcsr.debugint := io.prci.interrupts.debug
|
reg_dcsr.debugint := io.prci.interrupts.debug
|
||||||
reg_dcsr.hwbpcount := UInt(p(NBreakpoints))
|
reg_dcsr.hwbpcount := UInt(p(NBreakpoints))
|
||||||
|
|
||||||
io.rocc.csr.waddr := io.rw.addr
|
|
||||||
io.rocc.csr.wdata := wdata
|
|
||||||
io.rocc.csr.wen := wen
|
|
||||||
|
|
||||||
if (!usingUser) {
|
if (!usingUser) {
|
||||||
reg_mstatus.mpp := PRV.M
|
reg_mstatus.mpp := PRV.M
|
||||||
reg_mstatus.prv := PRV.M
|
reg_mstatus.prv := PRV.M
|
||||||
|
@ -1,400 +0,0 @@
|
|||||||
package rocket
|
|
||||||
|
|
||||||
import Chisel._
|
|
||||||
import uncore.tilelink._
|
|
||||||
import uncore.devices._
|
|
||||||
import uncore.devices.DmaRequest._
|
|
||||||
import uncore.agents._
|
|
||||||
import uncore.util._
|
|
||||||
import junctions.{ParameterizedBundle, AddrMap}
|
|
||||||
import cde.Parameters
|
|
||||||
|
|
||||||
/** Width parameters shared by the core-side DMA client bundles and modules.
  *
  * Addresses, strides and segment sizes are all as wide as the widest core
  * address (`coreMaxAddrBits`); the segment count is a fixed 24-bit field.
  */
trait HasClientDmaParameters extends HasCoreParameters with HasDmaParameters {
  /** Width of the source/destination start addresses. */
  val dmaAddrBits: Int = coreMaxAddrBits
  /** Width of the segment size and of both stride fields. */
  val dmaSegmentSizeBits: Int = coreMaxAddrBits
  /** Width of the segment counter. */
  val dmaSegmentBits: Int = 24
}
|
|
||||||
|
|
||||||
/** Base class for bundles that need the client DMA width parameters. */
abstract class ClientDmaBundle(implicit val p: Parameters)
    extends ParameterizedBundle()(p)
    with HasClientDmaParameters
|
|
||||||
/** Base class for modules that need the client DMA width parameters. */
abstract class ClientDmaModule(implicit val p: Parameters)
    extends Module
    with HasClientDmaParameters
|
|
||||||
|
|
||||||
/** A DMA request as issued by the core-side client.
  *
  * Field order is part of the bundle layout and must not be rearranged.
  */
class ClientDmaRequest(implicit p: Parameters) extends ClientDmaBundle()(p) {
  /** Command code, `DMA_CMD_SZ` bits wide. */
  val cmd = UInt(width = DMA_CMD_SZ)
  /** Start address of the source. */
  val src_start = UInt(width = dmaAddrBits)
  /** Start address of the destination. */
  val dst_start = UInt(width = dmaAddrBits)
  /** Amount added to the source address between segments. */
  val src_stride = UInt(width = dmaSegmentSizeBits)
  /** Amount added to the destination address between segments. */
  val dst_stride = UInt(width = dmaSegmentSizeBits)
  /** Number of bytes in each segment. */
  val segment_size = UInt(width = dmaSegmentSizeBits)
  /** Number of segments in the transfer. */
  val nsegments = UInt(width = dmaSegmentBits)
  /** Word size selector forwarded with each transfer. */
  val word_size = UInt(width = dmaWordSizeBits)
}
|
|
||||||
|
|
||||||
/** Factory for [[ClientDmaRequest]] wires. */
object ClientDmaRequest {
  /** Builds a `Wire` of type [[ClientDmaRequest]] with every field driven.
    *
    * @param cmd          command code
    * @param src_start    source start address
    * @param dst_start    destination start address
    * @param segment_size bytes per segment
    * @param nsegments    number of segments (defaults to 1)
    * @param src_stride   source stride between segments (defaults to 0)
    * @param dst_stride   destination stride between segments (defaults to 0)
    * @param word_size    word size selector (defaults to 0)
    * @return the freshly created, fully connected request wire
    */
  def apply(cmd: UInt,
            src_start: UInt,
            dst_start: UInt,
            segment_size: UInt,
            nsegments: UInt = UInt(1),
            src_stride: UInt = UInt(0),
            dst_stride: UInt = UInt(0),
            word_size: UInt = UInt(0))
           (implicit p: Parameters): ClientDmaRequest = {
    val req = Wire(new ClientDmaRequest)
    req.cmd := cmd
    req.src_start := src_start
    req.dst_start := dst_start
    req.src_stride := src_stride
    req.dst_stride := dst_stride
    req.segment_size := segment_size
    req.nsegments := nsegments
    req.word_size := word_size
    req
  }
}
|
|
||||||
|
|
||||||
/** Status codes and factory for [[ClientDmaResponse]] wires. */
object ClientDmaResponse {
  /** Status reported when address translation raises an exception. */
  val pagefault = UInt("b01")
  /** Status reported when the region check on the translated addresses fails. */
  val invalid_region = UInt("b10")

  /** Builds a `Wire` of type [[ClientDmaResponse]].
    *
    * @param status status code to drive (defaults to 0)
    * @return the freshly created response wire
    */
  def apply(status: UInt = UInt(0))(implicit p: Parameters): ClientDmaResponse = {
    val resp = Wire(new ClientDmaResponse)
    resp.status := status
    resp
  }
}
|
|
||||||
|
|
||||||
/** Completion message returned to the core-side client. */
class ClientDmaResponse(implicit p: Parameters) extends ClientDmaBundle()(p) {
  /** Completion status, `dmaStatusBits` wide. */
  val status = UInt(width = dmaStatusBits)
}
|
|
||||||
|
|
||||||
/** Client-side DMA port: decoupled requests out, valid-only responses in. */
class ClientDmaIO(implicit p: Parameters) extends ParameterizedBundle()(p) {
  /** Outgoing request channel (ready/valid). */
  val req = Decoupled(new ClientDmaRequest)
  /** Incoming response channel (valid only, flipped). */
  val resp = Valid(new ClientDmaResponse).flip
}
|
|
||||||
|
|
||||||
/** Core-side DMA frontend.
  *
  * Accepts a [[ClientDmaRequest]], translates its source and destination
  * addresses through a private TLB, and forwards the transfer one piece at a
  * time as `PutBlock` acquires addressed at the `"devices:dma"` region.
  *
  * State machine:
  *  - `s_idle`       accepts a request and latches all of its fields.
  *  - `s_translate`  issues TLB lookups for every bit set in `to_translate`;
  *                   once none are pending the physical addresses are checked
  *                   with `check_region`.
  *  - `s_dma_req`    sends the beats of one `PutBlock`.
  *  - `s_dma_update` decides whether the transfer is complete, whether a new
  *                   segment starts, or whether a page boundary was crossed.
  *  - `s_prepare`    after a stride, re-translates only pointers whose VPN changed.
  *  - `s_finish`     presents the response for one cycle, then returns to idle.
  *
  * Statement order matters: later connections to the same register win.
  */
class DmaFrontend(implicit p: Parameters) extends CoreModule()(p)
    with HasClientDmaParameters with HasTileLinkParameters {
  val io = new Bundle {
    val cpu = (new ClientDmaIO).flip
    val mem = new ClientUncachedTileLinkIO
    val ptw = new TLBPTWIO
    val busy = Bool(OUTPUT)
    val incr_outstanding = Bool(OUTPUT)
    val host_id = UInt(INPUT, log2Up(nCores))
  }

  // Private TLB, parameterised with the "L1D" cache name.
  val tlb = Module(new DecoupledTLB()(p.alterPartial({
    case CacheName => "L1D"
  })))
  io.ptw <> tlb.io.ptw

  private val pgSize = 1 << pgIdxBits

  // Latched command. adv_ptr says which pointers move after each piece:
  // bit 0 advances the source, bit 1 advances the destination.
  val cmd = Reg(UInt(width = DMA_CMD_SZ))
  val adv_ptr = MuxLookup(cmd, UInt("b11"), Seq(
    DMA_CMD_PFR -> UInt("b10"),
    DMA_CMD_PFW -> UInt("b10"),
    DMA_CMD_SIN -> UInt("b10"),
    DMA_CMD_SOUT -> UInt("b01")))

  // Transfer bookkeeping.
  val segment_size = Reg(UInt(width = dmaSegmentSizeBits))
  val bytes_left = Reg(UInt(width = dmaSegmentSizeBits))
  val segments_left = Reg(UInt(width = dmaSegmentBits))
  val word_size = Reg(UInt(width = dmaWordSizeBits))

  // Current virtual pointers, split into page number and in-page offset;
  // *_pglen is the number of bytes remaining in the current page.
  val src_vaddr = Reg(UInt(width = dmaAddrBits))
  val dst_vaddr = Reg(UInt(width = dmaAddrBits))
  val src_vpn = src_vaddr(dmaAddrBits - 1, pgIdxBits)
  val dst_vpn = dst_vaddr(dmaAddrBits - 1, pgIdxBits)
  val src_idx = src_vaddr(pgIdxBits - 1, 0)
  val dst_idx = dst_vaddr(pgIdxBits - 1, 0)
  val src_pglen = UInt(pgSize) - src_idx
  val dst_pglen = UInt(pgSize) - dst_idx

  val src_stride = Reg(UInt(width = dmaSegmentSizeBits))
  val dst_stride = Reg(UInt(width = dmaSegmentSizeBits))

  // Most recent translations.
  val src_ppn = Reg(UInt(width = ppnBits))
  val dst_ppn = Reg(UInt(width = ppnBits))

  val src_paddr = Cat(src_ppn, src_idx)
  val dst_paddr = Cat(dst_ppn, dst_idx)

  // VPNs captured just before a stride, compared against in s_prepare.
  val last_src_vpn = Reg(UInt(width = vpnBits))
  val last_dst_vpn = Reg(UInt(width = vpnBits))

  // One piece never crosses a page on either side nor exceeds the segment.
  val tx_len = src_pglen min dst_pglen min bytes_left

  // One busy bit per client transaction id; the lowest free id is used next.
  val dma_busy = Reg(init = UInt(0, tlMaxClientXacts))
  val dma_xact_id = PriorityEncoder(~dma_busy)
  val (dma_req_beat, dma_req_done) = Counter(io.mem.acquire.fire(), tlDataBeats)

  val (s_idle :: s_translate :: s_dma_req :: s_dma_update ::
       s_prepare :: s_finish :: Nil) = Enum(Bits(), 6)
  val state = Reg(init = s_idle)

  // lower bit is for src, higher bit is for dst
  val to_translate = Reg(init = UInt(0, 2))
  val tlb_sent = Reg(init = UInt(0, 2))
  val tlb_to_send = to_translate & ~tlb_sent
  val resp_status = Reg(UInt(width = dmaStatusBits))

  /** Builds one beat of the `PutBlock` that carries a packed `DmaRequest`.
    *
    * The request is serialised, split into `tlDataBeats` beats, and the beat
    * selected by `addr_beat` becomes the data of the acquire. The block
    * address is derived from the start of the `"devices:dma"` region.
    */
  def make_acquire(
      addr_beat: UInt, client_xact_id: UInt, client_id: UInt,
      cmd: UInt, source: UInt, dest: UInt,
      length: UInt, size: UInt): Acquire = {

    val packed = Wire(UInt(width = tlDataBeats * tlDataBits))
    packed := DmaRequest(
      xact_id = UInt(0),
      client_id = client_id,
      cmd = cmd,
      source = source,
      dest = dest,
      length = length,
      size = size).asUInt
    val beats = Vec(tlDataBeats, UInt(width = tlDataBits)).fromBits(packed)
    val dmaBase = addrMap("devices:dma").start
    val blockAddr = UInt(dmaBase >> (tlBeatAddrBits + tlByteAddrBits))

    PutBlock(
      client_xact_id = client_xact_id,
      addr_block = blockAddr,
      addr_beat = addr_beat,
      data = beats(addr_beat),
      alloc = Bool(false))
  }

  /** Checks the translated addresses against the command's cacheability rules.
    *
    * The destination must be non-cacheable for `DMA_CMD_SOUT` and cacheable
    * otherwise; the source must be non-cacheable for `DMA_CMD_SIN` and is
    * unconstrained otherwise.
    */
  def check_region(cmd: UInt, src: UInt, dst: UInt): Bool = {
    val srcCacheable = addrMap.isCacheable(src)
    val dstCacheable = addrMap.isCacheable(dst)
    val dstAllowed = Mux(cmd === DMA_CMD_SOUT, !dstCacheable, dstCacheable)
    val srcAllowed = Mux(cmd === DMA_CMD_SIN, !srcCacheable, Bool(true))
    dstAllowed && srcAllowed
  }

  // TLB request: the source lookup goes first when both are pending; the
  // destination lookup is the one flagged as a store.
  tlb.io.req.valid := tlb_to_send.orR
  tlb.io.req.bits.vpn := Mux(tlb_to_send(0), src_vpn, dst_vpn)
  tlb.io.req.bits.passthrough := Bool(false)
  tlb.io.req.bits.instruction := Bool(false)
  tlb.io.req.bits.store := !tlb_to_send(0)
  tlb.io.resp.ready := tlb_sent.orR

  when (tlb.io.req.fire()) {
    tlb_sent := tlb_sent | PriorityEncoderOH(tlb_to_send)
  }

  when (tlb.io.resp.fire()) {
    // The response is attributed to the lowest still-pending translation.
    val answered = PriorityEncoderOH(to_translate)
    val faulted = Mux(answered(0),
      tlb.io.resp.bits.xcpt_ld, tlb.io.resp.bits.xcpt_st)

    when (faulted) {
      resp_status := ClientDmaResponse.pagefault
      state := s_finish
    }

    // getting the src translation
    when (answered(0)) {
      src_ppn := tlb.io.resp.bits.ppn
    } .otherwise {
      dst_ppn := tlb.io.resp.bits.ppn
    }

    to_translate := to_translate & ~answered
  }

  io.cpu.req.ready := state === s_idle
  io.cpu.resp.valid := state === s_finish
  io.cpu.resp.bits := ClientDmaResponse(status = resp_status)

  // Acquires are only sent while a transaction id is free.
  io.mem.acquire.valid := (state === s_dma_req) && !dma_busy.andR
  io.mem.acquire.bits := make_acquire(
    addr_beat = dma_req_beat,
    client_id = io.host_id,
    client_xact_id = dma_xact_id,
    cmd = cmd, source = src_paddr, dest = dst_paddr,
    length = tx_len, size = word_size)

  io.mem.grant.ready := (state =/= s_dma_req)

  when (io.cpu.req.fire()) {
    val incoming = io.cpu.req.bits
    val is_prefetch = incoming.cmd(2, 1) === UInt("b01")
    cmd := incoming.cmd
    src_vaddr := incoming.src_start
    dst_vaddr := incoming.dst_start
    src_stride := incoming.src_stride
    dst_stride := incoming.dst_stride
    segment_size := incoming.segment_size
    segments_left := incoming.nsegments - UInt(1)
    bytes_left := incoming.segment_size
    word_size := incoming.word_size
    // Prefetches translate only the destination; everything else translates both.
    to_translate := Mux(is_prefetch, UInt("b10"), UInt("b11"))
    tlb_sent := UInt(0)
    state := s_translate
  }

  when (state === s_translate && !to_translate.orR) {
    when (check_region(cmd, src_paddr, dst_paddr)) {
      state := s_dma_req
    } .otherwise {
      resp_status := ClientDmaResponse.invalid_region
      state := s_finish
    }
  }

  /** One-hot mask for `xact_id` when `set` is true, all zeros otherwise. */
  def setBusy(set: Bool, xact_id: UInt): UInt =
    Mux(set, UIntToOH(xact_id), UInt(0))

  // Mark the id busy once the last beat is sent; clear it when its grant arrives.
  dma_busy := (dma_busy |
                setBusy(dma_req_done, dma_xact_id)) &
                ~setBusy(io.mem.grant.fire(), io.mem.grant.bits.client_xact_id)

  when (dma_req_done) {
    src_vaddr := src_vaddr + Mux(adv_ptr(0), tx_len, UInt(0))
    dst_vaddr := dst_vaddr + Mux(adv_ptr(1), tx_len, UInt(0))
    bytes_left := bytes_left - tx_len
    state := s_dma_update
  }

  when (state === s_dma_update) {
    when (bytes_left === UInt(0)) {
      when (segments_left === UInt(0)) {
        // Whole transfer issued.
        resp_status := UInt(0)
        state := s_finish
      } .otherwise {
        // Segment finished: apply the strides and start the next one.
        last_src_vpn := src_vpn
        last_dst_vpn := dst_vpn
        src_vaddr := src_vaddr + src_stride
        dst_vaddr := dst_vaddr + dst_stride
        bytes_left := segment_size
        segments_left := segments_left - UInt(1)
        state := s_prepare
      }
    } .otherwise {
      // Mid-segment: an advancing pointer whose page offset is now zero has
      // entered a new page and needs a fresh translation.
      to_translate := adv_ptr & Cat(dst_idx === UInt(0), src_idx === UInt(0))
      tlb_sent := UInt(0)
      state := s_translate
    }
  }

  when (state === s_prepare) {
    to_translate := adv_ptr & Cat(
      dst_vpn =/= last_dst_vpn,
      src_vpn =/= last_src_vpn)
    tlb_sent := UInt(0)
    state := s_translate
  }

  when (state === s_finish) {
    state := s_idle
  }

  io.busy := (state =/= s_idle) || dma_busy.orR
  io.incr_outstanding := dma_req_done
}
|
|
||||||
|
|
||||||
/** Indices of the DMA control registers and the CSR address window they occupy. */
object DmaCtrlRegNumbers {
  val SRC_STRIDE: Int = 0
  val DST_STRIDE: Int = 1
  val SEGMENT_SIZE: Int = 2
  val NSEGMENTS: Int = 3
  val WORD_SIZE: Int = 4
  val RESP_STATUS: Int = 5
  val OUTSTANDING: Int = 6
  /** Total number of control registers. */
  val NCSRS: Int = 7

  /** First CSR address of the window. */
  val CSR_BASE: Int = 0x800
  /** One past the last CSR address of the window. */
  val CSR_END: Int = CSR_BASE + NCSRS
}
|
|
||||||
import DmaCtrlRegNumbers._
|
|
||||||
|
|
||||||
/** Register file holding the writable DMA control registers plus a counter
  * of outstanding transactions.
  *
  * Writes whose (truncated) address is below `nWriteRegs` update the matching
  * register. A write whose address equals `OUTSTANDING` is passed to the
  * counter as its second argument, `io.incr_outstanding` as its first.
  * NOTE(review): presumably the first argument counts up and the second counts
  * down — confirm against `TwoWayCounter`.
  */
class DmaCtrlRegFile(implicit val p: Parameters) extends Module
    with HasClientDmaParameters with HasTileLinkParameters {

  private val nWriteRegs = 5
  private val nRegs = nWriteRegs + 2

  val io = new Bundle {
    // Write port.
    val wen = Bool(INPUT)
    val waddr = UInt(INPUT, log2Up(nRegs))
    val wdata = UInt(INPUT, dmaSegmentSizeBits)

    // Current register values.
    val src_stride = UInt(OUTPUT, dmaSegmentSizeBits)
    val dst_stride = UInt(OUTPUT, dmaSegmentSizeBits)
    val segment_size = UInt(OUTPUT, dmaSegmentSizeBits)
    val nsegments = UInt(OUTPUT, dmaSegmentBits)
    val word_size = UInt(OUTPUT, dmaWordSizeBits)

    // Outstanding-transaction tracking.
    val incr_outstanding = Bool(INPUT)
    val xact_outstanding = Bool(OUTPUT)
  }

  val regs = Reg(Vec(nWriteRegs, UInt(width = dmaSegmentSizeBits)))
  // Only the low bits needed to index NCSRS registers are used.
  val waddr = io.waddr(log2Up(NCSRS) - 1, 0)

  io.src_stride := regs(SRC_STRIDE)
  io.dst_stride := regs(DST_STRIDE)
  io.segment_size := regs(SEGMENT_SIZE)
  io.nsegments := regs(NSEGMENTS)
  io.word_size := regs(WORD_SIZE)

  when (io.wen && waddr < UInt(nWriteRegs)) {
    regs(waddr) := io.wdata
  }

  val outstanding_cnt = TwoWayCounter(
    io.incr_outstanding,
    io.wen && io.waddr === UInt(OUTSTANDING),
    tlMaxClientXacts)

  io.xact_outstanding := outstanding_cnt > UInt(0)
}
|
|
||||||
|
|
||||||
/** RoCC accelerator that turns custom instructions into DMA requests.
  *
  * Instructions with `funct < 8` are treated as transfers: `rs1` supplies the
  * destination start, `rs2` the source start, and the remaining request fields
  * come from the control register file, which is written through the RoCC CSR
  * port. The status of the last response is latched and exposed as the
  * `RESP_STATUS` CSR.
  *
  * NOTE(review): only CSR read slots `SRC_STRIDE` through `RESP_STATUS` are
  * driven here; if `OUTSTANDING` also has a read slot it is left undriven —
  * confirm this is intended.
  * NOTE(review): a queued command with `funct >= 8` never sees `cmd.ready`, so
  * it stays at the head of the queue and keeps `io.busy` high — confirm such
  * commands cannot reach this accelerator.
  */
class DmaController(implicit p: Parameters) extends RoCC()(p)
    with HasClientDmaParameters {
  // This accelerator never uses the memory request port, never responds
  // with register data and never interrupts.
  io.mem.req.valid := Bool(false)
  io.resp.valid := Bool(false)
  io.interrupt := Bool(false)

  val cmd = Queue(io.cmd)
  val inst = cmd.bits.inst
  val is_transfer = inst.funct < UInt(8)

  val reg_status = Reg(UInt(width = dmaStatusBits))

  // Control registers, written through the CSR write port.
  val crfile = Module(new DmaCtrlRegFile)
  crfile.io.waddr := io.csr.waddr
  crfile.io.wdata := io.csr.wdata
  crfile.io.wen := io.csr.wen

  // CSR read-back.
  io.csr.rdata(SRC_STRIDE) := crfile.io.src_stride
  io.csr.rdata(DST_STRIDE) := crfile.io.dst_stride
  io.csr.rdata(SEGMENT_SIZE) := crfile.io.segment_size
  io.csr.rdata(NSEGMENTS) := crfile.io.nsegments
  io.csr.rdata(WORD_SIZE) := crfile.io.word_size
  io.csr.rdata(RESP_STATUS) := reg_status

  val frontend = Module(new DmaFrontend)
  io.ptw(0) <> frontend.io.ptw
  io.autl <> frontend.io.mem
  crfile.io.incr_outstanding := frontend.io.incr_outstanding
  frontend.io.host_id := io.host_id

  // Hand transfer commands to the frontend.
  frontend.io.cpu.req.valid := cmd.valid && is_transfer
  frontend.io.cpu.req.bits := ClientDmaRequest(
    cmd = inst.funct,
    src_start = cmd.bits.rs2,
    dst_start = cmd.bits.rs1,
    src_stride = crfile.io.src_stride,
    dst_stride = crfile.io.dst_stride,
    segment_size = crfile.io.segment_size,
    nsegments = crfile.io.nsegments,
    word_size = crfile.io.word_size)
  cmd.ready := is_transfer && frontend.io.cpu.req.ready

  when (frontend.io.cpu.resp.valid) {
    reg_status := frontend.io.cpu.resp.bits.status
  }

  io.busy := cmd.valid || frontend.io.busy || crfile.io.xact_outstanding
}
|
|
@ -12,14 +12,6 @@ import cde.{Parameters, Field}
|
|||||||
case object RoccMaxTaggedMemXacts extends Field[Int]
|
case object RoccMaxTaggedMemXacts extends Field[Int]
|
||||||
case object RoccNMemChannels extends Field[Int]
|
case object RoccNMemChannels extends Field[Int]
|
||||||
case object RoccNPTWPorts extends Field[Int]
|
case object RoccNPTWPorts extends Field[Int]
|
||||||
case object RoccNCSRs extends Field[Int]
|
|
||||||
|
|
||||||
/** CSR port between the core and RoCC accelerators, seen from the core:
  * read data comes in, the write address/data/enable go out.
  */
class RoCCCSRs(implicit p: Parameters) extends CoreBundle()(p) {
  /** One read value per RoCC CSR (`nRoccCsrs` entries, `xLen` bits each). */
  val rdata = Vec(nRoccCsrs, UInt(INPUT, xLen))
  /** CSR write address, `CSR.ADDRSZ` bits wide. */
  val waddr = UInt(OUTPUT, CSR.ADDRSZ)
  /** CSR write data. */
  val wdata = UInt(OUTPUT, xLen)
  /** CSR write enable. */
  val wen = Bool(OUTPUT)
}
|
|
||||||
|
|
||||||
class RoCCInstruction extends Bundle
|
class RoCCInstruction extends Bundle
|
||||||
{
|
{
|
||||||
@ -59,8 +51,6 @@ class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) {
|
|||||||
val fpu_req = Decoupled(new FPInput)
|
val fpu_req = Decoupled(new FPInput)
|
||||||
val fpu_resp = Decoupled(new FPResult).flip
|
val fpu_resp = Decoupled(new FPResult).flip
|
||||||
val exception = Bool(INPUT)
|
val exception = Bool(INPUT)
|
||||||
val csr = (new RoCCCSRs).flip
|
|
||||||
val host_id = UInt(INPUT, log2Up(nCores))
|
|
||||||
|
|
||||||
override def cloneType = new RoCCInterface().asInstanceOf[this.type]
|
override def cloneType = new RoCCInterface().asInstanceOf[this.type]
|
||||||
}
|
}
|
||||||
|
@ -63,9 +63,6 @@ trait HasCoreParameters extends HasAddrMapParameters {
|
|||||||
val vaddrBitsExtended = vpnBitsExtended + pgIdxBits
|
val vaddrBitsExtended = vpnBitsExtended + pgIdxBits
|
||||||
val coreMaxAddrBits = paddrBits max vaddrBitsExtended
|
val coreMaxAddrBits = paddrBits max vaddrBitsExtended
|
||||||
val nCustomMrwCsrs = p(NCustomMRWCSRs)
|
val nCustomMrwCsrs = p(NCustomMRWCSRs)
|
||||||
val roccCsrs = if (p(BuildRoCC).isEmpty) Nil
|
|
||||||
else p(BuildRoCC).flatMap(_.csrs)
|
|
||||||
val nRoccCsrs = p(RoccNCSRs)
|
|
||||||
val nCores = p(NTiles)
|
val nCores = p(NTiles)
|
||||||
|
|
||||||
// fetchWidth doubled, but coreInstBytes halved, for RVC
|
// fetchWidth doubled, but coreInstBytes halved, for RVC
|
||||||
@ -499,7 +496,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) {
|
|||||||
csr.io.prci <> io.prci
|
csr.io.prci <> io.prci
|
||||||
io.fpu.fcsr_rm := csr.io.fcsr_rm
|
io.fpu.fcsr_rm := csr.io.fcsr_rm
|
||||||
csr.io.fcsr_flags := io.fpu.fcsr_flags
|
csr.io.fcsr_flags := io.fpu.fcsr_flags
|
||||||
io.rocc.csr <> csr.io.rocc.csr
|
|
||||||
csr.io.rocc.interrupt <> io.rocc.interrupt
|
csr.io.rocc.interrupt <> io.rocc.interrupt
|
||||||
csr.io.pc := wb_reg_pc
|
csr.io.pc := wb_reg_pc
|
||||||
csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata)
|
csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata)
|
||||||
|
@ -18,7 +18,6 @@ case class RoccParameters(
|
|||||||
generator: Parameters => RoCC,
|
generator: Parameters => RoCC,
|
||||||
nMemChannels: Int = 0,
|
nMemChannels: Int = 0,
|
||||||
nPTWPorts : Int = 0,
|
nPTWPorts : Int = 0,
|
||||||
csrs: Seq[Int] = Nil,
|
|
||||||
useFPU: Boolean = false)
|
useFPU: Boolean = false)
|
||||||
|
|
||||||
abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null)
|
abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null)
|
||||||
@ -70,12 +69,10 @@ class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null)
|
|||||||
val rocc = accelParams.generator(p.alterPartial({
|
val rocc = accelParams.generator(p.alterPartial({
|
||||||
case RoccNMemChannels => accelParams.nMemChannels
|
case RoccNMemChannels => accelParams.nMemChannels
|
||||||
case RoccNPTWPorts => accelParams.nPTWPorts
|
case RoccNPTWPorts => accelParams.nPTWPorts
|
||||||
case RoccNCSRs => accelParams.csrs.size
|
|
||||||
}))
|
}))
|
||||||
val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams))
|
val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams))
|
||||||
rocc.io.cmd <> cmdRouter.io.out(i)
|
rocc.io.cmd <> cmdRouter.io.out(i)
|
||||||
rocc.io.exception := core.io.rocc.exception
|
rocc.io.exception := core.io.rocc.exception
|
||||||
rocc.io.host_id := io.prci.id
|
|
||||||
dcIF.io.requestor <> rocc.io.mem
|
dcIF.io.requestor <> rocc.io.mem
|
||||||
dcPorts += dcIF.io.cache
|
dcPorts += dcIF.io.cache
|
||||||
uncachedArbPorts += rocc.io.autl
|
uncachedArbPorts += rocc.io.autl
|
||||||
@ -101,18 +98,6 @@ class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null)
|
|||||||
core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _)
|
core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _)
|
||||||
respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp))
|
respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp))
|
||||||
|
|
||||||
if (p(RoccNCSRs) > 0) {
|
|
||||||
core.io.rocc.csr.rdata <> roccs.flatMap(_.io.csr.rdata)
|
|
||||||
for ((rocc, accelParams) <- roccs.zip(buildRocc)) {
|
|
||||||
rocc.io.csr.waddr := core.io.rocc.csr.waddr
|
|
||||||
rocc.io.csr.wdata := core.io.rocc.csr.wdata
|
|
||||||
rocc.io.csr.wen := core.io.rocc.csr.wen &&
|
|
||||||
accelParams.csrs
|
|
||||||
.map(core.io.rocc.csr.waddr === UInt(_))
|
|
||||||
.reduce((a, b) => a || b)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ptwPorts ++= roccs.flatMap(_.io.ptw)
|
ptwPorts ++= roccs.flatMap(_.io.ptw)
|
||||||
uncachedPorts ++= roccs.flatMap(_.io.utl)
|
uncachedPorts ++= roccs.flatMap(_.io.utl)
|
||||||
}
|
}
|
||||||
|
@ -1,535 +0,0 @@
|
|||||||
package uncore.devices
|
|
||||||
|
|
||||||
import Chisel._
|
|
||||||
import cde.{Parameters, Field}
|
|
||||||
import junctions._
|
|
||||||
import junctions.NastiConstants._
|
|
||||||
import uncore.tilelink._
|
|
||||||
import uncore.Util._
|
|
||||||
|
|
||||||
/** Configuration key read into `nDmaTransactors`. */
case object NDmaTransactors extends Field[Int]

/** Configuration key read into `nDmaXacts` (sizes the transaction id). */
case object NDmaXacts extends Field[Int]

/** Configuration key read into `nDmaClients` (sizes the client id). */
case object NDmaClients extends Field[Int]
|
|
||||||
|
|
||||||
/** Parameters shared by all DMA bundles and modules, read from the
  * configuration and turned into field widths.
  */
trait HasDmaParameters {
  implicit val p: Parameters

  val nDmaTransactors: Int = p(NDmaTransactors)
  val nDmaXacts: Int = p(NDmaXacts)
  val nDmaClients: Int = p(NDmaClients)

  /** Bits needed to name one of `nDmaXacts` transactions. */
  val dmaXactIdBits: Int = log2Up(nDmaXacts)
  /** Bits needed to name one of `nDmaClients` clients. */
  val dmaClientIdBits: Int = log2Up(nDmaClients)
  /** Width of the source, destination and length fields. */
  val addrBits: Int = p(PAddrBits)
  val dmaStatusBits: Int = 2
  val dmaWordSizeBits: Int = 2
}
|
|
||||||
|
|
||||||
/** Base class for modules that need the DMA parameters. */
abstract class DmaModule(implicit val p: Parameters)
    extends Module
    with HasDmaParameters

/** Base class for bundles that need the DMA parameters. */
abstract class DmaBundle(implicit val p: Parameters)
    extends ParameterizedBundle()(p)
    with HasDmaParameters
|
|
||||||
|
|
||||||
/** A single DMA transfer request.
  *
  * Field order is part of the bundle layout and must not be rearranged.
  */
class DmaRequest(implicit p: Parameters) extends DmaBundle()(p) {
  /** Transaction id. */
  val xact_id = UInt(width = dmaXactIdBits)
  /** Id of the requesting client. */
  val client_id = UInt(width = dmaClientIdBits)
  /** Command code, one of the `DMA_CMD_*` values. */
  val cmd = UInt(width = DmaRequest.DMA_CMD_SZ)
  /** Source address. */
  val source = UInt(width = addrBits)
  /** Destination address. */
  val dest = UInt(width = addrBits)
  /** Transfer length. */
  val length = UInt(width = addrBits)
  /** Word size selector. */
  val size = UInt(width = dmaWordSizeBits)
}
|
|
||||||
|
|
||||||
/** Completion message for a DMA transfer. */
class DmaResponse(implicit p: Parameters) extends DmaBundle()(p) {
  /** Id of the transaction that finished. */
  val xact_id = UInt(width = dmaXactIdBits)
  /** Id of the client that issued the transaction. */
  val client_id = UInt(width = dmaClientIdBits)
  /** Completion status. */
  val status = UInt(width = dmaStatusBits)
}
|
|
||||||
|
|
||||||
/** Command encodings and factory for [[DmaRequest]] wires. */
object DmaRequest {
  /** Width of the command field. */
  val DMA_CMD_SZ = 3

  val DMA_CMD_COPY = UInt("b000")
  val DMA_CMD_PFR  = UInt("b010")
  val DMA_CMD_PFW  = UInt("b011")
  val DMA_CMD_SIN  = UInt("b100")
  val DMA_CMD_SOUT = UInt("b101")

  /** Builds a `Wire` of type [[DmaRequest]] with every field driven.
    *
    * @param xact_id   transaction id (defaults to 0)
    * @param client_id id of the requesting client
    * @param cmd       command code
    * @param source    source address
    * @param dest      destination address
    * @param length    transfer length
    * @param size      word size selector (defaults to 0)
    * @return the freshly created, fully connected request wire
    */
  def apply(xact_id: UInt = UInt(0),
            client_id: UInt,
            cmd: UInt,
            source: UInt,
            dest: UInt,
            length: UInt,
            size: UInt = UInt(0))(implicit p: Parameters): DmaRequest = {
    val request = Wire(new DmaRequest)
    request.xact_id := xact_id
    request.client_id := client_id
    request.cmd := cmd
    request.source := source
    request.dest := dest
    request.length := length
    request.size := size
    request
  }
}
|
|
||||||
import DmaRequest._
|
|
||||||
|
|
||||||
/** DMA port: decoupled requests out, decoupled responses in. */
class DmaIO(implicit p: Parameters) extends DmaBundle()(p) {
  /** Outgoing request channel. */
  val req = Decoupled(new DmaRequest)
  /** Incoming response channel (flipped). */
  val resp = Decoupled(new DmaResponse).flip
}
|
|
||||||
|
|
||||||
/** Ports of a DMA tracker: a DMA port facing the requester, an uncached
  * TileLink client port and a NASTI port.
  */
class DmaTrackerIO(implicit p: Parameters) extends DmaBundle()(p) {
  /** Requests in, responses out (flipped [[DmaIO]]). */
  val dma = (new DmaIO).flip
  /** Uncached TileLink client port. */
  val mem = new ClientUncachedTileLinkIO
  /** NASTI port. */
  val mmio = new NastiIO
}
|
|
||||||
|
|
||||||
/** Accepts DMA requests written over a NASTI control port and reports each
  * completion by writing the status over the NASTI MMIO port.
  *
  * Control path: a write burst is collected word by word into `ctrl_regs`,
  * reinterpreted as a [[DmaRequest]] and issued on `io.dma.req` with a free
  * transaction id; the NASTI write response is sent once the request has been
  * accepted.
  *
  * Response path: each `io.dma.resp` is latched and turned into one NASTI
  * write (address, data and response tracked by separate pending flags).
  *
  * As written this module cannot be elaborated: the `size` argument of the
  * MMIO write address channel executes `require(false, ...)`, and so does
  * every entry of `addrTable`.
  *
  * Statement order matters: later connections to the same register win.
  */
class DmaManager(outstandingCSR: Int)(implicit p: Parameters)
    extends DmaModule()(p)
    with HasNastiParameters
    with HasAddrMapParameters {

  val io = new Bundle {
    val ctrl = (new NastiIO).flip
    val mmio = new NastiIO
    val dma = new DmaIO
  }

  private val wordBits = 1 << log2Up(addrBits)
  private val wordBytes = wordBits / 8
  private val wordOff = log2Up(wordBytes)
  private val wordMSB = wordOff + 2

  val s_idle :: s_wdata :: s_dma_req :: s_wresp :: Nil = Enum(Bits(), 4)
  val state = Reg(init = s_idle)

  // Staging storage for the incoming control write burst.
  val nCtrlWords = (addrBits * 4) / nastiXDataBits
  val ctrl_regs = Reg(Vec(nCtrlWords, UInt(width = nastiXDataBits)))
  val ctrl_idx = Reg(UInt(width = log2Up(nCtrlWords)))
  val ctrl_done = Reg(Bool())
  val ctrl_blob = ctrl_regs.asUInt
  val ctrl_id = Reg(UInt(width = nastiXIdBits))

  val sizeOffset = 3 * addrBits
  val cmdOffset = sizeOffset + dmaWordSizeBits

  // The staged words viewed as a request; one busy bit per transaction id.
  val dma_req = new DmaRequest().fromBits(ctrl_blob)
  val dma_busy = Reg(init = UInt(0, nDmaXacts))
  val dma_xact_id = PriorityEncoder(~dma_busy)

  when (io.ctrl.aw.fire()) {
    ctrl_id := io.ctrl.aw.bits.id
    ctrl_idx := UInt(0)
    ctrl_done := Bool(false)
    state := s_wdata
  }

  when (io.ctrl.w.fire()) {
    // Words beyond nCtrlWords are accepted but not stored.
    when (!ctrl_done) {
      ctrl_regs(ctrl_idx) := io.ctrl.w.bits.data
      ctrl_idx := ctrl_idx + UInt(1)
    }
    when (ctrl_idx === UInt(nCtrlWords - 1)) {
      ctrl_done := Bool(true)
    }
    when (io.ctrl.w.bits.last) {
      state := s_dma_req
    }
  }

  // Set the bit of a newly issued id, clear the bit of a completed one.
  dma_busy := (dma_busy |
    Mux(io.dma.req.fire(), UIntToOH(dma_xact_id), UInt(0))) &
    ~Mux(io.dma.resp.fire(), UIntToOH(io.dma.resp.bits.xact_id), UInt(0))

  when (io.dma.req.fire()) {
    state := s_wresp
  }
  when (io.ctrl.b.fire()) {
    state := s_idle
  }

  // The control port is write-only.
  io.ctrl.ar.ready := Bool(false)
  io.ctrl.aw.ready := (state === s_idle)
  io.ctrl.w.ready := (state === s_wdata)

  io.ctrl.r.valid := Bool(false)
  io.ctrl.b.valid := (state === s_wresp)
  io.ctrl.b.bits := NastiWriteResponseChannel(id = ctrl_id)

  io.dma.req.valid := (state === s_dma_req) && !dma_busy.andR
  io.dma.req.bits := dma_req
  // Overrides the id carried in the staged words with the allocated one.
  io.dma.req.bits.xact_id := dma_xact_id

  // Progress of the status write-back for the latched response.
  val resp_waddr_pending = Reg(init = Bool(false))
  val resp_wdata_pending = Reg(init = Bool(false))
  val resp_wresp_pending = Reg(init = Bool(false))
  val resp_pending = resp_waddr_pending || resp_wdata_pending || resp_wresp_pending

  val resp_client_id = Reg(UInt(width = dmaClientIdBits))
  val resp_status = Reg(UInt(width = dmaStatusBits))

  // Only one response is written back at a time.
  io.dma.resp.ready := !resp_pending

  when (io.dma.resp.fire()) {
    resp_client_id := io.dma.resp.bits.client_id
    resp_status := io.dma.resp.bits.status
    resp_waddr_pending := Bool(true)
    resp_wdata_pending := Bool(true)
    resp_wresp_pending := Bool(true)
  }

  val addrTable = Vec.tabulate(nDmaClients) { i =>
    //UInt(addrMap(s"conf:csr$i").start + outstandingCSR * csrDataBytes)
    require(false, "CSR MMIO ports no longer exist")
    UInt(0)
  }

  // The MMIO port is write-only.
  io.mmio.ar.valid := Bool(false)
  io.mmio.aw.valid := resp_waddr_pending
  io.mmio.aw.bits := NastiWriteAddressChannel(
    id = UInt(0),
    addr = addrTable(resp_client_id),
    size = { require(false, "CSR MMIO ports no longer exist"); UInt(0) })
  io.mmio.w.valid := resp_wdata_pending
  io.mmio.w.bits := NastiWriteDataChannel(data = resp_status)
  io.mmio.b.ready := resp_wresp_pending
  io.mmio.r.ready := Bool(false)

  when (io.mmio.aw.fire()) {
    resp_waddr_pending := Bool(false)
  }
  when (io.mmio.w.fire()) {
    resp_wdata_pending := Bool(false)
  }
  when (io.mmio.b.fire()) {
    resp_wresp_pending := Bool(false)
  }
}
|
|
||||||
|
|
||||||
/**
 * DMA engine top level.
 *
 * Instantiates a [[DmaManager]] (driven from `io.ctrl`) and a [[DmaTrackerFile]]
 * (fed by the manager's DMA port), then arbitrates their outgoing traffic:
 *  - all tracker TileLink ports are merged onto `io.mem`;
 *  - all tracker NASTI ports plus the manager's NASTI port are merged onto `io.mmio`.
 *
 * @param outstandingCSR forwarded unchanged to the [[DmaManager]] constructor
 */
class DmaEngine(outstandingCSR: Int)(implicit p: Parameters) extends DmaModule()(p) {
  val io = new Bundle {
    val ctrl = (new NastiIO).flip
    val mem = new ClientUncachedTileLinkIO
    val mmio = new NastiIO
  }

  // Request front-end and the pool of trackers that carry out the transfers.
  val manager = Module(new DmaManager(outstandingCSR))
  val trackers = Module(new DmaTrackerFile)

  // Ports that have to share the single memory / MMIO interfaces of this module.
  val innerIOs = trackers.io.mem
  val outerIOs = trackers.io.mmio :+ manager.io.mmio

  // One arbiter per outgoing interface, sized by the number of ports feeding it.
  val innerArb = Module(new ClientUncachedTileLinkIOArbiter(innerIOs.size))
  val outerArb = Module(new NastiArbiter(outerIOs.size))

  // Control path: external control port -> manager -> trackers.
  manager.io.ctrl <> io.ctrl
  trackers.io.dma <> manager.io.dma

  // Memory path: trackers -> TileLink arbiter -> io.mem.
  innerArb.io.in <> innerIOs
  io.mem <> innerArb.io.out

  // MMIO path: trackers and manager -> NASTI arbiter -> io.mmio.
  outerArb.io.master <> outerIOs
  io.mmio <> outerArb.io.slave

  // Any valid NASTI response seen on io.mmio must carry a zero resp code.
  assert(
    !io.mmio.b.valid || io.mmio.b.bits.resp === UInt(0),
    "DmaEngine: NASTI write response error")

  assert(
    !io.mmio.r.valid || io.mmio.r.bits.resp === UInt(0),
    "DmaEngine: NASTI read response error")
}
|
|
||||||
|
|
||||||
/**
 * A bank of `nDmaTransactors` [[DmaTracker]] instances behind a single DMA port.
 *
 * Each tracker's TileLink and NASTI ports are exposed one-to-one through
 * `io.mem` and `io.mmio`. With more than one tracker, an incoming request is
 * steered to the lowest-indexed tracker that reports ready, and responses are
 * merged through a round-robin arbiter. With a single tracker the DMA port is
 * connected straight through.
 */
class DmaTrackerFile(implicit p: Parameters) extends DmaModule()(p) {
  val io = new Bundle {
    val dma = (new DmaIO).flip
    val mem = Vec(nDmaTransactors, new ClientUncachedTileLinkIO)
    val mmio = Vec(nDmaTransactors, new NastiIO)
  }

  val trackers = List.fill(nDmaTransactors) { Module(new DmaTracker) }
  // Bit i is set when tracker i can accept a new request.
  val reqReadys = trackers.map(_.io.dma.req.ready).asUInt

  io.mem <> trackers.map(_.io.mem)
  io.mmio <> trackers.map(_.io.mmio)

  if (nDmaTransactors <= 1) {
    // Only one tracker: no steering or arbitration required.
    io.dma <> trackers.head.io.dma
  } else {
    // Responses from all trackers share io.dma.resp via round-robin arbitration.
    val respArbiter = Module(new RRArbiter(new DmaResponse, nDmaTransactors))
    respArbiter.io.in <> trackers.map(_.io.dma.resp)
    io.dma.resp <> respArbiter.io.out

    // Index of the lowest-numbered ready tracker; it alone sees the request as valid.
    val chosen = PriorityEncoder(reqReadys)
    for ((tracker, i) <- trackers.zipWithIndex) {
      tracker.io.dma.req.bits := io.dma.req.bits
      tracker.io.dma.req.valid := io.dma.req.valid && chosen === UInt(i)
    }

    // The file is ready whenever at least one tracker is.
    io.dma.req.ready := reqReadys.orR
  }
}
|
|
||||||
|
|
||||||
/**
 * A single DMA transaction tracker.
 *
 * A request is accepted on `io.dma.req` while the tracker is idle. Depending on
 * the command it then either
 *  - copies data block by block (GetBlock from `src_block`, PutBlock to `dst_block`)
 *    over `io.mem`,
 *  - issues get/put prefetches over `io.mem`,
 *  - reads words from `io.mmio` into the buffer and puts them to memory (stream in), or
 *  - gets blocks from memory and writes them word by word to `io.mmio` (stream out),
 * and finally signals completion on `io.dma.resp` (status is always 0).
 *
 * The data buffer holds two blocks' worth of beats ("halves"), selected by
 * `get_half` / `put_half`.
 */
class DmaTracker(implicit p: Parameters) extends DmaModule()(p)
    with HasTileLinkParameters with HasNastiParameters {
  val io = new DmaTrackerIO

  // Number of address bits covering one block, and the block size in bytes.
  private val blockOffset = tlBeatAddrBits + tlByteAddrBits
  private val blockBytes = tlDataBeats * tlDataBytes

  // Two blocks of beat-wide storage.
  val data_buffer = Reg(Vec(2 * tlDataBeats, Bits(width = tlDataBits)))
  // One bit per data_buffer entry, set while the corresponding Get beat is
  // outstanding. The width must be given explicitly: `UInt(n)` without `width =`
  // constructs a literal of value n rather than an n-bit type, which would leave
  // this register too narrow for the 2 * tlDataBeats-bit masks applied below.
  val get_inflight = Reg(UInt(width = 2 * tlDataBeats))
  val put_inflight = Reg(Bool())
  val put_half = Reg(UInt(width = 1))
  val get_half = Reg(UInt(width = 1))
  val prefetch_put = Reg(Bool())
  val get_done = !get_inflight.orR

  val src_block = Reg(UInt(width = tlBlockAddrBits))
  val dst_block = Reg(UInt(width = tlBlockAddrBits))
  // Destination byte offset within the first block (cleared after the first put).
  val offset = Reg(UInt(width = blockOffset))
  // Absolute difference between source and destination in-block offsets.
  val alignment = Reg(UInt(width = blockOffset))
  // Set when the source in-block offset is smaller than the destination's.
  val shift_dir = Reg(Bool())

  val bytes_left = Reg(UInt(width = addrBits))
  val streaming = Reg(Bool())
  val stream_addr = Reg(UInt(width = nastiXAddrBits))
  val stream_len = Reg(UInt(width = nastiXLenBits))
  val stream_size = Reg(UInt(width = nastiXSizeBits))
  val stream_idx = Reg(UInt(width = blockOffset))
  // Byte-enable pattern for one stream word: 1, 2 or 4 bytes, otherwise 8.
  val stream_bytesel = MuxLookup(stream_size, UInt("b11111111"), Seq(
    UInt("b00") -> UInt("b00000001"),
    UInt("b01") -> UInt("b00000011"),
    UInt("b10") -> UInt("b00001111")))
  // Same pattern expanded to a bit mask (8 bits per byte).
  val stream_mask = FillInterleaved(8, stream_bytesel)
  val stream_last = Reg(Bool())

  val stream_word_bytes = UInt(1) << stream_size
  // Position of the current stream word inside the buffer: beat, byte, bit shift.
  val stream_beat_idx = stream_idx(blockOffset - 1, tlByteAddrBits)
  val stream_byte_idx = stream_idx(tlByteAddrBits - 1, 0)
  val stream_bitshift = Cat(stream_byte_idx, UInt(0, 3))
  // Incoming NASTI word merged into the current buffer beat at the word's position.
  val stream_in_beat =
    (((io.mmio.r.bits.data & stream_mask) << stream_bitshift)) |
    (data_buffer(stream_beat_idx) & ~(stream_mask << stream_bitshift))
  // Outgoing NASTI word: current buffer beat shifted down to bit 0.
  val stream_out_word = data_buffer(stream_beat_idx) >> stream_bitshift
  val stream_out_last = bytes_left === stream_word_bytes

  val acq = io.mem.acquire.bits
  val gnt = io.mem.grant.bits

  val (s_idle :: s_get :: s_put :: s_prefetch ::
       s_stream_read_req :: s_stream_read_resp ::
       s_stream_write_req :: s_stream_write_data :: s_stream_write_resp ::
       s_wait :: s_resp :: Nil) = Enum(Bits(), 11)
  val state = Reg(init = s_idle)

  // Counts the data-carrying acquire beats of the current PutBlock.
  val (put_beat, put_done) = Counter(
    io.mem.acquire.fire() && acq.hasData(), tlDataBeats)

  // Per-byte write mask for the current put beat: a byte is written when its
  // index within the block lies in [offset, bytes_left).
  val put_mask = Seq.tabulate(tlDataBytes) { i =>
    val byte_index = Cat(put_beat, UInt(i, tlByteAddrBits))
    byte_index >= offset && byte_index < bytes_left
  }.asUInt

  val prefetch_sent = io.mem.acquire.fire() && io.mem.acquire.bits.isPrefetch()
  // One busy bit per prefetch transaction id; ids are handed out by a counter.
  val prefetch_busy = Reg(init = UInt(0, tlMaxClientXacts))
  val (prefetch_id, _) = Counter(prefetch_sent, tlMaxClientXacts)

  val base_index = Cat(put_half, put_beat)
  val put_data = Wire(init = Bits(0, tlDataBits))
  // `alignment` split into whole beats and a bit shift within a beat.
  val beat_align = alignment(blockOffset - 1, tlByteAddrBits)
  val bit_align = Cat(alignment(tlByteAddrBits - 1, 0), UInt(0, 3))
  val rev_align = UInt(tlDataBits) - bit_align

  /** Returns bit `sel` of `value`. */
  def getBit(value: UInt, sel: UInt): Bool =
    (value >> sel)(0)

  // Build the outgoing put beat from the buffer, realigning by `alignment`.
  when (alignment === UInt(0)) {
    put_data := data_buffer(base_index)
  } .elsewhen (shift_dir) {
    // Source offset below destination offset: read from earlier buffer entries.
    val shift_index = base_index - beat_align
    when (bit_align === UInt(0)) {
      put_data := data_buffer(shift_index)
    } .otherwise {
      val upper_bits = data_buffer(shift_index)
      val lower_bits = data_buffer(shift_index - UInt(1))
      val upper_shifted = upper_bits << bit_align
      val lower_shifted = lower_bits >> rev_align
      put_data := upper_shifted | lower_shifted
    }
  } .otherwise {
    // Source offset above destination offset: read from later buffer entries.
    val shift_index = base_index + beat_align
    when (bit_align === UInt(0)) {
      put_data := data_buffer(shift_index)
    } .otherwise {
      val upper_bits = data_buffer(shift_index + UInt(1))
      val lower_bits = data_buffer(shift_index)
      val upper_shifted = upper_bits << rev_align
      val lower_shifted = lower_bits >> bit_align
      put_data := upper_shifted | lower_shifted
    }
  }

  val put_acquire = PutBlock(
    client_xact_id = UInt(2),
    addr_block = dst_block,
    addr_beat = put_beat,
    data = put_data,
    wmask = Some(put_mask))

  // The Get's transaction id is the buffer half it fills; the grant handler
  // uses bit 0 of the returned id to pick the half to write.
  val get_acquire = GetBlock(
    client_xact_id = get_half,
    addr_block = src_block,
    alloc = Bool(false))

  val prefetch_acquire = Mux(prefetch_put,
    PutPrefetch(client_xact_id = prefetch_id, addr_block = dst_block),
    GetPrefetch(client_xact_id = prefetch_id, addr_block = dst_block))

  val resp_xact_id = Reg(UInt(width = dmaXactIdBits))
  val resp_client_id = Reg(UInt(width = dmaClientIdBits))

  // Puts wait for all outstanding gets; prefetches wait for a free id.
  io.mem.acquire.valid := (state === s_get) ||
                          (state === s_put && get_done) ||
                          (state === s_prefetch && !prefetch_busy(prefetch_id))
  io.mem.acquire.bits := MuxLookup(
    state, prefetch_acquire, Seq(
      s_get -> get_acquire,
      s_put -> put_acquire))
  io.mem.grant.ready := Bool(true)
  io.dma.req.ready := state === s_idle
  io.dma.resp.valid := state === s_resp
  io.dma.resp.bits.xact_id := resp_xact_id
  io.dma.resp.bits.client_id := resp_client_id
  io.dma.resp.bits.status := UInt(0)
  io.mmio.ar.valid := (state === s_stream_read_req)
  io.mmio.ar.bits := NastiReadAddressChannel(
    id = UInt(0),
    addr = stream_addr,
    size = stream_size,
    len = stream_len,
    burst = BURST_FIXED)
  io.mmio.r.ready := (state === s_stream_read_resp)

  io.mmio.aw.valid := (state === s_stream_write_req)
  io.mmio.aw.bits := NastiWriteAddressChannel(
    id = UInt(0),
    addr = stream_addr,
    size = stream_size,
    len = stream_len,
    burst = BURST_FIXED)
  io.mmio.w.valid := (state === s_stream_write_data) && get_done
  io.mmio.w.bits := NastiWriteDataChannel(
    data = stream_out_word,
    last = stream_out_last)
  io.mmio.b.ready := (state === s_stream_write_resp)

  // Latch a new request and pick the starting state from its command.
  when (io.dma.req.fire()) {
    val src_off = io.dma.req.bits.source(blockOffset - 1, 0)
    val dst_off = io.dma.req.bits.dest(blockOffset - 1, 0)
    val direction = src_off < dst_off

    resp_xact_id := io.dma.req.bits.xact_id
    resp_client_id := io.dma.req.bits.client_id
    src_block := io.dma.req.bits.source(addrBits - 1, blockOffset)
    dst_block := io.dma.req.bits.dest(addrBits - 1, blockOffset)
    alignment := Mux(direction, dst_off - src_off, src_off - dst_off)
    shift_dir := direction
    offset := dst_off
    // Counted from the start of the first destination block, hence + dst_off.
    bytes_left := io.dma.req.bits.length + dst_off
    get_inflight := UInt(0)
    put_inflight := Bool(false)
    get_half := UInt(0)
    put_half := UInt(0)
    streaming := Bool(false)
    stream_len := (io.dma.req.bits.length >> io.dma.req.bits.size) - UInt(1)
    stream_size := io.dma.req.bits.size
    stream_last := Bool(false)

    when (io.dma.req.bits.cmd === DMA_CMD_COPY) {
      state := s_get
    } .elsewhen (io.dma.req.bits.cmd(2, 1) === UInt("b01")) {
      // Prefetch commands: bit 0 selects put-prefetch over get-prefetch.
      prefetch_put := io.dma.req.bits.cmd(0)
      state := s_prefetch
    } .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SIN) {
      // Stream in: words are placed directly at their destination position,
      // so no realignment is applied on the put side.
      stream_addr := io.dma.req.bits.source
      stream_idx := dst_off
      streaming := Bool(true)
      alignment := UInt(0)
      state := s_stream_read_req
    } .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SOUT) {
      // Stream out: bytes_left counts stream bytes only (overrides the value above).
      stream_addr := io.dma.req.bits.dest
      stream_idx := src_off
      streaming := Bool(true)
      bytes_left := io.dma.req.bits.length
      state := s_stream_write_req
    }
  }

  when (io.mmio.ar.fire()) { state := s_stream_read_resp }

  // Stream in: merge each received word into the buffer; flush with a put when
  // the block is full or the burst ends.
  when (io.mmio.r.fire()) {
    data_buffer(stream_beat_idx) := stream_in_beat
    stream_idx := stream_idx + stream_word_bytes
    val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes
    when (block_finished || io.mmio.r.bits.last) { state := s_put }
  }

  when (io.mmio.aw.fire()) { state := s_get }

  // Stream out: advance through the buffer; fetch the next block when this one
  // is exhausted, or wait for the write response after the last word.
  when (io.mmio.w.fire()) {
    stream_idx := stream_idx + stream_word_bytes
    bytes_left := bytes_left - stream_word_bytes
    val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes
    when (stream_out_last) {
      state := s_stream_write_resp
    } .elsewhen (block_finished) {
      state := s_get
    }
  }

  when (io.mmio.b.fire()) { state := s_resp }

  // A GetBlock was accepted: mark every beat of the targeted half as in flight.
  when (state === s_get && io.mem.acquire.ready) {
    get_inflight := get_inflight | FillInterleaved(tlDataBeats, UIntToOH(get_half))
    src_block := src_block + UInt(1)
    when (streaming) {
      state := s_stream_write_data
    } .otherwise {
      val bytes_in_buffer = UInt(blockBytes) - alignment
      val extra_read = alignment > UInt(0) && !shift_dir && // dst_off < src_off
                       get_half === UInt(0) && // this is the first block
                       bytes_in_buffer < bytes_left // there is still more data left to fetch
      get_half := get_half + UInt(1)
      when (!extra_read) { state := s_put }
    }
  }

  // A prefetch was accepted: mark its id busy and move on to the next block,
  // or finish when less than a full block remains.
  when (prefetch_sent) {
    prefetch_busy := prefetch_busy | UIntToOH(prefetch_id)
    when (bytes_left < UInt(blockBytes)) {
      bytes_left := UInt(0)
      state := s_resp
    } .otherwise {
      bytes_left := bytes_left - UInt(blockBytes)
      dst_block := dst_block + UInt(1)
    }
  }

  when (io.mem.grant.fire()) {
    when (gnt.g_type === Grant.prefetchAckType) {
      prefetch_busy := prefetch_busy & ~UIntToOH(gnt.client_xact_id)
    } .elsewhen (gnt.hasData()) {
      // Get data: store the beat in the half named by the transaction id and
      // clear its in-flight bit.
      val write_half = gnt.client_xact_id(0)
      val write_idx = Cat(write_half, gnt.addr_beat)
      get_inflight := get_inflight & ~UIntToOH(write_idx)
      data_buffer(write_idx) := gnt.data
    } .otherwise {
      // Any other grant is treated as the acknowledgement of the put.
      put_inflight := Bool(false)
    }
  }

  when (put_done) { // state === s_put
    when (!streaming) {
      put_half := put_half + UInt(1)
    }
    offset := UInt(0)
    stream_idx := UInt(0)
    when (bytes_left < UInt(blockBytes)) {
      bytes_left := UInt(0)
    } .otherwise {
      bytes_left := bytes_left - UInt(blockBytes)
    }
    put_inflight := Bool(true)
    dst_block := dst_block + UInt(1)
    state := s_wait
  }

  // Once all gets and the put have completed, finish or continue with the next block.
  when (state === s_wait && get_done && !put_inflight) {
    state := MuxCase(s_get, Seq(
      (bytes_left === UInt(0)) -> s_resp,
      streaming -> s_stream_read_resp))
  }

  when (io.dma.resp.fire()) { state := s_idle }
}
|
|
Loading…
Reference in New Issue
Block a user