implement streaming DMA functionality
This commit is contained in:
parent
09f3c5a6e3
commit
46069ea13b
@ -3,6 +3,7 @@ package uncore
|
||||
import Chisel._
|
||||
import cde.{Parameters, Field}
|
||||
import junctions._
|
||||
import junctions.NastiConstants._
|
||||
|
||||
case object NDmaTransactors extends Field[Int]
|
||||
case object NDmaClients extends Field[Int]
|
||||
@ -16,6 +17,7 @@ trait HasDmaParameters {
|
||||
val dmaClientXactIdBits = log2Up(nDmaClients * nDmaXactsPerClient)
|
||||
val addrBits = p(PAddrBits)
|
||||
val dmaStatusBits = 2
|
||||
val dmaWordSizeBits = 2
|
||||
}
|
||||
|
||||
abstract class DmaModule(implicit val p: Parameters) extends Module with HasDmaParameters
|
||||
@ -27,6 +29,7 @@ class DmaRequest(implicit p: Parameters) extends DmaBundle()(p) {
|
||||
val source = UInt(width = addrBits)
|
||||
val dest = UInt(width = addrBits)
|
||||
val length = UInt(width = addrBits)
|
||||
val size = UInt(width = dmaWordSizeBits)
|
||||
}
|
||||
|
||||
class DmaResponse(implicit p: Parameters) extends DmaBundle()(p) {
|
||||
@ -35,23 +38,27 @@ class DmaResponse(implicit p: Parameters) extends DmaBundle()(p) {
|
||||
}
|
||||
|
||||
object DmaRequest {
|
||||
val DMA_CMD_SZ = 2
|
||||
val DMA_CMD_SZ = 3
|
||||
|
||||
val DMA_CMD_COPY = UInt(0, DMA_CMD_SZ)
|
||||
val DMA_CMD_PFR = UInt(2, DMA_CMD_SZ)
|
||||
val DMA_CMD_PFW = UInt(3, DMA_CMD_SZ)
|
||||
val DMA_CMD_COPY = UInt("b000")
|
||||
val DMA_CMD_PFR = UInt("b010")
|
||||
val DMA_CMD_PFW = UInt("b011")
|
||||
val DMA_CMD_SIN = UInt("b100")
|
||||
val DMA_CMD_SOUT = UInt("b101")
|
||||
|
||||
def apply(client_xact_id: UInt = UInt(0),
|
||||
cmd: UInt,
|
||||
source: UInt,
|
||||
dest: UInt,
|
||||
length: UInt)(implicit p: Parameters): DmaRequest = {
|
||||
length: UInt,
|
||||
size: UInt = UInt(0))(implicit p: Parameters): DmaRequest = {
|
||||
val req = Wire(new DmaRequest)
|
||||
req.client_xact_id := client_xact_id
|
||||
req.cmd := cmd
|
||||
req.source := source
|
||||
req.dest := dest
|
||||
req.length := length
|
||||
req.size := size
|
||||
req
|
||||
}
|
||||
}
|
||||
@ -62,19 +69,26 @@ class DmaIO(implicit p: Parameters) extends DmaBundle()(p) {
|
||||
val resp = Decoupled(new DmaResponse).flip
|
||||
}
|
||||
|
||||
class DmaTrackerIO(implicit p: Parameters) extends DmaBundle()(p) {
|
||||
val dma = (new DmaIO).flip
|
||||
val inner = new ClientUncachedTileLinkIO
|
||||
val outer = new NastiIO
|
||||
}
|
||||
|
||||
class DmaEngine(implicit p: Parameters) extends DmaModule()(p) {
|
||||
val io = new Bundle {
|
||||
val dma = (new DmaIO).flip
|
||||
val mem = new ClientUncachedTileLinkIO
|
||||
}
|
||||
val io = new DmaTrackerIO
|
||||
|
||||
val trackers = List.fill(nDmaTransactors) { Module(new DmaTracker) }
|
||||
val reqReadys = Vec(trackers.map(_.io.dma.req.ready)).toBits
|
||||
|
||||
if (nDmaTransactors > 1) {
|
||||
val mem_arb = Module(new ClientUncachedTileLinkIOArbiter(nDmaTransactors))
|
||||
mem_arb.io.in <> trackers.map(_.io.mem)
|
||||
io.mem <> mem_arb.io.out
|
||||
val inner_arb = Module(new ClientUncachedTileLinkIOArbiter(nDmaTransactors))
|
||||
inner_arb.io.in <> trackers.map(_.io.inner)
|
||||
io.inner <> inner_arb.io.out
|
||||
|
||||
val outer_arb = Module(new NastiArbiter(nDmaTransactors))
|
||||
outer_arb.io.master <> trackers.map(_.io.outer)
|
||||
io.outer <> outer_arb.io.slave
|
||||
|
||||
val resp_arb = Module(new RRArbiter(new DmaResponse, nDmaTransactors))
|
||||
resp_arb.io.in <> trackers.map(_.io.dma.resp)
|
||||
@ -87,17 +101,15 @@ class DmaEngine(implicit p: Parameters) extends DmaModule()(p) {
|
||||
}
|
||||
io.dma.req.ready := reqReadys.orR
|
||||
} else {
|
||||
io.mem <> trackers.head.io.mem
|
||||
io.inner <> trackers.head.io.inner
|
||||
io.outer <> trackers.head.io.outer
|
||||
io.dma <> trackers.head.io.dma
|
||||
}
|
||||
}
|
||||
|
||||
class DmaTracker(implicit p: Parameters) extends DmaModule()(p)
|
||||
with HasTileLinkParameters {
|
||||
val io = new Bundle {
|
||||
val dma = (new DmaIO).flip
|
||||
val mem = new ClientUncachedTileLinkIO
|
||||
}
|
||||
with HasTileLinkParameters with HasNastiParameters {
|
||||
val io = new DmaTrackerIO
|
||||
|
||||
private val blockOffset = tlBeatAddrBits + tlByteAddrBits
|
||||
private val blockBytes = tlDataBeats * tlDataBytes
|
||||
@ -117,23 +129,46 @@ class DmaTracker(implicit p: Parameters) extends DmaModule()(p)
|
||||
val shift_dir = Reg(Bool())
|
||||
|
||||
val bytes_left = Reg(UInt(width = addrBits))
|
||||
val streaming = Reg(Bool())
|
||||
val stream_addr = Reg(UInt(width = nastiXAddrBits))
|
||||
val stream_len = Reg(UInt(width = nastiXLenBits))
|
||||
val stream_size = Reg(UInt(width = nastiXSizeBits))
|
||||
val stream_idx = Reg(UInt(width = blockOffset))
|
||||
val stream_bytesel = MuxLookup(stream_size, UInt("b11111111"), Seq(
|
||||
UInt("b00") -> UInt("b00000001"),
|
||||
UInt("b01") -> UInt("b00000011"),
|
||||
UInt("b10") -> UInt("b00001111")))
|
||||
val stream_mask = FillInterleaved(8, stream_bytesel)
|
||||
val stream_last = Reg(Bool())
|
||||
|
||||
val acq = io.mem.acquire.bits
|
||||
val gnt = io.mem.grant.bits
|
||||
val stream_word_bytes = UInt(1) << stream_size
|
||||
val stream_beat_idx = stream_idx(blockOffset - 1, tlByteAddrBits)
|
||||
val stream_byte_idx = stream_idx(tlByteAddrBits - 1, 0)
|
||||
val stream_bitshift = Cat(stream_byte_idx, UInt(0, 3))
|
||||
val stream_in_beat =
|
||||
(((io.outer.r.bits.data & stream_mask) << stream_bitshift)) |
|
||||
(data_buffer(stream_beat_idx) & ~(stream_mask << stream_bitshift))
|
||||
val stream_out_word = data_buffer(stream_beat_idx) >> stream_bitshift
|
||||
val stream_out_last = bytes_left === stream_word_bytes
|
||||
|
||||
val acq = io.inner.acquire.bits
|
||||
val gnt = io.inner.grant.bits
|
||||
|
||||
val (s_idle :: s_get :: s_put :: s_prefetch ::
|
||||
s_wait :: s_resp :: Nil) = Enum(Bits(), 6)
|
||||
s_stream_read_req :: s_stream_read_resp ::
|
||||
s_stream_write_req :: s_stream_write_data :: s_stream_write_resp ::
|
||||
s_wait :: s_resp :: Nil) = Enum(Bits(), 11)
|
||||
val state = Reg(init = s_idle)
|
||||
|
||||
val (put_beat, put_done) = Counter(
|
||||
io.mem.acquire.fire() && acq.hasData(), tlDataBeats)
|
||||
io.inner.acquire.fire() && acq.hasData(), tlDataBeats)
|
||||
|
||||
val put_mask = Vec.tabulate(tlDataBytes) { i =>
|
||||
val byte_index = Cat(put_beat, UInt(i, tlByteAddrBits))
|
||||
byte_index >= offset && byte_index < bytes_left
|
||||
}.toBits
|
||||
|
||||
val prefetch_sent = io.mem.acquire.fire() && io.mem.acquire.bits.isPrefetch()
|
||||
val prefetch_sent = io.inner.acquire.fire() && io.inner.acquire.bits.isPrefetch()
|
||||
val prefetch_busy = Reg(init = UInt(0, tlMaxClientXacts))
|
||||
val (prefetch_id, _) = Counter(prefetch_sent, tlMaxClientXacts)
|
||||
|
||||
@ -190,18 +225,39 @@ class DmaTracker(implicit p: Parameters) extends DmaModule()(p)
|
||||
|
||||
val resp_id = Reg(UInt(width = dmaClientXactIdBits))
|
||||
|
||||
io.mem.acquire.valid := (state === s_get) ||
|
||||
io.inner.acquire.valid := (state === s_get) ||
|
||||
(state === s_put && get_done) ||
|
||||
(state === s_prefetch && !prefetch_busy(prefetch_id))
|
||||
io.mem.acquire.bits := MuxBundle(
|
||||
io.inner.acquire.bits := MuxBundle(
|
||||
state, prefetch_acquire, Seq(
|
||||
s_get -> get_acquire,
|
||||
s_put -> put_acquire))
|
||||
io.mem.grant.ready := Bool(true)
|
||||
io.inner.grant.ready := Bool(true)
|
||||
io.dma.req.ready := state === s_idle
|
||||
io.dma.resp.valid := state === s_resp
|
||||
io.dma.resp.bits.client_xact_id := resp_id
|
||||
io.dma.resp.bits.status := UInt(0)
|
||||
io.outer.ar.valid := (state === s_stream_read_req)
|
||||
io.outer.ar.bits := NastiReadAddressChannel(
|
||||
id = UInt(0),
|
||||
addr = stream_addr,
|
||||
size = stream_size,
|
||||
len = stream_len,
|
||||
burst = BURST_FIXED)
|
||||
io.outer.r.ready := (state === s_stream_read_resp)
|
||||
|
||||
io.outer.aw.valid := (state === s_stream_write_req)
|
||||
io.outer.aw.bits := NastiWriteAddressChannel(
|
||||
id = UInt(0),
|
||||
addr = stream_addr,
|
||||
size = stream_size,
|
||||
len = stream_len,
|
||||
burst = BURST_FIXED)
|
||||
io.outer.w.valid := (state === s_stream_write_data) && get_done
|
||||
io.outer.w.bits := NastiWriteDataChannel(
|
||||
data = stream_out_word,
|
||||
last = stream_out_last)
|
||||
io.outer.b.ready := (state === s_stream_write_resp)
|
||||
|
||||
when (io.dma.req.fire()) {
|
||||
val src_off = io.dma.req.bits.source(blockOffset - 1, 0)
|
||||
@ -219,25 +275,65 @@ class DmaTracker(implicit p: Parameters) extends DmaModule()(p)
|
||||
put_inflight := Bool(false)
|
||||
get_half := UInt(0)
|
||||
put_half := UInt(0)
|
||||
streaming := Bool(false)
|
||||
stream_len := (io.dma.req.bits.length >> io.dma.req.bits.size) - UInt(1)
|
||||
stream_size := io.dma.req.bits.size
|
||||
stream_last := Bool(false)
|
||||
|
||||
when (io.dma.req.bits.cmd === DMA_CMD_COPY) {
|
||||
state := s_get
|
||||
} .otherwise {
|
||||
} .elsewhen (io.dma.req.bits.cmd(2, 1) === UInt("b01")) {
|
||||
prefetch_put := io.dma.req.bits.cmd(0)
|
||||
state := s_prefetch
|
||||
} .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SIN) {
|
||||
stream_addr := io.dma.req.bits.source
|
||||
stream_idx := dst_off
|
||||
streaming := Bool(true)
|
||||
alignment := UInt(0)
|
||||
state := s_stream_read_req
|
||||
} .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SOUT) {
|
||||
stream_addr := io.dma.req.bits.dest
|
||||
stream_idx := src_off
|
||||
streaming := Bool(true)
|
||||
bytes_left := io.dma.req.bits.length
|
||||
state := s_stream_write_req
|
||||
}
|
||||
}
|
||||
|
||||
when (state === s_get && io.mem.acquire.ready) {
|
||||
val bytes_in_buffer = UInt(blockBytes) - alignment
|
||||
val extra_read = alignment > UInt(0) && !shift_dir && // dst_off < src_off
|
||||
get_half === UInt(0) && // this is the first block
|
||||
bytes_in_buffer < bytes_left // there is still more data left to fetch
|
||||
when (io.outer.ar.fire()) { state := s_stream_read_resp }
|
||||
|
||||
when (io.outer.r.fire()) {
|
||||
data_buffer(stream_beat_idx) := stream_in_beat
|
||||
stream_idx := stream_idx + stream_word_bytes
|
||||
val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes
|
||||
when (block_finished || io.outer.r.bits.last) { state := s_put }
|
||||
}
|
||||
|
||||
when (io.outer.aw.fire()) { state := s_get }
|
||||
|
||||
when (io.outer.w.fire()) {
|
||||
stream_idx := stream_idx + stream_word_bytes
|
||||
bytes_left := bytes_left - stream_word_bytes
|
||||
val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes
|
||||
when (stream_out_last) {
|
||||
state := s_resp
|
||||
} .elsewhen (block_finished) {
|
||||
state := s_get
|
||||
}
|
||||
}
|
||||
|
||||
when (state === s_get && io.inner.acquire.ready) {
|
||||
get_inflight := get_inflight | FillInterleaved(tlDataBeats, UIntToOH(get_half))
|
||||
get_half := get_half + UInt(1)
|
||||
src_block := src_block + UInt(1)
|
||||
when (!extra_read) {
|
||||
state := s_put
|
||||
when (streaming) {
|
||||
state := s_stream_write_data
|
||||
} .otherwise {
|
||||
val bytes_in_buffer = UInt(blockBytes) - alignment
|
||||
val extra_read = alignment > UInt(0) && !shift_dir && // dst_off < src_off
|
||||
get_half === UInt(0) && // this is the first block
|
||||
bytes_in_buffer < bytes_left // there is still more data left to fetch
|
||||
get_half := get_half + UInt(1)
|
||||
when (!extra_read) { state := s_put }
|
||||
}
|
||||
}
|
||||
|
||||
@ -252,7 +348,7 @@ class DmaTracker(implicit p: Parameters) extends DmaModule()(p)
|
||||
}
|
||||
}
|
||||
|
||||
when (io.mem.grant.fire()) {
|
||||
when (io.inner.grant.fire()) {
|
||||
when (gnt.g_type === Grant.prefetchAckType) {
|
||||
prefetch_busy := prefetch_busy & ~UIntToOH(gnt.client_xact_id)
|
||||
} .elsewhen (gnt.hasData()) {
|
||||
@ -266,8 +362,11 @@ class DmaTracker(implicit p: Parameters) extends DmaModule()(p)
|
||||
}
|
||||
|
||||
when (put_done) { // state === s_put
|
||||
put_half := put_half + UInt(1)
|
||||
when (!streaming) {
|
||||
put_half := put_half + UInt(1)
|
||||
}
|
||||
offset := UInt(0)
|
||||
stream_idx := UInt(0)
|
||||
when (bytes_left < UInt(blockBytes)) {
|
||||
bytes_left := UInt(0)
|
||||
} .otherwise {
|
||||
@ -279,7 +378,9 @@ class DmaTracker(implicit p: Parameters) extends DmaModule()(p)
|
||||
}
|
||||
|
||||
when (state === s_wait && get_done && !put_inflight) {
|
||||
state := Mux(bytes_left === UInt(0), s_resp, s_get)
|
||||
state := MuxCase(s_get, Seq(
|
||||
(bytes_left === UInt(0)) -> s_resp,
|
||||
streaming -> s_stream_read_resp))
|
||||
}
|
||||
|
||||
when (io.dma.resp.fire()) { state := s_idle }
|
||||
|
Loading…
Reference in New Issue
Block a user