From 38e09678161497880b16786abfea9306d930797c Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Mon, 15 Aug 2016 23:08:55 -0700 Subject: [PATCH] strip DMA and RoCC CSRs out of rocket and uncore (#201) --- coreplex/src/main/scala/Configs.scala | 1 - coreplex/src/main/scala/TestConfigs.scala | 1 - groundtest/src/main/scala/Tile.scala | 3 +- rocket/src/main/scala/csr.scala | 9 - rocket/src/main/scala/dma.scala | 400 ---------------- rocket/src/main/scala/rocc.scala | 10 - rocket/src/main/scala/rocket.scala | 4 - rocket/src/main/scala/tile.scala | 15 - uncore/src/main/scala/devices/Dma.scala | 535 ---------------------- 9 files changed, 1 insertion(+), 977 deletions(-) delete mode 100644 rocket/src/main/scala/dma.scala delete mode 100644 uncore/src/main/scala/devices/Dma.scala diff --git a/coreplex/src/main/scala/Configs.scala b/coreplex/src/main/scala/Configs.scala index d869d604..29c2098f 100644 --- a/coreplex/src/main/scala/Configs.scala +++ b/coreplex/src/main/scala/Configs.scala @@ -101,7 +101,6 @@ class BaseCoreplexConfig extends Config ( case BuildRoCC => Nil case RoccNMemChannels => site(BuildRoCC).map(_.nMemChannels).foldLeft(0)(_ + _) case RoccNPTWPorts => site(BuildRoCC).map(_.nPTWPorts).foldLeft(0)(_ + _) - case RoccNCSRs => site(BuildRoCC).map(_.csrs.size).foldLeft(0)(_ + _) //Rocket Core Constants case FetchWidth => if (site(UseCompressed)) 2 else 1 case RetireWidth => 1 diff --git a/coreplex/src/main/scala/TestConfigs.scala b/coreplex/src/main/scala/TestConfigs.scala index 151a5bcc..24f0b837 100644 --- a/coreplex/src/main/scala/TestConfigs.scala +++ b/coreplex/src/main/scala/TestConfigs.scala @@ -49,7 +49,6 @@ class WithGroundTest extends Config( case GroundTestId => i case NCachedTileLinkPorts => if(tileSettings.cached > 0) 1 else 0 case NUncachedTileLinkPorts => tileSettings.uncached - case RoccNCSRs => tileSettings.csrs }))) } } diff --git a/groundtest/src/main/scala/Tile.scala b/groundtest/src/main/scala/Tile.scala index 9d2c5859..c878fba5 100644 --- a/groundtest/src/main/scala/Tile.scala +++ b/groundtest/src/main/scala/Tile.scala @@ -11,8 +11,7 @@ import cde.{Parameters, Field} case object BuildGroundTest extends Field[Parameters => GroundTest] case class GroundTestTileSettings( - uncached: Int = 0, cached: Int = 0, ptw: Int = 0, - maxXacts: Int = 1, csrs: Int = 0) + uncached: Int = 0, cached: Int = 0, ptw: Int = 0, maxXacts: Int = 1) case object GroundTestKey extends Field[Seq[GroundTestTileSettings]] case object GroundTestId extends Field[Int] diff --git a/rocket/src/main/scala/csr.scala b/rocket/src/main/scala/csr.scala index 77a630ee..a4f1e790 100644 --- a/rocket/src/main/scala/csr.scala +++ b/rocket/src/main/scala/csr.scala @@ -332,11 +332,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) read_mapping += addr -> io.custom_mrw_csrs(i) } - for ((addr, i) <- roccCsrs.zipWithIndex) { - require(!read_mapping.contains(addr), "RoCC: CSR address " + addr + " is already in use") - read_mapping += addr -> io.rocc.csr.rdata(i) - } - val decoded_addr = read_mapping map { case (k, v) => k -> (io.rw.addr === k) } val addr_valid = decoded_addr.values.reduce(_||_) @@ -559,10 +554,6 @@ class CSRFile(implicit p: Parameters) extends CoreModule()(p) reg_dcsr.debugint := io.prci.interrupts.debug reg_dcsr.hwbpcount := UInt(p(NBreakpoints)) - io.rocc.csr.waddr := io.rw.addr - io.rocc.csr.wdata := wdata - io.rocc.csr.wen := wen - if (!usingUser) { reg_mstatus.mpp := PRV.M reg_mstatus.prv := PRV.M diff --git a/rocket/src/main/scala/dma.scala b/rocket/src/main/scala/dma.scala deleted file mode 100644 index b1453526..00000000 --- a/rocket/src/main/scala/dma.scala +++ /dev/null @@ -1,400 +0,0 @@ -package rocket - -import Chisel._ -import uncore.tilelink._ -import uncore.devices._ -import uncore.devices.DmaRequest._ -import uncore.agents._ -import uncore.util._ -import junctions.{ParameterizedBundle, AddrMap} -import cde.Parameters - -trait HasClientDmaParameters extends HasCoreParameters with HasDmaParameters { - val dmaAddrBits = coreMaxAddrBits - val dmaSegmentSizeBits = coreMaxAddrBits - val dmaSegmentBits = 24 -} - -abstract class ClientDmaBundle(implicit val p: Parameters) - extends ParameterizedBundle()(p) with HasClientDmaParameters -abstract class ClientDmaModule(implicit val p: Parameters) - extends Module with HasClientDmaParameters - -class ClientDmaRequest(implicit p: Parameters) extends ClientDmaBundle()(p) { - val cmd = UInt(width = DMA_CMD_SZ) - val src_start = UInt(width = dmaAddrBits) - val dst_start = UInt(width = dmaAddrBits) - val src_stride = UInt(width = dmaSegmentSizeBits) - val dst_stride = UInt(width = dmaSegmentSizeBits) - val segment_size = UInt(width = dmaSegmentSizeBits) - val nsegments = UInt(width = dmaSegmentBits) - val word_size = UInt(width = dmaWordSizeBits) -} - -object ClientDmaRequest { - def apply(cmd: UInt, - src_start: UInt, - dst_start: UInt, - segment_size: UInt, - nsegments: UInt = UInt(1), - src_stride: UInt = UInt(0), - dst_stride: UInt = UInt(0), - word_size: UInt = UInt(0)) - (implicit p: Parameters) = { - val req = Wire(new ClientDmaRequest) - req.cmd := cmd - req.src_start := src_start - req.dst_start := dst_start - req.src_stride := src_stride - req.dst_stride := dst_stride - req.segment_size := segment_size - req.nsegments := nsegments - req.word_size := word_size - req - } -} - -object ClientDmaResponse { - val pagefault = UInt("b01") - val invalid_region = UInt("b10") - - def apply(status: UInt = UInt(0))(implicit p: Parameters) = { - val resp = Wire(new ClientDmaResponse) - resp.status := status - resp - } -} - -class ClientDmaResponse(implicit p: Parameters) extends ClientDmaBundle { - val status = UInt(width = dmaStatusBits) -} - -class ClientDmaIO(implicit p: Parameters) extends ParameterizedBundle()(p) { - val req = Decoupled(new ClientDmaRequest) - val resp = Valid(new ClientDmaResponse).flip -} - -class DmaFrontend(implicit p: Parameters) extends CoreModule()(p) - with HasClientDmaParameters with HasTileLinkParameters { - val io = new Bundle { - val cpu = (new ClientDmaIO).flip - val mem = new ClientUncachedTileLinkIO - val ptw = new TLBPTWIO - val busy = Bool(OUTPUT) - val incr_outstanding = Bool(OUTPUT) - val host_id = UInt(INPUT, log2Up(nCores)) - } - - val tlb = Module(new DecoupledTLB()(p.alterPartial({ - case CacheName => "L1D" - }))) - io.ptw <> tlb.io.ptw - - private val pgSize = 1 << pgIdxBits - - val cmd = Reg(UInt(width = DMA_CMD_SZ)) - val adv_ptr = MuxLookup(cmd, UInt("b11"), Seq( - DMA_CMD_PFR -> UInt("b10"), - DMA_CMD_PFW -> UInt("b10"), - DMA_CMD_SIN -> UInt("b10"), - DMA_CMD_SOUT -> UInt("b01"))) - - val segment_size = Reg(UInt(width = dmaSegmentSizeBits)) - val bytes_left = Reg(UInt(width = dmaSegmentSizeBits)) - val segments_left = Reg(UInt(width = dmaSegmentBits)) - val word_size = Reg(UInt(width = dmaWordSizeBits)) - - val src_vaddr = Reg(UInt(width = dmaAddrBits)) - val dst_vaddr = Reg(UInt(width = dmaAddrBits)) - val src_vpn = src_vaddr(dmaAddrBits - 1, pgIdxBits) - val dst_vpn = dst_vaddr(dmaAddrBits - 1, pgIdxBits) - val src_idx = src_vaddr(pgIdxBits - 1, 0) - val dst_idx = dst_vaddr(pgIdxBits - 1, 0) - val src_pglen = UInt(pgSize) - src_idx - val dst_pglen = UInt(pgSize) - dst_idx - - val src_stride = Reg(UInt(width = dmaSegmentSizeBits)) - val dst_stride = Reg(UInt(width = dmaSegmentSizeBits)) - - val src_ppn = Reg(UInt(width = ppnBits)) - val dst_ppn = Reg(UInt(width = ppnBits)) - - val src_paddr = Cat(src_ppn, src_idx) - val dst_paddr = Cat(dst_ppn, dst_idx) - - val last_src_vpn = Reg(UInt(width = vpnBits)) - val last_dst_vpn = Reg(UInt(width = vpnBits)) - - val tx_len = src_pglen min dst_pglen min bytes_left - - val dma_busy = Reg(init = UInt(0, tlMaxClientXacts)) - val dma_xact_id = PriorityEncoder(~dma_busy) - val (dma_req_beat, dma_req_done) = Counter(io.mem.acquire.fire(), tlDataBeats) - - val (s_idle :: s_translate :: s_dma_req :: s_dma_update :: - s_prepare :: s_finish :: Nil) = Enum(Bits(), 6) - val state = Reg(init = s_idle) - - // lower bit is for src, higher bit is for dst - val to_translate = Reg(init = UInt(0, 2)) - val tlb_sent = Reg(init = UInt(0, 2)) - val tlb_to_send = to_translate & ~tlb_sent - val resp_status = Reg(UInt(width = dmaStatusBits)) - - def make_acquire( - addr_beat: UInt, client_xact_id: UInt, client_id: UInt, - cmd: UInt, source: UInt, dest: UInt, - length: UInt, size: UInt): Acquire = { - - val data_blob = Wire(UInt(width = tlDataBeats * tlDataBits)) - data_blob := DmaRequest( - xact_id = UInt(0), - client_id = client_id, - cmd = cmd, - source = source, - dest = dest, - length = length, - size = size).asUInt - val data_beats = Vec(tlDataBeats, UInt(width = tlDataBits)).fromBits(data_blob) - val base_addr = addrMap("devices:dma").start - val addr_block = UInt(base_addr >> (tlBeatAddrBits + tlByteAddrBits)) - - PutBlock( - client_xact_id = client_xact_id, - addr_block = addr_block, - addr_beat = addr_beat, - data = data_beats(addr_beat), - alloc = Bool(false)) - } - - def check_region(cmd: UInt, src: UInt, dst: UInt): Bool = { - val src_cacheable = addrMap.isCacheable(src) - val dst_cacheable = addrMap.isCacheable(dst) - val dst_ok = Mux(cmd === DMA_CMD_SOUT, !dst_cacheable, dst_cacheable) - val src_ok = Mux(cmd === DMA_CMD_SIN, !src_cacheable, Bool(true)) - dst_ok && src_ok - } - - tlb.io.req.valid := tlb_to_send.orR - tlb.io.req.bits.vpn := Mux(tlb_to_send(0), src_vpn, dst_vpn) - tlb.io.req.bits.passthrough := Bool(false) - tlb.io.req.bits.instruction := Bool(false) - tlb.io.req.bits.store := !tlb_to_send(0) - tlb.io.resp.ready := tlb_sent.orR - - when (tlb.io.req.fire()) { - tlb_sent := tlb_sent | PriorityEncoderOH(tlb_to_send) - } - - when (tlb.io.resp.fire()) { - val recv_choice = PriorityEncoderOH(to_translate) - val error = Mux(recv_choice(0), - tlb.io.resp.bits.xcpt_ld, tlb.io.resp.bits.xcpt_st) - - when (error) { - resp_status := ClientDmaResponse.pagefault - state := s_finish - } - - // getting the src translation - when (recv_choice(0)) { - src_ppn := tlb.io.resp.bits.ppn - } .otherwise { - dst_ppn := tlb.io.resp.bits.ppn - } - - to_translate := to_translate & ~recv_choice - } - - io.cpu.req.ready := state === s_idle - io.cpu.resp.valid := state === s_finish - io.cpu.resp.bits := ClientDmaResponse(resp_status) - - io.mem.acquire.valid := (state === s_dma_req) && !dma_busy.andR - io.mem.acquire.bits := make_acquire( - addr_beat = dma_req_beat, - client_id = io.host_id, - client_xact_id = dma_xact_id, - cmd = cmd, source = src_paddr, dest = dst_paddr, - length = tx_len, size = word_size) - - io.mem.grant.ready := (state =/= s_dma_req) - - when (io.cpu.req.fire()) { - val req = io.cpu.req.bits - val is_prefetch = req.cmd(2, 1) === UInt("b01") - cmd := req.cmd - src_vaddr := req.src_start - dst_vaddr := req.dst_start - src_stride := req.src_stride - dst_stride := req.dst_stride - segment_size := req.segment_size - segments_left := req.nsegments - UInt(1) - bytes_left := req.segment_size - word_size := req.word_size - to_translate := Mux(is_prefetch, UInt("b10"), UInt("b11")) - tlb_sent := UInt(0) - state := s_translate - } - - when (state === s_translate && !to_translate.orR) { - when (check_region(cmd, src_paddr, dst_paddr)) { - state := s_dma_req - } .otherwise { - resp_status := ClientDmaResponse.invalid_region - state := s_finish - } - } - - def setBusy(set: Bool, xact_id: UInt): UInt = - Mux(set, UIntToOH(xact_id), UInt(0)) - - dma_busy := (dma_busy | - setBusy(dma_req_done, dma_xact_id)) & - ~setBusy(io.mem.grant.fire(), io.mem.grant.bits.client_xact_id) - - - when (dma_req_done) { - src_vaddr := src_vaddr + Mux(adv_ptr(0), tx_len, UInt(0)) - dst_vaddr := dst_vaddr + Mux(adv_ptr(1), tx_len, UInt(0)) - bytes_left := bytes_left - tx_len - state := s_dma_update - } - - when (state === s_dma_update) { - when (bytes_left === UInt(0)) { - when (segments_left === UInt(0)) { - resp_status := UInt(0) - state := s_finish - } .otherwise { - last_src_vpn := src_vpn - last_dst_vpn := dst_vpn - src_vaddr := src_vaddr + src_stride - dst_vaddr := dst_vaddr + dst_stride - bytes_left := segment_size - segments_left := segments_left - UInt(1) - state := s_prepare - } - } .otherwise { - to_translate := adv_ptr & Cat(dst_idx === UInt(0), src_idx === UInt(0)) - tlb_sent := UInt(0) - state := s_translate - } - } - - when (state === s_prepare) { - to_translate := adv_ptr & Cat( - dst_vpn =/= last_dst_vpn, - src_vpn =/= last_src_vpn) - tlb_sent := UInt(0) - state := s_translate - } - - when (state === s_finish) { state := s_idle } - - io.busy := (state =/= s_idle) || dma_busy.orR - io.incr_outstanding := dma_req_done -} - -object DmaCtrlRegNumbers { - val SRC_STRIDE = 0 - val DST_STRIDE = 1 - val SEGMENT_SIZE = 2 - val NSEGMENTS = 3 - val WORD_SIZE = 4 - val RESP_STATUS = 5 - val OUTSTANDING = 6 - val NCSRS = 7 - val CSR_BASE = 0x800 - val CSR_END = CSR_BASE + NCSRS -} -import DmaCtrlRegNumbers._ - -class DmaCtrlRegFile(implicit val p: Parameters) extends Module - with HasClientDmaParameters with HasTileLinkParameters { - - private val nWriteRegs = 5 - private val nRegs = nWriteRegs + 2 - - val io = new Bundle { - val wen = Bool(INPUT) - val waddr = UInt(INPUT, log2Up(nRegs)) - val wdata = UInt(INPUT, dmaSegmentSizeBits) - - val src_stride = UInt(OUTPUT, dmaSegmentSizeBits) - val dst_stride = UInt(OUTPUT, dmaSegmentSizeBits) - val segment_size = UInt(OUTPUT, dmaSegmentSizeBits) - val nsegments = UInt(OUTPUT, dmaSegmentBits) - val word_size = UInt(OUTPUT, dmaWordSizeBits) - - val incr_outstanding = Bool(INPUT) - val xact_outstanding = Bool(OUTPUT) - } - - val regs = Reg(Vec(nWriteRegs, UInt(width = dmaSegmentSizeBits))) - val waddr = io.waddr(log2Up(NCSRS) - 1, 0) - - io.src_stride := regs(SRC_STRIDE) - io.dst_stride := regs(DST_STRIDE) - io.segment_size := regs(SEGMENT_SIZE) - io.nsegments := regs(NSEGMENTS) - io.word_size := regs(WORD_SIZE) - - when (io.wen && waddr < UInt(nWriteRegs)) { - regs(waddr) := io.wdata - } - - val outstanding_cnt = TwoWayCounter( - io.incr_outstanding, - io.wen && io.waddr === UInt(OUTSTANDING), - tlMaxClientXacts) - - io.xact_outstanding := outstanding_cnt > UInt(0) -} - -class DmaController(implicit p: Parameters) extends RoCC()(p) - with HasClientDmaParameters { - io.mem.req.valid := Bool(false) - io.resp.valid := Bool(false) - io.interrupt := Bool(false) - - val cmd = Queue(io.cmd) - val inst = cmd.bits.inst - val is_transfer = inst.funct < UInt(8) - - val reg_status = Reg(UInt(width = dmaStatusBits)) - val crfile = Module(new DmaCtrlRegFile) - crfile.io.waddr := io.csr.waddr - crfile.io.wdata := io.csr.wdata - crfile.io.wen := io.csr.wen - - io.csr.rdata(SRC_STRIDE) := crfile.io.src_stride - io.csr.rdata(DST_STRIDE) := crfile.io.dst_stride - io.csr.rdata(SEGMENT_SIZE) := crfile.io.segment_size - io.csr.rdata(NSEGMENTS) := crfile.io.nsegments - io.csr.rdata(WORD_SIZE) := crfile.io.word_size - io.csr.rdata(RESP_STATUS) := reg_status - - val frontend = Module(new DmaFrontend) - io.ptw(0) <> frontend.io.ptw - io.autl <> frontend.io.mem - crfile.io.incr_outstanding := frontend.io.incr_outstanding - frontend.io.host_id := io.host_id - frontend.io.cpu.req.valid := cmd.valid && is_transfer - frontend.io.cpu.req.bits := ClientDmaRequest( - cmd = cmd.bits.inst.funct, - src_start = cmd.bits.rs2, - dst_start = cmd.bits.rs1, - src_stride = crfile.io.src_stride, - dst_stride = crfile.io.dst_stride, - segment_size = crfile.io.segment_size, - nsegments = crfile.io.nsegments, - word_size = crfile.io.word_size) - cmd.ready := is_transfer && frontend.io.cpu.req.ready - - when (frontend.io.cpu.resp.valid) { - reg_status := frontend.io.cpu.resp.bits.status - } - - io.busy := cmd.valid || frontend.io.busy || crfile.io.xact_outstanding -} diff --git a/rocket/src/main/scala/rocc.scala b/rocket/src/main/scala/rocc.scala index a58dcd30..d53979cd 100644 --- a/rocket/src/main/scala/rocc.scala +++ b/rocket/src/main/scala/rocc.scala @@ -12,14 +12,6 @@ import cde.{Parameters, Field} case object RoccMaxTaggedMemXacts extends Field[Int] case object RoccNMemChannels extends Field[Int] case object RoccNPTWPorts extends Field[Int] -case object RoccNCSRs extends Field[Int] - -class RoCCCSRs(implicit p: Parameters) extends CoreBundle()(p) { - val rdata = Vec(nRoccCsrs, UInt(INPUT, xLen)) - val waddr = UInt(OUTPUT, CSR.ADDRSZ) - val wdata = UInt(OUTPUT, xLen) - val wen = Bool(OUTPUT) -} class RoCCInstruction extends Bundle { @@ -59,8 +51,6 @@ class RoCCInterface(implicit p: Parameters) extends CoreBundle()(p) { val fpu_req = Decoupled(new FPInput) val fpu_resp = Decoupled(new FPResult).flip val exception = Bool(INPUT) - val csr = (new RoCCCSRs).flip - val host_id = UInt(INPUT, log2Up(nCores)) override def cloneType = new RoCCInterface().asInstanceOf[this.type] } diff --git a/rocket/src/main/scala/rocket.scala b/rocket/src/main/scala/rocket.scala index 4d3f1da9..6c3f6e75 100644 --- a/rocket/src/main/scala/rocket.scala +++ b/rocket/src/main/scala/rocket.scala @@ -63,9 +63,6 @@ trait HasCoreParameters extends HasAddrMapParameters { val vaddrBitsExtended = vpnBitsExtended + pgIdxBits val coreMaxAddrBits = paddrBits max vaddrBitsExtended val nCustomMrwCsrs = p(NCustomMRWCSRs) - val roccCsrs = if (p(BuildRoCC).isEmpty) Nil - else p(BuildRoCC).flatMap(_.csrs) - val nRoccCsrs = p(RoccNCSRs) val nCores = p(NTiles) // fetchWidth doubled, but coreInstBytes halved, for RVC @@ -499,7 +496,6 @@ class Rocket(implicit p: Parameters) extends CoreModule()(p) { csr.io.prci <> io.prci io.fpu.fcsr_rm := csr.io.fcsr_rm csr.io.fcsr_flags := io.fpu.fcsr_flags - io.rocc.csr <> csr.io.rocc.csr csr.io.rocc.interrupt <> io.rocc.interrupt csr.io.pc := wb_reg_pc csr.io.badaddr := encodeVirtualAddress(wb_reg_wdata, wb_reg_wdata) diff --git a/rocket/src/main/scala/tile.scala b/rocket/src/main/scala/tile.scala index cdc35953..f029c3cc 100644 --- a/rocket/src/main/scala/tile.scala +++ b/rocket/src/main/scala/tile.scala @@ -18,7 +18,6 @@ case class RoccParameters( generator: Parameters => RoCC, nMemChannels: Int = 0, nPTWPorts : Int = 0, - csrs: Seq[Int] = Nil, useFPU: Boolean = false) abstract class Tile(clockSignal: Clock = null, resetSignal: Bool = null) @@ -70,12 +69,10 @@ class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null) val rocc = accelParams.generator(p.alterPartial({ case RoccNMemChannels => accelParams.nMemChannels case RoccNPTWPorts => accelParams.nPTWPorts - case RoccNCSRs => accelParams.csrs.size })) val dcIF = Module(new SimpleHellaCacheIF()(dcacheParams)) rocc.io.cmd <> cmdRouter.io.out(i) rocc.io.exception := core.io.rocc.exception - rocc.io.host_id := io.prci.id dcIF.io.requestor <> rocc.io.mem dcPorts += dcIF.io.cache uncachedArbPorts += rocc.io.autl @@ -101,18 +98,6 @@ class RocketTile(clockSignal: Clock = null, resetSignal: Bool = null) core.io.rocc.interrupt := roccs.map(_.io.interrupt).reduce(_ || _) respArb.io.in <> roccs.map(rocc => Queue(rocc.io.resp)) - if (p(RoccNCSRs) > 0) { - core.io.rocc.csr.rdata <> roccs.flatMap(_.io.csr.rdata) - for ((rocc, accelParams) <- roccs.zip(buildRocc)) { - rocc.io.csr.waddr := core.io.rocc.csr.waddr - rocc.io.csr.wdata := core.io.rocc.csr.wdata - rocc.io.csr.wen := core.io.rocc.csr.wen && - accelParams.csrs - .map(core.io.rocc.csr.waddr === UInt(_)) - .reduce((a, b) => a || b) - } - } - ptwPorts ++= roccs.flatMap(_.io.ptw) uncachedPorts ++= roccs.flatMap(_.io.utl) } diff --git a/uncore/src/main/scala/devices/Dma.scala b/uncore/src/main/scala/devices/Dma.scala deleted file mode 100644 index 44f1c0b6..00000000 --- a/uncore/src/main/scala/devices/Dma.scala +++ /dev/null @@ -1,535 +0,0 @@ -package uncore.devices - -import Chisel._ -import cde.{Parameters, Field} -import junctions._ -import junctions.NastiConstants._ -import uncore.tilelink._ -import uncore.Util._ - -case object NDmaTransactors extends Field[Int] -case object NDmaXacts extends Field[Int] -case object NDmaClients extends Field[Int] - -trait HasDmaParameters { - implicit val p: Parameters - val nDmaTransactors = p(NDmaTransactors) - val nDmaXacts = p(NDmaXacts) - val nDmaClients = p(NDmaClients) - val dmaXactIdBits = log2Up(nDmaXacts) - val dmaClientIdBits = log2Up(nDmaClients) - val addrBits = p(PAddrBits) - val dmaStatusBits = 2 - val dmaWordSizeBits = 2 -} - -abstract class DmaModule(implicit val p: Parameters) extends Module with HasDmaParameters -abstract class DmaBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) with HasDmaParameters - -class DmaRequest(implicit p: Parameters) extends DmaBundle()(p) { - val xact_id = UInt(width = dmaXactIdBits) - val client_id = UInt(width = dmaClientIdBits) - val cmd = UInt(width = DmaRequest.DMA_CMD_SZ) - val source = UInt(width = addrBits) - val dest = UInt(width = addrBits) - val length = UInt(width = addrBits) - val size = UInt(width = dmaWordSizeBits) -} - -class DmaResponse(implicit p: Parameters) extends DmaBundle()(p) { - val xact_id = UInt(width = dmaXactIdBits) - val client_id = UInt(width = dmaClientIdBits) - val status = UInt(width = dmaStatusBits) -} - -object DmaRequest { - val DMA_CMD_SZ = 3 - - val DMA_CMD_COPY = UInt("b000") - val DMA_CMD_PFR = UInt("b010") - val DMA_CMD_PFW = UInt("b011") - val DMA_CMD_SIN = UInt("b100") - val DMA_CMD_SOUT = UInt("b101") - - def apply(xact_id: UInt = UInt(0), - client_id: UInt, - cmd: UInt, - source: UInt, - dest: UInt, - length: UInt, - size: UInt = UInt(0))(implicit p: Parameters): DmaRequest = { - val req = Wire(new DmaRequest) - req.xact_id := xact_id - req.client_id := client_id - req.cmd := cmd - req.source := source - req.dest := dest - req.length := length - req.size := size - req - } -} -import DmaRequest._ - -class DmaIO(implicit p: Parameters) extends DmaBundle()(p) { - val req = Decoupled(new DmaRequest) - val resp = Decoupled(new DmaResponse).flip -} - -class DmaTrackerIO(implicit p: Parameters) extends DmaBundle()(p) { - val dma = (new DmaIO).flip - val mem = new ClientUncachedTileLinkIO - val mmio = new NastiIO -} - -class DmaManager(outstandingCSR: Int)(implicit p: Parameters) - extends DmaModule()(p) - with HasNastiParameters - with HasAddrMapParameters { - - val io = new Bundle { - val ctrl = (new NastiIO).flip - val mmio = new NastiIO - val dma = new DmaIO - } - - private val wordBits = 1 << log2Up(addrBits) - private val wordBytes = wordBits / 8 - private val wordOff = log2Up(wordBytes) - private val wordMSB = wordOff + 2 - - val s_idle :: s_wdata :: s_dma_req :: s_wresp :: Nil = Enum(Bits(), 4) - val state = Reg(init = s_idle) - - val nCtrlWords = (addrBits * 4) / nastiXDataBits - val ctrl_regs = Reg(Vec(nCtrlWords, UInt(width = nastiXDataBits))) - val ctrl_idx = Reg(UInt(width = log2Up(nCtrlWords))) - val ctrl_done = Reg(Bool()) - val ctrl_blob = ctrl_regs.asUInt - val ctrl_id = Reg(UInt(width = nastiXIdBits)) - - val sizeOffset = 3 * addrBits - val cmdOffset = sizeOffset + dmaWordSizeBits - - val dma_req = new DmaRequest().fromBits(ctrl_blob) - val dma_busy = Reg(init = UInt(0, nDmaXacts)) - val dma_xact_id = PriorityEncoder(~dma_busy) - - when (io.ctrl.aw.fire()) { - ctrl_id := io.ctrl.aw.bits.id - ctrl_idx := UInt(0) - ctrl_done := Bool(false) - state := s_wdata - } - - when (io.ctrl.w.fire()) { - when (!ctrl_done) { - ctrl_regs(ctrl_idx) := io.ctrl.w.bits.data - ctrl_idx := ctrl_idx + UInt(1) - } - when (ctrl_idx === UInt(nCtrlWords - 1)) { ctrl_done := Bool(true) } - when (io.ctrl.w.bits.last) { state := s_dma_req } - } - - dma_busy := (dma_busy | - Mux(io.dma.req.fire(), UIntToOH(dma_xact_id), UInt(0))) & - ~Mux(io.dma.resp.fire(), UIntToOH(io.dma.resp.bits.xact_id), UInt(0)) - - when (io.dma.req.fire()) { state := s_wresp } - when (io.ctrl.b.fire()) { state := s_idle } - - io.ctrl.ar.ready := Bool(false) - io.ctrl.aw.ready := (state === s_idle) - io.ctrl.w.ready := (state === s_wdata) - - io.ctrl.r.valid := Bool(false) - io.ctrl.b.valid := (state === s_wresp) - io.ctrl.b.bits := NastiWriteResponseChannel(id = ctrl_id) - - io.dma.req.valid := (state === s_dma_req) && !dma_busy.andR - io.dma.req.bits := dma_req - io.dma.req.bits.xact_id := dma_xact_id - - val resp_waddr_pending = Reg(init = Bool(false)) - val resp_wdata_pending = Reg(init = Bool(false)) - val resp_wresp_pending = Reg(init = Bool(false)) - val resp_pending = resp_waddr_pending || resp_wdata_pending || resp_wresp_pending - - val resp_client_id = Reg(UInt(width = dmaClientIdBits)) - val resp_status = Reg(UInt(width = dmaStatusBits)) - - io.dma.resp.ready := !resp_pending - - when (io.dma.resp.fire()) { - resp_client_id := io.dma.resp.bits.client_id - resp_status := io.dma.resp.bits.status - resp_waddr_pending := Bool(true) - resp_wdata_pending := Bool(true) - resp_wresp_pending := Bool(true) - } - - val addrTable = Vec.tabulate(nDmaClients) { i => - //UInt(addrMap(s"conf:csr$i").start + outstandingCSR * csrDataBytes) - require(false, "CSR MMIO ports no longer exist") - UInt(0) - } - - io.mmio.ar.valid := Bool(false) - io.mmio.aw.valid := resp_waddr_pending - io.mmio.aw.bits := NastiWriteAddressChannel( - id = UInt(0), - addr = addrTable(resp_client_id), - size = { require(false, "CSR MMIO ports no longer exist"); UInt(0) }) - io.mmio.w.valid := resp_wdata_pending - io.mmio.w.bits := NastiWriteDataChannel(data = resp_status) - io.mmio.b.ready := resp_wresp_pending - io.mmio.r.ready := Bool(false) - - when (io.mmio.aw.fire()) { resp_waddr_pending := Bool(false) } - when (io.mmio.w.fire()) { resp_wdata_pending := Bool(false) } - when (io.mmio.b.fire()) { resp_wresp_pending := Bool(false) } -} - -class DmaEngine(outstandingCSR: Int)(implicit p: Parameters) extends DmaModule()(p) { - val io = new Bundle { - val ctrl = (new NastiIO).flip - val mem = new ClientUncachedTileLinkIO - val mmio = new NastiIO - } - - val manager = Module(new DmaManager(outstandingCSR)) - val trackers = Module(new DmaTrackerFile) - - manager.io.ctrl <> io.ctrl - trackers.io.dma <> manager.io.dma - - val innerIOs = trackers.io.mem - val outerIOs = trackers.io.mmio :+ manager.io.mmio - - val innerArb = Module(new ClientUncachedTileLinkIOArbiter(innerIOs.size)) - innerArb.io.in <> innerIOs - io.mem <> innerArb.io.out - - val outerArb = Module(new NastiArbiter(outerIOs.size)) - outerArb.io.master <> outerIOs - io.mmio <> outerArb.io.slave - - assert(!io.mmio.b.valid || io.mmio.b.bits.resp === UInt(0), - "DmaEngine: NASTI write response error") - - assert(!io.mmio.r.valid || io.mmio.r.bits.resp === UInt(0), - "DmaEngine: NASTI read response error") -} - -class DmaTrackerFile(implicit p: Parameters) extends DmaModule()(p) { - val io = new Bundle { - val dma = (new DmaIO).flip - val mem = Vec(nDmaTransactors, new ClientUncachedTileLinkIO) - val mmio = Vec(nDmaTransactors, new NastiIO) - } - - val trackers = List.fill(nDmaTransactors) { Module(new DmaTracker) } - val reqReadys = trackers.map(_.io.dma.req.ready).asUInt - - io.mem <> trackers.map(_.io.mem) - io.mmio <> trackers.map(_.io.mmio) - - if (nDmaTransactors > 1) { - val resp_arb = Module(new RRArbiter(new DmaResponse, nDmaTransactors)) - resp_arb.io.in <> trackers.map(_.io.dma.resp) - io.dma.resp <> resp_arb.io.out - - val selection = PriorityEncoder(reqReadys) - trackers.zipWithIndex.foreach { case (tracker, i) => - tracker.io.dma.req.valid := io.dma.req.valid && selection === UInt(i) - tracker.io.dma.req.bits := io.dma.req.bits - } - io.dma.req.ready := reqReadys.orR - } else { - io.dma <> trackers.head.io.dma - } -} - -class DmaTracker(implicit p: Parameters) extends DmaModule()(p) - with HasTileLinkParameters with HasNastiParameters { - val io = new DmaTrackerIO - - private val blockOffset = tlBeatAddrBits + tlByteAddrBits - private val blockBytes = tlDataBeats * tlDataBytes - - val data_buffer = Reg(Vec(2 * tlDataBeats, Bits(width = tlDataBits))) - val get_inflight = Reg(UInt(2 * tlDataBeats)) - val put_inflight = Reg(Bool()) - val put_half = Reg(UInt(width = 1)) - val get_half = Reg(UInt(width = 1)) - val prefetch_put = Reg(Bool()) - val get_done = !get_inflight.orR - - val src_block = Reg(UInt(width = tlBlockAddrBits)) - val dst_block = Reg(UInt(width = tlBlockAddrBits)) - val offset = Reg(UInt(width = blockOffset)) - val alignment = Reg(UInt(width = blockOffset)) - val shift_dir = Reg(Bool()) - - val bytes_left = Reg(UInt(width = addrBits)) - val streaming = Reg(Bool()) - val stream_addr = Reg(UInt(width = nastiXAddrBits)) - val stream_len = Reg(UInt(width = nastiXLenBits)) - val stream_size = Reg(UInt(width = nastiXSizeBits)) - val stream_idx = Reg(UInt(width = blockOffset)) - val stream_bytesel = MuxLookup(stream_size, UInt("b11111111"), Seq( - UInt("b00") -> UInt("b00000001"), - UInt("b01") -> UInt("b00000011"), - UInt("b10") -> UInt("b00001111"))) - val stream_mask = FillInterleaved(8, stream_bytesel) - val stream_last = Reg(Bool()) - - val stream_word_bytes = UInt(1) << stream_size - val stream_beat_idx = stream_idx(blockOffset - 1, tlByteAddrBits) - val stream_byte_idx = stream_idx(tlByteAddrBits - 1, 0) - val stream_bitshift = Cat(stream_byte_idx, UInt(0, 3)) - val stream_in_beat = - (((io.mmio.r.bits.data & stream_mask) << stream_bitshift)) | - (data_buffer(stream_beat_idx) & ~(stream_mask << stream_bitshift)) - val stream_out_word = data_buffer(stream_beat_idx) >> stream_bitshift - val stream_out_last = bytes_left === stream_word_bytes - - val acq = io.mem.acquire.bits - val gnt = io.mem.grant.bits - - val (s_idle :: s_get :: s_put :: s_prefetch :: - s_stream_read_req :: s_stream_read_resp :: - s_stream_write_req :: s_stream_write_data :: s_stream_write_resp :: - s_wait :: s_resp :: Nil) = Enum(Bits(), 11) - val state = Reg(init = s_idle) - - val (put_beat, put_done) = Counter( - io.mem.acquire.fire() && acq.hasData(), tlDataBeats) - - val put_mask = Seq.tabulate(tlDataBytes) { i => - val byte_index = Cat(put_beat, UInt(i, tlByteAddrBits)) - byte_index >= offset && byte_index < bytes_left - }.asUInt - - val prefetch_sent = io.mem.acquire.fire() && io.mem.acquire.bits.isPrefetch() - val prefetch_busy = Reg(init = UInt(0, tlMaxClientXacts)) - val (prefetch_id, _) = Counter(prefetch_sent, tlMaxClientXacts) - - val base_index = Cat(put_half, put_beat) - val put_data = Wire(init = Bits(0, tlDataBits)) - val beat_align = alignment(blockOffset - 1, tlByteAddrBits) - val bit_align = Cat(alignment(tlByteAddrBits - 1, 0), UInt(0, 3)) - val rev_align = UInt(tlDataBits) - bit_align - - def getBit(value: UInt, sel: UInt): Bool = - (value >> sel)(0) - - when (alignment === UInt(0)) { - put_data := data_buffer(base_index) - } .elsewhen (shift_dir) { - val shift_index = base_index - beat_align - when (bit_align === UInt(0)) { - put_data := data_buffer(shift_index) - } .otherwise { - val upper_bits = data_buffer(shift_index) - val lower_bits = data_buffer(shift_index - UInt(1)) - val upper_shifted = upper_bits << bit_align - val lower_shifted = lower_bits >> rev_align - put_data := upper_shifted | lower_shifted - } - } .otherwise { - val shift_index = base_index + beat_align - when (bit_align === UInt(0)) { - put_data := data_buffer(shift_index) - } .otherwise { - val upper_bits = data_buffer(shift_index + UInt(1)) - val lower_bits = data_buffer(shift_index) - val upper_shifted = upper_bits << rev_align - val lower_shifted = lower_bits >> bit_align - put_data := upper_shifted | lower_shifted - } - } - - val put_acquire = PutBlock( - client_xact_id = UInt(2), - addr_block = dst_block, - addr_beat = put_beat, - data = put_data, - wmask = Some(put_mask)) - - val get_acquire = GetBlock( - client_xact_id = get_half, - addr_block = src_block, - alloc = Bool(false)) - - val prefetch_acquire = Mux(prefetch_put, - PutPrefetch(client_xact_id = prefetch_id, addr_block = dst_block), - GetPrefetch(client_xact_id = prefetch_id, addr_block = dst_block)) - - val resp_xact_id = Reg(UInt(width = dmaXactIdBits)) - val resp_client_id = Reg(UInt(width = dmaClientIdBits)) - - io.mem.acquire.valid := (state === s_get) || - (state === s_put && get_done) || - (state === s_prefetch && !prefetch_busy(prefetch_id)) - io.mem.acquire.bits := MuxLookup( - state, prefetch_acquire, Seq( - s_get -> get_acquire, - s_put -> put_acquire)) - io.mem.grant.ready := Bool(true) - io.dma.req.ready := state === s_idle - io.dma.resp.valid := state === s_resp - io.dma.resp.bits.xact_id := resp_xact_id - io.dma.resp.bits.client_id := resp_client_id - io.dma.resp.bits.status := UInt(0) - io.mmio.ar.valid := (state === s_stream_read_req) - io.mmio.ar.bits := NastiReadAddressChannel( - id = UInt(0), - addr = stream_addr, - size = stream_size, - len = stream_len, - burst = BURST_FIXED) - io.mmio.r.ready := (state === s_stream_read_resp) - - io.mmio.aw.valid := (state === s_stream_write_req) - io.mmio.aw.bits := NastiWriteAddressChannel( - id = UInt(0), - addr = stream_addr, - size = stream_size, - len = stream_len, - burst = BURST_FIXED) - io.mmio.w.valid := (state === s_stream_write_data) && get_done - io.mmio.w.bits := NastiWriteDataChannel( - data = stream_out_word, - last = stream_out_last) - io.mmio.b.ready := (state === s_stream_write_resp) - - when (io.dma.req.fire()) { - val src_off = io.dma.req.bits.source(blockOffset - 1, 0) - val dst_off = io.dma.req.bits.dest(blockOffset - 1, 0) - val direction = src_off < dst_off - - resp_xact_id := io.dma.req.bits.xact_id - resp_client_id := io.dma.req.bits.client_id - src_block := io.dma.req.bits.source(addrBits - 1, blockOffset) - dst_block := io.dma.req.bits.dest(addrBits - 1, blockOffset) - alignment := Mux(direction, dst_off - src_off, src_off - dst_off) - shift_dir := direction - offset := dst_off - bytes_left := io.dma.req.bits.length + dst_off - get_inflight := UInt(0) - put_inflight := Bool(false) - get_half := UInt(0) - put_half := UInt(0) - streaming := Bool(false) - stream_len := (io.dma.req.bits.length >> io.dma.req.bits.size) - UInt(1) - stream_size := io.dma.req.bits.size - stream_last := Bool(false) - - when (io.dma.req.bits.cmd === DMA_CMD_COPY) { - state := s_get - } .elsewhen (io.dma.req.bits.cmd(2, 1) === UInt("b01")) { - prefetch_put := io.dma.req.bits.cmd(0) - state := s_prefetch - } .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SIN) { - stream_addr := io.dma.req.bits.source - stream_idx := dst_off - streaming := Bool(true) - alignment := UInt(0) - state := s_stream_read_req - } .elsewhen (io.dma.req.bits.cmd === DMA_CMD_SOUT) { - stream_addr := io.dma.req.bits.dest - stream_idx := src_off - streaming := Bool(true) - bytes_left := io.dma.req.bits.length - state := s_stream_write_req - } - } - - when (io.mmio.ar.fire()) { state := s_stream_read_resp } - - when (io.mmio.r.fire()) { - data_buffer(stream_beat_idx) := stream_in_beat - stream_idx := stream_idx + stream_word_bytes - val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes - when (block_finished || io.mmio.r.bits.last) { state := s_put } - } - - when (io.mmio.aw.fire()) { state := s_get } - - when (io.mmio.w.fire()) { - stream_idx := stream_idx + stream_word_bytes - bytes_left := bytes_left - stream_word_bytes - val block_finished = stream_idx === UInt(blockBytes) - stream_word_bytes - when (stream_out_last) { - state := s_stream_write_resp - } .elsewhen (block_finished) { - state := s_get - } - } - - when (io.mmio.b.fire()) { state := s_resp } - - when (state === s_get && io.mem.acquire.ready) { - get_inflight := get_inflight | FillInterleaved(tlDataBeats, UIntToOH(get_half)) - src_block := src_block + UInt(1) - when (streaming) { - state := s_stream_write_data - } .otherwise { - val bytes_in_buffer = UInt(blockBytes) - alignment - val extra_read = alignment > UInt(0) && !shift_dir && // dst_off < src_off - get_half === UInt(0) && // this is the first block - bytes_in_buffer < bytes_left // there is still more data left to fetch - get_half := get_half + UInt(1) - when (!extra_read) { state := s_put } - } - } - - when (prefetch_sent) { - prefetch_busy := prefetch_busy | UIntToOH(prefetch_id) - when (bytes_left < UInt(blockBytes)) { - bytes_left := UInt(0) - state := s_resp - } .otherwise { - bytes_left := bytes_left - UInt(blockBytes) - dst_block := dst_block + UInt(1) - } - } - - when (io.mem.grant.fire()) { - when (gnt.g_type === Grant.prefetchAckType) { - prefetch_busy := prefetch_busy & ~UIntToOH(gnt.client_xact_id) - } .elsewhen (gnt.hasData()) { - val write_half = gnt.client_xact_id(0) - val write_idx = Cat(write_half, gnt.addr_beat) - get_inflight := get_inflight & ~UIntToOH(write_idx) - data_buffer(write_idx) := gnt.data - } .otherwise { - put_inflight := Bool(false) - } - } - - when (put_done) { // state === s_put - when (!streaming) { - put_half := put_half + UInt(1) - } - offset := UInt(0) - stream_idx := UInt(0) - when (bytes_left < UInt(blockBytes)) { - bytes_left := UInt(0) - } .otherwise { - bytes_left := bytes_left - UInt(blockBytes) - } - put_inflight := Bool(true) - dst_block := dst_block + UInt(1) - state := s_wait - } - - when (state === s_wait && get_done && !put_inflight) { - state := MuxCase(s_get, Seq( - (bytes_left === UInt(0)) -> s_resp, - streaming -> s_stream_read_resp)) - } - - when (io.dma.resp.fire()) { state := s_idle } -}